Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
dir.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3  * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License version 2.
8  */
9 
10 /*
11  * Implements Extendible Hashing as described in:
12  * "Extendible Hashing" by Fagin, et al in
13  * __ACM Trans. on Database Systems__, Sept 1979.
14  *
15  *
16  * Here's the layout of dirents which is essentially the same as that of ext2
17  * within a single block. The field de_name_len is the number of bytes
18  * actually required for the name (no null terminator). The field de_rec_len
19  * is the number of bytes allocated to the dirent. The offset of the next
20  * dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
21  * deleted, the preceding dirent inherits its allocated space, ie
22  * prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
23  * by adding de_rec_len to the current dirent, this essentially causes the
24  * deleted dirent to get jumped over when iterating through all the dirents.
25  *
26  * When deleting the first dirent in a block, there is no previous dirent so
27  * the field de_ino is set to zero to designate it as deleted. When allocating
28  * a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the
29  * first dirent has (de_ino == 0) and de_rec_len is large enough, this first
30  * dirent is allocated. Otherwise it must go through all the 'used' dirents
31  * searching for one in which the amount of total space minus the amount of
32  * used space will provide enough space for the new dirent.
33  *
34  * There are two types of blocks in which dirents reside. In a stuffed dinode,
35  * the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of
36  * the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
37  * beginning of the leaf block. The dirents reside in leaves when
38  *
39  * dip->i_diskflags & GFS2_DIF_EXHASH is true
40  *
41  * Otherwise, the dirents are "linear", within a single stuffed dinode block.
42  *
43  * When the dirents are in leaves, the actual contents of the directory file are
44  * used as an array of 64-bit block pointers pointing to the leaf blocks. The
45  * dirents are NOT in the directory file itself. There can be more than one
46  * block pointer in the array that points to the same leaf. In fact, when a
47  * directory is first converted from linear to exhash, all of the pointers
48  * point to the same leaf.
49  *
50  * When a leaf is completely full, the size of the hash table can be
51  * doubled unless it is already at the maximum size which is hard coded into
52  * GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list,
53  * but never before the maximum hash table size has been reached.
54  */
55 
56 #include <linux/slab.h>
57 #include <linux/spinlock.h>
58 #include <linux/buffer_head.h>
59 #include <linux/sort.h>
60 #include <linux/gfs2_ondisk.h>
61 #include <linux/crc32.h>
62 #include <linux/vmalloc.h>
63 
64 #include "gfs2.h"
65 #include "incore.h"
66 #include "dir.h"
67 #include "glock.h"
68 #include "inode.h"
69 #include "meta_io.h"
70 #include "quota.h"
71 #include "rgrp.h"
72 #include "trans.h"
73 #include "bmap.h"
74 #include "util.h"
75 
76 #define IS_LEAF 1 /* Hashed (leaf) directory */
77 #define IS_DINODE 2 /* Linear (stuffed dinode block) directory */
78 
79 #define MAX_RA_BLOCKS 32 /* max read-ahead blocks */
80 
81 #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
82 #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
83 
86 
87 typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
88  const struct qstr *name, void *opaque);
89 
91  struct buffer_head **bhp)
92 {
93  struct buffer_head *bh;
94 
95  bh = gfs2_meta_new(ip->i_gl, block);
96  gfs2_trans_add_bh(ip->i_gl, bh, 1);
97  gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
98  gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
99  *bhp = bh;
100  return 0;
101 }
102 
103 static int gfs2_dir_get_existing_buffer(struct gfs2_inode *ip, u64 block,
104  struct buffer_head **bhp)
105 {
106  struct buffer_head *bh;
107  int error;
108 
109  error = gfs2_meta_read(ip->i_gl, block, DIO_WAIT, &bh);
110  if (error)
111  return error;
112  if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), bh, GFS2_METATYPE_JD)) {
113  brelse(bh);
114  return -EIO;
115  }
116  *bhp = bh;
117  return 0;
118 }
119 
120 static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
121  unsigned int offset, unsigned int size)
122 {
123  struct buffer_head *dibh;
124  int error;
125 
126  error = gfs2_meta_inode_buffer(ip, &dibh);
127  if (error)
128  return error;
129 
130  gfs2_trans_add_bh(ip->i_gl, dibh, 1);
131  memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
132  if (ip->i_inode.i_size < offset + size)
133  i_size_write(&ip->i_inode, offset + size);
134  ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
135  gfs2_dinode_out(ip, dibh->b_data);
136 
137  brelse(dibh);
138 
139  return size;
140 }
141 
142 
143 
153 static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf,
154  u64 offset, unsigned int size)
155 {
156  struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
157  struct buffer_head *dibh;
158  u64 lblock, dblock;
159  u32 extlen = 0;
160  unsigned int o;
161  int copied = 0;
162  int error = 0;
163  int new = 0;
164 
165  if (!size)
166  return 0;
167 
168  if (gfs2_is_stuffed(ip) &&
169  offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
170  return gfs2_dir_write_stuffed(ip, buf, (unsigned int)offset,
171  size);
172 
173  if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
174  return -EINVAL;
175 
176  if (gfs2_is_stuffed(ip)) {
177  error = gfs2_unstuff_dinode(ip, NULL);
178  if (error)
179  return error;
180  }
181 
182  lblock = offset;
183  o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
184 
185  while (copied < size) {
186  unsigned int amount;
187  struct buffer_head *bh;
188 
189  amount = size - copied;
190  if (amount > sdp->sd_sb.sb_bsize - o)
191  amount = sdp->sd_sb.sb_bsize - o;
192 
193  if (!extlen) {
194  new = 1;
195  error = gfs2_extent_map(&ip->i_inode, lblock, &new,
196  &dblock, &extlen);
197  if (error)
198  goto fail;
199  error = -EIO;
200  if (gfs2_assert_withdraw(sdp, dblock))
201  goto fail;
202  }
203 
204  if (amount == sdp->sd_jbsize || new)
205  error = gfs2_dir_get_new_buffer(ip, dblock, &bh);
206  else
207  error = gfs2_dir_get_existing_buffer(ip, dblock, &bh);
208 
209  if (error)
210  goto fail;
211 
212  gfs2_trans_add_bh(ip->i_gl, bh, 1);
213  memcpy(bh->b_data + o, buf, amount);
214  brelse(bh);
215 
216  buf += amount;
217  copied += amount;
218  lblock++;
219  dblock++;
220  extlen--;
221 
222  o = sizeof(struct gfs2_meta_header);
223  }
224 
225 out:
226  error = gfs2_meta_inode_buffer(ip, &dibh);
227  if (error)
228  return error;
229 
230  if (ip->i_inode.i_size < offset + copied)
231  i_size_write(&ip->i_inode, offset + copied);
232  ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
233 
234  gfs2_trans_add_bh(ip->i_gl, dibh, 1);
235  gfs2_dinode_out(ip, dibh->b_data);
236  brelse(dibh);
237 
238  return copied;
239 fail:
240  if (copied)
241  goto out;
242  return error;
243 }
244 
245 static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, __be64 *buf,
246  unsigned int size)
247 {
248  struct buffer_head *dibh;
249  int error;
250 
251  error = gfs2_meta_inode_buffer(ip, &dibh);
252  if (!error) {
253  memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
254  brelse(dibh);
255  }
256 
257  return (error) ? error : size;
258 }
259 
260 
269 static int gfs2_dir_read_data(struct gfs2_inode *ip, __be64 *buf,
270  unsigned int size)
271 {
272  struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
273  u64 lblock, dblock;
274  u32 extlen = 0;
275  unsigned int o;
276  int copied = 0;
277  int error = 0;
278 
279  if (gfs2_is_stuffed(ip))
280  return gfs2_dir_read_stuffed(ip, buf, size);
281 
282  if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
283  return -EINVAL;
284 
285  lblock = 0;
286  o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
287 
288  while (copied < size) {
289  unsigned int amount;
290  struct buffer_head *bh;
291  int new;
292 
293  amount = size - copied;
294  if (amount > sdp->sd_sb.sb_bsize - o)
295  amount = sdp->sd_sb.sb_bsize - o;
296 
297  if (!extlen) {
298  new = 0;
299  error = gfs2_extent_map(&ip->i_inode, lblock, &new,
300  &dblock, &extlen);
301  if (error || !dblock)
302  goto fail;
303  BUG_ON(extlen < 1);
304  bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
305  } else {
306  error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh);
307  if (error)
308  goto fail;
309  }
310  error = gfs2_metatype_check(sdp, bh, GFS2_METATYPE_JD);
311  if (error) {
312  brelse(bh);
313  goto fail;
314  }
315  dblock++;
316  extlen--;
317  memcpy(buf, bh->b_data + o, amount);
318  brelse(bh);
319  buf += (amount/sizeof(__be64));
320  copied += amount;
321  lblock++;
322  o = sizeof(struct gfs2_meta_header);
323  }
324 
325  return copied;
326 fail:
327  return (copied) ? copied : error;
328 }
329 
337 static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
338 {
339  struct inode *inode = &ip->i_inode;
340  int ret;
341  u32 hsize;
342  __be64 *hc;
343 
345 
346  hc = ip->i_hash_cache;
347  if (hc)
348  return hc;
349 
350  hsize = 1 << ip->i_depth;
351  hsize *= sizeof(__be64);
352  if (hsize != i_size_read(&ip->i_inode)) {
353  gfs2_consist_inode(ip);
354  return ERR_PTR(-EIO);
355  }
356 
357  hc = kmalloc(hsize, GFP_NOFS);
358  ret = -ENOMEM;
359  if (hc == NULL)
360  return ERR_PTR(-ENOMEM);
361 
362  ret = gfs2_dir_read_data(ip, hc, hsize);
363  if (ret < 0) {
364  kfree(hc);
365  return ERR_PTR(ret);
366  }
367 
368  spin_lock(&inode->i_lock);
369  if (ip->i_hash_cache)
370  kfree(hc);
371  else
372  ip->i_hash_cache = hc;
373  spin_unlock(&inode->i_lock);
374 
375  return ip->i_hash_cache;
376 }
377 
385 {
386  __be64 *hc = ip->i_hash_cache;
387  ip->i_hash_cache = NULL;
388  kfree(hc);
389 }
390 
391 static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
392 {
393  return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0;
394 }
395 
396 static inline int __gfs2_dirent_find(const struct gfs2_dirent *dent,
397  const struct qstr *name, int ret)
398 {
399  if (!gfs2_dirent_sentinel(dent) &&
400  be32_to_cpu(dent->de_hash) == name->hash &&
401  be16_to_cpu(dent->de_name_len) == name->len &&
402  memcmp(dent+1, name->name, name->len) == 0)
403  return ret;
404  return 0;
405 }
406 
407 static int gfs2_dirent_find(const struct gfs2_dirent *dent,
408  const struct qstr *name,
409  void *opaque)
410 {
411  return __gfs2_dirent_find(dent, name, 1);
412 }
413 
414 static int gfs2_dirent_prev(const struct gfs2_dirent *dent,
415  const struct qstr *name,
416  void *opaque)
417 {
418  return __gfs2_dirent_find(dent, name, 2);
419 }
420 
421 /*
422  * name->name holds ptr to start of block.
423  * name->len holds size of block.
424  */
425 static int gfs2_dirent_last(const struct gfs2_dirent *dent,
426  const struct qstr *name,
427  void *opaque)
428 {
429  const char *start = name->name;
430  const char *end = (const char *)dent + be16_to_cpu(dent->de_rec_len);
431  if (name->len == (end - start))
432  return 1;
433  return 0;
434 }
435 
436 static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
437  const struct qstr *name,
438  void *opaque)
439 {
440  unsigned required = GFS2_DIRENT_SIZE(name->len);
441  unsigned actual = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
442  unsigned totlen = be16_to_cpu(dent->de_rec_len);
443 
444  if (gfs2_dirent_sentinel(dent))
445  actual = 0;
446  if (totlen - actual >= required)
447  return 1;
448  return 0;
449 }
450 
452  const struct gfs2_dirent **pdent;
453  unsigned offset;
454 };
455 
456 static int gfs2_dirent_gather(const struct gfs2_dirent *dent,
457  const struct qstr *name,
458  void *opaque)
459 {
460  struct dirent_gather *g = opaque;
461  if (!gfs2_dirent_sentinel(dent)) {
462  g->pdent[g->offset++] = dent;
463  }
464  return 0;
465 }
466 
467 /*
468  * Other possible things to check:
469  * - Inode located within filesystem size (and on valid block)
470  * - Valid directory entry type
471  * Not sure how heavy-weight we want to make this... could also check
472  * hash is correct for example, but that would take a lot of extra time.
473  * For now the most important thing is to check that the various sizes
474  * are correct.
475  */
476 static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset,
477  unsigned int size, unsigned int len, int first)
478 {
479  const char *msg = "gfs2_dirent too small";
480  if (unlikely(size < sizeof(struct gfs2_dirent)))
481  goto error;
482  msg = "gfs2_dirent misaligned";
483  if (unlikely(offset & 0x7))
484  goto error;
485  msg = "gfs2_dirent points beyond end of block";
486  if (unlikely(offset + size > len))
487  goto error;
488  msg = "zero inode number";
489  if (unlikely(!first && gfs2_dirent_sentinel(dent)))
490  goto error;
491  msg = "name length is greater than space in dirent";
492  if (!gfs2_dirent_sentinel(dent) &&
493  unlikely(sizeof(struct gfs2_dirent)+be16_to_cpu(dent->de_name_len) >
494  size))
495  goto error;
496  return 0;
497 error:
498  printk(KERN_WARNING "gfs2_check_dirent: %s (%s)\n", msg,
499  first ? "first in block" : "not first in block");
500  return -EIO;
501 }
502 
503 static int gfs2_dirent_offset(const void *buf)
504 {
505  const struct gfs2_meta_header *h = buf;
506  int offset;
507 
508  BUG_ON(buf == NULL);
509 
510  switch(be32_to_cpu(h->mh_type)) {
511  case GFS2_METATYPE_LF:
512  offset = sizeof(struct gfs2_leaf);
513  break;
514  case GFS2_METATYPE_DI:
515  offset = sizeof(struct gfs2_dinode);
516  break;
517  default:
518  goto wrong_type;
519  }
520  return offset;
521 wrong_type:
522  printk(KERN_WARNING "gfs2_scan_dirent: wrong block type %u\n",
523  be32_to_cpu(h->mh_type));
524  return -1;
525 }
526 
527 static struct gfs2_dirent *gfs2_dirent_scan(struct inode *inode, void *buf,
528  unsigned int len, gfs2_dscan_t scan,
529  const struct qstr *name,
530  void *opaque)
531 {
532  struct gfs2_dirent *dent, *prev;
533  unsigned offset;
534  unsigned size;
535  int ret = 0;
536 
537  ret = gfs2_dirent_offset(buf);
538  if (ret < 0)
539  goto consist_inode;
540 
541  offset = ret;
542  prev = NULL;
543  dent = buf + offset;
544  size = be16_to_cpu(dent->de_rec_len);
545  if (gfs2_check_dirent(dent, offset, size, len, 1))
546  goto consist_inode;
547  do {
548  ret = scan(dent, name, opaque);
549  if (ret)
550  break;
551  offset += size;
552  if (offset == len)
553  break;
554  prev = dent;
555  dent = buf + offset;
556  size = be16_to_cpu(dent->de_rec_len);
557  if (gfs2_check_dirent(dent, offset, size, len, 0))
558  goto consist_inode;
559  } while(1);
560 
561  switch(ret) {
562  case 0:
563  return NULL;
564  case 1:
565  return dent;
566  case 2:
567  return prev ? prev : dent;
568  default:
569  BUG_ON(ret > 0);
570  return ERR_PTR(ret);
571  }
572 
573 consist_inode:
574  gfs2_consist_inode(GFS2_I(inode));
575  return ERR_PTR(-EIO);
576 }
577 
578 static int dirent_check_reclen(struct gfs2_inode *dip,
579  const struct gfs2_dirent *d, const void *end_p)
580 {
581  const void *ptr = d;
583 
584  if (unlikely(rec_len < sizeof(struct gfs2_dirent)))
585  goto broken;
586  ptr += rec_len;
587  if (ptr < end_p)
588  return rec_len;
589  if (ptr == end_p)
590  return -ENOENT;
591 broken:
592  gfs2_consist_inode(dip);
593  return -EIO;
594 }
595 
605 static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
606  struct gfs2_dirent **dent)
607 {
608  struct gfs2_dirent *cur = *dent, *tmp;
609  char *bh_end = bh->b_data + bh->b_size;
610  int ret;
611 
612  ret = dirent_check_reclen(dip, cur, bh_end);
613  if (ret < 0)
614  return ret;
615 
616  tmp = (void *)cur + ret;
617  ret = dirent_check_reclen(dip, tmp, bh_end);
618  if (ret == -EIO)
619  return ret;
620 
621  /* Only the first dent could ever have de_inum.no_addr == 0 */
622  if (gfs2_dirent_sentinel(tmp)) {
623  gfs2_consist_inode(dip);
624  return -EIO;
625  }
626 
627  *dent = tmp;
628  return 0;
629 }
630 
640 static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
641  struct gfs2_dirent *prev, struct gfs2_dirent *cur)
642 {
643  u16 cur_rec_len, prev_rec_len;
644 
645  if (gfs2_dirent_sentinel(cur)) {
646  gfs2_consist_inode(dip);
647  return;
648  }
649 
650  gfs2_trans_add_bh(dip->i_gl, bh, 1);
651 
652  /* If there is no prev entry, this is the first entry in the block.
653  The de_rec_len is already as big as it needs to be. Just zero
654  out the inode number and return. */
655 
656  if (!prev) {
657  cur->de_inum.no_addr = 0;
658  cur->de_inum.no_formal_ino = 0;
659  return;
660  }
661 
662  /* Combine this dentry with the previous one. */
663 
664  prev_rec_len = be16_to_cpu(prev->de_rec_len);
665  cur_rec_len = be16_to_cpu(cur->de_rec_len);
666 
667  if ((char *)prev + prev_rec_len != (char *)cur)
668  gfs2_consist_inode(dip);
669  if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size)
670  gfs2_consist_inode(dip);
671 
672  prev_rec_len += cur_rec_len;
673  prev->de_rec_len = cpu_to_be16(prev_rec_len);
674 }
675 
676 /*
677  * Takes a dent from which to grab space as an argument. Returns the
678  * newly created dent.
679  */
680 static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode,
681  struct gfs2_dirent *dent,
682  const struct qstr *name,
683  struct buffer_head *bh)
684 {
685  struct gfs2_inode *ip = GFS2_I(inode);
686  struct gfs2_dirent *ndent;
687  unsigned offset = 0, totlen;
688 
689  if (!gfs2_dirent_sentinel(dent))
690  offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len));
691  totlen = be16_to_cpu(dent->de_rec_len);
692  BUG_ON(offset + name->len > totlen);
693  gfs2_trans_add_bh(ip->i_gl, bh, 1);
694  ndent = (struct gfs2_dirent *)((char *)dent + offset);
695  dent->de_rec_len = cpu_to_be16(offset);
696  gfs2_qstr2dirent(name, totlen - offset, ndent);
697  return ndent;
698 }
699 
700 static struct gfs2_dirent *gfs2_dirent_alloc(struct inode *inode,
701  struct buffer_head *bh,
702  const struct qstr *name)
703 {
704  struct gfs2_dirent *dent;
705  dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
706  gfs2_dirent_find_space, name, NULL);
707  if (!dent || IS_ERR(dent))
708  return dent;
709  return gfs2_init_dirent(inode, dent, name, bh);
710 }
711 
712 static int get_leaf(struct gfs2_inode *dip, u64 leaf_no,
713  struct buffer_head **bhp)
714 {
715  int error;
716 
717  error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp);
718  if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) {
719  /* printk(KERN_INFO "block num=%llu\n", leaf_no); */
720  error = -EIO;
721  }
722 
723  return error;
724 }
725 
735 static int get_leaf_nr(struct gfs2_inode *dip, u32 index,
736  u64 *leaf_out)
737 {
738  __be64 *hash;
739 
740  hash = gfs2_dir_get_hash_table(dip);
741  if (IS_ERR(hash))
742  return PTR_ERR(hash);
743  *leaf_out = be64_to_cpu(*(hash + index));
744  return 0;
745 }
746 
747 static int get_first_leaf(struct gfs2_inode *dip, u32 index,
748  struct buffer_head **bh_out)
749 {
750  u64 leaf_no;
751  int error;
752 
753  error = get_leaf_nr(dip, index, &leaf_no);
754  if (!error)
755  error = get_leaf(dip, leaf_no, bh_out);
756 
757  return error;
758 }
759 
760 static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
761  const struct qstr *name,
762  gfs2_dscan_t scan,
763  struct buffer_head **pbh)
764 {
765  struct buffer_head *bh;
766  struct gfs2_dirent *dent;
767  struct gfs2_inode *ip = GFS2_I(inode);
768  int error;
769 
770  if (ip->i_diskflags & GFS2_DIF_EXHASH) {
771  struct gfs2_leaf *leaf;
772  unsigned hsize = 1 << ip->i_depth;
773  unsigned index;
774  u64 ln;
775  if (hsize * sizeof(u64) != i_size_read(inode)) {
776  gfs2_consist_inode(ip);
777  return ERR_PTR(-EIO);
778  }
779 
780  index = name->hash >> (32 - ip->i_depth);
781  error = get_first_leaf(ip, index, &bh);
782  if (error)
783  return ERR_PTR(error);
784  do {
785  dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
786  scan, name, NULL);
787  if (dent)
788  goto got_dent;
789  leaf = (struct gfs2_leaf *)bh->b_data;
790  ln = be64_to_cpu(leaf->lf_next);
791  brelse(bh);
792  if (!ln)
793  break;
794 
795  error = get_leaf(ip, ln, &bh);
796  } while(!error);
797 
798  return error ? ERR_PTR(error) : NULL;
799  }
800 
801 
802  error = gfs2_meta_inode_buffer(ip, &bh);
803  if (error)
804  return ERR_PTR(error);
805  dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, scan, name, NULL);
806 got_dent:
807  if (unlikely(dent == NULL || IS_ERR(dent))) {
808  brelse(bh);
809  bh = NULL;
810  }
811  *pbh = bh;
812  return dent;
813 }
814 
815 static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth)
816 {
817  struct gfs2_inode *ip = GFS2_I(inode);
818  unsigned int n = 1;
819  u64 bn;
820  int error;
821  struct buffer_head *bh;
822  struct gfs2_leaf *leaf;
823  struct gfs2_dirent *dent;
824  struct qstr name = { .name = "" };
825 
826  error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
827  if (error)
828  return NULL;
829  bh = gfs2_meta_new(ip->i_gl, bn);
830  if (!bh)
831  return NULL;
832 
833  gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1);
834  gfs2_trans_add_bh(ip->i_gl, bh, 1);
835  gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
836  leaf = (struct gfs2_leaf *)bh->b_data;
837  leaf->lf_depth = cpu_to_be16(depth);
838  leaf->lf_entries = 0;
840  leaf->lf_next = 0;
841  memset(leaf->lf_reserved, 0, sizeof(leaf->lf_reserved));
842  dent = (struct gfs2_dirent *)(leaf+1);
843  gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent);
844  *pbh = bh;
845  return leaf;
846 }
847 
855 static int dir_make_exhash(struct inode *inode)
856 {
857  struct gfs2_inode *dip = GFS2_I(inode);
858  struct gfs2_sbd *sdp = GFS2_SB(inode);
859  struct gfs2_dirent *dent;
860  struct qstr args;
861  struct buffer_head *bh, *dibh;
862  struct gfs2_leaf *leaf;
863  int y;
864  u32 x;
865  __be64 *lp;
866  u64 bn;
867  int error;
868 
869  error = gfs2_meta_inode_buffer(dip, &dibh);
870  if (error)
871  return error;
872 
873  /* Turn over a new leaf */
874 
875  leaf = new_leaf(inode, &bh, 0);
876  if (!leaf)
877  return -ENOSPC;
878  bn = bh->b_blocknr;
879 
880  gfs2_assert(sdp, dip->i_entries < (1 << 16));
881  leaf->lf_entries = cpu_to_be16(dip->i_entries);
882 
883  /* Copy dirents */
884 
885  gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh,
886  sizeof(struct gfs2_dinode));
887 
888  /* Find last entry */
889 
890  x = 0;
891  args.len = bh->b_size - sizeof(struct gfs2_dinode) +
893  args.name = bh->b_data;
894  dent = gfs2_dirent_scan(&dip->i_inode, bh->b_data, bh->b_size,
895  gfs2_dirent_last, &args, NULL);
896  if (!dent) {
897  brelse(bh);
898  brelse(dibh);
899  return -EIO;
900  }
901  if (IS_ERR(dent)) {
902  brelse(bh);
903  brelse(dibh);
904  return PTR_ERR(dent);
905  }
906 
907  /* Adjust the last dirent's record length
908  (Remember that dent still points to the last entry.) */
909 
911  sizeof(struct gfs2_dinode) -
912  sizeof(struct gfs2_leaf));
913 
914  brelse(bh);
915 
916  /* We're done with the new leaf block, now setup the new
917  hash table. */
918 
919  gfs2_trans_add_bh(dip->i_gl, dibh, 1);
920  gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
921 
922  lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode));
923 
924  for (x = sdp->sd_hash_ptrs; x--; lp++)
925  *lp = cpu_to_be64(bn);
926 
927  i_size_write(inode, sdp->sd_sb.sb_bsize / 2);
928  gfs2_add_inode_blocks(&dip->i_inode, 1);
930 
931  for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
932  dip->i_depth = y;
933 
934  gfs2_dinode_out(dip, dibh->b_data);
935 
936  brelse(dibh);
937 
938  return 0;
939 }
940 
950 static int dir_split_leaf(struct inode *inode, const struct qstr *name)
951 {
952  struct gfs2_inode *dip = GFS2_I(inode);
953  struct buffer_head *nbh, *obh, *dibh;
954  struct gfs2_leaf *nleaf, *oleaf;
955  struct gfs2_dirent *dent = NULL, *prev = NULL, *next = NULL, *new;
956  u32 start, len, half_len, divider;
957  u64 bn, leaf_no;
958  __be64 *lp;
959  u32 index;
960  int x, moved = 0;
961  int error;
962 
963  index = name->hash >> (32 - dip->i_depth);
964  error = get_leaf_nr(dip, index, &leaf_no);
965  if (error)
966  return error;
967 
968  /* Get the old leaf block */
969  error = get_leaf(dip, leaf_no, &obh);
970  if (error)
971  return error;
972 
973  oleaf = (struct gfs2_leaf *)obh->b_data;
974  if (dip->i_depth == be16_to_cpu(oleaf->lf_depth)) {
975  brelse(obh);
976  return 1; /* can't split */
977  }
978 
979  gfs2_trans_add_bh(dip->i_gl, obh, 1);
980 
981  nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1);
982  if (!nleaf) {
983  brelse(obh);
984  return -ENOSPC;
985  }
986  bn = nbh->b_blocknr;
987 
988  /* Compute the start and len of leaf pointers in the hash table. */
989  len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth));
990  half_len = len >> 1;
991  if (!half_len) {
992  printk(KERN_WARNING "i_depth %u lf_depth %u index %u\n", dip->i_depth, be16_to_cpu(oleaf->lf_depth), index);
993  gfs2_consist_inode(dip);
994  error = -EIO;
995  goto fail_brelse;
996  }
997 
998  start = (index & ~(len - 1));
999 
1000  /* Change the pointers.
1001  Don't bother distinguishing stuffed from non-stuffed.
1002  This code is complicated enough already. */
1003  lp = kmalloc(half_len * sizeof(__be64), GFP_NOFS);
1004  if (!lp) {
1005  error = -ENOMEM;
1006  goto fail_brelse;
1007  }
1008 
1009  /* Change the pointers */
1010  for (x = 0; x < half_len; x++)
1011  lp[x] = cpu_to_be64(bn);
1012 
1013  gfs2_dir_hash_inval(dip);
1014 
1015  error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64),
1016  half_len * sizeof(u64));
1017  if (error != half_len * sizeof(u64)) {
1018  if (error >= 0)
1019  error = -EIO;
1020  goto fail_lpfree;
1021  }
1022 
1023  kfree(lp);
1024 
1025  /* Compute the divider */
1026  divider = (start + half_len) << (32 - dip->i_depth);
1027 
1028  /* Copy the entries */
1029  dent = (struct gfs2_dirent *)(obh->b_data + sizeof(struct gfs2_leaf));
1030 
1031  do {
1032  next = dent;
1033  if (dirent_next(dip, obh, &next))
1034  next = NULL;
1035 
1036  if (!gfs2_dirent_sentinel(dent) &&
1037  be32_to_cpu(dent->de_hash) < divider) {
1038  struct qstr str;
1039  str.name = (char*)(dent+1);
1040  str.len = be16_to_cpu(dent->de_name_len);
1041  str.hash = be32_to_cpu(dent->de_hash);
1042  new = gfs2_dirent_alloc(inode, nbh, &str);
1043  if (IS_ERR(new)) {
1044  error = PTR_ERR(new);
1045  break;
1046  }
1047 
1048  new->de_inum = dent->de_inum; /* No endian worries */
1049  new->de_type = dent->de_type; /* No endian worries */
1050  be16_add_cpu(&nleaf->lf_entries, 1);
1051 
1052  dirent_del(dip, obh, prev, dent);
1053 
1054  if (!oleaf->lf_entries)
1055  gfs2_consist_inode(dip);
1056  be16_add_cpu(&oleaf->lf_entries, -1);
1057 
1058  if (!prev)
1059  prev = dent;
1060 
1061  moved = 1;
1062  } else {
1063  prev = dent;
1064  }
1065  dent = next;
1066  } while (dent);
1067 
1068  oleaf->lf_depth = nleaf->lf_depth;
1069 
1070  error = gfs2_meta_inode_buffer(dip, &dibh);
1071  if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) {
1072  gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1073  gfs2_add_inode_blocks(&dip->i_inode, 1);
1074  gfs2_dinode_out(dip, dibh->b_data);
1075  brelse(dibh);
1076  }
1077 
1078  brelse(obh);
1079  brelse(nbh);
1080 
1081  return error;
1082 
1083 fail_lpfree:
1084  kfree(lp);
1085 
1086 fail_brelse:
1087  brelse(obh);
1088  brelse(nbh);
1089  return error;
1090 }
1091 
1099 static int dir_double_exhash(struct gfs2_inode *dip)
1100 {
1101  struct buffer_head *dibh;
1102  u32 hsize;
1103  u32 hsize_bytes;
1104  __be64 *hc;
1105  __be64 *hc2, *h;
1106  int x;
1107  int error = 0;
1108 
1109  hsize = 1 << dip->i_depth;
1110  hsize_bytes = hsize * sizeof(__be64);
1111 
1112  hc = gfs2_dir_get_hash_table(dip);
1113  if (IS_ERR(hc))
1114  return PTR_ERR(hc);
1115 
1116  h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS);
1117  if (!hc2)
1118  return -ENOMEM;
1119 
1120  error = gfs2_meta_inode_buffer(dip, &dibh);
1121  if (error)
1122  goto out_kfree;
1123 
1124  for (x = 0; x < hsize; x++) {
1125  *h++ = *hc;
1126  *h++ = *hc;
1127  hc++;
1128  }
1129 
1130  error = gfs2_dir_write_data(dip, (char *)hc2, 0, hsize_bytes * 2);
1131  if (error != (hsize_bytes * 2))
1132  goto fail;
1133 
1134  gfs2_dir_hash_inval(dip);
1135  dip->i_hash_cache = hc2;
1136  dip->i_depth++;
1137  gfs2_dinode_out(dip, dibh->b_data);
1138  brelse(dibh);
1139  return 0;
1140 
1141 fail:
1142  /* Replace original hash table & size */
1143  gfs2_dir_write_data(dip, (char *)hc, 0, hsize_bytes);
1144  i_size_write(&dip->i_inode, hsize_bytes);
1145  gfs2_dinode_out(dip, dibh->b_data);
1146  brelse(dibh);
1147 out_kfree:
1148  kfree(hc2);
1149  return error;
1150 }
1151 
1163 static int compare_dents(const void *a, const void *b)
1164 {
1165  const struct gfs2_dirent *dent_a, *dent_b;
1166  u32 hash_a, hash_b;
1167  int ret = 0;
1168 
1169  dent_a = *(const struct gfs2_dirent **)a;
1170  hash_a = be32_to_cpu(dent_a->de_hash);
1171 
1172  dent_b = *(const struct gfs2_dirent **)b;
1173  hash_b = be32_to_cpu(dent_b->de_hash);
1174 
1175  if (hash_a > hash_b)
1176  ret = 1;
1177  else if (hash_a < hash_b)
1178  ret = -1;
1179  else {
1180  unsigned int len_a = be16_to_cpu(dent_a->de_name_len);
1181  unsigned int len_b = be16_to_cpu(dent_b->de_name_len);
1182 
1183  if (len_a > len_b)
1184  ret = 1;
1185  else if (len_a < len_b)
1186  ret = -1;
1187  else
1188  ret = memcmp(dent_a + 1, dent_b + 1, len_a);
1189  }
1190 
1191  return ret;
1192 }
1193 
1212 static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
1213  void *opaque, filldir_t filldir,
1214  const struct gfs2_dirent **darr, u32 entries,
1215  int *copied)
1216 {
1217  const struct gfs2_dirent *dent, *dent_next;
1218  u64 off, off_next;
1219  unsigned int x, y;
1220  int run = 0;
1221  int error = 0;
1222 
1223  sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);
1224 
1225  dent_next = darr[0];
1226  off_next = be32_to_cpu(dent_next->de_hash);
1227  off_next = gfs2_disk_hash2offset(off_next);
1228 
1229  for (x = 0, y = 1; x < entries; x++, y++) {
1230  dent = dent_next;
1231  off = off_next;
1232 
1233  if (y < entries) {
1234  dent_next = darr[y];
1235  off_next = be32_to_cpu(dent_next->de_hash);
1236  off_next = gfs2_disk_hash2offset(off_next);
1237 
1238  if (off < *offset)
1239  continue;
1240  *offset = off;
1241 
1242  if (off_next == off) {
1243  if (*copied && !run)
1244  return 1;
1245  run = 1;
1246  } else
1247  run = 0;
1248  } else {
1249  if (off < *offset)
1250  continue;
1251  *offset = off;
1252  }
1253 
1254  error = filldir(opaque, (const char *)(dent + 1),
1255  be16_to_cpu(dent->de_name_len),
1256  off, be64_to_cpu(dent->de_inum.no_addr),
1257  be16_to_cpu(dent->de_type));
1258  if (error)
1259  return 1;
1260 
1261  *copied = 1;
1262  }
1263 
1264  /* Increment the *offset by one, so the next time we come into the
1265  do_filldir fxn, we get the next entry instead of the last one in the
1266  current leaf */
1267 
1268  (*offset)++;
1269 
1270  return 0;
1271 }
1272 
1273 static void *gfs2_alloc_sort_buffer(unsigned size)
1274 {
1275  void *ptr = NULL;
1276 
1277  if (size < KMALLOC_MAX_SIZE)
1278  ptr = kmalloc(size, GFP_NOFS | __GFP_NOWARN);
1279  if (!ptr)
1280  ptr = __vmalloc(size, GFP_NOFS, PAGE_KERNEL);
1281  return ptr;
1282 }
1283 
1284 static void gfs2_free_sort_buffer(void *ptr)
1285 {
1286  if (is_vmalloc_addr(ptr))
1287  vfree(ptr);
1288  else
1289  kfree(ptr);
1290 }
1291 
1292 static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
1293  filldir_t filldir, int *copied, unsigned *depth,
1294  u64 leaf_no)
1295 {
1296  struct gfs2_inode *ip = GFS2_I(inode);
1297  struct gfs2_sbd *sdp = GFS2_SB(inode);
1298  struct buffer_head *bh;
1299  struct gfs2_leaf *lf;
1300  unsigned entries = 0, entries2 = 0;
1301  unsigned leaves = 0;
1302  const struct gfs2_dirent **darr, *dent;
1303  struct dirent_gather g;
1304  struct buffer_head **larr;
1305  int leaf = 0;
1306  int error, i;
1307  u64 lfn = leaf_no;
1308 
1309  do {
1310  error = get_leaf(ip, lfn, &bh);
1311  if (error)
1312  goto out;
1313  lf = (struct gfs2_leaf *)bh->b_data;
1314  if (leaves == 0)
1315  *depth = be16_to_cpu(lf->lf_depth);
1316  entries += be16_to_cpu(lf->lf_entries);
1317  leaves++;
1318  lfn = be64_to_cpu(lf->lf_next);
1319  brelse(bh);
1320  } while(lfn);
1321 
1322  if (!entries)
1323  return 0;
1324 
1325  error = -ENOMEM;
1326  /*
1327  * The extra 99 entries are not normally used, but are a buffer
1328  * zone in case the number of entries in the leaf is corrupt.
1329  * 99 is the maximum number of entries that can fit in a single
1330  * leaf block.
1331  */
1332  larr = gfs2_alloc_sort_buffer((leaves + entries + 99) * sizeof(void *));
1333  if (!larr)
1334  goto out;
1335  darr = (const struct gfs2_dirent **)(larr + leaves);
1336  g.pdent = darr;
1337  g.offset = 0;
1338  lfn = leaf_no;
1339 
1340  do {
1341  error = get_leaf(ip, lfn, &bh);
1342  if (error)
1343  goto out_free;
1344  lf = (struct gfs2_leaf *)bh->b_data;
1345  lfn = be64_to_cpu(lf->lf_next);
1346  if (lf->lf_entries) {
1347  entries2 += be16_to_cpu(lf->lf_entries);
1348  dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size,
1349  gfs2_dirent_gather, NULL, &g);
1350  error = PTR_ERR(dent);
1351  if (IS_ERR(dent))
1352  goto out_free;
1353  if (entries2 != g.offset) {
1354  fs_warn(sdp, "Number of entries corrupt in dir "
1355  "leaf %llu, entries2 (%u) != "
1356  "g.offset (%u)\n",
1357  (unsigned long long)bh->b_blocknr,
1358  entries2, g.offset);
1359 
1360  error = -EIO;
1361  goto out_free;
1362  }
1363  error = 0;
1364  larr[leaf++] = bh;
1365  } else {
1366  brelse(bh);
1367  }
1368  } while(lfn);
1369 
1370  BUG_ON(entries2 != entries);
1371  error = do_filldir_main(ip, offset, opaque, filldir, darr,
1372  entries, copied);
1373 out_free:
1374  for(i = 0; i < leaf; i++)
1375  brelse(larr[i]);
1376  gfs2_free_sort_buffer(larr);
1377 out:
1378  return error;
1379 }
1380 
1389 static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index,
1390  struct file_ra_state *f_ra)
1391 {
1392  struct gfs2_inode *ip = GFS2_I(inode);
1393  struct gfs2_glock *gl = ip->i_gl;
1394  struct buffer_head *bh;
1395  u64 blocknr = 0, last;
1396  unsigned count;
1397 
1398  /* First check if we've already read-ahead for the whole range. */
1399  if (index + MAX_RA_BLOCKS < f_ra->start)
1400  return;
1401 
1402  f_ra->start = max((pgoff_t)index, f_ra->start);
1403  for (count = 0; count < MAX_RA_BLOCKS; count++) {
1404  if (f_ra->start >= hsize) /* if exceeded the hash table */
1405  break;
1406 
1407  last = blocknr;
1408  blocknr = be64_to_cpu(ip->i_hash_cache[f_ra->start]);
1409  f_ra->start++;
1410  if (blocknr == last)
1411  continue;
1412 
1413  bh = gfs2_getbuf(gl, blocknr, 1);
1414  if (trylock_buffer(bh)) {
1415  if (buffer_uptodate(bh)) {
1416  unlock_buffer(bh);
1417  brelse(bh);
1418  continue;
1419  }
1420  bh->b_end_io = end_buffer_read_sync;
1421  submit_bh(READA | REQ_META, bh);
1422  continue;
1423  }
1424  brelse(bh);
1425  }
1426 }
1427 
1438 static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1439  filldir_t filldir, struct file_ra_state *f_ra)
1440 {
1441  struct gfs2_inode *dip = GFS2_I(inode);
1442  u32 hsize, len = 0;
1443  u32 hash, index;
1444  __be64 *lp;
1445  int copied = 0;
1446  int error = 0;
1447  unsigned depth = 0;
1448 
1449  hsize = 1 << dip->i_depth;
1450  hash = gfs2_dir_offset2hash(*offset);
1451  index = hash >> (32 - dip->i_depth);
1452 
1453  if (dip->i_hash_cache == NULL)
1454  f_ra->start = 0;
1455  lp = gfs2_dir_get_hash_table(dip);
1456  if (IS_ERR(lp))
1457  return PTR_ERR(lp);
1458 
1459  gfs2_dir_readahead(inode, hsize, index, f_ra);
1460 
1461  while (index < hsize) {
1462  error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
1463  &copied, &depth,
1464  be64_to_cpu(lp[index]));
1465  if (error)
1466  break;
1467 
1468  len = 1 << (dip->i_depth - depth);
1469  index = (index & ~(len - 1)) + len;
1470  }
1471 
1472  if (error > 0)
1473  error = 0;
1474  return error;
1475 }
1476 
1477 int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1478  filldir_t filldir, struct file_ra_state *f_ra)
1479 {
1480  struct gfs2_inode *dip = GFS2_I(inode);
1481  struct gfs2_sbd *sdp = GFS2_SB(inode);
1482  struct dirent_gather g;
1483  const struct gfs2_dirent **darr, *dent;
1484  struct buffer_head *dibh;
1485  int copied = 0;
1486  int error;
1487 
1488  if (!dip->i_entries)
1489  return 0;
1490 
1491  if (dip->i_diskflags & GFS2_DIF_EXHASH)
1492  return dir_e_read(inode, offset, opaque, filldir, f_ra);
1493 
1494  if (!gfs2_is_stuffed(dip)) {
1495  gfs2_consist_inode(dip);
1496  return -EIO;
1497  }
1498 
1499  error = gfs2_meta_inode_buffer(dip, &dibh);
1500  if (error)
1501  return error;
1502 
1503  error = -ENOMEM;
1504  /* 96 is max number of dirents which can be stuffed into an inode */
1505  darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_NOFS);
1506  if (darr) {
1507  g.pdent = darr;
1508  g.offset = 0;
1509  dent = gfs2_dirent_scan(inode, dibh->b_data, dibh->b_size,
1510  gfs2_dirent_gather, NULL, &g);
1511  if (IS_ERR(dent)) {
1512  error = PTR_ERR(dent);
1513  goto out;
1514  }
1515  if (dip->i_entries != g.offset) {
1516  fs_warn(sdp, "Number of entries corrupt in dir %llu, "
1517  "ip->i_entries (%u) != g.offset (%u)\n",
1518  (unsigned long long)dip->i_no_addr,
1519  dip->i_entries,
1520  g.offset);
1521  error = -EIO;
1522  goto out;
1523  }
1524  error = do_filldir_main(dip, offset, opaque, filldir, darr,
1525  dip->i_entries, &copied);
1526 out:
1527  kfree(darr);
1528  }
1529 
1530  if (error > 0)
1531  error = 0;
1532 
1533  brelse(dibh);
1534 
1535  return error;
1536 }
1537 
1550 struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
1551 {
1552  struct buffer_head *bh;
1553  struct gfs2_dirent *dent;
1554  struct inode *inode;
1555 
1556  dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
1557  if (dent) {
1558  if (IS_ERR(dent))
1559  return ERR_CAST(dent);
1560  inode = gfs2_inode_lookup(dir->i_sb,
1561  be16_to_cpu(dent->de_type),
1562  be64_to_cpu(dent->de_inum.no_addr),
1563  be64_to_cpu(dent->de_inum.no_formal_ino), 0);
1564  brelse(bh);
1565  return inode;
1566  }
1567  return ERR_PTR(-ENOENT);
1568 }
1569 
1570 int gfs2_dir_check(struct inode *dir, const struct qstr *name,
1571  const struct gfs2_inode *ip)
1572 {
1573  struct buffer_head *bh;
1574  struct gfs2_dirent *dent;
1575  int ret = -ENOENT;
1576 
1577  dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
1578  if (dent) {
1579  if (IS_ERR(dent))
1580  return PTR_ERR(dent);
1581  if (ip) {
1582  if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr)
1583  goto out;
1584  if (be64_to_cpu(dent->de_inum.no_formal_ino) !=
1585  ip->i_no_formal_ino)
1586  goto out;
1587  if (unlikely(IF2DT(ip->i_inode.i_mode) !=
1588  be16_to_cpu(dent->de_type))) {
1589  gfs2_consist_inode(GFS2_I(dir));
1590  ret = -EIO;
1591  goto out;
1592  }
1593  }
1594  ret = 0;
1595 out:
1596  brelse(bh);
1597  }
1598  return ret;
1599 }
1600 
1601 static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1602 {
1603  struct buffer_head *bh, *obh;
1604  struct gfs2_inode *ip = GFS2_I(inode);
1605  struct gfs2_leaf *leaf, *oleaf;
1606  int error;
1607  u32 index;
1608  u64 bn;
1609 
1610  index = name->hash >> (32 - ip->i_depth);
1611  error = get_first_leaf(ip, index, &obh);
1612  if (error)
1613  return error;
1614  do {
1615  oleaf = (struct gfs2_leaf *)obh->b_data;
1616  bn = be64_to_cpu(oleaf->lf_next);
1617  if (!bn)
1618  break;
1619  brelse(obh);
1620  error = get_leaf(ip, bn, &obh);
1621  if (error)
1622  return error;
1623  } while(1);
1624 
1625  gfs2_trans_add_bh(ip->i_gl, obh, 1);
1626 
1627  leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth));
1628  if (!leaf) {
1629  brelse(obh);
1630  return -ENOSPC;
1631  }
1632  oleaf->lf_next = cpu_to_be64(bh->b_blocknr);
1633  brelse(bh);
1634  brelse(obh);
1635 
1636  error = gfs2_meta_inode_buffer(ip, &bh);
1637  if (error)
1638  return error;
1639  gfs2_trans_add_bh(ip->i_gl, bh, 1);
1640  gfs2_add_inode_blocks(&ip->i_inode, 1);
1641  gfs2_dinode_out(ip, bh->b_data);
1642  brelse(bh);
1643  return 0;
1644 }
1645 
1656 int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1657  const struct gfs2_inode *nip)
1658 {
1659  struct gfs2_inode *ip = GFS2_I(inode);
1660  struct buffer_head *bh;
1661  struct gfs2_dirent *dent;
1662  struct gfs2_leaf *leaf;
1663  int error;
1664 
1665  while(1) {
1666  dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space,
1667  &bh);
1668  if (dent) {
1669  if (IS_ERR(dent))
1670  return PTR_ERR(dent);
1671  dent = gfs2_init_dirent(inode, dent, name, bh);
1672  gfs2_inum_out(nip, dent);
1673  dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
1674  if (ip->i_diskflags & GFS2_DIF_EXHASH) {
1675  leaf = (struct gfs2_leaf *)bh->b_data;
1676  be16_add_cpu(&leaf->lf_entries, 1);
1677  }
1678  brelse(bh);
1679  error = gfs2_meta_inode_buffer(ip, &bh);
1680  if (error)
1681  break;
1682  gfs2_trans_add_bh(ip->i_gl, bh, 1);
1683  ip->i_entries++;
1684  ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1685  if (S_ISDIR(nip->i_inode.i_mode))
1686  inc_nlink(&ip->i_inode);
1687  gfs2_dinode_out(ip, bh->b_data);
1688  brelse(bh);
1689  error = 0;
1690  break;
1691  }
1692  if (!(ip->i_diskflags & GFS2_DIF_EXHASH)) {
1693  error = dir_make_exhash(inode);
1694  if (error)
1695  break;
1696  continue;
1697  }
1698  error = dir_split_leaf(inode, name);
1699  if (error == 0)
1700  continue;
1701  if (error < 0)
1702  break;
1703  if (ip->i_depth < GFS2_DIR_MAX_DEPTH) {
1704  error = dir_double_exhash(ip);
1705  if (error)
1706  break;
1707  error = dir_split_leaf(inode, name);
1708  if (error < 0)
1709  break;
1710  if (error == 0)
1711  continue;
1712  }
1713  error = dir_new_leaf(inode, name);
1714  if (!error)
1715  continue;
1716  error = -ENOSPC;
1717  break;
1718  }
1719  return error;
1720 }
1721 
1722 
1731 int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
1732 {
1733  const struct qstr *name = &dentry->d_name;
1734  struct gfs2_dirent *dent, *prev = NULL;
1735  struct buffer_head *bh;
1736 
1737  /* Returns _either_ the entry (if its first in block) or the
1738  previous entry otherwise */
1739  dent = gfs2_dirent_search(&dip->i_inode, name, gfs2_dirent_prev, &bh);
1740  if (!dent) {
1741  gfs2_consist_inode(dip);
1742  return -EIO;
1743  }
1744  if (IS_ERR(dent)) {
1745  gfs2_consist_inode(dip);
1746  return PTR_ERR(dent);
1747  }
1748  /* If not first in block, adjust pointers accordingly */
1749  if (gfs2_dirent_find(dent, name, NULL) == 0) {
1750  prev = dent;
1751  dent = (struct gfs2_dirent *)((char *)dent + be16_to_cpu(prev->de_rec_len));
1752  }
1753 
1754  dirent_del(dip, bh, prev, dent);
1755  if (dip->i_diskflags & GFS2_DIF_EXHASH) {
1756  struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
1757  u16 entries = be16_to_cpu(leaf->lf_entries);
1758  if (!entries)
1759  gfs2_consist_inode(dip);
1760  leaf->lf_entries = cpu_to_be16(--entries);
1761  }
1762  brelse(bh);
1763 
1764  if (!dip->i_entries)
1765  gfs2_consist_inode(dip);
1766  dip->i_entries--;
1767  dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
1768  if (S_ISDIR(dentry->d_inode->i_mode))
1769  drop_nlink(&dip->i_inode);
1770  mark_inode_dirty(&dip->i_inode);
1771 
1772  return 0;
1773 }
1774 
1788 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1789  const struct gfs2_inode *nip, unsigned int new_type)
1790 {
1791  struct buffer_head *bh;
1792  struct gfs2_dirent *dent;
1793  int error;
1794 
1795  dent = gfs2_dirent_search(&dip->i_inode, filename, gfs2_dirent_find, &bh);
1796  if (!dent) {
1797  gfs2_consist_inode(dip);
1798  return -EIO;
1799  }
1800  if (IS_ERR(dent))
1801  return PTR_ERR(dent);
1802 
1803  gfs2_trans_add_bh(dip->i_gl, bh, 1);
1804  gfs2_inum_out(nip, dent);
1805  dent->de_type = cpu_to_be16(new_type);
1806 
1807  if (dip->i_diskflags & GFS2_DIF_EXHASH) {
1808  brelse(bh);
1809  error = gfs2_meta_inode_buffer(dip, &bh);
1810  if (error)
1811  return error;
1812  gfs2_trans_add_bh(dip->i_gl, bh, 1);
1813  }
1814 
1815  dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
1816  gfs2_dinode_out(dip, bh->b_data);
1817  brelse(bh);
1818  return 0;
1819 }
1820 
1833 static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1834  u64 leaf_no, struct buffer_head *leaf_bh,
1835  int last_dealloc)
1836 {
1837  struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1838  struct gfs2_leaf *tmp_leaf;
1839  struct gfs2_rgrp_list rlist;
1840  struct buffer_head *bh, *dibh;
1841  u64 blk, nblk;
1842  unsigned int rg_blocks = 0, l_blocks = 0;
1843  char *ht;
1844  unsigned int x, size = len * sizeof(u64);
1845  int error;
1846 
1847  error = gfs2_rindex_update(sdp);
1848  if (error)
1849  return error;
1850 
1851  memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1852 
1853  ht = kzalloc(size, GFP_NOFS);
1854  if (!ht)
1855  return -ENOMEM;
1856 
1858  if (error)
1859  goto out;
1860 
1861  /* Count the number of leaves */
1862  bh = leaf_bh;
1863 
1864  for (blk = leaf_no; blk; blk = nblk) {
1865  if (blk != leaf_no) {
1866  error = get_leaf(dip, blk, &bh);
1867  if (error)
1868  goto out_rlist;
1869  }
1870  tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1871  nblk = be64_to_cpu(tmp_leaf->lf_next);
1872  if (blk != leaf_no)
1873  brelse(bh);
1874 
1875  gfs2_rlist_add(dip, &rlist, blk);
1876  l_blocks++;
1877  }
1878 
1880 
1881  for (x = 0; x < rlist.rl_rgrps; x++) {
1882  struct gfs2_rgrpd *rgd;
1883  rgd = rlist.rl_ghs[x].gh_gl->gl_object;
1884  rg_blocks += rgd->rd_length;
1885  }
1886 
1887  error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
1888  if (error)
1889  goto out_rlist;
1890 
1891  error = gfs2_trans_begin(sdp,
1892  rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) +
1893  RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks);
1894  if (error)
1895  goto out_rg_gunlock;
1896 
1897  bh = leaf_bh;
1898 
1899  for (blk = leaf_no; blk; blk = nblk) {
1900  if (blk != leaf_no) {
1901  error = get_leaf(dip, blk, &bh);
1902  if (error)
1903  goto out_end_trans;
1904  }
1905  tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1906  nblk = be64_to_cpu(tmp_leaf->lf_next);
1907  if (blk != leaf_no)
1908  brelse(bh);
1909 
1910  gfs2_free_meta(dip, blk, 1);
1911  gfs2_add_inode_blocks(&dip->i_inode, -1);
1912  }
1913 
1914  error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size);
1915  if (error != size) {
1916  if (error >= 0)
1917  error = -EIO;
1918  goto out_end_trans;
1919  }
1920 
1921  error = gfs2_meta_inode_buffer(dip, &dibh);
1922  if (error)
1923  goto out_end_trans;
1924 
1925  gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1926  /* On the last dealloc, make this a regular file in case we crash.
1927  (We don't want to free these blocks a second time.) */
1928  if (last_dealloc)
1929  dip->i_inode.i_mode = S_IFREG;
1930  gfs2_dinode_out(dip, dibh->b_data);
1931  brelse(dibh);
1932 
1933 out_end_trans:
1934  gfs2_trans_end(sdp);
1935 out_rg_gunlock:
1936  gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
1937 out_rlist:
1939  gfs2_quota_unhold(dip);
1940 out:
1941  kfree(ht);
1942  return error;
1943 }
1944 
1956 {
1957  struct buffer_head *bh;
1958  struct gfs2_leaf *leaf;
1959  u32 hsize, len;
1960  u32 index = 0, next_index;
1961  __be64 *lp;
1962  u64 leaf_no;
1963  int error = 0, last;
1964 
1965  hsize = 1 << dip->i_depth;
1966 
1967  lp = gfs2_dir_get_hash_table(dip);
1968  if (IS_ERR(lp))
1969  return PTR_ERR(lp);
1970 
1971  while (index < hsize) {
1972  leaf_no = be64_to_cpu(lp[index]);
1973  if (leaf_no) {
1974  error = get_leaf(dip, leaf_no, &bh);
1975  if (error)
1976  goto out;
1977  leaf = (struct gfs2_leaf *)bh->b_data;
1978  len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
1979 
1980  next_index = (index & ~(len - 1)) + len;
1981  last = ((next_index >= hsize) ? 1 : 0);
1982  error = leaf_dealloc(dip, index, len, leaf_no, bh,
1983  last);
1984  brelse(bh);
1985  if (error)
1986  goto out;
1987  index = next_index;
1988  } else
1989  index++;
1990  }
1991 
1992  if (index != hsize) {
1993  gfs2_consist_inode(dip);
1994  error = -EIO;
1995  }
1996 
1997 out:
1998 
1999  return error;
2000 }
2001 
2010 int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name)
2011 {
2012  struct gfs2_dirent *dent;
2013  struct buffer_head *bh;
2014 
2015  dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh);
2016  if (!dent) {
2017  return 1;
2018  }
2019  if (IS_ERR(dent))
2020  return PTR_ERR(dent);
2021  brelse(bh);
2022  return 0;
2023 }
2024