Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
xfs_dir2_leaf.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_mount.h"
27 #include "xfs_da_btree.h"
28 #include "xfs_bmap_btree.h"
29 #include "xfs_dinode.h"
30 #include "xfs_inode.h"
31 #include "xfs_bmap.h"
32 #include "xfs_dir2_format.h"
33 #include "xfs_dir2_priv.h"
34 #include "xfs_error.h"
35 #include "xfs_trace.h"
36 
37 /*
38  * Local function declarations.
39  */
40 #ifdef DEBUG
41 static void xfs_dir2_leaf_check(struct xfs_inode *dp, struct xfs_buf *bp);
42 #else
43 #define xfs_dir2_leaf_check(dp, bp)
44 #endif
45 static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
46  int *indexp, struct xfs_buf **dbpp);
47 static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp,
48  int first, int last);
49 static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp);
50 
51 
52 /*
53  * Convert a block form directory to a leaf form directory.
54  */
55 int /* error */
57  xfs_da_args_t *args, /* operation arguments */
58  struct xfs_buf *dbp) /* input block's buffer */
59 {
60  __be16 *bestsp; /* leaf's bestsp entries */
61  xfs_dablk_t blkno; /* leaf block's bno */
62  xfs_dir2_data_hdr_t *hdr; /* block header */
63  xfs_dir2_leaf_entry_t *blp; /* block's leaf entries */
64  xfs_dir2_block_tail_t *btp; /* block's tail */
65  xfs_inode_t *dp; /* incore directory inode */
66  int error; /* error return code */
67  struct xfs_buf *lbp; /* leaf block's buffer */
68  xfs_dir2_db_t ldb; /* leaf block's bno */
69  xfs_dir2_leaf_t *leaf; /* leaf structure */
70  xfs_dir2_leaf_tail_t *ltp; /* leaf's tail */
71  xfs_mount_t *mp; /* filesystem mount point */
72  int needlog; /* need to log block header */
73  int needscan; /* need to rescan bestfree */
74  xfs_trans_t *tp; /* transaction pointer */
75 
76  trace_xfs_dir2_block_to_leaf(args);
77 
78  dp = args->dp;
79  mp = dp->i_mount;
80  tp = args->trans;
81  /*
82  * Add the leaf block to the inode.
83  * This interface will only put blocks in the leaf/node range.
84  * Since that's empty now, we'll get the root (block 0 in range).
85  */
86  if ((error = xfs_da_grow_inode(args, &blkno))) {
87  return error;
88  }
89  ldb = xfs_dir2_da_to_db(mp, blkno);
90  ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
91  /*
92  * Initialize the leaf block, get a buffer for it.
93  */
94  if ((error = xfs_dir2_leaf_init(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC))) {
95  return error;
96  }
97  ASSERT(lbp != NULL);
98  leaf = lbp->b_addr;
99  hdr = dbp->b_addr;
100  xfs_dir2_data_check(dp, dbp);
101  btp = xfs_dir2_block_tail_p(mp, hdr);
102  blp = xfs_dir2_block_leaf_p(btp);
103  /*
104  * Set the counts in the leaf header.
105  */
106  leaf->hdr.count = cpu_to_be16(be32_to_cpu(btp->count));
107  leaf->hdr.stale = cpu_to_be16(be32_to_cpu(btp->stale));
108  /*
109  * Could compact these but I think we always do the conversion
110  * after squeezing out stale entries.
111  */
112  memcpy(leaf->ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
113  xfs_dir2_leaf_log_ents(tp, lbp, 0, be16_to_cpu(leaf->hdr.count) - 1);
114  needscan = 0;
115  needlog = 1;
116  /*
117  * Make the space formerly occupied by the leaf entries and block
118  * tail be free.
119  */
120  xfs_dir2_data_make_free(tp, dbp,
121  (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
122  (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize -
123  (char *)blp),
124  &needlog, &needscan);
125  /*
126  * Fix up the block header, make it a data block.
127  */
129  if (needscan)
130  xfs_dir2_data_freescan(mp, hdr, &needlog);
131  /*
132  * Set up leaf tail and bests table.
133  */
134  ltp = xfs_dir2_leaf_tail_p(mp, leaf);
135  ltp->bestcount = cpu_to_be32(1);
136  bestsp = xfs_dir2_leaf_bests_p(ltp);
137  bestsp[0] = hdr->bestfree[0].length;
138  /*
139  * Log the data header and leaf bests table.
140  */
141  if (needlog)
142  xfs_dir2_data_log_header(tp, dbp);
143  xfs_dir2_leaf_check(dp, lbp);
144  xfs_dir2_data_check(dp, dbp);
145  xfs_dir2_leaf_log_bests(tp, lbp, 0, 0);
146  return 0;
147 }
148 
149 STATIC void
151  struct xfs_dir2_leaf *leaf,
152  int index,
153  int *lowstale,
154  int *highstale)
155 {
156  /*
157  * Find the first stale entry before our index, if any.
158  */
159  for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
160  if (leaf->ents[*lowstale].address ==
162  break;
163  }
164 
165  /*
166  * Find the first stale entry at or after our index, if any.
167  * Stop if the result would require moving more entries than using
168  * lowstale.
169  */
170  for (*highstale = index;
171  *highstale < be16_to_cpu(leaf->hdr.count);
172  ++*highstale) {
173  if (leaf->ents[*highstale].address ==
175  break;
176  if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
177  break;
178  }
179 }
180 
181 struct xfs_dir2_leaf_entry *
183  xfs_dir2_leaf_t *leaf, /* leaf structure */
184  int index, /* leaf table position */
185  int compact, /* need to compact leaves */
186  int lowstale, /* index of prev stale leaf */
187  int highstale, /* index of next stale leaf */
188  int *lfloglow, /* low leaf logging index */
189  int *lfloghigh) /* high leaf logging index */
190 {
191  if (!leaf->hdr.stale) {
192  xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */
193 
194  /*
195  * Now we need to make room to insert the leaf entry.
196  *
197  * If there are no stale entries, just insert a hole at index.
198  */
199  lep = &leaf->ents[index];
200  if (index < be16_to_cpu(leaf->hdr.count))
201  memmove(lep + 1, lep,
202  (be16_to_cpu(leaf->hdr.count) - index) *
203  sizeof(*lep));
204 
205  /*
206  * Record low and high logging indices for the leaf.
207  */
208  *lfloglow = index;
209  *lfloghigh = be16_to_cpu(leaf->hdr.count);
210  be16_add_cpu(&leaf->hdr.count, 1);
211  return lep;
212  }
213 
214  /*
215  * There are stale entries.
216  *
217  * We will use one of them for the new entry. It's probably not at
218  * the right location, so we'll have to shift some up or down first.
219  *
220  * If we didn't compact before, we need to find the nearest stale
221  * entries before and after our insertion point.
222  */
223  if (compact == 0)
224  xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
225 
226  /*
227  * If the low one is better, use it.
228  */
229  if (lowstale >= 0 &&
230  (highstale == be16_to_cpu(leaf->hdr.count) ||
231  index - lowstale - 1 < highstale - index)) {
232  ASSERT(index - lowstale - 1 >= 0);
233  ASSERT(leaf->ents[lowstale].address ==
235 
236  /*
237  * Copy entries up to cover the stale entry and make room
238  * for the new entry.
239  */
240  if (index - lowstale - 1 > 0) {
241  memmove(&leaf->ents[lowstale],
242  &leaf->ents[lowstale + 1],
243  (index - lowstale - 1) *
244  sizeof(xfs_dir2_leaf_entry_t));
245  }
246  *lfloglow = MIN(lowstale, *lfloglow);
247  *lfloghigh = MAX(index - 1, *lfloghigh);
248  be16_add_cpu(&leaf->hdr.stale, -1);
249  return &leaf->ents[index - 1];
250  }
251 
252  /*
253  * The high one is better, so use that one.
254  */
255  ASSERT(highstale - index >= 0);
256  ASSERT(leaf->ents[highstale].address ==
258 
259  /*
260  * Copy entries down to cover the stale entry and make room for the
261  * new entry.
262  */
263  if (highstale - index > 0) {
264  memmove(&leaf->ents[index + 1],
265  &leaf->ents[index],
266  (highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
267  }
268  *lfloglow = MIN(index, *lfloglow);
269  *lfloghigh = MAX(highstale, *lfloghigh);
270  be16_add_cpu(&leaf->hdr.stale, -1);
271  return &leaf->ents[index];
272 }
273 
274 /*
275  * Add an entry to a leaf form directory.
276  */
277 int /* error */
279  xfs_da_args_t *args) /* operation arguments */
280 {
281  __be16 *bestsp; /* freespace table in leaf */
282  int compact; /* need to compact leaves */
283  xfs_dir2_data_hdr_t *hdr; /* data block header */
284  struct xfs_buf *dbp; /* data block buffer */
285  xfs_dir2_data_entry_t *dep; /* data block entry */
286  xfs_inode_t *dp; /* incore directory inode */
287  xfs_dir2_data_unused_t *dup; /* data unused entry */
288  int error; /* error return value */
289  int grown; /* allocated new data block */
290  int highstale; /* index of next stale leaf */
291  int i; /* temporary, index */
292  int index; /* leaf table position */
293  struct xfs_buf *lbp; /* leaf's buffer */
294  xfs_dir2_leaf_t *leaf; /* leaf structure */
295  int length; /* length of new entry */
296  xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */
297  int lfloglow; /* low leaf logging index */
298  int lfloghigh; /* high leaf logging index */
299  int lowstale; /* index of prev stale leaf */
300  xfs_dir2_leaf_tail_t *ltp; /* leaf tail pointer */
301  xfs_mount_t *mp; /* filesystem mount point */
302  int needbytes; /* leaf block bytes needed */
303  int needlog; /* need to log data header */
304  int needscan; /* need to rescan data free */
305  __be16 *tagp; /* end of data entry */
306  xfs_trans_t *tp; /* transaction pointer */
307  xfs_dir2_db_t use_block; /* data block number */
308 
309  trace_xfs_dir2_leaf_addname(args);
310 
311  dp = args->dp;
312  tp = args->trans;
313  mp = dp->i_mount;
314  /*
315  * Read the leaf block.
316  */
317  error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
318  XFS_DATA_FORK);
319  if (error) {
320  return error;
321  }
322  ASSERT(lbp != NULL);
323  /*
324  * Look up the entry by hash value and name.
325  * We know it's not there, our caller has already done a lookup.
326  * So the index is of the entry to insert in front of.
327  * But if there are dup hash values the index is of the first of those.
328  */
329  index = xfs_dir2_leaf_search_hash(args, lbp);
330  leaf = lbp->b_addr;
331  ltp = xfs_dir2_leaf_tail_p(mp, leaf);
332  bestsp = xfs_dir2_leaf_bests_p(ltp);
333  length = xfs_dir2_data_entsize(args->namelen);
334  /*
335  * See if there are any entries with the same hash value
336  * and space in their block for the new entry.
337  * This is good because it puts multiple same-hash value entries
338  * in a data block, improving the lookup of those entries.
339  */
340  for (use_block = -1, lep = &leaf->ents[index];
341  index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
342  index++, lep++) {
344  continue;
345  i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
346  ASSERT(i < be32_to_cpu(ltp->bestcount));
347  ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
348  if (be16_to_cpu(bestsp[i]) >= length) {
349  use_block = i;
350  break;
351  }
352  }
353  /*
354  * Didn't find a block yet, linear search all the data blocks.
355  */
356  if (use_block == -1) {
357  for (i = 0; i < be32_to_cpu(ltp->bestcount); i++) {
358  /*
359  * Remember a block we see that's missing.
360  */
361  if (bestsp[i] == cpu_to_be16(NULLDATAOFF) &&
362  use_block == -1)
363  use_block = i;
364  else if (be16_to_cpu(bestsp[i]) >= length) {
365  use_block = i;
366  break;
367  }
368  }
369  }
370  /*
371  * How many bytes do we need in the leaf block?
372  */
373  needbytes = 0;
374  if (!leaf->hdr.stale)
375  needbytes += sizeof(xfs_dir2_leaf_entry_t);
376  if (use_block == -1)
377  needbytes += sizeof(xfs_dir2_data_off_t);
378 
379  /*
380  * Now kill use_block if it refers to a missing block, so we
381  * can use it as an indication of allocation needed.
382  */
383  if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF))
384  use_block = -1;
385  /*
386  * If we don't have enough free bytes but we can make enough
387  * by compacting out stale entries, we'll do that.
388  */
389  if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
390  needbytes && be16_to_cpu(leaf->hdr.stale) > 1) {
391  compact = 1;
392  }
393  /*
394  * Otherwise if we don't have enough free bytes we need to
395  * convert to node form.
396  */
397  else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(
398  leaf->hdr.count)] < needbytes) {
399  /*
400  * Just checking or no space reservation, give up.
401  */
402  if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
403  args->total == 0) {
404  xfs_trans_brelse(tp, lbp);
405  return XFS_ERROR(ENOSPC);
406  }
407  /*
408  * Convert to node form.
409  */
410  error = xfs_dir2_leaf_to_node(args, lbp);
411  if (error)
412  return error;
413  /*
414  * Then add the new entry.
415  */
416  return xfs_dir2_node_addname(args);
417  }
418  /*
419  * Otherwise it will fit without compaction.
420  */
421  else
422  compact = 0;
423  /*
424  * If just checking, then it will fit unless we needed to allocate
425  * a new data block.
426  */
427  if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
428  xfs_trans_brelse(tp, lbp);
429  return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
430  }
431  /*
432  * If no allocations are allowed, return now before we've
433  * changed anything.
434  */
435  if (args->total == 0 && use_block == -1) {
436  xfs_trans_brelse(tp, lbp);
437  return XFS_ERROR(ENOSPC);
438  }
439  /*
440  * Need to compact the leaf entries, removing stale ones.
441  * Leave one stale entry behind - the one closest to our
442  * insertion index - and we'll shift that one to our insertion
443  * point later.
444  */
445  if (compact) {
446  xfs_dir2_leaf_compact_x1(lbp, &index, &lowstale, &highstale,
447  &lfloglow, &lfloghigh);
448  }
449  /*
450  * There are stale entries, so we'll need log-low and log-high
451  * impossibly bad values later.
452  */
453  else if (be16_to_cpu(leaf->hdr.stale)) {
454  lfloglow = be16_to_cpu(leaf->hdr.count);
455  lfloghigh = -1;
456  }
457  /*
458  * If there was no data block space found, we need to allocate
459  * a new one.
460  */
461  if (use_block == -1) {
462  /*
463  * Add the new data block.
464  */
465  if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
466  &use_block))) {
467  xfs_trans_brelse(tp, lbp);
468  return error;
469  }
470  /*
471  * Initialize the block.
472  */
473  if ((error = xfs_dir2_data_init(args, use_block, &dbp))) {
474  xfs_trans_brelse(tp, lbp);
475  return error;
476  }
477  /*
478  * If we're adding a new data block on the end we need to
479  * extend the bests table. Copy it up one entry.
480  */
481  if (use_block >= be32_to_cpu(ltp->bestcount)) {
482  bestsp--;
483  memmove(&bestsp[0], &bestsp[1],
484  be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
485  be32_add_cpu(&ltp->bestcount, 1);
486  xfs_dir2_leaf_log_tail(tp, lbp);
487  xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
488  }
489  /*
490  * If we're filling in a previously empty block just log it.
491  */
492  else
493  xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
494  hdr = dbp->b_addr;
495  bestsp[use_block] = hdr->bestfree[0].length;
496  grown = 1;
497  }
498  /*
499  * Already had space in some data block.
500  * Just read that one in.
501  */
502  else {
503  if ((error =
504  xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, use_block),
505  -1, &dbp, XFS_DATA_FORK))) {
506  xfs_trans_brelse(tp, lbp);
507  return error;
508  }
509  hdr = dbp->b_addr;
510  grown = 0;
511  }
512  xfs_dir2_data_check(dp, dbp);
513  /*
514  * Point to the biggest freespace in our data block.
515  */
516  dup = (xfs_dir2_data_unused_t *)
517  ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset));
518  ASSERT(be16_to_cpu(dup->length) >= length);
519  needscan = needlog = 0;
520  /*
521  * Mark the initial part of our freespace in use for the new entry.
522  */
523  xfs_dir2_data_use_free(tp, dbp, dup,
524  (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
525  &needlog, &needscan);
526  /*
527  * Initialize our new entry (at last).
528  */
529  dep = (xfs_dir2_data_entry_t *)dup;
530  dep->inumber = cpu_to_be64(args->inumber);
531  dep->namelen = args->namelen;
532  memcpy(dep->name, args->name, dep->namelen);
533  tagp = xfs_dir2_data_entry_tag_p(dep);
534  *tagp = cpu_to_be16((char *)dep - (char *)hdr);
535  /*
536  * Need to scan fix up the bestfree table.
537  */
538  if (needscan)
539  xfs_dir2_data_freescan(mp, hdr, &needlog);
540  /*
541  * Need to log the data block's header.
542  */
543  if (needlog)
544  xfs_dir2_data_log_header(tp, dbp);
545  xfs_dir2_data_log_entry(tp, dbp, dep);
546  /*
547  * If the bests table needs to be changed, do it.
548  * Log the change unless we've already done that.
549  */
550  if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(hdr->bestfree[0].length)) {
551  bestsp[use_block] = hdr->bestfree[0].length;
552  if (!grown)
553  xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
554  }
555 
556  lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
557  highstale, &lfloglow, &lfloghigh);
558 
559  /*
560  * Fill in the new leaf entry.
561  */
562  lep->hashval = cpu_to_be32(args->hashval);
563  lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block,
564  be16_to_cpu(*tagp)));
565  /*
566  * Log the leaf fields and give up the buffers.
567  */
568  xfs_dir2_leaf_log_header(tp, lbp);
569  xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh);
570  xfs_dir2_leaf_check(dp, lbp);
571  xfs_dir2_data_check(dp, dbp);
572  return 0;
573 }
574 
575 #ifdef DEBUG
576 /*
577  * Check the internal consistency of a leaf1 block.
578  * Pop an assert if something is wrong.
579  */
580 STATIC void
582  struct xfs_inode *dp, /* incore directory inode */
583  struct xfs_buf *bp) /* leaf's buffer */
584 {
585  int i; /* leaf index */
586  xfs_dir2_leaf_t *leaf; /* leaf structure */
587  xfs_dir2_leaf_tail_t *ltp; /* leaf tail pointer */
588  xfs_mount_t *mp; /* filesystem mount point */
589  int stale; /* count of stale leaves */
590 
591  leaf = bp->b_addr;
592  mp = dp->i_mount;
594  /*
595  * This value is not restrictive enough.
596  * Should factor in the size of the bests table as well.
597  * We can deduce a value for that from di_size.
598  */
599  ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
600  ltp = xfs_dir2_leaf_tail_p(mp, leaf);
601  /*
602  * Leaves and bests don't overlap.
603  */
604  ASSERT((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <=
605  (char *)xfs_dir2_leaf_bests_p(ltp));
606  /*
607  * Check hash value order, count stale entries.
608  */
609  for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
610  if (i + 1 < be16_to_cpu(leaf->hdr.count))
611  ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
612  be32_to_cpu(leaf->ents[i + 1].hashval));
614  stale++;
615  }
616  ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
617 }
618 #endif /* DEBUG */
619 
620 /*
621  * Compact out any stale entries in the leaf.
622  * Log the header and changed leaf entries, if any.
623  */
624 void
626  xfs_da_args_t *args, /* operation arguments */
627  struct xfs_buf *bp) /* leaf buffer */
628 {
629  int from; /* source leaf index */
630  xfs_dir2_leaf_t *leaf; /* leaf structure */
631  int loglow; /* first leaf entry to log */
632  int to; /* target leaf index */
633 
634  leaf = bp->b_addr;
635  if (!leaf->hdr.stale) {
636  return;
637  }
638  /*
639  * Compress out the stale entries in place.
640  */
641  for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) {
642  if (leaf->ents[from].address ==
644  continue;
645  /*
646  * Only actually copy the entries that are different.
647  */
648  if (from > to) {
649  if (loglow == -1)
650  loglow = to;
651  leaf->ents[to] = leaf->ents[from];
652  }
653  to++;
654  }
655  /*
656  * Update and log the header, log the leaf entries.
657  */
658  ASSERT(be16_to_cpu(leaf->hdr.stale) == from - to);
659  be16_add_cpu(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale)));
660  leaf->hdr.stale = 0;
661  xfs_dir2_leaf_log_header(args->trans, bp);
662  if (loglow != -1)
663  xfs_dir2_leaf_log_ents(args->trans, bp, loglow, to - 1);
664 }
665 
666 /*
667  * Compact the leaf entries, removing stale ones.
668  * Leave one stale entry behind - the one closest to our
669  * insertion index - and the caller will shift that one to our insertion
670  * point later.
671  * Return new insertion index, where the remaining stale entry is,
672  * and leaf logging indices.
673  */
674 void
676  struct xfs_buf *bp, /* leaf buffer */
677  int *indexp, /* insertion index */
678  int *lowstalep, /* out: stale entry before us */
679  int *highstalep, /* out: stale entry after us */
680  int *lowlogp, /* out: low log index */
681  int *highlogp) /* out: high log index */
682 {
683  int from; /* source copy index */
684  int highstale; /* stale entry at/after index */
685  int index; /* insertion index */
686  int keepstale; /* source index of kept stale */
687  xfs_dir2_leaf_t *leaf; /* leaf structure */
688  int lowstale; /* stale entry before index */
689  int newindex=0; /* new insertion index */
690  int to; /* destination copy index */
691 
692  leaf = bp->b_addr;
693  ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
694  index = *indexp;
695 
696  xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
697 
698  /*
699  * Pick the better of lowstale and highstale.
700  */
701  if (lowstale >= 0 &&
702  (highstale == be16_to_cpu(leaf->hdr.count) ||
703  index - lowstale <= highstale - index))
704  keepstale = lowstale;
705  else
706  keepstale = highstale;
707  /*
708  * Copy the entries in place, removing all the stale entries
709  * except keepstale.
710  */
711  for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
712  /*
713  * Notice the new value of index.
714  */
715  if (index == from)
716  newindex = to;
717  if (from != keepstale &&
718  leaf->ents[from].address ==
720  if (from == to)
721  *lowlogp = to;
722  continue;
723  }
724  /*
725  * Record the new keepstale value for the insertion.
726  */
727  if (from == keepstale)
728  lowstale = highstale = to;
729  /*
730  * Copy only the entries that have moved.
731  */
732  if (from > to)
733  leaf->ents[to] = leaf->ents[from];
734  to++;
735  }
736  ASSERT(from > to);
737  /*
738  * If the insertion point was past the last entry,
739  * set the new insertion point accordingly.
740  */
741  if (index == from)
742  newindex = to;
743  *indexp = newindex;
744  /*
745  * Adjust the leaf header values.
746  */
747  be16_add_cpu(&leaf->hdr.count, -(from - to));
748  leaf->hdr.stale = cpu_to_be16(1);
749  /*
750  * Remember the low/high stale value only in the "right"
751  * direction.
752  */
753  if (lowstale >= newindex)
754  lowstale = -1;
755  else
756  highstale = be16_to_cpu(leaf->hdr.count);
757  *highlogp = be16_to_cpu(leaf->hdr.count) - 1;
758  *lowstalep = lowstale;
759  *highstalep = highstale;
760 }
761 
763  xfs_extlen_t map_blocks; /* number of fsbs in map */
764  xfs_dablk_t map_off; /* last mapped file offset */
765  int map_size; /* total entries in *map */
766  int map_valid; /* valid entries in *map */
767  int nmap; /* mappings to ask xfs_bmapi */
768  xfs_dir2_db_t curdb; /* db for current block */
769  int ra_current; /* number of read-ahead blks */
770  int ra_index; /* *map index for read-ahead */
771  int ra_offset; /* map entry offset for ra */
772  int ra_want; /* readahead count wanted */
773  struct xfs_bmbt_irec map[]; /* map vector for blocks */
774 };
775 
776 STATIC int
778  struct xfs_inode *dp,
779  size_t bufsize,
780  struct xfs_dir2_leaf_map_info *mip,
781  xfs_dir2_off_t *curoff,
782  struct xfs_buf **bpp)
783 {
784  struct xfs_mount *mp = dp->i_mount;
785  struct xfs_buf *bp = *bpp;
786  struct xfs_bmbt_irec *map = mip->map;
787  int error = 0;
788  int length;
789  int i;
790  int j;
791 
792  /*
793  * If we have a buffer, we need to release it and
794  * take it out of the mapping.
795  */
796 
797  if (bp) {
798  xfs_trans_brelse(NULL, bp);
799  bp = NULL;
800  mip->map_blocks -= mp->m_dirblkfsbs;
801  /*
802  * Loop to get rid of the extents for the
803  * directory block.
804  */
805  for (i = mp->m_dirblkfsbs; i > 0; ) {
806  j = min_t(int, map->br_blockcount, i);
807  map->br_blockcount -= j;
808  map->br_startblock += j;
809  map->br_startoff += j;
810  /*
811  * If mapping is done, pitch it from
812  * the table.
813  */
814  if (!map->br_blockcount && --mip->map_valid)
815  memmove(&map[0], &map[1],
816  sizeof(map[0]) * mip->map_valid);
817  i -= j;
818  }
819  }
820 
821  /*
822  * Recalculate the readahead blocks wanted.
823  */
824  mip->ra_want = howmany(bufsize + mp->m_dirblksize,
825  mp->m_sb.sb_blocksize) - 1;
826  ASSERT(mip->ra_want >= 0);
827 
828  /*
829  * If we don't have as many as we want, and we haven't
830  * run out of data blocks, get some more mappings.
831  */
832  if (1 + mip->ra_want > mip->map_blocks &&
833  mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
834  /*
835  * Get more bmaps, fill in after the ones
836  * we already have in the table.
837  */
838  mip->nmap = mip->map_size - mip->map_valid;
839  error = xfs_bmapi_read(dp, mip->map_off,
840  xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) -
841  mip->map_off,
842  &map[mip->map_valid], &mip->nmap, 0);
843 
844  /*
845  * Don't know if we should ignore this or try to return an
846  * error. The trouble with returning errors is that readdir
847  * will just stop without actually passing the error through.
848  */
849  if (error)
850  goto out; /* XXX */
851 
852  /*
853  * If we got all the mappings we asked for, set the final map
854  * offset based on the last bmap value received. Otherwise,
855  * we've reached the end.
856  */
857  if (mip->nmap == mip->map_size - mip->map_valid) {
858  i = mip->map_valid + mip->nmap - 1;
859  mip->map_off = map[i].br_startoff + map[i].br_blockcount;
860  } else
861  mip->map_off = xfs_dir2_byte_to_da(mp,
863 
864  /*
865  * Look for holes in the mapping, and eliminate them. Count up
866  * the valid blocks.
867  */
868  for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) {
869  if (map[i].br_startblock == HOLESTARTBLOCK) {
870  mip->nmap--;
871  length = mip->map_valid + mip->nmap - i;
872  if (length)
873  memmove(&map[i], &map[i + 1],
874  sizeof(map[i]) * length);
875  } else {
876  mip->map_blocks += map[i].br_blockcount;
877  i++;
878  }
879  }
880  mip->map_valid += mip->nmap;
881  }
882 
883  /*
884  * No valid mappings, so no more data blocks.
885  */
886  if (!mip->map_valid) {
887  *curoff = xfs_dir2_da_to_byte(mp, mip->map_off);
888  goto out;
889  }
890 
891  /*
892  * Read the directory block starting at the first mapping.
893  */
894  mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
895  error = xfs_da_read_buf(NULL, dp, map->br_startoff,
896  map->br_blockcount >= mp->m_dirblkfsbs ?
897  XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1,
898  &bp, XFS_DATA_FORK);
899 
900  /*
901  * Should just skip over the data block instead of giving up.
902  */
903  if (error)
904  goto out; /* XXX */
905 
906  /*
907  * Adjust the current amount of read-ahead: we just read a block that
908  * was previously ra.
909  */
910  if (mip->ra_current)
911  mip->ra_current -= mp->m_dirblkfsbs;
912 
913  /*
914  * Do we need more readahead?
915  */
916  for (mip->ra_index = mip->ra_offset = i = 0;
917  mip->ra_want > mip->ra_current && i < mip->map_blocks;
918  i += mp->m_dirblkfsbs) {
919  ASSERT(mip->ra_index < mip->map_valid);
920  /*
921  * Read-ahead a contiguous directory block.
922  */
923  if (i > mip->ra_current &&
924  map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
925  xfs_buf_readahead(mp->m_ddev_targp,
926  XFS_FSB_TO_DADDR(mp,
927  map[mip->ra_index].br_startblock +
928  mip->ra_offset),
929  (int)BTOBB(mp->m_dirblksize));
930  mip->ra_current = i;
931  }
932 
933  /*
934  * Read-ahead a non-contiguous directory block. This doesn't
935  * use our mapping, but this is a very rare case.
936  */
937  else if (i > mip->ra_current) {
939  map[mip->ra_index].br_startoff +
940  mip->ra_offset,
941  XFS_DATA_FORK);
942  mip->ra_current = i;
943  }
944 
945  /*
946  * Advance offset through the mapping table.
947  */
948  for (j = 0; j < mp->m_dirblkfsbs; j++) {
949  /*
950  * The rest of this extent but not more than a dir
951  * block.
952  */
953  length = min_t(int, mp->m_dirblkfsbs,
954  map[mip->ra_index].br_blockcount -
955  mip->ra_offset);
956  j += length;
957  mip->ra_offset += length;
958 
959  /*
960  * Advance to the next mapping if this one is used up.
961  */
962  if (mip->ra_offset == map[mip->ra_index].br_blockcount) {
963  mip->ra_offset = 0;
964  mip->ra_index++;
965  }
966  }
967  }
968 
969 out:
970  *bpp = bp;
971  return error;
972 }
973 
974 /*
975  * Getdents (readdir) for leaf and node directories.
976  * This reads the data blocks only, so is the same for both forms.
977  */
978 int /* error */
980  xfs_inode_t *dp, /* incore directory inode */
981  void *dirent,
982  size_t bufsize,
983  xfs_off_t *offset,
984  filldir_t filldir)
985 {
986  struct xfs_buf *bp = NULL; /* data block buffer */
987  xfs_dir2_data_hdr_t *hdr; /* data block header */
988  xfs_dir2_data_entry_t *dep; /* data entry */
989  xfs_dir2_data_unused_t *dup; /* unused entry */
990  int error = 0; /* error return value */
991  int length; /* temporary length value */
992  xfs_mount_t *mp; /* filesystem mount point */
993  int byteoff; /* offset in current block */
994  xfs_dir2_off_t curoff; /* current overall offset */
995  xfs_dir2_off_t newoff; /* new curoff after new blk */
996  char *ptr = NULL; /* pointer to current data */
998 
999  /*
1000  * If the offset is at or past the largest allowed value,
1001  * give up right away.
1002  */
1003  if (*offset >= XFS_DIR2_MAX_DATAPTR)
1004  return 0;
1005 
1006  mp = dp->i_mount;
1007 
1008  /*
1009  * Set up to bmap a number of blocks based on the caller's
1010  * buffer size, the directory block size, and the filesystem
1011  * block size.
1012  */
1013  length = howmany(bufsize + mp->m_dirblksize,
1014  mp->m_sb.sb_blocksize);
1015  map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
1016  (length * sizeof(struct xfs_bmbt_irec)),
1017  KM_SLEEP);
1018  map_info->map_size = length;
1019 
1020  /*
1021  * Inside the loop we keep the main offset value as a byte offset
1022  * in the directory file.
1023  */
1024  curoff = xfs_dir2_dataptr_to_byte(mp, *offset);
1025 
1026  /*
1027  * Force this conversion through db so we truncate the offset
1028  * down to get the start of the data block.
1029  */
1030  map_info->map_off = xfs_dir2_db_to_da(mp,
1031  xfs_dir2_byte_to_db(mp, curoff));
1032 
1033  /*
1034  * Loop over directory entries until we reach the end offset.
1035  * Get more blocks and readahead as necessary.
1036  */
1037  while (curoff < XFS_DIR2_LEAF_OFFSET) {
1038  /*
1039  * If we have no buffer, or we're off the end of the
1040  * current buffer, need to get another one.
1041  */
1042  if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) {
1043 
1044  error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info,
1045  &curoff, &bp);
1046  if (error || !map_info->map_valid)
1047  break;
1048 
1049  /*
1050  * Having done a read, we need to set a new offset.
1051  */
1052  newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0);
1053  /*
1054  * Start of the current block.
1055  */
1056  if (curoff < newoff)
1057  curoff = newoff;
1058  /*
1059  * Make sure we're in the right block.
1060  */
1061  else if (curoff > newoff)
1062  ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
1063  map_info->curdb);
1064  hdr = bp->b_addr;
1065  xfs_dir2_data_check(dp, bp);
1066  /*
1067  * Find our position in the block.
1068  */
1069  ptr = (char *)(hdr + 1);
1070  byteoff = xfs_dir2_byte_to_off(mp, curoff);
1071  /*
1072  * Skip past the header.
1073  */
1074  if (byteoff == 0)
1075  curoff += (uint)sizeof(*hdr);
1076  /*
1077  * Skip past entries until we reach our offset.
1078  */
1079  else {
1080  while ((char *)ptr - (char *)hdr < byteoff) {
1081  dup = (xfs_dir2_data_unused_t *)ptr;
1082 
1083  if (be16_to_cpu(dup->freetag)
1085 
1086  length = be16_to_cpu(dup->length);
1087  ptr += length;
1088  continue;
1089  }
1090  dep = (xfs_dir2_data_entry_t *)ptr;
1091  length =
1092  xfs_dir2_data_entsize(dep->namelen);
1093  ptr += length;
1094  }
1095  /*
1096  * Now set our real offset.
1097  */
1098  curoff =
1099  xfs_dir2_db_off_to_byte(mp,
1100  xfs_dir2_byte_to_db(mp, curoff),
1101  (char *)ptr - (char *)hdr);
1102  if (ptr >= (char *)hdr + mp->m_dirblksize) {
1103  continue;
1104  }
1105  }
1106  }
1107  /*
1108  * We have a pointer to an entry.
1109  * Is it a live one?
1110  */
1111  dup = (xfs_dir2_data_unused_t *)ptr;
1112  /*
1113  * No, it's unused, skip over it.
1114  */
1116  length = be16_to_cpu(dup->length);
1117  ptr += length;
1118  curoff += length;
1119  continue;
1120  }
1121 
1122  dep = (xfs_dir2_data_entry_t *)ptr;
1123  length = xfs_dir2_data_entsize(dep->namelen);
1124 
1125  if (filldir(dirent, (char *)dep->name, dep->namelen,
1126  xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff,
1127  be64_to_cpu(dep->inumber), DT_UNKNOWN))
1128  break;
1129 
1130  /*
1131  * Advance to next entry in the block.
1132  */
1133  ptr += length;
1134  curoff += length;
1135  /* bufsize may have just been a guess; don't go negative */
1136  bufsize = bufsize > length ? bufsize - length : 0;
1137  }
1138 
1139  /*
1140  * All done. Set output offset value to current offset.
1141  */
1142  if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
1143  *offset = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
1144  else
1145  *offset = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
1146  kmem_free(map_info);
1147  if (bp)
1148  xfs_trans_brelse(NULL, bp);
1149  return error;
1150 }
1151 
1152 /*
1153  * Initialize a new leaf block, leaf1 or leafn magic accepted.
1154  */
1155 int
1157  xfs_da_args_t *args, /* operation arguments */
1158  xfs_dir2_db_t bno, /* directory block number */
1159  struct xfs_buf **bpp, /* out: leaf buffer */
1160  int magic) /* magic number for block */
1161 {
1162  struct xfs_buf *bp; /* leaf buffer */
1163  xfs_inode_t *dp; /* incore directory inode */
1164  int error; /* error return code */
1165  xfs_dir2_leaf_t *leaf; /* leaf structure */
1166  xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
1167  xfs_mount_t *mp; /* filesystem mount point */
1168  xfs_trans_t *tp; /* transaction pointer */
1169 
1170  dp = args->dp;
1171  ASSERT(dp != NULL);
1172  tp = args->trans;
1173  mp = dp->i_mount;
1174  ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) &&
1175  bno < XFS_DIR2_FREE_FIRSTDB(mp));
1176  /*
1177  * Get the buffer for the block.
1178  */
1179  error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
1180  XFS_DATA_FORK);
1181  if (error) {
1182  return error;
1183  }
1184  ASSERT(bp != NULL);
1185  leaf = bp->b_addr;
1186  /*
1187  * Initialize the header.
1188  */
1189  leaf->hdr.info.magic = cpu_to_be16(magic);
1190  leaf->hdr.info.forw = 0;
1191  leaf->hdr.info.back = 0;
1192  leaf->hdr.count = 0;
1193  leaf->hdr.stale = 0;
1194  xfs_dir2_leaf_log_header(tp, bp);
1195  /*
1196  * If it's a leaf-format directory initialize the tail.
1197  * In this case our caller has the real bests table to copy into
1198  * the block.
1199  */
1200  if (magic == XFS_DIR2_LEAF1_MAGIC) {
1201  ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1202  ltp->bestcount = 0;
1203  xfs_dir2_leaf_log_tail(tp, bp);
1204  }
1205  *bpp = bp;
1206  return 0;
1207 }
1208 
1209 /*
1210  * Log the bests entries indicated from a leaf1 block.
1211  */
1212 static void
1213 xfs_dir2_leaf_log_bests(
1214  xfs_trans_t *tp, /* transaction pointer */
1215  struct xfs_buf *bp, /* leaf buffer */
1216  int first, /* first entry to log */
1217  int last) /* last entry to log */
1218 {
1219  __be16 *firstb; /* pointer to first entry */
1220  __be16 *lastb; /* pointer to last entry */
1221  xfs_dir2_leaf_t *leaf; /* leaf structure */
1222  xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
1223 
1224  leaf = bp->b_addr;
1226  ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
1227  firstb = xfs_dir2_leaf_bests_p(ltp) + first;
1228  lastb = xfs_dir2_leaf_bests_p(ltp) + last;
1229  xfs_trans_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
1230  (uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
1231 }
1232 
1233 /*
1234  * Log the leaf entries indicated from a leaf1 or leafn block.
1235  */
1236 void
1238  xfs_trans_t *tp, /* transaction pointer */
1239  struct xfs_buf *bp, /* leaf buffer */
1240  int first, /* first entry to log */
1241  int last) /* last entry to log */
1242 {
1243  xfs_dir2_leaf_entry_t *firstlep; /* pointer to first entry */
1244  xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */
1245  xfs_dir2_leaf_t *leaf; /* leaf structure */
1246 
1247  leaf = bp->b_addr;
1250  firstlep = &leaf->ents[first];
1251  lastlep = &leaf->ents[last];
1252  xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
1253  (uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
1254 }
1255 
1256 /*
1257  * Log the header of the leaf1 or leafn block.
1258  */
1259 void
1261  struct xfs_trans *tp,
1262  struct xfs_buf *bp)
1263 {
1264  xfs_dir2_leaf_t *leaf; /* leaf structure */
1265 
1266  leaf = bp->b_addr;
1269  xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
1270  (uint)(sizeof(leaf->hdr) - 1));
1271 }
1272 
1273 /*
1274  * Log the tail of the leaf1 block.
1275  */
1276 STATIC void
1277 xfs_dir2_leaf_log_tail(
1278  struct xfs_trans *tp,
1279  struct xfs_buf *bp)
1280 {
1281  xfs_dir2_leaf_t *leaf; /* leaf structure */
1282  xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
1283  xfs_mount_t *mp; /* filesystem mount point */
1284 
1285  mp = tp->t_mountp;
1286  leaf = bp->b_addr;
1288  ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1289  xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
1290  (uint)(mp->m_dirblksize - 1));
1291 }
1292 
1293 /*
1294  * Look up the entry referred to by args in the leaf format directory.
1295  * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which
1296  * is also used by the node-format code.
1297  */
1298 int
1300  xfs_da_args_t *args) /* operation arguments */
1301 {
1302  struct xfs_buf *dbp; /* data block buffer */
1303  xfs_dir2_data_entry_t *dep; /* data block entry */
1304  xfs_inode_t *dp; /* incore directory inode */
1305  int error; /* error return code */
1306  int index; /* found entry index */
1307  struct xfs_buf *lbp; /* leaf buffer */
1308  xfs_dir2_leaf_t *leaf; /* leaf structure */
1309  xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1310  xfs_trans_t *tp; /* transaction pointer */
1311 
1312  trace_xfs_dir2_leaf_lookup(args);
1313 
1314  /*
1315  * Look up name in the leaf block, returning both buffers and index.
1316  */
1317  if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1318  return error;
1319  }
1320  tp = args->trans;
1321  dp = args->dp;
1322  xfs_dir2_leaf_check(dp, lbp);
1323  leaf = lbp->b_addr;
1324  /*
1325  * Get to the leaf entry and contained data entry address.
1326  */
1327  lep = &leaf->ents[index];
1328  /*
1329  * Point to the data entry.
1330  */
1331  dep = (xfs_dir2_data_entry_t *)
1332  ((char *)dbp->b_addr +
1333  xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1334  /*
1335  * Return the found inode number & CI name if appropriate
1336  */
1337  args->inumber = be64_to_cpu(dep->inumber);
1338  error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
1339  xfs_trans_brelse(tp, dbp);
1340  xfs_trans_brelse(tp, lbp);
1341  return XFS_ERROR(error);
1342 }
1343 
1344 /*
1345  * Look up name/hash in the leaf block.
1346  * Fill in indexp with the found index, and dbpp with the data buffer.
1347  * If not found dbpp will be NULL, and ENOENT comes back.
1348  * lbpp will always be filled in with the leaf buffer unless there's an error.
1349  */
1350 static int /* error */
1351 xfs_dir2_leaf_lookup_int(
1352  xfs_da_args_t *args, /* operation arguments */
1353  struct xfs_buf **lbpp, /* out: leaf buffer */
1354  int *indexp, /* out: index in leaf block */
1355  struct xfs_buf **dbpp) /* out: data buffer */
1356 {
1357  xfs_dir2_db_t curdb = -1; /* current data block number */
1358  struct xfs_buf *dbp = NULL; /* data buffer */
1359  xfs_dir2_data_entry_t *dep; /* data entry */
1360  xfs_inode_t *dp; /* incore directory inode */
1361  int error; /* error return code */
1362  int index; /* index in leaf block */
1363  struct xfs_buf *lbp; /* leaf buffer */
1364  xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1365  xfs_dir2_leaf_t *leaf; /* leaf structure */
1366  xfs_mount_t *mp; /* filesystem mount point */
1367  xfs_dir2_db_t newdb; /* new data block number */
1368  xfs_trans_t *tp; /* transaction pointer */
1369  xfs_dir2_db_t cidb = -1; /* case match data block no. */
1370  enum xfs_dacmp cmp; /* name compare result */
1371 
1372  dp = args->dp;
1373  tp = args->trans;
1374  mp = dp->i_mount;
1375  /*
1376  * Read the leaf block into the buffer.
1377  */
1378  error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
1379  XFS_DATA_FORK);
1380  if (error)
1381  return error;
1382  *lbpp = lbp;
1383  leaf = lbp->b_addr;
1384  xfs_dir2_leaf_check(dp, lbp);
1385  /*
1386  * Look for the first leaf entry with our hash value.
1387  */
1388  index = xfs_dir2_leaf_search_hash(args, lbp);
1389  /*
1390  * Loop over all the entries with the right hash value
1391  * looking to match the name.
1392  */
1393  for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
1394  be32_to_cpu(lep->hashval) == args->hashval;
1395  lep++, index++) {
1396  /*
1397  * Skip over stale leaf entries.
1398  */
1400  continue;
1401  /*
1402  * Get the new data block number.
1403  */
1404  newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1405  /*
1406  * If it's not the same as the old data block number,
1407  * need to pitch the old one and read the new one.
1408  */
1409  if (newdb != curdb) {
1410  if (dbp)
1411  xfs_trans_brelse(tp, dbp);
1412  error = xfs_da_read_buf(tp, dp,
1413  xfs_dir2_db_to_da(mp, newdb),
1414  -1, &dbp, XFS_DATA_FORK);
1415  if (error) {
1416  xfs_trans_brelse(tp, lbp);
1417  return error;
1418  }
1419  xfs_dir2_data_check(dp, dbp);
1420  curdb = newdb;
1421  }
1422  /*
1423  * Point to the data entry.
1424  */
1425  dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr +
1426  xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1427  /*
1428  * Compare name and if it's an exact match, return the index
1429  * and buffer. If it's the first case-insensitive match, store
1430  * the index and buffer and continue looking for an exact match.
1431  */
1432  cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
1433  if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
1434  args->cmpresult = cmp;
1435  *indexp = index;
1436  /* case exact match: return the current buffer. */
1437  if (cmp == XFS_CMP_EXACT) {
1438  *dbpp = dbp;
1439  return 0;
1440  }
1441  cidb = curdb;
1442  }
1443  }
1445  /*
1446  * Here, we can only be doing a lookup (not a rename or remove).
1447  * If a case-insensitive match was found earlier, re-read the
1448  * appropriate data block if required and return it.
1449  */
1450  if (args->cmpresult == XFS_CMP_CASE) {
1451  ASSERT(cidb != -1);
1452  if (cidb != curdb) {
1453  xfs_trans_brelse(tp, dbp);
1454  error = xfs_da_read_buf(tp, dp,
1455  xfs_dir2_db_to_da(mp, cidb),
1456  -1, &dbp, XFS_DATA_FORK);
1457  if (error) {
1458  xfs_trans_brelse(tp, lbp);
1459  return error;
1460  }
1461  }
1462  *dbpp = dbp;
1463  return 0;
1464  }
1465  /*
1466  * No match found, return ENOENT.
1467  */
1468  ASSERT(cidb == -1);
1469  if (dbp)
1470  xfs_trans_brelse(tp, dbp);
1471  xfs_trans_brelse(tp, lbp);
1472  return XFS_ERROR(ENOENT);
1473 }
1474 
1475 /*
1476  * Remove an entry from a leaf format directory.
1477  */
1478 int /* error */
1480  xfs_da_args_t *args) /* operation arguments */
1481 {
1482  __be16 *bestsp; /* leaf block best freespace */
1483  xfs_dir2_data_hdr_t *hdr; /* data block header */
1484  xfs_dir2_db_t db; /* data block number */
1485  struct xfs_buf *dbp; /* data block buffer */
1486  xfs_dir2_data_entry_t *dep; /* data entry structure */
1487  xfs_inode_t *dp; /* incore directory inode */
1488  int error; /* error return code */
1489  xfs_dir2_db_t i; /* temporary data block # */
1490  int index; /* index into leaf entries */
1491  struct xfs_buf *lbp; /* leaf buffer */
1492  xfs_dir2_leaf_t *leaf; /* leaf structure */
1493  xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1494  xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
1495  xfs_mount_t *mp; /* filesystem mount point */
1496  int needlog; /* need to log data header */
1497  int needscan; /* need to rescan data frees */
1498  xfs_dir2_data_off_t oldbest; /* old value of best free */
1499  xfs_trans_t *tp; /* transaction pointer */
1500 
1501  trace_xfs_dir2_leaf_removename(args);
1502 
1503  /*
1504  * Lookup the leaf entry, get the leaf and data blocks read in.
1505  */
1506  if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1507  return error;
1508  }
1509  dp = args->dp;
1510  tp = args->trans;
1511  mp = dp->i_mount;
1512  leaf = lbp->b_addr;
1513  hdr = dbp->b_addr;
1514  xfs_dir2_data_check(dp, dbp);
1515  /*
1516  * Point to the leaf entry, use that to point to the data entry.
1517  */
1518  lep = &leaf->ents[index];
1519  db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1520  dep = (xfs_dir2_data_entry_t *)
1521  ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1522  needscan = needlog = 0;
1523  oldbest = be16_to_cpu(hdr->bestfree[0].length);
1524  ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1525  bestsp = xfs_dir2_leaf_bests_p(ltp);
1526  ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
1527  /*
1528  * Mark the former data entry unused.
1529  */
1530  xfs_dir2_data_make_free(tp, dbp,
1531  (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
1532  xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
1533  /*
1534  * We just mark the leaf entry stale by putting a null in it.
1535  */
1536  be16_add_cpu(&leaf->hdr.stale, 1);
1537  xfs_dir2_leaf_log_header(tp, lbp);
1539  xfs_dir2_leaf_log_ents(tp, lbp, index, index);
1540  /*
1541  * Scan the freespace in the data block again if necessary,
1542  * log the data block header if necessary.
1543  */
1544  if (needscan)
1545  xfs_dir2_data_freescan(mp, hdr, &needlog);
1546  if (needlog)
1547  xfs_dir2_data_log_header(tp, dbp);
1548  /*
1549  * If the longest freespace in the data block has changed,
1550  * put the new value in the bests table and log that.
1551  */
1552  if (be16_to_cpu(hdr->bestfree[0].length) != oldbest) {
1553  bestsp[db] = hdr->bestfree[0].length;
1554  xfs_dir2_leaf_log_bests(tp, lbp, db, db);
1555  }
1556  xfs_dir2_data_check(dp, dbp);
1557  /*
1558  * If the data block is now empty then get rid of the data block.
1559  */
1560  if (be16_to_cpu(hdr->bestfree[0].length) ==
1561  mp->m_dirblksize - (uint)sizeof(*hdr)) {
1562  ASSERT(db != mp->m_dirdatablk);
1563  if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1564  /*
1565  * Nope, can't get rid of it because it caused
1566  * allocation of a bmap btree block to do so.
1567  * Just go on, returning success, leaving the
1568  * empty block in place.
1569  */
1570  if (error == ENOSPC && args->total == 0)
1571  error = 0;
1572  xfs_dir2_leaf_check(dp, lbp);
1573  return error;
1574  }
1575  dbp = NULL;
1576  /*
1577  * If this is the last data block then compact the
1578  * bests table by getting rid of entries.
1579  */
1580  if (db == be32_to_cpu(ltp->bestcount) - 1) {
1581  /*
1582  * Look for the last active entry (i).
1583  */
1584  for (i = db - 1; i > 0; i--) {
1585  if (bestsp[i] != cpu_to_be16(NULLDATAOFF))
1586  break;
1587  }
1588  /*
1589  * Copy the table down so inactive entries at the
1590  * end are removed.
1591  */
1592  memmove(&bestsp[db - i], bestsp,
1593  (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
1594  be32_add_cpu(&ltp->bestcount, -(db - i));
1595  xfs_dir2_leaf_log_tail(tp, lbp);
1596  xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1597  } else
1598  bestsp[db] = cpu_to_be16(NULLDATAOFF);
1599  }
1600  /*
1601  * If the data block was not the first one, drop it.
1602  */
1603  else if (db != mp->m_dirdatablk)
1604  dbp = NULL;
1605 
1606  xfs_dir2_leaf_check(dp, lbp);
1607  /*
1608  * See if we can convert to block form.
1609  */
1610  return xfs_dir2_leaf_to_block(args, lbp, dbp);
1611 }
1612 
1613 /*
1614  * Replace the inode number in a leaf format directory entry.
1615  */
1616 int /* error */
1618  xfs_da_args_t *args) /* operation arguments */
1619 {
1620  struct xfs_buf *dbp; /* data block buffer */
1621  xfs_dir2_data_entry_t *dep; /* data block entry */
1622  xfs_inode_t *dp; /* incore directory inode */
1623  int error; /* error return code */
1624  int index; /* index of leaf entry */
1625  struct xfs_buf *lbp; /* leaf buffer */
1626  xfs_dir2_leaf_t *leaf; /* leaf structure */
1627  xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1628  xfs_trans_t *tp; /* transaction pointer */
1629 
1630  trace_xfs_dir2_leaf_replace(args);
1631 
1632  /*
1633  * Look up the entry.
1634  */
1635  if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1636  return error;
1637  }
1638  dp = args->dp;
1639  leaf = lbp->b_addr;
1640  /*
1641  * Point to the leaf entry, get data address from it.
1642  */
1643  lep = &leaf->ents[index];
1644  /*
1645  * Point to the data entry.
1646  */
1647  dep = (xfs_dir2_data_entry_t *)
1648  ((char *)dbp->b_addr +
1649  xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1650  ASSERT(args->inumber != be64_to_cpu(dep->inumber));
1651  /*
1652  * Put the new inode number in, log it.
1653  */
1654  dep->inumber = cpu_to_be64(args->inumber);
1655  tp = args->trans;
1656  xfs_dir2_data_log_entry(tp, dbp, dep);
1657  xfs_dir2_leaf_check(dp, lbp);
1658  xfs_trans_brelse(tp, lbp);
1659  return 0;
1660 }
1661 
1662 /*
1663  * Return index in the leaf block (lbp) which is either the first
1664  * one with this hash value, or if there are none, the insert point
1665  * for that hash value.
1666  */
1667 int /* index value */
1669  xfs_da_args_t *args, /* operation arguments */
1670  struct xfs_buf *lbp) /* leaf buffer */
1671 {
1672  xfs_dahash_t hash=0; /* hash from this entry */
1673  xfs_dahash_t hashwant; /* hash value looking for */
1674  int high; /* high leaf index */
1675  int low; /* low leaf index */
1676  xfs_dir2_leaf_t *leaf; /* leaf structure */
1677  xfs_dir2_leaf_entry_t *lep; /* leaf entry */
1678  int mid=0; /* current leaf index */
1679 
1680  leaf = lbp->b_addr;
1681 #ifndef __KERNEL__
1682  if (!leaf->hdr.count)
1683  return 0;
1684 #endif
1685  /*
1686  * Note, the table cannot be empty, so we have to go through the loop.
1687  * Binary search the leaf entries looking for our hash value.
1688  */
1689  for (lep = leaf->ents, low = 0, high = be16_to_cpu(leaf->hdr.count) - 1,
1690  hashwant = args->hashval;
1691  low <= high; ) {
1692  mid = (low + high) >> 1;
1693  if ((hash = be32_to_cpu(lep[mid].hashval)) == hashwant)
1694  break;
1695  if (hash < hashwant)
1696  low = mid + 1;
1697  else
1698  high = mid - 1;
1699  }
1700  /*
1701  * Found one, back up through all the equal hash values.
1702  */
1703  if (hash == hashwant) {
1704  while (mid > 0 && be32_to_cpu(lep[mid - 1].hashval) == hashwant) {
1705  mid--;
1706  }
1707  }
1708  /*
1709  * Need to point to an entry higher than ours.
1710  */
1711  else if (hash < hashwant)
1712  mid++;
1713  return mid;
1714 }
1715 
1716 /*
1717  * Trim off a trailing data block. We know it's empty since the leaf
1718  * freespace table says so.
1719  */
1720 int /* error */
1722  xfs_da_args_t *args, /* operation arguments */
1723  struct xfs_buf *lbp, /* leaf buffer */
1724  xfs_dir2_db_t db) /* data block number */
1725 {
1726  __be16 *bestsp; /* leaf bests table */
1727  struct xfs_buf *dbp; /* data block buffer */
1728  xfs_inode_t *dp; /* incore directory inode */
1729  int error; /* error return value */
1730  xfs_dir2_leaf_t *leaf; /* leaf structure */
1731  xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
1732  xfs_mount_t *mp; /* filesystem mount point */
1733  xfs_trans_t *tp; /* transaction pointer */
1734 
1735  dp = args->dp;
1736  mp = dp->i_mount;
1737  tp = args->trans;
1738  /*
1739  * Read the offending data block. We need its buffer.
1740  */
1741  if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp,
1742  XFS_DATA_FORK))) {
1743  return error;
1744  }
1745 
1746  leaf = lbp->b_addr;
1747  ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1748 
1749 #ifdef DEBUG
1750 {
1751  struct xfs_dir2_data_hdr *hdr = dbp->b_addr;
1752 
1754  ASSERT(be16_to_cpu(hdr->bestfree[0].length) ==
1755  mp->m_dirblksize - (uint)sizeof(*hdr));
1756  ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
1757 }
1758 #endif
1759 
1760  /*
1761  * Get rid of the data block.
1762  */
1763  if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1764  ASSERT(error != ENOSPC);
1765  xfs_trans_brelse(tp, dbp);
1766  return error;
1767  }
1768  /*
1769  * Eliminate the last bests entry from the table.
1770  */
1771  bestsp = xfs_dir2_leaf_bests_p(ltp);
1772  be32_add_cpu(&ltp->bestcount, -1);
1773  memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
1774  xfs_dir2_leaf_log_tail(tp, lbp);
1775  xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1776  return 0;
1777 }
1778 
1779 static inline size_t
1780 xfs_dir2_leaf_size(
1781  struct xfs_dir2_leaf_hdr *hdr,
1782  int counts)
1783 {
1784  int entries;
1785 
1786  entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale);
1787  return sizeof(xfs_dir2_leaf_hdr_t) +
1788  entries * sizeof(xfs_dir2_leaf_entry_t) +
1789  counts * sizeof(xfs_dir2_data_off_t) +
1790  sizeof(xfs_dir2_leaf_tail_t);
1791 }
1792 
1793 /*
1794  * Convert node form directory to leaf form directory.
1795  * The root of the node form dir needs to already be a LEAFN block.
1796  * Just return if we can't do anything.
1797  */
1798 int /* error */
1800  xfs_da_state_t *state) /* directory operation state */
1801 {
1802  xfs_da_args_t *args; /* operation arguments */
1803  xfs_inode_t *dp; /* incore directory inode */
1804  int error; /* error return code */
1805  struct xfs_buf *fbp; /* buffer for freespace block */
1806  xfs_fileoff_t fo; /* freespace file offset */
1807  xfs_dir2_free_t *free; /* freespace structure */
1808  struct xfs_buf *lbp; /* buffer for leaf block */
1809  xfs_dir2_leaf_tail_t *ltp; /* tail of leaf structure */
1810  xfs_dir2_leaf_t *leaf; /* leaf structure */
1811  xfs_mount_t *mp; /* filesystem mount point */
1812  int rval; /* successful free trim? */
1813  xfs_trans_t *tp; /* transaction pointer */
1814 
1815  /*
1816  * There's more than a leaf level in the btree, so there must
1817  * be multiple leafn blocks. Give up.
1818  */
1819  if (state->path.active > 1)
1820  return 0;
1821  args = state->args;
1822 
1823  trace_xfs_dir2_node_to_leaf(args);
1824 
1825  mp = state->mp;
1826  dp = args->dp;
1827  tp = args->trans;
1828  /*
1829  * Get the last offset in the file.
1830  */
1831  if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) {
1832  return error;
1833  }
1834  fo -= mp->m_dirblkfsbs;
1835  /*
1836  * If there are freespace blocks other than the first one,
1837  * take this opportunity to remove trailing empty freespace blocks
1838  * that may have been left behind during no-space-reservation
1839  * operations.
1840  */
1841  while (fo > mp->m_dirfreeblk) {
1842  if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) {
1843  return error;
1844  }
1845  if (rval)
1846  fo -= mp->m_dirblkfsbs;
1847  else
1848  return 0;
1849  }
1850  /*
1851  * Now find the block just before the freespace block.
1852  */
1853  if ((error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK))) {
1854  return error;
1855  }
1856  /*
1857  * If it's not the single leaf block, give up.
1858  */
1859  if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize)
1860  return 0;
1861  lbp = state->path.blk[0].bp;
1862  leaf = lbp->b_addr;
1864  /*
1865  * Read the freespace block.
1866  */
1867  if ((error = xfs_da_read_buf(tp, dp, mp->m_dirfreeblk, -1, &fbp,
1868  XFS_DATA_FORK))) {
1869  return error;
1870  }
1871  free = fbp->b_addr;
1873  ASSERT(!free->hdr.firstdb);
1874 
1875  /*
1876  * Now see if the leafn and free data will fit in a leaf1.
1877  * If not, release the buffer and give up.
1878  */
1879  if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) >
1880  mp->m_dirblksize) {
1881  xfs_trans_brelse(tp, fbp);
1882  return 0;
1883  }
1884 
1885  /*
1886  * If the leaf has any stale entries in it, compress them out.
1887  * The compact routine will log the header.
1888  */
1889  if (be16_to_cpu(leaf->hdr.stale))
1890  xfs_dir2_leaf_compact(args, lbp);
1891  else
1892  xfs_dir2_leaf_log_header(tp, lbp);
1894  /*
1895  * Set up the leaf tail from the freespace block.
1896  */
1897  ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1898  ltp->bestcount = free->hdr.nvalid;
1899  /*
1900  * Set up the leaf bests table.
1901  */
1902  memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests,
1903  be32_to_cpu(ltp->bestcount) * sizeof(xfs_dir2_data_off_t));
1904  xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1905  xfs_dir2_leaf_log_tail(tp, lbp);
1906  xfs_dir2_leaf_check(dp, lbp);
1907  /*
1908  * Get rid of the freespace block.
1909  */
1910  error = xfs_dir2_shrink_inode(args, XFS_DIR2_FREE_FIRSTDB(mp), fbp);
1911  if (error) {
1912  /*
1913  * This can't fail here because it can only happen when
1914  * punching out the middle of an extent, and this is an
1915  * isolated block.
1916  */
1917  ASSERT(error != ENOSPC);
1918  return error;
1919  }
1920  fbp = NULL;
1921  /*
1922  * Now see if we can convert the single-leaf directory
1923  * down to a block form directory.
1924  * This routine always kills the dabuf for the leaf, so
1925  * eliminate it from the path.
1926  */
1927  error = xfs_dir2_leaf_to_block(args, lbp, NULL);
1928  state->path.blk[0].bp = NULL;
1929  return error;
1930 }