xfs_da_btree.c
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_dir2.h"
#include "xfs_dir2_format.h"
#include "xfs_dir2_priv.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_error.h"
#include "xfs_trace.h"

/*
 * xfs_da_btree.c
 *
 * Routines to implement directories as Btrees of hashed names.
 */
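
/*
 * Overview note (a sketch of the on-disk layout as used by the code in
 * this file): an intermediate dabtree node holds an array of
 * (hashval, before) pairs sorted by hashval, where "before" is the
 * dablk number of the child covering hashvals up to and including that
 * value.  Leaf blocks (XFS_DIR2_LEAFN_MAGIC for directories,
 * XFS_ATTR_LEAF_MAGIC for attributes) hold the actual entries.
 * Because different names can hash to the same value, hashvals are not
 * unique, and the lookup and split code below must cope with runs of
 * duplicates that may cross block boundaries.
 */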

/*========================================================================
 * Function prototypes for the kernel.
 *========================================================================*/

/*
 * Routines used for growing the Btree.
 */
STATIC int xfs_da_root_split(xfs_da_state_t *state,
			     xfs_da_state_blk_t *existing_root,
			     xfs_da_state_blk_t *new_child);
STATIC int xfs_da_node_split(xfs_da_state_t *state,
			     xfs_da_state_blk_t *existing_blk,
			     xfs_da_state_blk_t *split_blk,
			     xfs_da_state_blk_t *blk_to_add,
			     int treelevel,
			     int *result);
STATIC void xfs_da_node_rebalance(xfs_da_state_t *state,
				  xfs_da_state_blk_t *node_blk_1,
				  xfs_da_state_blk_t *node_blk_2);
STATIC void xfs_da_node_add(xfs_da_state_t *state,
			    xfs_da_state_blk_t *old_node_blk,
			    xfs_da_state_blk_t *new_node_blk);

/*
 * Routines used for shrinking the Btree.
 */
STATIC int xfs_da_root_join(xfs_da_state_t *state,
			    xfs_da_state_blk_t *root_blk);
STATIC int xfs_da_node_toosmall(xfs_da_state_t *state, int *retval);
STATIC void xfs_da_node_remove(xfs_da_state_t *state,
			       xfs_da_state_blk_t *drop_blk);
STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
				  xfs_da_state_blk_t *src_node_blk,
				  xfs_da_state_blk_t *dst_node_blk);

/*
 * Utility routines.
 */
STATIC uint	xfs_da_node_lasthash(struct xfs_buf *bp, int *count);
STATIC int	xfs_da_node_order(struct xfs_buf *node1_bp,
				  struct xfs_buf *node2_bp);
STATIC int	xfs_da_blk_unlink(xfs_da_state_t *state,
				  xfs_da_state_blk_t *drop_blk,
				  xfs_da_state_blk_t *save_blk);
STATIC void	xfs_da_state_kill_altpath(xfs_da_state_t *state);

/*========================================================================
 * Routines used for growing the Btree.
 *========================================================================*/

/*
 * Create the initial contents of an intermediate node.
 */
int
xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
		   struct xfs_buf **bpp, int whichfork)
{
	xfs_da_intnode_t *node;
	struct xfs_buf *bp;
	int error;
	xfs_trans_t *tp;

	trace_xfs_da_node_create(args);

	tp = args->trans;
	error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
	if (error)
		return(error);
	ASSERT(bp != NULL);
	node = bp->b_addr;
	node->hdr.info.forw = 0;
	node->hdr.info.back = 0;
	node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC);
	node->hdr.info.pad = 0;
	node->hdr.count = 0;
	node->hdr.level = cpu_to_be16(level);

	xfs_trans_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));

	*bpp = bp;
	return(0);
}

/*
 * Split a leaf node, rebalance, then possibly split
 * intermediate nodes, rebalance, etc.
 */
int							/* error */
xfs_da_split(xfs_da_state_t *state)
{
	xfs_da_state_blk_t *oldblk, *newblk, *addblk;
	xfs_da_intnode_t *node;
	struct xfs_buf *bp;
	int max, action, error, i;

	trace_xfs_da_split(state->args);

	/*
	 * Walk back up the tree splitting/inserting/adjusting as necessary.
	 * If we need to insert and there isn't room, split the node, then
	 * decide which fragment to insert the new block from below into.
	 * Note that we may split the root this way, but we need more fixup.
	 */
	max = state->path.active - 1;
	ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
	ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);

	addblk = &state->path.blk[max];		/* initial dummy value */
	for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
		oldblk = &state->path.blk[i];
		newblk = &state->altpath.blk[i];

		/*
		 * If a leaf node then
		 *     Allocate a new leaf node, then rebalance across them.
		 * else if an intermediate node then
		 *     We split on the last layer, must we split the node?
		 */
		switch (oldblk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
			error = xfs_attr_leaf_split(state, oldblk, newblk);
			if ((error != 0) && (error != ENOSPC)) {
				return(error);	/* GROT: attr is inconsistent */
			}
			if (!error) {
				addblk = newblk;
				break;
			}
			/*
			 * Entry wouldn't fit, split the leaf again.
			 */
			state->extravalid = 1;
			if (state->inleaf) {
				state->extraafter = 0;	/* before newblk */
				trace_xfs_attr_leaf_split_before(state->args);
				error = xfs_attr_leaf_split(state, oldblk,
							    &state->extrablk);
			} else {
				state->extraafter = 1;	/* after newblk */
				trace_xfs_attr_leaf_split_after(state->args);
				error = xfs_attr_leaf_split(state, newblk,
							    &state->extrablk);
			}
			if (error)
				return(error);	/* GROT: attr inconsistent */
			addblk = newblk;
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			error = xfs_dir2_leafn_split(state, oldblk, newblk);
			if (error)
				return error;
			addblk = newblk;
			break;
		case XFS_DA_NODE_MAGIC:
			error = xfs_da_node_split(state, oldblk, newblk, addblk,
							 max - i, &action);
			addblk->bp = NULL;
			if (error)
				return(error);	/* GROT: dir is inconsistent */
			/*
			 * Record the newly split block for the next time thru?
			 */
			if (action)
				addblk = newblk;
			else
				addblk = NULL;
			break;
		}

		/*
		 * Update the btree to show the new hashval for this child.
		 */
		xfs_da_fixhashpath(state, &state->path);
	}
	if (!addblk)
		return(0);

	/*
	 * Split the root node.
	 */
	ASSERT(state->path.active == 0);
	oldblk = &state->path.blk[0];
	error = xfs_da_root_split(state, oldblk, addblk);
	if (error) {
		addblk->bp = NULL;
		return(error);	/* GROT: dir is inconsistent */
	}

	/*
	 * Update pointers to the node which used to be block 0 and
	 * just got bumped because of the addition of a new root node.
	 * There might be three blocks involved if a double split occurred,
	 * and the original block 0 could be at any position in the list.
	 */

	node = oldblk->bp->b_addr;
	if (node->hdr.info.forw) {
		if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
			bp = addblk->bp;
		} else {
			ASSERT(state->extravalid);
			bp = state->extrablk.bp;
		}
		node = bp->b_addr;
		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
		xfs_trans_log_buf(state->args->trans, bp,
		    XFS_DA_LOGRANGE(node, &node->hdr.info,
		    sizeof(node->hdr.info)));
	}
	node = oldblk->bp->b_addr;
	if (node->hdr.info.back) {
		if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) {
			bp = addblk->bp;
		} else {
			ASSERT(state->extravalid);
			bp = state->extrablk.bp;
		}
		node = bp->b_addr;
		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
		xfs_trans_log_buf(state->args->trans, bp,
		    XFS_DA_LOGRANGE(node, &node->hdr.info,
		    sizeof(node->hdr.info)));
	}
	addblk->bp = NULL;
	return(0);
}
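
/*
 * Note on the "double split" handled above: if the new attr entry still
 * does not fit after the first xfs_attr_leaf_split() returns ENOSPC,
 * the leaf is split a second time and the third block is remembered in
 * state->extrablk.  That is why the sibling-pointer fixup after the
 * root split must consider up to three candidate blocks when repairing
 * the forw/back links around the block that used to be block 0.
 */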

/*
 * Split the root.  We have to create a new root and point to the two
 * parts (the split old root) that we just created.  Copy block zero to
 * the EOF, extending the inode in process.
 */
STATIC int						/* error */
xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
				 xfs_da_state_blk_t *blk2)
{
	xfs_da_intnode_t *node, *oldroot;
	xfs_da_args_t *args;
	xfs_dablk_t blkno;
	struct xfs_buf *bp;
	int error, size;
	xfs_inode_t *dp;
	xfs_trans_t *tp;
	xfs_mount_t *mp;
	xfs_dir2_leaf_t *leaf;

	trace_xfs_da_root_split(state->args);

	/*
	 * Copy the existing (incorrect) block from the root node position
	 * to a free space somewhere.
	 */
	args = state->args;
	ASSERT(args != NULL);
	error = xfs_da_grow_inode(args, &blkno);
	if (error)
		return(error);
	dp = args->dp;
	tp = args->trans;
	mp = state->mp;
	error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
	if (error)
		return(error);
	ASSERT(bp != NULL);
	node = bp->b_addr;
	oldroot = blk1->bp->b_addr;
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
		size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
			     (char *)oldroot);
	} else {
		ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
		leaf = (xfs_dir2_leaf_t *)oldroot;
		size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
			     (char *)leaf);
	}
	memcpy(node, oldroot, size);
	xfs_trans_log_buf(tp, bp, 0, size - 1);
	blk1->bp = bp;
	blk1->blkno = blkno;

	/*
	 * Set up the new root node.
	 */
	error = xfs_da_node_create(args,
		(args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0,
		be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork);
	if (error)
		return(error);
	node = bp->b_addr;
	node->btree[0].hashval = cpu_to_be32(blk1->hashval);
	node->btree[0].before = cpu_to_be32(blk1->blkno);
	node->btree[1].hashval = cpu_to_be32(blk2->hashval);
	node->btree[1].before = cpu_to_be32(blk2->blkno);
	node->hdr.count = cpu_to_be16(2);

#ifdef DEBUG
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
		ASSERT(blk1->blkno >= mp->m_dirleafblk &&
		       blk1->blkno < mp->m_dirfreeblk);
		ASSERT(blk2->blkno >= mp->m_dirleafblk &&
		       blk2->blkno < mp->m_dirfreeblk);
	}
#endif

	/* Header is already logged by xfs_da_node_create */
	xfs_trans_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, node->btree,
			sizeof(xfs_da_node_entry_t) * 2));

	return(0);
}
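
/*
 * The root must stay at a fixed offset in the dabtree space (block 0
 * for attributes, mp->m_dirleafblk for directories) because every
 * lookup starts there.  Growing the tree therefore never relocates the
 * root pointer; instead the old root's contents are copied out to a
 * freshly allocated block, and a new, one-level-higher root holding
 * exactly two entries is written back over the fixed root location.
 */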

/*
 * Split the node, rebalance, then add the new entry.
 */
STATIC int						/* error */
xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
				 xfs_da_state_blk_t *newblk,
				 xfs_da_state_blk_t *addblk,
				 int treelevel, int *result)
{
	xfs_da_intnode_t *node;
	xfs_dablk_t blkno;
	int newcount, error;
	int useextra;

	trace_xfs_da_node_split(state->args);

	node = oldblk->bp->b_addr;
	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));

	/*
	 * With V2 dirs the extra block is data or freespace.
	 */
	useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
	newcount = 1 + useextra;
	/*
	 * Do we have to split the node?
	 */
	if ((be16_to_cpu(node->hdr.count) + newcount) > state->node_ents) {
		/*
		 * Allocate a new node, add to the doubly linked chain of
		 * nodes, then move some of our excess entries into it.
		 */
		error = xfs_da_grow_inode(state->args, &blkno);
		if (error)
			return(error);	/* GROT: dir is inconsistent */

		error = xfs_da_node_create(state->args, blkno, treelevel,
					   &newblk->bp, state->args->whichfork);
		if (error)
			return(error);	/* GROT: dir is inconsistent */
		newblk->blkno = blkno;
		newblk->magic = XFS_DA_NODE_MAGIC;
		xfs_da_node_rebalance(state, oldblk, newblk);
		error = xfs_da_blk_link(state, oldblk, newblk);
		if (error)
			return(error);
		*result = 1;
	} else {
		*result = 0;
	}

	/*
	 * Insert the new entry(s) into the correct block
	 * (updating last hashval in the process).
	 *
	 * xfs_da_node_add() inserts BEFORE the given index,
	 * and as a result of using node_lookup_int() we always
	 * point to a valid entry (not after one), but a split
	 * operation always results in a new block whose hashvals
	 * FOLLOW the current block.
	 *
	 * If we had double-split op below us, then add the extra block too.
	 */
	node = oldblk->bp->b_addr;
	if (oldblk->index <= be16_to_cpu(node->hdr.count)) {
		oldblk->index++;
		xfs_da_node_add(state, oldblk, addblk);
		if (useextra) {
			if (state->extraafter)
				oldblk->index++;
			xfs_da_node_add(state, oldblk, &state->extrablk);
			state->extravalid = 0;
		}
	} else {
		newblk->index++;
		xfs_da_node_add(state, newblk, addblk);
		if (useextra) {
			if (state->extraafter)
				newblk->index++;
			xfs_da_node_add(state, newblk, &state->extrablk);
			state->extravalid = 0;
		}
	}

	return(0);
}

/*
 * Balance the btree elements between two intermediate nodes,
 * usually one full and one empty.
 *
 * NOTE: if blk2 is empty, then it will get the upper half of blk1.
 */
STATIC void
xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
				     xfs_da_state_blk_t *blk2)
{
	xfs_da_intnode_t *node1, *node2, *tmpnode;
	xfs_da_node_entry_t *btree_s, *btree_d;
	int count, tmp;
	xfs_trans_t *tp;

	trace_xfs_da_node_rebalance(state->args);

	node1 = blk1->bp->b_addr;
	node2 = blk2->bp->b_addr;
	/*
	 * Figure out how many entries need to move, and in which direction.
	 * Swap the nodes around if that makes it simpler.
	 */
	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
	    ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) ||
	     (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
	      be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
		tmpnode = node1;
		node1 = node2;
		node2 = tmpnode;
	}
	ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	ASSERT(node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
	if (count == 0)
		return;
	tp = state->args->trans;
	/*
	 * Two cases: high-to-low and low-to-high.
	 */
	if (count > 0) {
		/*
		 * Move elements in node2 up to make a hole.
		 */
		if ((tmp = be16_to_cpu(node2->hdr.count)) > 0) {
			tmp *= (uint)sizeof(xfs_da_node_entry_t);
			btree_s = &node2->btree[0];
			btree_d = &node2->btree[count];
			memmove(btree_d, btree_s, tmp);
		}

		/*
		 * Move the req'd B-tree elements from high in node1 to
		 * low in node2.
		 */
		be16_add_cpu(&node2->hdr.count, count);
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count];
		btree_d = &node2->btree[0];
		memcpy(btree_d, btree_s, tmp);
		be16_add_cpu(&node1->hdr.count, -count);
	} else {
		/*
		 * Move the req'd B-tree elements from low in node2 to
		 * high in node1.
		 */
		count = -count;
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node2->btree[0];
		btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)];
		memcpy(btree_d, btree_s, tmp);
		be16_add_cpu(&node1->hdr.count, count);
		xfs_trans_log_buf(tp, blk1->bp,
			XFS_DA_LOGRANGE(node1, btree_d, tmp));

		/*
		 * Move elements in node2 down to fill the hole.
		 */
		tmp  = be16_to_cpu(node2->hdr.count) - count;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node2->btree[count];
		btree_d = &node2->btree[0];
		memmove(btree_d, btree_s, tmp);
		be16_add_cpu(&node2->hdr.count, -count);
	}

	/*
	 * Log header of node 1 and all current bits of node 2.
	 */
	xfs_trans_log_buf(tp, blk1->bp,
		XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
	xfs_trans_log_buf(tp, blk2->bp,
		XFS_DA_LOGRANGE(node2, &node2->hdr,
			sizeof(node2->hdr) +
			sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count)));

	/*
	 * Record the last hashval from each block for upward propagation.
	 * (note: don't use the swapped node pointers)
	 */
	node1 = blk1->bp->b_addr;
	node2 = blk2->bp->b_addr;
	blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval);
	blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval);

	/*
	 * Adjust the expected index for insertion.
	 */
	if (blk1->index >= be16_to_cpu(node1->hdr.count)) {
		blk2->index = blk1->index - be16_to_cpu(node1->hdr.count);
		blk1->index = be16_to_cpu(node1->hdr.count) + 1;	/* make it invalid */
	}
}
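
/*
 * The sign of "count" above encodes the direction of the move: after
 * the optional pointer swap, node1 is the block with the lower
 * hashvals, so
 *
 *	count = (entries(node1) - entries(node2)) / 2
 *
 * is positive when entries flow from the high end of node1 into the
 * low end of node2, and negative when they flow the other way.  Moving
 * half the difference leaves the two nodes within one entry of each
 * other.
 */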

/*
 * Add a new entry to an intermediate node.
 */
STATIC void
xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
			       xfs_da_state_blk_t *newblk)
{
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	int tmp;

	trace_xfs_da_node_add(state->args);

	node = oldblk->bp->b_addr;
	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
	ASSERT(newblk->blkno != 0);
	if (state->args->whichfork == XFS_DATA_FORK)
		ASSERT(newblk->blkno >= state->mp->m_dirleafblk &&
		       newblk->blkno < state->mp->m_dirfreeblk);

	/*
	 * We may need to make some room before we insert the new node.
	 */
	tmp = 0;
	btree = &node->btree[ oldblk->index ];
	if (oldblk->index < be16_to_cpu(node->hdr.count)) {
		tmp = (be16_to_cpu(node->hdr.count) - oldblk->index) * (uint)sizeof(*btree);
		memmove(btree + 1, btree, tmp);
	}
	btree->hashval = cpu_to_be32(newblk->hashval);
	btree->before = cpu_to_be32(newblk->blkno);
	xfs_trans_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
	be16_add_cpu(&node->hdr.count, 1);
	xfs_trans_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));

	/*
	 * Copy the last hash value from the oldblk to propagate upwards.
	 */
	oldblk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1 ].hashval);
}

/*========================================================================
 * Routines used for shrinking the Btree.
 *========================================================================*/

/*
 * Deallocate an empty leaf node, remove it from its parent,
 * possibly deallocating that block, etc...
 */
int
xfs_da_join(xfs_da_state_t *state)
{
	xfs_da_state_blk_t *drop_blk, *save_blk;
	int action, error;

	trace_xfs_da_join(state->args);

	action = 0;
	drop_blk = &state->path.blk[ state->path.active-1 ];
	save_blk = &state->altpath.blk[ state->path.active-1 ];
	ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
	ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
	       drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);

	/*
	 * Walk back up the tree joining/deallocating as necessary.
	 * When we stop dropping blocks, break out.
	 */
	for (  ; state->path.active >= 2; drop_blk--, save_blk--,
		 state->path.active--) {
		/*
		 * See if we can combine the block with a neighbor.
		 *   (action == 0) => no options, just leave
		 *   (action == 1) => coalesce, then unlink
		 *   (action == 2) => block empty, unlink it
		 */
		switch (drop_blk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
			error = xfs_attr_leaf_toosmall(state, &action);
			if (error)
				return(error);
			if (action == 0)
				return(0);
			xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			error = xfs_dir2_leafn_toosmall(state, &action);
			if (error)
				return error;
			if (action == 0)
				return 0;
			xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DA_NODE_MAGIC:
			/*
			 * Remove the offending node, fixup hashvals,
			 * check for a toosmall neighbor.
			 */
			xfs_da_node_remove(state, drop_blk);
			xfs_da_fixhashpath(state, &state->path);
			error = xfs_da_node_toosmall(state, &action);
			if (error)
				return(error);
			if (action == 0)
				return 0;
			xfs_da_node_unbalance(state, drop_blk, save_blk);
			break;
		}
		xfs_da_fixhashpath(state, &state->altpath);
		error = xfs_da_blk_unlink(state, drop_blk, save_blk);
		xfs_da_state_kill_altpath(state);
		if (error)
			return(error);
		error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
							 drop_blk->bp);
		drop_blk->bp = NULL;
		if (error)
			return(error);
	}
	/*
	 * We joined all the way to the top.  If it turns out that
	 * we only have one entry in the root, make the child block
	 * the new root.
	 */
	xfs_da_node_remove(state, drop_blk);
	xfs_da_fixhashpath(state, &state->path);
	error = xfs_da_root_join(state, &state->path.blk[0]);
	return(error);
}

#ifdef DEBUG
static void
xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
{
	__be16	magic = blkinfo->magic;

	if (level == 1) {
		ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
		       magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
	} else
		ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	ASSERT(!blkinfo->forw);
	ASSERT(!blkinfo->back);
}
#else	/* !DEBUG */
#define	xfs_da_blkinfo_onlychild_validate(blkinfo, level)
#endif	/* !DEBUG */

/*
 * We have only one entry in the root.  Copy the only remaining child of
 * the old root to block 0 as the new root node.
 */
STATIC int
xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
{
	xfs_da_intnode_t *oldroot;
	xfs_da_args_t *args;
	xfs_dablk_t child;
	struct xfs_buf *bp;
	int error;

	trace_xfs_da_root_join(state->args);

	args = state->args;
	ASSERT(args != NULL);
	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
	oldroot = root_blk->bp->b_addr;
	ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	ASSERT(!oldroot->hdr.info.forw);
	ASSERT(!oldroot->hdr.info.back);

	/*
	 * If the root has more than one child, then don't do anything.
	 */
	if (be16_to_cpu(oldroot->hdr.count) > 1)
		return(0);

	/*
	 * Read in the (only) child block, then copy those bytes into
	 * the root block's buffer and free the original child block.
	 */
	child = be32_to_cpu(oldroot->btree[0].before);
	ASSERT(child != 0);
	error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
					     args->whichfork);
	if (error)
		return(error);
	ASSERT(bp != NULL);
	xfs_da_blkinfo_onlychild_validate(bp->b_addr,
					be16_to_cpu(oldroot->hdr.level));

	memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize);
	xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
	error = xfs_da_shrink_inode(args, child, bp);
	return(error);
}

/*
 * Check a node block and its neighbors to see if the block should be
 * collapsed into one or the other neighbor.  Always keep the block
 * with the smaller block number.
 * If the current block is over 50% full, don't try to join it, return 0.
 * If the block is empty, fill in the state structure and return 2.
 * If it can be collapsed, fill in the state structure and return 1.
 * If nothing can be done, return 0.
 */
STATIC int
xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
{
	xfs_da_intnode_t *node;
	xfs_da_state_blk_t *blk;
	xfs_da_blkinfo_t *info;
	int count, forward, error, retval, i;
	xfs_dablk_t blkno;
	struct xfs_buf *bp;

	/*
	 * Check for the degenerate case of the block being over 50% full.
	 * If so, it's not worth even looking to see if we might be able
	 * to coalesce with a sibling.
	 */
	blk = &state->path.blk[ state->path.active-1 ];
	info = blk->bp->b_addr;
	ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	node = (xfs_da_intnode_t *)info;
	count = be16_to_cpu(node->hdr.count);
	if (count > (state->node_ents >> 1)) {
		*action = 0;	/* blk over 50%, don't try to join */
		return(0);	/* blk over 50%, don't try to join */
	}

	/*
	 * Check for the degenerate case of the block being empty.
	 * If the block is empty, we'll simply delete it, no need to
	 * coalesce it with a sibling block.  We choose (arbitrarily)
	 * to merge with the forward block unless it is NULL.
	 */
	if (count == 0) {
		/*
		 * Make altpath point to the block we want to keep and
		 * path point to the block we want to drop (this one).
		 */
		forward = (info->forw != 0);
		memcpy(&state->altpath, &state->path, sizeof(state->path));
		error = xfs_da_path_shift(state, &state->altpath, forward,
						 0, &retval);
		if (error)
			return(error);
		if (retval) {
			*action = 0;
		} else {
			*action = 2;
		}
		return(0);
	}

	/*
	 * Examine each sibling block to see if we can coalesce with
	 * at least 25% free space to spare.  We need to figure out
	 * whether to merge with the forward or the backward block.
	 * We prefer coalescing with the lower numbered sibling so as
	 * to shrink a directory over time.
	 */
	/* start with smaller blk num */
	forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back));
	for (i = 0; i < 2; forward = !forward, i++) {
		if (forward)
			blkno = be32_to_cpu(info->forw);
		else
			blkno = be32_to_cpu(info->back);
		if (blkno == 0)
			continue;
		error = xfs_da_read_buf(state->args->trans, state->args->dp,
					blkno, -1, &bp, state->args->whichfork);
		if (error)
			return(error);
		ASSERT(bp != NULL);

		node = (xfs_da_intnode_t *)info;
		count  = state->node_ents;
		count -= state->node_ents >> 2;
		count -= be16_to_cpu(node->hdr.count);
		node = bp->b_addr;
		ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
		count -= be16_to_cpu(node->hdr.count);
		xfs_trans_brelse(state->args->trans, bp);
		if (count >= 0)
			break;	/* fits with at least 25% to spare */
	}
	if (i >= 2) {
		*action = 0;
		return(0);
	}

	/*
	 * Make altpath point to the block we want to keep (the lower
	 * numbered block) and path point to the block we want to drop.
	 */
	memcpy(&state->altpath, &state->path, sizeof(state->path));
	if (blkno < blk->blkno) {
		error = xfs_da_path_shift(state, &state->altpath, forward,
						 0, &retval);
		if (error) {
			return(error);
		}
		if (retval) {
			*action = 0;
			return(0);
		}
	} else {
		error = xfs_da_path_shift(state, &state->path, forward,
						 0, &retval);
		if (error) {
			return(error);
		}
		if (retval) {
			*action = 0;
			return(0);
		}
	}
	*action = 1;
	return(0);
}
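
/*
 * The "25% to spare" test above is plain arithmetic: starting from
 *
 *	count = node_ents - node_ents/4 - entries(blk) - entries(sibling)
 *
 * count >= 0 holds exactly when the merged entry count would fit in
 * 75% or less of one node, leaving a quarter of the block free so the
 * combined node is not immediately split again by the next insert.
 */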

/*
 * Walk back up the tree adjusting hash values as necessary,
 * when we stop making changes, return.
 */
void
xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
{
	xfs_da_state_blk_t *blk;
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	xfs_dahash_t lasthash=0;
	int level, count;

	level = path->active-1;
	blk = &path->blk[ level ];
	switch (blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
		lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DA_NODE_MAGIC:
		lasthash = xfs_da_node_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	}
	for (blk--, level--; level >= 0; blk--, level--) {
		node = blk->bp->b_addr;
		ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
		btree = &node->btree[ blk->index ];
		if (be32_to_cpu(btree->hashval) == lasthash)
			break;
		blk->hashval = lasthash;
		btree->hashval = cpu_to_be32(lasthash);
		xfs_trans_log_buf(state->args->trans, blk->bp,
				  XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));

		lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
	}
}
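
/*
 * Each parent entry caches the largest hashval in the child it points
 * to, so a change only needs to propagate while the cached value at
 * the current level is stale.  As soon as a parent entry already
 * matches lasthash, every ancestor above it must match as well, which
 * is why the loop above can break out early.
 */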

/*
 * Remove an entry from an intermediate node.
 */
STATIC void
xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
{
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	int tmp;

	trace_xfs_da_node_remove(state->args);

	node = drop_blk->bp->b_addr;
	ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count));
	ASSERT(drop_blk->index >= 0);

	/*
	 * Copy over the offending entry, or just zero it out.
	 */
	btree = &node->btree[drop_blk->index];
	if (drop_blk->index < (be16_to_cpu(node->hdr.count)-1)) {
		tmp  = be16_to_cpu(node->hdr.count) - drop_blk->index - 1;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		memmove(btree, btree + 1, tmp);
		xfs_trans_log_buf(state->args->trans, drop_blk->bp,
		    XFS_DA_LOGRANGE(node, btree, tmp));
		btree = &node->btree[be16_to_cpu(node->hdr.count)-1];
	}
	memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
	xfs_trans_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
	be16_add_cpu(&node->hdr.count, -1);
	xfs_trans_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));

	/*
	 * Copy the last hash value from the block to propagate upwards.
	 */
	btree--;
	drop_blk->hashval = be32_to_cpu(btree->hashval);
}

/*
 * Unbalance the btree elements between two intermediate nodes,
 * move all Btree elements from one node into another.
 */
STATIC void
xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
				     xfs_da_state_blk_t *save_blk)
{
	xfs_da_intnode_t *drop_node, *save_node;
	xfs_da_node_entry_t *btree;
	int tmp;
	xfs_trans_t *tp;

	trace_xfs_da_node_unbalance(state->args);

	drop_node = drop_blk->bp->b_addr;
	save_node = save_blk->bp->b_addr;
	ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	tp = state->args->trans;

	/*
	 * If the dying block has lower hashvals, then move all the
	 * elements in the remaining block up to make a hole.
	 */
	if ((be32_to_cpu(drop_node->btree[0].hashval) < be32_to_cpu(save_node->btree[ 0 ].hashval)) ||
	    (be32_to_cpu(drop_node->btree[be16_to_cpu(drop_node->hdr.count)-1].hashval) <
	     be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval)))
	{
		btree = &save_node->btree[be16_to_cpu(drop_node->hdr.count)];
		tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
		memmove(btree, &save_node->btree[0], tmp);
		btree = &save_node->btree[0];
		xfs_trans_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, btree,
				(be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) *
				sizeof(xfs_da_node_entry_t)));
	} else {
		btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)];
		xfs_trans_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, btree,
				be16_to_cpu(drop_node->hdr.count) *
				sizeof(xfs_da_node_entry_t)));
	}

	/*
	 * Move all the B-tree elements from drop_blk to save_blk.
	 */
	tmp = be16_to_cpu(drop_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
	memcpy(btree, &drop_node->btree[0], tmp);
	be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count));

	xfs_trans_log_buf(tp, save_blk->bp,
		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
			sizeof(save_node->hdr)));

	/*
	 * Save the last hashval in the remaining block for upward propagation.
	 */
	save_blk->hashval = be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval);
}

/*========================================================================
 * Routines used for finding things in the Btree.
 *========================================================================*/

/*
 * Walk down the Btree looking for a particular filename, filling
 * in the state structure as we go.
 *
 * We will set the state structure to point to each of the elements
 * in each of the nodes where either the hashval is or should be.
 *
 * We support duplicate hashval's so for each entry in the current
 * node that could contain the desired hashval, descend.  This is a
 * pruned depth-first tree search.
 */
int							/* error */
xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
{
	xfs_da_state_blk_t *blk;
	xfs_da_blkinfo_t *curr;
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	xfs_dablk_t blkno;
	int probe, span, max, error, retval;
	xfs_dahash_t hashval, btreehashval;
	xfs_da_args_t *args;

	args = state->args;

	/*
	 * Descend thru the B-tree searching each level for the right
	 * node to use, until the right hashval is found.
	 */
	blkno = (args->whichfork == XFS_DATA_FORK)? state->mp->m_dirleafblk : 0;
	for (blk = &state->path.blk[0], state->path.active = 1;
			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
			 blk++, state->path.active++) {
		/*
		 * Read the next node down in the tree.
		 */
		blk->blkno = blkno;
		error = xfs_da_read_buf(args->trans, args->dp, blkno,
					-1, &blk->bp, args->whichfork);
		if (error) {
			blk->blkno = 0;
			state->path.active--;
			return(error);
		}
		curr = blk->bp->b_addr;
		blk->magic = be16_to_cpu(curr->magic);
		ASSERT(blk->magic == XFS_DA_NODE_MAGIC ||
		       blk->magic == XFS_DIR2_LEAFN_MAGIC ||
		       blk->magic == XFS_ATTR_LEAF_MAGIC);

		/*
		 * Search an intermediate node for a match.
		 */
		if (blk->magic == XFS_DA_NODE_MAGIC) {
			node = blk->bp->b_addr;
			max = be16_to_cpu(node->hdr.count);
			blk->hashval = be32_to_cpu(node->btree[max-1].hashval);

			/*
			 * Binary search.  (note: small blocks will skip loop)
			 */
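			/*
			 * The loop below halves "span" each pass and stops
			 * once span <= 4, so it only narrows probe to within
			 * a few entries of the target hashval.  The two
			 * linear scans that follow finish the job, settling
			 * on the first entry whose hashval is >= the search
			 * value, so that a run of duplicate hashvals is
			 * always entered at its beginning.
			 */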
			probe = span = max / 2;
			hashval = args->hashval;
			for (btree = &node->btree[probe]; span > 4;
				   btree = &node->btree[probe]) {
				span /= 2;
				btreehashval = be32_to_cpu(btree->hashval);
				if (btreehashval < hashval)
					probe += span;
				else if (btreehashval > hashval)
					probe -= span;
				else
					break;
			}
			ASSERT((probe >= 0) && (probe < max));
			ASSERT((span <= 4) || (be32_to_cpu(btree->hashval) == hashval));

			/*
			 * Since we may have duplicate hashval's, find the first
			 * matching hashval in the node.
			 */
			while ((probe > 0) && (be32_to_cpu(btree->hashval) >= hashval)) {
				btree--;
				probe--;
			}
			while ((probe < max) && (be32_to_cpu(btree->hashval) < hashval)) {
				btree++;
				probe++;
			}

			/*
			 * Pick the right block to descend on.
			 */
			if (probe == max) {
				blk->index = max-1;
				blkno = be32_to_cpu(node->btree[max-1].before);
			} else {
				blk->index = probe;
				blkno = be32_to_cpu(btree->before);
			}
		} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
			break;
		} else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
			blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
			break;
		}
	}

	/*
	 * A leaf block that ends in the hashval that we are interested in
	 * (final hashval == search hashval) means that the next block may
	 * contain more entries with the same hashval, shift upward to the
	 * next leaf and keep searching.
	 */
	for (;;) {
		if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
			retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
							&blk->index, state);
		} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
			retval = xfs_attr_leaf_lookup_int(blk->bp, args);
			blk->index = args->index;
			args->blkno = blk->blkno;
		} else {
			ASSERT(0);
			return XFS_ERROR(EFSCORRUPTED);
		}
		if (((retval == ENOENT) || (retval == ENOATTR)) &&
		    (blk->hashval == args->hashval)) {
			error = xfs_da_path_shift(state, &state->path, 1, 1,
							 &retval);
			if (error)
				return(error);
			if (retval == 0) {
				continue;
			} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
				/* path_shift() gives ENOENT */
				retval = XFS_ERROR(ENOATTR);
			}
		}
		break;
	}
	*result = retval;
	return(0);
}

/*========================================================================
 * Utility routines.
 *========================================================================*/

/*
 * Link a new block into a doubly linked list of blocks (of whatever type).
 */
int							/* error */
xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
			       xfs_da_state_blk_t *new_blk)
{
	xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
	xfs_da_args_t *args;
	int before=0, error;
	struct xfs_buf *bp;

	/*
	 * Set up environment.
	 */
	args = state->args;
	ASSERT(args != NULL);
	old_info = old_blk->bp->b_addr;
	new_info = new_blk->bp->b_addr;
	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
	       old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);
	ASSERT(old_blk->magic == be16_to_cpu(old_info->magic));
	ASSERT(new_blk->magic == be16_to_cpu(new_info->magic));
	ASSERT(old_blk->magic == new_blk->magic);

	switch (old_blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
		break;
	case XFS_DA_NODE_MAGIC:
		before = xfs_da_node_order(old_blk->bp, new_blk->bp);
		break;
	}

	/*
	 * Link blocks in appropriate order.
	 */
	if (before) {
		/*
		 * Link new block in before existing block.
		 */
		trace_xfs_da_link_before(args);
		new_info->forw = cpu_to_be32(old_blk->blkno);
		new_info->back = old_info->back;
		if (old_info->back) {
			error = xfs_da_read_buf(args->trans, args->dp,
						be32_to_cpu(old_info->back),
						-1, &bp, args->whichfork);
			if (error)
				return(error);
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic));
			ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
			tmp_info->forw = cpu_to_be32(new_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
		}
		old_info->back = cpu_to_be32(new_blk->blkno);
	} else {
		/*
		 * Link new block in after existing block.
		 */
		trace_xfs_da_link_after(args);
		new_info->forw = old_info->forw;
		new_info->back = cpu_to_be32(old_blk->blkno);
		if (old_info->forw) {
			error = xfs_da_read_buf(args->trans, args->dp,
						be32_to_cpu(old_info->forw),
						-1, &bp, args->whichfork);
			if (error)
				return(error);
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == old_info->magic);
			ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
			tmp_info->back = cpu_to_be32(new_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
		}
		old_info->forw = cpu_to_be32(new_blk->blkno);
	}

	xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
	xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
	return(0);
}

/*
 * Compare two intermediate nodes for "order".
 */
STATIC int
xfs_da_node_order(
	struct xfs_buf	*node1_bp,
	struct xfs_buf	*node2_bp)
{
	xfs_da_intnode_t *node1, *node2;

	node1 = node1_bp->b_addr;
	node2 = node2_bp->b_addr;
	ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) &&
	       node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
	    ((be32_to_cpu(node2->btree[0].hashval) <
	      be32_to_cpu(node1->btree[0].hashval)) ||
	     (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
	      be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
		return(1);
	}
	return(0);
}

/*
 * Pick up the last hashvalue from an intermediate node.
 */
STATIC uint
xfs_da_node_lasthash(
	struct xfs_buf	*bp,
	int		*count)
{
	xfs_da_intnode_t *node;

	node = bp->b_addr;
	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	if (count)
		*count = be16_to_cpu(node->hdr.count);
	if (!node->hdr.count)
		return(0);
	return be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
}

/*
 * Unlink a block from a doubly linked list of blocks.
 */
STATIC int						/* error */
xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
				 xfs_da_state_blk_t *save_blk)
{
	xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
	xfs_da_args_t *args;
	struct xfs_buf *bp;
	int error;

	/*
	 * Set up environment.
	 */
	args = state->args;
	ASSERT(args != NULL);
	save_info = save_blk->bp->b_addr;
	drop_info = drop_blk->bp->b_addr;
	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
	       save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
	ASSERT(save_blk->magic == be16_to_cpu(save_info->magic));
	ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic));
	ASSERT(save_blk->magic == drop_blk->magic);
	ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
	       (be32_to_cpu(save_info->back) == drop_blk->blkno));
	ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
	       (be32_to_cpu(drop_info->back) == save_blk->blkno));

	/*
	 * Unlink the leaf block from the doubly linked chain of leaves.
	 */
	if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
		trace_xfs_da_unlink_back(args);
		save_info->back = drop_info->back;
		if (drop_info->back) {
			error = xfs_da_read_buf(args->trans, args->dp,
						be32_to_cpu(drop_info->back),
						-1, &bp, args->whichfork);
			if (error)
				return(error);
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == save_info->magic);
			ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
			tmp_info->forw = cpu_to_be32(save_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0,
						    sizeof(*tmp_info) - 1);
		}
	} else {
		trace_xfs_da_unlink_forward(args);
		save_info->forw = drop_info->forw;
		if (drop_info->forw) {
			error = xfs_da_read_buf(args->trans, args->dp,
						be32_to_cpu(drop_info->forw),
						-1, &bp, args->whichfork);
			if (error)
				return(error);
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == save_info->magic);
			ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
			tmp_info->back = cpu_to_be32(save_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0,
						    sizeof(*tmp_info) - 1);
		}
	}

	xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
	return(0);
}

/*
 * Move a path "forward" or "!forward" one block at the current level.
 *
 * This routine will adjust a "path" to point to the next block
 * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
 * Btree, including updating pointers to the intermediate nodes between
 * the new bottom and the root.
 */
int							/* error */
xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
				 int forward, int release, int *result)
{
	xfs_da_state_blk_t *blk;
	xfs_da_blkinfo_t *info;
	xfs_da_intnode_t *node;
	xfs_da_args_t *args;
	xfs_dablk_t blkno=0;
	int level, error;

	/*
	 * Roll up the Btree looking for the first block where our
	 * current index is not at the edge of the block.  Note that
	 * we skip the bottom layer because we want the sibling block.
	 */
	args = state->args;
	ASSERT(args != NULL);
	ASSERT(path != NULL);
	ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
	level = (path->active-1) - 1;	/* skip bottom layer in path */
	for (blk = &path->blk[level]; level >= 0; blk--, level--) {
		ASSERT(blk->bp != NULL);
		node = blk->bp->b_addr;
		ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
		if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) {
			blk->index++;
			blkno = be32_to_cpu(node->btree[blk->index].before);
			break;
		} else if (!forward && (blk->index > 0)) {
			blk->index--;
			blkno = be32_to_cpu(node->btree[blk->index].before);
			break;
		}
	}
	if (level < 0) {
		*result = XFS_ERROR(ENOENT);	/* we're out of our tree */
		ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
		return(0);
	}

	/*
	 * Roll down the edge of the subtree until we reach the
	 * same depth we were at originally.
	 */
	for (blk++, level++; level < path->active; blk++, level++) {
		/*
		 * Release the old block.
		 * (if it's dirty, trans won't actually let go)
		 */
		if (release)
			xfs_trans_brelse(args->trans, blk->bp);

		/*
		 * Read the next child block.
		 */
		blk->blkno = blkno;
		error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
					&blk->bp, args->whichfork);
		if (error)
			return(error);
		ASSERT(blk->bp != NULL);
		info = blk->bp->b_addr;
		ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
		blk->magic = be16_to_cpu(info->magic);
		if (blk->magic == XFS_DA_NODE_MAGIC) {
			node = (xfs_da_intnode_t *)info;
			blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
			if (forward)
				blk->index = 0;
			else
				blk->index = be16_to_cpu(node->hdr.count)-1;
			blkno = be32_to_cpu(node->btree[blk->index].before);
		} else {
			ASSERT(level == path->active-1);
			blk->index = 0;
			switch(blk->magic) {
			case XFS_ATTR_LEAF_MAGIC:
				blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
								      NULL);
				break;
			case XFS_DIR2_LEAFN_MAGIC:
				blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
								       NULL);
				break;
			default:
				ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
				       blk->magic == XFS_DIR2_LEAFN_MAGIC);
				break;
			}
		}
	}
	*result = 0;
	return(0);
}


/*========================================================================
 * Utility routines.
 *========================================================================*/

/*
 * Implement a simple hash on a character string.
 * Rotate the hash value by 7 bits, then XOR each character in.
 * This is implemented with some source-level loop unrolling.
 */
xfs_dahash_t
xfs_da_hashname(const __uint8_t *name, int namelen)
{
	xfs_dahash_t hash;

	/*
	 * Do four characters at a time as long as we can.
	 */
	for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
		hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
		       (name[3] << 0) ^ rol32(hash, 7 * 4);

	/*
	 * Now do the rest of the characters.
	 */
	switch (namelen) {
	case 3:
		return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
		       rol32(hash, 7 * 3);
	case 2:
		return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
	case 1:
		return (name[0] << 0) ^ rol32(hash, 7 * 1);
	default: /* case 0: */
		return hash;
	}
}
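
/*
 * Worked example (values easy to check by hand): for the two-byte name
 * "ab" the four-at-a-time loop is skipped and the "case 2" branch
 * returns
 *
 *	('a' << 7) ^ ('b' << 0) ^ rol32(0, 14)
 *	    = (0x61 << 7) ^ 0x62 = 0x3080 ^ 0x0062 = 0x30e2
 *
 * Each character lands 7 bits further left than its successor, which
 * matches the rotate-by-7-per-character scheme the unrolled loop
 * applies to longer names.
 */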

enum xfs_dacmp
xfs_da_compname(
	struct xfs_da_args *args,
	const unsigned char *name,
	int		len)
{
	return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
					XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
}

static xfs_dahash_t
xfs_default_hashname(
	struct xfs_name	*name)
{
	return xfs_da_hashname(name->name, name->len);
}

const struct xfs_nameops xfs_default_nameops = {
	.hashname	= xfs_default_hashname,
	.compname	= xfs_da_compname
};

int
xfs_da_grow_inode_int(
	struct xfs_da_args	*args,
	xfs_fileoff_t		*bno,
	int			count)
{
	struct xfs_trans	*tp = args->trans;
	struct xfs_inode	*dp = args->dp;
	int			w = args->whichfork;
	xfs_drfsbno_t		nblks = dp->i_d.di_nblocks;
	struct xfs_bmbt_irec	map, *mapp;
	int			nmap, error, got, i, mapi;

	/*
	 * Find a spot in the file space to put the new block.
	 */
	error = xfs_bmap_first_unused(tp, dp, count, bno, w);
	if (error)
		return error;

	/*
	 * Try mapping it in one filesystem block.
	 */
	nmap = 1;
	ASSERT(args->firstblock != NULL);
	error = xfs_bmapi_write(tp, dp, *bno, count,
			xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
			args->firstblock, args->total, &map, &nmap,
			args->flist);
	if (error)
		return error;

	ASSERT(nmap <= 1);
	if (nmap == 1) {
		mapp = &map;
		mapi = 1;
	} else if (nmap == 0 && count > 1) {
		xfs_fileoff_t		b;
		int			c;

		/*
		 * If we didn't get it and the block might work if fragmented,
		 * try without the CONTIG flag.  Loop until we get it all.
		 */
		mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
		for (b = *bno, mapi = 0; b < *bno + count; ) {
			nmap = MIN(XFS_BMAP_MAX_NMAP, count);
			c = (int)(*bno + count - b);
			error = xfs_bmapi_write(tp, dp, b, c,
					xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
					args->firstblock, args->total,
					&mapp[mapi], &nmap, args->flist);
			if (error)
				goto out_free_map;
			if (nmap < 1)
				break;
			mapi += nmap;
			b = mapp[mapi - 1].br_startoff +
			    mapp[mapi - 1].br_blockcount;
		}
	} else {
		mapi = 0;
		mapp = NULL;
	}

	/*
	 * Count the blocks we got, make sure it matches the total.
	 */
	for (i = 0, got = 0; i < mapi; i++)
		got += mapp[i].br_blockcount;
	if (got != count || mapp[0].br_startoff != *bno ||
	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
	    *bno + count) {
		error = XFS_ERROR(ENOSPC);
		goto out_free_map;
	}

	/* account for newly allocated blocks in reserved blocks total */
	args->total -= dp->i_d.di_nblocks - nblks;

out_free_map:
	if (mapp != &map)
		kmem_free(mapp);
	return error;
}
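
/*
 * Allocation strategy above: a directory block may span several
 * filesystem blocks (count > 1), so the first xfs_bmapi_write() asks
 * for a single contiguous extent with XFS_BMAPI_CONTIG.  Only if that
 * fails outright (nmap == 0) is the request retried piecewise without
 * the flag, and the result is accepted only when the pieces are
 * logically contiguous and cover [*bno, *bno + count) exactly, since a
 * dabtree block must be addressable as one contiguous range in the
 * file.
 */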

/*
 * Add a block to the btree ahead of the file.
 * Return the new block number to the caller.
 */
int
xfs_da_grow_inode(
	struct xfs_da_args	*args,
	xfs_dablk_t		*new_blkno)
{
	xfs_fileoff_t		bno;
	int			count;
	int			error;

	trace_xfs_da_grow_inode(args);

	if (args->whichfork == XFS_DATA_FORK) {
		bno = args->dp->i_mount->m_dirleafblk;
		count = args->dp->i_mount->m_dirblkfsbs;
	} else {
		bno = 0;
		count = 1;
	}

	error = xfs_da_grow_inode_int(args, &bno, count);
	if (!error)
		*new_blkno = (xfs_dablk_t)bno;
	return error;
}
1681 
1682 /*
1683  * Ick. We need to always be able to remove a btree block, even
1684  * if there's no space reservation because the filesystem is full.
1685  * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
1686  * It swaps the target block with the last block in the file. The
1687  * last block in the file can always be removed since it can't cause
1688  * a bmap btree split to do that.
1689  */
1690 STATIC int
1692  xfs_da_args_t *args,
1693  xfs_dablk_t *dead_blknop,
1694  struct xfs_buf **dead_bufp)
1695 {
1696  xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
1697  struct xfs_buf *dead_buf, *last_buf, *sib_buf, *par_buf;
1698  xfs_fileoff_t lastoff;
1699  xfs_inode_t *ip;
1700  xfs_trans_t *tp;
1701  xfs_mount_t *mp;
1702  int error, w, entno, level, dead_level;
1703  xfs_da_blkinfo_t *dead_info, *sib_info;
1704  xfs_da_intnode_t *par_node, *dead_node;
1705  xfs_dir2_leaf_t *dead_leaf2;
1706  xfs_dahash_t dead_hash;
1707 
1708  trace_xfs_da_swap_lastblock(args);
1709 
1710  dead_buf = *dead_bufp;
1711  dead_blkno = *dead_blknop;
1712  tp = args->trans;
1713  ip = args->dp;
1714  w = args->whichfork;
1715  ASSERT(w == XFS_DATA_FORK);
1716  mp = ip->i_mount;
1717  lastoff = mp->m_dirfreeblk;
1718  error = xfs_bmap_last_before(tp, ip, &lastoff, w);
1719  if (error)
1720  return error;
1721  if (unlikely(lastoff == 0)) {
1722  XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
1723  mp);
1724  return XFS_ERROR(EFSCORRUPTED);
1725  }
1726  /*
1727  * Read the last block in the btree space.
1728  */
1729  last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
1730  if ((error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w)))
1731  return error;
1732  /*
1733  * Copy the last block into the dead buffer and log it.
1734  */
1735  memcpy(dead_buf->b_addr, last_buf->b_addr, mp->m_dirblksize);
1736  xfs_trans_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
1737  dead_info = dead_buf->b_addr;
1738  /*
1739  * Get values from the moved block.
1740  */
1741  if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
1742  dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
1743  dead_level = 0;
1744  dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
1745  } else {
1746  ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1747  dead_node = (xfs_da_intnode_t *)dead_info;
1748  dead_level = be16_to_cpu(dead_node->hdr.level);
1749  dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval);
1750  }
1751  sib_buf = par_buf = NULL;
1752  /*
1753  * If the moved block has a left sibling, fix up the pointers.
1754  */
1755  if ((sib_blkno = be32_to_cpu(dead_info->back))) {
1756  if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
1757  goto done;
1758  sib_info = sib_buf->b_addr;
1759  if (unlikely(
1760  be32_to_cpu(sib_info->forw) != last_blkno ||
1761  sib_info->magic != dead_info->magic)) {
1762  XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
1763  XFS_ERRLEVEL_LOW, mp);
1764  error = XFS_ERROR(EFSCORRUPTED);
1765  goto done;
1766  }
1767  sib_info->forw = cpu_to_be32(dead_blkno);
1768  xfs_trans_log_buf(tp, sib_buf,
1769  XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
1770  sizeof(sib_info->forw)));
1771  sib_buf = NULL;
1772  }
1773  /*
1774  * If the moved block has a right sibling, fix up the pointers.
1775  */
1776  if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
1777  if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
1778  goto done;
1779  sib_info = sib_buf->b_addr;
1780  if (unlikely(
1781  be32_to_cpu(sib_info->back) != last_blkno ||
1782  sib_info->magic != dead_info->magic)) {
1783  XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
1784  XFS_ERRLEVEL_LOW, mp);
1785  error = XFS_ERROR(EFSCORRUPTED);
1786  goto done;
1787  }
1788  sib_info->back = cpu_to_be32(dead_blkno);
1789  xfs_trans_log_buf(tp, sib_buf,
1790  XFS_DA_LOGRANGE(sib_info, &sib_info->back,
1791  sizeof(sib_info->back)));
1792  sib_buf = NULL;
1793  }
1794  par_blkno = mp->m_dirleafblk;
1795  level = -1;
1796  /*
1797  * Walk down the tree looking for the parent of the moved block.
1798  */
1799  for (;;) {
1800  if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1801  goto done;
1802  par_node = par_buf->b_addr;
1803  if (unlikely(par_node->hdr.info.magic !=
1805  (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
1806  XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
1807  XFS_ERRLEVEL_LOW, mp);
1808  error = XFS_ERROR(EFSCORRUPTED);
1809  goto done;
1810  }
1811  level = be16_to_cpu(par_node->hdr.level);
1812  for (entno = 0;
1813  entno < be16_to_cpu(par_node->hdr.count) &&
1814  be32_to_cpu(par_node->btree[entno].hashval) < dead_hash;
1815  entno++)
1816  continue;
1817  if (unlikely(entno == be16_to_cpu(par_node->hdr.count))) {
1818  XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
1819  XFS_ERRLEVEL_LOW, mp);
1820  error = XFS_ERROR(EFSCORRUPTED);
1821  goto done;
1822  }
1823  par_blkno = be32_to_cpu(par_node->btree[entno].before);
1824  if (level == dead_level + 1)
1825  break;
1826  xfs_trans_brelse(tp, par_buf);
1827  par_buf = NULL;
1828  }
1829  /*
1830  * We're in the right parent block.
1831  * Look for the right entry.
1832  */
1833  for (;;) {
1834  for (;
1835  entno < be16_to_cpu(par_node->hdr.count) &&
1836  be32_to_cpu(par_node->btree[entno].before) != last_blkno;
1837  entno++)
1838  continue;
1839  if (entno < be16_to_cpu(par_node->hdr.count))
1840  break;
1841  par_blkno = be32_to_cpu(par_node->hdr.info.forw);
1842  xfs_trans_brelse(tp, par_buf);
1843  par_buf = NULL;
1844  if (unlikely(par_blkno == 0)) {
1845  XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
1846  XFS_ERRLEVEL_LOW, mp);
1847  error = XFS_ERROR(EFSCORRUPTED);
1848  goto done;
1849  }
1850  if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1851  goto done;
1852  par_node = par_buf->b_addr;
1853  if (unlikely(
1854  be16_to_cpu(par_node->hdr.level) != level ||
1855  par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) {
1856  XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
1857  XFS_ERRLEVEL_LOW, mp);
1858  error = XFS_ERROR(EFSCORRUPTED);
1859  goto done;
1860  }
1861  entno = 0;
1862  }
1863  /*
1864  * Update the parent entry pointing to the moved block.
1865  */
1866  par_node->btree[entno].before = cpu_to_be32(dead_blkno);
1867  xfs_trans_log_buf(tp, par_buf,
1868  XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
1869  sizeof(par_node->btree[entno].before)));
1870  *dead_blknop = last_blkno;
1871  *dead_bufp = last_buf;
1872  return 0;
1873 done:
1874  if (par_buf)
1875  xfs_trans_brelse(tp, par_buf);
1876  if (sib_buf)
1877  xfs_trans_brelse(tp, sib_buf);
1878  xfs_trans_brelse(tp, last_buf);
1879  return error;
1880 }
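/*
 * Editorial note: on success the caller gets the old last block back
 * through *dead_blknop and *dead_bufp, so it can retry the unmap
 * against the block that is now surplus at the end of the file;
 * xfs_da_shrink_inode() below does exactly that when xfs_bunmapi()
 * returns ENOSPC for a directory block.
 */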
1881 
1882 /*
1883  * Remove a btree block from a directory or attribute.
1884  */
1885 int
1886 xfs_da_shrink_inode(
1887  xfs_da_args_t *args,
1888  xfs_dablk_t dead_blkno,
1889  struct xfs_buf *dead_buf)
1890 {
1891  xfs_inode_t *dp;
1892  int done, error, w, count;
1893  xfs_trans_t *tp;
1894  xfs_mount_t *mp;
1895 
1896  trace_xfs_da_shrink_inode(args);
1897 
1898  dp = args->dp;
1899  w = args->whichfork;
1900  tp = args->trans;
1901  mp = dp->i_mount;
1902  if (w == XFS_DATA_FORK)
1903  count = mp->m_dirblkfsbs;
1904  else
1905  count = 1;
1906  for (;;) {
1907  /*
1908  * Remove extents. If we get ENOSPC for a dir we have to move
1909  * the last block to the place we want to kill.
1910  */
1911  if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
1912  xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
1913  0, args->firstblock, args->flist,
1914  &done)) == ENOSPC) {
1915  if (w != XFS_DATA_FORK)
1916  break;
1917  if ((error = xfs_da_swap_lastblock(args, &dead_blkno,
1918  &dead_buf)))
1919  break;
1920  } else {
1921  break;
1922  }
1923  }
1924  xfs_trans_binval(tp, dead_buf);
1925  return error;
1926 }
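/*
 * Illustrative caller sketch (editorial, not verbatim kernel code):
 * a join that empties a btree block typically hands it back like so,
 * with "args", "blkno" and "bp" taken from the join's state:
 *
 *	error = xfs_da_shrink_inode(args, blkno, bp);
 *	if (error)
 *		return error;
 *
 * Note that xfs_trans_binval() above runs even on failure, so the dead
 * buffer is always invalidated in the transaction before returning.
 */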
1927 
1928 /*
1929  * See if the mapping(s) for this btree block are valid, i.e.
1930  * don't contain holes, are logically contiguous, and cover the whole range.
1931  */
1932 STATIC int
1933 xfs_da_map_covers_blocks(
1934  int nmap,
1935  xfs_bmbt_irec_t *mapp,
1936  xfs_dablk_t bno,
1937  int count)
1938 {
1939  int i;
1940  xfs_fileoff_t off;
1941 
1942  for (i = 0, off = bno; i < nmap; i++) {
1943  if (mapp[i].br_startblock == HOLESTARTBLOCK ||
1944  mapp[i].br_startblock == DELAYSTARTBLOCK) {
1945  return 0;
1946  }
1947  if (off != mapp[i].br_startoff) {
1948  return 0;
1949  }
1950  off += mapp[i].br_blockcount;
1951  }
1952  return off == bno + count;
1953 }
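/*
 * Worked example (editorial): for bno = 100 and count = 3, the two
 * mappings {br_startoff = 100, br_blockcount = 2} and
 * {br_startoff = 102, br_blockcount = 1} are hole-free and contiguous
 * and end at 103 == bno + count, so the function returns true; a hole,
 * a gap in br_startoff, or a short final mapping yields 0.
 */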
1954 
1955 /*
1956  * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map.
1957  *
1958  * For the single map case, it is assumed that the caller has provided a pointer
1959  * to a valid xfs_buf_map. For the multiple map case, this function will
1960  * allocate the xfs_buf_map to hold all the maps and replace the caller's single
1961  * map pointer with the allocated map.
1962  */
1963 static int
1964 xfs_buf_map_from_irec(
1965  struct xfs_mount *mp,
1966  struct xfs_buf_map **mapp,
1967  unsigned int *nmaps,
1968  struct xfs_bmbt_irec *irecs,
1969  unsigned int nirecs)
1970 {
1971  struct xfs_buf_map *map;
1972  int i;
1973 
1974  ASSERT(*nmaps == 1);
1975  ASSERT(nirecs >= 1);
1976 
1977  if (nirecs > 1) {
1978  map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_SLEEP);
1979  if (!map)
1980  return ENOMEM;
1981  *mapp = map;
1982  }
1983 
1984  *nmaps = nirecs;
1985  map = *mapp;
1986  for (i = 0; i < *nmaps; i++) {
1987  ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK &&
1988  irecs[i].br_startblock != HOLESTARTBLOCK);
1989  map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
1990  map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
1991  }
1992  return 0;
1993 }
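/*
 * Editorial note on ownership: when nirecs > 1 the caller's on-stack
 * single map is replaced by a kmem_zalloc()ed array, so callers such
 * as xfs_da_get_buf() and xfs_da_read_buf() below must (and do) call
 * kmem_free() once *mapp no longer points at their local map.
 */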
1994 
1995 /*
1996  * Map the block we are given ready for reading. There are three possible return
1997  * values:
1998  * -1 - will be returned if we land in a hole and mappedbno == -2 so the
1999  * caller knows not to execute a subsequent read.
2000  * 0 - if we mapped the block successfully
2001  * >0 - positive error number if there was an error.
2002  */
2003 static int
2004 xfs_dabuf_map(
2005  struct xfs_trans *trans,
2006  struct xfs_inode *dp,
2007  xfs_dablk_t bno,
2008  xfs_daddr_t mappedbno,
2009  int whichfork,
2010  struct xfs_buf_map **map,
2011  int *nmaps)
2012 {
2013  struct xfs_mount *mp = dp->i_mount;
2014  int nfsb;
2015  int error = 0;
2016  struct xfs_bmbt_irec irec;
2017  struct xfs_bmbt_irec *irecs = &irec;
2018  int nirecs;
2019 
2020  ASSERT(map && *map);
2021  ASSERT(*nmaps == 1);
2022 
2023  nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1;
2024 
2025  /*
2026  * Caller doesn't have a mapping. -2 means don't complain
2027  * if we land in a hole.
2028  */
2029  if (mappedbno == -1 || mappedbno == -2) {
2030  /*
2031  * Optimize the one-block case.
2032  */
2033  if (nfsb != 1)
2034  irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_SLEEP);
2035 
2036  nirecs = nfsb;
2037  error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs,
2038  &nirecs, xfs_bmapi_aflag(whichfork));
2039  if (error)
2040  goto out;
2041  } else {
2042  irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
2043  irecs->br_startoff = (xfs_fileoff_t)bno;
2044  irecs->br_blockcount = nfsb;
2045  irecs->br_state = 0;
2046  nirecs = 1;
2047  }
2048 
2049  if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) {
2050  error = mappedbno == -2 ? -1 : XFS_ERROR(EFSCORRUPTED);
2051  if (unlikely(error == EFSCORRUPTED)) {
2052  if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
2053  int i;
2054  xfs_alert(mp, "%s: bno %lld dir: inode %lld",
2055  __func__, (long long)bno,
2056  (long long)dp->i_ino);
2057  for (i = 0; i < *nmaps; i++) {
2058  xfs_alert(mp,
2059 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
2060  i,
2061  (long long)irecs[i].br_startoff,
2062  (long long)irecs[i].br_startblock,
2063  (long long)irecs[i].br_blockcount,
2064  irecs[i].br_state);
2065  }
2066  }
2067  XFS_ERROR_REPORT("xfs_da_do_buf(1)",
2068  XFS_ERRLEVEL_LOW, mp);
2069  }
2070  goto out;
2071  }
2072  error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs);
2073 out:
2074  if (irecs != &irec)
2075  kmem_free(irecs);
2076  return error;
2077 }
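/*
 * Summary of the mappedbno convention from the caller's side
 * (editorial sketch, not verbatim kernel code):
 *
 *	xfs_da_read_buf(tp, dp, bno, -1, &bp, w);
 *		no known mapping; landing in a hole is corruption
 *	xfs_da_read_buf(tp, dp, bno, -2, &bp, w);
 *		no known mapping; a hole is tolerated, the call
 *		returns 0 and leaves *bpp NULL
 *	xfs_da_read_buf(tp, dp, bno, daddr, &bp, w);
 *		the caller already knows the disk address, so the
 *		bmap lookup is skipped and the mapping is trusted
 */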
2078 
2079 /*
2080  * Get a buffer for the dir/attr block.
2081  */
2082 int
2083 xfs_da_get_buf(
2084  struct xfs_trans *trans,
2085  struct xfs_inode *dp,
2086  xfs_dablk_t bno,
2087  xfs_daddr_t mappedbno,
2088  struct xfs_buf **bpp,
2089  int whichfork)
2090 {
2091  struct xfs_buf *bp;
2092  struct xfs_buf_map map;
2093  struct xfs_buf_map *mapp;
2094  int nmap;
2095  int error;
2096 
2097  *bpp = NULL;
2098  mapp = &map;
2099  nmap = 1;
2100  error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
2101  &mapp, &nmap);
2102  if (error) {
2103  /* mapping a hole is not an error, but we don't continue */
2104  if (error == -1)
2105  error = 0;
2106  goto out_free;
2107  }
2108 
2109  bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp,
2110  mapp, nmap, 0);
2111  error = bp ? bp->b_error : XFS_ERROR(EIO);
2112  if (error) {
2113  xfs_trans_brelse(trans, bp);
2114  goto out_free;
2115  }
2116 
2117  *bpp = bp;
2118 
2119 out_free:
2120  if (mapp != &map)
2121  kmem_free(mapp);
2122 
2123  return error;
2124 }
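/*
 * Editorial note: unlike xfs_da_read_buf() below, this gets the buffer
 * without reading it from disk (xfs_trans_get_buf_map()), which suits
 * callers that are about to initialise the whole block anyway.
 */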
2125 
2126 /*
2127  * Get a buffer for the dir/attr block, fill in the contents.
2128  */
2129 int
2130 xfs_da_read_buf(
2131  struct xfs_trans *trans,
2132  struct xfs_inode *dp,
2133  xfs_dablk_t bno,
2134  xfs_daddr_t mappedbno,
2135  struct xfs_buf **bpp,
2136  int whichfork)
2137 {
2138  struct xfs_buf *bp;
2139  struct xfs_buf_map map;
2140  struct xfs_buf_map *mapp;
2141  int nmap;
2142  int error;
2143 
2144  *bpp = NULL;
2145  mapp = &map;
2146  nmap = 1;
2147  error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
2148  &mapp, &nmap);
2149  if (error) {
2150  /* mapping a hole is not an error, but we don't continue */
2151  if (error == -1)
2152  error = 0;
2153  goto out_free;
2154  }
2155 
2156  error = xfs_trans_read_buf_map(dp->i_mount, trans,
2157  dp->i_mount->m_ddev_targp,
2158  mapp, nmap, 0, &bp);
2159  if (error)
2160  goto out_free;
2161 
2162  if (whichfork == XFS_ATTR_FORK)
2163  xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
2164  else
2165  xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
2166 
2167  /*
2168  * This verification code will be moved to a CRC verification callback
2169  * function so just leave it here unchanged until then.
2170  */
2171  {
2172  xfs_dir2_data_hdr_t *hdr = bp->b_addr;
2173  xfs_dir2_free_t *free = bp->b_addr;
2174  xfs_da_blkinfo_t *info = bp->b_addr;
2175  uint magic, magic1;
2176  struct xfs_mount *mp = dp->i_mount;
2177 
2178  magic = be16_to_cpu(info->magic);
2179  magic1 = be32_to_cpu(hdr->magic);
2180  if (unlikely(
2181  XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
2182  (magic != XFS_ATTR_LEAF_MAGIC) &&
2183  (magic != XFS_DIR2_LEAF1_MAGIC) &&
2184  (magic != XFS_DIR2_LEAFN_MAGIC) &&
2185  (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
2186  (magic1 != XFS_DIR2_DATA_MAGIC) &&
2187  (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)),
2188  mp, XFS_ERRTAG_DA_READ_BUF,
2189  XFS_RANDOM_DA_READ_BUF))) {
2190  trace_xfs_da_btree_corrupt(bp, _RET_IP_);
2191  XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
2192  XFS_ERRLEVEL_LOW, mp, info);
2193  error = XFS_ERROR(EFSCORRUPTED);
2194  xfs_trans_brelse(trans, bp);
2195  goto out_free;
2196  }
2197  }
2198  *bpp = bp;
2199 out_free:
2200  if (mapp != &map)
2201  kmem_free(mapp);
2202 
2203  return error;
2204 }
2205 
2206 /*
2207  * Readahead the dir/attr block.
2208  */
2209 xfs_daddr_t
2210 xfs_da_reada_buf(
2211  struct xfs_trans *trans,
2212  struct xfs_inode *dp,
2213  xfs_dablk_t bno,
2214  int whichfork)
2215 {
2216  xfs_daddr_t mappedbno = -1;
2217  struct xfs_buf_map map;
2218  struct xfs_buf_map *mapp;
2219  int nmap;
2220  int error;
2221 
2222  mapp = &map;
2223  nmap = 1;
2224  error = xfs_dabuf_map(trans, dp, bno, -1, whichfork,
2225  &mapp, &nmap);
2226  if (error) {
2227  /* mapping a hole is not an error, but we don't continue */
2228  if (error == -1)
2229  error = 0;
2230  goto out_free;
2231  }
2232 
2233  mappedbno = mapp[0].bm_bn;
2234  xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap);
2235 
2236 out_free:
2237  if (mapp != &map)
2238  kmem_free(mapp);
2239 
2240  if (error)
2241  return -1;
2242  return mappedbno;
2243 }
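/*
 * Editorial note: the returned daddr can be remembered by the caller
 * and passed back later as the mappedbno argument of xfs_da_read_buf()
 * to skip a second bmap lookup once the readahead has completed; -1
 * means no usable mapping was found and nothing was read ahead.
 */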
2244 
2245 kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
2246 
2247 /*
2248  * Allocate a dir-state structure.
2249  * We don't put them on the stack since they're large.
2250  */
2251 xfs_da_state_t *
2252 xfs_da_state_alloc(void)
2253 {
2254  return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
2255 }
2256 
2257 /*
2258  * Kill the altpath contents of a da-state structure.
2259  */
2260 STATIC void
2261 xfs_da_state_kill_altpath(xfs_da_state_t *state)
2262 {
2263  int i;
2264 
2265  for (i = 0; i < state->altpath.active; i++)
2266  state->altpath.blk[i].bp = NULL;
2267  state->altpath.active = 0;
2268 }
2269 
2270 /*
2271  * Free a da-state structure.
2272  */
2273 void
2274 xfs_da_state_free(xfs_da_state_t *state)
2275 {
2276  xfs_da_state_kill_altpath(state);
2277 #ifdef DEBUG
2278  memset((char *)state, 0, sizeof(*state));
2279 #endif /* DEBUG */
2280  kmem_zone_free(xfs_da_state_zone, state);
2281 }
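/*
 * Typical lifecycle sketch (editorial, not verbatim kernel code): the
 * dir2 node and attr code bracket a btree operation with the two
 * routines above, roughly:
 *
 *	state = xfs_da_state_alloc();
 *	state->args = args;
 *	state->mp = args->dp->i_mount;
 *	...descend, split or join using the state...
 *	xfs_da_state_free(state);
 */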