Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
xfs_btree.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_mount.h"
27 #include "xfs_bmap_btree.h"
28 #include "xfs_alloc_btree.h"
29 #include "xfs_ialloc_btree.h"
30 #include "xfs_dinode.h"
31 #include "xfs_inode.h"
32 #include "xfs_inode_item.h"
33 #include "xfs_btree.h"
34 #include "xfs_error.h"
35 #include "xfs_trace.h"
36 
37 /*
38  * Cursor allocation zone.
39  */
41 
42 /*
43  * Btree magic numbers.
44  */
45 const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
47 };
48 
49 
50 STATIC int /* error (0 or EFSCORRUPTED) */
52  struct xfs_btree_cur *cur, /* btree cursor */
53  struct xfs_btree_block *block, /* btree long form block pointer */
54  int level, /* level of the btree block */
55  struct xfs_buf *bp) /* buffer for block, if any */
56 {
57  int lblock_ok; /* block passes checks */
58  struct xfs_mount *mp; /* file system mount point */
59 
60  mp = cur->bc_mp;
61  lblock_ok =
62  be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
63  be16_to_cpu(block->bb_level) == level &&
64  be16_to_cpu(block->bb_numrecs) <=
65  cur->bc_ops->get_maxrecs(cur, level) &&
66  block->bb_u.l.bb_leftsib &&
67  (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
69  be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
70  block->bb_u.l.bb_rightsib &&
71  (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
73  be64_to_cpu(block->bb_u.l.bb_rightsib)));
74  if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
77  if (bp)
78  trace_xfs_btree_corrupt(bp, _RET_IP_);
79  XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
80  mp);
81  return XFS_ERROR(EFSCORRUPTED);
82  }
83  return 0;
84 }
85 
86 STATIC int /* error (0 or EFSCORRUPTED) */
88  struct xfs_btree_cur *cur, /* btree cursor */
89  struct xfs_btree_block *block, /* btree short form block pointer */
90  int level, /* level of the btree block */
91  struct xfs_buf *bp) /* buffer containing block */
92 {
93  struct xfs_buf *agbp; /* buffer for ag. freespace struct */
94  struct xfs_agf *agf; /* ag. freespace structure */
95  xfs_agblock_t agflen; /* native ag. freespace length */
96  int sblock_ok; /* block passes checks */
97 
98  agbp = cur->bc_private.a.agbp;
99  agf = XFS_BUF_TO_AGF(agbp);
100  agflen = be32_to_cpu(agf->agf_length);
101  sblock_ok =
102  be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
103  be16_to_cpu(block->bb_level) == level &&
104  be16_to_cpu(block->bb_numrecs) <=
105  cur->bc_ops->get_maxrecs(cur, level) &&
106  (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
107  be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
108  block->bb_u.s.bb_leftsib &&
109  (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
110  be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
111  block->bb_u.s.bb_rightsib;
112  if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
115  if (bp)
116  trace_xfs_btree_corrupt(bp, _RET_IP_);
117  XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
118  XFS_ERRLEVEL_LOW, cur->bc_mp, block);
119  return XFS_ERROR(EFSCORRUPTED);
120  }
121  return 0;
122 }
123 
124 /*
125  * Debug routine: check that block header is ok.
126  */
127 int
129  struct xfs_btree_cur *cur, /* btree cursor */
130  struct xfs_btree_block *block, /* generic btree block pointer */
131  int level, /* level of the btree block */
132  struct xfs_buf *bp) /* buffer containing block, if any */
133 {
134  if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
135  return xfs_btree_check_lblock(cur, block, level, bp);
136  else
137  return xfs_btree_check_sblock(cur, block, level, bp);
138 }
139 
140 /*
141  * Check that (long) pointer is ok.
142  */
143 int /* error (0 or EFSCORRUPTED) */
145  struct xfs_btree_cur *cur, /* btree cursor */
146  xfs_dfsbno_t bno, /* btree block disk address */
147  int level) /* btree block level */
148 {
150  level > 0 &&
151  bno != NULLDFSBNO &&
152  XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
153  return 0;
154 }
155 
156 #ifdef DEBUG
157 /*
158  * Check that (short) pointer is ok.
159  */
160 STATIC int /* error (0 or EFSCORRUPTED) */
161 xfs_btree_check_sptr(
162  struct xfs_btree_cur *cur, /* btree cursor */
163  xfs_agblock_t bno, /* btree block disk address */
164  int level) /* btree block level */
165 {
166  xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks;
167 
169  level > 0 &&
170  bno != NULLAGBLOCK &&
171  bno != 0 &&
172  bno < agblocks);
173  return 0;
174 }
175 
176 /*
177  * Check that block ptr is ok.
178  */
179 STATIC int /* error (0 or EFSCORRUPTED) */
180 xfs_btree_check_ptr(
181  struct xfs_btree_cur *cur, /* btree cursor */
182  union xfs_btree_ptr *ptr, /* btree block disk address */
183  int index, /* offset from ptr to check */
184  int level) /* btree block level */
185 {
186  if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
187  return xfs_btree_check_lptr(cur,
188  be64_to_cpu((&ptr->l)[index]), level);
189  } else {
190  return xfs_btree_check_sptr(cur,
191  be32_to_cpu((&ptr->s)[index]), level);
192  }
193 }
194 #endif
195 
196 /*
197  * Delete the btree cursor.
198  */
199 void
201  xfs_btree_cur_t *cur, /* btree cursor */
202  int error) /* del because of error */
203 {
204  int i; /* btree level */
205 
206  /*
207  * Clear the buffer pointers, and release the buffers.
208  * If we're doing this in the face of an error, we
209  * need to make sure to inspect all of the entries
210  * in the bc_bufs array for buffers to be unlocked.
211  * This is because some of the btree code works from
212  * level n down to 0, and if we get an error along
213  * the way we won't have initialized all the entries
214  * down to 0.
215  */
216  for (i = 0; i < cur->bc_nlevels; i++) {
217  if (cur->bc_bufs[i])
218  xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
219  else if (!error)
220  break;
221  }
222  /*
223  * Can't free a bmap cursor without having dealt with the
224  * allocated indirect blocks' accounting.
225  */
226  ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
227  cur->bc_private.b.allocated == 0);
228  /*
229  * Free the cursor.
230  */
231  kmem_zone_free(xfs_btree_cur_zone, cur);
232 }
233 
234 /*
235  * Duplicate the btree cursor.
236  * Allocate a new one, copy the record, re-get the buffers.
237  */
238 int /* error */
240  xfs_btree_cur_t *cur, /* input cursor */
241  xfs_btree_cur_t **ncur) /* output cursor */
242 {
243  xfs_buf_t *bp; /* btree block's buffer pointer */
244  int error; /* error return value */
245  int i; /* level number of btree block */
246  xfs_mount_t *mp; /* mount structure for filesystem */
247  xfs_btree_cur_t *new; /* new cursor value */
248  xfs_trans_t *tp; /* transaction pointer, can be NULL */
249 
250  tp = cur->bc_tp;
251  mp = cur->bc_mp;
252 
253  /*
254  * Allocate a new cursor like the old one.
255  */
256  new = cur->bc_ops->dup_cursor(cur);
257 
258  /*
259  * Copy the record currently in the cursor.
260  */
261  new->bc_rec = cur->bc_rec;
262 
263  /*
264  * For each level current, re-get the buffer and copy the ptr value.
265  */
266  for (i = 0; i < new->bc_nlevels; i++) {
267  new->bc_ptrs[i] = cur->bc_ptrs[i];
268  new->bc_ra[i] = cur->bc_ra[i];
269  if ((bp = cur->bc_bufs[i])) {
270  if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
271  XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
272  xfs_btree_del_cursor(new, error);
273  *ncur = NULL;
274  return error;
275  }
276  new->bc_bufs[i] = bp;
277  ASSERT(!xfs_buf_geterror(bp));
278  } else
279  new->bc_bufs[i] = NULL;
280  }
281  *ncur = new;
282  return 0;
283 }
284 
285 /*
286  * XFS btree block layout and addressing:
287  *
288  * There are two types of blocks in the btree: leaf and non-leaf blocks.
289  *
290  * The leaf record start with a header then followed by records containing
291  * the values. A non-leaf block also starts with the same header, and
292  * then first contains lookup keys followed by an equal number of pointers
293  * to the btree blocks at the previous level.
294  *
295  * +--------+-------+-------+-------+-------+-------+-------+
296  * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
297  * +--------+-------+-------+-------+-------+-------+-------+
298  *
299  * +--------+-------+-------+-------+-------+-------+-------+
300  * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
301  * +--------+-------+-------+-------+-------+-------+-------+
302  *
303  * The header is called struct xfs_btree_block for reasons better left unknown
304  * and comes in different versions for short (32bit) and long (64bit) block
305  * pointers. The record and key structures are defined by the btree instances
306  * and opaque to the btree core. The block pointers are simple disk endian
307  * integers, available in a short (32bit) and long (64bit) variant.
308  *
309  * The helpers below calculate the offset of a given record, key or pointer
310  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
311  * record, key or pointer (xfs_btree_*_addr). Note that all addressing
312  * inside the btree block is done using indices starting at one, not zero!
313  */
314 
315 /*
316  * Return size of the btree block header for this btree instance.
317  */
318 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
319 {
320  return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
323 }
324 
325 /*
326  * Return size of btree block pointers for this btree instance.
327  */
328 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
329 {
330  return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
331  sizeof(__be64) : sizeof(__be32);
332 }
333 
334 /*
335  * Calculate offset of the n-th record in a btree block.
336  */
337 STATIC size_t
339  struct xfs_btree_cur *cur,
340  int n)
341 {
342  return xfs_btree_block_len(cur) +
343  (n - 1) * cur->bc_ops->rec_len;
344 }
345 
346 /*
347  * Calculate offset of the n-th key in a btree block.
348  */
349 STATIC size_t
351  struct xfs_btree_cur *cur,
352  int n)
353 {
354  return xfs_btree_block_len(cur) +
355  (n - 1) * cur->bc_ops->key_len;
356 }
357 
358 /*
359  * Calculate offset of the n-th block pointer in a btree block.
360  */
361 STATIC size_t
363  struct xfs_btree_cur *cur,
364  int n,
365  int level)
366 {
367  return xfs_btree_block_len(cur) +
368  cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
369  (n - 1) * xfs_btree_ptr_len(cur);
370 }
371 
372 /*
373  * Return a pointer to the n-th record in the btree block.
374  */
375 STATIC union xfs_btree_rec *
377  struct xfs_btree_cur *cur,
378  int n,
379  struct xfs_btree_block *block)
380 {
381  return (union xfs_btree_rec *)
382  ((char *)block + xfs_btree_rec_offset(cur, n));
383 }
384 
385 /*
386  * Return a pointer to the n-th key in the btree block.
387  */
388 STATIC union xfs_btree_key *
390  struct xfs_btree_cur *cur,
391  int n,
392  struct xfs_btree_block *block)
393 {
394  return (union xfs_btree_key *)
395  ((char *)block + xfs_btree_key_offset(cur, n));
396 }
397 
398 /*
399  * Return a pointer to the n-th block pointer in the btree block.
400  */
401 STATIC union xfs_btree_ptr *
403  struct xfs_btree_cur *cur,
404  int n,
405  struct xfs_btree_block *block)
406 {
407  int level = xfs_btree_get_level(block);
408 
409  ASSERT(block->bb_level != 0);
410 
411  return (union xfs_btree_ptr *)
412  ((char *)block + xfs_btree_ptr_offset(cur, n, level));
413 }
414 
415 /*
416  * Get a the root block which is stored in the inode.
417  *
418  * For now this btree implementation assumes the btree root is always
419  * stored in the if_broot field of an inode fork.
420  */
421 STATIC struct xfs_btree_block *
423  struct xfs_btree_cur *cur)
424 {
425  struct xfs_ifork *ifp;
426 
427  ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
428  return (struct xfs_btree_block *)ifp->if_broot;
429 }
430 
431 /*
432  * Retrieve the block pointer from the cursor at the given level.
433  * This may be an inode btree root or from a buffer.
434  */
435 STATIC struct xfs_btree_block * /* generic btree block pointer */
437  struct xfs_btree_cur *cur, /* btree cursor */
438  int level, /* level in btree */
439  struct xfs_buf **bpp) /* buffer containing the block */
440 {
441  if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
442  (level == cur->bc_nlevels - 1)) {
443  *bpp = NULL;
444  return xfs_btree_get_iroot(cur);
445  }
446 
447  *bpp = cur->bc_bufs[level];
448  return XFS_BUF_TO_BLOCK(*bpp);
449 }
450 
451 /*
452  * Get a buffer for the block, return it with no data read.
453  * Long-form addressing.
454  */
455 xfs_buf_t * /* buffer for fsbno */
457  xfs_mount_t *mp, /* file system mount point */
458  xfs_trans_t *tp, /* transaction pointer */
459  xfs_fsblock_t fsbno, /* file system block number */
460  uint lock) /* lock flags for get_buf */
461 {
462  xfs_buf_t *bp; /* buffer pointer (return value) */
463  xfs_daddr_t d; /* real disk block address */
464 
465  ASSERT(fsbno != NULLFSBLOCK);
466  d = XFS_FSB_TO_DADDR(mp, fsbno);
467  bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
468  ASSERT(!xfs_buf_geterror(bp));
469  return bp;
470 }
471 
472 /*
473  * Get a buffer for the block, return it with no data read.
474  * Short-form addressing.
475  */
476 xfs_buf_t * /* buffer for agno/agbno */
478  xfs_mount_t *mp, /* file system mount point */
479  xfs_trans_t *tp, /* transaction pointer */
480  xfs_agnumber_t agno, /* allocation group number */
481  xfs_agblock_t agbno, /* allocation group block number */
482  uint lock) /* lock flags for get_buf */
483 {
484  xfs_buf_t *bp; /* buffer pointer (return value) */
485  xfs_daddr_t d; /* real disk block address */
486 
487  ASSERT(agno != NULLAGNUMBER);
488  ASSERT(agbno != NULLAGBLOCK);
489  d = XFS_AGB_TO_DADDR(mp, agno, agbno);
490  bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
491  ASSERT(!xfs_buf_geterror(bp));
492  return bp;
493 }
494 
495 /*
496  * Check for the cursor referring to the last block at the given level.
497  */
498 int /* 1=is last block, 0=not last block */
500  xfs_btree_cur_t *cur, /* btree cursor */
501  int level) /* level to check */
502 {
503  struct xfs_btree_block *block; /* generic btree block pointer */
504  xfs_buf_t *bp; /* buffer containing block */
505 
506  block = xfs_btree_get_block(cur, level, &bp);
507  xfs_btree_check_block(cur, block, level, bp);
508  if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
509  return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
510  else
511  return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
512 }
513 
514 /*
515  * Change the cursor to point to the first record at the given level.
516  * Other levels are unaffected.
517  */
518 STATIC int /* success=1, failure=0 */
520  xfs_btree_cur_t *cur, /* btree cursor */
521  int level) /* level to change */
522 {
523  struct xfs_btree_block *block; /* generic btree block pointer */
524  xfs_buf_t *bp; /* buffer containing block */
525 
526  /*
527  * Get the block pointer for this level.
528  */
529  block = xfs_btree_get_block(cur, level, &bp);
530  xfs_btree_check_block(cur, block, level, bp);
531  /*
532  * It's empty, there is no such record.
533  */
534  if (!block->bb_numrecs)
535  return 0;
536  /*
537  * Set the ptr value to 1, that's the first record/key.
538  */
539  cur->bc_ptrs[level] = 1;
540  return 1;
541 }
542 
543 /*
544  * Change the cursor to point to the last record in the current block
545  * at the given level. Other levels are unaffected.
546  */
547 STATIC int /* success=1, failure=0 */
549  xfs_btree_cur_t *cur, /* btree cursor */
550  int level) /* level to change */
551 {
552  struct xfs_btree_block *block; /* generic btree block pointer */
553  xfs_buf_t *bp; /* buffer containing block */
554 
555  /*
556  * Get the block pointer for this level.
557  */
558  block = xfs_btree_get_block(cur, level, &bp);
559  xfs_btree_check_block(cur, block, level, bp);
560  /*
561  * It's empty, there is no such record.
562  */
563  if (!block->bb_numrecs)
564  return 0;
565  /*
566  * Set the ptr value to numrecs, that's the last record/key.
567  */
568  cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
569  return 1;
570 }
571 
572 /*
573  * Compute first and last byte offsets for the fields given.
574  * Interprets the offsets table, which contains struct field offsets.
575  */
576 void
578  __int64_t fields, /* bitmask of fields */
579  const short *offsets, /* table of field offsets */
580  int nbits, /* number of bits to inspect */
581  int *first, /* output: first byte offset */
582  int *last) /* output: last byte offset */
583 {
584  int i; /* current bit number */
585  __int64_t imask; /* mask for current bit number */
586 
587  ASSERT(fields != 0);
588  /*
589  * Find the lowest bit, so the first byte offset.
590  */
591  for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
592  if (imask & fields) {
593  *first = offsets[i];
594  break;
595  }
596  }
597  /*
598  * Find the highest bit, so the last byte offset.
599  */
600  for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
601  if (imask & fields) {
602  *last = offsets[i + 1] - 1;
603  break;
604  }
605  }
606 }
607 
608 /*
609  * Get a buffer for the block, return it read in.
610  * Long-form addressing.
611  */
612 int /* error */
614  xfs_mount_t *mp, /* file system mount point */
615  xfs_trans_t *tp, /* transaction pointer */
616  xfs_fsblock_t fsbno, /* file system block number */
617  uint lock, /* lock flags for read_buf */
618  xfs_buf_t **bpp, /* buffer for fsbno */
619  int refval) /* ref count value for buffer */
620 {
621  xfs_buf_t *bp; /* return value */
622  xfs_daddr_t d; /* real disk block address */
623  int error;
624 
625  ASSERT(fsbno != NULLFSBLOCK);
626  d = XFS_FSB_TO_DADDR(mp, fsbno);
627  if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
628  mp->m_bsize, lock, &bp))) {
629  return error;
630  }
631  ASSERT(!xfs_buf_geterror(bp));
632  if (bp)
633  xfs_buf_set_ref(bp, refval);
634  *bpp = bp;
635  return 0;
636 }
637 
638 /*
639  * Read-ahead the block, don't wait for it, don't return a buffer.
640  * Long-form addressing.
641  */
642 /* ARGSUSED */
643 void
645  xfs_mount_t *mp, /* file system mount point */
646  xfs_fsblock_t fsbno, /* file system block number */
647  xfs_extlen_t count) /* count of filesystem blocks */
648 {
649  xfs_daddr_t d;
650 
651  ASSERT(fsbno != NULLFSBLOCK);
652  d = XFS_FSB_TO_DADDR(mp, fsbno);
653  xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
654 }
655 
656 /*
657  * Read-ahead the block, don't wait for it, don't return a buffer.
658  * Short-form addressing.
659  */
660 /* ARGSUSED */
661 void
663  xfs_mount_t *mp, /* file system mount point */
664  xfs_agnumber_t agno, /* allocation group number */
665  xfs_agblock_t agbno, /* allocation group block number */
666  xfs_extlen_t count) /* count of filesystem blocks */
667 {
668  xfs_daddr_t d;
669 
670  ASSERT(agno != NULLAGNUMBER);
671  ASSERT(agbno != NULLAGBLOCK);
672  d = XFS_AGB_TO_DADDR(mp, agno, agbno);
673  xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
674 }
675 
676 STATIC int
678  struct xfs_btree_cur *cur,
679  int lr,
680  struct xfs_btree_block *block)
681 {
682  int rval = 0;
683  xfs_dfsbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
684  xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
685 
686  if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
687  xfs_btree_reada_bufl(cur->bc_mp, left, 1);
688  rval++;
689  }
690 
691  if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
692  xfs_btree_reada_bufl(cur->bc_mp, right, 1);
693  rval++;
694  }
695 
696  return rval;
697 }
698 
699 STATIC int
701  struct xfs_btree_cur *cur,
702  int lr,
703  struct xfs_btree_block *block)
704 {
705  int rval = 0;
706  xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
707  xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
708 
709 
710  if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
711  xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
712  left, 1);
713  rval++;
714  }
715 
716  if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
717  xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
718  right, 1);
719  rval++;
720  }
721 
722  return rval;
723 }
724 
725 /*
726  * Read-ahead btree blocks, at the given level.
727  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
728  */
729 STATIC int
731  struct xfs_btree_cur *cur, /* btree cursor */
732  int lev, /* level in btree */
733  int lr) /* left/right bits */
734 {
735  struct xfs_btree_block *block;
736 
737  /*
738  * No readahead needed if we are at the root level and the
739  * btree root is stored in the inode.
740  */
741  if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
742  (lev == cur->bc_nlevels - 1))
743  return 0;
744 
745  if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
746  return 0;
747 
748  cur->bc_ra[lev] |= lr;
749  block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
750 
751  if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
752  return xfs_btree_readahead_lblock(cur, lr, block);
753  return xfs_btree_readahead_sblock(cur, lr, block);
754 }
755 
756 /*
757  * Set the buffer for level "lev" in the cursor to bp, releasing
758  * any previous buffer.
759  */
760 STATIC void
762  xfs_btree_cur_t *cur, /* btree cursor */
763  int lev, /* level in btree */
764  xfs_buf_t *bp) /* new buffer to set */
765 {
766  struct xfs_btree_block *b; /* btree block */
767 
768  if (cur->bc_bufs[lev])
769  xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
770  cur->bc_bufs[lev] = bp;
771  cur->bc_ra[lev] = 0;
772 
773  b = XFS_BUF_TO_BLOCK(bp);
774  if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
775  if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
776  cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
777  if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
778  cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
779  } else {
780  if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
781  cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
782  if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
783  cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
784  }
785 }
786 
787 STATIC int
789  struct xfs_btree_cur *cur,
790  union xfs_btree_ptr *ptr)
791 {
792  if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
793  return ptr->l == cpu_to_be64(NULLDFSBNO);
794  else
795  return ptr->s == cpu_to_be32(NULLAGBLOCK);
796 }
797 
798 STATIC void
800  struct xfs_btree_cur *cur,
801  union xfs_btree_ptr *ptr)
802 {
803  if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
804  ptr->l = cpu_to_be64(NULLDFSBNO);
805  else
806  ptr->s = cpu_to_be32(NULLAGBLOCK);
807 }
808 
809 /*
810  * Get/set/init sibling pointers
811  */
812 STATIC void
814  struct xfs_btree_cur *cur,
815  struct xfs_btree_block *block,
816  union xfs_btree_ptr *ptr,
817  int lr)
818 {
819  ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
820 
821  if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
822  if (lr == XFS_BB_RIGHTSIB)
823  ptr->l = block->bb_u.l.bb_rightsib;
824  else
825  ptr->l = block->bb_u.l.bb_leftsib;
826  } else {
827  if (lr == XFS_BB_RIGHTSIB)
828  ptr->s = block->bb_u.s.bb_rightsib;
829  else
830  ptr->s = block->bb_u.s.bb_leftsib;
831  }
832 }
833 
834 STATIC void
836  struct xfs_btree_cur *cur,
837  struct xfs_btree_block *block,
838  union xfs_btree_ptr *ptr,
839  int lr)
840 {
841  ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
842 
843  if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
844  if (lr == XFS_BB_RIGHTSIB)
845  block->bb_u.l.bb_rightsib = ptr->l;
846  else
847  block->bb_u.l.bb_leftsib = ptr->l;
848  } else {
849  if (lr == XFS_BB_RIGHTSIB)
850  block->bb_u.s.bb_rightsib = ptr->s;
851  else
852  block->bb_u.s.bb_leftsib = ptr->s;
853  }
854 }
855 
856 STATIC void
858  struct xfs_btree_cur *cur,
859  int level,
860  int numrecs,
861  struct xfs_btree_block *new) /* new block */
862 {
863  new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
864  new->bb_level = cpu_to_be16(level);
865  new->bb_numrecs = cpu_to_be16(numrecs);
866 
867  if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
868  new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
869  new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
870  } else {
871  new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
872  new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
873  }
874 }
875 
876 /*
877  * Return true if ptr is the last record in the btree and
878  * we need to track updateÑ• to this record. The decision
879  * will be further refined in the update_lastrec method.
880  */
881 STATIC int
883  struct xfs_btree_cur *cur,
884  struct xfs_btree_block *block,
885  int level)
886 {
887  union xfs_btree_ptr ptr;
888 
889  if (level > 0)
890  return 0;
891  if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
892  return 0;
893 
894  xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
895  if (!xfs_btree_ptr_is_null(cur, &ptr))
896  return 0;
897  return 1;
898 }
899 
900 STATIC void
902  struct xfs_btree_cur *cur,
903  struct xfs_buf *bp,
904  union xfs_btree_ptr *ptr)
905 {
906  if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
907  ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
908  XFS_BUF_ADDR(bp)));
909  else {
911  XFS_BUF_ADDR(bp)));
912  }
913 }
914 
915 STATIC xfs_daddr_t
917  struct xfs_btree_cur *cur,
918  union xfs_btree_ptr *ptr)
919 {
920  if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
921  ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
922 
923  return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
924  } else {
925  ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
926  ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
927 
928  return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
929  be32_to_cpu(ptr->s));
930  }
931 }
932 
933 STATIC void
935  struct xfs_btree_cur *cur,
936  struct xfs_buf *bp)
937 {
938  switch (cur->bc_btnum) {
939  case XFS_BTNUM_BNO:
940  case XFS_BTNUM_CNT:
941  xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
942  break;
943  case XFS_BTNUM_INO:
944  xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
945  break;
946  case XFS_BTNUM_BMAP:
947  xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
948  break;
949  default:
950  ASSERT(0);
951  }
952 }
953 
954 STATIC int
956  struct xfs_btree_cur *cur,
957  union xfs_btree_ptr *ptr,
958  int flags,
959  struct xfs_btree_block **block,
960  struct xfs_buf **bpp)
961 {
962  struct xfs_mount *mp = cur->bc_mp;
963  xfs_daddr_t d;
964 
965  /* need to sort out how callers deal with failures first */
966  ASSERT(!(flags & XBF_TRYLOCK));
967 
968  d = xfs_btree_ptr_to_daddr(cur, ptr);
969  *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
970  mp->m_bsize, flags);
971 
972  if (!*bpp)
973  return ENOMEM;
974 
975  *block = XFS_BUF_TO_BLOCK(*bpp);
976  return 0;
977 }
978 
979 /*
980  * Read in the buffer at the given ptr and return the buffer and
981  * the block pointer within the buffer.
982  */
983 STATIC int
985  struct xfs_btree_cur *cur,
986  union xfs_btree_ptr *ptr,
987  int level,
988  int flags,
989  struct xfs_btree_block **block,
990  struct xfs_buf **bpp)
991 {
992  struct xfs_mount *mp = cur->bc_mp;
993  xfs_daddr_t d;
994  int error;
995 
996  /* need to sort out how callers deal with failures first */
997  ASSERT(!(flags & XBF_TRYLOCK));
998 
999  d = xfs_btree_ptr_to_daddr(cur, ptr);
1000  error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1001  mp->m_bsize, flags, bpp);
1002  if (error)
1003  return error;
1004 
1005  ASSERT(!xfs_buf_geterror(*bpp));
1006 
1007  xfs_btree_set_refs(cur, *bpp);
1008  *block = XFS_BUF_TO_BLOCK(*bpp);
1009 
1010  error = xfs_btree_check_block(cur, *block, level, *bpp);
1011  if (error)
1012  xfs_trans_brelse(cur->bc_tp, *bpp);
1013  return error;
1014 }
1015 
1016 /*
1017  * Copy keys from one btree block to another.
1018  */
1019 STATIC void
1021  struct xfs_btree_cur *cur,
1022  union xfs_btree_key *dst_key,
1023  union xfs_btree_key *src_key,
1024  int numkeys)
1025 {
1026  ASSERT(numkeys >= 0);
1027  memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1028 }
1029 
1030 /*
1031  * Copy records from one btree block to another.
1032  */
1033 STATIC void
1035  struct xfs_btree_cur *cur,
1036  union xfs_btree_rec *dst_rec,
1037  union xfs_btree_rec *src_rec,
1038  int numrecs)
1039 {
1040  ASSERT(numrecs >= 0);
1041  memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1042 }
1043 
1044 /*
1045  * Copy block pointers from one btree block to another.
1046  */
1047 STATIC void
1049  struct xfs_btree_cur *cur,
1050  union xfs_btree_ptr *dst_ptr,
1051  union xfs_btree_ptr *src_ptr,
1052  int numptrs)
1053 {
1054  ASSERT(numptrs >= 0);
1055  memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1056 }
1057 
1058 /*
1059  * Shift keys one index left/right inside a single btree block.
1060  */
1061 STATIC void
1063  struct xfs_btree_cur *cur,
1064  union xfs_btree_key *key,
1065  int dir,
1066  int numkeys)
1067 {
1068  char *dst_key;
1069 
1070  ASSERT(numkeys >= 0);
1071  ASSERT(dir == 1 || dir == -1);
1072 
1073  dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1074  memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1075 }
1076 
1077 /*
1078  * Shift records one index left/right inside a single btree block.
1079  */
1080 STATIC void
1082  struct xfs_btree_cur *cur,
1083  union xfs_btree_rec *rec,
1084  int dir,
1085  int numrecs)
1086 {
1087  char *dst_rec;
1088 
1089  ASSERT(numrecs >= 0);
1090  ASSERT(dir == 1 || dir == -1);
1091 
1092  dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1093  memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1094 }
1095 
1096 /*
1097  * Shift block pointers one index left/right inside a single btree block.
1098  */
1099 STATIC void
1101  struct xfs_btree_cur *cur,
1102  union xfs_btree_ptr *ptr,
1103  int dir,
1104  int numptrs)
1105 {
1106  char *dst_ptr;
1107 
1108  ASSERT(numptrs >= 0);
1109  ASSERT(dir == 1 || dir == -1);
1110 
1111  dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1112  memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1113 }
1114 
1115 /*
1116  * Log key values from the btree block.
1117  */
1118 STATIC void
1120  struct xfs_btree_cur *cur,
1121  struct xfs_buf *bp,
1122  int first,
1123  int last)
1124 {
1125  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1126  XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1127 
1128  if (bp) {
1129  xfs_trans_log_buf(cur->bc_tp, bp,
1130  xfs_btree_key_offset(cur, first),
1131  xfs_btree_key_offset(cur, last + 1) - 1);
1132  } else {
1133  xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1134  xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1135  }
1136 
1137  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1138 }
1139 
1140 /*
1141  * Log record values from the btree block.
1142  */
1143 void
1145  struct xfs_btree_cur *cur,
1146  struct xfs_buf *bp,
1147  int first,
1148  int last)
1149 {
1150  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1151  XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1152 
1153  xfs_trans_log_buf(cur->bc_tp, bp,
1154  xfs_btree_rec_offset(cur, first),
1155  xfs_btree_rec_offset(cur, last + 1) - 1);
1156 
1157  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1158 }
1159 
1160 /*
1161  * Log block pointer fields from a btree block (nonleaf).
1162  */
1163 STATIC void
1165  struct xfs_btree_cur *cur, /* btree cursor */
1166  struct xfs_buf *bp, /* buffer containing btree block */
1167  int first, /* index of first pointer to log */
1168  int last) /* index of last pointer to log */
1169 {
1170  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1171  XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1172 
1173  if (bp) {
1174  struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
1175  int level = xfs_btree_get_level(block);
1176 
1177  xfs_trans_log_buf(cur->bc_tp, bp,
1178  xfs_btree_ptr_offset(cur, first, level),
1179  xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1180  } else {
1181  xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1182  xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1183  }
1184 
1185  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1186 }
1187 
1188 /*
1189  * Log fields from a btree block header.
1190  */
1191 void
1193  struct xfs_btree_cur *cur, /* btree cursor */
1194  struct xfs_buf *bp, /* buffer containing btree block */
1195  int fields) /* mask of fields: XFS_BB_... */
1196 {
1197  int first; /* first byte offset logged */
1198  int last; /* last byte offset logged */
1199  static const short soffsets[] = { /* table of offsets (short) */
1203  offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1204  offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
1206  };
1207  static const short loffsets[] = { /* table of offsets (long) */
1211  offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1212  offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
1214  };
1215 
1216  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1217  XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1218 
1219  if (bp) {
1220  xfs_btree_offsets(fields,
1221  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1222  loffsets : soffsets,
1223  XFS_BB_NUM_BITS, &first, &last);
1224  xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1225  } else {
1226  xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1227  xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1228  }
1229 
1230  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1231 }
1232 
1233 /*
1234  * Increment cursor by one record at the level.
1235  * For nonzero levels the leaf-ward information is untouched.
1236  */
1237 int /* error */
1239  struct xfs_btree_cur *cur,
1240  int level,
1241  int *stat) /* success/failure */
1242 {
1243  struct xfs_btree_block *block;
1244  union xfs_btree_ptr ptr;
1245  struct xfs_buf *bp;
1246  int error; /* error return value */
1247  int lev;
1248 
1249  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1250  XFS_BTREE_TRACE_ARGI(cur, level);
1251 
1252  ASSERT(level < cur->bc_nlevels);
1253 
1254  /* Read-ahead to the right at this level. */
1256 
1257  /* Get a pointer to the btree block. */
1258  block = xfs_btree_get_block(cur, level, &bp);
1259 
1260 #ifdef DEBUG
1261  error = xfs_btree_check_block(cur, block, level, bp);
1262  if (error)
1263  goto error0;
1264 #endif
1265 
1266  /* We're done if we remain in the block after the increment. */
1267  if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1268  goto out1;
1269 
1270  /* Fail if we just went off the right edge of the tree. */
1271  xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1272  if (xfs_btree_ptr_is_null(cur, &ptr))
1273  goto out0;
1274 
1275  XFS_BTREE_STATS_INC(cur, increment);
1276 
1277  /*
1278  * March up the tree incrementing pointers.
1279  * Stop when we don't go off the right edge of a block.
1280  */
1281  for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1282  block = xfs_btree_get_block(cur, lev, &bp);
1283 
1284 #ifdef DEBUG
1285  error = xfs_btree_check_block(cur, block, lev, bp);
1286  if (error)
1287  goto error0;
1288 #endif
1289 
1290  if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1291  break;
1292 
1293  /* Read-ahead the right block for the next loop. */
1295  }
1296 
1297  /*
1298  * If we went off the root then we are either seriously
1299  * confused or have the tree root in an inode.
1300  */
1301  if (lev == cur->bc_nlevels) {
1302  if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1303  goto out0;
1304  ASSERT(0);
1305  error = EFSCORRUPTED;
1306  goto error0;
1307  }
1308  ASSERT(lev < cur->bc_nlevels);
1309 
1310  /*
1311  * Now walk back down the tree, fixing up the cursor's buffer
1312  * pointers and key numbers.
1313  */
1314  for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1315  union xfs_btree_ptr *ptrp;
1316 
1317  ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1318  error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1319  0, &block, &bp);
1320  if (error)
1321  goto error0;
1322 
1323  xfs_btree_setbuf(cur, lev, bp);
1324  cur->bc_ptrs[lev] = 1;
1325  }
1326 out1:
1327  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1328  *stat = 1;
1329  return 0;
1330 
1331 out0:
1332  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1333  *stat = 0;
1334  return 0;
1335 
1336 error0:
1337  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1338  return error;
1339 }
1340 
1341 /*
1342  * Decrement cursor by one record at the level.
1343  * For nonzero levels the leaf-ward information is untouched.
1344  */
1345 int /* error */
1347  struct xfs_btree_cur *cur,
1348  int level,
1349  int *stat) /* success/failure */
1350 {
1351  struct xfs_btree_block *block;
1352  xfs_buf_t *bp;
1353  int error; /* error return value */
1354  int lev;
1355  union xfs_btree_ptr ptr;
1356 
1357  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1358  XFS_BTREE_TRACE_ARGI(cur, level);
1359 
1360  ASSERT(level < cur->bc_nlevels);
1361 
1362  /* Read-ahead to the left at this level. */
1364 
1365  /* We're done if we remain in the block after the decrement. */
1366  if (--cur->bc_ptrs[level] > 0)
1367  goto out1;
1368 
1369  /* Get a pointer to the btree block. */
1370  block = xfs_btree_get_block(cur, level, &bp);
1371 
1372 #ifdef DEBUG
1373  error = xfs_btree_check_block(cur, block, level, bp);
1374  if (error)
1375  goto error0;
1376 #endif
1377 
1378  /* Fail if we just went off the left edge of the tree. */
1379  xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1380  if (xfs_btree_ptr_is_null(cur, &ptr))
1381  goto out0;
1382 
1383  XFS_BTREE_STATS_INC(cur, decrement);
1384 
1385  /*
1386  * March up the tree decrementing pointers.
1387  * Stop when we don't go off the left edge of a block.
1388  */
1389  for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1390  if (--cur->bc_ptrs[lev] > 0)
1391  break;
1392  /* Read-ahead the left block for the next loop. */
1394  }
1395 
1396  /*
1397  * If we went off the root then we are seriously confused.
1398  * or the root of the tree is in an inode.
1399  */
1400  if (lev == cur->bc_nlevels) {
1401  if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1402  goto out0;
1403  ASSERT(0);
1404  error = EFSCORRUPTED;
1405  goto error0;
1406  }
1407  ASSERT(lev < cur->bc_nlevels);
1408 
1409  /*
1410  * Now walk back down the tree, fixing up the cursor's buffer
1411  * pointers and key numbers.
1412  */
1413  for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1414  union xfs_btree_ptr *ptrp;
1415 
1416  ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1417  error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1418  0, &block, &bp);
1419  if (error)
1420  goto error0;
1421  xfs_btree_setbuf(cur, lev, bp);
1422  cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1423  }
1424 out1:
1425  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1426  *stat = 1;
1427  return 0;
1428 
1429 out0:
1430  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1431  *stat = 0;
1432  return 0;
1433 
1434 error0:
1435  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1436  return error;
1437 }
1438 
1439 STATIC int
1441  struct xfs_btree_cur *cur, /* btree cursor */
1442  int level, /* level in the btree */
1443  union xfs_btree_ptr *pp, /* ptr to btree block */
1444  struct xfs_btree_block **blkp) /* return btree block */
1445 {
1446  struct xfs_buf *bp; /* buffer pointer for btree block */
1447  int error = 0;
1448 
1449  /* special case the root block if in an inode */
1450  if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1451  (level == cur->bc_nlevels - 1)) {
1452  *blkp = xfs_btree_get_iroot(cur);
1453  return 0;
1454  }
1455 
1456  /*
1457  * If the old buffer at this level for the disk address we are
1458  * looking for re-use it.
1459  *
1460  * Otherwise throw it away and get a new one.
1461  */
1462  bp = cur->bc_bufs[level];
1463  if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1464  *blkp = XFS_BUF_TO_BLOCK(bp);
1465  return 0;
1466  }
1467 
1468  error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1469  if (error)
1470  return error;
1471 
1472  xfs_btree_setbuf(cur, level, bp);
1473  return 0;
1474 }
1475 
1476 /*
1477  * Get current search key. For level 0 we don't actually have a key
1478  * structure so we make one up from the record. For all other levels
1479  * we just return the right key.
1480  */
1481 STATIC union xfs_btree_key *
1483  struct xfs_btree_cur *cur,
1484  int level,
1485  int keyno,
1486  struct xfs_btree_block *block,
1487  union xfs_btree_key *kp)
1488 {
1489  if (level == 0) {
1490  cur->bc_ops->init_key_from_rec(kp,
1491  xfs_btree_rec_addr(cur, keyno, block));
1492  return kp;
1493  }
1494 
1495  return xfs_btree_key_addr(cur, keyno, block);
1496 }
1497 
1498 /*
1499  * Lookup the record. The cursor is made to point to it, based on dir.
1500  * Return 0 if can't find any such record, 1 for success.
1501  */
1502 int /* error */
1504  struct xfs_btree_cur *cur, /* btree cursor */
1505  xfs_lookup_t dir, /* <=, ==, or >= */
1506  int *stat) /* success/failure */
1507 {
1508  struct xfs_btree_block *block; /* current btree block */
1509  __int64_t diff; /* difference for the current key */
1510  int error; /* error return value */
1511  int keyno; /* current key number */
1512  int level; /* level in the btree */
1513  union xfs_btree_ptr *pp; /* ptr to btree block */
1514  union xfs_btree_ptr ptr; /* ptr to btree block */
1515 
1516  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1517  XFS_BTREE_TRACE_ARGI(cur, dir);
1518 
1520 
1521  block = NULL;
1522  keyno = 0;
1523 
1524  /* initialise start pointer from cursor */
1525  cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1526  pp = &ptr;
1527 
1528  /*
1529  * Iterate over each level in the btree, starting at the root.
1530  * For each level above the leaves, find the key we need, based
1531  * on the lookup record, then follow the corresponding block
1532  * pointer down to the next level.
1533  */
1534  for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1535  /* Get the block we need to do the lookup on. */
1536  error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1537  if (error)
1538  goto error0;
1539 
1540  if (diff == 0) {
1541  /*
1542  * If we already had a key match at a higher level, we
1543  * know we need to use the first entry in this block.
1544  */
1545  keyno = 1;
1546  } else {
1547  /* Otherwise search this block. Do a binary search. */
1548 
1549  int high; /* high entry number */
1550  int low; /* low entry number */
1551 
1552  /* Set low and high entry numbers, 1-based. */
1553  low = 1;
1554  high = xfs_btree_get_numrecs(block);
1555  if (!high) {
1556  /* Block is empty, must be an empty leaf. */
1557  ASSERT(level == 0 && cur->bc_nlevels == 1);
1558 
1559  cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1560  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1561  *stat = 0;
1562  return 0;
1563  }
1564 
1565  /* Binary search the block. */
1566  while (low <= high) {
1567  union xfs_btree_key key;
1568  union xfs_btree_key *kp;
1569 
1570  XFS_BTREE_STATS_INC(cur, compare);
1571 
1572  /* keyno is average of low and high. */
1573  keyno = (low + high) >> 1;
1574 
1575  /* Get current search key */
1576  kp = xfs_lookup_get_search_key(cur, level,
1577  keyno, block, &key);
1578 
1579  /*
1580  * Compute difference to get next direction:
1581  * - less than, move right
1582  * - greater than, move left
1583  * - equal, we're done
1584  */
1585  diff = cur->bc_ops->key_diff(cur, kp);
1586  if (diff < 0)
1587  low = keyno + 1;
1588  else if (diff > 0)
1589  high = keyno - 1;
1590  else
1591  break;
1592  }
1593  }
1594 
1595  /*
1596  * If there are more levels, set up for the next level
1597  * by getting the block number and filling in the cursor.
1598  */
1599  if (level > 0) {
1600  /*
1601  * If we moved left, need the previous key number,
1602  * unless there isn't one.
1603  */
1604  if (diff > 0 && --keyno < 1)
1605  keyno = 1;
1606  pp = xfs_btree_ptr_addr(cur, keyno, block);
1607 
1608 #ifdef DEBUG
1609  error = xfs_btree_check_ptr(cur, pp, 0, level);
1610  if (error)
1611  goto error0;
1612 #endif
1613  cur->bc_ptrs[level] = keyno;
1614  }
1615  }
1616 
1617  /* Done with the search. See if we need to adjust the results. */
1618  if (dir != XFS_LOOKUP_LE && diff < 0) {
1619  keyno++;
1620  /*
1621  * If ge search and we went off the end of the block, but it's
1622  * not the last block, we're in the wrong block.
1623  */
1624  xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1625  if (dir == XFS_LOOKUP_GE &&
1626  keyno > xfs_btree_get_numrecs(block) &&
1627  !xfs_btree_ptr_is_null(cur, &ptr)) {
1628  int i;
1629 
1630  cur->bc_ptrs[0] = keyno;
1631  error = xfs_btree_increment(cur, 0, &i);
1632  if (error)
1633  goto error0;
1634  XFS_WANT_CORRUPTED_RETURN(i == 1);
1635  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1636  *stat = 1;
1637  return 0;
1638  }
1639  } else if (dir == XFS_LOOKUP_LE && diff > 0)
1640  keyno--;
1641  cur->bc_ptrs[0] = keyno;
1642 
1643  /* Return if we succeeded or not. */
1644  if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1645  *stat = 0;
1646  else if (dir != XFS_LOOKUP_EQ || diff == 0)
1647  *stat = 1;
1648  else
1649  *stat = 0;
1650  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1651  return 0;
1652 
1653 error0:
1654  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1655  return error;
1656 }
1657 
1658 /*
1659  * Update keys at all levels from here to the root along the cursor's path.
1660  */
1661 STATIC int
1663  struct xfs_btree_cur *cur,
1664  union xfs_btree_key *keyp,
1665  int level)
1666 {
1667  struct xfs_btree_block *block;
1668  struct xfs_buf *bp;
1669  union xfs_btree_key *kp;
1670  int ptr;
1671 
1672  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1673  XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1674 
1675  ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1676 
1677  /*
1678  * Go up the tree from this level toward the root.
1679  * At each level, update the key value to the value input.
1680  * Stop when we reach a level where the cursor isn't pointing
1681  * at the first entry in the block.
1682  */
1683  for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1684 #ifdef DEBUG
1685  int error;
1686 #endif
1687  block = xfs_btree_get_block(cur, level, &bp);
1688 #ifdef DEBUG
1689  error = xfs_btree_check_block(cur, block, level, bp);
1690  if (error) {
1691  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1692  return error;
1693  }
1694 #endif
1695  ptr = cur->bc_ptrs[level];
1696  kp = xfs_btree_key_addr(cur, ptr, block);
1697  xfs_btree_copy_keys(cur, kp, keyp, 1);
1698  xfs_btree_log_keys(cur, bp, ptr, ptr);
1699  }
1700 
1701  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1702  return 0;
1703 }
1704 
1705 /*
1706  * Update the record referred to by cur to the value in the
1707  * given record. This either works (return 0) or gets an
1708  * EFSCORRUPTED error.
1709  */
1710 int
1712  struct xfs_btree_cur *cur,
1713  union xfs_btree_rec *rec)
1714 {
1715  struct xfs_btree_block *block;
1716  struct xfs_buf *bp;
1717  int error;
1718  int ptr;
1719  union xfs_btree_rec *rp;
1720 
1721  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1722  XFS_BTREE_TRACE_ARGR(cur, rec);
1723 
1724  /* Pick up the current block. */
1725  block = xfs_btree_get_block(cur, 0, &bp);
1726 
1727 #ifdef DEBUG
1728  error = xfs_btree_check_block(cur, block, 0, bp);
1729  if (error)
1730  goto error0;
1731 #endif
1732  /* Get the address of the rec to be updated. */
1733  ptr = cur->bc_ptrs[0];
1734  rp = xfs_btree_rec_addr(cur, ptr, block);
1735 
1736  /* Fill in the new contents and log them. */
1737  xfs_btree_copy_recs(cur, rp, rec, 1);
1738  xfs_btree_log_recs(cur, bp, ptr, ptr);
1739 
1740  /*
1741  * If we are tracking the last record in the tree and
1742  * we are at the far right edge of the tree, update it.
1743  */
1744  if (xfs_btree_is_lastrec(cur, block, 0)) {
1745  cur->bc_ops->update_lastrec(cur, block, rec,
1746  ptr, LASTREC_UPDATE);
1747  }
1748 
1749  /* Updating first rec in leaf. Pass new key value up to our parent. */
1750  if (ptr == 1) {
1751  union xfs_btree_key key;
1752 
1753  cur->bc_ops->init_key_from_rec(&key, rec);
1754  error = xfs_btree_updkey(cur, &key, 1);
1755  if (error)
1756  goto error0;
1757  }
1758 
1759  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1760  return 0;
1761 
1762 error0:
1763  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1764  return error;
1765 }
1766 
1767 /*
1768  * Move 1 record left from cur/level if possible.
1769  * Update cur to reflect the new path.
1770  */
1771 STATIC int /* error */
1773  struct xfs_btree_cur *cur,
1774  int level,
1775  int *stat) /* success/failure */
1776 {
1777  union xfs_btree_key key; /* btree key */
1778  struct xfs_buf *lbp; /* left buffer pointer */
1779  struct xfs_btree_block *left; /* left btree block */
1780  int lrecs; /* left record count */
1781  struct xfs_buf *rbp; /* right buffer pointer */
1782  struct xfs_btree_block *right; /* right btree block */
1783  int rrecs; /* right record count */
1784  union xfs_btree_ptr lptr; /* left btree pointer */
1785  union xfs_btree_key *rkp = NULL; /* right btree key */
1786  union xfs_btree_ptr *rpp = NULL; /* right address pointer */
1787  union xfs_btree_rec *rrp = NULL; /* right record pointer */
1788  int error; /* error return value */
1789 
1790  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1791  XFS_BTREE_TRACE_ARGI(cur, level);
1792 
1793  if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1794  level == cur->bc_nlevels - 1)
1795  goto out0;
1796 
1797  /* Set up variables for this block as "right". */
1798  right = xfs_btree_get_block(cur, level, &rbp);
1799 
1800 #ifdef DEBUG
1801  error = xfs_btree_check_block(cur, right, level, rbp);
1802  if (error)
1803  goto error0;
1804 #endif
1805 
1806  /* If we've got no left sibling then we can't shift an entry left. */
1807  xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
1808  if (xfs_btree_ptr_is_null(cur, &lptr))
1809  goto out0;
1810 
1811  /*
1812  * If the cursor entry is the one that would be moved, don't
1813  * do it... it's too complicated.
1814  */
1815  if (cur->bc_ptrs[level] <= 1)
1816  goto out0;
1817 
1818  /* Set up the left neighbor as "left". */
1819  error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
1820  if (error)
1821  goto error0;
1822 
1823  /* If it's full, it can't take another entry. */
1824  lrecs = xfs_btree_get_numrecs(left);
1825  if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
1826  goto out0;
1827 
1828  rrecs = xfs_btree_get_numrecs(right);
1829 
1830  /*
1831  * We add one entry to the left side and remove one for the right side.
1832  * Account for it here, the changes will be updated on disk and logged
1833  * later.
1834  */
1835  lrecs++;
1836  rrecs--;
1837 
1838  XFS_BTREE_STATS_INC(cur, lshift);
1839  XFS_BTREE_STATS_ADD(cur, moves, 1);
1840 
1841  /*
1842  * If non-leaf, copy a key and a ptr to the left block.
1843  * Log the changes to the left block.
1844  */
1845  if (level > 0) {
1846  /* It's a non-leaf. Move keys and pointers. */
1847  union xfs_btree_key *lkp; /* left btree key */
1848  union xfs_btree_ptr *lpp; /* left address pointer */
1849 
1850  lkp = xfs_btree_key_addr(cur, lrecs, left);
1851  rkp = xfs_btree_key_addr(cur, 1, right);
1852 
1853  lpp = xfs_btree_ptr_addr(cur, lrecs, left);
1854  rpp = xfs_btree_ptr_addr(cur, 1, right);
1855 #ifdef DEBUG
1856  error = xfs_btree_check_ptr(cur, rpp, 0, level);
1857  if (error)
1858  goto error0;
1859 #endif
1860  xfs_btree_copy_keys(cur, lkp, rkp, 1);
1861  xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
1862 
1863  xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
1864  xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
1865 
1866  ASSERT(cur->bc_ops->keys_inorder(cur,
1867  xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
1868  } else {
1869  /* It's a leaf. Move records. */
1870  union xfs_btree_rec *lrp; /* left record pointer */
1871 
1872  lrp = xfs_btree_rec_addr(cur, lrecs, left);
1873  rrp = xfs_btree_rec_addr(cur, 1, right);
1874 
1875  xfs_btree_copy_recs(cur, lrp, rrp, 1);
1876  xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
1877 
1878  ASSERT(cur->bc_ops->recs_inorder(cur,
1879  xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
1880  }
1881 
1882  xfs_btree_set_numrecs(left, lrecs);
1884 
1885  xfs_btree_set_numrecs(right, rrecs);
1887 
1888  /*
1889  * Slide the contents of right down one entry.
1890  */
1891  XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
1892  if (level > 0) {
1893  /* It's a nonleaf. operate on keys and ptrs */
1894 #ifdef DEBUG
1895  int i; /* loop index */
1896 
1897  for (i = 0; i < rrecs; i++) {
1898  error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
1899  if (error)
1900  goto error0;
1901  }
1902 #endif
1904  xfs_btree_key_addr(cur, 2, right),
1905  -1, rrecs);
1907  xfs_btree_ptr_addr(cur, 2, right),
1908  -1, rrecs);
1909 
1910  xfs_btree_log_keys(cur, rbp, 1, rrecs);
1911  xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
1912  } else {
1913  /* It's a leaf. operate on records */
1915  xfs_btree_rec_addr(cur, 2, right),
1916  -1, rrecs);
1917  xfs_btree_log_recs(cur, rbp, 1, rrecs);
1918 
1919  /*
1920  * If it's the first record in the block, we'll need a key
1921  * structure to pass up to the next level (updkey).
1922  */
1923  cur->bc_ops->init_key_from_rec(&key,
1924  xfs_btree_rec_addr(cur, 1, right));
1925  rkp = &key;
1926  }
1927 
1928  /* Update the parent key values of right. */
1929  error = xfs_btree_updkey(cur, rkp, level + 1);
1930  if (error)
1931  goto error0;
1932 
1933  /* Slide the cursor value left one. */
1934  cur->bc_ptrs[level]--;
1935 
1936  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1937  *stat = 1;
1938  return 0;
1939 
1940 out0:
1941  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1942  *stat = 0;
1943  return 0;
1944 
1945 error0:
1946  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1947  return error;
1948 }
1949 
1950 /*
1951  * Move 1 record right from cur/level if possible.
1952  * Update cur to reflect the new path.
1953  */
1954 STATIC int /* error */
1956  struct xfs_btree_cur *cur,
1957  int level,
1958  int *stat) /* success/failure */
1959 {
1960  union xfs_btree_key key; /* btree key */
1961  struct xfs_buf *lbp; /* left buffer pointer */
1962  struct xfs_btree_block *left; /* left btree block */
1963  struct xfs_buf *rbp; /* right buffer pointer */
1964  struct xfs_btree_block *right; /* right btree block */
1965  struct xfs_btree_cur *tcur; /* temporary btree cursor */
1966  union xfs_btree_ptr rptr; /* right block pointer */
1967  union xfs_btree_key *rkp; /* right btree key */
1968  int rrecs; /* right record count */
1969  int lrecs; /* left record count */
1970  int error; /* error return value */
1971  int i; /* loop counter */
1972 
1973  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1974  XFS_BTREE_TRACE_ARGI(cur, level);
1975 
1976  if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1977  (level == cur->bc_nlevels - 1))
1978  goto out0;
1979 
1980  /* Set up variables for this block as "left". */
1981  left = xfs_btree_get_block(cur, level, &lbp);
1982 
1983 #ifdef DEBUG
1984  error = xfs_btree_check_block(cur, left, level, lbp);
1985  if (error)
1986  goto error0;
1987 #endif
1988 
1989  /* If we've got no right sibling then we can't shift an entry right. */
1990  xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
1991  if (xfs_btree_ptr_is_null(cur, &rptr))
1992  goto out0;
1993 
1994  /*
1995  * If the cursor entry is the one that would be moved, don't
1996  * do it... it's too complicated.
1997  */
1998  lrecs = xfs_btree_get_numrecs(left);
1999  if (cur->bc_ptrs[level] >= lrecs)
2000  goto out0;
2001 
2002  /* Set up the right neighbor as "right". */
2003  error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
2004  if (error)
2005  goto error0;
2006 
2007  /* If it's full, it can't take another entry. */
2008  rrecs = xfs_btree_get_numrecs(right);
2009  if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2010  goto out0;
2011 
2012  XFS_BTREE_STATS_INC(cur, rshift);
2013  XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2014 
2015  /*
2016  * Make a hole at the start of the right neighbor block, then
2017  * copy the last left block entry to the hole.
2018  */
2019  if (level > 0) {
2020  /* It's a nonleaf. make a hole in the keys and ptrs */
2021  union xfs_btree_key *lkp;
2022  union xfs_btree_ptr *lpp;
2023  union xfs_btree_ptr *rpp;
2024 
2025  lkp = xfs_btree_key_addr(cur, lrecs, left);
2026  lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2027  rkp = xfs_btree_key_addr(cur, 1, right);
2028  rpp = xfs_btree_ptr_addr(cur, 1, right);
2029 
2030 #ifdef DEBUG
2031  for (i = rrecs - 1; i >= 0; i--) {
2032  error = xfs_btree_check_ptr(cur, rpp, i, level);
2033  if (error)
2034  goto error0;
2035  }
2036 #endif
2037 
2038  xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2039  xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2040 
2041 #ifdef DEBUG
2042  error = xfs_btree_check_ptr(cur, lpp, 0, level);
2043  if (error)
2044  goto error0;
2045 #endif
2046 
2047  /* Now put the new data in, and log it. */
2048  xfs_btree_copy_keys(cur, rkp, lkp, 1);
2049  xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2050 
2051  xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2052  xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2053 
2054  ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2055  xfs_btree_key_addr(cur, 2, right)));
2056  } else {
2057  /* It's a leaf. make a hole in the records */
2058  union xfs_btree_rec *lrp;
2059  union xfs_btree_rec *rrp;
2060 
2061  lrp = xfs_btree_rec_addr(cur, lrecs, left);
2062  rrp = xfs_btree_rec_addr(cur, 1, right);
2063 
2064  xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2065 
2066  /* Now put the new data in, and log it. */
2067  xfs_btree_copy_recs(cur, rrp, lrp, 1);
2068  xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2069 
2070  cur->bc_ops->init_key_from_rec(&key, rrp);
2071  rkp = &key;
2072 
2073  ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2074  xfs_btree_rec_addr(cur, 2, right)));
2075  }
2076 
2077  /*
2078  * Decrement and log left's numrecs, bump and log right's numrecs.
2079  */
2080  xfs_btree_set_numrecs(left, --lrecs);
2082 
2083  xfs_btree_set_numrecs(right, ++rrecs);
2085 
2086  /*
2087  * Using a temporary cursor, update the parent key values of the
2088  * block on the right.
2089  */
2090  error = xfs_btree_dup_cursor(cur, &tcur);
2091  if (error)
2092  goto error0;
2093  i = xfs_btree_lastrec(tcur, level);
2094  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2095 
2096  error = xfs_btree_increment(tcur, level, &i);
2097  if (error)
2098  goto error1;
2099 
2100  error = xfs_btree_updkey(tcur, rkp, level + 1);
2101  if (error)
2102  goto error1;
2103 
2105 
2106  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2107  *stat = 1;
2108  return 0;
2109 
2110 out0:
2111  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2112  *stat = 0;
2113  return 0;
2114 
2115 error0:
2116  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2117  return error;
2118 
2119 error1:
2120  XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2122  return error;
2123 }
2124 
2125 /*
2126  * Split cur/level block in half.
2127  * Return new block number and the key to its first
2128  * record (to be inserted into parent).
2129  */
2130 STATIC int /* error */
2132  struct xfs_btree_cur *cur,
2133  int level,
2134  union xfs_btree_ptr *ptrp,
2135  union xfs_btree_key *key,
2136  struct xfs_btree_cur **curp,
2137  int *stat) /* success/failure */
2138 {
2139  union xfs_btree_ptr lptr; /* left sibling block ptr */
2140  struct xfs_buf *lbp; /* left buffer pointer */
2141  struct xfs_btree_block *left; /* left btree block */
2142  union xfs_btree_ptr rptr; /* right sibling block ptr */
2143  struct xfs_buf *rbp; /* right buffer pointer */
2144  struct xfs_btree_block *right; /* right btree block */
2145  union xfs_btree_ptr rrptr; /* right-right sibling ptr */
2146  struct xfs_buf *rrbp; /* right-right buffer pointer */
2147  struct xfs_btree_block *rrblock; /* right-right btree block */
2148  int lrecs;
2149  int rrecs;
2150  int src_index;
2151  int error; /* error return value */
2152 #ifdef DEBUG
2153  int i;
2154 #endif
2155 
2156  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2157  XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2158 
2159  XFS_BTREE_STATS_INC(cur, split);
2160 
2161  /* Set up left block (current one). */
2162  left = xfs_btree_get_block(cur, level, &lbp);
2163 
2164 #ifdef DEBUG
2165  error = xfs_btree_check_block(cur, left, level, lbp);
2166  if (error)
2167  goto error0;
2168 #endif
2169 
2170  xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2171 
2172  /* Allocate the new block. If we can't do it, we're toast. Give up. */
2173  error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
2174  if (error)
2175  goto error0;
2176  if (*stat == 0)
2177  goto out0;
2178  XFS_BTREE_STATS_INC(cur, alloc);
2179 
2180  /* Set up the new block as "right". */
2181  error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2182  if (error)
2183  goto error0;
2184 
2185  /* Fill in the btree header for the new right block. */
2186  xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);
2187 
2188  /*
2189  * Split the entries between the old and the new block evenly.
2190  * Make sure that if there's an odd number of entries now, that
2191  * each new block will have the same number of entries.
2192  */
2193  lrecs = xfs_btree_get_numrecs(left);
2194  rrecs = lrecs / 2;
2195  if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2196  rrecs++;
2197  src_index = (lrecs - rrecs + 1);
2198 
2199  XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2200 
2201  /*
2202  * Copy btree block entries from the left block over to the
2203  * new block, the right. Update the right block and log the
2204  * changes.
2205  */
2206  if (level > 0) {
2207  /* It's a non-leaf. Move keys and pointers. */
2208  union xfs_btree_key *lkp; /* left btree key */
2209  union xfs_btree_ptr *lpp; /* left address pointer */
2210  union xfs_btree_key *rkp; /* right btree key */
2211  union xfs_btree_ptr *rpp; /* right address pointer */
2212 
2213  lkp = xfs_btree_key_addr(cur, src_index, left);
2214  lpp = xfs_btree_ptr_addr(cur, src_index, left);
2215  rkp = xfs_btree_key_addr(cur, 1, right);
2216  rpp = xfs_btree_ptr_addr(cur, 1, right);
2217 
2218 #ifdef DEBUG
2219  for (i = src_index; i < rrecs; i++) {
2220  error = xfs_btree_check_ptr(cur, lpp, i, level);
2221  if (error)
2222  goto error0;
2223  }
2224 #endif
2225 
2226  xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2227  xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2228 
2229  xfs_btree_log_keys(cur, rbp, 1, rrecs);
2230  xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2231 
2232  /* Grab the keys to the entries moved to the right block */
2233  xfs_btree_copy_keys(cur, key, rkp, 1);
2234  } else {
2235  /* It's a leaf. Move records. */
2236  union xfs_btree_rec *lrp; /* left record pointer */
2237  union xfs_btree_rec *rrp; /* right record pointer */
2238 
2239  lrp = xfs_btree_rec_addr(cur, src_index, left);
2240  rrp = xfs_btree_rec_addr(cur, 1, right);
2241 
2242  xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2243  xfs_btree_log_recs(cur, rbp, 1, rrecs);
2244 
2245  cur->bc_ops->init_key_from_rec(key,
2246  xfs_btree_rec_addr(cur, 1, right));
2247  }
2248 
2249 
2250  /*
2251  * Find the left block number by looking in the buffer.
2252  * Adjust numrecs, sibling pointers.
2253  */
2254  xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2255  xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2256  xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2257  xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2258 
2259  lrecs -= rrecs;
2260  xfs_btree_set_numrecs(left, lrecs);
2261  xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2262 
2265 
2266  /*
2267  * If there's a block to the new block's right, make that block
2268  * point back to right instead of to left.
2269  */
2270  if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2271  error = xfs_btree_read_buf_block(cur, &rrptr, level,
2272  0, &rrblock, &rrbp);
2273  if (error)
2274  goto error0;
2275  xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2276  xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2277  }
2278  /*
2279  * If the cursor is really in the right block, move it there.
2280  * If it's just pointing past the last entry in left, then we'll
2281  * insert there, so don't change anything in that case.
2282  */
2283  if (cur->bc_ptrs[level] > lrecs + 1) {
2284  xfs_btree_setbuf(cur, level, rbp);
2285  cur->bc_ptrs[level] -= lrecs;
2286  }
2287  /*
2288  * If there are more levels, we'll need another cursor which refers
2289  * the right block, no matter where this cursor was.
2290  */
2291  if (level + 1 < cur->bc_nlevels) {
2292  error = xfs_btree_dup_cursor(cur, curp);
2293  if (error)
2294  goto error0;
2295  (*curp)->bc_ptrs[level + 1]++;
2296  }
2297  *ptrp = rptr;
2298  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2299  *stat = 1;
2300  return 0;
2301 out0:
2302  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2303  *stat = 0;
2304  return 0;
2305 
2306 error0:
2307  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2308  return error;
2309 }
2310 
2311 /*
2312  * Copy the old inode root contents into a real block and make the
2313  * broot point to it.
2314  */
2315 int /* error */
2317  struct xfs_btree_cur *cur, /* btree cursor */
2318  int *logflags, /* logging flags for inode */
2319  int *stat) /* return status - 0 fail */
2320 {
2321  struct xfs_buf *cbp; /* buffer for cblock */
2322  struct xfs_btree_block *block; /* btree block */
2323  struct xfs_btree_block *cblock; /* child btree block */
2324  union xfs_btree_key *ckp; /* child key pointer */
2325  union xfs_btree_ptr *cpp; /* child ptr pointer */
2326  union xfs_btree_key *kp; /* pointer to btree key */
2327  union xfs_btree_ptr *pp; /* pointer to block addr */
2328  union xfs_btree_ptr nptr; /* new block addr */
2329  int level; /* btree level */
2330  int error; /* error return code */
2331 #ifdef DEBUG
2332  int i; /* loop counter */
2333 #endif
2334 
2335  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2336  XFS_BTREE_STATS_INC(cur, newroot);
2337 
2339 
2340  level = cur->bc_nlevels - 1;
2341 
2342  block = xfs_btree_get_iroot(cur);
2343  pp = xfs_btree_ptr_addr(cur, 1, block);
2344 
2345  /* Allocate the new block. If we can't do it, we're toast. Give up. */
2346  error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
2347  if (error)
2348  goto error0;
2349  if (*stat == 0) {
2350  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2351  return 0;
2352  }
2353  XFS_BTREE_STATS_INC(cur, alloc);
2354 
2355  /* Copy the root into a real block. */
2356  error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2357  if (error)
2358  goto error0;
2359 
2360  memcpy(cblock, block, xfs_btree_block_len(cur));
2361 
2362  be16_add_cpu(&block->bb_level, 1);
2363  xfs_btree_set_numrecs(block, 1);
2364  cur->bc_nlevels++;
2365  cur->bc_ptrs[level + 1] = 1;
2366 
2367  kp = xfs_btree_key_addr(cur, 1, block);
2368  ckp = xfs_btree_key_addr(cur, 1, cblock);
2369  xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
2370 
2371  cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2372 #ifdef DEBUG
2373  for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2374  error = xfs_btree_check_ptr(cur, pp, i, level);
2375  if (error)
2376  goto error0;
2377  }
2378 #endif
2379  xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
2380 
2381 #ifdef DEBUG
2382  error = xfs_btree_check_ptr(cur, &nptr, 0, level);
2383  if (error)
2384  goto error0;
2385 #endif
2386  xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
2387 
2388  xfs_iroot_realloc(cur->bc_private.b.ip,
2389  1 - xfs_btree_get_numrecs(cblock),
2390  cur->bc_private.b.whichfork);
2391 
2392  xfs_btree_setbuf(cur, level, cbp);
2393 
2394  /*
2395  * Do all this logging at the end so that
2396  * the root is at the right level.
2397  */
2399  xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2400  xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2401 
2402  *logflags |=
2403  XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
2404  *stat = 1;
2405  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2406  return 0;
2407 error0:
2408  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2409  return error;
2410 }
2411 
2412 /*
2413  * Allocate a new root block, fill it in.
2414  */
2415 STATIC int /* error */
2417  struct xfs_btree_cur *cur, /* btree cursor */
2418  int *stat) /* success/failure */
2419 {
2420  struct xfs_btree_block *block; /* one half of the old root block */
2421  struct xfs_buf *bp; /* buffer containing block */
2422  int error; /* error return value */
2423  struct xfs_buf *lbp; /* left buffer pointer */
2424  struct xfs_btree_block *left; /* left btree block */
2425  struct xfs_buf *nbp; /* new (root) buffer */
2426  struct xfs_btree_block *new; /* new (root) btree block */
2427  int nptr; /* new value for key index, 1 or 2 */
2428  struct xfs_buf *rbp; /* right buffer pointer */
2429  struct xfs_btree_block *right; /* right btree block */
2430  union xfs_btree_ptr rptr;
2431  union xfs_btree_ptr lptr;
2432 
2433  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2434  XFS_BTREE_STATS_INC(cur, newroot);
2435 
2436  /* initialise our start point from the cursor */
2437  cur->bc_ops->init_ptr_from_cur(cur, &rptr);
2438 
2439  /* Allocate the new block. If we can't do it, we're toast. Give up. */
2440  error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
2441  if (error)
2442  goto error0;
2443  if (*stat == 0)
2444  goto out0;
2445  XFS_BTREE_STATS_INC(cur, alloc);
2446 
2447  /* Set up the new block. */
2448  error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
2449  if (error)
2450  goto error0;
2451 
2452  /* Set the root in the holding structure increasing the level by 1. */
2453  cur->bc_ops->set_root(cur, &lptr, 1);
2454 
2455  /*
2456  * At the previous root level there are now two blocks: the old root,
2457  * and the new block generated when it was split. We don't know which
2458  * one the cursor is pointing at, so we set up variables "left" and
2459  * "right" for each case.
2460  */
2461  block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
2462 
2463 #ifdef DEBUG
2464  error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
2465  if (error)
2466  goto error0;
2467 #endif
2468 
2469  xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
2470  if (!xfs_btree_ptr_is_null(cur, &rptr)) {
2471  /* Our block is left, pick up the right block. */
2472  lbp = bp;
2473  xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2474  left = block;
2475  error = xfs_btree_read_buf_block(cur, &rptr,
2476  cur->bc_nlevels - 1, 0, &right, &rbp);
2477  if (error)
2478  goto error0;
2479  bp = rbp;
2480  nptr = 1;
2481  } else {
2482  /* Our block is right, pick up the left block. */
2483  rbp = bp;
2484  xfs_btree_buf_to_ptr(cur, rbp, &rptr);
2485  right = block;
2486  xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2487  error = xfs_btree_read_buf_block(cur, &lptr,
2488  cur->bc_nlevels - 1, 0, &left, &lbp);
2489  if (error)
2490  goto error0;
2491  bp = lbp;
2492  nptr = 2;
2493  }
2494  /* Fill in the new block's btree header and log it. */
2495  xfs_btree_init_block(cur, cur->bc_nlevels, 2, new);
2497  ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2498  !xfs_btree_ptr_is_null(cur, &rptr));
2499 
2500  /* Fill in the key data in the new root. */
2501  if (xfs_btree_get_level(left) > 0) {
2502  xfs_btree_copy_keys(cur,
2503  xfs_btree_key_addr(cur, 1, new),
2504  xfs_btree_key_addr(cur, 1, left), 1);
2505  xfs_btree_copy_keys(cur,
2506  xfs_btree_key_addr(cur, 2, new),
2507  xfs_btree_key_addr(cur, 1, right), 1);
2508  } else {
2509  cur->bc_ops->init_key_from_rec(
2510  xfs_btree_key_addr(cur, 1, new),
2511  xfs_btree_rec_addr(cur, 1, left));
2512  cur->bc_ops->init_key_from_rec(
2513  xfs_btree_key_addr(cur, 2, new),
2514  xfs_btree_rec_addr(cur, 1, right));
2515  }
2516  xfs_btree_log_keys(cur, nbp, 1, 2);
2517 
2518  /* Fill in the pointer data in the new root. */
2519  xfs_btree_copy_ptrs(cur,
2520  xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
2521  xfs_btree_copy_ptrs(cur,
2522  xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
2523  xfs_btree_log_ptrs(cur, nbp, 1, 2);
2524 
2525  /* Fix up the cursor. */
2526  xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
2527  cur->bc_ptrs[cur->bc_nlevels] = nptr;
2528  cur->bc_nlevels++;
2529  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2530  *stat = 1;
2531  return 0;
2532 error0:
2533  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2534  return error;
2535 out0:
2536  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2537  *stat = 0;
2538  return 0;
2539 }
2540 
2541 STATIC int
2543  struct xfs_btree_cur *cur, /* btree cursor */
2544  int level, /* btree level */
2545  int numrecs,/* # of recs in block */
2546  int *oindex,/* old tree index */
2547  int *index, /* new tree index */
2548  union xfs_btree_ptr *nptr, /* new btree ptr */
2549  struct xfs_btree_cur **ncur, /* new btree cursor */
2550  union xfs_btree_rec *nrec, /* new record */
2551  int *stat)
2552 {
2553  union xfs_btree_key key; /* new btree key value */
2554  int error = 0;
2555 
2556  if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2557  level == cur->bc_nlevels - 1) {
2558  struct xfs_inode *ip = cur->bc_private.b.ip;
2559 
2560  if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2561  /* A root block that can be made bigger. */
2562 
2563  xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
2564  } else {
2565  /* A root block that needs replacing */
2566  int logflags = 0;
2567 
2568  error = xfs_btree_new_iroot(cur, &logflags, stat);
2569  if (error || *stat == 0)
2570  return error;
2571 
2572  xfs_trans_log_inode(cur->bc_tp, ip, logflags);
2573  }
2574 
2575  return 0;
2576  }
2577 
2578  /* First, try shifting an entry to the right neighbor. */
2579  error = xfs_btree_rshift(cur, level, stat);
2580  if (error || *stat)
2581  return error;
2582 
2583  /* Next, try shifting an entry to the left neighbor. */
2584  error = xfs_btree_lshift(cur, level, stat);
2585  if (error)
2586  return error;
2587 
2588  if (*stat) {
2589  *oindex = *index = cur->bc_ptrs[level];
2590  return 0;
2591  }
2592 
2593  /*
2594  * Next, try splitting the current block in half.
2595  *
2596  * If this works we have to re-set our variables because we
2597  * could be in a different block now.
2598  */
2599  error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
2600  if (error || *stat == 0)
2601  return error;
2602 
2603 
2604  *index = cur->bc_ptrs[level];
2605  cur->bc_ops->init_rec_from_key(&key, nrec);
2606  return 0;
2607 }
2608 
2609 /*
2610  * Insert one record/level. Return information to the caller
2611  * allowing the next level up to proceed if necessary.
2612  */
2613 STATIC int
2615  struct xfs_btree_cur *cur, /* btree cursor */
2616  int level, /* level to insert record at */
2617  union xfs_btree_ptr *ptrp, /* i/o: block number inserted */
2618  union xfs_btree_rec *recp, /* i/o: record data inserted */
2619  struct xfs_btree_cur **curp, /* output: new cursor replacing cur */
2620  int *stat) /* success/failure */
2621 {
2622  struct xfs_btree_block *block; /* btree block */
2623  struct xfs_buf *bp; /* buffer for block */
2624  union xfs_btree_key key; /* btree key */
2625  union xfs_btree_ptr nptr; /* new block ptr */
2626  struct xfs_btree_cur *ncur; /* new btree cursor */
2627  union xfs_btree_rec nrec; /* new record count */
2628  int optr; /* old key/record index */
2629  int ptr; /* key/record index */
2630  int numrecs;/* number of records */
2631  int error; /* error return value */
2632 #ifdef DEBUG
2633  int i;
2634 #endif
2635 
2636  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2637  XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
2638 
2639  ncur = NULL;
2640 
2641  /*
2642  * If we have an external root pointer, and we've made it to the
2643  * root level, allocate a new root block and we're done.
2644  */
2645  if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2646  (level >= cur->bc_nlevels)) {
2647  error = xfs_btree_new_root(cur, stat);
2648  xfs_btree_set_ptr_null(cur, ptrp);
2649 
2650  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2651  return error;
2652  }
2653 
2654  /* If we're off the left edge, return failure. */
2655  ptr = cur->bc_ptrs[level];
2656  if (ptr == 0) {
2657  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2658  *stat = 0;
2659  return 0;
2660  }
2661 
2662  /* Make a key out of the record data to be inserted, and save it. */
2663  cur->bc_ops->init_key_from_rec(&key, recp);
2664 
2665  optr = ptr;
2666 
2667  XFS_BTREE_STATS_INC(cur, insrec);
2668 
2669  /* Get pointers to the btree buffer and block. */
2670  block = xfs_btree_get_block(cur, level, &bp);
2671  numrecs = xfs_btree_get_numrecs(block);
2672 
2673 #ifdef DEBUG
2674  error = xfs_btree_check_block(cur, block, level, bp);
2675  if (error)
2676  goto error0;
2677 
2678  /* Check that the new entry is being inserted in the right place. */
2679  if (ptr <= numrecs) {
2680  if (level == 0) {
2681  ASSERT(cur->bc_ops->recs_inorder(cur, recp,
2682  xfs_btree_rec_addr(cur, ptr, block)));
2683  } else {
2684  ASSERT(cur->bc_ops->keys_inorder(cur, &key,
2685  xfs_btree_key_addr(cur, ptr, block)));
2686  }
2687  }
2688 #endif
2689 
2690  /*
2691  * If the block is full, we can't insert the new entry until we
2692  * make the block un-full.
2693  */
2694  xfs_btree_set_ptr_null(cur, &nptr);
2695  if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
2696  error = xfs_btree_make_block_unfull(cur, level, numrecs,
2697  &optr, &ptr, &nptr, &ncur, &nrec, stat);
2698  if (error || *stat == 0)
2699  goto error0;
2700  }
2701 
2702  /*
2703  * The current block may have changed if the block was
2704  * previously full and we have just made space in it.
2705  */
2706  block = xfs_btree_get_block(cur, level, &bp);
2707  numrecs = xfs_btree_get_numrecs(block);
2708 
2709 #ifdef DEBUG
2710  error = xfs_btree_check_block(cur, block, level, bp);
2711  if (error)
2712  return error;
2713 #endif
2714 
2715  /*
2716  * At this point we know there's room for our new entry in the block
2717  * we're pointing at.
2718  */
2719  XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
2720 
2721  if (level > 0) {
2722  /* It's a nonleaf. make a hole in the keys and ptrs */
2723  union xfs_btree_key *kp;
2724  union xfs_btree_ptr *pp;
2725 
2726  kp = xfs_btree_key_addr(cur, ptr, block);
2727  pp = xfs_btree_ptr_addr(cur, ptr, block);
2728 
2729 #ifdef DEBUG
2730  for (i = numrecs - ptr; i >= 0; i--) {
2731  error = xfs_btree_check_ptr(cur, pp, i, level);
2732  if (error)
2733  return error;
2734  }
2735 #endif
2736 
2737  xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
2738  xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
2739 
2740 #ifdef DEBUG
2741  error = xfs_btree_check_ptr(cur, ptrp, 0, level);
2742  if (error)
2743  goto error0;
2744 #endif
2745 
2746  /* Now put the new data in, bump numrecs and log it. */
2747  xfs_btree_copy_keys(cur, kp, &key, 1);
2748  xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
2749  numrecs++;
2750  xfs_btree_set_numrecs(block, numrecs);
2751  xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
2752  xfs_btree_log_keys(cur, bp, ptr, numrecs);
2753 #ifdef DEBUG
2754  if (ptr < numrecs) {
2755  ASSERT(cur->bc_ops->keys_inorder(cur, kp,
2756  xfs_btree_key_addr(cur, ptr + 1, block)));
2757  }
2758 #endif
2759  } else {
2760  /* It's a leaf. make a hole in the records */
2761  union xfs_btree_rec *rp;
2762 
2763  rp = xfs_btree_rec_addr(cur, ptr, block);
2764 
2765  xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
2766 
2767  /* Now put the new data in, bump numrecs and log it. */
2768  xfs_btree_copy_recs(cur, rp, recp, 1);
2769  xfs_btree_set_numrecs(block, ++numrecs);
2770  xfs_btree_log_recs(cur, bp, ptr, numrecs);
2771 #ifdef DEBUG
2772  if (ptr < numrecs) {
2773  ASSERT(cur->bc_ops->recs_inorder(cur, rp,
2774  xfs_btree_rec_addr(cur, ptr + 1, block)));
2775  }
2776 #endif
2777  }
2778 
2779  /* Log the new number of records in the btree header. */
2781 
2782  /* If we inserted at the start of a block, update the parents' keys. */
2783  if (optr == 1) {
2784  error = xfs_btree_updkey(cur, &key, level + 1);
2785  if (error)
2786  goto error0;
2787  }
2788 
2789  /*
2790  * If we are tracking the last record in the tree and
2791  * we are at the far right edge of the tree, update it.
2792  */
2793  if (xfs_btree_is_lastrec(cur, block, level)) {
2794  cur->bc_ops->update_lastrec(cur, block, recp,
2795  ptr, LASTREC_INSREC);
2796  }
2797 
2798  /*
2799  * Return the new block number, if any.
2800  * If there is one, give back a record value and a cursor too.
2801  */
2802  *ptrp = nptr;
2803  if (!xfs_btree_ptr_is_null(cur, &nptr)) {
2804  *recp = nrec;
2805  *curp = ncur;
2806  }
2807 
2808  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2809  *stat = 1;
2810  return 0;
2811 
2812 error0:
2813  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2814  return error;
2815 }
2816 
2817 /*
2818  * Insert the record at the point referenced by cur.
2819  *
2820  * A multi-level split of the tree on insert will invalidate the original
2821  * cursor. All callers of this function should assume that the cursor is
2822  * no longer valid and revalidate it.
2823  */
2824 int
2826  struct xfs_btree_cur *cur,
2827  int *stat)
2828 {
2829  int error; /* error return value */
2830  int i; /* result value, 0 for failure */
2831  int level; /* current level number in btree */
2832  union xfs_btree_ptr nptr; /* new block number (split result) */
2833  struct xfs_btree_cur *ncur; /* new cursor (split result) */
2834  struct xfs_btree_cur *pcur; /* previous level's cursor */
2835  union xfs_btree_rec rec; /* record to insert */
2836 
2837  level = 0;
2838  ncur = NULL;
2839  pcur = cur;
2840 
2841  xfs_btree_set_ptr_null(cur, &nptr);
2842  cur->bc_ops->init_rec_from_cur(cur, &rec);
2843 
2844  /*
2845  * Loop going up the tree, starting at the leaf level.
2846  * Stop when we don't get a split block, that must mean that
2847  * the insert is finished with this level.
2848  */
2849  do {
2850  /*
2851  * Insert nrec/nptr into this level of the tree.
2852  * Note if we fail, nptr will be null.
2853  */
2854  error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
2855  if (error) {
2856  if (pcur != cur)
2858  goto error0;
2859  }
2860 
2861  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2862  level++;
2863 
2864  /*
2865  * See if the cursor we just used is trash.
2866  * Can't trash the caller's cursor, but otherwise we should
2867  * if ncur is a new cursor or we're about to be done.
2868  */
2869  if (pcur != cur &&
2870  (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
2871  /* Save the state from the cursor before we trash it */
2872  if (cur->bc_ops->update_cursor)
2873  cur->bc_ops->update_cursor(pcur, cur);
2874  cur->bc_nlevels = pcur->bc_nlevels;
2876  }
2877  /* If we got a new cursor, switch to it. */
2878  if (ncur) {
2879  pcur = ncur;
2880  ncur = NULL;
2881  }
2882  } while (!xfs_btree_ptr_is_null(cur, &nptr));
2883 
2884  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2885  *stat = i;
2886  return 0;
2887 error0:
2888  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2889  return error;
2890 }
2891 
2892 /*
2893  * Try to merge a non-leaf block back into the inode root.
2894  *
2895  * Note: the killroot names comes from the fact that we're effectively
2896  * killing the old root block. But because we can't just delete the
2897  * inode we have to copy the single block it was pointing to into the
2898  * inode.
2899  */
2900 STATIC int
2902  struct xfs_btree_cur *cur)
2903 {
2904  int whichfork = cur->bc_private.b.whichfork;
2905  struct xfs_inode *ip = cur->bc_private.b.ip;
2906  struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
2907  struct xfs_btree_block *block;
2908  struct xfs_btree_block *cblock;
2909  union xfs_btree_key *kp;
2910  union xfs_btree_key *ckp;
2911  union xfs_btree_ptr *pp;
2912  union xfs_btree_ptr *cpp;
2913  struct xfs_buf *cbp;
2914  int level;
2915  int index;
2916  int numrecs;
2917 #ifdef DEBUG
2918  union xfs_btree_ptr ptr;
2919  int i;
2920 #endif
2921 
2922  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2923 
2925  ASSERT(cur->bc_nlevels > 1);
2926 
2927  /*
2928  * Don't deal with the root block needs to be a leaf case.
2929  * We're just going to turn the thing back into extents anyway.
2930  */
2931  level = cur->bc_nlevels - 1;
2932  if (level == 1)
2933  goto out0;
2934 
2935  /*
2936  * Give up if the root has multiple children.
2937  */
2938  block = xfs_btree_get_iroot(cur);
2939  if (xfs_btree_get_numrecs(block) != 1)
2940  goto out0;
2941 
2942  cblock = xfs_btree_get_block(cur, level - 1, &cbp);
2943  numrecs = xfs_btree_get_numrecs(cblock);
2944 
2945  /*
2946  * Only do this if the next level will fit.
2947  * Then the data must be copied up to the inode,
2948  * instead of freeing the root you free the next level.
2949  */
2950  if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
2951  goto out0;
2952 
2953  XFS_BTREE_STATS_INC(cur, killroot);
2954 
2955 #ifdef DEBUG
2956  xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
2957  ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
2958  xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
2959  ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
2960 #endif
2961 
2962  index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
2963  if (index) {
2964  xfs_iroot_realloc(cur->bc_private.b.ip, index,
2965  cur->bc_private.b.whichfork);
2966  block = ifp->if_broot;
2967  }
2968 
2969  be16_add_cpu(&block->bb_numrecs, index);
2970  ASSERT(block->bb_numrecs == cblock->bb_numrecs);
2971 
2972  kp = xfs_btree_key_addr(cur, 1, block);
2973  ckp = xfs_btree_key_addr(cur, 1, cblock);
2974  xfs_btree_copy_keys(cur, kp, ckp, numrecs);
2975 
2976  pp = xfs_btree_ptr_addr(cur, 1, block);
2977  cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2978 #ifdef DEBUG
2979  for (i = 0; i < numrecs; i++) {
2980  int error;
2981 
2982  error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
2983  if (error) {
2984  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2985  return error;
2986  }
2987  }
2988 #endif
2989  xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
2990 
2991  cur->bc_ops->free_block(cur, cbp);
2992  XFS_BTREE_STATS_INC(cur, free);
2993 
2994  cur->bc_bufs[level - 1] = NULL;
2995  be16_add_cpu(&block->bb_level, -1);
2996  xfs_trans_log_inode(cur->bc_tp, ip,
2997  XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
2998  cur->bc_nlevels--;
2999 out0:
3000  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3001  return 0;
3002 }
3003 
3004 /*
3005  * Kill the current root node, and replace it with it's only child node.
3006  */
3007 STATIC int
3009  struct xfs_btree_cur *cur,
3010  struct xfs_buf *bp,
3011  int level,
3012  union xfs_btree_ptr *newroot)
3013 {
3014  int error;
3015 
3016  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3017  XFS_BTREE_STATS_INC(cur, killroot);
3018 
3019  /*
3020  * Update the root pointer, decreasing the level by 1 and then
3021  * free the old root.
3022  */
3023  cur->bc_ops->set_root(cur, newroot, -1);
3024 
3025  error = cur->bc_ops->free_block(cur, bp);
3026  if (error) {
3027  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3028  return error;
3029  }
3030 
3031  XFS_BTREE_STATS_INC(cur, free);
3032 
3033  cur->bc_bufs[level] = NULL;
3034  cur->bc_ra[level] = 0;
3035  cur->bc_nlevels--;
3036 
3037  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3038  return 0;
3039 }
3040 
3041 STATIC int
3043  struct xfs_btree_cur *cur,
3044  int level,
3045  int *stat)
3046 {
3047  int error;
3048  int i;
3049 
3050  if (level > 0) {
3051  error = xfs_btree_decrement(cur, level, &i);
3052  if (error)
3053  return error;
3054  }
3055 
3056  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3057  *stat = 1;
3058  return 0;
3059 }
3060 
3061 /*
3062  * Single level of the btree record deletion routine.
3063  * Delete record pointed to by cur/level.
3064  * Remove the record from its block then rebalance the tree.
3065  * Return 0 for error, 1 for done, 2 to go on to the next level.
3066  */
3067 STATIC int /* error */
3069  struct xfs_btree_cur *cur, /* btree cursor */
3070  int level, /* level removing record from */
3071  int *stat) /* fail/done/go-on */
3072 {
3073  struct xfs_btree_block *block; /* btree block */
3074  union xfs_btree_ptr cptr; /* current block ptr */
3075  struct xfs_buf *bp; /* buffer for block */
3076  int error; /* error return value */
3077  int i; /* loop counter */
3078  union xfs_btree_key key; /* storage for keyp */
3079  union xfs_btree_key *keyp = &key; /* passed to the next level */
3080  union xfs_btree_ptr lptr; /* left sibling block ptr */
3081  struct xfs_buf *lbp; /* left buffer pointer */
3082  struct xfs_btree_block *left; /* left btree block */
3083  int lrecs = 0; /* left record count */
3084  int ptr; /* key/record index */
3085  union xfs_btree_ptr rptr; /* right sibling block ptr */
3086  struct xfs_buf *rbp; /* right buffer pointer */
3087  struct xfs_btree_block *right; /* right btree block */
3088  struct xfs_btree_block *rrblock; /* right-right btree block */
3089  struct xfs_buf *rrbp; /* right-right buffer pointer */
3090  int rrecs = 0; /* right record count */
3091  struct xfs_btree_cur *tcur; /* temporary btree cursor */
3092  int numrecs; /* temporary numrec count */
3093 
3094  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3095  XFS_BTREE_TRACE_ARGI(cur, level);
3096 
3097  tcur = NULL;
3098 
3099  /* Get the index of the entry being deleted, check for nothing there. */
3100  ptr = cur->bc_ptrs[level];
3101  if (ptr == 0) {
3102  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3103  *stat = 0;
3104  return 0;
3105  }
3106 
3107  /* Get the buffer & block containing the record or key/ptr. */
3108  block = xfs_btree_get_block(cur, level, &bp);
3109  numrecs = xfs_btree_get_numrecs(block);
3110 
3111 #ifdef DEBUG
3112  error = xfs_btree_check_block(cur, block, level, bp);
3113  if (error)
3114  goto error0;
3115 #endif
3116 
3117  /* Fail if we're off the end of the block. */
3118  if (ptr > numrecs) {
3119  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3120  *stat = 0;
3121  return 0;
3122  }
3123 
3124  XFS_BTREE_STATS_INC(cur, delrec);
3125  XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3126 
3127  /* Excise the entries being deleted. */
3128  if (level > 0) {
3129  /* It's a nonleaf. operate on keys and ptrs */
3130  union xfs_btree_key *lkp;
3131  union xfs_btree_ptr *lpp;
3132 
3133  lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3134  lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3135 
3136 #ifdef DEBUG
3137  for (i = 0; i < numrecs - ptr; i++) {
3138  error = xfs_btree_check_ptr(cur, lpp, i, level);
3139  if (error)
3140  goto error0;
3141  }
3142 #endif
3143 
3144  if (ptr < numrecs) {
3145  xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3146  xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3147  xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3148  xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3149  }
3150 
3151  /*
3152  * If it's the first record in the block, we'll need to pass a
3153  * key up to the next level (updkey).
3154  */
3155  if (ptr == 1)
3156  keyp = xfs_btree_key_addr(cur, 1, block);
3157  } else {
3158  /* It's a leaf. operate on records */
3159  if (ptr < numrecs) {
3161  xfs_btree_rec_addr(cur, ptr + 1, block),
3162  -1, numrecs - ptr);
3163  xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3164  }
3165 
3166  /*
3167  * If it's the first record in the block, we'll need a key
3168  * structure to pass up to the next level (updkey).
3169  */
3170  if (ptr == 1) {
3171  cur->bc_ops->init_key_from_rec(&key,
3172  xfs_btree_rec_addr(cur, 1, block));
3173  keyp = &key;
3174  }
3175  }
3176 
3177  /*
3178  * Decrement and log the number of entries in the block.
3179  */
3180  xfs_btree_set_numrecs(block, --numrecs);
3182 
3183  /*
3184  * If we are tracking the last record in the tree and
3185  * we are at the far right edge of the tree, update it.
3186  */
3187  if (xfs_btree_is_lastrec(cur, block, level)) {
3188  cur->bc_ops->update_lastrec(cur, block, NULL,
3189  ptr, LASTREC_DELREC);
3190  }
3191 
3192  /*
3193  * We're at the root level. First, shrink the root block in-memory.
3194  * Try to get rid of the next level down. If we can't then there's
3195  * nothing left to do.
3196  */
3197  if (level == cur->bc_nlevels - 1) {
3198  if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3199  xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3200  cur->bc_private.b.whichfork);
3201 
3202  error = xfs_btree_kill_iroot(cur);
3203  if (error)
3204  goto error0;
3205 
3206  error = xfs_btree_dec_cursor(cur, level, stat);
3207  if (error)
3208  goto error0;
3209  *stat = 1;
3210  return 0;
3211  }
3212 
3213  /*
3214  * If this is the root level, and there's only one entry left,
3215  * and it's NOT the leaf level, then we can get rid of this
3216  * level.
3217  */
3218  if (numrecs == 1 && level > 0) {
3219  union xfs_btree_ptr *pp;
3220  /*
3221  * pp is still set to the first pointer in the block.
3222  * Make it the new root of the btree.
3223  */
3224  pp = xfs_btree_ptr_addr(cur, 1, block);
3225  error = xfs_btree_kill_root(cur, bp, level, pp);
3226  if (error)
3227  goto error0;
3228  } else if (level > 0) {
3229  error = xfs_btree_dec_cursor(cur, level, stat);
3230  if (error)
3231  goto error0;
3232  }
3233  *stat = 1;
3234  return 0;
3235  }
3236 
3237  /*
3238  * If we deleted the leftmost entry in the block, update the
3239  * key values above us in the tree.
3240  */
3241  if (ptr == 1) {
3242  error = xfs_btree_updkey(cur, keyp, level + 1);
3243  if (error)
3244  goto error0;
3245  }
3246 
3247  /*
3248  * If the number of records remaining in the block is at least
3249  * the minimum, we're done.
3250  */
3251  if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3252  error = xfs_btree_dec_cursor(cur, level, stat);
3253  if (error)
3254  goto error0;
3255  return 0;
3256  }
3257 
3258  /*
3259  * Otherwise, we have to move some records around to keep the
3260  * tree balanced. Look at the left and right sibling blocks to
3261  * see if we can re-balance by moving only one record.
3262  */
3263  xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3264  xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3265 
3266  if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3267  /*
3268  * One child of root, need to get a chance to copy its contents
3269  * into the root and delete it. Can't go up to next level,
3270  * there's nothing to delete there.
3271  */
3272  if (xfs_btree_ptr_is_null(cur, &rptr) &&
3273  xfs_btree_ptr_is_null(cur, &lptr) &&
3274  level == cur->bc_nlevels - 2) {
3275  error = xfs_btree_kill_iroot(cur);
3276  if (!error)
3277  error = xfs_btree_dec_cursor(cur, level, stat);
3278  if (error)
3279  goto error0;
3280  return 0;
3281  }
3282  }
3283 
3284  ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3285  !xfs_btree_ptr_is_null(cur, &lptr));
3286 
3287  /*
3288  * Duplicate the cursor so our btree manipulations here won't
3289  * disrupt the next level up.
3290  */
3291  error = xfs_btree_dup_cursor(cur, &tcur);
3292  if (error)
3293  goto error0;
3294 
3295  /*
3296  * If there's a right sibling, see if it's ok to shift an entry
3297  * out of it.
3298  */
3299  if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3300  /*
3301  * Move the temp cursor to the last entry in the next block.
3302  * Actually any entry but the first would suffice.
3303  */
3304  i = xfs_btree_lastrec(tcur, level);
3305  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3306 
3307  error = xfs_btree_increment(tcur, level, &i);
3308  if (error)
3309  goto error0;
3310  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3311 
3312  i = xfs_btree_lastrec(tcur, level);
3313  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3314 
3315  /* Grab a pointer to the block. */
3316  right = xfs_btree_get_block(tcur, level, &rbp);
3317 #ifdef DEBUG
3318  error = xfs_btree_check_block(tcur, right, level, rbp);
3319  if (error)
3320  goto error0;
3321 #endif
3322  /* Grab the current block number, for future use. */
3323  xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3324 
3325  /*
3326  * If right block is full enough so that removing one entry
3327  * won't make it too empty, and left-shifting an entry out
3328  * of right to us works, we're done.
3329  */
3330  if (xfs_btree_get_numrecs(right) - 1 >=
3331  cur->bc_ops->get_minrecs(tcur, level)) {
3332  error = xfs_btree_lshift(tcur, level, &i);
3333  if (error)
3334  goto error0;
3335  if (i) {
3336  ASSERT(xfs_btree_get_numrecs(block) >=
3337  cur->bc_ops->get_minrecs(tcur, level));
3338 
3340  tcur = NULL;
3341 
3342  error = xfs_btree_dec_cursor(cur, level, stat);
3343  if (error)
3344  goto error0;
3345  return 0;
3346  }
3347  }
3348 
3349  /*
3350  * Otherwise, grab the number of records in right for
3351  * future reference, and fix up the temp cursor to point
3352  * to our block again (last record).
3353  */
3354  rrecs = xfs_btree_get_numrecs(right);
3355  if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3356  i = xfs_btree_firstrec(tcur, level);
3357  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3358 
3359  error = xfs_btree_decrement(tcur, level, &i);
3360  if (error)
3361  goto error0;
3362  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3363  }
3364  }
3365 
3366  /*
3367  * If there's a left sibling, see if it's ok to shift an entry
3368  * out of it.
3369  */
3370  if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3371  /*
3372  * Move the temp cursor to the first entry in the
3373  * previous block.
3374  */
3375  i = xfs_btree_firstrec(tcur, level);
3376  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3377 
3378  error = xfs_btree_decrement(tcur, level, &i);
3379  if (error)
3380  goto error0;
3381  i = xfs_btree_firstrec(tcur, level);
3382  XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3383 
3384  /* Grab a pointer to the block. */
3385  left = xfs_btree_get_block(tcur, level, &lbp);
3386 #ifdef DEBUG
3387  error = xfs_btree_check_block(cur, left, level, lbp);
3388  if (error)
3389  goto error0;
3390 #endif
3391  /* Grab the current block number, for future use. */
3392  xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3393 
3394  /*
3395  * If left block is full enough so that removing one entry
3396  * won't make it too empty, and right-shifting an entry out
3397  * of left to us works, we're done.
3398  */
3399  if (xfs_btree_get_numrecs(left) - 1 >=
3400  cur->bc_ops->get_minrecs(tcur, level)) {
3401  error = xfs_btree_rshift(tcur, level, &i);
3402  if (error)
3403  goto error0;
3404  if (i) {
3405  ASSERT(xfs_btree_get_numrecs(block) >=
3406  cur->bc_ops->get_minrecs(tcur, level));
3408  tcur = NULL;
3409  if (level == 0)
3410  cur->bc_ptrs[0]++;
3411  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3412  *stat = 1;
3413  return 0;
3414  }
3415  }
3416 
3417  /*
3418  * Otherwise, grab the number of records in right for
3419  * future reference.
3420  */
3421  lrecs = xfs_btree_get_numrecs(left);
3422  }
3423 
3424  /* Delete the temp cursor, we're done with it. */
3426  tcur = NULL;
3427 
3428  /* If here, we need to do a join to keep the tree balanced. */
3429  ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3430 
3431  if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3432  lrecs + xfs_btree_get_numrecs(block) <=
3433  cur->bc_ops->get_maxrecs(cur, level)) {
3434  /*
3435  * Set "right" to be the starting block,
3436  * "left" to be the left neighbor.
3437  */
3438  rptr = cptr;
3439  right = block;
3440  rbp = bp;
3441  error = xfs_btree_read_buf_block(cur, &lptr, level,
3442  0, &left, &lbp);
3443  if (error)
3444  goto error0;
3445 
3446  /*
3447  * If that won't work, see if we can join with the right neighbor block.
3448  */
3449  } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
3450  rrecs + xfs_btree_get_numrecs(block) <=
3451  cur->bc_ops->get_maxrecs(cur, level)) {
3452  /*
3453  * Set "left" to be the starting block,
3454  * "right" to be the right neighbor.
3455  */
3456  lptr = cptr;
3457  left = block;
3458  lbp = bp;
3459  error = xfs_btree_read_buf_block(cur, &rptr, level,
3460  0, &right, &rbp);
3461  if (error)
3462  goto error0;
3463 
3464  /*
3465  * Otherwise, we can't fix the imbalance.
3466  * Just return. This is probably a logic error, but it's not fatal.
3467  */
3468  } else {
3469  error = xfs_btree_dec_cursor(cur, level, stat);
3470  if (error)
3471  goto error0;
3472  return 0;
3473  }
3474 
3475  rrecs = xfs_btree_get_numrecs(right);
3476  lrecs = xfs_btree_get_numrecs(left);
3477 
3478  /*
3479  * We're now going to join "left" and "right" by moving all the stuff
3480  * in "right" to "left" and deleting "right".
3481  */
3482  XFS_BTREE_STATS_ADD(cur, moves, rrecs);
3483  if (level > 0) {
3484  /* It's a non-leaf. Move keys and pointers. */
3485  union xfs_btree_key *lkp; /* left btree key */
3486  union xfs_btree_ptr *lpp; /* left address pointer */
3487  union xfs_btree_key *rkp; /* right btree key */
3488  union xfs_btree_ptr *rpp; /* right address pointer */
3489 
3490  lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
3491  lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
3492  rkp = xfs_btree_key_addr(cur, 1, right);
3493  rpp = xfs_btree_ptr_addr(cur, 1, right);
3494 #ifdef DEBUG
3495  for (i = 1; i < rrecs; i++) {
3496  error = xfs_btree_check_ptr(cur, rpp, i, level);
3497  if (error)
3498  goto error0;
3499  }
3500 #endif
3501  xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
3502  xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
3503 
3504  xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
3505  xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
3506  } else {
3507  /* It's a leaf. Move records. */
3508  union xfs_btree_rec *lrp; /* left record pointer */
3509  union xfs_btree_rec *rrp; /* right record pointer */
3510 
3511  lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
3512  rrp = xfs_btree_rec_addr(cur, 1, right);
3513 
3514  xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
3515  xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
3516  }
3517 
3518  XFS_BTREE_STATS_INC(cur, join);
3519 
3520  /*
3521  * Fix up the number of records and right block pointer in the
3522  * surviving block, and log it.
3523  */
3524  xfs_btree_set_numrecs(left, lrecs + rrecs);
3525  xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
3526  xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3528 
3529  /* If there is a right sibling, point it to the remaining block. */
3530  xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3531  if (!xfs_btree_ptr_is_null(cur, &cptr)) {
3532  error = xfs_btree_read_buf_block(cur, &cptr, level,
3533  0, &rrblock, &rrbp);
3534  if (error)
3535  goto error0;
3536  xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
3537  xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
3538  }
3539 
3540  /* Free the deleted block. */
3541  error = cur->bc_ops->free_block(cur, rbp);
3542  if (error)
3543  goto error0;
3544  XFS_BTREE_STATS_INC(cur, free);
3545 
3546  /*
3547  * If we joined with the left neighbor, set the buffer in the
3548  * cursor to the left block, and fix up the index.
3549  */
3550  if (bp != lbp) {
3551  cur->bc_bufs[level] = lbp;
3552  cur->bc_ptrs[level] += lrecs;
3553  cur->bc_ra[level] = 0;
3554  }
3555  /*
3556  * If we joined with the right neighbor and there's a level above
3557  * us, increment the cursor at that level.
3558  */
3559  else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
3560  (level + 1 < cur->bc_nlevels)) {
3561  error = xfs_btree_increment(cur, level + 1, &i);
3562  if (error)
3563  goto error0;
3564  }
3565 
3566  /*
3567  * Readjust the ptr at this level if it's not a leaf, since it's
3568  * still pointing at the deletion point, which makes the cursor
3569  * inconsistent. If this makes the ptr 0, the caller fixes it up.
3570  * We can't use decrement because it would change the next level up.
3571  */
3572  if (level > 0)
3573  cur->bc_ptrs[level]--;
3574 
3575  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3576  /* Return value means the next level up has something to do. */
3577  *stat = 2;
3578  return 0;
3579 
3580 error0:
3581  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3582  if (tcur)
3584  return error;
3585 }
3586 
3587 /*
3588  * Delete the record pointed to by cur.
3589  * The cursor refers to the place where the record was (could be inserted)
3590  * when the operation returns.
3591  */
3592 int /* error */
3594  struct xfs_btree_cur *cur,
3595  int *stat) /* success/failure */
3596 {
3597  int error; /* error return value */
3598  int level;
3599  int i;
3600 
3601  XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3602 
3603  /*
3604  * Go up the tree, starting at leaf level.
3605  *
3606  * If 2 is returned then a join was done; go to the next level.
3607  * Otherwise we are done.
3608  */
3609  for (level = 0, i = 2; i == 2; level++) {
3610  error = xfs_btree_delrec(cur, level, &i);
3611  if (error)
3612  goto error0;
3613  }
3614 
3615  if (i == 0) {
3616  for (level = 1; level < cur->bc_nlevels; level++) {
3617  if (cur->bc_ptrs[level] == 0) {
3618  error = xfs_btree_decrement(cur, level, &i);
3619  if (error)
3620  goto error0;
3621  break;
3622  }
3623  }
3624  }
3625 
3626  XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3627  *stat = i;
3628  return 0;
3629 error0:
3630  XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3631  return error;
3632 }
3633 
3634 /*
3635  * Get the data from the pointed-to record.
3636  */
3637 int /* error */
3639  struct xfs_btree_cur *cur, /* btree cursor */
3640  union xfs_btree_rec **recp, /* output: btree record */
3641  int *stat) /* output: success/failure */
3642 {
3643  struct xfs_btree_block *block; /* btree block */
3644  struct xfs_buf *bp; /* buffer pointer */
3645  int ptr; /* record number */
3646 #ifdef DEBUG
3647  int error; /* error return value */
3648 #endif
3649 
3650  ptr = cur->bc_ptrs[0];
3651  block = xfs_btree_get_block(cur, 0, &bp);
3652 
3653 #ifdef DEBUG
3654  error = xfs_btree_check_block(cur, block, 0, bp);
3655  if (error)
3656  return error;
3657 #endif
3658 
3659  /*
3660  * Off the right end or left end, return failure.
3661  */
3662  if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
3663  *stat = 0;
3664  return 0;
3665  }
3666 
3667  /*
3668  * Point to the record and extract its data.
3669  */
3670  *recp = xfs_btree_rec_addr(cur, ptr, block);
3671  *stat = 1;
3672  return 0;
3673 }