Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
btree.c
Go to the documentation of this file.
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  *
20  * Written by Koji Sato <[email protected]>.
21  */
22 
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33 
34 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
35 {
36  struct nilfs_btree_path *path;
38 
40  if (path == NULL)
41  goto out;
42 
43  for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
44  path[level].bp_bh = NULL;
45  path[level].bp_sib_bh = NULL;
46  path[level].bp_index = 0;
49  path[level].bp_op = NULL;
50  }
51 
52 out:
53  return path;
54 }
55 
56 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
57 {
58  int level = NILFS_BTREE_LEVEL_DATA;
59 
60  for (; level < NILFS_BTREE_LEVEL_MAX; level++)
61  brelse(path[level].bp_bh);
62 
64 }
65 
66 /*
67  * B-tree node operations
68  */
69 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
70  __u64 ptr, struct buffer_head **bhp)
71 {
72  struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
73  struct buffer_head *bh;
74 
75  bh = nilfs_btnode_create_block(btnc, ptr);
76  if (!bh)
77  return -ENOMEM;
78 
79  set_buffer_nilfs_volatile(bh);
80  *bhp = bh;
81  return 0;
82 }
83 
84 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
85 {
86  return node->bn_flags;
87 }
88 
89 static void
90 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
91 {
92  node->bn_flags = flags;
93 }
94 
95 static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
96 {
97  return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
98 }
99 
100 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
101 {
102  return node->bn_level;
103 }
104 
105 static void
106 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
107 {
108  node->bn_level = level;
109 }
110 
111 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
112 {
113  return le16_to_cpu(node->bn_nchildren);
114 }
115 
116 static void
117 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
118 {
119  node->bn_nchildren = cpu_to_le16(nchildren);
120 }
121 
122 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
123 {
124  return 1 << btree->b_inode->i_blkbits;
125 }
126 
127 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
128 {
129  return btree->b_nchildren_per_block;
130 }
131 
132 static __le64 *
133 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
134 {
135  return (__le64 *)((char *)(node + 1) +
136  (nilfs_btree_node_root(node) ?
138 }
139 
140 static __le64 *
141 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
142 {
143  return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
144 }
145 
146 static __u64
147 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
148 {
149  return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
150 }
151 
152 static void
153 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
154 {
155  *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
156 }
157 
158 static __u64
159 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
160  int ncmax)
161 {
162  return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
163 }
164 
165 static void
166 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
167  int ncmax)
168 {
169  *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
170 }
171 
172 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
173  int level, int nchildren, int ncmax,
174  const __u64 *keys, const __u64 *ptrs)
175 {
176  __le64 *dkeys;
177  __le64 *dptrs;
178  int i;
179 
180  nilfs_btree_node_set_flags(node, flags);
181  nilfs_btree_node_set_level(node, level);
182  nilfs_btree_node_set_nchildren(node, nchildren);
183 
184  dkeys = nilfs_btree_node_dkeys(node);
185  dptrs = nilfs_btree_node_dptrs(node, ncmax);
186  for (i = 0; i < nchildren; i++) {
187  dkeys[i] = cpu_to_le64(keys[i]);
188  dptrs[i] = cpu_to_le64(ptrs[i]);
189  }
190 }
191 
192 /* Assume the buffer heads corresponding to left and right are locked. */
193 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
194  struct nilfs_btree_node *right,
195  int n, int lncmax, int rncmax)
196 {
197  __le64 *ldkeys, *rdkeys;
198  __le64 *ldptrs, *rdptrs;
199  int lnchildren, rnchildren;
200 
201  ldkeys = nilfs_btree_node_dkeys(left);
202  ldptrs = nilfs_btree_node_dptrs(left, lncmax);
203  lnchildren = nilfs_btree_node_get_nchildren(left);
204 
205  rdkeys = nilfs_btree_node_dkeys(right);
206  rdptrs = nilfs_btree_node_dptrs(right, rncmax);
207  rnchildren = nilfs_btree_node_get_nchildren(right);
208 
209  memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
210  memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
211  memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
212  memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
213 
214  lnchildren += n;
215  rnchildren -= n;
216  nilfs_btree_node_set_nchildren(left, lnchildren);
217  nilfs_btree_node_set_nchildren(right, rnchildren);
218 }
219 
220 /* Assume that the buffer heads corresponding to left and right are locked. */
221 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
222  struct nilfs_btree_node *right,
223  int n, int lncmax, int rncmax)
224 {
225  __le64 *ldkeys, *rdkeys;
226  __le64 *ldptrs, *rdptrs;
227  int lnchildren, rnchildren;
228 
229  ldkeys = nilfs_btree_node_dkeys(left);
230  ldptrs = nilfs_btree_node_dptrs(left, lncmax);
231  lnchildren = nilfs_btree_node_get_nchildren(left);
232 
233  rdkeys = nilfs_btree_node_dkeys(right);
234  rdptrs = nilfs_btree_node_dptrs(right, rncmax);
235  rnchildren = nilfs_btree_node_get_nchildren(right);
236 
237  memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
238  memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
239  memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
240  memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
241 
242  lnchildren -= n;
243  rnchildren += n;
244  nilfs_btree_node_set_nchildren(left, lnchildren);
245  nilfs_btree_node_set_nchildren(right, rnchildren);
246 }
247 
248 /* Assume that the buffer head corresponding to node is locked. */
249 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
250  __u64 key, __u64 ptr, int ncmax)
251 {
252  __le64 *dkeys;
253  __le64 *dptrs;
254  int nchildren;
255 
256  dkeys = nilfs_btree_node_dkeys(node);
257  dptrs = nilfs_btree_node_dptrs(node, ncmax);
258  nchildren = nilfs_btree_node_get_nchildren(node);
259  if (index < nchildren) {
260  memmove(dkeys + index + 1, dkeys + index,
261  (nchildren - index) * sizeof(*dkeys));
262  memmove(dptrs + index + 1, dptrs + index,
263  (nchildren - index) * sizeof(*dptrs));
264  }
265  dkeys[index] = cpu_to_le64(key);
266  dptrs[index] = cpu_to_le64(ptr);
267  nchildren++;
268  nilfs_btree_node_set_nchildren(node, nchildren);
269 }
270 
271 /* Assume that the buffer head corresponding to node is locked. */
272 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
273  __u64 *keyp, __u64 *ptrp, int ncmax)
274 {
275  __u64 key;
276  __u64 ptr;
277  __le64 *dkeys;
278  __le64 *dptrs;
279  int nchildren;
280 
281  dkeys = nilfs_btree_node_dkeys(node);
282  dptrs = nilfs_btree_node_dptrs(node, ncmax);
283  key = le64_to_cpu(dkeys[index]);
284  ptr = le64_to_cpu(dptrs[index]);
285  nchildren = nilfs_btree_node_get_nchildren(node);
286  if (keyp != NULL)
287  *keyp = key;
288  if (ptrp != NULL)
289  *ptrp = ptr;
290 
291  if (index < nchildren - 1) {
292  memmove(dkeys + index, dkeys + index + 1,
293  (nchildren - index - 1) * sizeof(*dkeys));
294  memmove(dptrs + index, dptrs + index + 1,
295  (nchildren - index - 1) * sizeof(*dptrs));
296  }
297  nchildren--;
298  nilfs_btree_node_set_nchildren(node, nchildren);
299 }
300 
301 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
302  __u64 key, int *indexp)
303 {
304  __u64 nkey;
305  int index, low, high, s;
306 
307  /* binary search */
308  low = 0;
309  high = nilfs_btree_node_get_nchildren(node) - 1;
310  index = 0;
311  s = 0;
312  while (low <= high) {
313  index = (low + high) / 2;
314  nkey = nilfs_btree_node_get_key(node, index);
315  if (nkey == key) {
316  s = 0;
317  goto out;
318  } else if (nkey < key) {
319  low = index + 1;
320  s = -1;
321  } else {
322  high = index - 1;
323  s = 1;
324  }
325  }
326 
327  /* adjust index */
328  if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
329  if (s > 0 && index > 0)
330  index--;
331  } else if (s < 0)
332  index++;
333 
334  out:
335  *indexp = index;
336 
337  return s == 0;
338 }
339 
348 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
349  size_t size, sector_t blocknr)
350 {
351  int level, flags, nchildren;
352  int ret = 0;
353 
354  level = nilfs_btree_node_get_level(node);
355  flags = nilfs_btree_node_get_flags(node);
356  nchildren = nilfs_btree_node_get_nchildren(node);
357 
358  if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
359  level >= NILFS_BTREE_LEVEL_MAX ||
360  (flags & NILFS_BTREE_NODE_ROOT) ||
361  nchildren < 0 ||
362  nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
363  printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
364  "level = %d, flags = 0x%x, nchildren = %d\n",
365  (unsigned long long)blocknr, level, flags, nchildren);
366  ret = 1;
367  }
368  return ret;
369 }
370 
371 int nilfs_btree_broken_node_block(struct buffer_head *bh)
372 {
373  int ret;
374 
375  if (buffer_nilfs_checked(bh))
376  return 0;
377 
378  ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
379  bh->b_size, bh->b_blocknr);
380  if (likely(!ret))
381  set_buffer_nilfs_checked(bh);
382  return ret;
383 }
384 
385 static struct nilfs_btree_node *
386 nilfs_btree_get_root(const struct nilfs_bmap *btree)
387 {
388  return (struct nilfs_btree_node *)btree->b_u.u_data;
389 }
390 
391 static struct nilfs_btree_node *
392 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
393 {
394  return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
395 }
396 
397 static struct nilfs_btree_node *
398 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
399 {
400  return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
401 }
402 
403 static int nilfs_btree_height(const struct nilfs_bmap *btree)
404 {
405  return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
406 }
407 
408 static struct nilfs_btree_node *
409 nilfs_btree_get_node(const struct nilfs_bmap *btree,
410  const struct nilfs_btree_path *path,
411  int level, int *ncmaxp)
412 {
413  struct nilfs_btree_node *node;
414 
415  if (level == nilfs_btree_height(btree) - 1) {
416  node = nilfs_btree_get_root(btree);
418  } else {
419  node = nilfs_btree_get_nonroot_node(path, level);
420  *ncmaxp = nilfs_btree_nchildren_per_block(btree);
421  }
422  return node;
423 }
424 
425 static int
426 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
427 {
428  if (unlikely(nilfs_btree_node_get_level(node) != level)) {
429  dump_stack();
430  printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
431  nilfs_btree_node_get_level(node), level);
432  return 1;
433  }
434  return 0;
435 }
436 
438  struct nilfs_btree_node *node; /* parent node */
439  int max_ra_blocks; /* max nof blocks to read ahead */
440  int index; /* current index on the parent node */
441  int ncmax; /* nof children in the parent node */
442 };
443 
444 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
445  struct buffer_head **bhp,
446  const struct nilfs_btree_readahead_info *ra)
447 {
448  struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
449  struct buffer_head *bh, *ra_bh;
450  sector_t submit_ptr = 0;
451  int ret;
452 
453  ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr);
454  if (ret) {
455  if (ret != -EEXIST)
456  return ret;
457  goto out_check;
458  }
459 
460  if (ra) {
461  int i, n;
462  __u64 ptr2;
463 
464  /* read ahead sibling nodes */
465  for (n = ra->max_ra_blocks, i = ra->index + 1;
466  n > 0 && i < ra->ncmax; n--, i++) {
467  ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
468 
469  ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA,
470  &ra_bh, &submit_ptr);
471  if (likely(!ret || ret == -EEXIST))
472  brelse(ra_bh);
473  else if (ret != -EBUSY)
474  break;
475  if (!buffer_locked(bh))
476  goto out_no_wait;
477  }
478  }
479 
480  wait_on_buffer(bh);
481 
482  out_no_wait:
483  if (!buffer_uptodate(bh)) {
484  brelse(bh);
485  return -EIO;
486  }
487 
488  out_check:
490  clear_buffer_uptodate(bh);
491  brelse(bh);
492  return -EINVAL;
493  }
494 
495  *bhp = bh;
496  return 0;
497 }
498 
499 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
500  struct buffer_head **bhp)
501 {
502  return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
503 }
504 
505 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
506  struct nilfs_btree_path *path,
507  __u64 key, __u64 *ptrp, int minlevel,
508  int readahead)
509 {
510  struct nilfs_btree_node *node;
512  __u64 ptr;
513  int level, index, found, ncmax, ret;
514 
515  node = nilfs_btree_get_root(btree);
516  level = nilfs_btree_node_get_level(node);
517  if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
518  return -ENOENT;
519 
520  found = nilfs_btree_node_lookup(node, key, &index);
521  ptr = nilfs_btree_node_get_ptr(node, index,
523  path[level].bp_bh = NULL;
524  path[level].bp_index = index;
525 
526  ncmax = nilfs_btree_nchildren_per_block(btree);
527 
528  while (--level >= minlevel) {
529  ra = NULL;
530  if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
531  p.node = nilfs_btree_get_node(btree, path, level + 1,
532  &p.ncmax);
533  p.index = index;
534  p.max_ra_blocks = 7;
535  ra = &p;
536  }
537  ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
538  ra);
539  if (ret < 0)
540  return ret;
541 
542  node = nilfs_btree_get_nonroot_node(path, level);
543  if (nilfs_btree_bad_node(node, level))
544  return -EINVAL;
545  if (!found)
546  found = nilfs_btree_node_lookup(node, key, &index);
547  else
548  index = 0;
549  if (index < ncmax) {
550  ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
551  } else {
552  WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
553  /* insert */
555  }
556  path[level].bp_index = index;
557  }
558  if (!found)
559  return -ENOENT;
560 
561  if (ptrp != NULL)
562  *ptrp = ptr;
563 
564  return 0;
565 }
566 
567 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
568  struct nilfs_btree_path *path,
569  __u64 *keyp, __u64 *ptrp)
570 {
571  struct nilfs_btree_node *node;
572  __u64 ptr;
573  int index, level, ncmax, ret;
574 
575  node = nilfs_btree_get_root(btree);
576  index = nilfs_btree_node_get_nchildren(node) - 1;
577  if (index < 0)
578  return -ENOENT;
579  level = nilfs_btree_node_get_level(node);
580  ptr = nilfs_btree_node_get_ptr(node, index,
582  path[level].bp_bh = NULL;
583  path[level].bp_index = index;
584  ncmax = nilfs_btree_nchildren_per_block(btree);
585 
586  for (level--; level > 0; level--) {
587  ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
588  if (ret < 0)
589  return ret;
590  node = nilfs_btree_get_nonroot_node(path, level);
591  if (nilfs_btree_bad_node(node, level))
592  return -EINVAL;
593  index = nilfs_btree_node_get_nchildren(node) - 1;
594  ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
595  path[level].bp_index = index;
596  }
597 
598  if (keyp != NULL)
599  *keyp = nilfs_btree_node_get_key(node, index);
600  if (ptrp != NULL)
601  *ptrp = ptr;
602 
603  return 0;
604 }
605 
606 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
607  __u64 key, int level, __u64 *ptrp)
608 {
609  struct nilfs_btree_path *path;
610  int ret;
611 
612  path = nilfs_btree_alloc_path();
613  if (path == NULL)
614  return -ENOMEM;
615 
616  ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
617 
618  nilfs_btree_free_path(path);
619 
620  return ret;
621 }
622 
623 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
624  __u64 key, __u64 *ptrp, unsigned maxblocks)
625 {
626  struct nilfs_btree_path *path;
627  struct nilfs_btree_node *node;
628  struct inode *dat = NULL;
629  __u64 ptr, ptr2;
631  int level = NILFS_BTREE_LEVEL_NODE_MIN;
632  int ret, cnt, index, maxlevel, ncmax;
634 
635  path = nilfs_btree_alloc_path();
636  if (path == NULL)
637  return -ENOMEM;
638 
639  ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
640  if (ret < 0)
641  goto out;
642 
643  if (NILFS_BMAP_USE_VBN(btree)) {
644  dat = nilfs_bmap_get_dat(btree);
645  ret = nilfs_dat_translate(dat, ptr, &blocknr);
646  if (ret < 0)
647  goto out;
648  ptr = blocknr;
649  }
650  cnt = 1;
651  if (cnt == maxblocks)
652  goto end;
653 
654  maxlevel = nilfs_btree_height(btree) - 1;
655  node = nilfs_btree_get_node(btree, path, level, &ncmax);
656  index = path[level].bp_index + 1;
657  for (;;) {
658  while (index < nilfs_btree_node_get_nchildren(node)) {
659  if (nilfs_btree_node_get_key(node, index) !=
660  key + cnt)
661  goto end;
662  ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
663  if (dat) {
664  ret = nilfs_dat_translate(dat, ptr2, &blocknr);
665  if (ret < 0)
666  goto out;
667  ptr2 = blocknr;
668  }
669  if (ptr2 != ptr + cnt || ++cnt == maxblocks)
670  goto end;
671  index++;
672  continue;
673  }
674  if (level == maxlevel)
675  break;
676 
677  /* look-up right sibling node */
678  p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
679  p.index = path[level + 1].bp_index + 1;
680  p.max_ra_blocks = 7;
681  if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
682  nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
683  break;
684  ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
685  path[level + 1].bp_index = p.index;
686 
687  brelse(path[level].bp_bh);
688  path[level].bp_bh = NULL;
689 
690  ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
691  &p);
692  if (ret < 0)
693  goto out;
694  node = nilfs_btree_get_nonroot_node(path, level);
695  ncmax = nilfs_btree_nchildren_per_block(btree);
696  index = 0;
697  path[level].bp_index = index;
698  }
699  end:
700  *ptrp = ptr;
701  ret = cnt;
702  out:
703  nilfs_btree_free_path(path);
704  return ret;
705 }
706 
707 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
708  struct nilfs_btree_path *path,
709  int level, __u64 key)
710 {
711  if (level < nilfs_btree_height(btree) - 1) {
712  do {
713  nilfs_btree_node_set_key(
714  nilfs_btree_get_nonroot_node(path, level),
715  path[level].bp_index, key);
716  if (!buffer_dirty(path[level].bp_bh))
717  mark_buffer_dirty(path[level].bp_bh);
718  } while ((path[level].bp_index == 0) &&
719  (++level < nilfs_btree_height(btree) - 1));
720  }
721 
722  /* root */
723  if (level == nilfs_btree_height(btree) - 1) {
724  nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
725  path[level].bp_index, key);
726  }
727 }
728 
729 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
730  struct nilfs_btree_path *path,
731  int level, __u64 *keyp, __u64 *ptrp)
732 {
733  struct nilfs_btree_node *node;
734  int ncblk;
735 
736  if (level < nilfs_btree_height(btree) - 1) {
737  node = nilfs_btree_get_nonroot_node(path, level);
738  ncblk = nilfs_btree_nchildren_per_block(btree);
739  nilfs_btree_node_insert(node, path[level].bp_index,
740  *keyp, *ptrp, ncblk);
741  if (!buffer_dirty(path[level].bp_bh))
742  mark_buffer_dirty(path[level].bp_bh);
743 
744  if (path[level].bp_index == 0)
745  nilfs_btree_promote_key(btree, path, level + 1,
746  nilfs_btree_node_get_key(node,
747  0));
748  } else {
749  node = nilfs_btree_get_root(btree);
750  nilfs_btree_node_insert(node, path[level].bp_index,
751  *keyp, *ptrp,
753  }
754 }
755 
756 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
757  struct nilfs_btree_path *path,
758  int level, __u64 *keyp, __u64 *ptrp)
759 {
760  struct nilfs_btree_node *node, *left;
761  int nchildren, lnchildren, n, move, ncblk;
762 
763  node = nilfs_btree_get_nonroot_node(path, level);
764  left = nilfs_btree_get_sib_node(path, level);
765  nchildren = nilfs_btree_node_get_nchildren(node);
766  lnchildren = nilfs_btree_node_get_nchildren(left);
767  ncblk = nilfs_btree_nchildren_per_block(btree);
768  move = 0;
769 
770  n = (nchildren + lnchildren + 1) / 2 - lnchildren;
771  if (n > path[level].bp_index) {
772  /* move insert point */
773  n--;
774  move = 1;
775  }
776 
777  nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
778 
779  if (!buffer_dirty(path[level].bp_bh))
780  mark_buffer_dirty(path[level].bp_bh);
781  if (!buffer_dirty(path[level].bp_sib_bh))
782  mark_buffer_dirty(path[level].bp_sib_bh);
783 
784  nilfs_btree_promote_key(btree, path, level + 1,
785  nilfs_btree_node_get_key(node, 0));
786 
787  if (move) {
788  brelse(path[level].bp_bh);
789  path[level].bp_bh = path[level].bp_sib_bh;
790  path[level].bp_sib_bh = NULL;
791  path[level].bp_index += lnchildren;
792  path[level + 1].bp_index--;
793  } else {
794  brelse(path[level].bp_sib_bh);
795  path[level].bp_sib_bh = NULL;
796  path[level].bp_index -= n;
797  }
798 
799  nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
800 }
801 
802 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
803  struct nilfs_btree_path *path,
804  int level, __u64 *keyp, __u64 *ptrp)
805 {
806  struct nilfs_btree_node *node, *right;
807  int nchildren, rnchildren, n, move, ncblk;
808 
809  node = nilfs_btree_get_nonroot_node(path, level);
810  right = nilfs_btree_get_sib_node(path, level);
811  nchildren = nilfs_btree_node_get_nchildren(node);
812  rnchildren = nilfs_btree_node_get_nchildren(right);
813  ncblk = nilfs_btree_nchildren_per_block(btree);
814  move = 0;
815 
816  n = (nchildren + rnchildren + 1) / 2 - rnchildren;
817  if (n > nchildren - path[level].bp_index) {
818  /* move insert point */
819  n--;
820  move = 1;
821  }
822 
823  nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
824 
825  if (!buffer_dirty(path[level].bp_bh))
826  mark_buffer_dirty(path[level].bp_bh);
827  if (!buffer_dirty(path[level].bp_sib_bh))
828  mark_buffer_dirty(path[level].bp_sib_bh);
829 
830  path[level + 1].bp_index++;
831  nilfs_btree_promote_key(btree, path, level + 1,
832  nilfs_btree_node_get_key(right, 0));
833  path[level + 1].bp_index--;
834 
835  if (move) {
836  brelse(path[level].bp_bh);
837  path[level].bp_bh = path[level].bp_sib_bh;
838  path[level].bp_sib_bh = NULL;
839  path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
840  path[level + 1].bp_index++;
841  } else {
842  brelse(path[level].bp_sib_bh);
843  path[level].bp_sib_bh = NULL;
844  }
845 
846  nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
847 }
848 
849 static void nilfs_btree_split(struct nilfs_bmap *btree,
850  struct nilfs_btree_path *path,
851  int level, __u64 *keyp, __u64 *ptrp)
852 {
853  struct nilfs_btree_node *node, *right;
854  __u64 newkey;
855  __u64 newptr;
856  int nchildren, n, move, ncblk;
857 
858  node = nilfs_btree_get_nonroot_node(path, level);
859  right = nilfs_btree_get_sib_node(path, level);
860  nchildren = nilfs_btree_node_get_nchildren(node);
861  ncblk = nilfs_btree_nchildren_per_block(btree);
862  move = 0;
863 
864  n = (nchildren + 1) / 2;
865  if (n > nchildren - path[level].bp_index) {
866  n--;
867  move = 1;
868  }
869 
870  nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
871 
872  if (!buffer_dirty(path[level].bp_bh))
873  mark_buffer_dirty(path[level].bp_bh);
874  if (!buffer_dirty(path[level].bp_sib_bh))
875  mark_buffer_dirty(path[level].bp_sib_bh);
876 
877  newkey = nilfs_btree_node_get_key(right, 0);
878  newptr = path[level].bp_newreq.bpr_ptr;
879 
880  if (move) {
881  path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
882  nilfs_btree_node_insert(right, path[level].bp_index,
883  *keyp, *ptrp, ncblk);
884 
885  *keyp = nilfs_btree_node_get_key(right, 0);
886  *ptrp = path[level].bp_newreq.bpr_ptr;
887 
888  brelse(path[level].bp_bh);
889  path[level].bp_bh = path[level].bp_sib_bh;
890  path[level].bp_sib_bh = NULL;
891  } else {
892  nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
893 
894  *keyp = nilfs_btree_node_get_key(right, 0);
895  *ptrp = path[level].bp_newreq.bpr_ptr;
896 
897  brelse(path[level].bp_sib_bh);
898  path[level].bp_sib_bh = NULL;
899  }
900 
901  path[level + 1].bp_index++;
902 }
903 
904 static void nilfs_btree_grow(struct nilfs_bmap *btree,
905  struct nilfs_btree_path *path,
906  int level, __u64 *keyp, __u64 *ptrp)
907 {
908  struct nilfs_btree_node *root, *child;
909  int n, ncblk;
910 
911  root = nilfs_btree_get_root(btree);
912  child = nilfs_btree_get_sib_node(path, level);
913  ncblk = nilfs_btree_nchildren_per_block(btree);
914 
915  n = nilfs_btree_node_get_nchildren(root);
916 
917  nilfs_btree_node_move_right(root, child, n,
919  nilfs_btree_node_set_level(root, level + 1);
920 
921  if (!buffer_dirty(path[level].bp_sib_bh))
922  mark_buffer_dirty(path[level].bp_sib_bh);
923 
924  path[level].bp_bh = path[level].bp_sib_bh;
925  path[level].bp_sib_bh = NULL;
926 
927  nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
928 
929  *keyp = nilfs_btree_node_get_key(child, 0);
930  *ptrp = path[level].bp_newreq.bpr_ptr;
931 }
932 
933 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
934  const struct nilfs_btree_path *path)
935 {
936  struct nilfs_btree_node *node;
937  int level, ncmax;
938 
939  if (path == NULL)
940  return NILFS_BMAP_INVALID_PTR;
941 
942  /* left sibling */
944  if (path[level].bp_index > 0) {
945  node = nilfs_btree_get_node(btree, path, level, &ncmax);
946  return nilfs_btree_node_get_ptr(node,
947  path[level].bp_index - 1,
948  ncmax);
949  }
950 
951  /* parent */
952  level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
953  if (level <= nilfs_btree_height(btree) - 1) {
954  node = nilfs_btree_get_node(btree, path, level, &ncmax);
955  return nilfs_btree_node_get_ptr(node, path[level].bp_index,
956  ncmax);
957  }
958 
959  return NILFS_BMAP_INVALID_PTR;
960 }
961 
962 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
963  const struct nilfs_btree_path *path,
964  __u64 key)
965 {
966  __u64 ptr;
967 
968  ptr = nilfs_bmap_find_target_seq(btree, key);
969  if (ptr != NILFS_BMAP_INVALID_PTR)
970  /* sequential access */
971  return ptr;
972  else {
973  ptr = nilfs_btree_find_near(btree, path);
974  if (ptr != NILFS_BMAP_INVALID_PTR)
975  /* near */
976  return ptr;
977  }
978  /* block group */
979  return nilfs_bmap_find_target_in_group(btree);
980 }
981 
982 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
983  struct nilfs_btree_path *path,
984  int *levelp, __u64 key, __u64 ptr,
985  struct nilfs_bmap_stats *stats)
986 {
987  struct buffer_head *bh;
988  struct nilfs_btree_node *node, *parent, *sib;
989  __u64 sibptr;
990  int pindex, level, ncmax, ncblk, ret;
991  struct inode *dat = NULL;
992 
993  stats->bs_nblocks = 0;
994  level = NILFS_BTREE_LEVEL_DATA;
995 
996  /* allocate a new ptr for data block */
997  if (NILFS_BMAP_USE_VBN(btree)) {
998  path[level].bp_newreq.bpr_ptr =
999  nilfs_btree_find_target_v(btree, path, key);
1000  dat = nilfs_bmap_get_dat(btree);
1001  }
1002 
1003  ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1004  if (ret < 0)
1005  goto err_out_data;
1006 
1007  ncblk = nilfs_btree_nchildren_per_block(btree);
1008 
1009  for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1010  level < nilfs_btree_height(btree) - 1;
1011  level++) {
1012  node = nilfs_btree_get_nonroot_node(path, level);
1013  if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1014  path[level].bp_op = nilfs_btree_do_insert;
1015  stats->bs_nblocks++;
1016  goto out;
1017  }
1018 
1019  parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1020  pindex = path[level + 1].bp_index;
1021 
1022  /* left sibling */
1023  if (pindex > 0) {
1024  sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1025  ncmax);
1026  ret = nilfs_btree_get_block(btree, sibptr, &bh);
1027  if (ret < 0)
1028  goto err_out_child_node;
1029  sib = (struct nilfs_btree_node *)bh->b_data;
1030  if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1031  path[level].bp_sib_bh = bh;
1032  path[level].bp_op = nilfs_btree_carry_left;
1033  stats->bs_nblocks++;
1034  goto out;
1035  } else {
1036  brelse(bh);
1037  }
1038  }
1039 
1040  /* right sibling */
1041  if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1042  sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1043  ncmax);
1044  ret = nilfs_btree_get_block(btree, sibptr, &bh);
1045  if (ret < 0)
1046  goto err_out_child_node;
1047  sib = (struct nilfs_btree_node *)bh->b_data;
1048  if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1049  path[level].bp_sib_bh = bh;
1050  path[level].bp_op = nilfs_btree_carry_right;
1051  stats->bs_nblocks++;
1052  goto out;
1053  } else {
1054  brelse(bh);
1055  }
1056  }
1057 
1058  /* split */
1059  path[level].bp_newreq.bpr_ptr =
1060  path[level - 1].bp_newreq.bpr_ptr + 1;
1061  ret = nilfs_bmap_prepare_alloc_ptr(btree,
1062  &path[level].bp_newreq, dat);
1063  if (ret < 0)
1064  goto err_out_child_node;
1065  ret = nilfs_btree_get_new_block(btree,
1066  path[level].bp_newreq.bpr_ptr,
1067  &bh);
1068  if (ret < 0)
1069  goto err_out_curr_node;
1070 
1071  stats->bs_nblocks++;
1072 
1073  sib = (struct nilfs_btree_node *)bh->b_data;
1074  nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1075  path[level].bp_sib_bh = bh;
1076  path[level].bp_op = nilfs_btree_split;
1077  }
1078 
1079  /* root */
1080  node = nilfs_btree_get_root(btree);
1081  if (nilfs_btree_node_get_nchildren(node) <
1083  path[level].bp_op = nilfs_btree_do_insert;
1084  stats->bs_nblocks++;
1085  goto out;
1086  }
1087 
1088  /* grow */
1089  path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1090  ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1091  if (ret < 0)
1092  goto err_out_child_node;
1093  ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1094  &bh);
1095  if (ret < 0)
1096  goto err_out_curr_node;
1097 
1098  nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1099  0, level, 0, ncblk, NULL, NULL);
1100  path[level].bp_sib_bh = bh;
1101  path[level].bp_op = nilfs_btree_grow;
1102 
1103  level++;
1104  path[level].bp_op = nilfs_btree_do_insert;
1105 
1106  /* a newly-created node block and a data block are added */
1107  stats->bs_nblocks += 2;
1108 
1109  /* success */
1110  out:
1111  *levelp = level;
1112  return ret;
1113 
1114  /* error */
1115  err_out_curr_node:
1116  nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1117  err_out_child_node:
1118  for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1119  nilfs_btnode_delete(path[level].bp_sib_bh);
1120  nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1121 
1122  }
1123 
1124  nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1125  err_out_data:
1126  *levelp = level;
1127  stats->bs_nblocks = 0;
1128  return ret;
1129 }
1130 
1131 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1132  struct nilfs_btree_path *path,
1133  int maxlevel, __u64 key, __u64 ptr)
1134 {
1135  struct inode *dat = NULL;
1136  int level;
1137 
1138  set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1140  if (NILFS_BMAP_USE_VBN(btree)) {
1141  nilfs_bmap_set_target_v(btree, key, ptr);
1142  dat = nilfs_bmap_get_dat(btree);
1143  }
1144 
1145  for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1146  nilfs_bmap_commit_alloc_ptr(btree,
1147  &path[level - 1].bp_newreq, dat);
1148  path[level].bp_op(btree, path, level, &key, &ptr);
1149  }
1150 
1151  if (!nilfs_bmap_dirty(btree))
1152  nilfs_bmap_set_dirty(btree);
1153 }
1154 
1155 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1156 {
1157  struct nilfs_btree_path *path;
1158  struct nilfs_bmap_stats stats;
1159  int level, ret;
1160 
1161  path = nilfs_btree_alloc_path();
1162  if (path == NULL)
1163  return -ENOMEM;
1164 
1165  ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1167  if (ret != -ENOENT) {
1168  if (ret == 0)
1169  ret = -EEXIST;
1170  goto out;
1171  }
1172 
1173  ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1174  if (ret < 0)
1175  goto out;
1176  nilfs_btree_commit_insert(btree, path, level, key, ptr);
1177  nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1178 
1179  out:
1180  nilfs_btree_free_path(path);
1181  return ret;
1182 }
1183 
1184 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1185  struct nilfs_btree_path *path,
1186  int level, __u64 *keyp, __u64 *ptrp)
1187 {
1188  struct nilfs_btree_node *node;
1189  int ncblk;
1190 
1191  if (level < nilfs_btree_height(btree) - 1) {
1192  node = nilfs_btree_get_nonroot_node(path, level);
1193  ncblk = nilfs_btree_nchildren_per_block(btree);
1194  nilfs_btree_node_delete(node, path[level].bp_index,
1195  keyp, ptrp, ncblk);
1196  if (!buffer_dirty(path[level].bp_bh))
1197  mark_buffer_dirty(path[level].bp_bh);
1198  if (path[level].bp_index == 0)
1199  nilfs_btree_promote_key(btree, path, level + 1,
1200  nilfs_btree_node_get_key(node, 0));
1201  } else {
1202  node = nilfs_btree_get_root(btree);
1203  nilfs_btree_node_delete(node, path[level].bp_index,
1204  keyp, ptrp,
1206  }
1207 }
1208 
1209 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1210  struct nilfs_btree_path *path,
1211  int level, __u64 *keyp, __u64 *ptrp)
1212 {
1213  struct nilfs_btree_node *node, *left;
1214  int nchildren, lnchildren, n, ncblk;
1215 
1216  nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1217 
1218  node = nilfs_btree_get_nonroot_node(path, level);
1219  left = nilfs_btree_get_sib_node(path, level);
1220  nchildren = nilfs_btree_node_get_nchildren(node);
1221  lnchildren = nilfs_btree_node_get_nchildren(left);
1222  ncblk = nilfs_btree_nchildren_per_block(btree);
1223 
1224  n = (nchildren + lnchildren) / 2 - nchildren;
1225 
1226  nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1227 
1228  if (!buffer_dirty(path[level].bp_bh))
1229  mark_buffer_dirty(path[level].bp_bh);
1230  if (!buffer_dirty(path[level].bp_sib_bh))
1231  mark_buffer_dirty(path[level].bp_sib_bh);
1232 
1233  nilfs_btree_promote_key(btree, path, level + 1,
1234  nilfs_btree_node_get_key(node, 0));
1235 
1236  brelse(path[level].bp_sib_bh);
1237  path[level].bp_sib_bh = NULL;
1238  path[level].bp_index += n;
1239 }
1240 
1241 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1242  struct nilfs_btree_path *path,
1243  int level, __u64 *keyp, __u64 *ptrp)
1244 {
1245  struct nilfs_btree_node *node, *right;
1246  int nchildren, rnchildren, n, ncblk;
1247 
1248  nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1249 
1250  node = nilfs_btree_get_nonroot_node(path, level);
1251  right = nilfs_btree_get_sib_node(path, level);
1252  nchildren = nilfs_btree_node_get_nchildren(node);
1253  rnchildren = nilfs_btree_node_get_nchildren(right);
1254  ncblk = nilfs_btree_nchildren_per_block(btree);
1255 
1256  n = (nchildren + rnchildren) / 2 - nchildren;
1257 
1258  nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1259 
1260  if (!buffer_dirty(path[level].bp_bh))
1261  mark_buffer_dirty(path[level].bp_bh);
1262  if (!buffer_dirty(path[level].bp_sib_bh))
1263  mark_buffer_dirty(path[level].bp_sib_bh);
1264 
1265  path[level + 1].bp_index++;
1266  nilfs_btree_promote_key(btree, path, level + 1,
1267  nilfs_btree_node_get_key(right, 0));
1268  path[level + 1].bp_index--;
1269 
1270  brelse(path[level].bp_sib_bh);
1271  path[level].bp_sib_bh = NULL;
1272 }
1273 
1274 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1275  struct nilfs_btree_path *path,
1276  int level, __u64 *keyp, __u64 *ptrp)
1277 {
1278  struct nilfs_btree_node *node, *left;
1279  int n, ncblk;
1280 
1281  nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1282 
1283  node = nilfs_btree_get_nonroot_node(path, level);
1284  left = nilfs_btree_get_sib_node(path, level);
1285  ncblk = nilfs_btree_nchildren_per_block(btree);
1286 
1287  n = nilfs_btree_node_get_nchildren(node);
1288 
1289  nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1290 
1291  if (!buffer_dirty(path[level].bp_sib_bh))
1292  mark_buffer_dirty(path[level].bp_sib_bh);
1293 
1294  nilfs_btnode_delete(path[level].bp_bh);
1295  path[level].bp_bh = path[level].bp_sib_bh;
1296  path[level].bp_sib_bh = NULL;
1297  path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1298 }
1299 
1300 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1301  struct nilfs_btree_path *path,
1302  int level, __u64 *keyp, __u64 *ptrp)
1303 {
1304  struct nilfs_btree_node *node, *right;
1305  int n, ncblk;
1306 
1307  nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1308 
1309  node = nilfs_btree_get_nonroot_node(path, level);
1310  right = nilfs_btree_get_sib_node(path, level);
1311  ncblk = nilfs_btree_nchildren_per_block(btree);
1312 
1313  n = nilfs_btree_node_get_nchildren(right);
1314 
1315  nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1316 
1317  if (!buffer_dirty(path[level].bp_bh))
1318  mark_buffer_dirty(path[level].bp_bh);
1319 
1320  nilfs_btnode_delete(path[level].bp_sib_bh);
1321  path[level].bp_sib_bh = NULL;
1322  path[level + 1].bp_index++;
1323 }
1324 
1325 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1326  struct nilfs_btree_path *path,
1327  int level, __u64 *keyp, __u64 *ptrp)
1328 {
1329  struct nilfs_btree_node *root, *child;
1330  int n, ncblk;
1331 
1332  nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1333 
1334  root = nilfs_btree_get_root(btree);
1335  child = nilfs_btree_get_nonroot_node(path, level);
1336  ncblk = nilfs_btree_nchildren_per_block(btree);
1337 
1338  nilfs_btree_node_delete(root, 0, NULL, NULL,
1340  nilfs_btree_node_set_level(root, level);
1341  n = nilfs_btree_node_get_nchildren(child);
1342  nilfs_btree_node_move_left(root, child, n,
1344 
1345  nilfs_btnode_delete(path[level].bp_bh);
1346  path[level].bp_bh = NULL;
1347 }
1348 
1349 static void nilfs_btree_nop(struct nilfs_bmap *btree,
1350  struct nilfs_btree_path *path,
1351  int level, __u64 *keyp, __u64 *ptrp)
1352 {
1353 }
1354 
1355 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1356  struct nilfs_btree_path *path,
1357  int *levelp,
1358  struct nilfs_bmap_stats *stats,
1359  struct inode *dat)
1360 {
1361  struct buffer_head *bh;
1362  struct nilfs_btree_node *node, *parent, *sib;
1363  __u64 sibptr;
1364  int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1365 
1366  ret = 0;
1367  stats->bs_nblocks = 0;
1368  ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1369  ncblk = nilfs_btree_nchildren_per_block(btree);
1370 
1371  for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1372  level < nilfs_btree_height(btree) - 1;
1373  level++) {
1374  node = nilfs_btree_get_nonroot_node(path, level);
1375  path[level].bp_oldreq.bpr_ptr =
1376  nilfs_btree_node_get_ptr(node, dindex, ncblk);
1377  ret = nilfs_bmap_prepare_end_ptr(btree,
1378  &path[level].bp_oldreq, dat);
1379  if (ret < 0)
1380  goto err_out_child_node;
1381 
1382  if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1383  path[level].bp_op = nilfs_btree_do_delete;
1384  stats->bs_nblocks++;
1385  goto out;
1386  }
1387 
1388  parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1389  pindex = path[level + 1].bp_index;
1390  dindex = pindex;
1391 
1392  if (pindex > 0) {
1393  /* left sibling */
1394  sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1395  ncmax);
1396  ret = nilfs_btree_get_block(btree, sibptr, &bh);
1397  if (ret < 0)
1398  goto err_out_curr_node;
1399  sib = (struct nilfs_btree_node *)bh->b_data;
1400  if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1401  path[level].bp_sib_bh = bh;
1402  path[level].bp_op = nilfs_btree_borrow_left;
1403  stats->bs_nblocks++;
1404  goto out;
1405  } else {
1406  path[level].bp_sib_bh = bh;
1407  path[level].bp_op = nilfs_btree_concat_left;
1408  stats->bs_nblocks++;
1409  /* continue; */
1410  }
1411  } else if (pindex <
1412  nilfs_btree_node_get_nchildren(parent) - 1) {
1413  /* right sibling */
1414  sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1415  ncmax);
1416  ret = nilfs_btree_get_block(btree, sibptr, &bh);
1417  if (ret < 0)
1418  goto err_out_curr_node;
1419  sib = (struct nilfs_btree_node *)bh->b_data;
1420  if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1421  path[level].bp_sib_bh = bh;
1422  path[level].bp_op = nilfs_btree_borrow_right;
1423  stats->bs_nblocks++;
1424  goto out;
1425  } else {
1426  path[level].bp_sib_bh = bh;
1427  path[level].bp_op = nilfs_btree_concat_right;
1428  stats->bs_nblocks++;
1429  /*
1430  * When merging right sibling node
1431  * into the current node, pointer to
1432  * the right sibling node must be
1433  * terminated instead. The adjustment
1434  * below is required for that.
1435  */
1436  dindex = pindex + 1;
1437  /* continue; */
1438  }
1439  } else {
1440  /* no siblings */
1441  /* the only child of the root node */
1442  WARN_ON(level != nilfs_btree_height(btree) - 2);
1443  if (nilfs_btree_node_get_nchildren(node) - 1 <=
1445  path[level].bp_op = nilfs_btree_shrink;
1446  stats->bs_nblocks += 2;
1447  level++;
1448  path[level].bp_op = nilfs_btree_nop;
1449  goto shrink_root_child;
1450  } else {
1451  path[level].bp_op = nilfs_btree_do_delete;
1452  stats->bs_nblocks++;
1453  goto out;
1454  }
1455  }
1456  }
1457 
1458  /* child of the root node is deleted */
1459  path[level].bp_op = nilfs_btree_do_delete;
1460  stats->bs_nblocks++;
1461 
1462 shrink_root_child:
1463  node = nilfs_btree_get_root(btree);
1464  path[level].bp_oldreq.bpr_ptr =
1465  nilfs_btree_node_get_ptr(node, dindex,
1467 
1468  ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1469  if (ret < 0)
1470  goto err_out_child_node;
1471 
1472  /* success */
1473  out:
1474  *levelp = level;
1475  return ret;
1476 
1477  /* error */
1478  err_out_curr_node:
1479  nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1480  err_out_child_node:
1481  for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1482  brelse(path[level].bp_sib_bh);
1483  nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1484  }
1485  *levelp = level;
1486  stats->bs_nblocks = 0;
1487  return ret;
1488 }
1489 
1490 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1491  struct nilfs_btree_path *path,
1492  int maxlevel, struct inode *dat)
1493 {
1494  int level;
1495 
1496  for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1497  nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1498  path[level].bp_op(btree, path, level, NULL, NULL);
1499  }
1500 
1501  if (!nilfs_bmap_dirty(btree))
1502  nilfs_bmap_set_dirty(btree);
1503 }
1504 
1505 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1506 
1507 {
1508  struct nilfs_btree_path *path;
1509  struct nilfs_bmap_stats stats;
1510  struct inode *dat;
1511  int level, ret;
1512 
1513  path = nilfs_btree_alloc_path();
1514  if (path == NULL)
1515  return -ENOMEM;
1516 
1517  ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1519  if (ret < 0)
1520  goto out;
1521 
1522 
1523  dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1524 
1525  ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1526  if (ret < 0)
1527  goto out;
1528  nilfs_btree_commit_delete(btree, path, level, dat);
1529  nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1530 
1531 out:
1532  nilfs_btree_free_path(path);
1533  return ret;
1534 }
1535 
1536 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1537 {
1538  struct nilfs_btree_path *path;
1539  int ret;
1540 
1541  path = nilfs_btree_alloc_path();
1542  if (path == NULL)
1543  return -ENOMEM;
1544 
1545  ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1546 
1547  nilfs_btree_free_path(path);
1548 
1549  return ret;
1550 }
1551 
1552 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1553 {
1554  struct buffer_head *bh;
1555  struct nilfs_btree_node *root, *node;
1556  __u64 maxkey, nextmaxkey;
1557  __u64 ptr;
1558  int nchildren, ret;
1559 
1560  root = nilfs_btree_get_root(btree);
1561  switch (nilfs_btree_height(btree)) {
1562  case 2:
1563  bh = NULL;
1564  node = root;
1565  break;
1566  case 3:
1567  nchildren = nilfs_btree_node_get_nchildren(root);
1568  if (nchildren > 1)
1569  return 0;
1570  ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1572  ret = nilfs_btree_get_block(btree, ptr, &bh);
1573  if (ret < 0)
1574  return ret;
1575  node = (struct nilfs_btree_node *)bh->b_data;
1576  break;
1577  default:
1578  return 0;
1579  }
1580 
1581  nchildren = nilfs_btree_node_get_nchildren(node);
1582  maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1583  nextmaxkey = (nchildren > 1) ?
1584  nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1585  if (bh != NULL)
1586  brelse(bh);
1587 
1588  return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1589 }
1590 
1591 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1592  __u64 *keys, __u64 *ptrs, int nitems)
1593 {
1594  struct buffer_head *bh;
1595  struct nilfs_btree_node *node, *root;
1596  __le64 *dkeys;
1597  __le64 *dptrs;
1598  __u64 ptr;
1599  int nchildren, ncmax, i, ret;
1600 
1601  root = nilfs_btree_get_root(btree);
1602  switch (nilfs_btree_height(btree)) {
1603  case 2:
1604  bh = NULL;
1605  node = root;
1607  break;
1608  case 3:
1609  nchildren = nilfs_btree_node_get_nchildren(root);
1610  WARN_ON(nchildren > 1);
1611  ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1613  ret = nilfs_btree_get_block(btree, ptr, &bh);
1614  if (ret < 0)
1615  return ret;
1616  node = (struct nilfs_btree_node *)bh->b_data;
1617  ncmax = nilfs_btree_nchildren_per_block(btree);
1618  break;
1619  default:
1620  node = NULL;
1621  return -EINVAL;
1622  }
1623 
1624  nchildren = nilfs_btree_node_get_nchildren(node);
1625  if (nchildren < nitems)
1626  nitems = nchildren;
1627  dkeys = nilfs_btree_node_dkeys(node);
1628  dptrs = nilfs_btree_node_dptrs(node, ncmax);
1629  for (i = 0; i < nitems; i++) {
1630  keys[i] = le64_to_cpu(dkeys[i]);
1631  ptrs[i] = le64_to_cpu(dptrs[i]);
1632  }
1633 
1634  if (bh != NULL)
1635  brelse(bh);
1636 
1637  return nitems;
1638 }
1639 
1640 static int
1641 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1642  union nilfs_bmap_ptr_req *dreq,
1643  union nilfs_bmap_ptr_req *nreq,
1644  struct buffer_head **bhp,
1645  struct nilfs_bmap_stats *stats)
1646 {
1647  struct buffer_head *bh;
1648  struct inode *dat = NULL;
1649  int ret;
1650 
1651  stats->bs_nblocks = 0;
1652 
1653  /* for data */
1654  /* cannot find near ptr */
1655  if (NILFS_BMAP_USE_VBN(btree)) {
1656  dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1657  dat = nilfs_bmap_get_dat(btree);
1658  }
1659 
1660  ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1661  if (ret < 0)
1662  return ret;
1663 
1664  *bhp = NULL;
1665  stats->bs_nblocks++;
1666  if (nreq != NULL) {
1667  nreq->bpr_ptr = dreq->bpr_ptr + 1;
1668  ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1669  if (ret < 0)
1670  goto err_out_dreq;
1671 
1672  ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1673  if (ret < 0)
1674  goto err_out_nreq;
1675 
1676  *bhp = bh;
1677  stats->bs_nblocks++;
1678  }
1679 
1680  /* success */
1681  return 0;
1682 
1683  /* error */
1684  err_out_nreq:
1685  nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1686  err_out_dreq:
1687  nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1688  stats->bs_nblocks = 0;
1689  return ret;
1690 
1691 }
1692 
1693 static void
1694 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1695  __u64 key, __u64 ptr,
1696  const __u64 *keys, const __u64 *ptrs,
1697  int n,
1698  union nilfs_bmap_ptr_req *dreq,
1699  union nilfs_bmap_ptr_req *nreq,
1700  struct buffer_head *bh)
1701 {
1702  struct nilfs_btree_node *node;
1703  struct inode *dat;
1704  __u64 tmpptr;
1705  int ncblk;
1706 
1707  /* free resources */
1708  if (btree->b_ops->bop_clear != NULL)
1709  btree->b_ops->bop_clear(btree);
1710 
1711  /* ptr must be a pointer to a buffer head. */
1712  set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1713 
1714  /* convert and insert */
1715  dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1716  nilfs_btree_init(btree);
1717  if (nreq != NULL) {
1718  nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1719  nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1720 
1721  /* create child node at level 1 */
1722  node = (struct nilfs_btree_node *)bh->b_data;
1723  ncblk = nilfs_btree_nchildren_per_block(btree);
1724  nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1725  nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1726  if (!buffer_dirty(bh))
1727  mark_buffer_dirty(bh);
1728  if (!nilfs_bmap_dirty(btree))
1729  nilfs_bmap_set_dirty(btree);
1730 
1731  brelse(bh);
1732 
1733  /* create root node at level 2 */
1734  node = nilfs_btree_get_root(btree);
1735  tmpptr = nreq->bpr_ptr;
1736  nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1738  &keys[0], &tmpptr);
1739  } else {
1740  nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1741 
1742  /* create root node at level 1 */
1743  node = nilfs_btree_get_root(btree);
1744  nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1746  keys, ptrs);
1747  nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1749  if (!nilfs_bmap_dirty(btree))
1750  nilfs_bmap_set_dirty(btree);
1751  }
1752 
1753  if (NILFS_BMAP_USE_VBN(btree))
1754  nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1755 }
1756 
1767  __u64 key, __u64 ptr,
1768  const __u64 *keys, const __u64 *ptrs, int n)
1769 {
1770  struct buffer_head *bh;
1771  union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1772  struct nilfs_bmap_stats stats;
1773  int ret;
1774 
1775  if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1776  di = &dreq;
1777  ni = NULL;
1778  } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1779  1 << btree->b_inode->i_blkbits)) {
1780  di = &dreq;
1781  ni = &nreq;
1782  } else {
1783  di = NULL;
1784  ni = NULL;
1785  BUG();
1786  }
1787 
1788  ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1789  &stats);
1790  if (ret < 0)
1791  return ret;
1792  nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1793  di, ni, bh);
1794  nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1795  return 0;
1796 }
1797 
1798 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1799  struct nilfs_btree_path *path,
1800  int level,
1801  struct buffer_head *bh)
1802 {
1803  while ((++level < nilfs_btree_height(btree) - 1) &&
1804  !buffer_dirty(path[level].bp_bh))
1805  mark_buffer_dirty(path[level].bp_bh);
1806 
1807  return 0;
1808 }
1809 
1810 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1811  struct nilfs_btree_path *path,
1812  int level, struct inode *dat)
1813 {
1814  struct nilfs_btree_node *parent;
1815  int ncmax, ret;
1816 
1817  parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1818  path[level].bp_oldreq.bpr_ptr =
1819  nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1820  ncmax);
1821  path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1822  ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1823  &path[level].bp_newreq.bpr_req);
1824  if (ret < 0)
1825  return ret;
1826 
1827  if (buffer_nilfs_node(path[level].bp_bh)) {
1828  path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1829  path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1830  path[level].bp_ctxt.bh = path[level].bp_bh;
1832  &NILFS_BMAP_I(btree)->i_btnode_cache,
1833  &path[level].bp_ctxt);
1834  if (ret < 0) {
1836  &path[level].bp_oldreq.bpr_req,
1837  &path[level].bp_newreq.bpr_req);
1838  return ret;
1839  }
1840  }
1841 
1842  return 0;
1843 }
1844 
1845 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1846  struct nilfs_btree_path *path,
1847  int level, struct inode *dat)
1848 {
1849  struct nilfs_btree_node *parent;
1850  int ncmax;
1851 
1852  nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1853  &path[level].bp_newreq.bpr_req,
1854  btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1855 
1856  if (buffer_nilfs_node(path[level].bp_bh)) {
1858  &NILFS_BMAP_I(btree)->i_btnode_cache,
1859  &path[level].bp_ctxt);
1860  path[level].bp_bh = path[level].bp_ctxt.bh;
1861  }
1862  set_buffer_nilfs_volatile(path[level].bp_bh);
1863 
1864  parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1865  nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1866  path[level].bp_newreq.bpr_ptr, ncmax);
1867 }
1868 
1869 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1870  struct nilfs_btree_path *path,
1871  int level, struct inode *dat)
1872 {
1873  nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1874  &path[level].bp_newreq.bpr_req);
1875  if (buffer_nilfs_node(path[level].bp_bh))
1877  &NILFS_BMAP_I(btree)->i_btnode_cache,
1878  &path[level].bp_ctxt);
1879 }
1880 
1881 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1882  struct nilfs_btree_path *path,
1883  int minlevel, int *maxlevelp,
1884  struct inode *dat)
1885 {
1886  int level, ret;
1887 
1888  level = minlevel;
1889  if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1890  ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1891  if (ret < 0)
1892  return ret;
1893  }
1894  while ((++level < nilfs_btree_height(btree) - 1) &&
1895  !buffer_dirty(path[level].bp_bh)) {
1896 
1897  WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1898  ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1899  if (ret < 0)
1900  goto out;
1901  }
1902 
1903  /* success */
1904  *maxlevelp = level - 1;
1905  return 0;
1906 
1907  /* error */
1908  out:
1909  while (--level > minlevel)
1910  nilfs_btree_abort_update_v(btree, path, level, dat);
1911  if (!buffer_nilfs_volatile(path[level].bp_bh))
1912  nilfs_btree_abort_update_v(btree, path, level, dat);
1913  return ret;
1914 }
1915 
1916 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
1917  struct nilfs_btree_path *path,
1918  int minlevel, int maxlevel,
1919  struct buffer_head *bh,
1920  struct inode *dat)
1921 {
1922  int level;
1923 
1924  if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1925  nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1926 
1927  for (level = minlevel + 1; level <= maxlevel; level++)
1928  nilfs_btree_commit_update_v(btree, path, level, dat);
1929 }
1930 
1931 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
1932  struct nilfs_btree_path *path,
1933  int level, struct buffer_head *bh)
1934 {
1935  int maxlevel = 0, ret;
1936  struct nilfs_btree_node *parent;
1937  struct inode *dat = nilfs_bmap_get_dat(btree);
1938  __u64 ptr;
1939  int ncmax;
1940 
1941  get_bh(bh);
1942  path[level].bp_bh = bh;
1943  ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1944  dat);
1945  if (ret < 0)
1946  goto out;
1947 
1948  if (buffer_nilfs_volatile(path[level].bp_bh)) {
1949  parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1950  ptr = nilfs_btree_node_get_ptr(parent,
1951  path[level + 1].bp_index,
1952  ncmax);
1953  ret = nilfs_dat_mark_dirty(dat, ptr);
1954  if (ret < 0)
1955  goto out;
1956  }
1957 
1958  nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1959 
1960  out:
1961  brelse(path[level].bp_bh);
1962  path[level].bp_bh = NULL;
1963  return ret;
1964 }
1965 
1966 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
1967  struct buffer_head *bh)
1968 {
1969  struct nilfs_btree_path *path;
1970  struct nilfs_btree_node *node;
1971  __u64 key;
1972  int level, ret;
1973 
1974  WARN_ON(!buffer_dirty(bh));
1975 
1976  path = nilfs_btree_alloc_path();
1977  if (path == NULL)
1978  return -ENOMEM;
1979 
1980  if (buffer_nilfs_node(bh)) {
1981  node = (struct nilfs_btree_node *)bh->b_data;
1982  key = nilfs_btree_node_get_key(node, 0);
1983  level = nilfs_btree_node_get_level(node);
1984  } else {
1985  key = nilfs_bmap_data_get_key(btree, bh);
1986  level = NILFS_BTREE_LEVEL_DATA;
1987  }
1988 
1989  ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
1990  if (ret < 0) {
1991  if (unlikely(ret == -ENOENT))
1992  printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1993  __func__, (unsigned long long)key, level);
1994  goto out;
1995  }
1996 
1997  ret = NILFS_BMAP_USE_VBN(btree) ?
1998  nilfs_btree_propagate_v(btree, path, level, bh) :
1999  nilfs_btree_propagate_p(btree, path, level, bh);
2000 
2001  out:
2002  nilfs_btree_free_path(path);
2003 
2004  return ret;
2005 }
2006 
2007 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2008  struct buffer_head *bh)
2009 {
2010  return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2011 }
2012 
2013 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2014  struct list_head *lists,
2015  struct buffer_head *bh)
2016 {
2017  struct list_head *head;
2018  struct buffer_head *cbh;
2019  struct nilfs_btree_node *node, *cnode;
2020  __u64 key, ckey;
2021  int level;
2022 
2023  get_bh(bh);
2024  node = (struct nilfs_btree_node *)bh->b_data;
2025  key = nilfs_btree_node_get_key(node, 0);
2026  level = nilfs_btree_node_get_level(node);
2027  if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2028  level >= NILFS_BTREE_LEVEL_MAX) {
2029  dump_stack();
2031  "%s: invalid btree level: %d (key=%llu, ino=%lu, "
2032  "blocknr=%llu)\n",
2033  __func__, level, (unsigned long long)key,
2034  NILFS_BMAP_I(btree)->vfs_inode.i_ino,
2035  (unsigned long long)bh->b_blocknr);
2036  return;
2037  }
2038 
2039  list_for_each(head, &lists[level]) {
2040  cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2041  cnode = (struct nilfs_btree_node *)cbh->b_data;
2042  ckey = nilfs_btree_node_get_key(cnode, 0);
2043  if (key < ckey)
2044  break;
2045  }
2046  list_add_tail(&bh->b_assoc_buffers, head);
2047 }
2048 
2049 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2050  struct list_head *listp)
2051 {
2052  struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
2053  struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2054  struct pagevec pvec;
2055  struct buffer_head *bh, *head;
2056  pgoff_t index = 0;
2057  int level, i;
2058 
2059  for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2060  level < NILFS_BTREE_LEVEL_MAX;
2061  level++)
2062  INIT_LIST_HEAD(&lists[level]);
2063 
2064  pagevec_init(&pvec, 0);
2065 
2066  while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2067  PAGEVEC_SIZE)) {
2068  for (i = 0; i < pagevec_count(&pvec); i++) {
2069  bh = head = page_buffers(pvec.pages[i]);
2070  do {
2071  if (buffer_dirty(bh))
2072  nilfs_btree_add_dirty_buffer(btree,
2073  lists, bh);
2074  } while ((bh = bh->b_this_page) != head);
2075  }
2076  pagevec_release(&pvec);
2077  cond_resched();
2078  }
2079 
2080  for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2081  level < NILFS_BTREE_LEVEL_MAX;
2082  level++)
2083  list_splice_tail(&lists[level], listp);
2084 }
2085 
2086 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2087  struct nilfs_btree_path *path,
2088  int level,
2089  struct buffer_head **bh,
2090  sector_t blocknr,
2091  union nilfs_binfo *binfo)
2092 {
2093  struct nilfs_btree_node *parent;
2094  __u64 key;
2095  __u64 ptr;
2096  int ncmax, ret;
2097 
2098  parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2099  ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2100  ncmax);
2101  if (buffer_nilfs_node(*bh)) {
2102  path[level].bp_ctxt.oldkey = ptr;
2103  path[level].bp_ctxt.newkey = blocknr;
2104  path[level].bp_ctxt.bh = *bh;
2106  &NILFS_BMAP_I(btree)->i_btnode_cache,
2107  &path[level].bp_ctxt);
2108  if (ret < 0)
2109  return ret;
2111  &NILFS_BMAP_I(btree)->i_btnode_cache,
2112  &path[level].bp_ctxt);
2113  *bh = path[level].bp_ctxt.bh;
2114  }
2115 
2116  nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2117  ncmax);
2118 
2119  key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2120  /* on-disk format */
2121  binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2122  binfo->bi_dat.bi_level = level;
2123 
2124  return 0;
2125 }
2126 
2127 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2128  struct nilfs_btree_path *path,
2129  int level,
2130  struct buffer_head **bh,
2131  sector_t blocknr,
2132  union nilfs_binfo *binfo)
2133 {
2134  struct nilfs_btree_node *parent;
2135  struct inode *dat = nilfs_bmap_get_dat(btree);
2136  __u64 key;
2137  __u64 ptr;
2138  union nilfs_bmap_ptr_req req;
2139  int ncmax, ret;
2140 
2141  parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2142  ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2143  ncmax);
2144  req.bpr_ptr = ptr;
2145  ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2146  if (ret < 0)
2147  return ret;
2148  nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2149 
2150  key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2151  /* on-disk format */
2152  binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2153  binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2154 
2155  return 0;
2156 }
2157 
2158 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2159  struct buffer_head **bh,
2160  sector_t blocknr,
2161  union nilfs_binfo *binfo)
2162 {
2163  struct nilfs_btree_path *path;
2164  struct nilfs_btree_node *node;
2165  __u64 key;
2166  int level, ret;
2167 
2168  path = nilfs_btree_alloc_path();
2169  if (path == NULL)
2170  return -ENOMEM;
2171 
2172  if (buffer_nilfs_node(*bh)) {
2173  node = (struct nilfs_btree_node *)(*bh)->b_data;
2174  key = nilfs_btree_node_get_key(node, 0);
2175  level = nilfs_btree_node_get_level(node);
2176  } else {
2177  key = nilfs_bmap_data_get_key(btree, *bh);
2178  level = NILFS_BTREE_LEVEL_DATA;
2179  }
2180 
2181  ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2182  if (ret < 0) {
2183  WARN_ON(ret == -ENOENT);
2184  goto out;
2185  }
2186 
2187  ret = NILFS_BMAP_USE_VBN(btree) ?
2188  nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2189  nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2190 
2191  out:
2192  nilfs_btree_free_path(path);
2193 
2194  return ret;
2195 }
2196 
2197 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2198  struct buffer_head **bh,
2199  sector_t blocknr,
2200  union nilfs_binfo *binfo)
2201 {
2202  struct nilfs_btree_node *node;
2203  __u64 key;
2204  int ret;
2205 
2206  ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2207  blocknr);
2208  if (ret < 0)
2209  return ret;
2210 
2211  if (buffer_nilfs_node(*bh)) {
2212  node = (struct nilfs_btree_node *)(*bh)->b_data;
2213  key = nilfs_btree_node_get_key(node, 0);
2214  } else
2215  key = nilfs_bmap_data_get_key(btree, *bh);
2216 
2217  /* on-disk format */
2218  binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2219  binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2220 
2221  return 0;
2222 }
2223 
2224 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2225 {
2226  struct buffer_head *bh;
2227  struct nilfs_btree_path *path;
2228  __u64 ptr;
2229  int ret;
2230 
2231  path = nilfs_btree_alloc_path();
2232  if (path == NULL)
2233  return -ENOMEM;
2234 
2235  ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2236  if (ret < 0) {
2237  WARN_ON(ret == -ENOENT);
2238  goto out;
2239  }
2240  ret = nilfs_btree_get_block(btree, ptr, &bh);
2241  if (ret < 0) {
2242  WARN_ON(ret == -ENOENT);
2243  goto out;
2244  }
2245 
2246  if (!buffer_dirty(bh))
2247  mark_buffer_dirty(bh);
2248  brelse(bh);
2249  if (!nilfs_bmap_dirty(btree))
2250  nilfs_bmap_set_dirty(btree);
2251 
2252  out:
2253  nilfs_btree_free_path(path);
2254  return ret;
2255 }
2256 
2257 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2258  .bop_lookup = nilfs_btree_lookup,
2259  .bop_lookup_contig = nilfs_btree_lookup_contig,
2260  .bop_insert = nilfs_btree_insert,
2261  .bop_delete = nilfs_btree_delete,
2262  .bop_clear = NULL,
2263 
2264  .bop_propagate = nilfs_btree_propagate,
2265 
2266  .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers,
2267 
2268  .bop_assign = nilfs_btree_assign,
2269  .bop_mark = nilfs_btree_mark,
2270 
2271  .bop_last_key = nilfs_btree_last_key,
2272  .bop_check_insert = NULL,
2273  .bop_check_delete = nilfs_btree_check_delete,
2274  .bop_gather_data = nilfs_btree_gather_data,
2275 };
2276 
2277 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2278  .bop_lookup = NULL,
2279  .bop_lookup_contig = NULL,
2280  .bop_insert = NULL,
2281  .bop_delete = NULL,
2282  .bop_clear = NULL,
2283 
2284  .bop_propagate = nilfs_btree_propagate_gc,
2285 
2286  .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers,
2287 
2288  .bop_assign = nilfs_btree_assign_gc,
2289  .bop_mark = NULL,
2290 
2291  .bop_last_key = NULL,
2292  .bop_check_insert = NULL,
2293  .bop_check_delete = NULL,
2294  .bop_gather_data = NULL,
2295 };
2296 
2298 {
2299  bmap->b_ops = &nilfs_btree_ops;
2300  bmap->b_nchildren_per_block =
2301  NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2302  return 0;
2303 }
2304 
2306 {
2307  bmap->b_ops = &nilfs_btree_ops_gc;
2308  bmap->b_nchildren_per_block =
2309  NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2310 }