Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
fix_node.c
Go to the documentation of this file.
1 /*
2  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4 
37 #include <linux/time.h>
38 #include <linux/slab.h>
39 #include <linux/string.h>
40 #include "reiserfs.h"
41 #include <linux/buffer_head.h>
42 
43 /* To make any changes in the tree we find a node, that contains item
44  to be changed/deleted or position in the node we insert a new item
45  to. We call this node S. To do balancing we need to decide what we
46  will shift to left/right neighbor, or to a new node, where new item
47  will be etc. To make this analysis simpler we build virtual
48  node. Virtual node is an array of items, that will replace items of
49  node S. (For instance if we are going to delete an item, virtual
50  node does not contain it). Virtual node keeps information about
51  item sizes and types, mergeability of first and last items, sizes
52  of all entries in directory item. We use this array of items when
53  calculating what we can shift to neighbors and how many nodes we
54  have to have if we do not any shiftings, if we shift to left/right
55  neighbor or to both. */
56 
57 /* taking item number in virtual node, returns number of item, that it has in source buffer */
58 static inline int old_item_num(int new_num, int affected_item_num, int mode)
59 {
60  if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
61  return new_num;
62 
63  if (mode == M_INSERT) {
64 
65  RFALSE(new_num == 0,
66  "vs-8005: for INSERT mode and item number of inserted item");
67 
68  return new_num - 1;
69  }
70 
71  RFALSE(mode != M_DELETE,
72  "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'",
73  mode);
74  /* delete mode */
75  return new_num + 1;
76 }
77 
78 static void create_virtual_node(struct tree_balance *tb, int h)
79 {
80  struct item_head *ih;
81  struct virtual_node *vn = tb->tb_vn;
82  int new_num;
83  struct buffer_head *Sh; /* this comes from tb->S[h] */
84 
85  Sh = PATH_H_PBUFFER(tb->tb_path, h);
86 
87  /* size of changed node */
88  vn->vn_size =
89  MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h];
90 
91  /* for internal nodes array if virtual items is not created */
92  if (h) {
93  vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE);
94  return;
95  }
96 
97  /* number of items in virtual node */
98  vn->vn_nr_item =
99  B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) -
100  ((vn->vn_mode == M_DELETE) ? 1 : 0);
101 
102  /* first virtual item */
103  vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
104  memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item));
105  vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item);
106 
107  /* first item in the node */
108  ih = B_N_PITEM_HEAD(Sh, 0);
109 
110  /* define the mergeability for 0-th item (if it is not being deleted) */
111  if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size)
112  && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
113  vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;
114 
115  /* go through all items those remain in the virtual node (except for the new (inserted) one) */
116  for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
117  int j;
118  struct virtual_item *vi = vn->vn_vi + new_num;
119  int is_affected =
120  ((new_num != vn->vn_affected_item_num) ? 0 : 1);
121 
122  if (is_affected && vn->vn_mode == M_INSERT)
123  continue;
124 
125  /* get item number in source node */
126  j = old_item_num(new_num, vn->vn_affected_item_num,
127  vn->vn_mode);
128 
129  vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
130  vi->vi_ih = ih + j;
131  vi->vi_item = B_I_PITEM(Sh, ih + j);
132  vi->vi_uarea = vn->vn_free_ptr;
133 
134  // FIXME: there is no check, that item operation did not
135  // consume too much memory
136  vn->vn_free_ptr +=
137  op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
138  if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
139  reiserfs_panic(tb->tb_sb, "vs-8030",
140  "virtual node space consumed");
141 
142  if (!is_affected)
143  /* this is not being changed */
144  continue;
145 
146  if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
147  vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
148  vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted
149  }
150  }
151 
152  /* virtual inserted item is not defined yet */
153  if (vn->vn_mode == M_INSERT) {
154  struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num;
155 
156  RFALSE(vn->vn_ins_ih == NULL,
157  "vs-8040: item header of inserted item is not specified");
158  vi->vi_item_len = tb->insert_size[0];
159  vi->vi_ih = vn->vn_ins_ih;
160  vi->vi_item = vn->vn_data;
161  vi->vi_uarea = vn->vn_free_ptr;
162 
163  op_create_vi(vn, vi, 0 /*not pasted or cut */ ,
164  tb->insert_size[0]);
165  }
166 
167  /* set right merge flag we take right delimiting key and check whether it is a mergeable item */
168  if (tb->CFR[0]) {
169  struct reiserfs_key *key;
170 
171  key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]);
172  if (op_is_left_mergeable(key, Sh->b_size)
173  && (vn->vn_mode != M_DELETE
174  || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1))
175  vn->vn_vi[vn->vn_nr_item - 1].vi_type |=
177 
178 #ifdef CONFIG_REISERFS_CHECK
179  if (op_is_left_mergeable(key, Sh->b_size) &&
180  !(vn->vn_mode != M_DELETE
181  || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
182  /* we delete last item and it could be merged with right neighbor's first item */
183  if (!
184  (B_NR_ITEMS(Sh) == 1
185  && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0))
186  && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) {
187  /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */
188  print_block(Sh, 0, -1, -1);
189  reiserfs_panic(tb->tb_sb, "vs-8045",
190  "rdkey %k, affected item==%d "
191  "(mode==%c) Must be %c",
192  key, vn->vn_affected_item_num,
193  vn->vn_mode, M_DELETE);
194  }
195  }
196 #endif
197 
198  }
199 }
200 
201 /* using virtual node check, how many items can be shifted to left
202  neighbor */
203 static void check_left(struct tree_balance *tb, int h, int cur_free)
204 {
205  int i;
206  struct virtual_node *vn = tb->tb_vn;
207  struct virtual_item *vi;
208  int d_size, ih_size;
209 
210  RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free);
211 
212  /* internal level */
213  if (h > 0) {
214  tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
215  return;
216  }
217 
218  /* leaf level */
219 
220  if (!cur_free || !vn->vn_nr_item) {
221  /* no free space or nothing to move */
222  tb->lnum[h] = 0;
223  tb->lbytes = -1;
224  return;
225  }
226 
227  RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
228  "vs-8055: parent does not exist or invalid");
229 
230  vi = vn->vn_vi;
231  if ((unsigned int)cur_free >=
232  (vn->vn_size -
233  ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) {
234  /* all contents of S[0] fits into L[0] */
235 
236  RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
237  "vs-8055: invalid mode or balance condition failed");
238 
239  tb->lnum[0] = vn->vn_nr_item;
240  tb->lbytes = -1;
241  return;
242  }
243 
244  d_size = 0, ih_size = IH_SIZE;
245 
246  /* first item may be merge with last item in left neighbor */
248  d_size = -((int)IH_SIZE), ih_size = 0;
249 
250  tb->lnum[0] = 0;
251  for (i = 0; i < vn->vn_nr_item;
252  i++, ih_size = IH_SIZE, d_size = 0, vi++) {
253  d_size += vi->vi_item_len;
254  if (cur_free >= d_size) {
255  /* the item can be shifted entirely */
256  cur_free -= d_size;
257  tb->lnum[0]++;
258  continue;
259  }
260 
261  /* the item cannot be shifted entirely, try to split it */
262  /* check whether L[0] can hold ih and at least one byte of the item body */
263  if (cur_free <= ih_size) {
264  /* cannot shift even a part of the current item */
265  tb->lbytes = -1;
266  return;
267  }
268  cur_free -= ih_size;
269 
270  tb->lbytes = op_check_left(vi, cur_free, 0, 0);
271  if (tb->lbytes != -1)
272  /* count partially shifted item */
273  tb->lnum[0]++;
274 
275  break;
276  }
277 
278  return;
279 }
280 
281 /* using virtual node check, how many items can be shifted to right
282  neighbor */
283 static void check_right(struct tree_balance *tb, int h, int cur_free)
284 {
285  int i;
286  struct virtual_node *vn = tb->tb_vn;
287  struct virtual_item *vi;
288  int d_size, ih_size;
289 
290  RFALSE(cur_free < 0, "vs-8070: cur_free < 0");
291 
292  /* internal level */
293  if (h > 0) {
294  tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
295  return;
296  }
297 
298  /* leaf level */
299 
300  if (!cur_free || !vn->vn_nr_item) {
301  /* no free space */
302  tb->rnum[h] = 0;
303  tb->rbytes = -1;
304  return;
305  }
306 
307  RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
308  "vs-8075: parent does not exist or invalid");
309 
310  vi = vn->vn_vi + vn->vn_nr_item - 1;
311  if ((unsigned int)cur_free >=
312  (vn->vn_size -
313  ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
314  /* all contents of S[0] fits into R[0] */
315 
316  RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
317  "vs-8080: invalid mode or balance condition failed");
318 
319  tb->rnum[h] = vn->vn_nr_item;
320  tb->rbytes = -1;
321  return;
322  }
323 
324  d_size = 0, ih_size = IH_SIZE;
325 
326  /* last item may be merge with first item in right neighbor */
328  d_size = -(int)IH_SIZE, ih_size = 0;
329 
330  tb->rnum[0] = 0;
331  for (i = vn->vn_nr_item - 1; i >= 0;
332  i--, d_size = 0, ih_size = IH_SIZE, vi--) {
333  d_size += vi->vi_item_len;
334  if (cur_free >= d_size) {
335  /* the item can be shifted entirely */
336  cur_free -= d_size;
337  tb->rnum[0]++;
338  continue;
339  }
340 
341  /* check whether R[0] can hold ih and at least one byte of the item body */
342  if (cur_free <= ih_size) { /* cannot shift even a part of the current item */
343  tb->rbytes = -1;
344  return;
345  }
346 
347  /* R[0] can hold the header of the item and at least one byte of its body */
348  cur_free -= ih_size; /* cur_free is still > 0 */
349 
350  tb->rbytes = op_check_right(vi, cur_free);
351  if (tb->rbytes != -1)
352  /* count partially shifted item */
353  tb->rnum[0]++;
354 
355  break;
356  }
357 
358  return;
359 }
360 
361 /*
362  * from - number of items, which are shifted to left neighbor entirely
363  * to - number of item, which are shifted to right neighbor entirely
364  * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor
365  * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */
366 static int get_num_ver(int mode, struct tree_balance *tb, int h,
367  int from, int from_bytes,
368  int to, int to_bytes, short *snum012, int flow)
369 {
370  int i;
371  int cur_free;
372  // int bytes;
373  int units;
374  struct virtual_node *vn = tb->tb_vn;
375  // struct virtual_item * vi;
376 
377  int total_node_size, max_node_size, current_item_size;
378  int needed_nodes;
379  int start_item, /* position of item we start filling node from */
380  end_item, /* position of item we finish filling node by */
381  start_bytes, /* number of first bytes (entries for directory) of start_item-th item
382  we do not include into node that is being filled */
383  end_bytes; /* number of last bytes (entries for directory) of end_item-th item
384  we do node include into node that is being filled */
385  int split_item_positions[2]; /* these are positions in virtual item of
386  items, that are split between S[0] and
387  S1new and S1new and S2new */
388 
389  split_item_positions[0] = -1;
390  split_item_positions[1] = -1;
391 
392  /* We only create additional nodes if we are in insert or paste mode
393  or we are in replace mode at the internal level. If h is 0 and
394  the mode is M_REPLACE then in fix_nodes we change the mode to
395  paste or insert before we get here in the code. */
396  RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE),
397  "vs-8100: insert_size < 0 in overflow");
398 
399  max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h));
400 
401  /* snum012 [0-2] - number of items, that lay
402  to S[0], first new node and second new node */
403  snum012[3] = -1; /* s1bytes */
404  snum012[4] = -1; /* s2bytes */
405 
406  /* internal level */
407  if (h > 0) {
408  i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE);
409  if (i == max_node_size)
410  return 1;
411  return (i / max_node_size + 1);
412  }
413 
414  /* leaf level */
415  needed_nodes = 1;
416  total_node_size = 0;
417  cur_free = max_node_size;
418 
419  // start from 'from'-th item
420  start_item = from;
421  // skip its first 'start_bytes' units
422  start_bytes = ((from_bytes != -1) ? from_bytes : 0);
423 
424  // last included item is the 'end_item'-th one
425  end_item = vn->vn_nr_item - to - 1;
426  // do not count last 'end_bytes' units of 'end_item'-th item
427  end_bytes = (to_bytes != -1) ? to_bytes : 0;
428 
429  /* go through all item beginning from the start_item-th item and ending by
430  the end_item-th item. Do not count first 'start_bytes' units of
431  'start_item'-th item and last 'end_bytes' of 'end_item'-th item */
432 
433  for (i = start_item; i <= end_item; i++) {
434  struct virtual_item *vi = vn->vn_vi + i;
435  int skip_from_end = ((i == end_item) ? end_bytes : 0);
436 
437  RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed");
438 
439  /* get size of current item */
440  current_item_size = vi->vi_item_len;
441 
442  /* do not take in calculation head part (from_bytes) of from-th item */
443  current_item_size -=
444  op_part_size(vi, 0 /*from start */ , start_bytes);
445 
446  /* do not take in calculation tail part of last item */
447  current_item_size -=
448  op_part_size(vi, 1 /*from end */ , skip_from_end);
449 
450  /* if item fits into current node entierly */
451  if (total_node_size + current_item_size <= max_node_size) {
452  snum012[needed_nodes - 1]++;
453  total_node_size += current_item_size;
454  start_bytes = 0;
455  continue;
456  }
457 
458  if (current_item_size > max_node_size) {
459  /* virtual item length is longer, than max size of item in
460  a node. It is impossible for direct item */
461  RFALSE(is_direct_le_ih(vi->vi_ih),
462  "vs-8110: "
463  "direct item length is %d. It can not be longer than %d",
464  current_item_size, max_node_size);
465  /* we will try to split it */
466  flow = 1;
467  }
468 
469  if (!flow) {
470  /* as we do not split items, take new node and continue */
471  needed_nodes++;
472  i--;
473  total_node_size = 0;
474  continue;
475  }
476  // calculate number of item units which fit into node being
477  // filled
478  {
479  int free_space;
480 
481  free_space = max_node_size - total_node_size - IH_SIZE;
482  units =
483  op_check_left(vi, free_space, start_bytes,
484  skip_from_end);
485  if (units == -1) {
486  /* nothing fits into current node, take new node and continue */
487  needed_nodes++, i--, total_node_size = 0;
488  continue;
489  }
490  }
491 
492  /* something fits into the current node */
493  //if (snum012[3] != -1 || needed_nodes != 1)
494  // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
495  //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
496  start_bytes += units;
497  snum012[needed_nodes - 1 + 3] = units;
498 
499  if (needed_nodes > 2)
500  reiserfs_warning(tb->tb_sb, "vs-8111",
501  "split_item_position is out of range");
502  snum012[needed_nodes - 1]++;
503  split_item_positions[needed_nodes - 1] = i;
504  needed_nodes++;
505  /* continue from the same item with start_bytes != -1 */
506  start_item = i;
507  i--;
508  total_node_size = 0;
509  }
510 
511  // sum012[4] (if it is not -1) contains number of units of which
512  // are to be in S1new, snum012[3] - to be in S0. They are supposed
513  // to be S1bytes and S2bytes correspondingly, so recalculate
514  if (snum012[4] > 0) {
515  int split_item_num;
516  int bytes_to_r, bytes_to_l;
517  int bytes_to_S1new;
518 
519  split_item_num = split_item_positions[1];
520  bytes_to_l =
521  ((from == split_item_num
522  && from_bytes != -1) ? from_bytes : 0);
523  bytes_to_r =
524  ((end_item == split_item_num
525  && end_bytes != -1) ? end_bytes : 0);
526  bytes_to_S1new =
527  ((split_item_positions[0] ==
528  split_item_positions[1]) ? snum012[3] : 0);
529 
530  // s2bytes
531  snum012[4] =
532  op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] -
533  bytes_to_r - bytes_to_l - bytes_to_S1new;
534 
535  if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
536  vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
537  reiserfs_warning(tb->tb_sb, "vs-8115",
538  "not directory or indirect item");
539  }
540 
541  /* now we know S2bytes, calculate S1bytes */
542  if (snum012[3] > 0) {
543  int split_item_num;
544  int bytes_to_r, bytes_to_l;
545  int bytes_to_S2new;
546 
547  split_item_num = split_item_positions[0];
548  bytes_to_l =
549  ((from == split_item_num
550  && from_bytes != -1) ? from_bytes : 0);
551  bytes_to_r =
552  ((end_item == split_item_num
553  && end_bytes != -1) ? end_bytes : 0);
554  bytes_to_S2new =
555  ((split_item_positions[0] == split_item_positions[1]
556  && snum012[4] != -1) ? snum012[4] : 0);
557 
558  // s1bytes
559  snum012[3] =
560  op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] -
561  bytes_to_r - bytes_to_l - bytes_to_S2new;
562  }
563 
564  return needed_nodes;
565 }
566 
567 
568 /* Set parameters for balancing.
569  * Performs write of results of analysis of balancing into structure tb,
570  * where it will later be used by the functions that actually do the balancing.
571  * Parameters:
572  * tb tree_balance structure;
573  * h current level of the node;
574  * lnum number of items from S[h] that must be shifted to L[h];
575  * rnum number of items from S[h] that must be shifted to R[h];
576  * blk_num number of blocks that S[h] will be splitted into;
577  * s012 number of items that fall into splitted nodes.
578  * lbytes number of bytes which flow to the left neighbor from the item that is not
579  * not shifted entirely
580  * rbytes number of bytes which flow to the right neighbor from the item that is not
581  * not shifted entirely
582  * s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in s012 array)
583  */
584 
585 static void set_parameters(struct tree_balance *tb, int h, int lnum,
586  int rnum, int blk_num, short *s012, int lb, int rb)
587 {
588 
589  tb->lnum[h] = lnum;
590  tb->rnum[h] = rnum;
591  tb->blknum[h] = blk_num;
592 
593  if (h == 0) { /* only for leaf level */
594  if (s012 != NULL) {
595  tb->s0num = *s012++,
596  tb->s1num = *s012++, tb->s2num = *s012++;
597  tb->s1bytes = *s012++;
598  tb->s2bytes = *s012;
599  }
600  tb->lbytes = lb;
601  tb->rbytes = rb;
602  }
603  PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum);
604  PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum);
605 
606  PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb);
607  PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb);
608 }
609 
610 /* check, does node disappear if we shift tb->lnum[0] items to left
611  neighbor and tb->rnum[0] to the right one. */
612 static int is_leaf_removable(struct tree_balance *tb)
613 {
614  struct virtual_node *vn = tb->tb_vn;
615  int to_left, to_right;
616  int size;
617  int remain_items;
618 
619  /* number of items, that will be shifted to left (right) neighbor
620  entirely */
621  to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
622  to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
623  remain_items = vn->vn_nr_item;
624 
625  /* how many items remain in S[0] after shiftings to neighbors */
626  remain_items -= (to_left + to_right);
627 
628  if (remain_items < 1) {
629  /* all content of node can be shifted to neighbors */
630  set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0,
631  NULL, -1, -1);
632  return 1;
633  }
634 
635  if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
636  /* S[0] is not removable */
637  return 0;
638 
639  /* check, whether we can divide 1 remaining item between neighbors */
640 
641  /* get size of remaining item (in item units) */
642  size = op_unit_num(&(vn->vn_vi[to_left]));
643 
644  if (tb->lbytes + tb->rbytes >= size) {
645  set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL,
646  tb->lbytes, -1);
647  return 1;
648  }
649 
650  return 0;
651 }
652 
653 /* check whether L, S, R can be joined in one node */
654 static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree)
655 {
656  struct virtual_node *vn = tb->tb_vn;
657  int ih_size;
658  struct buffer_head *S0;
659 
660  S0 = PATH_H_PBUFFER(tb->tb_path, 0);
661 
662  ih_size = 0;
663  if (vn->vn_nr_item) {
664  if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE)
665  ih_size += IH_SIZE;
666 
667  if (vn->vn_vi[vn->vn_nr_item - 1].
668  vi_type & VI_TYPE_RIGHT_MERGEABLE)
669  ih_size += IH_SIZE;
670  } else {
671  /* there was only one item and it will be deleted */
672  struct item_head *ih;
673 
674  RFALSE(B_NR_ITEMS(S0) != 1,
675  "vs-8125: item number must be 1: it is %d",
676  B_NR_ITEMS(S0));
677 
678  ih = B_N_PITEM_HEAD(S0, 0);
679  if (tb->CFR[0]
680  && !comp_short_le_keys(&(ih->ih_key),
681  B_N_PDELIM_KEY(tb->CFR[0],
682  tb->rkey[0])))
683  if (is_direntry_le_ih(ih)) {
684  /* Directory must be in correct state here: that is
685  somewhere at the left side should exist first directory
686  item. But the item being deleted can not be that first
687  one because its right neighbor is item of the same
688  directory. (But first item always gets deleted in last
689  turn). So, neighbors of deleted item can be merged, so
690  we can save ih_size */
691  ih_size = IH_SIZE;
692 
693  /* we might check that left neighbor exists and is of the
694  same directory */
695  RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
696  "vs-8130: first directory item can not be removed until directory is not empty");
697  }
698 
699  }
700 
701  if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) {
702  set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1);
703  PROC_INFO_INC(tb->tb_sb, leaves_removable);
704  return 1;
705  }
706  return 0;
707 
708 }
709 
710 /* when we do not split item, lnum and rnum are numbers of entire items */
711 #define SET_PAR_SHIFT_LEFT \
712 if (h)\
713 {\
714  int to_l;\
715  \
716  to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\
717  (MAX_NR_KEY(Sh) + 1 - lpar);\
718  \
719  set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\
720 }\
721 else \
722 {\
723  if (lset==LEFT_SHIFT_FLOW)\
724  set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\
725  tb->lbytes, -1);\
726  else\
727  set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\
728  -1, -1);\
729 }
730 
731 #define SET_PAR_SHIFT_RIGHT \
732 if (h)\
733 {\
734  int to_r;\
735  \
736  to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\
737  \
738  set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\
739 }\
740 else \
741 {\
742  if (rset==RIGHT_SHIFT_FLOW)\
743  set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\
744  -1, tb->rbytes);\
745  else\
746  set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\
747  -1, -1);\
748 }
749 
750 static void free_buffers_in_tb(struct tree_balance *tb)
751 {
752  int i;
753 
754  pathrelse(tb->tb_path);
755 
756  for (i = 0; i < MAX_HEIGHT; i++) {
757  brelse(tb->L[i]);
758  brelse(tb->R[i]);
759  brelse(tb->FL[i]);
760  brelse(tb->FR[i]);
761  brelse(tb->CFL[i]);
762  brelse(tb->CFR[i]);
763 
764  tb->L[i] = NULL;
765  tb->R[i] = NULL;
766  tb->FL[i] = NULL;
767  tb->FR[i] = NULL;
768  tb->CFL[i] = NULL;
769  tb->CFR[i] = NULL;
770  }
771 }
772 
773 /* Get new buffers for storing new nodes that are created while balancing.
774  * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
775  * CARRY_ON - schedule didn't occur while the function worked;
776  * NO_DISK_SPACE - no disk space.
777  */
778 /* The function is NOT SCHEDULE-SAFE! */
779 static int get_empty_nodes(struct tree_balance *tb, int h)
780 {
781  struct buffer_head *new_bh,
782  *Sh = PATH_H_PBUFFER(tb->tb_path, h);
783  b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
784  int counter, number_of_freeblk, amount_needed, /* number of needed empty blocks */
785  retval = CARRY_ON;
786  struct super_block *sb = tb->tb_sb;
787 
788  /* number_of_freeblk is the number of empty blocks which have been
789  acquired for use by the balancing algorithm minus the number of
790  empty blocks used in the previous levels of the analysis,
791  number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs
792  after empty blocks are acquired, and the balancing analysis is
793  then restarted, amount_needed is the number needed by this level
794  (h) of the balancing analysis.
795 
796  Note that for systems with many processes writing, it would be
797  more layout optimal to calculate the total number needed by all
798  levels and then to run reiserfs_new_blocks to get all of them at once. */
799 
800  /* Initiate number_of_freeblk to the amount acquired prior to the restart of
801  the analysis or 0 if not restarted, then subtract the amount needed
802  by all of the levels of the tree below h. */
803  /* blknum includes S[h], so we subtract 1 in this calculation */
804  for (counter = 0, number_of_freeblk = tb->cur_blknum;
805  counter < h; counter++)
806  number_of_freeblk -=
807  (tb->blknum[counter]) ? (tb->blknum[counter] -
808  1) : 0;
809 
810  /* Allocate missing empty blocks. */
811  /* if Sh == 0 then we are getting a new root */
812  amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1;
813  /* Amount_needed = the amount that we need more than the amount that we have. */
814  if (amount_needed > number_of_freeblk)
815  amount_needed -= number_of_freeblk;
816  else /* If we have enough already then there is nothing to do. */
817  return CARRY_ON;
818 
819  /* No need to check quota - is not allocated for blocks used for formatted nodes */
820  if (reiserfs_new_form_blocknrs(tb, blocknrs,
821  amount_needed) == NO_DISK_SPACE)
822  return NO_DISK_SPACE;
823 
824  /* for each blocknumber we just got, get a buffer and stick it on FEB */
825  for (blocknr = blocknrs, counter = 0;
826  counter < amount_needed; blocknr++, counter++) {
827 
828  RFALSE(!*blocknr,
829  "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");
830 
831  new_bh = sb_getblk(sb, *blocknr);
832  RFALSE(buffer_dirty(new_bh) ||
833  buffer_journaled(new_bh) ||
834  buffer_journal_dirty(new_bh),
835  "PAP-8140: journaled or dirty buffer %b for the new block",
836  new_bh);
837 
838  /* Put empty buffers into the array. */
839  RFALSE(tb->FEB[tb->cur_blknum],
840  "PAP-8141: busy slot for new buffer");
841 
842  set_buffer_journal_new(new_bh);
843  tb->FEB[tb->cur_blknum++] = new_bh;
844  }
845 
846  if (retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb))
847  retval = REPEAT_SEARCH;
848 
849  return retval;
850 }
851 
852 /* Get free space of the left neighbor, which is stored in the parent
853  * node of the left neighbor. */
854 static int get_lfree(struct tree_balance *tb, int h)
855 {
856  struct buffer_head *l, *f;
857  int order;
858 
859  if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL ||
860  (l = tb->FL[h]) == NULL)
861  return 0;
862 
863  if (f == l)
864  order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1;
865  else {
866  order = B_NR_ITEMS(l);
867  f = l;
868  }
869 
870  return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
871 }
872 
873 /* Get free space of the right neighbor,
874  * which is stored in the parent node of the right neighbor.
875  */
876 static int get_rfree(struct tree_balance *tb, int h)
877 {
878  struct buffer_head *r, *f;
879  int order;
880 
881  if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL ||
882  (r = tb->FR[h]) == NULL)
883  return 0;
884 
885  if (f == r)
886  order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1;
887  else {
888  order = 0;
889  f = r;
890  }
891 
892  return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
893 
894 }
895 
896 /* Check whether left neighbor is in memory. */
897 static int is_left_neighbor_in_cache(struct tree_balance *tb, int h)
898 {
899  struct buffer_head *father, *left;
900  struct super_block *sb = tb->tb_sb;
901  b_blocknr_t left_neighbor_blocknr;
902  int left_neighbor_position;
903 
904  /* Father of the left neighbor does not exist. */
905  if (!tb->FL[h])
906  return 0;
907 
908  /* Calculate father of the node to be balanced. */
909  father = PATH_H_PBUFFER(tb->tb_path, h + 1);
910 
911  RFALSE(!father ||
912  !B_IS_IN_TREE(father) ||
913  !B_IS_IN_TREE(tb->FL[h]) ||
914  !buffer_uptodate(father) ||
915  !buffer_uptodate(tb->FL[h]),
916  "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
917  father, tb->FL[h]);
918 
919  /* Get position of the pointer to the left neighbor into the left father. */
920  left_neighbor_position = (father == tb->FL[h]) ?
921  tb->lkey[h] : B_NR_ITEMS(tb->FL[h]);
922  /* Get left neighbor block number. */
923  left_neighbor_blocknr =
924  B_N_CHILD_NUM(tb->FL[h], left_neighbor_position);
925  /* Look for the left neighbor in the cache. */
926  if ((left = sb_find_get_block(sb, left_neighbor_blocknr))) {
927 
928  RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left),
929  "vs-8170: left neighbor (%b %z) is not in the tree",
930  left, left);
931  put_bh(left);
932  return 1;
933  }
934 
935  return 0;
936 }
937 
938 #define LEFT_PARENTS 'l'
939 #define RIGHT_PARENTS 'r'
940 
941 static void decrement_key(struct cpu_key *key)
942 {
943  // call item specific function for this key
944  item_ops[cpu_key_k_type(key)]->decrement_key(key);
945 }
946 
947 /* Calculate far left/right parent of the left/right neighbor of the current node, that
948  * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h].
949  * Calculate left/right common parent of the current node and L[h]/R[h].
950  * Calculate left/right delimiting key position.
951  * Returns: PATH_INCORRECT - path in the tree is not correct;
952  SCHEDULE_OCCURRED - schedule occurred while the function worked;
953  * CARRY_ON - schedule didn't occur while the function worked;
954  */
955 static int get_far_parent(struct tree_balance *tb,
956  int h,
957  struct buffer_head **pfather,
958  struct buffer_head **pcom_father, char c_lr_par)
959 {
960  struct buffer_head *parent;
961  INITIALIZE_PATH(s_path_to_neighbor_father);
962  struct treepath *path = tb->tb_path;
963  struct cpu_key s_lr_father_key;
964  int counter,
965  position = INT_MAX,
966  first_last_position = 0,
967  path_offset = PATH_H_PATH_OFFSET(path, h);
968 
969  /* Starting from F[h] go upwards in the tree, and look for the common
970  ancestor of F[h], and its neighbor l/r, that should be obtained. */
971 
972  counter = path_offset;
973 
975  "PAP-8180: invalid path length");
976 
977  for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) {
978  /* Check whether parent of the current buffer in the path is really parent in the tree. */
979  if (!B_IS_IN_TREE
980  (parent = PATH_OFFSET_PBUFFER(path, counter - 1)))
981  return REPEAT_SEARCH;
982  /* Check whether position in the parent is correct. */
983  if ((position =
985  counter - 1)) >
986  B_NR_ITEMS(parent))
987  return REPEAT_SEARCH;
988  /* Check whether parent at the path really points to the child. */
989  if (B_N_CHILD_NUM(parent, position) !=
990  PATH_OFFSET_PBUFFER(path, counter)->b_blocknr)
991  return REPEAT_SEARCH;
992  /* Return delimiting key if position in the parent is not equal to first/last one. */
993  if (c_lr_par == RIGHT_PARENTS)
994  first_last_position = B_NR_ITEMS(parent);
995  if (position != first_last_position) {
996  *pcom_father = parent;
997  get_bh(*pcom_father);
998  /*(*pcom_father = parent)->b_count++; */
999  break;
1000  }
1001  }
1002 
1003  /* if we are in the root of the tree, then there is no common father */
1004  if (counter == FIRST_PATH_ELEMENT_OFFSET) {
1005  /* Check whether first buffer in the path is the root of the tree. */
1007  (tb->tb_path,
1008  FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
1009  SB_ROOT_BLOCK(tb->tb_sb)) {
1010  *pfather = *pcom_father = NULL;
1011  return CARRY_ON;
1012  }
1013  return REPEAT_SEARCH;
1014  }
1015 
1016  RFALSE(B_LEVEL(*pcom_father) <= DISK_LEAF_NODE_LEVEL,
1017  "PAP-8185: (%b %z) level too small",
1018  *pcom_father, *pcom_father);
1019 
1020  /* Check whether the common parent is locked. */
1021 
1022  if (buffer_locked(*pcom_father)) {
1023 
1024  /* Release the write lock while the buffer is busy */
1026  __wait_on_buffer(*pcom_father);
1028  if (FILESYSTEM_CHANGED_TB(tb)) {
1029  brelse(*pcom_father);
1030  return REPEAT_SEARCH;
1031  }
1032  }
1033 
1034  /* So, we got common parent of the current node and its left/right neighbor.
1035  Now we are geting the parent of the left/right neighbor. */
1036 
1037  /* Form key to get parent of the left/right neighbor. */
1038  le_key2cpu_key(&s_lr_father_key,
1039  B_N_PDELIM_KEY(*pcom_father,
1040  (c_lr_par ==
1041  LEFT_PARENTS) ? (tb->lkey[h - 1] =
1042  position -
1043  1) : (tb->rkey[h -
1044  1] =
1045  position)));
1046 
1047  if (c_lr_par == LEFT_PARENTS)
1048  decrement_key(&s_lr_father_key);
1049 
1050  if (search_by_key
1051  (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
1052  h + 1) == IO_ERROR)
1053  // path is released
1054  return IO_ERROR;
1055 
1056  if (FILESYSTEM_CHANGED_TB(tb)) {
1057  pathrelse(&s_path_to_neighbor_father);
1058  brelse(*pcom_father);
1059  return REPEAT_SEARCH;
1060  }
1061 
1062  *pfather = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);
1063 
1064  RFALSE(B_LEVEL(*pfather) != h + 1,
1065  "PAP-8190: (%b %z) level too small", *pfather, *pfather);
1066  RFALSE(s_path_to_neighbor_father.path_length <
1067  FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small");
1068 
1069  s_path_to_neighbor_father.path_length--;
1070  pathrelse(&s_path_to_neighbor_father);
1071  return CARRY_ON;
1072 }
1073 
1074 /* Get parents of neighbors of node in the path(S[path_offset]) and common parents of
1075  * S[path_offset] and L[path_offset]/R[path_offset]: F[path_offset], FL[path_offset],
1076  * FR[path_offset], CFL[path_offset], CFR[path_offset].
1077  * Calculate numbers of left and right delimiting keys position: lkey[path_offset], rkey[path_offset].
1078  * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
1079  * CARRY_ON - schedule didn't occur while the function worked;
1080  */
1081 static int get_parents(struct tree_balance *tb, int h)
1082 {
1083  struct treepath *path = tb->tb_path;
1084  int position,
1085  ret,
1086  path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h);
1087  struct buffer_head *curf, *curcf;
1088 
1089  /* Current node is the root of the tree or will be root of the tree */
1090  if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1091  /* The root can not have parents.
1092  Release nodes which previously were obtained as parents of the current node neighbors. */
1093  brelse(tb->FL[h]);
1094  brelse(tb->CFL[h]);
1095  brelse(tb->FR[h]);
1096  brelse(tb->CFR[h]);
1097  tb->FL[h] = NULL;
1098  tb->CFL[h] = NULL;
1099  tb->FR[h] = NULL;
1100  tb->CFR[h] = NULL;
1101  return CARRY_ON;
1102  }
1103 
1104  /* Get parent FL[path_offset] of L[path_offset]. */
1105  position = PATH_OFFSET_POSITION(path, path_offset - 1);
1106  if (position) {
1107  /* Current node is not the first child of its parent. */
1108  curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
1109  curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
1110  get_bh(curf);
1111  get_bh(curf);
1112  tb->lkey[h] = position - 1;
1113  } else {
1114  /* Calculate current parent of L[path_offset], which is the left neighbor of the current node.
1115  Calculate current common parent of L[path_offset] and the current node. Note that
1116  CFL[path_offset] not equal FL[path_offset] and CFL[path_offset] not equal F[path_offset].
1117  Calculate lkey[path_offset]. */
1118  if ((ret = get_far_parent(tb, h + 1, &curf,
1119  &curcf,
1120  LEFT_PARENTS)) != CARRY_ON)
1121  return ret;
1122  }
1123 
1124  brelse(tb->FL[h]);
1125  tb->FL[h] = curf; /* New initialization of FL[h]. */
1126  brelse(tb->CFL[h]);
1127  tb->CFL[h] = curcf; /* New initialization of CFL[h]. */
1128 
1129  RFALSE((curf && !B_IS_IN_TREE(curf)) ||
1130  (curcf && !B_IS_IN_TREE(curcf)),
1131  "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf);
1132 
1133 /* Get parent FR[h] of R[h]. */
1134 
1135 /* Current node is the last child of F[h]. FR[h] != F[h]. */
1136  if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) {
1137 /* Calculate current parent of R[h], which is the right neighbor of F[h].
1138  Calculate current common parent of R[h] and current node. Note that CFR[h]
1139  not equal FR[path_offset] and CFR[h] not equal F[h]. */
1140  if ((ret =
1141  get_far_parent(tb, h + 1, &curf, &curcf,
1142  RIGHT_PARENTS)) != CARRY_ON)
1143  return ret;
1144  } else {
1145 /* Current node is not the last child of its parent F[h]. */
1146  curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
1147  curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
1148  get_bh(curf);
1149  get_bh(curf);
1150  tb->rkey[h] = position;
1151  }
1152 
1153  brelse(tb->FR[h]);
1154  /* New initialization of FR[path_offset]. */
1155  tb->FR[h] = curf;
1156 
1157  brelse(tb->CFR[h]);
1158  /* New initialization of CFR[path_offset]. */
1159  tb->CFR[h] = curcf;
1160 
1161  RFALSE((curf && !B_IS_IN_TREE(curf)) ||
1162  (curcf && !B_IS_IN_TREE(curcf)),
1163  "PAP-8205: FR (%b) or CFR (%b) is invalid", curf, curcf);
1164 
1165  return CARRY_ON;
1166 }
1167 
1168 /* it is possible to remove node as result of shiftings to
1169  neighbors even when we insert or paste item. */
1170 static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1171  struct tree_balance *tb, int h)
1172 {
1173  struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h);
1174  int levbytes = tb->insert_size[h];
1175  struct item_head *ih;
1176  struct reiserfs_key *r_key = NULL;
1177 
1178  ih = B_N_PITEM_HEAD(Sh, 0);
1179  if (tb->CFR[h])
1180  r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]);
1181 
1182  if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
1183  /* shifting may merge items which might save space */
1184  -
1185  ((!h
1186  && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0)
1187  -
1188  ((!h && r_key
1189  && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0)
1190  + ((h) ? KEY_SIZE : 0)) {
1191  /* node can not be removed */
1192  if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */
1193  if (!h)
1194  tb->s0num =
1195  B_NR_ITEMS(Sh) +
1196  ((mode == M_INSERT) ? 1 : 0);
1197  set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1198  return NO_BALANCING_NEEDED;
1199  }
1200  }
1201  PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]);
1202  return !NO_BALANCING_NEEDED;
1203 }
1204 
1205 /* Check whether current node S[h] is balanced when increasing its size by
1206  * Inserting or Pasting.
1207  * Calculate parameters for balancing for current level h.
1208  * Parameters:
1209  * tb tree_balance structure;
1210  * h current level of the node;
1211  * inum item number in S[h];
1212  * mode i - insert, p - paste;
1213  * Returns: 1 - schedule occurred;
1214  * 0 - balancing for higher levels needed;
1215  * -1 - no balancing for higher levels needed;
1216  * -2 - no disk space.
1217  */
1218 /* ip means Inserting or Pasting */
1219 static int ip_check_balance(struct tree_balance *tb, int h)
1220 {
1221  struct virtual_node *vn = tb->tb_vn;
1222  int levbytes, /* Number of bytes that must be inserted into (value
1223  is negative if bytes are deleted) buffer which
1224  contains node being balanced. The mnemonic is
1225  that the attempted change in node space used level
1226  is levbytes bytes. */
1227  ret;
1228 
1229  int lfree, sfree, rfree /* free space in L, S and R */ ;
1230 
1231  /* nver is short for number of vertixes, and lnver is the number if
1232  we shift to the left, rnver is the number if we shift to the
1233  right, and lrnver is the number if we shift in both directions.
1234  The goal is to minimize first the number of vertixes, and second,
1235  the number of vertixes whose contents are changed by shifting,
1236  and third the number of uncached vertixes whose contents are
1237  changed by shifting and must be read from disk. */
1238  int nver, lnver, rnver, lrnver;
1239 
1240  /* used at leaf level only, S0 = S[0] is the node being balanced,
1241  sInum [ I = 0,1,2 ] is the number of items that will
1242  remain in node SI after balancing. S1 and S2 are new
1243  nodes that might be created. */
1244 
1245  /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters.
1246  where 4th parameter is s1bytes and 5th - s2bytes
1247  */
1248  short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases
1249  0,1 - do not shift and do not shift but bottle
1250  2 - shift only whole item to left
1251  3 - shift to left and bottle as much as possible
1252  4,5 - shift to right (whole items and as much as possible
1253  6,7 - shift to both directions (whole items and as much as possible)
1254  */
1255 
1256  /* Sh is the node whose balance is currently being checked */
1257  struct buffer_head *Sh;
1258 
1259  Sh = PATH_H_PBUFFER(tb->tb_path, h);
1260  levbytes = tb->insert_size[h];
1261 
1262  /* Calculate balance parameters for creating new root. */
1263  if (!Sh) {
1264  if (!h)
1265  reiserfs_panic(tb->tb_sb, "vs-8210",
1266  "S[0] can not be 0");
1267  switch (ret = get_empty_nodes(tb, h)) {
1268  case CARRY_ON:
1269  set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1270  return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */
1271 
1272  case NO_DISK_SPACE:
1273  case REPEAT_SEARCH:
1274  return ret;
1275  default:
1276  reiserfs_panic(tb->tb_sb, "vs-8215", "incorrect "
1277  "return value of get_empty_nodes");
1278  }
1279  }
1280 
1281  if ((ret = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */
1282  return ret;
1283 
1284  sfree = B_FREE_SPACE(Sh);
1285 
1286  /* get free space of neighbors */
1287  rfree = get_rfree(tb, h);
1288  lfree = get_lfree(tb, h);
1289 
1290  if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) ==
1292  /* and new item fits into node S[h] without any shifting */
1293  return NO_BALANCING_NEEDED;
1294 
1295  create_virtual_node(tb, h);
1296 
1297  /*
1298  determine maximal number of items we can shift to the left neighbor (in tb structure)
1299  and the maximal number of bytes that can flow to the left neighbor
1300  from the left most liquid item that cannot be shifted from S[0] entirely (returned value)
1301  */
1302  check_left(tb, h, lfree);
1303 
1304  /*
1305  determine maximal number of items we can shift to the right neighbor (in tb structure)
1306  and the maximal number of bytes that can flow to the right neighbor
1307  from the right most liquid item that cannot be shifted from S[0] entirely (returned value)
1308  */
1309  check_right(tb, h, rfree);
1310 
1311  /* all contents of internal node S[h] can be moved into its
1312  neighbors, S[h] will be removed after balancing */
1313  if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
1314  int to_r;
1315 
1316  /* Since we are working on internal nodes, and our internal
1317  nodes have fixed size entries, then we can balance by the
1318  number of items rather than the space they consume. In this
1319  routine we set the left node equal to the right node,
1320  allowing a difference of less than or equal to 1 child
1321  pointer. */
1322  to_r =
1323  ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1324  vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
1325  tb->rnum[h]);
1326  set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
1327  -1, -1);
1328  return CARRY_ON;
1329  }
1330 
1331  /* this checks balance condition, that any two neighboring nodes can not fit in one node */
1332  RFALSE(h &&
1333  (tb->lnum[h] >= vn->vn_nr_item + 1 ||
1334  tb->rnum[h] >= vn->vn_nr_item + 1),
1335  "vs-8220: tree is not balanced on internal level");
1336  RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
1337  (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))),
1338  "vs-8225: tree is not balanced on leaf level");
1339 
1340  /* all contents of S[0] can be moved into its neighbors
1341  S[0] will be removed after balancing. */
1342  if (!h && is_leaf_removable(tb))
1343  return CARRY_ON;
1344 
1345  /* why do we perform this check here rather than earlier??
1346  Answer: we can win 1 node in some cases above. Moreover we
1347  checked it above, when we checked, that S[0] is not removable
1348  in principle */
1349  if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */
1350  if (!h)
1351  tb->s0num = vn->vn_nr_item;
1352  set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1353  return NO_BALANCING_NEEDED;
1354  }
1355 
1356  {
1357  int lpar, rpar, nset, lset, rset, lrset;
1358  /*
1359  * regular overflowing of the node
1360  */
1361 
1362  /* get_num_ver works in 2 modes (FLOW & NO_FLOW)
1363  lpar, rpar - number of items we can shift to left/right neighbor (including splitting item)
1364  nset, lset, rset, lrset - shows, whether flowing items give better packing
1365  */
1366 #define FLOW 1
1367 #define NO_FLOW 0 /* do not any splitting */
1368 
1369  /* we choose one the following */
1370 #define NOTHING_SHIFT_NO_FLOW 0
1371 #define NOTHING_SHIFT_FLOW 5
1372 #define LEFT_SHIFT_NO_FLOW 10
1373 #define LEFT_SHIFT_FLOW 15
1374 #define RIGHT_SHIFT_NO_FLOW 20
1375 #define RIGHT_SHIFT_FLOW 25
1376 #define LR_SHIFT_NO_FLOW 30
1377 #define LR_SHIFT_FLOW 35
1378 
1379  lpar = tb->lnum[h];
1380  rpar = tb->rnum[h];
1381 
1382  /* calculate number of blocks S[h] must be split into when
1383  nothing is shifted to the neighbors,
1384  as well as number of items in each part of the split node (s012 numbers),
1385  and number of bytes (s1bytes) of the shared drop which flow to S1 if any */
1386  nset = NOTHING_SHIFT_NO_FLOW;
1387  nver = get_num_ver(vn->vn_mode, tb, h,
1388  0, -1, h ? vn->vn_nr_item : 0, -1,
1389  snum012, NO_FLOW);
1390 
1391  if (!h) {
1392  int nver1;
1393 
1394  /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */
1395  nver1 = get_num_ver(vn->vn_mode, tb, h,
1396  0, -1, 0, -1,
1397  snum012 + NOTHING_SHIFT_FLOW, FLOW);
1398  if (nver > nver1)
1399  nset = NOTHING_SHIFT_FLOW, nver = nver1;
1400  }
1401 
1402  /* calculate number of blocks S[h] must be split into when
1403  l_shift_num first items and l_shift_bytes of the right most
1404  liquid item to be shifted are shifted to the left neighbor,
1405  as well as number of items in each part of the splitted node (s012 numbers),
1406  and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1407  */
1408  lset = LEFT_SHIFT_NO_FLOW;
1409  lnver = get_num_ver(vn->vn_mode, tb, h,
1410  lpar - ((h || tb->lbytes == -1) ? 0 : 1),
1411  -1, h ? vn->vn_nr_item : 0, -1,
1412  snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
1413  if (!h) {
1414  int lnver1;
1415 
1416  lnver1 = get_num_ver(vn->vn_mode, tb, h,
1417  lpar -
1418  ((tb->lbytes != -1) ? 1 : 0),
1419  tb->lbytes, 0, -1,
1420  snum012 + LEFT_SHIFT_FLOW, FLOW);
1421  if (lnver > lnver1)
1422  lset = LEFT_SHIFT_FLOW, lnver = lnver1;
1423  }
1424 
1425  /* calculate number of blocks S[h] must be split into when
1426  r_shift_num first items and r_shift_bytes of the left most
1427  liquid item to be shifted are shifted to the right neighbor,
1428  as well as number of items in each part of the splitted node (s012 numbers),
1429  and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1430  */
1431  rset = RIGHT_SHIFT_NO_FLOW;
1432  rnver = get_num_ver(vn->vn_mode, tb, h,
1433  0, -1,
1434  h ? (vn->vn_nr_item - rpar) : (rpar -
1435  ((tb->
1436  rbytes !=
1437  -1) ? 1 :
1438  0)), -1,
1439  snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW);
1440  if (!h) {
1441  int rnver1;
1442 
1443  rnver1 = get_num_ver(vn->vn_mode, tb, h,
1444  0, -1,
1445  (rpar -
1446  ((tb->rbytes != -1) ? 1 : 0)),
1447  tb->rbytes,
1448  snum012 + RIGHT_SHIFT_FLOW, FLOW);
1449 
1450  if (rnver > rnver1)
1451  rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
1452  }
1453 
1454  /* calculate number of blocks S[h] must be split into when
1455  items are shifted in both directions,
1456  as well as number of items in each part of the splitted node (s012 numbers),
1457  and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1458  */
1459  lrset = LR_SHIFT_NO_FLOW;
1460  lrnver = get_num_ver(vn->vn_mode, tb, h,
1461  lpar - ((h || tb->lbytes == -1) ? 0 : 1),
1462  -1,
1463  h ? (vn->vn_nr_item - rpar) : (rpar -
1464  ((tb->
1465  rbytes !=
1466  -1) ? 1 :
1467  0)), -1,
1468  snum012 + LR_SHIFT_NO_FLOW, NO_FLOW);
1469  if (!h) {
1470  int lrnver1;
1471 
1472  lrnver1 = get_num_ver(vn->vn_mode, tb, h,
1473  lpar -
1474  ((tb->lbytes != -1) ? 1 : 0),
1475  tb->lbytes,
1476  (rpar -
1477  ((tb->rbytes != -1) ? 1 : 0)),
1478  tb->rbytes,
1479  snum012 + LR_SHIFT_FLOW, FLOW);
1480  if (lrnver > lrnver1)
1481  lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
1482  }
1483 
1484  /* Our general shifting strategy is:
1485  1) to minimized number of new nodes;
1486  2) to minimized number of neighbors involved in shifting;
1487  3) to minimized number of disk reads; */
1488 
1489  /* we can win TWO or ONE nodes by shifting in both directions */
1490  if (lrnver < lnver && lrnver < rnver) {
1491  RFALSE(h &&
1492  (tb->lnum[h] != 1 ||
1493  tb->rnum[h] != 1 ||
1494  lrnver != 1 || rnver != 2 || lnver != 2
1495  || h != 1), "vs-8230: bad h");
1496  if (lrset == LR_SHIFT_FLOW)
1497  set_parameters(tb, h, tb->lnum[h], tb->rnum[h],
1498  lrnver, snum012 + lrset,
1499  tb->lbytes, tb->rbytes);
1500  else
1501  set_parameters(tb, h,
1502  tb->lnum[h] -
1503  ((tb->lbytes == -1) ? 0 : 1),
1504  tb->rnum[h] -
1505  ((tb->rbytes == -1) ? 0 : 1),
1506  lrnver, snum012 + lrset, -1, -1);
1507 
1508  return CARRY_ON;
1509  }
1510 
1511  /* if shifting doesn't lead to better packing then don't shift */
1512  if (nver == lrnver) {
1513  set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1,
1514  -1);
1515  return CARRY_ON;
1516  }
1517 
1518  /* now we know that for better packing shifting in only one
1519  direction either to the left or to the right is required */
1520 
1521  /* if shifting to the left is better than shifting to the right */
1522  if (lnver < rnver) {
1524  return CARRY_ON;
1525  }
1526 
1527  /* if shifting to the right is better than shifting to the left */
1528  if (lnver > rnver) {
1530  return CARRY_ON;
1531  }
1532 
1533  /* now shifting in either direction gives the same number
1534  of nodes and we can make use of the cached neighbors */
1535  if (is_left_neighbor_in_cache(tb, h)) {
1537  return CARRY_ON;
1538  }
1539 
1540  /* shift to the right independently on whether the right neighbor in cache or not */
1542  return CARRY_ON;
1543  }
1544 }
1545 
1546 /* Check whether current node S[h] is balanced when Decreasing its size by
1547  * Deleting or Cutting for INTERNAL node of S+tree.
1548  * Calculate parameters for balancing for current level h.
1549  * Parameters:
1550  * tb tree_balance structure;
1551  * h current level of the node;
1552  * inum item number in S[h];
1553  * mode i - insert, p - paste;
1554  * Returns: 1 - schedule occurred;
1555  * 0 - balancing for higher levels needed;
1556  * -1 - no balancing for higher levels needed;
1557  * -2 - no disk space.
1558  *
1559  * Note: Items of internal nodes have fixed size, so the balance condition for
1560  * the internal part of S+tree is as for the B-trees.
1561  */
1562 static int dc_check_balance_internal(struct tree_balance *tb, int h)
1563 {
1564  struct virtual_node *vn = tb->tb_vn;
1565 
1566  /* Sh is the node whose balance is currently being checked,
1567  and Fh is its father. */
1568  struct buffer_head *Sh, *Fh;
1569  int maxsize, ret;
1570  int lfree, rfree /* free space in L and R */ ;
1571 
1572  Sh = PATH_H_PBUFFER(tb->tb_path, h);
1573  Fh = PATH_H_PPARENT(tb->tb_path, h);
1574 
1575  maxsize = MAX_CHILD_SIZE(Sh);
1576 
1577 /* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */
1578 /* new_nr_item = number of items node would have if operation is */
1579 /* performed without balancing (new_nr_item); */
1580  create_virtual_node(tb, h);
1581 
1582  if (!Fh) { /* S[h] is the root. */
1583  if (vn->vn_nr_item > 0) {
1584  set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1585  return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */
1586  }
1587  /* new_nr_item == 0.
1588  * Current root will be deleted resulting in
1589  * decrementing the tree height. */
1590  set_parameters(tb, h, 0, 0, 0, NULL, -1, -1);
1591  return CARRY_ON;
1592  }
1593 
1594  if ((ret = get_parents(tb, h)) != CARRY_ON)
1595  return ret;
1596 
1597  /* get free space of neighbors */
1598  rfree = get_rfree(tb, h);
1599  lfree = get_lfree(tb, h);
1600 
1601  /* determine maximal number of items we can fit into neighbors */
1602  check_left(tb, h, lfree);
1603  check_right(tb, h, rfree);
1604 
1605  if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { /* Balance condition for the internal node is valid.
1606  * In this case we balance only if it leads to better packing. */
1607  if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { /* Here we join S[h] with one of its neighbors,
1608  * which is impossible with greater values of new_nr_item. */
1609  if (tb->lnum[h] >= vn->vn_nr_item + 1) {
1610  /* All contents of S[h] can be moved to L[h]. */
1611  int n;
1612  int order_L;
1613 
1614  order_L =
1615  ((n =
1617  h)) ==
1618  0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1619  n = dc_size(B_N_CHILD(tb->FL[h], order_L)) /
1620  (DC_SIZE + KEY_SIZE);
1621  set_parameters(tb, h, -n - 1, 0, 0, NULL, -1,
1622  -1);
1623  return CARRY_ON;
1624  }
1625 
1626  if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1627  /* All contents of S[h] can be moved to R[h]. */
1628  int n;
1629  int order_R;
1630 
1631  order_R =
1632  ((n =
1634  h)) ==
1635  B_NR_ITEMS(Fh)) ? 0 : n + 1;
1636  n = dc_size(B_N_CHILD(tb->FR[h], order_R)) /
1637  (DC_SIZE + KEY_SIZE);
1638  set_parameters(tb, h, 0, -n - 1, 0, NULL, -1,
1639  -1);
1640  return CARRY_ON;
1641  }
1642  }
1643 
1644  if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1645  /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1646  int to_r;
1647 
1648  to_r =
1649  ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] -
1650  tb->rnum[h] + vn->vn_nr_item + 1) / 2 -
1651  (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
1652  set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r,
1653  0, NULL, -1, -1);
1654  return CARRY_ON;
1655  }
1656 
1657  /* Balancing does not lead to better packing. */
1658  set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1659  return NO_BALANCING_NEEDED;
1660  }
1661 
1662  /* Current node contain insufficient number of items. Balancing is required. */
1663  /* Check whether we can merge S[h] with left neighbor. */
1664  if (tb->lnum[h] >= vn->vn_nr_item + 1)
1665  if (is_left_neighbor_in_cache(tb, h)
1666  || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) {
1667  int n;
1668  int order_L;
1669 
1670  order_L =
1671  ((n =
1673  h)) ==
1674  0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1675  n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE +
1676  KEY_SIZE);
1677  set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1);
1678  return CARRY_ON;
1679  }
1680 
1681  /* Check whether we can merge S[h] with right neighbor. */
1682  if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1683  int n;
1684  int order_R;
1685 
1686  order_R =
1687  ((n =
1689  h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1);
1690  n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE +
1691  KEY_SIZE);
1692  set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1);
1693  return CARRY_ON;
1694  }
1695 
1696  /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1697  if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1698  int to_r;
1699 
1700  to_r =
1701  ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1702  vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
1703  tb->rnum[h]);
1704  set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
1705  -1, -1);
1706  return CARRY_ON;
1707  }
1708 
1709  /* For internal nodes try to borrow item from a neighbor */
1710  RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root");
1711 
1712  /* Borrow one or two items from caching neighbor */
1713  if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) {
1714  int from_l;
1715 
1716  from_l =
1717  (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item +
1718  1) / 2 - (vn->vn_nr_item + 1);
1719  set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1);
1720  return CARRY_ON;
1721  }
1722 
1723  set_parameters(tb, h, 0,
1724  -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item +
1725  1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1);
1726  return CARRY_ON;
1727 }
1728 
1729 /* Check whether current node S[h] is balanced when Decreasing its size by
1730  * Deleting or Truncating for LEAF node of S+tree.
1731  * Calculate parameters for balancing for current level h.
1732  * Parameters:
1733  * tb tree_balance structure;
1734  * h current level of the node;
1735  * inum item number in S[h];
1736  * mode i - insert, p - paste;
1737  * Returns: 1 - schedule occurred;
1738  * 0 - balancing for higher levels needed;
1739  * -1 - no balancing for higher levels needed;
1740  * -2 - no disk space.
1741  */
1742 static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1743 {
1744  struct virtual_node *vn = tb->tb_vn;
1745 
1746  /* Number of bytes that must be deleted from
1747  (value is negative if bytes are deleted) buffer which
1748  contains node being balanced. The mnemonic is that the
1749  attempted change in node space used level is levbytes bytes. */
1750  int levbytes;
1751  /* the maximal item size */
1752  int maxsize, ret;
1753  /* S0 is the node whose balance is currently being checked,
1754  and F0 is its father. */
1755  struct buffer_head *S0, *F0;
1756  int lfree, rfree /* free space in L and R */ ;
1757 
1758  S0 = PATH_H_PBUFFER(tb->tb_path, 0);
1759  F0 = PATH_H_PPARENT(tb->tb_path, 0);
1760 
1761  levbytes = tb->insert_size[h];
1762 
1763  maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */
1764 
1765  if (!F0) { /* S[0] is the root now. */
1766 
1767  RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0),
1768  "vs-8240: attempt to create empty buffer tree");
1769 
1770  set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1771  return NO_BALANCING_NEEDED;
1772  }
1773 
1774  if ((ret = get_parents(tb, h)) != CARRY_ON)
1775  return ret;
1776 
1777  /* get free space of neighbors */
1778  rfree = get_rfree(tb, h);
1779  lfree = get_lfree(tb, h);
1780 
1781  create_virtual_node(tb, h);
1782 
1783  /* if 3 leaves can be merge to one, set parameters and return */
1784  if (are_leaves_removable(tb, lfree, rfree))
1785  return CARRY_ON;
1786 
1787  /* determine maximal number of items we can shift to the left/right neighbor
1788  and the maximal number of bytes that can flow to the left/right neighbor
1789  from the left/right most liquid item that cannot be shifted from S[0] entirely
1790  */
1791  check_left(tb, h, lfree);
1792  check_right(tb, h, rfree);
1793 
1794  /* check whether we can merge S with left neighbor. */
1795  if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
1796  if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */
1797  !tb->FR[h]) {
1798 
1799  RFALSE(!tb->FL[h],
1800  "vs-8245: dc_check_balance_leaf: FL[h] must exist");
1801 
1802  /* set parameter to merge S[0] with its left neighbor */
1803  set_parameters(tb, h, -1, 0, 0, NULL, -1, -1);
1804  return CARRY_ON;
1805  }
1806 
1807  /* check whether we can merge S[0] with right neighbor. */
1808  if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
1809  set_parameters(tb, h, 0, -1, 0, NULL, -1, -1);
1810  return CARRY_ON;
1811  }
1812 
1813  /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */
1814  if (is_leaf_removable(tb))
1815  return CARRY_ON;
1816 
1817  /* Balancing is not required. */
1818  tb->s0num = vn->vn_nr_item;
1819  set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1820  return NO_BALANCING_NEEDED;
1821 }
1822 
1823 /* Check whether current node S[h] is balanced when Decreasing its size by
1824  * Deleting or Cutting.
1825  * Calculate parameters for balancing for current level h.
1826  * Parameters:
1827  * tb tree_balance structure;
1828  * h current level of the node;
1829  * inum item number in S[h];
1830  * mode d - delete, c - cut.
1831  * Returns: 1 - schedule occurred;
1832  * 0 - balancing for higher levels needed;
1833  * -1 - no balancing for higher levels needed;
1834  * -2 - no disk space.
1835  */
1836 static int dc_check_balance(struct tree_balance *tb, int h)
1837 {
1838  RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)),
1839  "vs-8250: S is not initialized");
1840 
1841  if (h)
1842  return dc_check_balance_internal(tb, h);
1843  else
1844  return dc_check_balance_leaf(tb, h);
1845 }
1846 
1847 /* Check whether current node S[h] is balanced.
1848  * Calculate parameters for balancing for current level h.
1849  * Parameters:
1850  *
1851  * tb tree_balance structure:
1852  *
1853  * tb is a large structure that must be read about in the header file
1854  * at the same time as this procedure if the reader is to successfully
1855  * understand this procedure
1856  *
1857  * h current level of the node;
1858  * inum item number in S[h];
1859  * mode i - insert, p - paste, d - delete, c - cut.
1860  * Returns: 1 - schedule occurred;
1861  * 0 - balancing for higher levels needed;
1862  * -1 - no balancing for higher levels needed;
1863  * -2 - no disk space.
1864  */
1865 static int check_balance(int mode,
1866  struct tree_balance *tb,
1867  int h,
1868  int inum,
1869  int pos_in_item,
1870  struct item_head *ins_ih, const void *data)
1871 {
1872  struct virtual_node *vn;
1873 
1874  vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf);
1875  vn->vn_free_ptr = (char *)(tb->tb_vn + 1);
1876  vn->vn_mode = mode;
1877  vn->vn_affected_item_num = inum;
1879  vn->vn_ins_ih = ins_ih;
1880  vn->vn_data = data;
1881 
1882  RFALSE(mode == M_INSERT && !vn->vn_ins_ih,
1883  "vs-8255: ins_ih can not be 0 in insert mode");
1884 
1885  if (tb->insert_size[h] > 0)
1886  /* Calculate balance parameters when size of node is increasing. */
1887  return ip_check_balance(tb, h);
1888 
1889  /* Calculate balance parameters when size of node is decreasing. */
1890  return dc_check_balance(tb, h);
1891 }
1892 
1893 /* Check whether parent at the path is the really parent of the current node.*/
1894 static int get_direct_parent(struct tree_balance *tb, int h)
1895 {
1896  struct buffer_head *bh;
1897  struct treepath *path = tb->tb_path;
1898  int position,
1899  path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h);
1900 
1901  /* We are in the root or in the new root. */
1902  if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1903 
1904  RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
1905  "PAP-8260: invalid offset in the path");
1906 
1907  if (PATH_OFFSET_PBUFFER(path, FIRST_PATH_ELEMENT_OFFSET)->
1908  b_blocknr == SB_ROOT_BLOCK(tb->tb_sb)) {
1909  /* Root is not changed. */
1910  PATH_OFFSET_PBUFFER(path, path_offset - 1) = NULL;
1911  PATH_OFFSET_POSITION(path, path_offset - 1) = 0;
1912  return CARRY_ON;
1913  }
1914  return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */
1915  }
1916 
1917  if (!B_IS_IN_TREE
1918  (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1)))
1919  return REPEAT_SEARCH; /* Parent in the path is not in the tree. */
1920 
1921  if ((position =
1922  PATH_OFFSET_POSITION(path,
1923  path_offset - 1)) > B_NR_ITEMS(bh))
1924  return REPEAT_SEARCH;
1925 
1926  if (B_N_CHILD_NUM(bh, position) !=
1927  PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr)
1928  /* Parent in the path is not parent of the current node in the tree. */
1929  return REPEAT_SEARCH;
1930 
1931  if (buffer_locked(bh)) {
1933  __wait_on_buffer(bh);
1935  if (FILESYSTEM_CHANGED_TB(tb))
1936  return REPEAT_SEARCH;
1937  }
1938 
1939  return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */
1940 }
1941 
1942 /* Using lnum[h] and rnum[h] we should determine what neighbors
1943  * of S[h] we
1944  * need in order to balance S[h], and get them if necessary.
1945  * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
1946  * CARRY_ON - schedule didn't occur while the function worked;
1947  */
1948 static int get_neighbors(struct tree_balance *tb, int h)
1949 {
1950  int child_position,
1951  path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h + 1);
1952  unsigned long son_number;
1953  struct super_block *sb = tb->tb_sb;
1954  struct buffer_head *bh;
1955 
1956  PROC_INFO_INC(sb, get_neighbors[h]);
1957 
1958  if (tb->lnum[h]) {
1959  /* We need left neighbor to balance S[h]. */
1960  PROC_INFO_INC(sb, need_l_neighbor[h]);
1961  bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);
1962 
1963  RFALSE(bh == tb->FL[h] &&
1964  !PATH_OFFSET_POSITION(tb->tb_path, path_offset),
1965  "PAP-8270: invalid position in the parent");
1966 
1967  child_position =
1968  (bh ==
1969  tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb->
1970  FL[h]);
1971  son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
1973  bh = sb_bread(sb, son_number);
1974  reiserfs_write_lock(sb);
1975  if (!bh)
1976  return IO_ERROR;
1977  if (FILESYSTEM_CHANGED_TB(tb)) {
1978  brelse(bh);
1979  PROC_INFO_INC(sb, get_neighbors_restart[h]);
1980  return REPEAT_SEARCH;
1981  }
1982 
1983  RFALSE(!B_IS_IN_TREE(tb->FL[h]) ||
1984  child_position > B_NR_ITEMS(tb->FL[h]) ||
1985  B_N_CHILD_NUM(tb->FL[h], child_position) !=
1986  bh->b_blocknr, "PAP-8275: invalid parent");
1987  RFALSE(!B_IS_IN_TREE(bh), "PAP-8280: invalid child");
1988  RFALSE(!h &&
1989  B_FREE_SPACE(bh) !=
1990  MAX_CHILD_SIZE(bh) -
1991  dc_size(B_N_CHILD(tb->FL[0], child_position)),
1992  "PAP-8290: invalid child size of left neighbor");
1993 
1994  brelse(tb->L[h]);
1995  tb->L[h] = bh;
1996  }
1997 
1998  /* We need right neighbor to balance S[path_offset]. */
1999  if (tb->rnum[h]) { /* We need right neighbor to balance S[path_offset]. */
2000  PROC_INFO_INC(sb, need_r_neighbor[h]);
2001  bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);
2002 
2003  RFALSE(bh == tb->FR[h] &&
2005  path_offset) >=
2006  B_NR_ITEMS(bh),
2007  "PAP-8295: invalid position in the parent");
2008 
2009  child_position =
2010  (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0;
2011  son_number = B_N_CHILD_NUM(tb->FR[h], child_position);
2013  bh = sb_bread(sb, son_number);
2014  reiserfs_write_lock(sb);
2015  if (!bh)
2016  return IO_ERROR;
2017  if (FILESYSTEM_CHANGED_TB(tb)) {
2018  brelse(bh);
2019  PROC_INFO_INC(sb, get_neighbors_restart[h]);
2020  return REPEAT_SEARCH;
2021  }
2022  brelse(tb->R[h]);
2023  tb->R[h] = bh;
2024 
2025  RFALSE(!h
2026  && B_FREE_SPACE(bh) !=
2027  MAX_CHILD_SIZE(bh) -
2028  dc_size(B_N_CHILD(tb->FR[0], child_position)),
2029  "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
2030  B_FREE_SPACE(bh), MAX_CHILD_SIZE(bh),
2031  dc_size(B_N_CHILD(tb->FR[0], child_position)));
2032 
2033  }
2034  return CARRY_ON;
2035 }
2036 
2037 static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
2038 {
2039  int max_num_of_items;
2040  int max_num_of_entries;
2041  unsigned long blocksize = sb->s_blocksize;
2042 
2043 #define MIN_NAME_LEN 1
2044 
2045  max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN);
2046  max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) /
2047  (DEH_SIZE + MIN_NAME_LEN);
2048 
2049  return sizeof(struct virtual_node) +
2050  max(max_num_of_items * sizeof(struct virtual_item),
2052  (max_num_of_entries - 1) * sizeof(__u16));
2053 }
2054 
2055 /* maybe we should fail balancing we are going to perform when kmalloc
2056  fails several times. But now it will loop until kmalloc gets
2057  required memory */
2058 static int get_mem_for_virtual_node(struct tree_balance *tb)
2059 {
2060  int check_fs = 0;
2061  int size;
2062  char *buf;
2063 
2064  size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path));
2065 
2066  if (size > tb->vn_buf_size) {
2067  /* we have to allocate more memory for virtual node */
2068  if (tb->vn_buf) {
2069  /* free memory allocated before */
2070  kfree(tb->vn_buf);
2071  /* this is not needed if kfree is atomic */
2072  check_fs = 1;
2073  }
2074 
2075  /* virtual node requires now more memory */
2076  tb->vn_buf_size = size;
2077 
2078  /* get memory for virtual item */
2079  buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
2080  if (!buf) {
2081  /* getting memory with GFP_KERNEL priority may involve
2082  balancing now (due to indirect_to_direct conversion on
2083  dcache shrinking). So, release path and collected
2084  resources here */
2085  free_buffers_in_tb(tb);
2086  buf = kmalloc(size, GFP_NOFS);
2087  if (!buf) {
2088  tb->vn_buf_size = 0;
2089  }
2090  tb->vn_buf = buf;
2091  schedule();
2092  return REPEAT_SEARCH;
2093  }
2094 
2095  tb->vn_buf = buf;
2096  }
2097 
2098  if (check_fs && FILESYSTEM_CHANGED_TB(tb))
2099  return REPEAT_SEARCH;
2100 
2101  return CARRY_ON;
2102 }
2103 
2104 #ifdef CONFIG_REISERFS_CHECK
2105 static void tb_buffer_sanity_check(struct super_block *sb,
2106  struct buffer_head *bh,
2107  const char *descr, int level)
2108 {
2109  if (bh) {
2110  if (atomic_read(&(bh->b_count)) <= 0)
2111 
2112  reiserfs_panic(sb, "jmacd-1", "negative or zero "
2113  "reference counter for buffer %s[%d] "
2114  "(%b)", descr, level, bh);
2115 
2116  if (!buffer_uptodate(bh))
2117  reiserfs_panic(sb, "jmacd-2", "buffer is not up "
2118  "to date %s[%d] (%b)",
2119  descr, level, bh);
2120 
2121  if (!B_IS_IN_TREE(bh))
2122  reiserfs_panic(sb, "jmacd-3", "buffer is not "
2123  "in tree %s[%d] (%b)",
2124  descr, level, bh);
2125 
2126  if (bh->b_bdev != sb->s_bdev)
2127  reiserfs_panic(sb, "jmacd-4", "buffer has wrong "
2128  "device %s[%d] (%b)",
2129  descr, level, bh);
2130 
2131  if (bh->b_size != sb->s_blocksize)
2132  reiserfs_panic(sb, "jmacd-5", "buffer has wrong "
2133  "blocksize %s[%d] (%b)",
2134  descr, level, bh);
2135 
2136  if (bh->b_blocknr > SB_BLOCK_COUNT(sb))
2137  reiserfs_panic(sb, "jmacd-6", "buffer block "
2138  "number too high %s[%d] (%b)",
2139  descr, level, bh);
2140  }
2141 }
2142 #else
2143 static void tb_buffer_sanity_check(struct super_block *sb,
2144  struct buffer_head *bh,
2145  const char *descr, int level)
2146 {;
2147 }
2148 #endif
2149 
2150 static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh)
2151 {
2152  return reiserfs_prepare_for_journal(s, bh, 0);
2153 }
2154 
2155 static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2156 {
2157  struct buffer_head *locked;
2158 #ifdef CONFIG_REISERFS_CHECK
2159  int repeat_counter = 0;
2160 #endif
2161  int i;
2162 
2163  do {
2164 
2165  locked = NULL;
2166 
2167  for (i = tb->tb_path->path_length;
2168  !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
2169  if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) {
2170  /* if I understand correctly, we can only be sure the last buffer
2171  ** in the path is in the tree --clm
2172  */
2173 #ifdef CONFIG_REISERFS_CHECK
2174  if (PATH_PLAST_BUFFER(tb->tb_path) ==
2175  PATH_OFFSET_PBUFFER(tb->tb_path, i))
2176  tb_buffer_sanity_check(tb->tb_sb,
2178  (tb->tb_path,
2179  i), "S",
2180  tb->tb_path->
2181  path_length - i);
2182 #endif
2183  if (!clear_all_dirty_bits(tb->tb_sb,
2185  (tb->tb_path,
2186  i))) {
2187  locked =
2189  i);
2190  }
2191  }
2192  }
2193 
2194  for (i = 0; !locked && i < MAX_HEIGHT && tb->insert_size[i];
2195  i++) {
2196 
2197  if (tb->lnum[i]) {
2198 
2199  if (tb->L[i]) {
2200  tb_buffer_sanity_check(tb->tb_sb,
2201  tb->L[i],
2202  "L", i);
2203  if (!clear_all_dirty_bits
2204  (tb->tb_sb, tb->L[i]))
2205  locked = tb->L[i];
2206  }
2207 
2208  if (!locked && tb->FL[i]) {
2209  tb_buffer_sanity_check(tb->tb_sb,
2210  tb->FL[i],
2211  "FL", i);
2212  if (!clear_all_dirty_bits
2213  (tb->tb_sb, tb->FL[i]))
2214  locked = tb->FL[i];
2215  }
2216 
2217  if (!locked && tb->CFL[i]) {
2218  tb_buffer_sanity_check(tb->tb_sb,
2219  tb->CFL[i],
2220  "CFL", i);
2221  if (!clear_all_dirty_bits
2222  (tb->tb_sb, tb->CFL[i]))
2223  locked = tb->CFL[i];
2224  }
2225 
2226  }
2227 
2228  if (!locked && (tb->rnum[i])) {
2229 
2230  if (tb->R[i]) {
2231  tb_buffer_sanity_check(tb->tb_sb,
2232  tb->R[i],
2233  "R", i);
2234  if (!clear_all_dirty_bits
2235  (tb->tb_sb, tb->R[i]))
2236  locked = tb->R[i];
2237  }
2238 
2239  if (!locked && tb->FR[i]) {
2240  tb_buffer_sanity_check(tb->tb_sb,
2241  tb->FR[i],
2242  "FR", i);
2243  if (!clear_all_dirty_bits
2244  (tb->tb_sb, tb->FR[i]))
2245  locked = tb->FR[i];
2246  }
2247 
2248  if (!locked && tb->CFR[i]) {
2249  tb_buffer_sanity_check(tb->tb_sb,
2250  tb->CFR[i],
2251  "CFR", i);
2252  if (!clear_all_dirty_bits
2253  (tb->tb_sb, tb->CFR[i]))
2254  locked = tb->CFR[i];
2255  }
2256  }
2257  }
2258  /* as far as I can tell, this is not required. The FEB list seems
2259  ** to be full of newly allocated nodes, which will never be locked,
2260  ** dirty, or anything else.
2261  ** To be safe, I'm putting in the checks and waits in. For the moment,
2262  ** they are needed to keep the code in journal.c from complaining
2263  ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well.
2264  ** --clm
2265  */
2266  for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
2267  if (tb->FEB[i]) {
2268  if (!clear_all_dirty_bits
2269  (tb->tb_sb, tb->FEB[i]))
2270  locked = tb->FEB[i];
2271  }
2272  }
2273 
2274  if (locked) {
2275 #ifdef CONFIG_REISERFS_CHECK
2276  repeat_counter++;
2277  if ((repeat_counter % 10000) == 0) {
2278  reiserfs_warning(tb->tb_sb, "reiserfs-8200",
2279  "too many iterations waiting "
2280  "for buffer to unlock "
2281  "(%b)", locked);
2282 
2283  /* Don't loop forever. Try to recover from possible error. */
2284 
2285  return (FILESYSTEM_CHANGED_TB(tb)) ?
2287  }
2288 #endif
2290  __wait_on_buffer(locked);
2292  if (FILESYSTEM_CHANGED_TB(tb))
2293  return REPEAT_SEARCH;
2294  }
2295 
2296  } while (locked);
2297 
2298  return CARRY_ON;
2299 }
2300 
2301 /* Prepare for balancing, that is
2302  * get all necessary parents, and neighbors;
2303  * analyze what and where should be moved;
2304  * get sufficient number of new nodes;
2305  * Balancing will start only after all resources will be collected at a time.
2306  *
2307  * When ported to SMP kernels, only at the last moment after all needed nodes
2308  * are collected in cache, will the resources be locked using the usual
2309  * textbook ordered lock acquisition algorithms. Note that ensuring that
2310  * this code neither write locks what it does not need to write lock nor locks out of order
2311  * will be a pain in the butt that could have been avoided. Grumble grumble. -Hans
2312  *
2313  * fix is meant in the sense of render unchanging
2314  *
2315  * Latency might be improved by first gathering a list of what buffers are needed
2316  * and then getting as many of them in parallel as possible? -Hans
2317  *
2318  * Parameters:
2319  * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append)
2320  * tb tree_balance structure;
2321  * inum item number in S[h];
2322  * pos_in_item - comment this if you can
2323  * ins_ih item head of item being inserted
2324  * data inserted item or data to be pasted
2325  * Returns: 1 - schedule occurred while the function worked;
2326  * 0 - schedule didn't occur while the function worked;
2327  * -1 - if no_disk_space
2328  */
2329 
2330 int fix_nodes(int op_mode, struct tree_balance *tb,
2331  struct item_head *ins_ih, const void *data)
2332 {
2333  int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path);
2334  int pos_in_item;
2335 
2336  /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
2337  ** during wait_tb_buffers_run
2338  */
2339  int wait_tb_buffers_run = 0;
2340  struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
2341 
2342  ++REISERFS_SB(tb->tb_sb)->s_fix_nodes;
2343 
2344  pos_in_item = tb->tb_path->pos_in_item;
2345 
2346  tb->fs_gen = get_generation(tb->tb_sb);
2347 
2348  /* we prepare and log the super here so it will already be in the
2349  ** transaction when do_balance needs to change it.
2350  ** This way do_balance won't have to schedule when trying to prepare
2351  ** the super for logging
2352  */
2354  SB_BUFFER_WITH_SB(tb->tb_sb), 1);
2356  SB_BUFFER_WITH_SB(tb->tb_sb));
2357  if (FILESYSTEM_CHANGED_TB(tb))
2358  return REPEAT_SEARCH;
2359 
2360  /* if it possible in indirect_to_direct conversion */
2361  if (buffer_locked(tbS0)) {
2363  __wait_on_buffer(tbS0);
2365  if (FILESYSTEM_CHANGED_TB(tb))
2366  return REPEAT_SEARCH;
2367  }
2368 #ifdef CONFIG_REISERFS_CHECK
2369  if (REISERFS_SB(tb->tb_sb)->cur_tb) {
2370  print_cur_tb("fix_nodes");
2371  reiserfs_panic(tb->tb_sb, "PAP-8305",
2372  "there is pending do_balance");
2373  }
2374 
2375  if (!buffer_uptodate(tbS0) || !B_IS_IN_TREE(tbS0))
2376  reiserfs_panic(tb->tb_sb, "PAP-8320", "S[0] (%b %z) is "
2377  "not uptodate at the beginning of fix_nodes "
2378  "or not in tree (mode %c)",
2379  tbS0, tbS0, op_mode);
2380 
2381  /* Check parameters. */
2382  switch (op_mode) {
2383  case M_INSERT:
2384  if (item_num <= 0 || item_num > B_NR_ITEMS(tbS0))
2385  reiserfs_panic(tb->tb_sb, "PAP-8330", "Incorrect "
2386  "item number %d (in S0 - %d) in case "
2387  "of insert", item_num,
2388  B_NR_ITEMS(tbS0));
2389  break;
2390  case M_PASTE:
2391  case M_DELETE:
2392  case M_CUT:
2393  if (item_num < 0 || item_num >= B_NR_ITEMS(tbS0)) {
2394  print_block(tbS0, 0, -1, -1);
2395  reiserfs_panic(tb->tb_sb, "PAP-8335", "Incorrect "
2396  "item number(%d); mode = %c "
2397  "insert_size = %d",
2398  item_num, op_mode,
2399  tb->insert_size[0]);
2400  }
2401  break;
2402  default:
2403  reiserfs_panic(tb->tb_sb, "PAP-8340", "Incorrect mode "
2404  "of operation");
2405  }
2406 #endif
2407 
2408  if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH)
2409  // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat
2410  return REPEAT_SEARCH;
2411 
2412  /* Starting from the leaf level; for all levels h of the tree. */
2413  for (h = 0; h < MAX_HEIGHT && tb->insert_size[h]; h++) {
2414  ret = get_direct_parent(tb, h);
2415  if (ret != CARRY_ON)
2416  goto repeat;
2417 
2418  ret = check_balance(op_mode, tb, h, item_num,
2419  pos_in_item, ins_ih, data);
2420  if (ret != CARRY_ON) {
2421  if (ret == NO_BALANCING_NEEDED) {
2422  /* No balancing for higher levels needed. */
2423  ret = get_neighbors(tb, h);
2424  if (ret != CARRY_ON)
2425  goto repeat;
2426  if (h != MAX_HEIGHT - 1)
2427  tb->insert_size[h + 1] = 0;
2428  /* ok, analysis and resource gathering are complete */
2429  break;
2430  }
2431  goto repeat;
2432  }
2433 
2434  ret = get_neighbors(tb, h);
2435  if (ret != CARRY_ON)
2436  goto repeat;
2437 
2438  /* No disk space, or schedule occurred and analysis may be
2439  * invalid and needs to be redone. */
2440  ret = get_empty_nodes(tb, h);
2441  if (ret != CARRY_ON)
2442  goto repeat;
2443 
2444  if (!PATH_H_PBUFFER(tb->tb_path, h)) {
2445  /* We have a positive insert size but no nodes exist on this
2446  level, this means that we are creating a new root. */
2447 
2448  RFALSE(tb->blknum[h] != 1,
2449  "PAP-8350: creating new empty root");
2450 
2451  if (h < MAX_HEIGHT - 1)
2452  tb->insert_size[h + 1] = 0;
2453  } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) {
2454  if (tb->blknum[h] > 1) {
2455  /* The tree needs to be grown, so this node S[h]
2456  which is the root node is split into two nodes,
2457  and a new node (S[h+1]) will be created to
2458  become the root node. */
2459 
2460  RFALSE(h == MAX_HEIGHT - 1,
2461  "PAP-8355: attempt to create too high of a tree");
2462 
2463  tb->insert_size[h + 1] =
2464  (DC_SIZE +
2465  KEY_SIZE) * (tb->blknum[h] - 1) +
2466  DC_SIZE;
2467  } else if (h < MAX_HEIGHT - 1)
2468  tb->insert_size[h + 1] = 0;
2469  } else
2470  tb->insert_size[h + 1] =
2471  (DC_SIZE + KEY_SIZE) * (tb->blknum[h] - 1);
2472  }
2473 
2474  ret = wait_tb_buffers_until_unlocked(tb);
2475  if (ret == CARRY_ON) {
2476  if (FILESYSTEM_CHANGED_TB(tb)) {
2477  wait_tb_buffers_run = 1;
2478  ret = REPEAT_SEARCH;
2479  goto repeat;
2480  } else {
2481  return CARRY_ON;
2482  }
2483  } else {
2484  wait_tb_buffers_run = 1;
2485  goto repeat;
2486  }
2487 
2488  repeat:
2489  // fix_nodes was unable to perform its calculation due to
2490  // filesystem got changed under us, lack of free disk space or i/o
2491  // failure. If the first is the case - the search will be
2492  // repeated. For now - free all resources acquired so far except
2493  // for the new allocated nodes
2494  {
2495  int i;
2496 
2497  /* Release path buffers. */
2498  if (wait_tb_buffers_run) {
2500  } else {
2501  pathrelse(tb->tb_path);
2502  }
2503  /* brelse all resources collected for balancing */
2504  for (i = 0; i < MAX_HEIGHT; i++) {
2505  if (wait_tb_buffers_run) {
2507  tb->L[i]);
2509  tb->R[i]);
2511  tb->FL[i]);
2513  tb->FR[i]);
2515  tb->
2516  CFL[i]);
2518  tb->
2519  CFR[i]);
2520  }
2521 
2522  brelse(tb->L[i]);
2523  brelse(tb->R[i]);
2524  brelse(tb->FL[i]);
2525  brelse(tb->FR[i]);
2526  brelse(tb->CFL[i]);
2527  brelse(tb->CFR[i]);
2528 
2529  tb->L[i] = NULL;
2530  tb->R[i] = NULL;
2531  tb->FL[i] = NULL;
2532  tb->FR[i] = NULL;
2533  tb->CFL[i] = NULL;
2534  tb->CFR[i] = NULL;
2535  }
2536 
2537  if (wait_tb_buffers_run) {
2538  for (i = 0; i < MAX_FEB_SIZE; i++) {
2539  if (tb->FEB[i])
2541  (tb->tb_sb, tb->FEB[i]);
2542  }
2543  }
2544  return ret;
2545  }
2546 
2547 }
2548 
2549 /* Anatoly will probably forgive me renaming tb to tb. I just
2550  wanted to make lines shorter */
2551 void unfix_nodes(struct tree_balance *tb)
2552 {
2553  int i;
2554 
2555  /* Release path buffers. */
2557 
2558  /* brelse all resources collected for balancing */
2559  for (i = 0; i < MAX_HEIGHT; i++) {
2566 
2567  brelse(tb->L[i]);
2568  brelse(tb->R[i]);
2569  brelse(tb->FL[i]);
2570  brelse(tb->FR[i]);
2571  brelse(tb->CFL[i]);
2572  brelse(tb->CFR[i]);
2573  }
2574 
2575  /* deal with list of allocated (used and unused) nodes */
2576  for (i = 0; i < MAX_FEB_SIZE; i++) {
2577  if (tb->FEB[i]) {
2578  b_blocknr_t blocknr = tb->FEB[i]->b_blocknr;
2579  /* de-allocated block which was not used by balancing and
2580  bforget about buffer for it */
2581  brelse(tb->FEB[i]);
2583  blocknr, 0);
2584  }
2585  if (tb->used[i]) {
2586  /* release used as new nodes including a new root */
2587  brelse(tb->used[i]);
2588  }
2589  }
2590 
2591  kfree(tb->vn_buf);
2592 
2593 }