Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
do_balan.c
Go to the documentation of this file.
1 /*
2  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4 
5 /* Now we have all buffers that must be used in balancing of the tree */
6 /* Further calculations can not cause schedule(), and thus the buffer */
7 /* tree will be stable until the balancing will be finished */
8 /* balance the tree according to the analysis made before, */
9 /* and using buffers obtained after all above. */
10 
18 #include <asm/uaccess.h>
19 #include <linux/time.h>
20 #include "reiserfs.h"
21 #include <linux/buffer_head.h>
22 #include <linux/kernel.h>
23 
24 static inline void buffer_info_init_left(struct tree_balance *tb,
25  struct buffer_info *bi)
26 {
27  bi->tb = tb;
28  bi->bi_bh = tb->L[0];
29  bi->bi_parent = tb->FL[0];
31 }
32 
33 static inline void buffer_info_init_right(struct tree_balance *tb,
34  struct buffer_info *bi)
35 {
36  bi->tb = tb;
37  bi->bi_bh = tb->R[0];
38  bi->bi_parent = tb->FR[0];
40 }
41 
42 static inline void buffer_info_init_tbS0(struct tree_balance *tb,
43  struct buffer_info *bi)
44 {
45  bi->tb = tb;
46  bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
47  bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
48  bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
49 }
50 
51 static inline void buffer_info_init_bh(struct tree_balance *tb,
52  struct buffer_info *bi,
53  struct buffer_head *bh)
54 {
55  bi->tb = tb;
56  bi->bi_bh = bh;
57  bi->bi_parent = NULL;
58  bi->bi_position = 0;
59 }
60 
62  struct buffer_head *bh, int flag)
63 {
65  tb->transaction_handle->t_super, bh);
66 }
67 
68 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
69 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
70 
71 /* summary:
72  if deleting something ( tb->insert_size[0] < 0 )
73  return(balance_leaf_when_delete()); (flag d handled here)
74  else
75  if lnum is larger than 0 we put items into the left node
76  if rnum is larger than 0 we put items into the right node
77  if snum1 is larger than 0 we put items into the new node s1
78  if snum2 is larger than 0 we put items into the new node s2
79 Note that all *num* count new items being created.
80 
81 It would be easier to read balance_leaf() if each of these summary
82 lines was a separate procedure rather than being inlined. I think
83 that there are many passages here and in balance_leaf_when_delete() in
84 which two calls to one procedure can replace two passages, and it
85 might save cache space and improve software maintenance costs to do so.
86 
87 Vladimir made the perceptive comment that we should offload most of
88 the decision making in this function into fix_nodes/check_balance, and
89 then create some sort of structure in tb that says what actions should
90 be performed by do_balance.
91 
92 -Hans */
93 
94 /* Balance leaf node in case of delete or cut: insert_size[0] < 0
95  *
96  * lnum, rnum can have values >= -1
97  * -1 means that the neighbor must be joined with S
98  * 0 means that nothing should be done with the neighbor
99  * >0 means to shift entirely or partly the specified number of items to the neighbor
100  */
101 static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
102 {
103  struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
104  int item_pos = PATH_LAST_POSITION(tb->tb_path);
105  int pos_in_item = tb->tb_path->pos_in_item;
106  struct buffer_info bi;
107  int n;
108  struct item_head *ih;
109 
110  RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
111  "vs- 12000: level: wrong FR %z", tb->FR[0]);
112  RFALSE(tb->blknum[0] > 1,
113  "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
114  RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
115  "PAP-12010: tree can not be empty");
116 
117  ih = B_N_PITEM_HEAD(tbS0, item_pos);
118  buffer_info_init_tbS0(tb, &bi);
119 
120  /* Delete or truncate the item */
121 
122  switch (flag) {
123  case M_DELETE: /* delete item in S[0] */
124 
125  RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
126  "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
127  -tb->insert_size[0], ih);
128 
129  leaf_delete_items(&bi, 0, item_pos, 1, -1);
130 
131  if (!item_pos && tb->CFL[0]) {
132  if (B_NR_ITEMS(tbS0)) {
133  replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
134  0);
135  } else {
136  if (!PATH_H_POSITION(tb->tb_path, 1))
137  replace_key(tb, tb->CFL[0], tb->lkey[0],
139  0), 0);
140  }
141  }
142 
143  RFALSE(!item_pos && !tb->CFL[0],
144  "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
145  tb->L[0]);
146 
147  break;
148 
149  case M_CUT:{ /* cut item in S[0] */
150  if (is_direntry_le_ih(ih)) {
151 
152  /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
153  /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
154  tb->insert_size[0] = -1;
155  leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
156  -tb->insert_size[0]);
157 
158  RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
159  "PAP-12030: can not change delimiting key. CFL[0]=%p",
160  tb->CFL[0]);
161 
162  if (!item_pos && !pos_in_item && tb->CFL[0]) {
163  replace_key(tb, tb->CFL[0], tb->lkey[0],
164  tbS0, 0);
165  }
166  } else {
167  leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
168  -tb->insert_size[0]);
169 
170  RFALSE(!ih_item_len(ih),
171  "PAP-12035: cut must leave non-zero dynamic length of item");
172  }
173  break;
174  }
175 
176  default:
177  print_cur_tb("12040");
178  reiserfs_panic(tb->tb_sb, "PAP-12040",
179  "unexpected mode: %s(%d)",
180  (flag ==
181  M_PASTE) ? "PASTE" : ((flag ==
182  M_INSERT) ? "INSERT" :
183  "UNKNOWN"), flag);
184  }
185 
186  /* the rule is that no shifting occurs unless by shifting a node can be freed */
187  n = B_NR_ITEMS(tbS0);
188  if (tb->lnum[0]) { /* L[0] takes part in balancing */
189  if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */
190  if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */
191  if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
192  /* all contents of all the 3 buffers will be in L[0] */
193  if (PATH_H_POSITION(tb->tb_path, 1) == 0
194  && 1 < B_NR_ITEMS(tb->FR[0]))
195  replace_key(tb, tb->CFL[0],
196  tb->lkey[0],
197  tb->FR[0], 1);
198 
200  -1, NULL);
202  B_NR_ITEMS(tb->R[0]),
203  -1, NULL);
204 
205  reiserfs_invalidate_buffer(tb, tbS0);
207  tb->R[0]);
208 
209  return 0;
210  }
211  /* all contents of all the 3 buffers will be in R[0] */
213  NULL);
215  B_NR_ITEMS(tb->L[0]), -1, NULL);
216 
217  /* right_delimiting_key is correct in R[0] */
218  replace_key(tb, tb->CFR[0], tb->rkey[0],
219  tb->R[0], 0);
220 
221  reiserfs_invalidate_buffer(tb, tbS0);
222  reiserfs_invalidate_buffer(tb, tb->L[0]);
223 
224  return -1;
225  }
226 
227  RFALSE(tb->rnum[0] != 0,
228  "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
229  /* all contents of L[0] and S[0] will be in L[0] */
230  leaf_shift_left(tb, n, -1);
231 
232  reiserfs_invalidate_buffer(tb, tbS0);
233 
234  return 0;
235  }
236  /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
237 
238  RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
239  (tb->lnum[0] + tb->rnum[0] > n + 1),
240  "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
241  tb->rnum[0], tb->lnum[0], n);
242  RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
243  (tb->lbytes != -1 || tb->rbytes != -1),
244  "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
245  tb->rbytes, tb->lbytes);
246  RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
247  (tb->lbytes < 1 || tb->rbytes != -1),
248  "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
249  tb->rbytes, tb->lbytes);
250 
251  leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
252  leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
253 
254  reiserfs_invalidate_buffer(tb, tbS0);
255 
256  return 0;
257  }
258 
259  if (tb->rnum[0] == -1) {
260  /* all contents of R[0] and S[0] will be in R[0] */
261  leaf_shift_right(tb, n, -1);
262  reiserfs_invalidate_buffer(tb, tbS0);
263  return 0;
264  }
265 
266  RFALSE(tb->rnum[0],
267  "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
268  return 0;
269 }
270 
271 static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */
272  const char *body, /* body of inserted item or bytes to paste */
273  int flag, /* i - insert, d - delete, c - cut, p - paste
274  (see comment to do_balance) */
275  struct item_head *insert_key, /* in our processing of one level we sometimes determine what
276  must be inserted into the next higher level. This insertion
277  consists of a key or two keys and their corresponding
278  pointers */
279  struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */
280  )
281 {
282  struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
283  int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0]
284  of the affected item */
285  struct buffer_info bi;
286  struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
287  int snum[2]; /* number of items that will be placed
288  into S_new (includes partially shifted
289  items) */
290  int sbytes[2]; /* if an item is partially shifted into S_new then
291  if it is a directory item
292  it is the number of entries from the item that are shifted into S_new
293  else
294  it is the number of bytes from the item that are shifted into S_new
295  */
296  int n, i;
297  int ret_val;
298  int pos_in_item;
299  int zeros_num;
300 
301  PROC_INFO_INC(tb->tb_sb, balance_at[0]);
302 
303  /* Make balance in case insert_size[0] < 0 */
304  if (tb->insert_size[0] < 0)
305  return balance_leaf_when_delete(tb, flag);
306 
307  zeros_num = 0;
308  if (flag == M_INSERT && !body)
309  zeros_num = ih_item_len(ih);
310 
311  pos_in_item = tb->tb_path->pos_in_item;
312  /* for indirect item pos_in_item is measured in unformatted node
313  pointers. Recalculate to bytes */
314  if (flag != M_INSERT
315  && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
316  pos_in_item *= UNFM_P_SIZE;
317 
318  if (tb->lnum[0] > 0) {
319  /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
320  if (item_pos < tb->lnum[0]) {
321  /* new item or it part falls to L[0], shift it too */
322  n = B_NR_ITEMS(tb->L[0]);
323 
324  switch (flag) {
325  case M_INSERT: /* insert item into L[0] */
326 
327  if (item_pos == tb->lnum[0] - 1
328  && tb->lbytes != -1) {
329  /* part of new item falls into L[0] */
330  int new_item_len;
331  int version;
332 
333  ret_val =
334  leaf_shift_left(tb, tb->lnum[0] - 1,
335  -1);
336 
337  /* Calculate item length to insert to S[0] */
338  new_item_len =
339  ih_item_len(ih) - tb->lbytes;
340  /* Calculate and check item length to insert to L[0] */
341  put_ih_item_len(ih,
342  ih_item_len(ih) -
343  new_item_len);
344 
345  RFALSE(ih_item_len(ih) <= 0,
346  "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
347  ih_item_len(ih));
348 
349  /* Insert new item into L[0] */
350  buffer_info_init_left(tb, &bi);
352  n + item_pos -
353  ret_val, ih, body,
354  zeros_num >
355  ih_item_len(ih) ?
356  ih_item_len(ih) :
357  zeros_num);
358 
359  version = ih_version(ih);
360 
361  /* Calculate key component, item length and body to insert into S[0] */
362  set_le_ih_k_offset(ih,
363  le_ih_k_offset(ih) +
364  (tb->
365  lbytes <<
366  (is_indirect_le_ih
367  (ih) ? tb->tb_sb->
368  s_blocksize_bits -
369  UNFM_P_SHIFT :
370  0)));
371 
372  put_ih_item_len(ih, new_item_len);
373  if (tb->lbytes > zeros_num) {
374  body +=
375  (tb->lbytes - zeros_num);
376  zeros_num = 0;
377  } else
378  zeros_num -= tb->lbytes;
379 
380  RFALSE(ih_item_len(ih) <= 0,
381  "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
382  ih_item_len(ih));
383  } else {
384  /* new item in whole falls into L[0] */
385  /* Shift lnum[0]-1 items to L[0] */
386  ret_val =
387  leaf_shift_left(tb, tb->lnum[0] - 1,
388  tb->lbytes);
389  /* Insert new item into L[0] */
390  buffer_info_init_left(tb, &bi);
392  n + item_pos -
393  ret_val, ih, body,
394  zeros_num);
395  tb->insert_size[0] = 0;
396  zeros_num = 0;
397  }
398  break;
399 
400  case M_PASTE: /* append item in L[0] */
401 
402  if (item_pos == tb->lnum[0] - 1
403  && tb->lbytes != -1) {
404  /* we must shift the part of the appended item */
405  if (is_direntry_le_ih
406  (B_N_PITEM_HEAD(tbS0, item_pos))) {
407 
408  RFALSE(zeros_num,
409  "PAP-12090: invalid parameter in case of a directory");
410  /* directory item */
411  if (tb->lbytes > pos_in_item) {
412  /* new directory entry falls into L[0] */
413  struct item_head
414  *pasted;
415  int l_pos_in_item =
416  pos_in_item;
417 
418  /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
419  ret_val =
420  leaf_shift_left(tb,
421  tb->
422  lnum
423  [0],
424  tb->
425  lbytes
426  -
427  1);
428  if (ret_val
429  && !item_pos) {
430  pasted =
432  (tb->L[0],
433  B_NR_ITEMS
434  (tb->
435  L[0]) -
436  1);
437  l_pos_in_item +=
439  (pasted) -
440  (tb->
441  lbytes -
442  1);
443  }
444 
445  /* Append given directory entry to directory item */
446  buffer_info_init_left(tb, &bi);
448  (&bi,
449  n + item_pos -
450  ret_val,
451  l_pos_in_item,
452  tb->insert_size[0],
453  body, zeros_num);
454 
455  /* previous string prepared space for pasting new entry, following string pastes this entry */
456 
457  /* when we have merge directory item, pos_in_item has been changed too */
458 
459  /* paste new directory entry. 1 is entry number */
461  n +
462  item_pos
463  -
464  ret_val,
465  l_pos_in_item,
466  1,
467  (struct
469  *)
470  body,
471  body
472  +
473  DEH_SIZE,
474  tb->
475  insert_size
476  [0]
477  );
478  tb->insert_size[0] = 0;
479  } else {
480  /* new directory item doesn't fall into L[0] */
481  /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
482  leaf_shift_left(tb,
483  tb->
484  lnum[0],
485  tb->
486  lbytes);
487  }
488  /* Calculate new position to append in item body */
489  pos_in_item -= tb->lbytes;
490  } else {
491  /* regular object */
492  RFALSE(tb->lbytes <= 0,
493  "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
494  tb->lbytes);
495  RFALSE(pos_in_item !=
498  (tbS0, item_pos)),
499  "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
502  (tbS0, item_pos)),
503  pos_in_item);
504 
505  if (tb->lbytes >= pos_in_item) {
506  /* appended item will be in L[0] in whole */
507  int l_n;
508 
509  /* this bytes number must be appended to the last item of L[h] */
510  l_n =
511  tb->lbytes -
512  pos_in_item;
513 
514  /* Calculate new insert_size[0] */
515  tb->insert_size[0] -=
516  l_n;
517 
518  RFALSE(tb->
519  insert_size[0] <=
520  0,
521  "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
522  tb->
523  insert_size[0]);
524  ret_val =
525  leaf_shift_left(tb,
526  tb->
527  lnum
528  [0],
531  (tbS0,
532  item_pos)));
533  /* Append to body of item in L[0] */
534  buffer_info_init_left(tb, &bi);
536  (&bi,
537  n + item_pos -
538  ret_val,
541  (tb->L[0],
542  n + item_pos -
543  ret_val)), l_n,
544  body,
545  zeros_num >
546  l_n ? l_n :
547  zeros_num);
548  /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
549  {
550  int version;
551  int temp_l =
552  l_n;
553 
554  RFALSE
555  (ih_item_len
557  (tbS0,
558  0)),
559  "PAP-12106: item length must be 0");
560  RFALSE
562  (B_N_PKEY
563  (tbS0, 0),
564  B_N_PKEY
565  (tb->L[0],
566  n +
567  item_pos
568  -
569  ret_val)),
570  "PAP-12107: items must be of the same file");
571  if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
572  temp_l =
573  l_n
574  <<
575  (tb->
576  tb_sb->
577  s_blocksize_bits
578  -
579  UNFM_P_SHIFT);
580  }
581  /* update key of first item in S0 */
582  version =
583  ih_version
585  (tbS0, 0));
586  set_le_key_k_offset
587  (version,
588  B_N_PKEY
589  (tbS0, 0),
590  le_key_k_offset
591  (version,
592  B_N_PKEY
593  (tbS0,
594  0)) +
595  temp_l);
596  /* update left delimiting key */
597  set_le_key_k_offset
598  (version,
600  (tb->
601  CFL[0],
602  tb->
603  lkey[0]),
604  le_key_k_offset
605  (version,
607  (tb->
608  CFL[0],
609  tb->
610  lkey[0]))
611  + temp_l);
612  }
613 
614  /* Calculate new body, position in item and insert_size[0] */
615  if (l_n > zeros_num) {
616  body +=
617  (l_n -
618  zeros_num);
619  zeros_num = 0;
620  } else
621  zeros_num -=
622  l_n;
623  pos_in_item = 0;
624 
625  RFALSE
627  (B_N_PKEY(tbS0, 0),
628  B_N_PKEY(tb->L[0],
629  B_NR_ITEMS
630  (tb->
631  L[0]) -
632  1))
633  ||
635  (B_N_PKEY(tbS0, 0),
636  tbS0->b_size)
637  ||
640  (tb->CFL[0],
641  tb->lkey[0]),
642  tbS0->b_size),
643  "PAP-12120: item must be merge-able with left neighboring item");
644  } else { /* only part of the appended item will be in L[0] */
645 
646  /* Calculate position in item for append in S[0] */
647  pos_in_item -=
648  tb->lbytes;
649 
650  RFALSE(pos_in_item <= 0,
651  "PAP-12125: no place for paste. pos_in_item=%d",
652  pos_in_item);
653 
654  /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
655  leaf_shift_left(tb,
656  tb->
657  lnum[0],
658  tb->
659  lbytes);
660  }
661  }
662  } else { /* appended item will be in L[0] in whole */
663 
664  struct item_head *pasted;
665 
666  if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */
667  /* then increment pos_in_item by the size of the last item in L[0] */
668  pasted =
669  B_N_PITEM_HEAD(tb->L[0],
670  n - 1);
671  if (is_direntry_le_ih(pasted))
672  pos_in_item +=
674  (pasted);
675  else
676  pos_in_item +=
677  ih_item_len(pasted);
678  }
679 
680  /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
681  ret_val =
682  leaf_shift_left(tb, tb->lnum[0],
683  tb->lbytes);
684  /* Append to body of item in L[0] */
685  buffer_info_init_left(tb, &bi);
687  n + item_pos -
688  ret_val,
689  pos_in_item,
690  tb->insert_size[0],
691  body, zeros_num);
692 
693  /* if appended item is directory, paste entry */
694  pasted =
695  B_N_PITEM_HEAD(tb->L[0],
696  n + item_pos -
697  ret_val);
698  if (is_direntry_le_ih(pasted))
700  n +
701  item_pos -
702  ret_val,
703  pos_in_item,
704  1,
705  (struct
707  *)body,
708  body +
709  DEH_SIZE,
710  tb->
711  insert_size
712  [0]
713  );
714  /* if appended item is indirect item, put unformatted node into un list */
715  if (is_indirect_le_ih(pasted))
716  set_ih_free_space(pasted, 0);
717  tb->insert_size[0] = 0;
718  zeros_num = 0;
719  }
720  break;
721  default: /* cases d and t */
722  reiserfs_panic(tb->tb_sb, "PAP-12130",
723  "lnum > 0: unexpected mode: "
724  " %s(%d)",
725  (flag ==
726  M_DELETE) ? "DELETE" : ((flag ==
727  M_CUT)
728  ? "CUT"
729  :
730  "UNKNOWN"),
731  flag);
732  }
733  } else {
734  /* new item doesn't fall into L[0] */
735  leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
736  }
737  }
738 
739  /* tb->lnum[0] > 0 */
740  /* Calculate new item position */
741  item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
742 
743  if (tb->rnum[0] > 0) {
744  /* shift rnum[0] items from S[0] to the right neighbor R[0] */
745  n = B_NR_ITEMS(tbS0);
746  switch (flag) {
747 
748  case M_INSERT: /* insert item */
749  if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */
750  if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */
751  loff_t old_key_comp, old_len,
752  r_zeros_number;
753  const char *r_body;
754  int version;
755  loff_t offset;
756 
757  leaf_shift_right(tb, tb->rnum[0] - 1,
758  -1);
759 
760  version = ih_version(ih);
761  /* Remember key component and item length */
762  old_key_comp = le_ih_k_offset(ih);
763  old_len = ih_item_len(ih);
764 
765  /* Calculate key component and item length to insert into R[0] */
766  offset =
767  le_ih_k_offset(ih) +
768  ((old_len -
769  tb->
770  rbytes) << (is_indirect_le_ih(ih)
771  ? tb->tb_sb->
772  s_blocksize_bits -
773  UNFM_P_SHIFT : 0));
774  set_le_ih_k_offset(ih, offset);
775  put_ih_item_len(ih, tb->rbytes);
776  /* Insert part of the item into R[0] */
777  buffer_info_init_right(tb, &bi);
778  if ((old_len - tb->rbytes) > zeros_num) {
779  r_zeros_number = 0;
780  r_body =
781  body + (old_len -
782  tb->rbytes) -
783  zeros_num;
784  } else {
785  r_body = body;
786  r_zeros_number =
787  zeros_num - (old_len -
788  tb->rbytes);
789  zeros_num -= r_zeros_number;
790  }
791 
792  leaf_insert_into_buf(&bi, 0, ih, r_body,
793  r_zeros_number);
794 
795  /* Replace right delimiting key by first key in R[0] */
796  replace_key(tb, tb->CFR[0], tb->rkey[0],
797  tb->R[0], 0);
798 
799  /* Calculate key component and item length to insert into S[0] */
800  set_le_ih_k_offset(ih, old_key_comp);
801  put_ih_item_len(ih,
802  old_len - tb->rbytes);
803 
804  tb->insert_size[0] -= tb->rbytes;
805 
806  } else { /* whole new item falls into R[0] */
807 
808  /* Shift rnum[0]-1 items to R[0] */
809  ret_val =
810  leaf_shift_right(tb,
811  tb->rnum[0] - 1,
812  tb->rbytes);
813  /* Insert new item into R[0] */
814  buffer_info_init_right(tb, &bi);
816  item_pos - n +
817  tb->rnum[0] - 1,
818  ih, body,
819  zeros_num);
820 
821  if (item_pos - n + tb->rnum[0] - 1 == 0) {
822  replace_key(tb, tb->CFR[0],
823  tb->rkey[0],
824  tb->R[0], 0);
825 
826  }
827  zeros_num = tb->insert_size[0] = 0;
828  }
829  } else { /* new item or part of it doesn't fall into R[0] */
830 
831  leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
832  }
833  break;
834 
835  case M_PASTE: /* append item */
836 
837  if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */
838  if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */
839  if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */
840  int entry_count;
841 
842  RFALSE(zeros_num,
843  "PAP-12145: invalid parameter in case of a directory");
844  entry_count =
846  (tbS0,
847  item_pos));
848  if (entry_count - tb->rbytes <
849  pos_in_item)
850  /* new directory entry falls into R[0] */
851  {
852  int paste_entry_position;
853 
854  RFALSE(tb->rbytes - 1 >=
855  entry_count
856  || !tb->
857  insert_size[0],
858  "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
859  tb->rbytes,
860  entry_count);
861  /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
862  leaf_shift_right(tb,
863  tb->
864  rnum
865  [0],
866  tb->
867  rbytes
868  - 1);
869  /* Paste given directory entry to directory item */
870  paste_entry_position =
871  pos_in_item -
872  entry_count +
873  tb->rbytes - 1;
874  buffer_info_init_right(tb, &bi);
876  (&bi, 0,
877  paste_entry_position,
878  tb->insert_size[0],
879  body, zeros_num);
880  /* paste entry */
882  0,
883  paste_entry_position,
884  1,
885  (struct
887  *)
888  body,
889  body
890  +
891  DEH_SIZE,
892  tb->
893  insert_size
894  [0]
895  );
896 
897  if (paste_entry_position
898  == 0) {
899  /* change delimiting keys */
900  replace_key(tb,
901  tb->
902  CFR
903  [0],
904  tb->
905  rkey
906  [0],
907  tb->
908  R
909  [0],
910  0);
911  }
912 
913  tb->insert_size[0] = 0;
914  pos_in_item++;
915  } else { /* new directory entry doesn't fall into R[0] */
916 
917  leaf_shift_right(tb,
918  tb->
919  rnum
920  [0],
921  tb->
922  rbytes);
923  }
924  } else { /* regular object */
925 
926  int n_shift, n_rem,
927  r_zeros_number;
928  const char *r_body;
929 
930  /* Calculate number of bytes which must be shifted from appended item */
931  if ((n_shift =
932  tb->rbytes -
933  tb->insert_size[0]) < 0)
934  n_shift = 0;
935 
936  RFALSE(pos_in_item !=
939  (tbS0, item_pos)),
940  "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
941  pos_in_item,
944  (tbS0, item_pos)));
945 
946  leaf_shift_right(tb,
947  tb->rnum[0],
948  n_shift);
949  /* Calculate number of bytes which must remain in body after appending to R[0] */
950  if ((n_rem =
951  tb->insert_size[0] -
952  tb->rbytes) < 0)
953  n_rem = 0;
954 
955  {
956  int version;
957  unsigned long temp_rem =
958  n_rem;
959 
960  version =
961  ih_version
963  (tb->R[0], 0));
964  if (is_indirect_le_key
965  (version,
966  B_N_PKEY(tb->R[0],
967  0))) {
968  temp_rem =
969  n_rem <<
970  (tb->tb_sb->
971  s_blocksize_bits
972  -
973  UNFM_P_SHIFT);
974  }
975  set_le_key_k_offset
976  (version,
977  B_N_PKEY(tb->R[0],
978  0),
979  le_key_k_offset
980  (version,
981  B_N_PKEY(tb->R[0],
982  0)) +
983  temp_rem);
984  set_le_key_k_offset
985  (version,
986  B_N_PDELIM_KEY(tb->
987  CFR
988  [0],
989  tb->
990  rkey
991  [0]),
992  le_key_k_offset
993  (version,
995  (tb->CFR[0],
996  tb->rkey[0])) +
997  temp_rem);
998  }
999 /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
1000  k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
1002  (tb, tb->CFR[0], 0);
1003 
1004  /* Append part of body into R[0] */
1005  buffer_info_init_right(tb, &bi);
1006  if (n_rem > zeros_num) {
1007  r_zeros_number = 0;
1008  r_body =
1009  body + n_rem -
1010  zeros_num;
1011  } else {
1012  r_body = body;
1013  r_zeros_number =
1014  zeros_num - n_rem;
1015  zeros_num -=
1016  r_zeros_number;
1017  }
1018 
1020  n_shift,
1021  tb->
1022  insert_size
1023  [0] -
1024  n_rem,
1025  r_body,
1026  r_zeros_number);
1027 
1028  if (is_indirect_le_ih
1030  (tb->R[0], 0))) {
1031 #if 0
1032  RFALSE(n_rem,
1033  "PAP-12160: paste more than one unformatted node pointer");
1034 #endif
1037  (tb->R[0], 0), 0);
1038  }
1039  tb->insert_size[0] = n_rem;
1040  if (!n_rem)
1041  pos_in_item++;
1042  }
1043  } else { /* pasted item in whole falls into R[0] */
1044 
1045  struct item_head *pasted;
1046 
1047  ret_val =
1048  leaf_shift_right(tb, tb->rnum[0],
1049  tb->rbytes);
1050  /* append item in R[0] */
1051  if (pos_in_item >= 0) {
1052  buffer_info_init_right(tb, &bi);
1054  item_pos -
1055  n +
1056  tb->
1057  rnum[0],
1058  pos_in_item,
1059  tb->
1060  insert_size
1061  [0], body,
1062  zeros_num);
1063  }
1064 
1065  /* paste new entry, if item is directory item */
1066  pasted =
1067  B_N_PITEM_HEAD(tb->R[0],
1068  item_pos - n +
1069  tb->rnum[0]);
1070  if (is_direntry_le_ih(pasted)
1071  && pos_in_item >= 0) {
1073  item_pos -
1074  n +
1075  tb->rnum[0],
1076  pos_in_item,
1077  1,
1078  (struct
1080  *)body,
1081  body +
1082  DEH_SIZE,
1083  tb->
1084  insert_size
1085  [0]
1086  );
1087  if (!pos_in_item) {
1088 
1089  RFALSE(item_pos - n +
1090  tb->rnum[0],
1091  "PAP-12165: directory item must be first item of node when pasting is in 0th position");
1092 
1093  /* update delimiting keys */
1094  replace_key(tb,
1095  tb->CFR[0],
1096  tb->rkey[0],
1097  tb->R[0],
1098  0);
1099  }
1100  }
1101 
1102  if (is_indirect_le_ih(pasted))
1103  set_ih_free_space(pasted, 0);
1104  zeros_num = tb->insert_size[0] = 0;
1105  }
1106  } else { /* new item doesn't fall into R[0] */
1107 
1108  leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
1109  }
1110  break;
1111  default: /* cases d and t */
1112  reiserfs_panic(tb->tb_sb, "PAP-12175",
1113  "rnum > 0: unexpected mode: %s(%d)",
1114  (flag ==
1115  M_DELETE) ? "DELETE" : ((flag ==
1116  M_CUT) ? "CUT"
1117  : "UNKNOWN"),
1118  flag);
1119  }
1120 
1121  }
1122 
1123  /* tb->rnum[0] > 0 */
1124  RFALSE(tb->blknum[0] > 3,
1125  "PAP-12180: blknum can not be %d. It must be <= 3",
1126  tb->blknum[0]);
1127  RFALSE(tb->blknum[0] < 0,
1128  "PAP-12185: blknum can not be %d. It must be >= 0",
1129  tb->blknum[0]);
1130 
1131  /* if while adding to a node we discover that it is possible to split
1132  it in two, and merge the left part into the left neighbor and the
1133  right part into the right neighbor, eliminating the node */
1134  if (tb->blknum[0] == 0) { /* node S[0] is empty now */
1135 
1136  RFALSE(!tb->lnum[0] || !tb->rnum[0],
1137  "PAP-12190: lnum and rnum must not be zero");
1138  /* if insertion was done before 0-th position in R[0], right
1139  delimiting key of the tb->L[0]'s and left delimiting key are
1140  not set correctly */
1141  if (tb->CFL[0]) {
1142  if (!tb->CFR[0])
1143  reiserfs_panic(tb->tb_sb, "vs-12195",
1144  "CFR not initialized");
1145  copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
1146  B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
1147  do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
1148  }
1149 
1150  reiserfs_invalidate_buffer(tb, tbS0);
1151  return 0;
1152  }
1153 
1154  /* Fill new nodes that appear in place of S[0] */
1155 
1156  /* I am told that this copying is because we need an array to enable
1157  the looping code. -Hans */
1158  snum[0] = tb->s1num, snum[1] = tb->s2num;
1159  sbytes[0] = tb->s1bytes;
1160  sbytes[1] = tb->s2bytes;
1161  for (i = tb->blknum[0] - 2; i >= 0; i--) {
1162 
1163  RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
1164  snum[i]);
1165 
1166  /* here we shift from S to S_new nodes */
1167 
1168  S_new[i] = get_FEB(tb);
1169 
1170  /* initialized block type and tree level */
1172 
1173  n = B_NR_ITEMS(tbS0);
1174 
1175  switch (flag) {
1176  case M_INSERT: /* insert item */
1177 
1178  if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */
1179  if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */
1180  int old_key_comp, old_len,
1181  r_zeros_number;
1182  const char *r_body;
1183  int version;
1184 
1185  /* Move snum[i]-1 items from S[0] to S_new[i] */
1187  snum[i] - 1, -1,
1188  S_new[i]);
1189  /* Remember key component and item length */
1190  version = ih_version(ih);
1191  old_key_comp = le_ih_k_offset(ih);
1192  old_len = ih_item_len(ih);
1193 
1194  /* Calculate key component and item length to insert into S_new[i] */
1195  set_le_ih_k_offset(ih,
1196  le_ih_k_offset(ih) +
1197  ((old_len -
1198  sbytes[i]) <<
1199  (is_indirect_le_ih
1200  (ih) ? tb->tb_sb->
1201  s_blocksize_bits -
1202  UNFM_P_SHIFT :
1203  0)));
1204 
1205  put_ih_item_len(ih, sbytes[i]);
1206 
1207  /* Insert part of the item into S_new[i] before 0-th item */
1208  buffer_info_init_bh(tb, &bi, S_new[i]);
1209 
1210  if ((old_len - sbytes[i]) > zeros_num) {
1211  r_zeros_number = 0;
1212  r_body =
1213  body + (old_len -
1214  sbytes[i]) -
1215  zeros_num;
1216  } else {
1217  r_body = body;
1218  r_zeros_number =
1219  zeros_num - (old_len -
1220  sbytes[i]);
1221  zeros_num -= r_zeros_number;
1222  }
1223 
1224  leaf_insert_into_buf(&bi, 0, ih, r_body,
1225  r_zeros_number);
1226 
1227  /* Calculate key component and item length to insert into S[i] */
1228  set_le_ih_k_offset(ih, old_key_comp);
1229  put_ih_item_len(ih,
1230  old_len - sbytes[i]);
1231  tb->insert_size[0] -= sbytes[i];
1232  } else { /* whole new item falls into S_new[i] */
1233 
1234  /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
1236  snum[i] - 1, sbytes[i],
1237  S_new[i]);
1238 
1239  /* Insert new item into S_new[i] */
1240  buffer_info_init_bh(tb, &bi, S_new[i]);
1242  item_pos - n +
1243  snum[i] - 1, ih,
1244  body, zeros_num);
1245 
1246  zeros_num = tb->insert_size[0] = 0;
1247  }
1248  }
1249 
1250  else { /* new item or it part don't falls into S_new[i] */
1251 
1253  snum[i], sbytes[i], S_new[i]);
1254  }
1255  break;
1256 
1257  case M_PASTE: /* append item */
1258 
1259  if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */
1260  if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */
1261  struct item_head *aux_ih;
1262 
1263  RFALSE(ih, "PAP-12210: ih must be 0");
1264 
1265  aux_ih = B_N_PITEM_HEAD(tbS0, item_pos);
1266  if (is_direntry_le_ih(aux_ih)) {
1267  /* we append to directory item */
1268 
1269  int entry_count;
1270 
1271  entry_count =
1272  ih_entry_count(aux_ih);
1273 
1274  if (entry_count - sbytes[i] <
1275  pos_in_item
1276  && pos_in_item <=
1277  entry_count) {
1278  /* new directory entry falls into S_new[i] */
1279 
1280  RFALSE(!tb->
1281  insert_size[0],
1282  "PAP-12215: insert_size is already 0");
1283  RFALSE(sbytes[i] - 1 >=
1284  entry_count,
1285  "PAP-12220: there are no so much entries (%d), only %d",
1286  sbytes[i] - 1,
1287  entry_count);
1288 
1289  /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
1292  tb, snum[i],
1293  sbytes[i] - 1,
1294  S_new[i]);
1295  /* Paste given directory entry to directory item */
1296  buffer_info_init_bh(tb, &bi, S_new[i]);
1298  (&bi, 0,
1299  pos_in_item -
1300  entry_count +
1301  sbytes[i] - 1,
1302  tb->insert_size[0],
1303  body, zeros_num);
1304  /* paste new directory entry */
1306  0,
1307  pos_in_item
1308  -
1309  entry_count
1310  +
1311  sbytes
1312  [i] -
1313  1, 1,
1314  (struct
1316  *)
1317  body,
1318  body
1319  +
1320  DEH_SIZE,
1321  tb->
1322  insert_size
1323  [0]
1324  );
1325  tb->insert_size[0] = 0;
1326  pos_in_item++;
1327  } else { /* new directory entry doesn't fall into S_new[i] */
1330  tb, snum[i],
1331  sbytes[i],
1332  S_new[i]);
1333  }
1334  } else { /* regular object */
1335 
1336  int n_shift, n_rem,
1337  r_zeros_number;
1338  const char *r_body;
1339 
1340  RFALSE(pos_in_item !=
1341  ih_item_len
1343  (tbS0, item_pos))
1344  || tb->insert_size[0] <=
1345  0,
1346  "PAP-12225: item too short or insert_size <= 0");
1347 
1348  /* Calculate number of bytes which must be shifted from appended item */
1349  n_shift =
1350  sbytes[i] -
1351  tb->insert_size[0];
1352  if (n_shift < 0)
1353  n_shift = 0;
1355  (LEAF_FROM_S_TO_SNEW, tb,
1356  snum[i], n_shift,
1357  S_new[i]);
1358 
1359  /* Calculate number of bytes which must remain in body after append to S_new[i] */
1360  n_rem =
1361  tb->insert_size[0] -
1362  sbytes[i];
1363  if (n_rem < 0)
1364  n_rem = 0;
1365  /* Append part of body into S_new[0] */
1366  buffer_info_init_bh(tb, &bi, S_new[i]);
1367  if (n_rem > zeros_num) {
1368  r_zeros_number = 0;
1369  r_body =
1370  body + n_rem -
1371  zeros_num;
1372  } else {
1373  r_body = body;
1374  r_zeros_number =
1375  zeros_num - n_rem;
1376  zeros_num -=
1377  r_zeros_number;
1378  }
1379 
1381  n_shift,
1382  tb->
1383  insert_size
1384  [0] -
1385  n_rem,
1386  r_body,
1387  r_zeros_number);
1388  {
1389  struct item_head *tmp;
1390 
1391  tmp =
1392  B_N_PITEM_HEAD(S_new
1393  [i],
1394  0);
1395  if (is_indirect_le_ih
1396  (tmp)) {
1398  (tmp, 0);
1399  set_le_ih_k_offset
1400  (tmp,
1401  le_ih_k_offset
1402  (tmp) +
1403  (n_rem <<
1404  (tb->
1405  tb_sb->
1406  s_blocksize_bits
1407  -
1408  UNFM_P_SHIFT)));
1409  } else {
1410  set_le_ih_k_offset
1411  (tmp,
1412  le_ih_k_offset
1413  (tmp) +
1414  n_rem);
1415  }
1416  }
1417 
1418  tb->insert_size[0] = n_rem;
1419  if (!n_rem)
1420  pos_in_item++;
1421  }
1422  } else
1423  /* item falls wholly into S_new[i] */
1424  {
1425  int leaf_mi;
1426  struct item_head *pasted;
1427 
1428 #ifdef CONFIG_REISERFS_CHECK
1429  struct item_head *ih_check =
1430  B_N_PITEM_HEAD(tbS0, item_pos);
1431 
1432  if (!is_direntry_le_ih(ih_check)
1433  && (pos_in_item != ih_item_len(ih_check)
1434  || tb->insert_size[0] <= 0))
1435  reiserfs_panic(tb->tb_sb,
1436  "PAP-12235",
1437  "pos_in_item "
1438  "must be equal "
1439  "to ih_item_len");
1440 #endif /* CONFIG_REISERFS_CHECK */
1441 
1442  leaf_mi =
1444  tb, snum[i],
1445  sbytes[i],
1446  S_new[i]);
1447 
1448  RFALSE(leaf_mi,
1449  "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1450  leaf_mi);
1451 
1452  /* paste into item */
1453  buffer_info_init_bh(tb, &bi, S_new[i]);
1455  item_pos - n +
1456  snum[i],
1457  pos_in_item,
1458  tb->insert_size[0],
1459  body, zeros_num);
1460 
1461  pasted =
1462  B_N_PITEM_HEAD(S_new[i],
1463  item_pos - n +
1464  snum[i]);
1465  if (is_direntry_le_ih(pasted)) {
1467  item_pos -
1468  n + snum[i],
1469  pos_in_item,
1470  1,
1471  (struct
1473  *)body,
1474  body +
1475  DEH_SIZE,
1476  tb->
1477  insert_size
1478  [0]
1479  );
1480  }
1481 
1482  /* if we paste to indirect item update ih_free_space */
1483  if (is_indirect_le_ih(pasted))
1484  set_ih_free_space(pasted, 0);
1485  zeros_num = tb->insert_size[0] = 0;
1486  }
1487  }
1488 
1489  else { /* pasted item doesn't fall into S_new[i] */
1490 
1492  snum[i], sbytes[i], S_new[i]);
1493  }
1494  break;
1495  default: /* cases d and t */
1496  reiserfs_panic(tb->tb_sb, "PAP-12245",
1497  "blknum > 2: unexpected mode: %s(%d)",
1498  (flag ==
1499  M_DELETE) ? "DELETE" : ((flag ==
1500  M_CUT) ? "CUT"
1501  : "UNKNOWN"),
1502  flag);
1503  }
1504 
1505  memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
1506  insert_ptr[i] = S_new[i];
1507 
1508  RFALSE(!buffer_journaled(S_new[i])
1509  || buffer_journal_dirty(S_new[i])
1510  || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1511  i, S_new[i]);
1512  }
1513 
1514  /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1515  affected item which remains in S */
1516  if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */
1517 
1518  switch (flag) {
1519  case M_INSERT: /* insert item into S[0] */
1520  buffer_info_init_tbS0(tb, &bi);
1521  leaf_insert_into_buf(&bi, item_pos, ih, body,
1522  zeros_num);
1523 
1524  /* If we insert the first key change the delimiting key */
1525  if (item_pos == 0) {
1526  if (tb->CFL[0]) /* can be 0 in reiserfsck */
1527  replace_key(tb, tb->CFL[0], tb->lkey[0],
1528  tbS0, 0);
1529 
1530  }
1531  break;
1532 
1533  case M_PASTE:{ /* append item in S[0] */
1534  struct item_head *pasted;
1535 
1536  pasted = B_N_PITEM_HEAD(tbS0, item_pos);
1537  /* when directory, may be new entry already pasted */
1538  if (is_direntry_le_ih(pasted)) {
1539  if (pos_in_item >= 0 &&
1540  pos_in_item <=
1541  ih_entry_count(pasted)) {
1542 
1543  RFALSE(!tb->insert_size[0],
1544  "PAP-12260: insert_size is 0 already");
1545 
1546  /* prepare space */
1547  buffer_info_init_tbS0(tb, &bi);
1549  item_pos,
1550  pos_in_item,
1551  tb->
1552  insert_size
1553  [0], body,
1554  zeros_num);
1555 
1556  /* paste entry */
1558  item_pos,
1559  pos_in_item,
1560  1,
1561  (struct
1563  *)body,
1564  body +
1565  DEH_SIZE,
1566  tb->
1567  insert_size
1568  [0]
1569  );
1570  if (!item_pos && !pos_in_item) {
1571  RFALSE(!tb->CFL[0]
1572  || !tb->L[0],
1573  "PAP-12270: CFL[0]/L[0] must be specified");
1574  if (tb->CFL[0]) {
1575  replace_key(tb,
1576  tb->
1577  CFL
1578  [0],
1579  tb->
1580  lkey
1581  [0],
1582  tbS0,
1583  0);
1584 
1585  }
1586  }
1587  tb->insert_size[0] = 0;
1588  }
1589  } else { /* regular object */
1590  if (pos_in_item == ih_item_len(pasted)) {
1591 
1592  RFALSE(tb->insert_size[0] <= 0,
1593  "PAP-12275: insert size must not be %d",
1594  tb->insert_size[0]);
1595  buffer_info_init_tbS0(tb, &bi);
1597  item_pos,
1598  pos_in_item,
1599  tb->
1600  insert_size
1601  [0], body,
1602  zeros_num);
1603 
1604  if (is_indirect_le_ih(pasted)) {
1605 #if 0
1606  RFALSE(tb->
1607  insert_size[0] !=
1608  UNFM_P_SIZE,
1609  "PAP-12280: insert_size for indirect item must be %d, not %d",
1610  UNFM_P_SIZE,
1611  tb->
1612  insert_size[0]);
1613 #endif
1615  (pasted, 0);
1616  }
1617  tb->insert_size[0] = 0;
1618  }
1619 #ifdef CONFIG_REISERFS_CHECK
1620  else {
1621  if (tb->insert_size[0]) {
1622  print_cur_tb("12285");
1623  reiserfs_panic(tb->
1624  tb_sb,
1625  "PAP-12285",
1626  "insert_size "
1627  "must be 0 "
1628  "(%d)",
1629  tb->insert_size[0]);
1630  }
1631  }
1632 #endif /* CONFIG_REISERFS_CHECK */
1633 
1634  }
1635  } /* case M_PASTE: */
1636  }
1637  }
1638 #ifdef CONFIG_REISERFS_CHECK
1639  if (flag == M_PASTE && tb->insert_size[0]) {
1640  print_cur_tb("12290");
1641  reiserfs_panic(tb->tb_sb,
1642  "PAP-12290", "insert_size is still not 0 (%d)",
1643  tb->insert_size[0]);
1644  }
1645 #endif /* CONFIG_REISERFS_CHECK */
1646  return 0;
1647 } /* Leaf level of the tree is balanced (end of balance_leaf) */
1648 
1649 /* Make empty node */
1651 {
1652  struct block_head *blkh;
1653 
1654  RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1655 
1656  blkh = B_BLK_HEAD(bi->bi_bh);
1657  set_blkh_nr_item(blkh, 0);
1659 
1660  if (bi->bi_parent)
1661  B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1662 }
1663 
1664 /* Get first empty buffer */
1665 struct buffer_head *get_FEB(struct tree_balance *tb)
1666 {
1667  int i;
1668  struct buffer_info bi;
1669 
1670  for (i = 0; i < MAX_FEB_SIZE; i++)
1671  if (tb->FEB[i] != NULL)
1672  break;
1673 
1674  if (i == MAX_FEB_SIZE)
1675  reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");
1676 
1677  buffer_info_init_bh(tb, &bi, tb->FEB[i]);
1678  make_empty_node(&bi);
1679  set_buffer_uptodate(tb->FEB[i]);
1680  tb->used[i] = tb->FEB[i];
1681  tb->FEB[i] = NULL;
1682 
1683  return tb->used[i];
1684 }
1685 
1686 /* This is now used because reiserfs_free_block has to be able to
1687 ** schedule.
1688 */
1689 static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1690 {
1691  int i;
1692 
1693  if (buffer_dirty(bh))
1694  reiserfs_warning(tb->tb_sb, "reiserfs-12320",
1695  "called with dirty buffer");
1696  for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
1697  if (!tb->thrown[i]) {
1698  tb->thrown[i] = bh;
1699  get_bh(bh); /* free_thrown puts this */
1700  return;
1701  }
1702  reiserfs_warning(tb->tb_sb, "reiserfs-12321",
1703  "too many thrown buffers");
1704 }
1705 
1706 static void free_thrown(struct tree_balance *tb)
1707 {
1708  int i;
1710  for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
1711  if (tb->thrown[i]) {
1712  blocknr = tb->thrown[i]->b_blocknr;
1713  if (buffer_dirty(tb->thrown[i]))
1714  reiserfs_warning(tb->tb_sb, "reiserfs-12322",
1715  "called with dirty buffer %d",
1716  blocknr);
1717  brelse(tb->thrown[i]); /* incremented in store_thrown */
1719  blocknr, 0);
1720  }
1721  }
1722 }
1723 
1724 void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
1725 {
1726  struct block_head *blkh;
1727  blkh = B_BLK_HEAD(bh);
1728  set_blkh_level(blkh, FREE_LEVEL);
1729  set_blkh_nr_item(blkh, 0);
1730 
1731  clear_buffer_dirty(bh);
1732  store_thrown(tb, bh);
1733 }
1734 
1735 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1736 void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1737  struct buffer_head *src, int n_src)
1738 {
1739 
1740  RFALSE(dest == NULL || src == NULL,
1741  "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1742  src, dest);
1743  RFALSE(!B_IS_KEYS_LEVEL(dest),
1744  "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1745  dest);
1746  RFALSE(n_dest < 0 || n_src < 0,
1747  "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1748  RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1749  "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1750  n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1751 
1752  if (B_IS_ITEMS_LEVEL(src))
1753  /* source buffer contains leaf node */
1754  memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
1755  KEY_SIZE);
1756  else
1757  memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
1758  KEY_SIZE);
1759 
1760  do_balance_mark_internal_dirty(tb, dest, 0);
1761 }
1762 
1764 {
1765  int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1766 
1767  RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
1768  "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1769  h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1770 
1771  if (Sh_position == 0)
1772  return B_NR_ITEMS(tb->FL[h]);
1773  else
1774  return Sh_position - 1;
1775 }
1776 
1778 {
1779  int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1780 
1781  RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
1782  "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1783  h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1784 
1785  if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1786  return 0;
1787  else
1788  return Sh_position + 1;
1789 }
1790 
1791 #ifdef CONFIG_REISERFS_CHECK
1792 
1793 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1794 static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1795  char *mes)
1796 {
1797  struct disk_child *dc;
1798  int i;
1799 
1800  RFALSE(!bh, "PAP-12336: bh == 0");
1801 
1802  if (!bh || !B_IS_IN_TREE(bh))
1803  return;
1804 
1805  RFALSE(!buffer_dirty(bh) &&
1806  !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1807  "PAP-12337: buffer (%b) must be dirty", bh);
1808  dc = B_N_CHILD(bh, 0);
1809 
1810  for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1811  if (!is_reusable(s, dc_block_number(dc), 1)) {
1812  print_cur_tb(mes);
1813  reiserfs_panic(s, "PAP-12338",
1814  "invalid child pointer %y in %b",
1815  dc, bh);
1816  }
1817  }
1818 }
1819 
1820 static int locked_or_not_in_tree(struct tree_balance *tb,
1821  struct buffer_head *bh, char *which)
1822 {
1823  if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1824  !B_IS_IN_TREE(bh)) {
1825  reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
1826  return 1;
1827  }
1828  return 0;
1829 }
1830 
1831 static int check_before_balancing(struct tree_balance *tb)
1832 {
1833  int retval = 0;
1834 
1835  if (REISERFS_SB(tb->tb_sb)->cur_tb) {
1836  reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
1837  "occurred based on cur_tb not being null at "
1838  "this point in code. do_balance cannot properly "
1839  "handle concurrent tree accesses on a same "
1840  "mount point.");
1841  }
1842 
1843  /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1844  prepped all of these for us). */
1845  if (tb->lnum[0]) {
1846  retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
1847  retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
1848  retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
1849  check_leaf(tb->L[0]);
1850  }
1851  if (tb->rnum[0]) {
1852  retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
1853  retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
1854  retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
1855  check_leaf(tb->R[0]);
1856  }
1857  retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
1858  "S[0]");
1860 
1861  return retval;
1862 }
1863 
1864 static void check_after_balance_leaf(struct tree_balance *tb)
1865 {
1866  if (tb->lnum[0]) {
1867  if (B_FREE_SPACE(tb->L[0]) !=
1868  MAX_CHILD_SIZE(tb->L[0]) -
1870  (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
1871  print_cur_tb("12221");
1872  reiserfs_panic(tb->tb_sb, "PAP-12355",
1873  "shift to left was incorrect");
1874  }
1875  }
1876  if (tb->rnum[0]) {
1877  if (B_FREE_SPACE(tb->R[0]) !=
1878  MAX_CHILD_SIZE(tb->R[0]) -
1880  (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
1881  print_cur_tb("12222");
1882  reiserfs_panic(tb->tb_sb, "PAP-12360",
1883  "shift to right was incorrect");
1884  }
1885  }
1886  if (PATH_H_PBUFFER(tb->tb_path, 1) &&
1887  (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
1890  PATH_H_POSITION(tb->tb_path, 1)))))) {
1891  int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
1892  int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1895  1))));
1896  print_cur_tb("12223");
1897  reiserfs_warning(tb->tb_sb, "reiserfs-12363",
1898  "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1899  "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1900  left,
1902  PATH_H_PBUFFER(tb->tb_path, 1),
1903  PATH_H_POSITION(tb->tb_path, 1),
1905  (PATH_H_PBUFFER(tb->tb_path, 1),
1906  PATH_H_POSITION(tb->tb_path, 1))),
1907  right);
1908  reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
1909  }
1910 }
1911 
1912 static void check_leaf_level(struct tree_balance *tb)
1913 {
1914  check_leaf(tb->L[0]);
1915  check_leaf(tb->R[0]);
1917 }
1918 
1919 static void check_internal_levels(struct tree_balance *tb)
1920 {
1921  int h;
1922 
1923  /* check all internal nodes */
1924  for (h = 1; tb->insert_size[h]; h++) {
1925  check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1926  "BAD BUFFER ON PATH");
1927  if (tb->lnum[h])
1928  check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1929  if (tb->rnum[h])
1930  check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1931  }
1932 
1933 }
1934 
1935 #endif
1936 
1937 /* Now we have all of the buffers that must be used in balancing of
1938  the tree. We rely on the assumption that schedule() will not occur
1939  while do_balance works. ( Only interrupt handlers are acceptable.)
1940  We balance the tree according to the analysis made before this,
1941  using buffers already obtained. For SMP support it will someday be
1942  necessary to add ordered locking of tb. */
1943 
1944 /* Some interesting rules of balancing:
1945 
1946  we delete a maximum of two nodes per level per balancing: we never
1947  delete R, when we delete two of three nodes L, S, R then we move
1948  them into R.
1949 
1950  we only delete L if we are deleting two nodes, if we delete only
1951  one node we delete S
1952 
1953  if we shift leaves then we shift as much as we can: this is a
1954  deliberate policy of extremism in node packing which results in
1955  higher average utilization after repeated random balance operations
1956  at the cost of more memory copies and more balancing as a result of
1957  small insertions to full nodes.
1958 
1959  if we shift internal nodes we try to evenly balance the node
1960  utilization, with consequent less balancing at the cost of lower
1961  utilization.
1962 
1963  one could argue that the policy for directories in leaves should be
1964  that of internal nodes, but we will wait until another day to
1965  evaluate this.... It would be nice to someday measure and prove
1966  these assumptions as to what is optimal....
1967 
1968 */
1969 
1970 static inline void do_balance_starts(struct tree_balance *tb)
1971 {
1972  /* use print_cur_tb() to see initial state of struct
1973  tree_balance */
1974 
1975  /* store_print_tb (tb); */
1976 
1977  /* do not delete, just comment it out */
1978 /* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
1979  "check");*/
1980  RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
1981 #ifdef CONFIG_REISERFS_CHECK
1982  REISERFS_SB(tb->tb_sb)->cur_tb = tb;
1983 #endif
1984 }
1985 
1986 static inline void do_balance_completed(struct tree_balance *tb)
1987 {
1988 
1989 #ifdef CONFIG_REISERFS_CHECK
1990  check_leaf_level(tb);
1991  check_internal_levels(tb);
1992  REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
1993 #endif
1994 
1995  /* reiserfs_free_block is no longer schedule safe. So, we need to
1996  ** put the buffers we want freed on the thrown list during do_balance,
1997  ** and then free them now
1998  */
1999 
2000  REISERFS_SB(tb->tb_sb)->s_do_balance++;
2001 
2002  /* release all nodes hold to perform the balancing */
2003  unfix_nodes(tb);
2004 
2005  free_thrown(tb);
2006 }
2007 
2008 void do_balance(struct tree_balance *tb, /* tree_balance structure */
2009  struct item_head *ih, /* item header of inserted item */
2010  const char *body, /* body of inserted item or bytes to paste */
2011  int flag)
2012 { /* i - insert, d - delete
2013  c - cut, p - paste
2014 
2015  Cut means delete part of an item
2016  (includes removing an entry from a
2017  directory).
2018 
2019  Delete means delete whole item.
2020 
2021  Insert means add a new item into the
2022  tree.
2023 
2024  Paste means to append to the end of an
2025  existing file or to insert a directory
2026  entry. */
2027  int child_pos, /* position of a child node in its parent */
2028  h; /* level of the tree being processed */
2029  struct item_head insert_key[2]; /* in our processing of one level
2030  we sometimes determine what
2031  must be inserted into the next
2032  higher level. This insertion
2033  consists of a key or two keys
2034  and their corresponding
2035  pointers */
2036  struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
2037  level */
2038 
2039  tb->tb_mode = flag;
2040  tb->need_balance_dirty = 0;
2041 
2042  if (FILESYSTEM_CHANGED_TB(tb)) {
2043  reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
2044  "changed");
2045  }
2046  /* if we have no real work to do */
2047  if (!tb->insert_size[0]) {
2048  reiserfs_warning(tb->tb_sb, "PAP-12350",
2049  "insert_size == 0, mode == %c", flag);
2050  unfix_nodes(tb);
2051  return;
2052  }
2053 
2054  atomic_inc(&(fs_generation(tb->tb_sb)));
2055  do_balance_starts(tb);
2056 
2057  /* balance leaf returns 0 except if combining L R and S into
2058  one node. see balance_internal() for explanation of this
2059  line of code. */
2060  child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
2061  balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
2062 
2063 #ifdef CONFIG_REISERFS_CHECK
2064  check_after_balance_leaf(tb);
2065 #endif
2066 
2067  /* Balance internal level of the tree. */
2068  for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
2069  child_pos =
2070  balance_internal(tb, h, child_pos, insert_key, insert_ptr);
2071 
2072  do_balance_completed(tb);
2073 
2074 }