Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
stree.c
Go to the documentation of this file.
1 /*
2  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4 
5 /*
6  * Written by Anatoly P. Pinchuk [email protected]
7  * Programm System Institute
8  * Pereslavl-Zalessky Russia
9  */
10 
11 /*
12  * This file contains functions dealing with S+tree
13  *
14  * B_IS_IN_TREE
15  * copy_item_head
16  * comp_short_keys
17  * comp_keys
18  * comp_short_le_keys
19  * le_key2cpu_key
20  * comp_le_keys
21  * bin_search
22  * get_lkey
23  * get_rkey
24  * key_in_buffer
25  * decrement_bcount
26  * reiserfs_check_path
27  * pathrelse_and_restore
28  * pathrelse
29  * search_by_key_reada
30  * search_by_key
31  * search_for_position_by_key
32  * comp_items
33  * prepare_for_direct_item
34  * prepare_for_direntry_item
35  * prepare_for_delete_or_cut
36  * calc_deleted_bytes_number
37  * init_tb_struct
38  * padd_item
39  * reiserfs_delete_item
40  * reiserfs_delete_solid_item
41  * reiserfs_delete_object
42  * maybe_indirect_to_direct
43  * indirect_to_direct_roll_back
44  * reiserfs_cut_from_item
45  * truncate_directory
46  * reiserfs_do_truncate
47  * reiserfs_paste_into_item
48  * reiserfs_insert_item
49  */
50 
51 #include <linux/time.h>
52 #include <linux/string.h>
53 #include <linux/pagemap.h>
54 #include "reiserfs.h"
55 #include <linux/buffer_head.h>
56 #include <linux/quotaops.h>
57 
58 /* Does the buffer contain a disk block which is in the tree. */
59 inline int B_IS_IN_TREE(const struct buffer_head *bh)
60 {
61 
63  "PAP-1010: block (%b) has too big level (%z)", bh, bh);
64 
65  return (B_LEVEL(bh) != FREE_LEVEL);
66 }
67 
68 //
69 // to gets item head in le form
70 //
71 inline void copy_item_head(struct item_head *to,
72  const struct item_head *from)
73 {
74  memcpy(to, from, IH_SIZE);
75 }
76 
77 /* k1 is pointer to on-disk structure which is stored in little-endian
78  form. k2 is pointer to cpu variable. For key of items of the same
79  object this returns 0.
80  Returns: -1 if key1 < key2
81  0 if key1 == key2
82  1 if key1 > key2 */
83 inline int comp_short_keys(const struct reiserfs_key *le_key,
84  const struct cpu_key *cpu_key)
85 {
86  __u32 n;
87  n = le32_to_cpu(le_key->k_dir_id);
88  if (n < cpu_key->on_disk_key.k_dir_id)
89  return -1;
90  if (n > cpu_key->on_disk_key.k_dir_id)
91  return 1;
92  n = le32_to_cpu(le_key->k_objectid);
93  if (n < cpu_key->on_disk_key.k_objectid)
94  return -1;
95  if (n > cpu_key->on_disk_key.k_objectid)
96  return 1;
97  return 0;
98 }
99 
100 /* k1 is pointer to on-disk structure which is stored in little-endian
101  form. k2 is pointer to cpu variable.
102  Compare keys using all 4 key fields.
103  Returns: -1 if key1 < key2 0
104  if key1 = key2 1 if key1 > key2 */
105 static inline int comp_keys(const struct reiserfs_key *le_key,
106  const struct cpu_key *cpu_key)
107 {
108  int retval;
109 
110  retval = comp_short_keys(le_key, cpu_key);
111  if (retval)
112  return retval;
113  if (le_key_k_offset(le_key_version(le_key), le_key) <
114  cpu_key_k_offset(cpu_key))
115  return -1;
116  if (le_key_k_offset(le_key_version(le_key), le_key) >
117  cpu_key_k_offset(cpu_key))
118  return 1;
119 
120  if (cpu_key->key_length == 3)
121  return 0;
122 
123  /* this part is needed only when tail conversion is in progress */
124  if (le_key_k_type(le_key_version(le_key), le_key) <
125  cpu_key_k_type(cpu_key))
126  return -1;
127 
128  if (le_key_k_type(le_key_version(le_key), le_key) >
129  cpu_key_k_type(cpu_key))
130  return 1;
131 
132  return 0;
133 }
134 
135 inline int comp_short_le_keys(const struct reiserfs_key *key1,
136  const struct reiserfs_key *key2)
137 {
138  __u32 *k1_u32, *k2_u32;
140 
141  k1_u32 = (__u32 *) key1;
142  k2_u32 = (__u32 *) key2;
143  for (; key_length--; ++k1_u32, ++k2_u32) {
144  if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
145  return -1;
146  if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
147  return 1;
148  }
149  return 0;
150 }
151 
152 inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
153 {
154  int version;
155  to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
156  to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
157 
158  // find out version of the key
159  version = le_key_version(from);
160  to->version = version;
161  to->on_disk_key.k_offset = le_key_k_offset(version, from);
162  to->on_disk_key.k_type = le_key_k_type(version, from);
163 }
164 
165 // this does not say which one is bigger, it only returns 1 if keys
166 // are not equal, 0 otherwise
167 inline int comp_le_keys(const struct reiserfs_key *k1,
168  const struct reiserfs_key *k2)
169 {
170  return memcmp(k1, k2, sizeof(struct reiserfs_key));
171 }
172 
173 /**************************************************************************
174  * Binary search toolkit function *
175  * Search for an item in the array by the item key *
176  * Returns: 1 if found, 0 if not found; *
177  * *pos = number of the searched element if found, else the *
178  * number of the first element that is larger than key. *
179  **************************************************************************/
180 /* For those not familiar with binary search: lbound is the leftmost item that it
181  could be, rbound the rightmost item that it could be. We examine the item
182  halfway between lbound and rbound, and that tells us either that we can increase
183  lbound, or decrease rbound, or that we have found it, or if lbound <= rbound that
184  there are no possible items, and we have not found it. With each examination we
185  cut the number of possible items it could be by one more than half rounded down,
186  or we find it. */
187 static inline int bin_search(const void *key, /* Key to search for. */
188  const void *base, /* First item in the array. */
189  int num, /* Number of items in the array. */
190  int width, /* Item size in the array.
191  searched. Lest the reader be
192  confused, note that this is crafted
193  as a general function, and when it
194  is applied specifically to the array
195  of item headers in a node, width
196  is actually the item header size not
197  the item size. */
198  int *pos /* Number of the searched for element. */
199  )
200 {
201  int rbound, lbound, j;
202 
203  for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
204  lbound <= rbound; j = (rbound + lbound) / 2)
205  switch (comp_keys
206  ((struct reiserfs_key *)((char *)base + j * width),
207  (struct cpu_key *)key)) {
208  case -1:
209  lbound = j + 1;
210  continue;
211  case 1:
212  rbound = j - 1;
213  continue;
214  case 0:
215  *pos = j;
216  return ITEM_FOUND; /* Key found in the array. */
217  }
218 
219  /* bin_search did not find given key, it returns position of key,
220  that is minimal and greater than the given one. */
221  *pos = lbound;
222  return ITEM_NOT_FOUND;
223 }
224 
225 
226 /* Minimal possible key. It is never in the tree. */
227 const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
228 
229 /* Maximal possible key. It is never in the tree. */
230 static const struct reiserfs_key MAX_KEY = {
231  __constant_cpu_to_le32(0xffffffff),
232  __constant_cpu_to_le32(0xffffffff),
233  {{__constant_cpu_to_le32(0xffffffff),
234  __constant_cpu_to_le32(0xffffffff)},}
235 };
236 
237 /* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
238  of the path, and going upwards. We must check the path's validity at each step. If the key is not in
239  the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
240  case we return a special key, either MIN_KEY or MAX_KEY. */
241 static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
242  const struct super_block *sb)
243 {
244  int position, path_offset = chk_path->path_length;
245  struct buffer_head *parent;
246 
247  RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
248  "PAP-5010: invalid offset in the path");
249 
250  /* While not higher in path than first element. */
251  while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
252 
253  RFALSE(!buffer_uptodate
254  (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
255  "PAP-5020: parent is not uptodate");
256 
257  /* Parent at the path is not in the tree now. */
258  if (!B_IS_IN_TREE
259  (parent =
260  PATH_OFFSET_PBUFFER(chk_path, path_offset)))
261  return &MAX_KEY;
262  /* Check whether position in the parent is correct. */
263  if ((position =
264  PATH_OFFSET_POSITION(chk_path,
265  path_offset)) >
266  B_NR_ITEMS(parent))
267  return &MAX_KEY;
268  /* Check whether parent at the path really points to the child. */
269  if (B_N_CHILD_NUM(parent, position) !=
270  PATH_OFFSET_PBUFFER(chk_path,
271  path_offset + 1)->b_blocknr)
272  return &MAX_KEY;
273  /* Return delimiting key if position in the parent is not equal to zero. */
274  if (position)
275  return B_N_PDELIM_KEY(parent, position - 1);
276  }
277  /* Return MIN_KEY if we are in the root of the buffer tree. */
279  b_blocknr == SB_ROOT_BLOCK(sb))
280  return &MIN_KEY;
281  return &MAX_KEY;
282 }
283 
284 /* Get delimiting key of the buffer at the path and its right neighbor. */
285 inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
286  const struct super_block *sb)
287 {
288  int position, path_offset = chk_path->path_length;
289  struct buffer_head *parent;
290 
291  RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
292  "PAP-5030: invalid offset in the path");
293 
294  while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
295 
296  RFALSE(!buffer_uptodate
297  (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
298  "PAP-5040: parent is not uptodate");
299 
300  /* Parent at the path is not in the tree now. */
301  if (!B_IS_IN_TREE
302  (parent =
303  PATH_OFFSET_PBUFFER(chk_path, path_offset)))
304  return &MIN_KEY;
305  /* Check whether position in the parent is correct. */
306  if ((position =
307  PATH_OFFSET_POSITION(chk_path,
308  path_offset)) >
309  B_NR_ITEMS(parent))
310  return &MIN_KEY;
311  /* Check whether parent at the path really points to the child. */
312  if (B_N_CHILD_NUM(parent, position) !=
313  PATH_OFFSET_PBUFFER(chk_path,
314  path_offset + 1)->b_blocknr)
315  return &MIN_KEY;
316  /* Return delimiting key if position in the parent is not the last one. */
317  if (position != B_NR_ITEMS(parent))
318  return B_N_PDELIM_KEY(parent, position);
319  }
320  /* Return MAX_KEY if we are in the root of the buffer tree. */
322  b_blocknr == SB_ROOT_BLOCK(sb))
323  return &MAX_KEY;
324  return &MIN_KEY;
325 }
326 
327 /* Check whether a key is contained in the tree rooted from a buffer at a path. */
328 /* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
329  the path. These delimiting keys are stored at least one level above that buffer in the tree. If the
330  buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
331  this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
332 static inline int key_in_buffer(struct treepath *chk_path, /* Path which should be checked. */
333  const struct cpu_key *key, /* Key which should be checked. */
334  struct super_block *sb
335  )
336 {
337 
338  RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
339  || chk_path->path_length > MAX_HEIGHT,
340  "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
341  key, chk_path->path_length);
342  RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
343  "PAP-5060: device must not be NODEV");
344 
345  if (comp_keys(get_lkey(chk_path, sb), key) == 1)
346  /* left delimiting key is bigger, that the key we look for */
347  return 0;
348  /* if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
349  if (comp_keys(get_rkey(chk_path, sb), key) != 1)
350  /* key must be less than right delimitiing key */
351  return 0;
352  return 1;
353 }
354 
356 {
358  "path not properly relsed");
359  return 0;
360 }
361 
362 /* Drop the reference to each buffer in a path and restore
363  * dirty bits clean when preparing the buffer for the log.
364  * This version should only be called from fix_nodes() */
366  struct treepath *search_path)
367 {
368  int path_offset = search_path->path_length;
369 
370  RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
371  "clm-4000: invalid path offset");
372 
373  while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
374  struct buffer_head *bh;
375  bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
377  brelse(bh);
378  }
380 }
381 
382 /* Drop the reference to each buffer in a path */
384 {
385  int path_offset = search_path->path_length;
386 
387  RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
388  "PAP-5090: invalid path offset");
389 
390  while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
391  brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));
392 
394 }
395 
396 static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
397 {
398  struct block_head *blkh;
399  struct item_head *ih;
400  int used_space;
401  int prev_location;
402  int i;
403  int nr;
404 
405  blkh = (struct block_head *)buf;
406  if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
407  reiserfs_warning(NULL, "reiserfs-5080",
408  "this should be caught earlier");
409  return 0;
410  }
411 
412  nr = blkh_nr_item(blkh);
413  if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
414  /* item number is too big or too small */
415  reiserfs_warning(NULL, "reiserfs-5081",
416  "nr_item seems wrong: %z", bh);
417  return 0;
418  }
419  ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
420  used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
421  if (used_space != blocksize - blkh_free_space(blkh)) {
422  /* free space does not match to calculated amount of use space */
423  reiserfs_warning(NULL, "reiserfs-5082",
424  "free space seems wrong: %z", bh);
425  return 0;
426  }
427  // FIXME: it is_leaf will hit performance too much - we may have
428  // return 1 here
429 
430  /* check tables of item heads */
431  ih = (struct item_head *)(buf + BLKH_SIZE);
432  prev_location = blocksize;
433  for (i = 0; i < nr; i++, ih++) {
434  if (le_ih_k_type(ih) == TYPE_ANY) {
435  reiserfs_warning(NULL, "reiserfs-5083",
436  "wrong item type for item %h",
437  ih);
438  return 0;
439  }
440  if (ih_location(ih) >= blocksize
441  || ih_location(ih) < IH_SIZE * nr) {
442  reiserfs_warning(NULL, "reiserfs-5084",
443  "item location seems wrong: %h",
444  ih);
445  return 0;
446  }
447  if (ih_item_len(ih) < 1
448  || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
449  reiserfs_warning(NULL, "reiserfs-5085",
450  "item length seems wrong: %h",
451  ih);
452  return 0;
453  }
454  if (prev_location - ih_location(ih) != ih_item_len(ih)) {
455  reiserfs_warning(NULL, "reiserfs-5086",
456  "item location seems wrong "
457  "(second one): %h", ih);
458  return 0;
459  }
460  prev_location = ih_location(ih);
461  }
462 
463  // one may imagine much more checks
464  return 1;
465 }
466 
467 /* returns 1 if buf looks like an internal node, 0 otherwise */
468 static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
469 {
470  struct block_head *blkh;
471  int nr;
472  int used_space;
473 
474  blkh = (struct block_head *)buf;
475  nr = blkh_level(blkh);
476  if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
477  /* this level is not possible for internal nodes */
478  reiserfs_warning(NULL, "reiserfs-5087",
479  "this should be caught earlier");
480  return 0;
481  }
482 
483  nr = blkh_nr_item(blkh);
484  if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
485  /* for internal which is not root we might check min number of keys */
486  reiserfs_warning(NULL, "reiserfs-5088",
487  "number of key seems wrong: %z", bh);
488  return 0;
489  }
490 
491  used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
492  if (used_space != blocksize - blkh_free_space(blkh)) {
493  reiserfs_warning(NULL, "reiserfs-5089",
494  "free space seems wrong: %z", bh);
495  return 0;
496  }
497  // one may imagine much more checks
498  return 1;
499 }
500 
501 // make sure that bh contains formatted node of reiserfs tree of
502 // 'level'-th level
503 static int is_tree_node(struct buffer_head *bh, int level)
504 {
505  if (B_LEVEL(bh) != level) {
506  reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
507  "not match to the expected one %d",
508  B_LEVEL(bh), level);
509  return 0;
510  }
511  if (level == DISK_LEAF_NODE_LEVEL)
512  return is_leaf(bh->b_data, bh->b_size, bh);
513 
514  return is_internal(bh->b_data, bh->b_size, bh);
515 }
516 
517 #define SEARCH_BY_KEY_READA 16
518 
519 /*
520  * The function is NOT SCHEDULE-SAFE!
521  * It might unlock the write lock if we needed to wait for a block
522  * to be read. Note that in this case it won't recover the lock to avoid
523  * high contention resulting from too much lock requests, especially
524  * the caller (search_by_key) will perform other schedule-unsafe
525  * operations just after calling this function.
526  *
527  * @return true if we have unlocked
528  */
529 static bool search_by_key_reada(struct super_block *s,
530  struct buffer_head **bh,
531  b_blocknr_t *b, int num)
532 {
533  int i, j;
534  bool unlocked = false;
535 
536  for (i = 0; i < num; i++) {
537  bh[i] = sb_getblk(s, b[i]);
538  }
539  /*
540  * We are going to read some blocks on which we
541  * have a reference. It's safe, though we might be
542  * reading blocks concurrently changed if we release
543  * the lock. But it's still fine because we check later
544  * if the tree changed
545  */
546  for (j = 0; j < i; j++) {
547  /*
548  * note, this needs attention if we are getting rid of the BKL
549  * you have to make sure the prepared bit isn't set on this buffer
550  */
551  if (!buffer_uptodate(bh[j])) {
552  if (!unlocked) {
554  unlocked = true;
555  }
556  ll_rw_block(READA, 1, bh + j);
557  }
558  brelse(bh[j]);
559  }
560  return unlocked;
561 }
562 
563 /**************************************************************************
564  * Algorithm SearchByKey *
565  * look for item in the Disk S+Tree by its key *
566  * Input: sb - super block *
567  * key - pointer to the key to search *
568  * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR *
569  * search_path - path from the root to the needed leaf *
570  **************************************************************************/
571 
572 /* This function fills up the path from the root to the leaf as it
573  descends the tree looking for the key. It uses reiserfs_bread to
574  try to find buffers in the cache given their block number. If it
575  does not find them in the cache it reads them from disk. For each
576  node search_by_key finds using reiserfs_bread it then uses
577  bin_search to look through that node. bin_search will find the
578  position of the block_number of the next node if it is looking
579  through an internal node. If it is looking through a leaf node
580  bin_search will find the position of the item which has key either
581  equal to given key, or which is the maximal key less than the given
582  key. search_by_key returns a path that must be checked for the
583  correctness of the top of the path but need not be checked for the
584  correctness of the bottom of the path */
585 /* The function is NOT SCHEDULE-SAFE! */
586 int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to search. */
587  struct treepath *search_path,/* This structure was
588  allocated and initialized
589  by the calling
590  function. It is filled up
591  by this function. */
592  int stop_level /* How far down the tree to search. To
593  stop at leaf level - set to
594  DISK_LEAF_NODE_LEVEL */
595  )
596 {
598  int expected_level;
599  struct buffer_head *bh;
600  struct path_element *last_element;
601  int node_level, retval;
602  int right_neighbor_of_leaf_node;
603  int fs_gen;
604  struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
605  b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
606  int reada_count = 0;
607 
608 #ifdef CONFIG_REISERFS_CHECK
609  int repeat_counter = 0;
610 #endif
611 
613 
614  /* As we add each node to a path we increase its count. This means that
615  we must be careful to release all nodes in a path before we either
616  discard the path struct or re-use the path struct, as we do here. */
617 
618  pathrelse(search_path);
619 
620  right_neighbor_of_leaf_node = 0;
621 
622  /* With each iteration of this loop we search through the items in the
623  current node, and calculate the next current node(next path element)
624  for the next iteration of this loop.. */
625  block_number = SB_ROOT_BLOCK(sb);
626  expected_level = -1;
627  while (1) {
628 
629 #ifdef CONFIG_REISERFS_CHECK
630  if (!(++repeat_counter % 50000))
631  reiserfs_warning(sb, "PAP-5100",
632  "%s: there were %d iterations of "
633  "while loop looking for key %K",
634  current->comm, repeat_counter,
635  key);
636 #endif
637 
638  /* prep path to have another element added to it. */
639  last_element =
640  PATH_OFFSET_PELEMENT(search_path,
641  ++search_path->path_length);
642  fs_gen = get_generation(sb);
643 
644  /* Read the next tree node, and set the last element in the path to
645  have a pointer to it. */
646  if ((bh = last_element->pe_buffer =
647  sb_getblk(sb, block_number))) {
648  bool unlocked = false;
649 
650  if (!buffer_uptodate(bh) && reada_count > 1)
651  /* may unlock the write lock */
652  unlocked = search_by_key_reada(sb, reada_bh,
653  reada_blocks, reada_count);
654  /*
655  * If we haven't already unlocked the write lock,
656  * then we need to do that here before reading
657  * the current block
658  */
659  if (!buffer_uptodate(bh) && !unlocked) {
661  unlocked = true;
662  }
663  ll_rw_block(READ, 1, &bh);
664  wait_on_buffer(bh);
665 
666  if (unlocked)
668  if (!buffer_uptodate(bh))
669  goto io_error;
670  } else {
671  io_error:
672  search_path->path_length--;
673  pathrelse(search_path);
674  return IO_ERROR;
675  }
676  reada_count = 0;
677  if (expected_level == -1)
678  expected_level = SB_TREE_HEIGHT(sb);
679  expected_level--;
680 
681  /* It is possible that schedule occurred. We must check whether the key
682  to search is still in the tree rooted from the current buffer. If
683  not then repeat search from the root. */
684  if (fs_changed(fs_gen, sb) &&
685  (!B_IS_IN_TREE(bh) ||
686  B_LEVEL(bh) != expected_level ||
687  !key_in_buffer(search_path, key, sb))) {
688  PROC_INFO_INC(sb, search_by_key_fs_changed);
689  PROC_INFO_INC(sb, search_by_key_restarted);
690  PROC_INFO_INC(sb,
691  sbk_restarted[expected_level - 1]);
692  pathrelse(search_path);
693 
694  /* Get the root block number so that we can repeat the search
695  starting from the root. */
696  block_number = SB_ROOT_BLOCK(sb);
697  expected_level = -1;
698  right_neighbor_of_leaf_node = 0;
699 
700  /* repeat search from the root */
701  continue;
702  }
703 
704  /* only check that the key is in the buffer if key is not
705  equal to the MAX_KEY. Latter case is only possible in
706  "finish_unfinished()" processing during mount. */
707  RFALSE(comp_keys(&MAX_KEY, key) &&
708  !key_in_buffer(search_path, key, sb),
709  "PAP-5130: key is not in the buffer");
710 #ifdef CONFIG_REISERFS_CHECK
711  if (REISERFS_SB(sb)->cur_tb) {
712  print_cur_tb("5140");
713  reiserfs_panic(sb, "PAP-5140",
714  "schedule occurred in do_balance!");
715  }
716 #endif
717 
718  // make sure, that the node contents look like a node of
719  // certain level
720  if (!is_tree_node(bh, expected_level)) {
721  reiserfs_error(sb, "vs-5150",
722  "invalid format found in block %ld. "
723  "Fsck?", bh->b_blocknr);
724  pathrelse(search_path);
725  return IO_ERROR;
726  }
727 
728  /* ok, we have acquired next formatted node in the tree */
729  node_level = B_LEVEL(bh);
730 
731  PROC_INFO_BH_STAT(sb, bh, node_level - 1);
732 
733  RFALSE(node_level < stop_level,
734  "vs-5152: tree level (%d) is less than stop level (%d)",
735  node_level, stop_level);
736 
737  retval = bin_search(key, B_N_PITEM_HEAD(bh, 0),
738  B_NR_ITEMS(bh),
739  (node_level ==
741  KEY_SIZE,
742  &(last_element->pe_position));
743  if (node_level == stop_level) {
744  return retval;
745  }
746 
747  /* we are not in the stop level */
748  if (retval == ITEM_FOUND)
749  /* item has been found, so we choose the pointer which is to the right of the found one */
750  last_element->pe_position++;
751 
752  /* if item was not found we choose the position which is to
753  the left of the found item. This requires no code,
754  bin_search did it already. */
755 
756  /* So we have chosen a position in the current node which is
757  an internal node. Now we calculate child block number by
758  position in the node. */
759  block_number =
760  B_N_CHILD_NUM(bh, last_element->pe_position);
761 
762  /* if we are going to read leaf nodes, try for read ahead as well */
763  if ((search_path->reada & PATH_READA) &&
764  node_level == DISK_LEAF_NODE_LEVEL + 1) {
765  int pos = last_element->pe_position;
766  int limit = B_NR_ITEMS(bh);
767  struct reiserfs_key *le_key;
768 
769  if (search_path->reada & PATH_READA_BACK)
770  limit = 0;
771  while (reada_count < SEARCH_BY_KEY_READA) {
772  if (pos == limit)
773  break;
774  reada_blocks[reada_count++] =
775  B_N_CHILD_NUM(bh, pos);
776  if (search_path->reada & PATH_READA_BACK)
777  pos--;
778  else
779  pos++;
780 
781  /*
782  * check to make sure we're in the same object
783  */
784  le_key = B_N_PDELIM_KEY(bh, pos);
785  if (le32_to_cpu(le_key->k_objectid) !=
786  key->on_disk_key.k_objectid) {
787  break;
788  }
789  }
790  }
791  }
792 }
793 
794 /* Form the path to an item and position in this item which contains
795  file byte defined by key. If there is no such item
796  corresponding to the key, we point the path to the item with
797  maximal key less than key, and *pos_in_item is set to one
798  past the last entry/byte in the item. If searching for entry in a
799  directory item, and it is not found, *pos_in_item is set to one
800  entry more than the entry with maximal key which is less than the
801  sought key.
802 
803  Note that if there is no entry in this same node which is one more,
804  then we point to an imaginary entry. for direct items, the
805  position is in units of bytes, for indirect items the position is
806  in units of blocknr entries, for directory items the position is in
807  units of directory entries. */
808 
809 /* The function is NOT SCHEDULE-SAFE! */
810 int search_for_position_by_key(struct super_block *sb, /* Pointer to the super block. */
811  const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */
812  struct treepath *search_path /* Filled up by this function. */
813  )
814 {
815  struct item_head *p_le_ih; /* pointer to on-disk structure */
816  int blk_size;
817  loff_t item_offset, offset;
818  struct reiserfs_dir_entry de;
819  int retval;
820 
821  /* If searching for directory entry. */
822  if (is_direntry_cpu_key(p_cpu_key))
823  return search_by_entry_key(sb, p_cpu_key, search_path,
824  &de);
825 
826  /* If not searching for directory entry. */
827 
828  /* If item is found. */
829  retval = search_item(sb, p_cpu_key, search_path);
830  if (retval == IO_ERROR)
831  return retval;
832  if (retval == ITEM_FOUND) {
833 
836  (PATH_PLAST_BUFFER(search_path),
837  PATH_LAST_POSITION(search_path))),
838  "PAP-5165: item length equals zero");
839 
840  pos_in_item(search_path) = 0;
841  return POSITION_FOUND;
842  }
843 
844  RFALSE(!PATH_LAST_POSITION(search_path),
845  "PAP-5170: position equals zero");
846 
847  /* Item is not found. Set path to the previous item. */
848  p_le_ih =
849  B_N_PITEM_HEAD(PATH_PLAST_BUFFER(search_path),
850  --PATH_LAST_POSITION(search_path));
851  blk_size = sb->s_blocksize;
852 
853  if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
854  return FILE_NOT_FOUND;
855  }
856  // FIXME: quite ugly this far
857 
858  item_offset = le_ih_k_offset(p_le_ih);
859  offset = cpu_key_k_offset(p_cpu_key);
860 
861  /* Needed byte is contained in the item pointed to by the path. */
862  if (item_offset <= offset &&
863  item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
864  pos_in_item(search_path) = offset - item_offset;
865  if (is_indirect_le_ih(p_le_ih)) {
866  pos_in_item(search_path) /= blk_size;
867  }
868  return POSITION_FOUND;
869  }
870 
871  /* Needed byte is not contained in the item pointed to by the
872  path. Set pos_in_item out of the item. */
873  if (is_indirect_le_ih(p_le_ih))
874  pos_in_item(search_path) =
875  ih_item_len(p_le_ih) / UNFM_P_SIZE;
876  else
877  pos_in_item(search_path) = ih_item_len(p_le_ih);
878 
879  return POSITION_NOT_FOUND;
880 }
881 
882 /* Compare given item and item pointed to by the path. */
883 int comp_items(const struct item_head *stored_ih, const struct treepath *path)
884 {
885  struct buffer_head *bh = PATH_PLAST_BUFFER(path);
886  struct item_head *ih;
887 
888  /* Last buffer at the path is not in the tree. */
889  if (!B_IS_IN_TREE(bh))
890  return 1;
891 
892  /* Last path position is invalid. */
893  if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
894  return 1;
895 
896  /* we need only to know, whether it is the same item */
897  ih = get_ih(path);
898  return memcmp(stored_ih, ih, IH_SIZE);
899 }
900 
901 /* unformatted nodes are not logged anymore, ever. This is safe
902 ** now
903 */
904 #define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
905 
906 // block can not be forgotten as it is in I/O or held by someone
907 #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
908 
909 // prepare for delete or cut of direct item
910 static inline int prepare_for_direct_item(struct treepath *path,
911  struct item_head *le_ih,
912  struct inode *inode,
913  loff_t new_file_length, int *cut_size)
914 {
915  loff_t round_len;
916 
917  if (new_file_length == max_reiserfs_offset(inode)) {
918  /* item has to be deleted */
919  *cut_size = -(IH_SIZE + ih_item_len(le_ih));
920  return M_DELETE;
921  }
922  // new file gets truncated
924  //
925  round_len = ROUND_UP(new_file_length);
926  /* this was new_file_length < le_ih ... */
927  if (round_len < le_ih_k_offset(le_ih)) {
928  *cut_size = -(IH_SIZE + ih_item_len(le_ih));
929  return M_DELETE; /* Delete this item. */
930  }
931  /* Calculate first position and size for cutting from item. */
932  pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
933  *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
934 
935  return M_CUT; /* Cut from this item. */
936  }
937 
938  // old file: items may have any length
939 
940  if (new_file_length < le_ih_k_offset(le_ih)) {
941  *cut_size = -(IH_SIZE + ih_item_len(le_ih));
942  return M_DELETE; /* Delete this item. */
943  }
944  /* Calculate first position and size for cutting from item. */
945  *cut_size = -(ih_item_len(le_ih) -
946  (pos_in_item(path) =
947  new_file_length + 1 - le_ih_k_offset(le_ih)));
948  return M_CUT; /* Cut from this item. */
949 }
950 
951 static inline int prepare_for_direntry_item(struct treepath *path,
952  struct item_head *le_ih,
953  struct inode *inode,
954  loff_t new_file_length,
955  int *cut_size)
956 {
957  if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
958  new_file_length == max_reiserfs_offset(inode)) {
959  RFALSE(ih_entry_count(le_ih) != 2,
960  "PAP-5220: incorrect empty directory item (%h)", le_ih);
961  *cut_size = -(IH_SIZE + ih_item_len(le_ih));
962  return M_DELETE; /* Delete the directory item containing "." and ".." entry. */
963  }
964 
965  if (ih_entry_count(le_ih) == 1) {
966  /* Delete the directory item such as there is one record only
967  in this item */
968  *cut_size = -(IH_SIZE + ih_item_len(le_ih));
969  return M_DELETE;
970  }
971 
972  /* Cut one record from the directory item. */
973  *cut_size =
974  -(DEH_SIZE +
975  entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
976  return M_CUT;
977 }
978 
979 #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
980 
981 /* If the path points to a directory or direct item, calculate mode and the size cut, for balance.
982  If the path points to an indirect item, remove some number of its unformatted nodes.
983  In case of file truncate calculate whether this item must be deleted/truncated or last
984  unformatted node of this item will be converted to a direct item.
985  This function returns a determination of what balance mode the calling function should employ. */
986 static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, int *removed, /* Number of unformatted nodes which were removed
987  from end of the file. */
988  int *cut_size, unsigned long long new_file_length /* MAX_KEY_OFFSET in case of delete. */
989  )
990 {
991  struct super_block *sb = inode->i_sb;
992  struct item_head *p_le_ih = PATH_PITEM_HEAD(path);
993  struct buffer_head *bh = PATH_PLAST_BUFFER(path);
994 
995  BUG_ON(!th->t_trans_id);
996 
997  /* Stat_data item. */
998  if (is_statdata_le_ih(p_le_ih)) {
999 
1000  RFALSE(new_file_length != max_reiserfs_offset(inode),
1001  "PAP-5210: mode must be M_DELETE");
1002 
1003  *cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
1004  return M_DELETE;
1005  }
1006 
1007  /* Directory item. */
1008  if (is_direntry_le_ih(p_le_ih))
1009  return prepare_for_direntry_item(path, p_le_ih, inode,
1010  new_file_length,
1011  cut_size);
1012 
1013  /* Direct item. */
1014  if (is_direct_le_ih(p_le_ih))
1015  return prepare_for_direct_item(path, p_le_ih, inode,
1016  new_file_length, cut_size);
1017 
1018  /* Case of an indirect item. */
1019  {
1020  int blk_size = sb->s_blocksize;
1021  struct item_head s_ih;
1022  int need_re_search;
1023  int delete = 0;
1024  int result = M_CUT;
1025  int pos = 0;
1026 
1027  if ( new_file_length == max_reiserfs_offset (inode) ) {
1028  /* prepare_for_delete_or_cut() is called by
1029  * reiserfs_delete_item() */
1030  new_file_length = 0;
1031  delete = 1;
1032  }
1033 
1034  do {
1035  need_re_search = 0;
1036  *cut_size = 0;
1037  bh = PATH_PLAST_BUFFER(path);
1038  copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
1039  pos = I_UNFM_NUM(&s_ih);
1040 
1041  while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
1042  __le32 *unfm;
1043  __u32 block;
1044 
1045  /* Each unformatted block deletion may involve one additional
1046  * bitmap block into the transaction, thereby the initial
1047  * journal space reservation might not be enough. */
1048  if (!delete && (*cut_size) != 0 &&
1049  reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
1050  break;
1051 
1052  unfm = (__le32 *)B_I_PITEM(bh, &s_ih) + pos - 1;
1053  block = get_block_num(unfm, 0);
1054 
1055  if (block != 0) {
1056  reiserfs_prepare_for_journal(sb, bh, 1);
1057  put_block_num(unfm, 0, 0);
1058  journal_mark_dirty(th, sb, bh);
1059  reiserfs_free_block(th, inode, block, 1);
1060  }
1061 
1063  cond_resched();
1064  reiserfs_write_lock(sb);
1065 
1066  if (item_moved (&s_ih, path)) {
1067  need_re_search = 1;
1068  break;
1069  }
1070 
1071  pos --;
1072  (*removed)++;
1073  (*cut_size) -= UNFM_P_SIZE;
1074 
1075  if (pos == 0) {
1076  (*cut_size) -= IH_SIZE;
1077  result = M_DELETE;
1078  break;
1079  }
1080  }
1081  /* a trick. If the buffer has been logged, this will do nothing. If
1082  ** we've broken the loop without logging it, it will restore the
1083  ** buffer */
1085  } while (need_re_search &&
1086  search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
1087  pos_in_item(path) = pos * UNFM_P_SIZE;
1088 
1089  if (*cut_size == 0) {
1090  /* Nothing were cut. maybe convert last unformatted node to the
1091  * direct item? */
1092  result = M_CONVERT;
1093  }
1094  return result;
1095  }
1096 }
1097 
1098 /* Calculate number of bytes which will be deleted or cut during balance */
1099 static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
1100 {
1101  int del_size;
1102  struct item_head *p_le_ih = PATH_PITEM_HEAD(tb->tb_path);
1103 
1104  if (is_statdata_le_ih(p_le_ih))
1105  return 0;
1106 
1107  del_size =
1108  (mode ==
1109  M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
1110  if (is_direntry_le_ih(p_le_ih)) {
1111  /* return EMPTY_DIR_SIZE; We delete emty directoris only.
1112  * we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1113  * empty size. ick. FIXME, is this right? */
1114  return del_size;
1115  }
1116 
1117  if (is_indirect_le_ih(p_le_ih))
1118  del_size = (del_size / UNFM_P_SIZE) *
1119  (PATH_PLAST_BUFFER(tb->tb_path)->b_size);
1120  return del_size;
1121 }
1122 
1123 static void init_tb_struct(struct reiserfs_transaction_handle *th,
1124  struct tree_balance *tb,
1125  struct super_block *sb,
1126  struct treepath *path, int size)
1127 {
1128 
1129  BUG_ON(!th->t_trans_id);
1130 
1131  memset(tb, '\0', sizeof(struct tree_balance));
1132  tb->transaction_handle = th;
1133  tb->tb_sb = sb;
1134  tb->tb_path = path;
1137  tb->insert_size[0] = size;
1138 }
1139 
1140 void padd_item(char *item, int total_length, int length)
1141 {
1142  int i;
1143 
1144  for (i = total_length; i > length;)
1145  item[--i] = 0;
1146 }
1147 
1148 #ifdef REISERQUOTA_DEBUG
1149 char key2type(struct reiserfs_key *ih)
1150 {
1151  if (is_direntry_le_key(2, ih))
1152  return 'd';
1153  if (is_direct_le_key(2, ih))
1154  return 'D';
1155  if (is_indirect_le_key(2, ih))
1156  return 'i';
1157  if (is_statdata_le_key(2, ih))
1158  return 's';
1159  return 'u';
1160 }
1161 
1162 char head2type(struct item_head *ih)
1163 {
1164  if (is_direntry_le_ih(ih))
1165  return 'd';
1166  if (is_direct_le_ih(ih))
1167  return 'D';
1168  if (is_indirect_le_ih(ih))
1169  return 'i';
1170  if (is_statdata_le_ih(ih))
1171  return 's';
1172  return 'u';
1173 }
1174 #endif
1175 
1176 /* Delete object item.
1177  * th - active transaction handle
1178  * path - path to the deleted item
1179  * item_key - key to search for the deleted item
1180  * indode - used for updating i_blocks and quotas
1181  * un_bh - NULL or unformatted node pointer
1182  */
1184  struct treepath *path, const struct cpu_key *item_key,
1185  struct inode *inode, struct buffer_head *un_bh)
1186 {
1187  struct super_block *sb = inode->i_sb;
1188  struct tree_balance s_del_balance;
1189  struct item_head s_ih;
1190  struct item_head *q_ih;
1191  int quota_cut_bytes;
1192  int ret_value, del_size, removed;
1193 
1194 #ifdef CONFIG_REISERFS_CHECK
1195  char mode;
1196  int iter = 0;
1197 #endif
1198 
1199  BUG_ON(!th->t_trans_id);
1200 
1201  init_tb_struct(th, &s_del_balance, sb, path,
1202  0 /*size is unknown */ );
1203 
1204  while (1) {
1205  removed = 0;
1206 
1207 #ifdef CONFIG_REISERFS_CHECK
1208  iter++;
1209  mode =
1210 #endif
1211  prepare_for_delete_or_cut(th, inode, path,
1212  item_key, &removed,
1213  &del_size,
1214  max_reiserfs_offset(inode));
1215 
1216  RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1217 
1218  copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
1219  s_del_balance.insert_size[0] = del_size;
1220 
1221  ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1222  if (ret_value != REPEAT_SEARCH)
1223  break;
1224 
1225  PROC_INFO_INC(sb, delete_item_restarted);
1226 
1227  // file system changed, repeat search
1228  ret_value =
1229  search_for_position_by_key(sb, item_key, path);
1230  if (ret_value == IO_ERROR)
1231  break;
1232  if (ret_value == FILE_NOT_FOUND) {
1233  reiserfs_warning(sb, "vs-5340",
1234  "no items of the file %K found",
1235  item_key);
1236  break;
1237  }
1238  } /* while (1) */
1239 
1240  if (ret_value != CARRY_ON) {
1241  unfix_nodes(&s_del_balance);
1242  return 0;
1243  }
1244  // reiserfs_delete_item returns item length when success
1245  ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1246  q_ih = get_ih(path);
1247  quota_cut_bytes = ih_item_len(q_ih);
1248 
1249  /* hack so the quota code doesn't have to guess if the file
1250  ** has a tail. On tail insert, we allocate quota for 1 unformatted node.
1251  ** We test the offset because the tail might have been
1252  ** split into multiple items, and we only want to decrement for
1253  ** the unfm node once
1254  */
1255  if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
1256  if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
1257  quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1258  } else {
1259  quota_cut_bytes = 0;
1260  }
1261  }
1262 
1263  if (un_bh) {
1264  int off;
1265  char *data;
1266 
1267  /* We are in direct2indirect conversion, so move tail contents
1268  to the unformatted node */
1269  /* note, we do the copy before preparing the buffer because we
1270  ** don't care about the contents of the unformatted node yet.
1271  ** the only thing we really care about is the direct item's data
1272  ** is in the unformatted node.
1273  **
1274  ** Otherwise, we would have to call reiserfs_prepare_for_journal on
1275  ** the unformatted node, which might schedule, meaning we'd have to
1276  ** loop all the way back up to the start of the while loop.
1277  **
1278  ** The unformatted node must be dirtied later on. We can't be
1279  ** sure here if the entire tail has been deleted yet.
1280  **
1281  ** un_bh is from the page cache (all unformatted nodes are
1282  ** from the page cache) and might be a highmem page. So, we
1283  ** can't use un_bh->b_data.
1284  ** -clm
1285  */
1286 
1287  data = kmap_atomic(un_bh->b_page);
1288  off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1289  memcpy(data + off,
1290  B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih),
1291  ret_value);
1292  kunmap_atomic(data);
1293  }
1294  /* Perform balancing after all resources have been collected at once. */
1295  do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1296 
1297 #ifdef REISERQUOTA_DEBUG
1299  "reiserquota delete_item(): freeing %u, id=%u type=%c",
1300  quota_cut_bytes, inode->i_uid, head2type(&s_ih));
1301 #endif
1302  dquot_free_space_nodirty(inode, quota_cut_bytes);
1303 
1304  /* Return deleted body length */
1305  return ret_value;
1306 }
1307 
1308 /* Summary Of Mechanisms For Handling Collisions Between Processes:
1309 
1310  deletion of the body of the object is performed by iput(), with the
1311  result that if multiple processes are operating on a file, the
1312  deletion of the body of the file is deferred until the last process
1313  that has an open inode performs its iput().
1314 
1315  writes and truncates are protected from collisions by use of
1316  semaphores.
1317 
1318  creates, linking, and mknod are protected from collisions with other
1319  processes by making the reiserfs_add_entry() the last step in the
1320  creation, and then rolling back all changes if there was a collision.
1321  - Hans
1322 */
1323 
1324 /* this deletes item which never gets split */
1326  struct inode *inode, struct reiserfs_key *key)
1327 {
1328  struct tree_balance tb;
1329  INITIALIZE_PATH(path);
1330  int item_len = 0;
1331  int tb_init = 0;
1332  struct cpu_key cpu_key;
1333  int retval;
1334  int quota_cut_bytes = 0;
1335 
1336  BUG_ON(!th->t_trans_id);
1337 
1338  le_key2cpu_key(&cpu_key, key);
1339 
1340  while (1) {
1341  retval = search_item(th->t_super, &cpu_key, &path);
1342  if (retval == IO_ERROR) {
1343  reiserfs_error(th->t_super, "vs-5350",
1344  "i/o failure occurred trying "
1345  "to delete %K", &cpu_key);
1346  break;
1347  }
1348  if (retval != ITEM_FOUND) {
1349  pathrelse(&path);
1350  // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir
1351  if (!
1352  ((unsigned long long)
1353  GET_HASH_VALUE(le_key_k_offset
1354  (le_key_version(key), key)) == 0
1355  && (unsigned long long)
1356  GET_GENERATION_NUMBER(le_key_k_offset
1357  (le_key_version(key),
1358  key)) == 1))
1359  reiserfs_warning(th->t_super, "vs-5355",
1360  "%k not found", key);
1361  break;
1362  }
1363  if (!tb_init) {
1364  tb_init = 1;
1365  item_len = ih_item_len(PATH_PITEM_HEAD(&path));
1366  init_tb_struct(th, &tb, th->t_super, &path,
1367  -(IH_SIZE + item_len));
1368  }
1369  quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path));
1370 
1371  retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
1372  if (retval == REPEAT_SEARCH) {
1373  PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
1374  continue;
1375  }
1376 
1377  if (retval == CARRY_ON) {
1378  do_balance(&tb, NULL, NULL, M_DELETE);
1379  if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */
1380 #ifdef REISERQUOTA_DEBUG
1382  "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
1383  quota_cut_bytes, inode->i_uid,
1384  key2type(key));
1385 #endif
1386  dquot_free_space_nodirty(inode,
1387  quota_cut_bytes);
1388  }
1389  break;
1390  }
1391  // IO_ERROR, NO_DISK_SPACE, etc
1392  reiserfs_warning(th->t_super, "vs-5360",
1393  "could not delete %K due to fix_nodes failure",
1394  &cpu_key);
1395  unfix_nodes(&tb);
1396  break;
1397  }
1398 
1399  reiserfs_check_path(&path);
1400 }
1401 
1403  struct inode *inode)
1404 {
1405  int err;
1406  inode->i_size = 0;
1407  BUG_ON(!th->t_trans_id);
1408 
1409  /* for directory this deletes item containing "." and ".." */
1410  err =
1411  reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
1412  if (err)
1413  return err;
1414 
1415 #if defined( USE_INODE_GENERATION_COUNTER )
1416  if (!old_format_only(th->t_super)) {
1417  __le32 *inode_generation;
1418 
1419  inode_generation =
1420  &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
1421  le32_add_cpu(inode_generation, 1);
1422  }
1423 /* USE_INODE_GENERATION_COUNTER */
1424 #endif
1425  reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1426 
1427  return err;
1428 }
1429 
1430 static void unmap_buffers(struct page *page, loff_t pos)
1431 {
1432  struct buffer_head *bh;
1433  struct buffer_head *head;
1434  struct buffer_head *next;
1435  unsigned long tail_index;
1436  unsigned long cur_index;
1437 
1438  if (page) {
1439  if (page_has_buffers(page)) {
1440  tail_index = pos & (PAGE_CACHE_SIZE - 1);
1441  cur_index = 0;
1442  head = page_buffers(page);
1443  bh = head;
1444  do {
1445  next = bh->b_this_page;
1446 
1447  /* we want to unmap the buffers that contain the tail, and
1448  ** all the buffers after it (since the tail must be at the
1449  ** end of the file). We don't want to unmap file data
1450  ** before the tail, since it might be dirty and waiting to
1451  ** reach disk
1452  */
1453  cur_index += bh->b_size;
1454  if (cur_index > tail_index) {
1456  }
1457  bh = next;
1458  } while (bh != head);
1459  }
1460  }
1461 }
1462 
1463 static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1464  struct inode *inode,
1465  struct page *page,
1466  struct treepath *path,
1467  const struct cpu_key *item_key,
1468  loff_t new_file_size, char *mode)
1469 {
1470  struct super_block *sb = inode->i_sb;
1471  int block_size = sb->s_blocksize;
1472  int cut_bytes;
1473  BUG_ON(!th->t_trans_id);
1474  BUG_ON(new_file_size != inode->i_size);
1475 
1476  /* the page being sent in could be NULL if there was an i/o error
1477  ** reading in the last block. The user will hit problems trying to
1478  ** read the file, but for now we just skip the indirect2direct
1479  */
1480  if (atomic_read(&inode->i_count) > 1 ||
1481  !tail_has_to_be_packed(inode) ||
1482  !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
1483  /* leave tail in an unformatted node */
1484  *mode = M_SKIP_BALANCING;
1485  cut_bytes =
1486  block_size - (new_file_size & (block_size - 1));
1487  pathrelse(path);
1488  return cut_bytes;
1489  }
1490  /* Perform the conversion to a direct_item. */
1491  /* return indirect_to_direct(inode, path, item_key,
1492  new_file_size, mode); */
1493  return indirect2direct(th, inode, page, path, item_key,
1494  new_file_size, mode);
1495 }
1496 
1497 /* we did indirect_to_direct conversion. And we have inserted direct
1498  item successesfully, but there were no disk space to cut unfm
1499  pointer being converted. Therefore we have to delete inserted
1500  direct item(s) */
1501 static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1502  struct inode *inode, struct treepath *path)
1503 {
1504  struct cpu_key tail_key;
1505  int tail_len;
1506  int removed;
1507  BUG_ON(!th->t_trans_id);
1508 
1509  make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); // !!!!
1510  tail_key.key_length = 4;
1511 
1512  tail_len =
1513  (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1514  while (tail_len) {
1515  /* look for the last byte of the tail */
1516  if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1518  reiserfs_panic(inode->i_sb, "vs-5615",
1519  "found invalid item");
1520  RFALSE(path->pos_in_item !=
1521  ih_item_len(PATH_PITEM_HEAD(path)) - 1,
1522  "vs-5616: appended bytes found");
1523  PATH_LAST_POSITION(path)--;
1524 
1525  removed =
1526  reiserfs_delete_item(th, path, &tail_key, inode,
1527  NULL /*unbh not needed */ );
1528  RFALSE(removed <= 0
1529  || removed > tail_len,
1530  "vs-5617: there was tail %d bytes, removed item length %d bytes",
1531  tail_len, removed);
1532  tail_len -= removed;
1533  set_cpu_key_k_offset(&tail_key,
1534  cpu_key_k_offset(&tail_key) - removed);
1535  }
1536  reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
1537  "conversion has been rolled back due to "
1538  "lack of disk space");
1539  //mark_file_without_tail (inode);
1540  mark_inode_dirty(inode);
1541 }
1542 
1543 /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1545  struct treepath *path,
1546  struct cpu_key *item_key,
1547  struct inode *inode,
1548  struct page *page, loff_t new_file_size)
1549 {
1550  struct super_block *sb = inode->i_sb;
1551  /* Every function which is going to call do_balance must first
1552  create a tree_balance structure. Then it must fill up this
1553  structure by using the init_tb_struct and fix_nodes functions.
1554  After that we can make tree balancing. */
1555  struct tree_balance s_cut_balance;
1556  struct item_head *p_le_ih;
1557  int cut_size = 0, /* Amount to be cut. */
1558  ret_value = CARRY_ON, removed = 0, /* Number of the removed unformatted nodes. */
1559  is_inode_locked = 0;
1560  char mode; /* Mode of the balance. */
1561  int retval2 = -1;
1562  int quota_cut_bytes;
1563  loff_t tail_pos = 0;
1564 
1565  BUG_ON(!th->t_trans_id);
1566 
1567  init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
1568  cut_size);
1569 
1570  /* Repeat this loop until we either cut the item without needing
1571  to balance, or we fix_nodes without schedule occurring */
1572  while (1) {
1573  /* Determine the balance mode, position of the first byte to
1574  be cut, and size to be cut. In case of the indirect item
1575  free unformatted nodes which are pointed to by the cut
1576  pointers. */
1577 
1578  mode =
1579  prepare_for_delete_or_cut(th, inode, path,
1580  item_key, &removed,
1581  &cut_size, new_file_size);
1582  if (mode == M_CONVERT) {
1583  /* convert last unformatted node to direct item or leave
1584  tail in the unformatted node */
1585  RFALSE(ret_value != CARRY_ON,
1586  "PAP-5570: can not convert twice");
1587 
1588  ret_value =
1589  maybe_indirect_to_direct(th, inode, page,
1590  path, item_key,
1591  new_file_size, &mode);
1592  if (mode == M_SKIP_BALANCING)
1593  /* tail has been left in the unformatted node */
1594  return ret_value;
1595 
1596  is_inode_locked = 1;
1597 
1598  /* removing of last unformatted node will change value we
1599  have to return to truncate. Save it */
1600  retval2 = ret_value;
1601  /*retval2 = sb->s_blocksize - (new_file_size & (sb->s_blocksize - 1)); */
1602 
1603  /* So, we have performed the first part of the conversion:
1604  inserting the new direct item. Now we are removing the
1605  last unformatted node pointer. Set key to search for
1606  it. */
1607  set_cpu_key_k_type(item_key, TYPE_INDIRECT);
1608  item_key->key_length = 4;
1609  new_file_size -=
1610  (new_file_size & (sb->s_blocksize - 1));
1611  tail_pos = new_file_size;
1612  set_cpu_key_k_offset(item_key, new_file_size + 1);
1614  (sb, item_key,
1615  path) == POSITION_NOT_FOUND) {
1616  print_block(PATH_PLAST_BUFFER(path), 3,
1617  PATH_LAST_POSITION(path) - 1,
1618  PATH_LAST_POSITION(path) + 1);
1619  reiserfs_panic(sb, "PAP-5580", "item to "
1620  "convert does not exist (%K)",
1621  item_key);
1622  }
1623  continue;
1624  }
1625  if (cut_size == 0) {
1626  pathrelse(path);
1627  return 0;
1628  }
1629 
1630  s_cut_balance.insert_size[0] = cut_size;
1631 
1632  ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
1633  if (ret_value != REPEAT_SEARCH)
1634  break;
1635 
1636  PROC_INFO_INC(sb, cut_from_item_restarted);
1637 
1638  ret_value =
1639  search_for_position_by_key(sb, item_key, path);
1640  if (ret_value == POSITION_FOUND)
1641  continue;
1642 
1643  reiserfs_warning(sb, "PAP-5610", "item %K not found",
1644  item_key);
1645  unfix_nodes(&s_cut_balance);
1646  return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
1647  } /* while */
1648 
1649  // check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
1650  if (ret_value != CARRY_ON) {
1651  if (is_inode_locked) {
1652  // FIXME: this seems to be not needed: we are always able
1653  // to cut item
1654  indirect_to_direct_roll_back(th, inode, path);
1655  }
1656  if (ret_value == NO_DISK_SPACE)
1657  reiserfs_warning(sb, "reiserfs-5092",
1658  "NO_DISK_SPACE");
1659  unfix_nodes(&s_cut_balance);
1660  return -EIO;
1661  }
1662 
1663  /* go ahead and perform balancing */
1664 
1665  RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
1666 
1667  /* Calculate number of bytes that need to be cut from the item. */
1668  quota_cut_bytes =
1669  (mode ==
1670  M_DELETE) ? ih_item_len(get_ih(path)) : -s_cut_balance.
1671  insert_size[0];
1672  if (retval2 == -1)
1673  ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
1674  else
1675  ret_value = retval2;
1676 
1677  /* For direct items, we only change the quota when deleting the last
1678  ** item.
1679  */
1680  p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path);
1681  if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1682  if (mode == M_DELETE &&
1683  (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
1684  1) {
1685  // FIXME: this is to keep 3.5 happy
1686  REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
1687  quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1688  } else {
1689  quota_cut_bytes = 0;
1690  }
1691  }
1692 #ifdef CONFIG_REISERFS_CHECK
1693  if (is_inode_locked) {
1694  struct item_head *le_ih =
1695  PATH_PITEM_HEAD(s_cut_balance.tb_path);
1696  /* we are going to complete indirect2direct conversion. Make
1697  sure, that we exactly remove last unformatted node pointer
1698  of the item */
1699  if (!is_indirect_le_ih(le_ih))
1700  reiserfs_panic(sb, "vs-5652",
1701  "item must be indirect %h", le_ih);
1702 
1703  if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1704  reiserfs_panic(sb, "vs-5653", "completing "
1705  "indirect2direct conversion indirect "
1706  "item %h being deleted must be of "
1707  "4 byte long", le_ih);
1708 
1709  if (mode == M_CUT
1710  && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1711  reiserfs_panic(sb, "vs-5654", "can not complete "
1712  "indirect2direct conversion of %h "
1713  "(CUT, insert_size==%d)",
1714  le_ih, s_cut_balance.insert_size[0]);
1715  }
1716  /* it would be useful to make sure, that right neighboring
1717  item is direct item of this file */
1718  }
1719 #endif
1720 
1721  do_balance(&s_cut_balance, NULL, NULL, mode);
1722  if (is_inode_locked) {
1723  /* we've done an indirect->direct conversion. when the data block
1724  ** was freed, it was removed from the list of blocks that must
1725  ** be flushed before the transaction commits, make sure to
1726  ** unmap and invalidate it
1727  */
1728  unmap_buffers(page, tail_pos);
1729  REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
1730  }
1731 #ifdef REISERQUOTA_DEBUG
1733  "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1734  quota_cut_bytes, inode->i_uid, '?');
1735 #endif
1736  dquot_free_space_nodirty(inode, quota_cut_bytes);
1737  return ret_value;
1738 }
1739 
1740 static void truncate_directory(struct reiserfs_transaction_handle *th,
1741  struct inode *inode)
1742 {
1743  BUG_ON(!th->t_trans_id);
1744  if (inode->i_nlink)
1745  reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
1746 
1747  set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1748  set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
1749  reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1750  reiserfs_update_sd(th, inode);
1751  set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
1752  set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1753 }
1754 
1755 /* Truncate file to the new size. Note, this must be called with a transaction
1756  already started */
1758  struct inode *inode, /* ->i_size contains new size */
1759  struct page *page, /* up to date for last block */
1760  int update_timestamps /* when it is called by
1761  file_release to convert
1762  the tail - no timestamps
1763  should be updated */
1764  )
1765 {
1766  INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
1767  struct item_head *p_le_ih; /* Pointer to an item header. */
1768  struct cpu_key s_item_key; /* Key to search for a previous file item. */
1769  loff_t file_size, /* Old file size. */
1770  new_file_size; /* New file size. */
1771  int deleted; /* Number of deleted or truncated bytes. */
1772  int retval;
1773  int err = 0;
1774 
1775  BUG_ON(!th->t_trans_id);
1776  if (!
1777  (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
1778  || S_ISLNK(inode->i_mode)))
1779  return 0;
1780 
1781  if (S_ISDIR(inode->i_mode)) {
1782  // deletion of directory - no need to update timestamps
1783  truncate_directory(th, inode);
1784  return 0;
1785  }
1786 
1787  /* Get new file size. */
1788  new_file_size = inode->i_size;
1789 
1790  // FIXME: note, that key type is unimportant here
1791  make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
1792  TYPE_DIRECT, 3);
1793 
1794  retval =
1795  search_for_position_by_key(inode->i_sb, &s_item_key,
1796  &s_search_path);
1797  if (retval == IO_ERROR) {
1798  reiserfs_error(inode->i_sb, "vs-5657",
1799  "i/o failure occurred trying to truncate %K",
1800  &s_item_key);
1801  err = -EIO;
1802  goto out;
1803  }
1804  if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1805  reiserfs_error(inode->i_sb, "PAP-5660",
1806  "wrong result %d of search for %K", retval,
1807  &s_item_key);
1808 
1809  err = -EIO;
1810  goto out;
1811  }
1812 
1813  s_search_path.pos_in_item--;
1814 
1815  /* Get real file size (total length of all file items) */
1816  p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1817  if (is_statdata_le_ih(p_le_ih))
1818  file_size = 0;
1819  else {
1820  loff_t offset = le_ih_k_offset(p_le_ih);
1821  int bytes =
1822  op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
1823 
1824  /* this may mismatch with real file size: if last direct item
1825  had no padding zeros and last unformatted node had no free
1826  space, this file would have this file size */
1827  file_size = offset + bytes - 1;
1828  }
1829  /*
1830  * are we doing a full truncate or delete, if so
1831  * kick in the reada code
1832  */
1833  if (new_file_size == 0)
1834  s_search_path.reada = PATH_READA | PATH_READA_BACK;
1835 
1836  if (file_size == 0 || file_size < new_file_size) {
1837  goto update_and_out;
1838  }
1839 
1840  /* Update key to search for the last file item. */
1841  set_cpu_key_k_offset(&s_item_key, file_size);
1842 
1843  do {
1844  /* Cut or delete file item. */
1845  deleted =
1846  reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1847  inode, page, new_file_size);
1848  if (deleted < 0) {
1849  reiserfs_warning(inode->i_sb, "vs-5665",
1850  "reiserfs_cut_from_item failed");
1851  reiserfs_check_path(&s_search_path);
1852  return 0;
1853  }
1854 
1855  RFALSE(deleted > file_size,
1856  "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1857  deleted, file_size, &s_item_key);
1858 
1859  /* Change key to search the last file item. */
1860  file_size -= deleted;
1861 
1862  set_cpu_key_k_offset(&s_item_key, file_size);
1863 
1864  /* While there are bytes to truncate and previous file item is presented in the tree. */
1865 
1866  /*
1867  ** This loop could take a really long time, and could log
1868  ** many more blocks than a transaction can hold. So, we do a polite
1869  ** journal end here, and if the transaction needs ending, we make
1870  ** sure the file is consistent before ending the current trans
1871  ** and starting a new one
1872  */
1873  if (journal_transaction_should_end(th, 0) ||
1874  reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
1875  int orig_len_alloc = th->t_blocks_allocated;
1876  pathrelse(&s_search_path);
1877 
1878  if (update_timestamps) {
1879  inode->i_mtime = CURRENT_TIME_SEC;
1880  inode->i_ctime = CURRENT_TIME_SEC;
1881  }
1882  reiserfs_update_sd(th, inode);
1883 
1884  err = journal_end(th, inode->i_sb, orig_len_alloc);
1885  if (err)
1886  goto out;
1887  err = journal_begin(th, inode->i_sb,
1889  if (err)
1890  goto out;
1892  }
1893  } while (file_size > ROUND_UP(new_file_size) &&
1894  search_for_position_by_key(inode->i_sb, &s_item_key,
1895  &s_search_path) == POSITION_FOUND);
1896 
1897  RFALSE(file_size > ROUND_UP(new_file_size),
1898  "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1899  new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
1900 
1901  update_and_out:
1902  if (update_timestamps) {
1903  // this is truncate, not file closing
1904  inode->i_mtime = CURRENT_TIME_SEC;
1905  inode->i_ctime = CURRENT_TIME_SEC;
1906  }
1907  reiserfs_update_sd(th, inode);
1908 
1909  out:
1910  pathrelse(&s_search_path);
1911  return err;
1912 }
1913 
1914 #ifdef CONFIG_REISERFS_CHECK
1915 // this makes sure, that we __append__, not overwrite or add holes
1916 static void check_research_for_paste(struct treepath *path,
1917  const struct cpu_key *key)
1918 {
1919  struct item_head *found_ih = get_ih(path);
1920 
1921  if (is_direct_le_ih(found_ih)) {
1922  if (le_ih_k_offset(found_ih) +
1923  op_bytes_number(found_ih,
1924  get_last_bh(path)->b_size) !=
1925  cpu_key_k_offset(key)
1926  || op_bytes_number(found_ih,
1927  get_last_bh(path)->b_size) !=
1928  pos_in_item(path))
1929  reiserfs_panic(NULL, "PAP-5720", "found direct item "
1930  "%h or position (%d) does not match "
1931  "to key %K", found_ih,
1932  pos_in_item(path), key);
1933  }
1934  if (is_indirect_le_ih(found_ih)) {
1935  if (le_ih_k_offset(found_ih) +
1936  op_bytes_number(found_ih,
1937  get_last_bh(path)->b_size) !=
1938  cpu_key_k_offset(key)
1939  || I_UNFM_NUM(found_ih) != pos_in_item(path)
1940  || get_ih_free_space(found_ih) != 0)
1941  reiserfs_panic(NULL, "PAP-5730", "found indirect "
1942  "item (%h) or position (%d) does not "
1943  "match to key (%K)",
1944  found_ih, pos_in_item(path), key);
1945  }
1946 }
1947 #endif /* config reiserfs check */
1948 
1949 /* Paste bytes to the existing item. Returns bytes number pasted into the item. */
1950 int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *search_path, /* Path to the pasted item. */
1951  const struct cpu_key *key, /* Key to search for the needed item. */
1952  struct inode *inode, /* Inode item belongs to */
1953  const char *body, /* Pointer to the bytes to paste. */
1954  int pasted_size)
1955 { /* Size of pasted bytes. */
1956  struct tree_balance s_paste_balance;
1957  int retval;
1958  int fs_gen;
1959 
1960  BUG_ON(!th->t_trans_id);
1961 
1962  fs_gen = get_generation(inode->i_sb);
1963 
1964 #ifdef REISERQUOTA_DEBUG
1966  "reiserquota paste_into_item(): allocating %u id=%u type=%c",
1967  pasted_size, inode->i_uid,
1968  key2type(&(key->on_disk_key)));
1969 #endif
1970 
1971  reiserfs_write_unlock(inode->i_sb);
1972  retval = dquot_alloc_space_nodirty(inode, pasted_size);
1973  reiserfs_write_lock(inode->i_sb);
1974  if (retval) {
1975  pathrelse(search_path);
1976  return retval;
1977  }
1978  init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
1979  pasted_size);
1980 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1981  s_paste_balance.key = key->on_disk_key;
1982 #endif
1983 
1984  /* DQUOT_* can schedule, must check before the fix_nodes */
1985  if (fs_changed(fs_gen, inode->i_sb)) {
1986  goto search_again;
1987  }
1988 
1989  while ((retval =
1990  fix_nodes(M_PASTE, &s_paste_balance, NULL,
1991  body)) == REPEAT_SEARCH) {
1992  search_again:
1993  /* file system changed while we were in the fix_nodes */
1994  PROC_INFO_INC(th->t_super, paste_into_item_restarted);
1995  retval =
1997  search_path);
1998  if (retval == IO_ERROR) {
1999  retval = -EIO;
2000  goto error_out;
2001  }
2002  if (retval == POSITION_FOUND) {
2003  reiserfs_warning(inode->i_sb, "PAP-5710",
2004  "entry or pasted byte (%K) exists",
2005  key);
2006  retval = -EEXIST;
2007  goto error_out;
2008  }
2009 #ifdef CONFIG_REISERFS_CHECK
2010  check_research_for_paste(search_path, key);
2011 #endif
2012  }
2013 
2014  /* Perform balancing after all resources are collected by fix_nodes, and
2015  accessing them will not risk triggering schedule. */
2016  if (retval == CARRY_ON) {
2017  do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
2018  return 0;
2019  }
2020  retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2021  error_out:
2022  /* this also releases the path */
2023  unfix_nodes(&s_paste_balance);
2024 #ifdef REISERQUOTA_DEBUG
2026  "reiserquota paste_into_item(): freeing %u id=%u type=%c",
2027  pasted_size, inode->i_uid,
2028  key2type(&(key->on_disk_key)));
2029 #endif
2030  dquot_free_space_nodirty(inode, pasted_size);
2031  return retval;
2032 }
2033 
2034 /* Insert new item into the buffer at the path.
2035  * th - active transaction handle
2036  * path - path to the inserted item
2037  * ih - pointer to the item header to insert
2038  * body - pointer to the bytes to insert
2039  */
2041  struct treepath *path, const struct cpu_key *key,
2042  struct item_head *ih, struct inode *inode,
2043  const char *body)
2044 {
2045  struct tree_balance s_ins_balance;
2046  int retval;
2047  int fs_gen = 0;
2048  int quota_bytes = 0;
2049 
2050  BUG_ON(!th->t_trans_id);
2051 
2052  if (inode) { /* Do we count quotas for item? */
2053  fs_gen = get_generation(inode->i_sb);
2054  quota_bytes = ih_item_len(ih);
2055 
2056  /* hack so the quota code doesn't have to guess if the file has
2057  ** a tail, links are always tails, so there's no guessing needed
2058  */
2059  if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
2060  quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2061 #ifdef REISERQUOTA_DEBUG
2063  "reiserquota insert_item(): allocating %u id=%u type=%c",
2064  quota_bytes, inode->i_uid, head2type(ih));
2065 #endif
2066  reiserfs_write_unlock(inode->i_sb);
2067  /* We can't dirty inode here. It would be immediately written but
2068  * appropriate stat item isn't inserted yet... */
2069  retval = dquot_alloc_space_nodirty(inode, quota_bytes);
2070  reiserfs_write_lock(inode->i_sb);
2071  if (retval) {
2072  pathrelse(path);
2073  return retval;
2074  }
2075  }
2076  init_tb_struct(th, &s_ins_balance, th->t_super, path,
2077  IH_SIZE + ih_item_len(ih));
2078 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
2079  s_ins_balance.key = key->on_disk_key;
2080 #endif
2081  /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
2082  if (inode && fs_changed(fs_gen, inode->i_sb)) {
2083  goto search_again;
2084  }
2085 
2086  while ((retval =
2087  fix_nodes(M_INSERT, &s_ins_balance, ih,
2088  body)) == REPEAT_SEARCH) {
2089  search_again:
2090  /* file system changed while we were in the fix_nodes */
2091  PROC_INFO_INC(th->t_super, insert_item_restarted);
2092  retval = search_item(th->t_super, key, path);
2093  if (retval == IO_ERROR) {
2094  retval = -EIO;
2095  goto error_out;
2096  }
2097  if (retval == ITEM_FOUND) {
2098  reiserfs_warning(th->t_super, "PAP-5760",
2099  "key %K already exists in the tree",
2100  key);
2101  retval = -EEXIST;
2102  goto error_out;
2103  }
2104  }
2105 
2106  /* make balancing after all resources will be collected at a time */
2107  if (retval == CARRY_ON) {
2108  do_balance(&s_ins_balance, ih, body, M_INSERT);
2109  return 0;
2110  }
2111 
2112  retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2113  error_out:
2114  /* also releases the path */
2115  unfix_nodes(&s_ins_balance);
2116 #ifdef REISERQUOTA_DEBUG
2118  "reiserquota insert_item(): freeing %u id=%u type=%c",
2119  quota_bytes, inode->i_uid, head2type(ih));
2120 #endif
2121  if (inode)
2122  dquot_free_space_nodirty(inode, quota_bytes);
2123  return retval;
2124 }