Linux Kernel 3.7.1
delayed-inode.c
1 /*
2  * Copyright (C) 2011 Fujitsu. All rights reserved.
3  * Written by Miao Xie <[email protected]>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public
7  * License v2 as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public
15  * License along with this program; if not, write to the
16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17  * Boston, MA 02111-1307, USA.
18  */
19 
20 #include <linux/slab.h>
21 #include "delayed-inode.h"
22 #include "disk-io.h"
23 #include "transaction.h"
24 
25 #define BTRFS_DELAYED_WRITEBACK 400
26 #define BTRFS_DELAYED_BACKGROUND 100
27 
28 static struct kmem_cache *delayed_node_cache;
29 
30 int __init btrfs_delayed_inode_init(void)
31 {
32  delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
33  sizeof(struct btrfs_delayed_node),
34  0,
35  SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
36  NULL);
37  if (!delayed_node_cache)
38  return -ENOMEM;
39  return 0;
40 }
41 
42 void btrfs_delayed_inode_exit(void)
43 {
44  if (delayed_node_cache)
45  kmem_cache_destroy(delayed_node_cache);
46 }
47 
48 static inline void btrfs_init_delayed_node(
49  struct btrfs_delayed_node *delayed_node,
50  struct btrfs_root *root, u64 inode_id)
51 {
52  delayed_node->root = root;
53  delayed_node->inode_id = inode_id;
54  atomic_set(&delayed_node->refs, 0);
55  delayed_node->count = 0;
56  delayed_node->in_list = 0;
57  delayed_node->inode_dirty = 0;
58  delayed_node->ins_root = RB_ROOT;
59  delayed_node->del_root = RB_ROOT;
60  mutex_init(&delayed_node->mutex);
61  delayed_node->index_cnt = 0;
62  INIT_LIST_HEAD(&delayed_node->n_list);
63  INIT_LIST_HEAD(&delayed_node->p_list);
64  delayed_node->bytes_reserved = 0;
65  memset(&delayed_node->inode_item, 0, sizeof(delayed_node->inode_item));
66 }
67 
68 static inline int btrfs_is_continuous_delayed_item(
69  struct btrfs_delayed_item *item1,
70  struct btrfs_delayed_item *item2)
71 {
72  if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
73  item1->key.objectid == item2->key.objectid &&
74  item1->key.type == item2->key.type &&
75  item1->key.offset + 1 == item2->key.offset)
76  return 1;
77  return 0;
78 }
79 
80 static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
81  struct btrfs_root *root)
82 {
83  return root->fs_info->delayed_root;
84 }
85 
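/*
 * Look up the delayed node for an inode: first try the pointer cached in
 * the btrfs inode, then fall back to the per-root radix tree. Returns NULL
 * when the inode has no delayed node; otherwise a reference is taken that
 * the caller must drop with btrfs_release_delayed_node().
 */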
86 static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
87 {
88  struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
89  struct btrfs_root *root = btrfs_inode->root;
90  u64 ino = btrfs_ino(inode);
91  struct btrfs_delayed_node *node;
92 
93  node = ACCESS_ONCE(btrfs_inode->delayed_node);
94  if (node) {
95  atomic_inc(&node->refs);
96  return node;
97  }
98 
99  spin_lock(&root->inode_lock);
100  node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
101  if (node) {
102  if (btrfs_inode->delayed_node) {
103  atomic_inc(&node->refs); /* can be accessed */
104  BUG_ON(btrfs_inode->delayed_node != node);
105  spin_unlock(&root->inode_lock);
106  return node;
107  }
108  btrfs_inode->delayed_node = node;
109  atomic_inc(&node->refs); /* can be accessed */
110  atomic_inc(&node->refs); /* cached in the inode */
111  spin_unlock(&root->inode_lock);
112  return node;
113  }
114  spin_unlock(&root->inode_lock);
115 
116  return NULL;
117 }
118 
119 /* Will return either the node or PTR_ERR(-ENOMEM) */
120 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
121  struct inode *inode)
122 {
123  struct btrfs_delayed_node *node;
124  struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
125  struct btrfs_root *root = btrfs_inode->root;
126  u64 ino = btrfs_ino(inode);
127  int ret;
128 
129 again:
130  node = btrfs_get_delayed_node(inode);
131  if (node)
132  return node;
133 
134  node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
135  if (!node)
136  return ERR_PTR(-ENOMEM);
137  btrfs_init_delayed_node(node, root, ino);
138 
139  atomic_inc(&node->refs); /* cached in the btrfs inode */
140  atomic_inc(&node->refs); /* can be accessed */
141 
142  ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
143  if (ret) {
144  kmem_cache_free(delayed_node_cache, node);
145  return ERR_PTR(ret);
146  }
147 
148  spin_lock(&root->inode_lock);
149  ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
150  if (ret == -EEXIST) {
151  kmem_cache_free(delayed_node_cache, node);
152  spin_unlock(&root->inode_lock);
153  radix_tree_preload_end();
154  goto again;
155  }
156  btrfs_inode->delayed_node = node;
157  spin_unlock(&root->inode_lock);
158  radix_tree_preload_end();
159 
160  return node;
161 }
162 
163 /*
164  * Call it when holding delayed_node->mutex
165  *
166  * If mod = 1, add this node into the prepared list.
167  */
168 static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
169  struct btrfs_delayed_node *node,
170  int mod)
171 {
172  spin_lock(&root->lock);
173  if (node->in_list) {
174  if (!list_empty(&node->p_list))
175  list_move_tail(&node->p_list, &root->prepare_list);
176  else if (mod)
177  list_add_tail(&node->p_list, &root->prepare_list);
178  } else {
179  list_add_tail(&node->n_list, &root->node_list);
180  list_add_tail(&node->p_list, &root->prepare_list);
181  atomic_inc(&node->refs); /* inserted into list */
182  root->nodes++;
183  node->in_list = 1;
184  }
185  spin_unlock(&root->lock);
186 }
187 
188 /* Call it when holding delayed_node->mutex */
189 static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
190  struct btrfs_delayed_node *node)
191 {
192  spin_lock(&root->lock);
193  if (node->in_list) {
194  root->nodes--;
195  atomic_dec(&node->refs); /* not in the list */
196  list_del_init(&node->n_list);
197  if (!list_empty(&node->p_list))
198  list_del_init(&node->p_list);
199  node->in_list = 0;
200  }
201  spin_unlock(&root->lock);
202 }
203 
204 static struct btrfs_delayed_node *btrfs_first_delayed_node(
205  struct btrfs_delayed_root *delayed_root)
206 {
207  struct list_head *p;
208  struct btrfs_delayed_node *node = NULL;
209 
210  spin_lock(&delayed_root->lock);
211  if (list_empty(&delayed_root->node_list))
212  goto out;
213 
214  p = delayed_root->node_list.next;
215  node = list_entry(p, struct btrfs_delayed_node, n_list);
216  atomic_inc(&node->refs);
217 out:
218  spin_unlock(&delayed_root->lock);
219 
220  return node;
221 }
222 
223 static struct btrfs_delayed_node *btrfs_next_delayed_node(
224  struct btrfs_delayed_node *node)
225 {
226  struct btrfs_delayed_root *delayed_root;
227  struct list_head *p;
228  struct btrfs_delayed_node *next = NULL;
229 
230  delayed_root = node->root->fs_info->delayed_root;
231  spin_lock(&delayed_root->lock);
232  if (!node->in_list) { /* not in the list */
233  if (list_empty(&delayed_root->node_list))
234  goto out;
235  p = delayed_root->node_list.next;
236  } else if (list_is_last(&node->n_list, &delayed_root->node_list))
237  goto out;
238  else
239  p = node->n_list.next;
240 
241  next = list_entry(p, struct btrfs_delayed_node, n_list);
242  atomic_inc(&next->refs);
243 out:
244  spin_unlock(&delayed_root->lock);
245 
246  return next;
247 }
248 
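/*
 * Drop a reference on a delayed node. While items are still pending the
 * node is (re)queued on the delayed root; once the last reference goes
 * away it is removed from the radix tree and freed.
 */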
249 static void __btrfs_release_delayed_node(
250  struct btrfs_delayed_node *delayed_node,
251  int mod)
252 {
253  struct btrfs_delayed_root *delayed_root;
254 
255  if (!delayed_node)
256  return;
257 
258  delayed_root = delayed_node->root->fs_info->delayed_root;
259 
260  mutex_lock(&delayed_node->mutex);
261  if (delayed_node->count)
262  btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
263  else
264  btrfs_dequeue_delayed_node(delayed_root, delayed_node);
265  mutex_unlock(&delayed_node->mutex);
266 
267  if (atomic_dec_and_test(&delayed_node->refs)) {
268  struct btrfs_root *root = delayed_node->root;
269  spin_lock(&root->inode_lock);
270  if (atomic_read(&delayed_node->refs) == 0) {
271  radix_tree_delete(&root->delayed_nodes_tree,
272  delayed_node->inode_id);
273  kmem_cache_free(delayed_node_cache, delayed_node);
274  }
275  spin_unlock(&root->inode_lock);
276  }
277 }
278 
279 static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
280 {
281  __btrfs_release_delayed_node(node, 0);
282 }
283 
284 static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
285  struct btrfs_delayed_root *delayed_root)
286 {
287  struct list_head *p;
288  struct btrfs_delayed_node *node = NULL;
289 
290  spin_lock(&delayed_root->lock);
291  if (list_empty(&delayed_root->prepare_list))
292  goto out;
293 
294  p = delayed_root->prepare_list.next;
295  list_del_init(p);
296  node = list_entry(p, struct btrfs_delayed_node, p_list);
297  atomic_inc(&node->refs);
298 out:
299  spin_unlock(&delayed_root->lock);
300 
301  return node;
302 }
303 
304 static inline void btrfs_release_prepared_delayed_node(
305  struct btrfs_delayed_node *node)
306 {
307  __btrfs_release_delayed_node(node, 1);
308 }
309 
310 static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
311 {
312  struct btrfs_delayed_item *item;
313  item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
314  if (item) {
315  item->data_len = data_len;
316  item->ins_or_del = 0;
317  item->bytes_reserved = 0;
318  item->delayed_node = NULL;
319  atomic_set(&item->refs, 1);
320  }
321  return item;
322 }
323 
324 /*
325  * __btrfs_lookup_delayed_item - look up the delayed item by key
326  * @delayed_node: pointer to the delayed node
327  * @key: the key to look up
328  * @prev: used to store the prev item if the right item isn't found
329  * @next: used to store the next item if the right item isn't found
330  *
331  * Note: if we don't find the right item, we will return the prev item and
332  * the next item.
333  */
334 static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
335  struct rb_root *root,
336  struct btrfs_key *key,
337  struct btrfs_delayed_item **prev,
338  struct btrfs_delayed_item **next)
339 {
340  struct rb_node *node, *prev_node = NULL;
341  struct btrfs_delayed_item *delayed_item = NULL;
342  int ret = 0;
343 
344  node = root->rb_node;
345 
346  while (node) {
347  delayed_item = rb_entry(node, struct btrfs_delayed_item,
348  rb_node);
349  prev_node = node;
350  ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
351  if (ret < 0)
352  node = node->rb_right;
353  else if (ret > 0)
354  node = node->rb_left;
355  else
356  return delayed_item;
357  }
358 
359  if (prev) {
360  if (!prev_node)
361  *prev = NULL;
362  else if (ret < 0)
363  *prev = delayed_item;
364  else if ((node = rb_prev(prev_node)) != NULL) {
365  *prev = rb_entry(node, struct btrfs_delayed_item,
366  rb_node);
367  } else
368  *prev = NULL;
369  }
370 
371  if (next) {
372  if (!prev_node)
373  *next = NULL;
374  else if (ret > 0)
375  *next = delayed_item;
376  else if ((node = rb_next(prev_node)) != NULL) {
377  *next = rb_entry(node, struct btrfs_delayed_item,
378  rb_node);
379  } else
380  *next = NULL;
381  }
382  return NULL;
383 }
384 
385 static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
386  struct btrfs_delayed_node *delayed_node,
387  struct btrfs_key *key)
388 {
389  struct btrfs_delayed_item *item;
390 
391  item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
392  NULL, NULL);
393  return item;
394 }
395 
396 static struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item(
397  struct btrfs_delayed_node *delayed_node,
398  struct btrfs_key *key)
399 {
400  struct btrfs_delayed_item *item;
401 
402  item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
403  NULL, NULL);
404  return item;
405 }
406 
407 static struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item(
408  struct btrfs_delayed_node *delayed_node,
409  struct btrfs_key *key)
410 {
411  struct btrfs_delayed_item *item, *next;
412 
413  item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
414  NULL, &next);
415  if (!item)
416  item = next;
417 
418  return item;
419 }
420 
421 static struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item(
422  struct btrfs_delayed_node *delayed_node,
423  struct btrfs_key *key)
424 {
425  struct btrfs_delayed_item *item, *next;
426 
427  item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
428  NULL, &next);
429  if (!item)
430  item = next;
431 
432  return item;
433 }
434 
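/*
 * Link a delayed item into the node's insertion or deletion rb-tree,
 * ordered by btrfs key. Returns -EEXIST if an item with the same key is
 * already queued; for new dir index insertions the node's index_cnt is
 * bumped past the inserted offset.
 */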
435 static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
436  struct btrfs_delayed_item *ins,
437  int action)
438 {
439  struct rb_node **p, *node;
440  struct rb_node *parent_node = NULL;
441  struct rb_root *root;
442  struct btrfs_delayed_item *item;
443  int cmp;
444 
445  if (action == BTRFS_DELAYED_INSERTION_ITEM)
446  root = &delayed_node->ins_root;
447  else if (action == BTRFS_DELAYED_DELETION_ITEM)
448  root = &delayed_node->del_root;
449  else
450  BUG();
451  p = &root->rb_node;
452  node = &ins->rb_node;
453 
454  while (*p) {
455  parent_node = *p;
456  item = rb_entry(parent_node, struct btrfs_delayed_item,
457  rb_node);
458 
459  cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
460  if (cmp < 0)
461  p = &(*p)->rb_right;
462  else if (cmp > 0)
463  p = &(*p)->rb_left;
464  else
465  return -EEXIST;
466  }
467 
468  rb_link_node(node, parent_node, p);
469  rb_insert_color(node, root);
470  ins->delayed_node = delayed_node;
471  ins->ins_or_del = action;
472 
473  if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
474  action == BTRFS_DELAYED_INSERTION_ITEM &&
475  ins->key.offset >= delayed_node->index_cnt)
476  delayed_node->index_cnt = ins->key.offset + 1;
477 
478  delayed_node->count++;
479  atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
480  return 0;
481 }
482 
483 static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
484  struct btrfs_delayed_item *item)
485 {
486  return __btrfs_add_delayed_item(node, item,
487  BTRFS_DELAYED_INSERTION_ITEM);
488 }
489 
490 static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
491  struct btrfs_delayed_item *item)
492 {
493  return __btrfs_add_delayed_item(node, item,
494  BTRFS_DELAYED_DELETION_ITEM);
495 }
496 
497 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
498 {
499  struct rb_root *root;
500  struct btrfs_delayed_root *delayed_root;
501 
502  delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
503 
504  BUG_ON(!delayed_root);
505  BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
506  delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
507 
508  if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
509  root = &delayed_item->delayed_node->ins_root;
510  else
511  root = &delayed_item->delayed_node->del_root;
512 
513  rb_erase(&delayed_item->rb_node, root);
514  delayed_item->delayed_node->count--;
515  if (atomic_dec_return(&delayed_root->items) <
515  BTRFS_DELAYED_BACKGROUND &&
517  waitqueue_active(&delayed_root->wait))
518  wake_up(&delayed_root->wait);
519 }
520 
521 static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
522 {
523  if (item) {
524  __btrfs_remove_delayed_item(item);
525  if (atomic_dec_and_test(&item->refs))
526  kfree(item);
527  }
528 }
529 
530 static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
531  struct btrfs_delayed_node *delayed_node)
532 {
533  struct rb_node *p;
534  struct btrfs_delayed_item *item = NULL;
535 
536  p = rb_first(&delayed_node->ins_root);
537  if (p)
538  item = rb_entry(p, struct btrfs_delayed_item, rb_node);
539 
540  return item;
541 }
542 
543 static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
544  struct btrfs_delayed_node *delayed_node)
545 {
546  struct rb_node *p;
547  struct btrfs_delayed_item *item = NULL;
548 
549  p = rb_first(&delayed_node->del_root);
550  if (p)
551  item = rb_entry(p, struct btrfs_delayed_item, rb_node);
552 
553  return item;
554 }
555 
556 static struct btrfs_delayed_item *__btrfs_next_delayed_item(
557  struct btrfs_delayed_item *item)
558 {
559  struct rb_node *p;
560  struct btrfs_delayed_item *next = NULL;
561 
562  p = rb_next(&item->rb_node);
563  if (p)
564  next = rb_entry(p, struct btrfs_delayed_item, rb_node);
565 
566  return next;
567 }
568 
569 static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
570  u64 root_id)
571 {
572  struct btrfs_key root_key;
573 
574  if (root->objectid == root_id)
575  return root;
576 
577  root_key.objectid = root_id;
578  root_key.type = BTRFS_ROOT_ITEM_KEY;
579  root_key.offset = (u64)-1;
580  return btrfs_read_fs_root_no_name(root->fs_info, &root_key);
581 }
582 
583 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
584  struct btrfs_root *root,
585  struct btrfs_delayed_item *item)
586 {
587  struct btrfs_block_rsv *src_rsv;
588  struct btrfs_block_rsv *dst_rsv;
589  u64 num_bytes;
590  int ret;
591 
592  if (!trans->bytes_reserved)
593  return 0;
594 
595  src_rsv = trans->block_rsv;
596  dst_rsv = &root->fs_info->delayed_block_rsv;
597 
598  num_bytes = btrfs_calc_trans_metadata_size(root, 1);
599  ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
600  if (!ret) {
601  trace_btrfs_space_reservation(root->fs_info, "delayed_item",
602  item->key.objectid,
603  num_bytes, 1);
604  item->bytes_reserved = num_bytes;
605  }
606 
607  return ret;
608 }
609 
610 static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
611  struct btrfs_delayed_item *item)
612 {
613  struct btrfs_block_rsv *rsv;
614 
615  if (!item->bytes_reserved)
616  return;
617 
618  rsv = &root->fs_info->delayed_block_rsv;
619  trace_btrfs_space_reservation(root->fs_info, "delayed_item",
620  item->key.objectid, item->bytes_reserved,
621  0);
622  btrfs_block_rsv_release(root, rsv,
623  item->bytes_reserved);
624 }
625 
626 static int btrfs_delayed_inode_reserve_metadata(
627  struct btrfs_trans_handle *trans,
628  struct btrfs_root *root,
629  struct inode *inode,
630  struct btrfs_delayed_node *node)
631 {
632  struct btrfs_block_rsv *src_rsv;
633  struct btrfs_block_rsv *dst_rsv;
634  u64 num_bytes;
635  int ret;
636  bool release = false;
637 
638  src_rsv = trans->block_rsv;
639  dst_rsv = &root->fs_info->delayed_block_rsv;
640 
641  num_bytes = btrfs_calc_trans_metadata_size(root, 1);
642 
643  /*
644  * btrfs_dirty_inode will update the inode under btrfs_join_transaction
645  * which doesn't reserve space for speed. This is a problem since we
646  * still need to reserve space for this update, so try to reserve the
647  * space.
648  *
649  * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
650  * we're accounted for.
651  */
652  if (!src_rsv || (!trans->bytes_reserved &&
653  src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
654  ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
655  /*
656  * Since we're under a transaction reserve_metadata_bytes could
657  * try to commit the transaction which will make it return
658  * EAGAIN to make us stop the transaction we have, so return
659  * ENOSPC instead so that btrfs_dirty_inode knows what to do.
660  */
661  if (ret == -EAGAIN)
662  ret = -ENOSPC;
663  if (!ret) {
664  node->bytes_reserved = num_bytes;
665  trace_btrfs_space_reservation(root->fs_info,
666  "delayed_inode",
667  btrfs_ino(inode),
668  num_bytes, 1);
669  }
670  return ret;
671  } else if (src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
672  spin_lock(&BTRFS_I(inode)->lock);
673  if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
674  &BTRFS_I(inode)->runtime_flags)) {
675  spin_unlock(&BTRFS_I(inode)->lock);
676  release = true;
677  goto migrate;
678  }
679  spin_unlock(&BTRFS_I(inode)->lock);
680 
681  /* Ok we didn't have space pre-reserved. This shouldn't happen
682  * too often but it can happen if we do delalloc to an existing
683  * inode which gets dirtied because of the time update, and then
684  * isn't touched again until after the transaction commits and
685  * then we try to write out the data. First try to be nice and
686  * reserve something strictly for us. If not be a pain and try
687  * to steal from the delalloc block rsv.
688  */
689  ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
690  if (!ret)
691  goto out;
692 
693  ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
694  if (!ret)
695  goto out;
696 
697  /*
698  * Ok this is a problem, let's just steal from the global rsv
699  * since this really shouldn't happen that often.
700  */
701  WARN_ON(1);
702  ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
703  dst_rsv, num_bytes);
704  goto out;
705  }
706 
707 migrate:
708  ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
709 
710 out:
711  /*
712  * Migrate only takes a reservation, it doesn't touch the size of the
713  * block_rsv. This is to simplify people who don't normally have things
714  * migrated from their block rsv. If they go to release their
715  * reservation, that will decrease the size as well, so if migrate
716  * reduced size we'd end up with a negative size. But for the
717  * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
718  * but we could in fact do this reserve/migrate dance several times
719  * between the time we did the original reservation and we'd clean it
720  * up. So to take care of this, release the space for the meta
721  * reservation here. I think it may be time for a documentation page on
722  * how block rsvs work.
723  */
724  if (!ret) {
725  trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
726  btrfs_ino(inode), num_bytes, 1);
727  node->bytes_reserved = num_bytes;
728  }
729 
730  if (release) {
731  trace_btrfs_space_reservation(root->fs_info, "delalloc",
732  btrfs_ino(inode), num_bytes, 0);
733  btrfs_block_rsv_release(root, src_rsv, num_bytes);
734  }
735 
736  return ret;
737 }
738 
739 static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
740  struct btrfs_delayed_node *node)
741 {
742  struct btrfs_block_rsv *rsv;
743 
744  if (!node->bytes_reserved)
745  return;
746 
747  rsv = &root->fs_info->delayed_block_rsv;
748  trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
749  node->inode_id, node->bytes_reserved, 0);
750  btrfs_block_rsv_release(root, rsv,
751  node->bytes_reserved);
752  node->bytes_reserved = 0;
753 }
754 
755 /*
756  * This helper will insert some continuous items into the same leaf according
757  * to the free space of the leaf.
758  */
759 static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
760  struct btrfs_root *root,
761  struct btrfs_path *path,
762  struct btrfs_delayed_item *item)
763 {
764  struct btrfs_delayed_item *curr, *next;
765  int free_space;
766  int total_data_size = 0, total_size = 0;
767  struct extent_buffer *leaf;
768  char *data_ptr;
769  struct btrfs_key *keys;
770  u32 *data_size;
771  struct list_head head;
772  int slot;
773  int nitems;
774  int i;
775  int ret = 0;
776 
777  BUG_ON(!path->nodes[0]);
778 
779  leaf = path->nodes[0];
780  free_space = btrfs_leaf_free_space(root, leaf);
781  INIT_LIST_HEAD(&head);
782 
783  next = item;
784  nitems = 0;
785 
786  /*
787  * count the number of the continuous items that we can insert in batch
788  */
789  while (total_size + next->data_len + sizeof(struct btrfs_item) <=
790  free_space) {
791  total_data_size += next->data_len;
792  total_size += next->data_len + sizeof(struct btrfs_item);
793  list_add_tail(&next->tree_list, &head);
794  nitems++;
795 
796  curr = next;
797  next = __btrfs_next_delayed_item(curr);
798  if (!next)
799  break;
800 
801  if (!btrfs_is_continuous_delayed_item(curr, next))
802  break;
803  }
804 
805  if (!nitems) {
806  ret = 0;
807  goto out;
808  }
809 
810  /*
811  * we need allocate some memory space, but it might cause the task
812  * to sleep, so we set all locked nodes in the path to blocking locks
813  * first.
814  */
815  btrfs_set_path_blocking(path);
816 
817  keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
818  if (!keys) {
819  ret = -ENOMEM;
820  goto out;
821  }
822 
823  data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
824  if (!data_size) {
825  ret = -ENOMEM;
826  goto error;
827  }
828 
829  /* get keys of all the delayed items */
830  i = 0;
831  list_for_each_entry(next, &head, tree_list) {
832  keys[i] = next->key;
833  data_size[i] = next->data_len;
834  i++;
835  }
836 
837  /* reset all the locked nodes in the path to spinning locks. */
838  btrfs_clear_path_blocking(path, NULL, 0);
839 
840  /* insert the keys of the items */
841  setup_items_for_insert(trans, root, path, keys, data_size,
842  total_data_size, total_size, nitems);
843 
844  /* insert the dir index items */
845  slot = path->slots[0];
846  list_for_each_entry_safe(curr, next, &head, tree_list) {
847  data_ptr = btrfs_item_ptr(leaf, slot, char);
848  write_extent_buffer(leaf, &curr->data,
849  (unsigned long)data_ptr,
850  curr->data_len);
851  slot++;
852 
853  btrfs_delayed_item_release_metadata(root, curr);
854 
855  list_del(&curr->tree_list);
856  btrfs_release_delayed_item(curr);
857  }
858 
859 error:
860  kfree(data_size);
861  kfree(keys);
862 out:
863  return ret;
864 }
865 
866 /*
867  * This helper can just do simple insertion that needn't extend item for new
868  * data, such as directory name index insertion, inode insertion.
869  */
870 static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
871  struct btrfs_root *root,
872  struct btrfs_path *path,
873  struct btrfs_delayed_item *delayed_item)
874 {
875  struct extent_buffer *leaf;
876  struct btrfs_item *item;
877  char *ptr;
878  int ret;
879 
880  ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
881  delayed_item->data_len);
882  if (ret < 0 && ret != -EEXIST)
883  return ret;
884 
885  leaf = path->nodes[0];
886 
887  item = btrfs_item_nr(leaf, path->slots[0]);
888  ptr = btrfs_item_ptr(leaf, path->slots[0], char);
889 
890  write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
891  delayed_item->data_len);
892  btrfs_mark_buffer_dirty(leaf);
893 
894  btrfs_delayed_item_release_metadata(root, delayed_item);
895  return 0;
896 }
897 
898 /*
899  * we insert an item first, then if there are some continuous items, we try
900  * to insert those items into the same leaf.
901  */
902 static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
903  struct btrfs_path *path,
904  struct btrfs_root *root,
905  struct btrfs_delayed_node *node)
906 {
907  struct btrfs_delayed_item *curr, *prev;
908  int ret = 0;
909 
910 do_again:
911  mutex_lock(&node->mutex);
912  curr = __btrfs_first_delayed_insertion_item(node);
913  if (!curr)
914  goto insert_end;
915 
916  ret = btrfs_insert_delayed_item(trans, root, path, curr);
917  if (ret < 0) {
918  btrfs_release_path(path);
919  goto insert_end;
920  }
921 
922  prev = curr;
923  curr = __btrfs_next_delayed_item(prev);
924  if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
925  /* insert the continuous items into the same leaf */
926  path->slots[0]++;
927  btrfs_batch_insert_items(trans, root, path, curr);
928  }
929  btrfs_release_delayed_item(prev);
930  btrfs_mark_buffer_dirty(path->nodes[0]);
931 
932  btrfs_release_path(path);
933  mutex_unlock(&node->mutex);
934  goto do_again;
935 
936 insert_end:
937  mutex_unlock(&node->mutex);
938  return ret;
939 }
940 
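/*
 * Starting from the slot found by the caller, collect the delayed deletion
 * items whose keys match consecutive items in the current leaf and remove
 * them from the tree with a single btrfs_del_items() call.
 */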
941 static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
942  struct btrfs_root *root,
943  struct btrfs_path *path,
944  struct btrfs_delayed_item *item)
945 {
946  struct btrfs_delayed_item *curr, *next;
947  struct extent_buffer *leaf;
948  struct btrfs_key key;
949  struct list_head head;
950  int nitems, i, last_item;
951  int ret = 0;
952 
953  BUG_ON(!path->nodes[0]);
954 
955  leaf = path->nodes[0];
956 
957  i = path->slots[0];
958  last_item = btrfs_header_nritems(leaf) - 1;
959  if (i > last_item)
960  return -ENOENT; /* FIXME: Is errno suitable? */
961 
962  next = item;
963  INIT_LIST_HEAD(&head);
964  btrfs_item_key_to_cpu(leaf, &key, i);
965  nitems = 0;
966  /*
967  * count the number of the dir index items that we can delete in batch
968  */
969  while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
970  list_add_tail(&next->tree_list, &head);
971  nitems++;
972 
973  curr = next;
974  next = __btrfs_next_delayed_item(curr);
975  if (!next)
976  break;
977 
978  if (!btrfs_is_continuous_delayed_item(curr, next))
979  break;
980 
981  i++;
982  if (i > last_item)
983  break;
984  btrfs_item_key_to_cpu(leaf, &key, i);
985  }
986 
987  if (!nitems)
988  return 0;
989 
990  ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
991  if (ret)
992  goto out;
993 
994  list_for_each_entry_safe(curr, next, &head, tree_list) {
995  btrfs_delayed_item_release_metadata(root, curr);
996  list_del(&curr->tree_list);
997  btrfs_release_delayed_item(curr);
998  }
999 
1000 out:
1001  return ret;
1002 }
1003 
1004 static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
1005  struct btrfs_path *path,
1006  struct btrfs_root *root,
1007  struct btrfs_delayed_node *node)
1008 {
1009  struct btrfs_delayed_item *curr, *prev;
1010  int ret = 0;
1011 
1012 do_again:
1013  mutex_lock(&node->mutex);
1014  curr = __btrfs_first_delayed_deletion_item(node);
1015  if (!curr)
1016  goto delete_fail;
1017 
1018  ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
1019  if (ret < 0)
1020  goto delete_fail;
1021  else if (ret > 0) {
1022  /*
1023  * can't find the item which the node points to, so this node
1024  * is invalid, just drop it.
1025  */
1026  prev = curr;
1027  curr = __btrfs_next_delayed_item(prev);
1028  btrfs_release_delayed_item(prev);
1029  ret = 0;
1030  btrfs_release_path(path);
1031  if (curr) {
1032  mutex_unlock(&node->mutex);
1033  goto do_again;
1034  } else
1035  goto delete_fail;
1036  }
1037 
1038  btrfs_batch_delete_items(trans, root, path, curr);
1039  btrfs_release_path(path);
1040  mutex_unlock(&node->mutex);
1041  goto do_again;
1042 
1043 delete_fail:
1044  btrfs_release_path(path);
1045  mutex_unlock(&node->mutex);
1046  return ret;
1047 }
1048 
1049 static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
1050 {
1051  struct btrfs_delayed_root *delayed_root;
1052 
1053  if (delayed_node && delayed_node->inode_dirty) {
1054  BUG_ON(!delayed_node->root);
1055  delayed_node->inode_dirty = 0;
1056  delayed_node->count--;
1057 
1058  delayed_root = delayed_node->root->fs_info->delayed_root;
1059  if (atomic_dec_return(&delayed_root->items) <
1060  BTRFS_DELAYED_BACKGROUND &&
1061  waitqueue_active(&delayed_root->wait))
1062  wake_up(&delayed_root->wait);
1063  }
1064 }
1065 
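/*
 * If the delayed node carries a dirty copy of the inode item, find the
 * on-disk inode item with btrfs_lookup_inode() and copy the cached
 * btrfs_inode_item over it, then release the reserved metadata.
 */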
1066 static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1067  struct btrfs_root *root,
1068  struct btrfs_path *path,
1069  struct btrfs_delayed_node *node)
1070 {
1071  struct btrfs_key key;
1072  struct btrfs_inode_item *inode_item;
1073  struct extent_buffer *leaf;
1074  int ret;
1075 
1076  mutex_lock(&node->mutex);
1077  if (!node->inode_dirty) {
1078  mutex_unlock(&node->mutex);
1079  return 0;
1080  }
1081 
1082  key.objectid = node->inode_id;
1083  btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1084  key.offset = 0;
1085  ret = btrfs_lookup_inode(trans, root, path, &key, 1);
1086  if (ret > 0) {
1087  btrfs_release_path(path);
1088  mutex_unlock(&node->mutex);
1089  return -ENOENT;
1090  } else if (ret < 0) {
1091  mutex_unlock(&node->mutex);
1092  return ret;
1093  }
1094 
1095  btrfs_unlock_up_safe(path, 1);
1096  leaf = path->nodes[0];
1097  inode_item = btrfs_item_ptr(leaf, path->slots[0],
1098  struct btrfs_inode_item);
1099  write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1100  sizeof(struct btrfs_inode_item));
1101  btrfs_mark_buffer_dirty(leaf);
1102  btrfs_release_path(path);
1103 
1104  btrfs_delayed_inode_release_metadata(root, node);
1105  btrfs_release_delayed_inode(node);
1106  mutex_unlock(&node->mutex);
1107 
1108  return 0;
1109 }
1110 
1111 /*
1112  * Called when committing the transaction.
1113  * Returns 0 on success.
1114  * Returns < 0 on error and returns with an aborted transaction with any
1115  * outstanding delayed items cleaned up.
1116  */
1117 static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1118  struct btrfs_root *root, int nr)
1119 {
1120  struct btrfs_root *curr_root = root;
1121  struct btrfs_delayed_root *delayed_root;
1122  struct btrfs_delayed_node *curr_node, *prev_node;
1123  struct btrfs_path *path;
1124  struct btrfs_block_rsv *block_rsv;
1125  int ret = 0;
1126  bool count = (nr > 0);
1127 
1128  if (trans->aborted)
1129  return -EIO;
1130 
1131  path = btrfs_alloc_path();
1132  if (!path)
1133  return -ENOMEM;
1134  path->leave_spinning = 1;
1135 
1136  block_rsv = trans->block_rsv;
1137  trans->block_rsv = &root->fs_info->delayed_block_rsv;
1138 
1139  delayed_root = btrfs_get_delayed_root(root);
1140 
1141  curr_node = btrfs_first_delayed_node(delayed_root);
1142  while (curr_node && (!count || (count && nr--))) {
1143  curr_root = curr_node->root;
1144  ret = btrfs_insert_delayed_items(trans, path, curr_root,
1145  curr_node);
1146  if (!ret)
1147  ret = btrfs_delete_delayed_items(trans, path,
1148  curr_root, curr_node);
1149  if (!ret)
1150  ret = btrfs_update_delayed_inode(trans, curr_root,
1151  path, curr_node);
1152  if (ret) {
1153  btrfs_release_delayed_node(curr_node);
1154  curr_node = NULL;
1155  btrfs_abort_transaction(trans, root, ret);
1156  break;
1157  }
1158 
1159  prev_node = curr_node;
1160  curr_node = btrfs_next_delayed_node(curr_node);
1161  btrfs_release_delayed_node(prev_node);
1162  }
1163 
1164  if (curr_node)
1165  btrfs_release_delayed_node(curr_node);
1166  btrfs_free_path(path);
1167  trans->block_rsv = block_rsv;
1168 
1169  return ret;
1170 }
1171 
1172 int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1173  struct btrfs_root *root)
1174 {
1175  return __btrfs_run_delayed_items(trans, root, -1);
1176 }
1177 
1178 int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
1179  struct btrfs_root *root, int nr)
1180 {
1181  return __btrfs_run_delayed_items(trans, root, nr);
1182 }
1183 
1184 static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1185  struct btrfs_delayed_node *node)
1186 {
1187  struct btrfs_path *path;
1188  struct btrfs_block_rsv *block_rsv;
1189  int ret;
1190 
1191  path = btrfs_alloc_path();
1192  if (!path)
1193  return -ENOMEM;
1194  path->leave_spinning = 1;
1195 
1196  block_rsv = trans->block_rsv;
1197  trans->block_rsv = &node->root->fs_info->delayed_block_rsv;
1198 
1199  ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1200  if (!ret)
1201  ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1202  if (!ret)
1203  ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1204  btrfs_free_path(path);
1205 
1206  trans->block_rsv = block_rsv;
1207  return ret;
1208 }
1209 
1210 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1211  struct inode *inode)
1212 {
1213  struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1214  int ret;
1215 
1216  if (!delayed_node)
1217  return 0;
1218 
1219  mutex_lock(&delayed_node->mutex);
1220  if (!delayed_node->count) {
1221  mutex_unlock(&delayed_node->mutex);
1222  btrfs_release_delayed_node(delayed_node);
1223  return 0;
1224  }
1225  mutex_unlock(&delayed_node->mutex);
1226 
1227  ret = __btrfs_commit_inode_delayed_items(trans, delayed_node);
1228  btrfs_release_delayed_node(delayed_node);
1229  return ret;
1230 }
1231 
1232 void btrfs_remove_delayed_node(struct inode *inode)
1233 {
1234  struct btrfs_delayed_node *delayed_node;
1235 
1236  delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
1237  if (!delayed_node)
1238  return;
1239 
1240  BTRFS_I(inode)->delayed_node = NULL;
1241  btrfs_release_delayed_node(delayed_node);
1242 }
1243 
1244 struct btrfs_async_delayed_node {
1245  struct btrfs_root *root;
1246  struct btrfs_delayed_node *delayed_node;
1247  struct btrfs_work work;
1248 };
1249 
1250 static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1251 {
1252  struct btrfs_async_delayed_node *async_node;
1253  struct btrfs_trans_handle *trans;
1254  struct btrfs_path *path;
1255  struct btrfs_delayed_node *delayed_node = NULL;
1256  struct btrfs_root *root;
1257  struct btrfs_block_rsv *block_rsv;
1258  unsigned long nr = 0;
1259  int need_requeue = 0;
1260  int ret;
1261 
1262  async_node = container_of(work, struct btrfs_async_delayed_node, work);
1263 
1264  path = btrfs_alloc_path();
1265  if (!path)
1266  goto out;
1267  path->leave_spinning = 1;
1268 
1269  delayed_node = async_node->delayed_node;
1270  root = delayed_node->root;
1271 
1272  trans = btrfs_join_transaction(root);
1273  if (IS_ERR(trans))
1274  goto free_path;
1275 
1276  block_rsv = trans->block_rsv;
1277  trans->block_rsv = &root->fs_info->delayed_block_rsv;
1278 
1279  ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
1280  if (!ret)
1281  ret = btrfs_delete_delayed_items(trans, path, root,
1282  delayed_node);
1283 
1284  if (!ret)
1285  btrfs_update_delayed_inode(trans, root, path, delayed_node);
1286 
1287  /*
1288  * Maybe new delayed items have been inserted, so we need requeue
1289  * the work. Besides that, we must dequeue the empty delayed nodes
1290  * to avoid the race between delayed items balance and the worker.
1291  * The race like this:
1292  * Task1 Worker thread
1293  * count == 0, needn't requeue
1294  * also needn't insert the
1295  * delayed node into prepare
1296  * list again.
1297  * add lots of delayed items
1298  * queue the delayed node
1299  * already in the list,
1300  * and not in the prepare
1301  * list, it means the delayed
1302  * node is being dealt with
1303  * by the worker.
1304  * do delayed items balance
1305  * the delayed node is being
1306  * dealt with by the worker
1307  * now, just wait.
1308  * the worker goto idle.
1309  * Task1 will sleep until the transaction is committed.
1310  */
1311  mutex_lock(&delayed_node->mutex);
1312  if (delayed_node->count)
1313  need_requeue = 1;
1314  else
1315  btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
1316  delayed_node);
1317  mutex_unlock(&delayed_node->mutex);
1318 
1319  nr = trans->blocks_used;
1320 
1321  trans->block_rsv = block_rsv;
1322  btrfs_end_transaction_dmeta(trans, root);
1323  __btrfs_btree_balance_dirty(root, nr);
1324 free_path:
1325  btrfs_free_path(path);
1326 out:
1327  if (need_requeue)
1328  btrfs_requeue_work(&async_node->work);
1329  else {
1330  btrfs_release_prepared_delayed_node(delayed_node);
1331  kfree(async_node);
1332  }
1333 }
1334 
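/*
 * Pull prepared delayed nodes off the prepare list and hand each one to
 * the delayed_workers thread pool. Unless 'all' is set, at most four nodes
 * are dispatched per call.
 */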
1335 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1336  struct btrfs_root *root, int all)
1337 {
1338  struct btrfs_async_delayed_node *async_node;
1339  struct btrfs_delayed_node *curr;
1340  int count = 0;
1341 
1342 again:
1343  curr = btrfs_first_prepared_delayed_node(delayed_root);
1344  if (!curr)
1345  return 0;
1346 
1347  async_node = kmalloc(sizeof(*async_node), GFP_NOFS);
1348  if (!async_node) {
1349  btrfs_release_prepared_delayed_node(curr);
1350  return -ENOMEM;
1351  }
1352 
1353  async_node->root = root;
1354  async_node->delayed_node = curr;
1355 
1356  async_node->work.func = btrfs_async_run_delayed_node_done;
1357  async_node->work.flags = 0;
1358 
1359  btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work);
1360  count++;
1361 
1362  if (all || count < 4)
1363  goto again;
1364 
1365  return 0;
1366 }
1367 
1368 void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
1369 {
1370  struct btrfs_delayed_root *delayed_root;
1371  delayed_root = btrfs_get_delayed_root(root);
1372  WARN_ON(btrfs_first_delayed_node(delayed_root));
1373 }
1374 
1375 void btrfs_balance_delayed_items(struct btrfs_root *root)
1376 {
1377  struct btrfs_delayed_root *delayed_root;
1378 
1379  delayed_root = btrfs_get_delayed_root(root);
1380 
1381  if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1382  return;
1383 
1384  if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1385  int ret;
1386  ret = btrfs_wq_run_delayed_node(delayed_root, root, 1);
1387  if (ret)
1388  return;
1389 
1390  wait_event_interruptible_timeout(
1391  delayed_root->wait,
1392  (atomic_read(&delayed_root->items) <
1393  BTRFS_DELAYED_BACKGROUND),
1394  HZ);
1395  return;
1396  }
1397 
1398  btrfs_wq_run_delayed_node(delayed_root, root, 0);
1399 }
1400 
1401 /* Will return 0 or -ENOMEM */
1402 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1403  struct btrfs_root *root, const char *name,
1404  int name_len, struct inode *dir,
1405  struct btrfs_disk_key *disk_key, u8 type,
1406  u64 index)
1407 {
1408  struct btrfs_delayed_node *delayed_node;
1409  struct btrfs_delayed_item *delayed_item;
1410  struct btrfs_dir_item *dir_item;
1411  int ret;
1412 
1413  delayed_node = btrfs_get_or_create_delayed_node(dir);
1414  if (IS_ERR(delayed_node))
1415  return PTR_ERR(delayed_node);
1416 
1417  delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1418  if (!delayed_item) {
1419  ret = -ENOMEM;
1420  goto release_node;
1421  }
1422 
1423  delayed_item->key.objectid = btrfs_ino(dir);
1424  btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
1425  delayed_item->key.offset = index;
1426 
1427  dir_item = (struct btrfs_dir_item *)delayed_item->data;
1428  dir_item->location = *disk_key;
1429  dir_item->transid = cpu_to_le64(trans->transid);
1430  dir_item->data_len = 0;
1431  dir_item->name_len = cpu_to_le16(name_len);
1432  dir_item->type = type;
1433  memcpy((char *)(dir_item + 1), name, name_len);
1434 
1435  ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
1436  /*
1437  * we have reserved enough space when we start a new transaction,
1438  * so reserving metadata failure is impossible
1439  */
1440  BUG_ON(ret);
1441 
1442 
1443  mutex_lock(&delayed_node->mutex);
1444  ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1445  if (unlikely(ret)) {
1446  printk(KERN_ERR "err add delayed dir index item(name: %s) into "
1447  "the insertion tree of the delayed node"
1448  "(root id: %llu, inode id: %llu, errno: %d)\n",
1449  name,
1450  (unsigned long long)delayed_node->root->objectid,
1451  (unsigned long long)delayed_node->inode_id,
1452  ret);
1453  BUG();
1454  }
1455  mutex_unlock(&delayed_node->mutex);
1456 
1457 release_node:
1458  btrfs_release_delayed_node(delayed_node);
1459  return ret;
1460 }
1461 
1462 static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
1463  struct btrfs_delayed_node *node,
1464  struct btrfs_key *key)
1465 {
1466  struct btrfs_delayed_item *item;
1467 
1468  mutex_lock(&node->mutex);
1469  item = __btrfs_lookup_delayed_insertion_item(node, key);
1470  if (!item) {
1471  mutex_unlock(&node->mutex);
1472  return 1;
1473  }
1474 
1475  btrfs_delayed_item_release_metadata(root, item);
1476  btrfs_release_delayed_item(item);
1477  mutex_unlock(&node->mutex);
1478  return 0;
1479 }
1480 
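/*
 * Record the deletion of a dir index item. If a matching delayed insertion
 * item is still queued it is simply dropped; otherwise a delayed deletion
 * item is created and added to the node's deletion tree.
 */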
1481 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1482  struct btrfs_root *root, struct inode *dir,
1483  u64 index)
1484 {
1485  struct btrfs_delayed_node *node;
1486  struct btrfs_delayed_item *item;
1487  struct btrfs_key item_key;
1488  int ret;
1489 
1490  node = btrfs_get_or_create_delayed_node(dir);
1491  if (IS_ERR(node))
1492  return PTR_ERR(node);
1493 
1494  item_key.objectid = btrfs_ino(dir);
1495  btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY);
1496  item_key.offset = index;
1497 
1498  ret = btrfs_delete_delayed_insertion_item(root, node, &item_key);
1499  if (!ret)
1500  goto end;
1501 
1502  item = btrfs_alloc_delayed_item(0);
1503  if (!item) {
1504  ret = -ENOMEM;
1505  goto end;
1506  }
1507 
1508  item->key = item_key;
1509 
1510  ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
1511  /*
1512  * we have reserved enough space when we start a new transaction,
1513  * so reserving metadata failure is impossible.
1514  */
1515  BUG_ON(ret);
1516 
1517  mutex_lock(&node->mutex);
1518  ret = __btrfs_add_delayed_deletion_item(node, item);
1519  if (unlikely(ret)) {
1520  printk(KERN_ERR "err add delayed dir index item(index: %llu) "
1521  "into the deletion tree of the delayed node"
1522  "(root id: %llu, inode id: %llu, errno: %d)\n",
1523  (unsigned long long)index,
1524  (unsigned long long)node->root->objectid,
1525  (unsigned long long)node->inode_id,
1526  ret);
1527  BUG();
1528  }
1529  mutex_unlock(&node->mutex);
1530 end:
1531  btrfs_release_delayed_node(node);
1532  return ret;
1533 }
1534 
1535 int btrfs_inode_delayed_dir_index_count(struct inode *inode)
1536 {
1537  struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1538 
1539  if (!delayed_node)
1540  return -ENOENT;
1541 
1542  /*
1543  * Since we have held i_mutex of this directory, it is impossible that
1544  * a new directory index is added into the delayed node and index_cnt
1545  * is updated now. So we needn't lock the delayed node.
1546  */
1547  if (!delayed_node->index_cnt) {
1548  btrfs_release_delayed_node(delayed_node);
1549  return -EINVAL;
1550  }
1551 
1552  BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
1553  btrfs_release_delayed_node(delayed_node);
1554  return 0;
1555 }
1556 
1557 void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
1558  struct list_head *del_list)
1559 {
1560  struct btrfs_delayed_node *delayed_node;
1561  struct btrfs_delayed_item *item;
1562 
1563  delayed_node = btrfs_get_delayed_node(inode);
1564  if (!delayed_node)
1565  return;
1566 
1567  mutex_lock(&delayed_node->mutex);
1568  item = __btrfs_first_delayed_insertion_item(delayed_node);
1569  while (item) {
1570  atomic_inc(&item->refs);
1571  list_add_tail(&item->readdir_list, ins_list);
1572  item = __btrfs_next_delayed_item(item);
1573  }
1574 
1575  item = __btrfs_first_delayed_deletion_item(delayed_node);
1576  while (item) {
1577  atomic_inc(&item->refs);
1578  list_add_tail(&item->readdir_list, del_list);
1579  item = __btrfs_next_delayed_item(item);
1580  }
1581  mutex_unlock(&delayed_node->mutex);
1582  /*
1583  * This delayed node is still cached in the btrfs inode, so refs
1584  * must be > 1 now, and we needn't check it is going to be freed
1585  * or not.
1586  *
1587  * Besides that, this function is used to read dir, we do not
1588  * insert/delete delayed items in this period. So we also needn't
1589  * requeue or dequeue this delayed node.
1590  */
1591  atomic_dec(&delayed_node->refs);
1592 }
1593 
1594 void btrfs_put_delayed_items(struct list_head *ins_list,
1595  struct list_head *del_list)
1596 {
1597  struct btrfs_delayed_item *curr, *next;
1598 
1599  list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1600  list_del(&curr->readdir_list);
1601  if (atomic_dec_and_test(&curr->refs))
1602  kfree(curr);
1603  }
1604 
1605  list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1606  list_del(&curr->readdir_list);
1607  if (atomic_dec_and_test(&curr->refs))
1608  kfree(curr);
1609  }
1610 }
1611 
1612 int btrfs_should_delete_dir_index(struct list_head *del_list,
1613  u64 index)
1614 {
1615  struct btrfs_delayed_item *curr, *next;
1616  int ret;
1617 
1618  if (list_empty(del_list))
1619  return 0;
1620 
1621  list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1622  if (curr->key.offset > index)
1623  break;
1624 
1625  list_del(&curr->readdir_list);
1626  ret = (curr->key.offset == index);
1627 
1628  if (atomic_dec_and_test(&curr->refs))
1629  kfree(curr);
1630 
1631  if (ret)
1632  return 1;
1633  else
1634  continue;
1635  }
1636  return 0;
1637 }
1638 
1639 /*
1640  * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1641  *
1642  */
1643 int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
1644  filldir_t filldir,
1645  struct list_head *ins_list)
1646 {
1647  struct btrfs_dir_item *di;
1648  struct btrfs_delayed_item *curr, *next;
1649  struct btrfs_key location;
1650  char *name;
1651  int name_len;
1652  int over = 0;
1653  unsigned char d_type;
1654 
1655  if (list_empty(ins_list))
1656  return 0;
1657 
1658  /*
1659  * Changing the data of the delayed item is impossible. So
1660  * we needn't lock them. And we have held i_mutex of the
1661  * directory, nobody can delete any directory indexes now.
1662  */
1663  list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1664  list_del(&curr->readdir_list);
1665 
1666  if (curr->key.offset < filp->f_pos) {
1667  if (atomic_dec_and_test(&curr->refs))
1668  kfree(curr);
1669  continue;
1670  }
1671 
1672  filp->f_pos = curr->key.offset;
1673 
1674  di = (struct btrfs_dir_item *)curr->data;
1675  name = (char *)(di + 1);
1676  name_len = le16_to_cpu(di->name_len);
1677 
1678  d_type = btrfs_filetype_table[di->type];
1679  btrfs_disk_key_to_cpu(&location, &di->location);
1680 
1681  over = filldir(dirent, name, name_len, curr->key.offset,
1682  location.objectid, d_type);
1683 
1684  if (atomic_dec_and_test(&curr->refs))
1685  kfree(curr);
1686 
1687  if (over)
1688  return 1;
1689  }
1690  return 0;
1691 }
1692 
1693 BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
1694  generation, 64);
1695 BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
1696  sequence, 64);
1697 BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
1698  transid, 64);
1699 BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
1700 BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
1701  nbytes, 64);
1702 BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
1703  block_group, 64);
1704 BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
1705 BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
1706 BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
1707 BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
1708 BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
1709 BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
1710 
1711 BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
1712 BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
1713 
1714 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1715  struct btrfs_inode_item *inode_item,
1716  struct inode *inode)
1717 {
1718  btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1719  btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1720  btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1721  btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1722  btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1723  btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1724  btrfs_set_stack_inode_generation(inode_item,
1725  BTRFS_I(inode)->generation);
1726  btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
1727  btrfs_set_stack_inode_transid(inode_item, trans->transid);
1728  btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1729  btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1730  btrfs_set_stack_inode_block_group(inode_item, 0);
1731 
1732  btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
1733  inode->i_atime.tv_sec);
1734  btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
1735  inode->i_atime.tv_nsec);
1736 
1737  btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
1738  inode->i_mtime.tv_sec);
1739  btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
1740  inode->i_mtime.tv_nsec);
1741 
1742  btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
1743  inode->i_ctime.tv_sec);
1744  btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
1745  inode->i_ctime.tv_nsec);
1746 }
1747 
1748 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1749 {
1750  struct btrfs_delayed_node *delayed_node;
1751  struct btrfs_inode_item *inode_item;
1752  struct btrfs_timespec *tspec;
1753 
1754  delayed_node = btrfs_get_delayed_node(inode);
1755  if (!delayed_node)
1756  return -ENOENT;
1757 
1758  mutex_lock(&delayed_node->mutex);
1759  if (!delayed_node->inode_dirty) {
1760  mutex_unlock(&delayed_node->mutex);
1761  btrfs_release_delayed_node(delayed_node);
1762  return -ENOENT;
1763  }
1764 
1765  inode_item = &delayed_node->inode_item;
1766 
1767  i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1768  i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1769  btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
1770  inode->i_mode = btrfs_stack_inode_mode(inode_item);
1771  set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1772  inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1773  BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1774  inode->i_version = btrfs_stack_inode_sequence(inode_item);
1775  inode->i_rdev = 0;
1776  *rdev = btrfs_stack_inode_rdev(inode_item);
1777  BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1778 
1779  tspec = btrfs_inode_atime(inode_item);
1780  inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
1781  inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1782 
1783  tspec = btrfs_inode_mtime(inode_item);
1784  inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
1785  inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1786 
1787  tspec = btrfs_inode_ctime(inode_item);
1788  inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
1789  inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1790 
1791  inode->i_generation = BTRFS_I(inode)->generation;
1792  BTRFS_I(inode)->index_cnt = (u64)-1;
1793 
1794  mutex_unlock(&delayed_node->mutex);
1795  btrfs_release_delayed_node(delayed_node);
1796  return 0;
1797 }
1798 
1799 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1800  struct btrfs_root *root, struct inode *inode)
1801 {
1802  struct btrfs_delayed_node *delayed_node;
1803  int ret = 0;
1804 
1805  delayed_node = btrfs_get_or_create_delayed_node(inode);
1806  if (IS_ERR(delayed_node))
1807  return PTR_ERR(delayed_node);
1808 
1809  mutex_lock(&delayed_node->mutex);
1810  if (delayed_node->inode_dirty) {
1811  fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1812  goto release_node;
1813  }
1814 
1815  ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
1816  delayed_node);
1817  if (ret)
1818  goto release_node;
1819 
1820  fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1821  delayed_node->inode_dirty = 1;
1822  delayed_node->count++;
1823  atomic_inc(&root->fs_info->delayed_root->items);
1824 release_node:
1825  mutex_unlock(&delayed_node->mutex);
1826  btrfs_release_delayed_node(delayed_node);
1827  return ret;
1828 }
1829 
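/*
 * Throw away everything queued on a delayed node: release the metadata
 * reservation of every pending insertion and deletion item, drop the
 * items, and release the dirty inode reservation if one is held.
 */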
1830 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1831 {
1832  struct btrfs_root *root = delayed_node->root;
1833  struct btrfs_delayed_item *curr_item, *prev_item;
1834 
1835  mutex_lock(&delayed_node->mutex);
1836  curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1837  while (curr_item) {
1838  btrfs_delayed_item_release_metadata(root, curr_item);
1839  prev_item = curr_item;
1840  curr_item = __btrfs_next_delayed_item(prev_item);
1841  btrfs_release_delayed_item(prev_item);
1842  }
1843 
1844  curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1845  while (curr_item) {
1846  btrfs_delayed_item_release_metadata(root, curr_item);
1847  prev_item = curr_item;
1848  curr_item = __btrfs_next_delayed_item(prev_item);
1849  btrfs_release_delayed_item(prev_item);
1850  }
1851 
1852  if (delayed_node->inode_dirty) {
1853  btrfs_delayed_inode_release_metadata(root, delayed_node);
1854  btrfs_release_delayed_inode(delayed_node);
1855  }
1856  mutex_unlock(&delayed_node->mutex);
1857 }
1858 
1859 void btrfs_kill_delayed_inode_items(struct inode *inode)
1860 {
1861  struct btrfs_delayed_node *delayed_node;
1862 
1863  delayed_node = btrfs_get_delayed_node(inode);
1864  if (!delayed_node)
1865  return;
1866 
1867  __btrfs_kill_delayed_node(delayed_node);
1868  btrfs_release_delayed_node(delayed_node);
1869 }
1870 
1871 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1872 {
1873  u64 inode_id = 0;
1874  struct btrfs_delayed_node *delayed_nodes[8];
1875  int i, n;
1876 
1877  while (1) {
1878  spin_lock(&root->inode_lock);
1879  n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1880  (void **)delayed_nodes, inode_id,
1881  ARRAY_SIZE(delayed_nodes));
1882  if (!n) {
1883  spin_unlock(&root->inode_lock);
1884  break;
1885  }
1886 
1887  inode_id = delayed_nodes[n - 1]->inode_id + 1;
1888 
1889  for (i = 0; i < n; i++)
1890  atomic_inc(&delayed_nodes[i]->refs);
1891  spin_unlock(&root->inode_lock);
1892 
1893  for (i = 0; i < n; i++) {
1894  __btrfs_kill_delayed_node(delayed_nodes[i]);
1895  btrfs_release_delayed_node(delayed_nodes[i]);
1896  }
1897  }
1898 }
1899 
1900 void btrfs_destroy_delayed_inodes(struct btrfs_root *root)
1901 {
1902  struct btrfs_delayed_root *delayed_root;
1903  struct btrfs_delayed_node *curr_node, *prev_node;
1904 
1905  delayed_root = btrfs_get_delayed_root(root);
1906 
1907  curr_node = btrfs_first_delayed_node(delayed_root);
1908  while (curr_node) {
1909  __btrfs_kill_delayed_node(curr_node);
1910 
1911  prev_node = curr_node;
1912  curr_node = btrfs_next_delayed_node(curr_node);
1913  btrfs_release_delayed_node(prev_node);
1914  }
1915 }
1916