Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
relocation.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2009 Oracle. All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 #include <linux/sched.h>
20 #include <linux/pagemap.h>
21 #include <linux/writeback.h>
22 #include <linux/blkdev.h>
23 #include <linux/rbtree.h>
24 #include <linux/slab.h>
25 #include "ctree.h"
26 #include "disk-io.h"
27 #include "transaction.h"
28 #include "volumes.h"
29 #include "locking.h"
30 #include "btrfs_inode.h"
31 #include "async-thread.h"
32 #include "free-space-cache.h"
33 #include "inode-map.h"
34 
35 /*
36  * backref_node, mapping_node and tree_block start with this
37  */
38 struct tree_entry {
39  struct rb_node rb_node;
41 };
42 
43 /*
44  * present a tree block in the backref cache
45  */
46 struct backref_node {
47  struct rb_node rb_node;
49 
51  /* objectid of tree block owner, can be not uptodate */
53  /* link to pending, changed or detached list */
54  struct list_head list;
55  /* list of upper level blocks reference this block */
56  struct list_head upper;
57  /* list of child blocks in the cache */
58  struct list_head lower;
59  /* NULL if this node is not tree root */
60  struct btrfs_root *root;
61  /* extent buffer got by COW the block */
62  struct extent_buffer *eb;
63  /* level of tree block */
64  unsigned int level:8;
65  /* is the block in non-reference counted tree */
66  unsigned int cowonly:1;
67  /* 1 if no child node in the cache */
68  unsigned int lowest:1;
69  /* is the extent buffer locked */
70  unsigned int locked:1;
71  /* has the block been processed */
72  unsigned int processed:1;
73  /* have backrefs of this block been checked */
74  unsigned int checked:1;
75  /*
76  * 1 if corresponding block has been cowed but some upper
77  * level block pointers may not point to the new location
78  */
79  unsigned int pending:1;
80  /*
81  * 1 if the backref node isn't connected to any other
82  * backref node.
83  */
84  unsigned int detached:1;
85 };
86 
87 /*
88  * present a block pointer in the backref cache
89  */
90 struct backref_edge {
91  struct list_head list[2];
92  struct backref_node *node[2];
93 };
94 
95 #define LOWER 0
96 #define UPPER 1
97 
98 struct backref_cache {
99  /* red black tree of all backref nodes in the cache */
100  struct rb_root rb_root;
101  /* for passing backref nodes to btrfs_reloc_cow_block */
103  /*
104  * list of blocks that have been cowed but some block
105  * pointers in upper level blocks may not reflect the
106  * new location
107  */
109  /* list of backref nodes with no child node */
111  /* list of blocks that have been cowed in current transaction */
113  /* list of detached backref node. */
115 
117 
118  int nr_nodes;
119  int nr_edges;
120 };
121 
122 /*
123  * map address of tree root to tree
124  */
125 struct mapping_node {
126  struct rb_node rb_node;
128  void *data;
129 };
130 
131 struct mapping_tree {
132  struct rb_root rb_root;
134 };
135 
136 /*
137  * present a tree block to process
138  */
139 struct tree_block {
140  struct rb_node rb_node;
142  struct btrfs_key key;
143  unsigned int level:8;
144  unsigned int key_ready:1;
145 };
146 
147 #define MAX_EXTENTS 128
148 
153  unsigned int nr;
154 };
155 
157  /* block group to relocate */
159  /* extent tree */
161  /* inode for moving data */
162  struct inode *data_inode;
163 
165 
167 
169  /* tree blocks have been processed */
171  /* map start of tree root to corresponding reloc tree */
173  /* list of reloc trees */
175  /* size of metadata reservation for merging reloc trees */
177  /* size of relocated tree nodes */
179 
182 
183  unsigned int stage:8;
184  unsigned int create_reloc_tree:1;
185  unsigned int merge_reloc_tree:1;
186  unsigned int found_file_extent:1;
187  unsigned int commit_transaction:1;
188 };
189 
190 /* stages of data relocation */
191 #define MOVE_DATA_EXTENTS 0
192 #define UPDATE_DATA_PTRS 1
193 
194 static void remove_backref_node(struct backref_cache *cache,
195  struct backref_node *node);
196 static void __mark_block_processed(struct reloc_control *rc,
197  struct backref_node *node);
198 
199 static void mapping_tree_init(struct mapping_tree *tree)
200 {
201  tree->rb_root = RB_ROOT;
202  spin_lock_init(&tree->lock);
203 }
204 
205 static void backref_cache_init(struct backref_cache *cache)
206 {
207  int i;
208  cache->rb_root = RB_ROOT;
209  for (i = 0; i < BTRFS_MAX_LEVEL; i++)
210  INIT_LIST_HEAD(&cache->pending[i]);
211  INIT_LIST_HEAD(&cache->changed);
212  INIT_LIST_HEAD(&cache->detached);
213  INIT_LIST_HEAD(&cache->leaves);
214 }
215 
216 static void backref_cache_cleanup(struct backref_cache *cache)
217 {
218  struct backref_node *node;
219  int i;
220 
221  while (!list_empty(&cache->detached)) {
222  node = list_entry(cache->detached.next,
223  struct backref_node, list);
224  remove_backref_node(cache, node);
225  }
226 
227  while (!list_empty(&cache->leaves)) {
228  node = list_entry(cache->leaves.next,
229  struct backref_node, lower);
230  remove_backref_node(cache, node);
231  }
232 
233  cache->last_trans = 0;
234 
235  for (i = 0; i < BTRFS_MAX_LEVEL; i++)
236  BUG_ON(!list_empty(&cache->pending[i]));
237  BUG_ON(!list_empty(&cache->changed));
238  BUG_ON(!list_empty(&cache->detached));
239  BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
240  BUG_ON(cache->nr_nodes);
241  BUG_ON(cache->nr_edges);
242 }
243 
244 static struct backref_node *alloc_backref_node(struct backref_cache *cache)
245 {
246  struct backref_node *node;
247 
248  node = kzalloc(sizeof(*node), GFP_NOFS);
249  if (node) {
250  INIT_LIST_HEAD(&node->list);
251  INIT_LIST_HEAD(&node->upper);
252  INIT_LIST_HEAD(&node->lower);
253  RB_CLEAR_NODE(&node->rb_node);
254  cache->nr_nodes++;
255  }
256  return node;
257 }
258 
259 static void free_backref_node(struct backref_cache *cache,
260  struct backref_node *node)
261 {
262  if (node) {
263  cache->nr_nodes--;
264  kfree(node);
265  }
266 }
267 
268 static struct backref_edge *alloc_backref_edge(struct backref_cache *cache)
269 {
270  struct backref_edge *edge;
271 
272  edge = kzalloc(sizeof(*edge), GFP_NOFS);
273  if (edge)
274  cache->nr_edges++;
275  return edge;
276 }
277 
278 static void free_backref_edge(struct backref_cache *cache,
279  struct backref_edge *edge)
280 {
281  if (edge) {
282  cache->nr_edges--;
283  kfree(edge);
284  }
285 }
286 
287 static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
288  struct rb_node *node)
289 {
290  struct rb_node **p = &root->rb_node;
291  struct rb_node *parent = NULL;
292  struct tree_entry *entry;
293 
294  while (*p) {
295  parent = *p;
296  entry = rb_entry(parent, struct tree_entry, rb_node);
297 
298  if (bytenr < entry->bytenr)
299  p = &(*p)->rb_left;
300  else if (bytenr > entry->bytenr)
301  p = &(*p)->rb_right;
302  else
303  return parent;
304  }
305 
306  rb_link_node(node, parent, p);
307  rb_insert_color(node, root);
308  return NULL;
309 }
310 
311 static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
312 {
313  struct rb_node *n = root->rb_node;
314  struct tree_entry *entry;
315 
316  while (n) {
317  entry = rb_entry(n, struct tree_entry, rb_node);
318 
319  if (bytenr < entry->bytenr)
320  n = n->rb_left;
321  else if (bytenr > entry->bytenr)
322  n = n->rb_right;
323  else
324  return n;
325  }
326  return NULL;
327 }
328 
329 void backref_tree_panic(struct rb_node *rb_node, int errno,
330  u64 bytenr)
331 {
332 
333  struct btrfs_fs_info *fs_info = NULL;
334  struct backref_node *bnode = rb_entry(rb_node, struct backref_node,
335  rb_node);
336  if (bnode->root)
337  fs_info = bnode->root->fs_info;
338  btrfs_panic(fs_info, errno, "Inconsistency in backref cache "
339  "found at offset %llu\n", (unsigned long long)bytenr);
340 }
341 
342 /*
343  * walk up backref nodes until reach node presents tree root
344  */
345 static struct backref_node *walk_up_backref(struct backref_node *node,
346  struct backref_edge *edges[],
347  int *index)
348 {
349  struct backref_edge *edge;
350  int idx = *index;
351 
352  while (!list_empty(&node->upper)) {
353  edge = list_entry(node->upper.next,
354  struct backref_edge, list[LOWER]);
355  edges[idx++] = edge;
356  node = edge->node[UPPER];
357  }
358  BUG_ON(node->detached);
359  *index = idx;
360  return node;
361 }
362 
363 /*
364  * walk down backref nodes to find start of next reference path
365  */
366 static struct backref_node *walk_down_backref(struct backref_edge *edges[],
367  int *index)
368 {
369  struct backref_edge *edge;
370  struct backref_node *lower;
371  int idx = *index;
372 
373  while (idx > 0) {
374  edge = edges[idx - 1];
375  lower = edge->node[LOWER];
376  if (list_is_last(&edge->list[LOWER], &lower->upper)) {
377  idx--;
378  continue;
379  }
380  edge = list_entry(edge->list[LOWER].next,
381  struct backref_edge, list[LOWER]);
382  edges[idx - 1] = edge;
383  *index = idx;
384  return edge->node[UPPER];
385  }
386  *index = 0;
387  return NULL;
388 }
389 
390 static void unlock_node_buffer(struct backref_node *node)
391 {
392  if (node->locked) {
393  btrfs_tree_unlock(node->eb);
394  node->locked = 0;
395  }
396 }
397 
398 static void drop_node_buffer(struct backref_node *node)
399 {
400  if (node->eb) {
401  unlock_node_buffer(node);
402  free_extent_buffer(node->eb);
403  node->eb = NULL;
404  }
405 }
406 
407 static void drop_backref_node(struct backref_cache *tree,
408  struct backref_node *node)
409 {
410  BUG_ON(!list_empty(&node->upper));
411 
412  drop_node_buffer(node);
413  list_del(&node->list);
414  list_del(&node->lower);
415  if (!RB_EMPTY_NODE(&node->rb_node))
416  rb_erase(&node->rb_node, &tree->rb_root);
417  free_backref_node(tree, node);
418 }
419 
420 /*
421  * remove a backref node from the backref cache
422  */
423 static void remove_backref_node(struct backref_cache *cache,
424  struct backref_node *node)
425 {
426  struct backref_node *upper;
427  struct backref_edge *edge;
428 
429  if (!node)
430  return;
431 
432  BUG_ON(!node->lowest && !node->detached);
433  while (!list_empty(&node->upper)) {
434  edge = list_entry(node->upper.next, struct backref_edge,
435  list[LOWER]);
436  upper = edge->node[UPPER];
437  list_del(&edge->list[LOWER]);
438  list_del(&edge->list[UPPER]);
439  free_backref_edge(cache, edge);
440 
441  if (RB_EMPTY_NODE(&upper->rb_node)) {
442  BUG_ON(!list_empty(&node->upper));
443  drop_backref_node(cache, node);
444  node = upper;
445  node->lowest = 1;
446  continue;
447  }
448  /*
449  * add the node to leaf node list if no other
450  * child block cached.
451  */
452  if (list_empty(&upper->lower)) {
453  list_add_tail(&upper->lower, &cache->leaves);
454  upper->lowest = 1;
455  }
456  }
457 
458  drop_backref_node(cache, node);
459 }
460 
461 static void update_backref_node(struct backref_cache *cache,
462  struct backref_node *node, u64 bytenr)
463 {
464  struct rb_node *rb_node;
465  rb_erase(&node->rb_node, &cache->rb_root);
466  node->bytenr = bytenr;
467  rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
468  if (rb_node)
469  backref_tree_panic(rb_node, -EEXIST, bytenr);
470 }
471 
472 /*
473  * update backref cache after a transaction commit
474  */
475 static int update_backref_cache(struct btrfs_trans_handle *trans,
476  struct backref_cache *cache)
477 {
478  struct backref_node *node;
479  int level = 0;
480 
481  if (cache->last_trans == 0) {
482  cache->last_trans = trans->transid;
483  return 0;
484  }
485 
486  if (cache->last_trans == trans->transid)
487  return 0;
488 
489  /*
490  * detached nodes are used to avoid unnecessary backref
491  * lookup. transaction commit changes the extent tree.
492  * so the detached nodes are no longer useful.
493  */
494  while (!list_empty(&cache->detached)) {
495  node = list_entry(cache->detached.next,
496  struct backref_node, list);
497  remove_backref_node(cache, node);
498  }
499 
500  while (!list_empty(&cache->changed)) {
501  node = list_entry(cache->changed.next,
502  struct backref_node, list);
503  list_del_init(&node->list);
504  BUG_ON(node->pending);
505  update_backref_node(cache, node, node->new_bytenr);
506  }
507 
508  /*
509  * some nodes can be left in the pending list if there were
510  * errors during processing the pending nodes.
511  */
512  for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
513  list_for_each_entry(node, &cache->pending[level], list) {
514  BUG_ON(!node->pending);
515  if (node->bytenr == node->new_bytenr)
516  continue;
517  update_backref_node(cache, node, node->new_bytenr);
518  }
519  }
520 
521  cache->last_trans = 0;
522  return 1;
523 }
524 
525 
526 static int should_ignore_root(struct btrfs_root *root)
527 {
528  struct btrfs_root *reloc_root;
529 
530  if (!root->ref_cows)
531  return 0;
532 
533  reloc_root = root->reloc_root;
534  if (!reloc_root)
535  return 0;
536 
537  if (btrfs_root_last_snapshot(&reloc_root->root_item) ==
538  root->fs_info->running_transaction->transid - 1)
539  return 0;
540  /*
541  * if there is reloc tree and it was created in previous
542  * transaction backref lookup can find the reloc tree,
543  * so backref node for the fs tree root is useless for
544  * relocation.
545  */
546  return 1;
547 }
548 /*
549  * find reloc tree by address of tree root
550  */
551 static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
552  u64 bytenr)
553 {
554  struct rb_node *rb_node;
555  struct mapping_node *node;
556  struct btrfs_root *root = NULL;
557 
558  spin_lock(&rc->reloc_root_tree.lock);
559  rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
560  if (rb_node) {
561  node = rb_entry(rb_node, struct mapping_node, rb_node);
562  root = (struct btrfs_root *)node->data;
563  }
564  spin_unlock(&rc->reloc_root_tree.lock);
565  return root;
566 }
567 
568 static int is_cowonly_root(u64 root_objectid)
569 {
570  if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
571  root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
572  root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
573  root_objectid == BTRFS_DEV_TREE_OBJECTID ||
574  root_objectid == BTRFS_TREE_LOG_OBJECTID ||
575  root_objectid == BTRFS_CSUM_TREE_OBJECTID)
576  return 1;
577  return 0;
578 }
579 
580 static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
581  u64 root_objectid)
582 {
583  struct btrfs_key key;
584 
585  key.objectid = root_objectid;
586  key.type = BTRFS_ROOT_ITEM_KEY;
587  if (is_cowonly_root(root_objectid))
588  key.offset = 0;
589  else
590  key.offset = (u64)-1;
591 
592  return btrfs_read_fs_root_no_name(fs_info, &key);
593 }
594 
595 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
596 static noinline_for_stack
597 struct btrfs_root *find_tree_root(struct reloc_control *rc,
598  struct extent_buffer *leaf,
599  struct btrfs_extent_ref_v0 *ref0)
600 {
601  struct btrfs_root *root;
602  u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
603  u64 generation = btrfs_ref_generation_v0(leaf, ref0);
604 
605  BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);
606 
607  root = read_fs_root(rc->extent_root->fs_info, root_objectid);
608  BUG_ON(IS_ERR(root));
609 
610  if (root->ref_cows &&
611  generation != btrfs_root_generation(&root->root_item))
612  return NULL;
613 
614  return root;
615 }
616 #endif
617 
618 static noinline_for_stack
619 int find_inline_backref(struct extent_buffer *leaf, int slot,
620  unsigned long *ptr, unsigned long *end)
621 {
622  struct btrfs_extent_item *ei;
623  struct btrfs_tree_block_info *bi;
624  u32 item_size;
625 
626  item_size = btrfs_item_size_nr(leaf, slot);
627 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
628  if (item_size < sizeof(*ei)) {
629  WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
630  return 1;
631  }
632 #endif
633  ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
634  WARN_ON(!(btrfs_extent_flags(leaf, ei) &
636 
637  if (item_size <= sizeof(*ei) + sizeof(*bi)) {
638  WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
639  return 1;
640  }
641 
642  bi = (struct btrfs_tree_block_info *)(ei + 1);
643  *ptr = (unsigned long)(bi + 1);
644  *end = (unsigned long)ei + item_size;
645  return 0;
646 }
647 
648 /*
649  * build backref tree for a given tree block. root of the backref tree
650  * corresponds the tree block, leaves of the backref tree correspond
651  * roots of b-trees that reference the tree block.
652  *
653  * the basic idea of this function is check backrefs of a given block
654  * to find upper level blocks that refernece the block, and then check
655  * bakcrefs of these upper level blocks recursively. the recursion stop
656  * when tree root is reached or backrefs for the block is cached.
657  *
658  * NOTE: if we find backrefs for a block are cached, we know backrefs
659  * for all upper level blocks that directly/indirectly reference the
660  * block are also cached.
661  */
662 static noinline_for_stack
663 struct backref_node *build_backref_tree(struct reloc_control *rc,
664  struct btrfs_key *node_key,
665  int level, u64 bytenr)
666 {
667  struct backref_cache *cache = &rc->backref_cache;
668  struct btrfs_path *path1;
669  struct btrfs_path *path2;
670  struct extent_buffer *eb;
671  struct btrfs_root *root;
672  struct backref_node *cur;
673  struct backref_node *upper;
674  struct backref_node *lower;
675  struct backref_node *node = NULL;
676  struct backref_node *exist = NULL;
677  struct backref_edge *edge;
678  struct rb_node *rb_node;
679  struct btrfs_key key;
680  unsigned long end;
681  unsigned long ptr;
682  LIST_HEAD(list);
683  LIST_HEAD(useless);
684  int cowonly;
685  int ret;
686  int err = 0;
687 
688  path1 = btrfs_alloc_path();
689  path2 = btrfs_alloc_path();
690  if (!path1 || !path2) {
691  err = -ENOMEM;
692  goto out;
693  }
694  path1->reada = 1;
695  path2->reada = 2;
696 
697  node = alloc_backref_node(cache);
698  if (!node) {
699  err = -ENOMEM;
700  goto out;
701  }
702 
703  node->bytenr = bytenr;
704  node->level = level;
705  node->lowest = 1;
706  cur = node;
707 again:
708  end = 0;
709  ptr = 0;
710  key.objectid = cur->bytenr;
711  key.type = BTRFS_EXTENT_ITEM_KEY;
712  key.offset = (u64)-1;
713 
714  path1->search_commit_root = 1;
715  path1->skip_locking = 1;
716  ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
717  0, 0);
718  if (ret < 0) {
719  err = ret;
720  goto out;
721  }
722  BUG_ON(!ret || !path1->slots[0]);
723 
724  path1->slots[0]--;
725 
726  WARN_ON(cur->checked);
727  if (!list_empty(&cur->upper)) {
728  /*
729  * the backref was added previously when processing
730  * backref of type BTRFS_TREE_BLOCK_REF_KEY
731  */
732  BUG_ON(!list_is_singular(&cur->upper));
733  edge = list_entry(cur->upper.next, struct backref_edge,
734  list[LOWER]);
735  BUG_ON(!list_empty(&edge->list[UPPER]));
736  exist = edge->node[UPPER];
737  /*
738  * add the upper level block to pending list if we need
739  * check its backrefs
740  */
741  if (!exist->checked)
742  list_add_tail(&edge->list[UPPER], &list);
743  } else {
744  exist = NULL;
745  }
746 
747  while (1) {
748  cond_resched();
749  eb = path1->nodes[0];
750 
751  if (ptr >= end) {
752  if (path1->slots[0] >= btrfs_header_nritems(eb)) {
753  ret = btrfs_next_leaf(rc->extent_root, path1);
754  if (ret < 0) {
755  err = ret;
756  goto out;
757  }
758  if (ret > 0)
759  break;
760  eb = path1->nodes[0];
761  }
762 
763  btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
764  if (key.objectid != cur->bytenr) {
765  WARN_ON(exist);
766  break;
767  }
768 
769  if (key.type == BTRFS_EXTENT_ITEM_KEY) {
770  ret = find_inline_backref(eb, path1->slots[0],
771  &ptr, &end);
772  if (ret)
773  goto next;
774  }
775  }
776 
777  if (ptr < end) {
778  /* update key for inline back ref */
779  struct btrfs_extent_inline_ref *iref;
780  iref = (struct btrfs_extent_inline_ref *)ptr;
781  key.type = btrfs_extent_inline_ref_type(eb, iref);
782  key.offset = btrfs_extent_inline_ref_offset(eb, iref);
785  }
786 
787  if (exist &&
788  ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
789  exist->owner == key.offset) ||
790  (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
791  exist->bytenr == key.offset))) {
792  exist = NULL;
793  goto next;
794  }
795 
796 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
797  if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
798  key.type == BTRFS_EXTENT_REF_V0_KEY) {
799  if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
800  struct btrfs_extent_ref_v0 *ref0;
801  ref0 = btrfs_item_ptr(eb, path1->slots[0],
802  struct btrfs_extent_ref_v0);
803  if (key.objectid == key.offset) {
804  root = find_tree_root(rc, eb, ref0);
805  if (root && !should_ignore_root(root))
806  cur->root = root;
807  else
808  list_add(&cur->list, &useless);
809  break;
810  }
811  if (is_cowonly_root(btrfs_ref_root_v0(eb,
812  ref0)))
813  cur->cowonly = 1;
814  }
815 #else
817  if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
818 #endif
819  if (key.objectid == key.offset) {
820  /*
821  * only root blocks of reloc trees use
822  * backref of this type.
823  */
824  root = find_reloc_root(rc, cur->bytenr);
825  BUG_ON(!root);
826  cur->root = root;
827  break;
828  }
829 
830  edge = alloc_backref_edge(cache);
831  if (!edge) {
832  err = -ENOMEM;
833  goto out;
834  }
835  rb_node = tree_search(&cache->rb_root, key.offset);
836  if (!rb_node) {
837  upper = alloc_backref_node(cache);
838  if (!upper) {
839  free_backref_edge(cache, edge);
840  err = -ENOMEM;
841  goto out;
842  }
843  upper->bytenr = key.offset;
844  upper->level = cur->level + 1;
845  /*
846  * backrefs for the upper level block isn't
847  * cached, add the block to pending list
848  */
849  list_add_tail(&edge->list[UPPER], &list);
850  } else {
851  upper = rb_entry(rb_node, struct backref_node,
852  rb_node);
853  BUG_ON(!upper->checked);
854  INIT_LIST_HEAD(&edge->list[UPPER]);
855  }
856  list_add_tail(&edge->list[LOWER], &cur->upper);
857  edge->node[LOWER] = cur;
858  edge->node[UPPER] = upper;
859 
860  goto next;
861  } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
862  goto next;
863  }
864 
865  /* key.type == BTRFS_TREE_BLOCK_REF_KEY */
866  root = read_fs_root(rc->extent_root->fs_info, key.offset);
867  if (IS_ERR(root)) {
868  err = PTR_ERR(root);
869  goto out;
870  }
871 
872  if (!root->ref_cows)
873  cur->cowonly = 1;
874 
875  if (btrfs_root_level(&root->root_item) == cur->level) {
876  /* tree root */
877  BUG_ON(btrfs_root_bytenr(&root->root_item) !=
878  cur->bytenr);
879  if (should_ignore_root(root))
880  list_add(&cur->list, &useless);
881  else
882  cur->root = root;
883  break;
884  }
885 
886  level = cur->level + 1;
887 
888  /*
889  * searching the tree to find upper level blocks
890  * reference the block.
891  */
892  path2->search_commit_root = 1;
893  path2->skip_locking = 1;
894  path2->lowest_level = level;
895  ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
896  path2->lowest_level = 0;
897  if (ret < 0) {
898  err = ret;
899  goto out;
900  }
901  if (ret > 0 && path2->slots[level] > 0)
902  path2->slots[level]--;
903 
904  eb = path2->nodes[level];
905  WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
906  cur->bytenr);
907 
908  lower = cur;
909  for (; level < BTRFS_MAX_LEVEL; level++) {
910  if (!path2->nodes[level]) {
911  BUG_ON(btrfs_root_bytenr(&root->root_item) !=
912  lower->bytenr);
913  if (should_ignore_root(root))
914  list_add(&lower->list, &useless);
915  else
916  lower->root = root;
917  break;
918  }
919 
920  edge = alloc_backref_edge(cache);
921  if (!edge) {
922  err = -ENOMEM;
923  goto out;
924  }
925 
926  eb = path2->nodes[level];
927  rb_node = tree_search(&cache->rb_root, eb->start);
928  if (!rb_node) {
929  upper = alloc_backref_node(cache);
930  if (!upper) {
931  free_backref_edge(cache, edge);
932  err = -ENOMEM;
933  goto out;
934  }
935  upper->bytenr = eb->start;
936  upper->owner = btrfs_header_owner(eb);
937  upper->level = lower->level + 1;
938  if (!root->ref_cows)
939  upper->cowonly = 1;
940 
941  /*
942  * if we know the block isn't shared
943  * we can void checking its backrefs.
944  */
945  if (btrfs_block_can_be_shared(root, eb))
946  upper->checked = 0;
947  else
948  upper->checked = 1;
949 
950  /*
951  * add the block to pending list if we
952  * need check its backrefs. only block
953  * at 'cur->level + 1' is added to the
954  * tail of pending list. this guarantees
955  * we check backrefs from lower level
956  * blocks to upper level blocks.
957  */
958  if (!upper->checked &&
959  level == cur->level + 1) {
960  list_add_tail(&edge->list[UPPER],
961  &list);
962  } else
963  INIT_LIST_HEAD(&edge->list[UPPER]);
964  } else {
965  upper = rb_entry(rb_node, struct backref_node,
966  rb_node);
967  BUG_ON(!upper->checked);
968  INIT_LIST_HEAD(&edge->list[UPPER]);
969  if (!upper->owner)
970  upper->owner = btrfs_header_owner(eb);
971  }
972  list_add_tail(&edge->list[LOWER], &lower->upper);
973  edge->node[LOWER] = lower;
974  edge->node[UPPER] = upper;
975 
976  if (rb_node)
977  break;
978  lower = upper;
979  upper = NULL;
980  }
981  btrfs_release_path(path2);
982 next:
983  if (ptr < end) {
984  ptr += btrfs_extent_inline_ref_size(key.type);
985  if (ptr >= end) {
986  WARN_ON(ptr > end);
987  ptr = 0;
988  end = 0;
989  }
990  }
991  if (ptr >= end)
992  path1->slots[0]++;
993  }
994  btrfs_release_path(path1);
995 
996  cur->checked = 1;
997  WARN_ON(exist);
998 
999  /* the pending list isn't empty, take the first block to process */
1000  if (!list_empty(&list)) {
1001  edge = list_entry(list.next, struct backref_edge, list[UPPER]);
1002  list_del_init(&edge->list[UPPER]);
1003  cur = edge->node[UPPER];
1004  goto again;
1005  }
1006 
1007  /*
1008  * everything goes well, connect backref nodes and insert backref nodes
1009  * into the cache.
1010  */
1011  BUG_ON(!node->checked);
1012  cowonly = node->cowonly;
1013  if (!cowonly) {
1014  rb_node = tree_insert(&cache->rb_root, node->bytenr,
1015  &node->rb_node);
1016  if (rb_node)
1017  backref_tree_panic(rb_node, -EEXIST, node->bytenr);
1018  list_add_tail(&node->lower, &cache->leaves);
1019  }
1020 
1021  list_for_each_entry(edge, &node->upper, list[LOWER])
1022  list_add_tail(&edge->list[UPPER], &list);
1023 
1024  while (!list_empty(&list)) {
1025  edge = list_entry(list.next, struct backref_edge, list[UPPER]);
1026  list_del_init(&edge->list[UPPER]);
1027  upper = edge->node[UPPER];
1028  if (upper->detached) {
1029  list_del(&edge->list[LOWER]);
1030  lower = edge->node[LOWER];
1031  free_backref_edge(cache, edge);
1032  if (list_empty(&lower->upper))
1033  list_add(&lower->list, &useless);
1034  continue;
1035  }
1036 
1037  if (!RB_EMPTY_NODE(&upper->rb_node)) {
1038  if (upper->lowest) {
1039  list_del_init(&upper->lower);
1040  upper->lowest = 0;
1041  }
1042 
1043  list_add_tail(&edge->list[UPPER], &upper->lower);
1044  continue;
1045  }
1046 
1047  BUG_ON(!upper->checked);
1048  BUG_ON(cowonly != upper->cowonly);
1049  if (!cowonly) {
1050  rb_node = tree_insert(&cache->rb_root, upper->bytenr,
1051  &upper->rb_node);
1052  if (rb_node)
1053  backref_tree_panic(rb_node, -EEXIST,
1054  upper->bytenr);
1055  }
1056 
1057  list_add_tail(&edge->list[UPPER], &upper->lower);
1058 
1059  list_for_each_entry(edge, &upper->upper, list[LOWER])
1060  list_add_tail(&edge->list[UPPER], &list);
1061  }
1062  /*
1063  * process useless backref nodes. backref nodes for tree leaves
1064  * are deleted from the cache. backref nodes for upper level
1065  * tree blocks are left in the cache to avoid unnecessary backref
1066  * lookup.
1067  */
1068  while (!list_empty(&useless)) {
1069  upper = list_entry(useless.next, struct backref_node, list);
1070  list_del_init(&upper->list);
1071  BUG_ON(!list_empty(&upper->upper));
1072  if (upper == node)
1073  node = NULL;
1074  if (upper->lowest) {
1075  list_del_init(&upper->lower);
1076  upper->lowest = 0;
1077  }
1078  while (!list_empty(&upper->lower)) {
1079  edge = list_entry(upper->lower.next,
1080  struct backref_edge, list[UPPER]);
1081  list_del(&edge->list[UPPER]);
1082  list_del(&edge->list[LOWER]);
1083  lower = edge->node[LOWER];
1084  free_backref_edge(cache, edge);
1085 
1086  if (list_empty(&lower->upper))
1087  list_add(&lower->list, &useless);
1088  }
1089  __mark_block_processed(rc, upper);
1090  if (upper->level > 0) {
1091  list_add(&upper->list, &cache->detached);
1092  upper->detached = 1;
1093  } else {
1094  rb_erase(&upper->rb_node, &cache->rb_root);
1095  free_backref_node(cache, upper);
1096  }
1097  }
1098 out:
1099  btrfs_free_path(path1);
1100  btrfs_free_path(path2);
1101  if (err) {
1102  while (!list_empty(&useless)) {
1103  lower = list_entry(useless.next,
1104  struct backref_node, upper);
1105  list_del_init(&lower->upper);
1106  }
1107  upper = node;
1108  INIT_LIST_HEAD(&list);
1109  while (upper) {
1110  if (RB_EMPTY_NODE(&upper->rb_node)) {
1111  list_splice_tail(&upper->upper, &list);
1112  free_backref_node(cache, upper);
1113  }
1114 
1115  if (list_empty(&list))
1116  break;
1117 
1118  edge = list_entry(list.next, struct backref_edge,
1119  list[LOWER]);
1120  list_del(&edge->list[LOWER]);
1121  upper = edge->node[UPPER];
1122  free_backref_edge(cache, edge);
1123  }
1124  return ERR_PTR(err);
1125  }
1126  BUG_ON(node && node->detached);
1127  return node;
1128 }
1129 
1130 /*
1131  * helper to add backref node for the newly created snapshot.
1132  * the backref node is created by cloning backref node that
1133  * corresponds to root of source tree
1134  */
1135 static int clone_backref_node(struct btrfs_trans_handle *trans,
1136  struct reloc_control *rc,
1137  struct btrfs_root *src,
1138  struct btrfs_root *dest)
1139 {
1140  struct btrfs_root *reloc_root = src->reloc_root;
1141  struct backref_cache *cache = &rc->backref_cache;
1142  struct backref_node *node = NULL;
1143  struct backref_node *new_node;
1144  struct backref_edge *edge;
1145  struct backref_edge *new_edge;
1146  struct rb_node *rb_node;
1147 
1148  if (cache->last_trans > 0)
1149  update_backref_cache(trans, cache);
1150 
1151  rb_node = tree_search(&cache->rb_root, src->commit_root->start);
1152  if (rb_node) {
1153  node = rb_entry(rb_node, struct backref_node, rb_node);
1154  if (node->detached)
1155  node = NULL;
1156  else
1157  BUG_ON(node->new_bytenr != reloc_root->node->start);
1158  }
1159 
1160  if (!node) {
1161  rb_node = tree_search(&cache->rb_root,
1162  reloc_root->commit_root->start);
1163  if (rb_node) {
1164  node = rb_entry(rb_node, struct backref_node,
1165  rb_node);
1166  BUG_ON(node->detached);
1167  }
1168  }
1169 
1170  if (!node)
1171  return 0;
1172 
1173  new_node = alloc_backref_node(cache);
1174  if (!new_node)
1175  return -ENOMEM;
1176 
1177  new_node->bytenr = dest->node->start;
1178  new_node->level = node->level;
1179  new_node->lowest = node->lowest;
1180  new_node->checked = 1;
1181  new_node->root = dest;
1182 
1183  if (!node->lowest) {
1184  list_for_each_entry(edge, &node->lower, list[UPPER]) {
1185  new_edge = alloc_backref_edge(cache);
1186  if (!new_edge)
1187  goto fail;
1188 
1189  new_edge->node[UPPER] = new_node;
1190  new_edge->node[LOWER] = edge->node[LOWER];
1191  list_add_tail(&new_edge->list[UPPER],
1192  &new_node->lower);
1193  }
1194  } else {
1195  list_add_tail(&new_node->lower, &cache->leaves);
1196  }
1197 
1198  rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
1199  &new_node->rb_node);
1200  if (rb_node)
1201  backref_tree_panic(rb_node, -EEXIST, new_node->bytenr);
1202 
1203  if (!new_node->lowest) {
1204  list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
1205  list_add_tail(&new_edge->list[LOWER],
1206  &new_edge->node[LOWER]->upper);
1207  }
1208  }
1209  return 0;
1210 fail:
1211  while (!list_empty(&new_node->lower)) {
1212  new_edge = list_entry(new_node->lower.next,
1213  struct backref_edge, list[UPPER]);
1214  list_del(&new_edge->list[UPPER]);
1215  free_backref_edge(cache, new_edge);
1216  }
1217  free_backref_node(cache, new_node);
1218  return -ENOMEM;
1219 }
1220 
1221 /*
1222  * helper to add 'address of tree root -> reloc tree' mapping
1223  */
1224 static int __must_check __add_reloc_root(struct btrfs_root *root)
1225 {
1226  struct rb_node *rb_node;
1227  struct mapping_node *node;
1228  struct reloc_control *rc = root->fs_info->reloc_ctl;
1229 
1230  node = kmalloc(sizeof(*node), GFP_NOFS);
1231  if (!node)
1232  return -ENOMEM;
1233 
1234  node->bytenr = root->node->start;
1235  node->data = root;
1236 
1237  spin_lock(&rc->reloc_root_tree.lock);
1238  rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1239  node->bytenr, &node->rb_node);
1240  spin_unlock(&rc->reloc_root_tree.lock);
1241  if (rb_node) {
1242  btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found "
1243  "for start=%llu while inserting into relocation "
1244  "tree\n", node->bytenr);
1245  kfree(node);
1246  return -EEXIST;
1247  }
1248 
1249  list_add_tail(&root->root_list, &rc->reloc_roots);
1250  return 0;
1251 }
1252 
1253 /*
1254  * helper to update/delete the 'address of tree root -> reloc tree'
1255  * mapping
1256  */
1257 static int __update_reloc_root(struct btrfs_root *root, int del)
1258 {
1259  struct rb_node *rb_node;
1260  struct mapping_node *node = NULL;
1261  struct reloc_control *rc = root->fs_info->reloc_ctl;
1262 
1263  spin_lock(&rc->reloc_root_tree.lock);
1264  rb_node = tree_search(&rc->reloc_root_tree.rb_root,
1265  root->commit_root->start);
1266  if (rb_node) {
1267  node = rb_entry(rb_node, struct mapping_node, rb_node);
1268  rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
1269  }
1270  spin_unlock(&rc->reloc_root_tree.lock);
1271 
1272  BUG_ON((struct btrfs_root *)node->data != root);
1273 
1274  if (!del) {
1275  spin_lock(&rc->reloc_root_tree.lock);
1276  node->bytenr = root->node->start;
1277  rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1278  node->bytenr, &node->rb_node);
1279  spin_unlock(&rc->reloc_root_tree.lock);
1280  if (rb_node)
1281  backref_tree_panic(rb_node, -EEXIST, node->bytenr);
1282  } else {
1283  spin_lock(&root->fs_info->trans_lock);
1284  list_del_init(&root->root_list);
1285  spin_unlock(&root->fs_info->trans_lock);
1286  kfree(node);
1287  }
1288  return 0;
1289 }
1290 
1291 static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
1292  struct btrfs_root *root, u64 objectid)
1293 {
1294  struct btrfs_root *reloc_root;
1295  struct extent_buffer *eb;
1296  struct btrfs_root_item *root_item;
1297  struct btrfs_key root_key;
1298  int ret;
1299 
1300  root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
1301  BUG_ON(!root_item);
1302 
1305  root_key.offset = objectid;
1306 
1307  if (root->root_key.objectid == objectid) {
1308  /* called by btrfs_init_reloc_root */
1309  ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
1311  BUG_ON(ret);
1312 
1313  btrfs_set_root_last_snapshot(&root->root_item,
1314  trans->transid - 1);
1315  } else {
1316  /*
1317  * called by btrfs_reloc_post_snapshot_hook.
1318  * the source tree is a reloc tree, all tree blocks
1319  * modified after it was created have RELOC flag
1320  * set in their headers. so it's OK to not update
1321  * the 'last_snapshot'.
1322  */
1323  ret = btrfs_copy_root(trans, root, root->node, &eb,
1325  BUG_ON(ret);
1326  }
1327 
1328  memcpy(root_item, &root->root_item, sizeof(*root_item));
1329  btrfs_set_root_bytenr(root_item, eb->start);
1330  btrfs_set_root_level(root_item, btrfs_header_level(eb));
1331  btrfs_set_root_generation(root_item, trans->transid);
1332 
1333  if (root->root_key.objectid == objectid) {
1334  btrfs_set_root_refs(root_item, 0);
1335  memset(&root_item->drop_progress, 0,
1336  sizeof(struct btrfs_disk_key));
1337  root_item->drop_level = 0;
1338  }
1339 
1340  btrfs_tree_unlock(eb);
1341  free_extent_buffer(eb);
1342 
1343  ret = btrfs_insert_root(trans, root->fs_info->tree_root,
1344  &root_key, root_item);
1345  BUG_ON(ret);
1346  kfree(root_item);
1347 
1348  reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
1349  &root_key);
1350  BUG_ON(IS_ERR(reloc_root));
1351  reloc_root->last_trans = trans->transid;
1352  return reloc_root;
1353 }
1354 
1355 /*
1356  * create reloc tree for a given fs tree. reloc tree is just a
1357  * snapshot of the fs tree with special root objectid.
1358  */
1360  struct btrfs_root *root)
1361 {
1362  struct btrfs_root *reloc_root;
1363  struct reloc_control *rc = root->fs_info->reloc_ctl;
1364  int clear_rsv = 0;
1365  int ret;
1366 
1367  if (root->reloc_root) {
1368  reloc_root = root->reloc_root;
1369  reloc_root->last_trans = trans->transid;
1370  return 0;
1371  }
1372 
1373  if (!rc || !rc->create_reloc_tree ||
1374  root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1375  return 0;
1376 
1377  if (!trans->block_rsv) {
1378  trans->block_rsv = rc->block_rsv;
1379  clear_rsv = 1;
1380  }
1381  reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
1382  if (clear_rsv)
1383  trans->block_rsv = NULL;
1384 
1385  ret = __add_reloc_root(reloc_root);
1386  BUG_ON(ret < 0);
1387  root->reloc_root = reloc_root;
1388  return 0;
1389 }
1390 
1391 /*
1392  * update root item of reloc tree
1393  */
1395  struct btrfs_root *root)
1396 {
1397  struct btrfs_root *reloc_root;
1398  struct btrfs_root_item *root_item;
1399  int del = 0;
1400  int ret;
1401 
1402  if (!root->reloc_root)
1403  goto out;
1404 
1405  reloc_root = root->reloc_root;
1406  root_item = &reloc_root->root_item;
1407 
1408  if (root->fs_info->reloc_ctl->merge_reloc_tree &&
1409  btrfs_root_refs(root_item) == 0) {
1410  root->reloc_root = NULL;
1411  del = 1;
1412  }
1413 
1414  __update_reloc_root(reloc_root, del);
1415 
1416  if (reloc_root->commit_root != reloc_root->node) {
1417  btrfs_set_root_node(root_item, reloc_root->node);
1418  free_extent_buffer(reloc_root->commit_root);
1419  reloc_root->commit_root = btrfs_root_node(reloc_root);
1420  }
1421 
1422  ret = btrfs_update_root(trans, root->fs_info->tree_root,
1423  &reloc_root->root_key, root_item);
1424  BUG_ON(ret);
1425 
1426 out:
1427  return 0;
1428 }
1429 
1430 /*
1431  * helper to find first cached inode with inode number >= objectid
1432  * in a subvolume
1433  */
1434 static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
1435 {
1436  struct rb_node *node;
1437  struct rb_node *prev;
1438  struct btrfs_inode *entry;
1439  struct inode *inode;
1440 
1441  spin_lock(&root->inode_lock);
1442 again:
1443  node = root->inode_tree.rb_node;
1444  prev = NULL;
1445  while (node) {
1446  prev = node;
1447  entry = rb_entry(node, struct btrfs_inode, rb_node);
1448 
1449  if (objectid < btrfs_ino(&entry->vfs_inode))
1450  node = node->rb_left;
1451  else if (objectid > btrfs_ino(&entry->vfs_inode))
1452  node = node->rb_right;
1453  else
1454  break;
1455  }
1456  if (!node) {
1457  while (prev) {
1458  entry = rb_entry(prev, struct btrfs_inode, rb_node);
1459  if (objectid <= btrfs_ino(&entry->vfs_inode)) {
1460  node = prev;
1461  break;
1462  }
1463  prev = rb_next(prev);
1464  }
1465  }
1466  while (node) {
1467  entry = rb_entry(node, struct btrfs_inode, rb_node);
1468  inode = igrab(&entry->vfs_inode);
1469  if (inode) {
1470  spin_unlock(&root->inode_lock);
1471  return inode;
1472  }
1473 
1474  objectid = btrfs_ino(&entry->vfs_inode) + 1;
1475  if (cond_resched_lock(&root->inode_lock))
1476  goto again;
1477 
1478  node = rb_next(node);
1479  }
1480  spin_unlock(&root->inode_lock);
1481  return NULL;
1482 }
1483 
1484 static int in_block_group(u64 bytenr,
1486 {
1487  if (bytenr >= block_group->key.objectid &&
1488  bytenr < block_group->key.objectid + block_group->key.offset)
1489  return 1;
1490  return 0;
1491 }
1492 
1493 /*
1494  * get new location of data
1495  */
1496 static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1497  u64 bytenr, u64 num_bytes)
1498 {
1499  struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
1500  struct btrfs_path *path;
1501  struct btrfs_file_extent_item *fi;
1502  struct extent_buffer *leaf;
1503  int ret;
1504 
1505  path = btrfs_alloc_path();
1506  if (!path)
1507  return -ENOMEM;
1508 
1509  bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1510  ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode),
1511  bytenr, 0);
1512  if (ret < 0)
1513  goto out;
1514  if (ret > 0) {
1515  ret = -ENOENT;
1516  goto out;
1517  }
1518 
1519  leaf = path->nodes[0];
1520  fi = btrfs_item_ptr(leaf, path->slots[0],
1521  struct btrfs_file_extent_item);
1522 
1523  BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
1524  btrfs_file_extent_compression(leaf, fi) ||
1525  btrfs_file_extent_encryption(leaf, fi) ||
1526  btrfs_file_extent_other_encoding(leaf, fi));
1527 
1528  if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1529  ret = 1;
1530  goto out;
1531  }
1532 
1533  *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1534  ret = 0;
1535 out:
1536  btrfs_free_path(path);
1537  return ret;
1538 }
1539 
1540 /*
1541  * update file extent items in the tree leaf to point to
1542  * the new locations.
1543  */
1544 static noinline_for_stack
1545 int replace_file_extents(struct btrfs_trans_handle *trans,
1546  struct reloc_control *rc,
1547  struct btrfs_root *root,
1548  struct extent_buffer *leaf)
1549 {
1550  struct btrfs_key key;
1551  struct btrfs_file_extent_item *fi;
1552  struct inode *inode = NULL;
1553  u64 parent;
1554  u64 bytenr;
1555  u64 new_bytenr = 0;
1556  u64 num_bytes;
1557  u64 end;
1558  u32 nritems;
1559  u32 i;
1560  int ret;
1561  int first = 1;
1562  int dirty = 0;
1563 
1564  if (rc->stage != UPDATE_DATA_PTRS)
1565  return 0;
1566 
1567  /* reloc trees always use full backref */
1568  if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1569  parent = leaf->start;
1570  else
1571  parent = 0;
1572 
1573  nritems = btrfs_header_nritems(leaf);
1574  for (i = 0; i < nritems; i++) {
1575  cond_resched();
1576  btrfs_item_key_to_cpu(leaf, &key, i);
1577  if (key.type != BTRFS_EXTENT_DATA_KEY)
1578  continue;
1579  fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1580  if (btrfs_file_extent_type(leaf, fi) ==
1582  continue;
1583  bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1584  num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1585  if (bytenr == 0)
1586  continue;
1587  if (!in_block_group(bytenr, rc->block_group))
1588  continue;
1589 
1590  /*
1591  * if we are modifying block in fs tree, wait for readpage
1592  * to complete and drop the extent cache
1593  */
1594  if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1595  if (first) {
1596  inode = find_next_inode(root, key.objectid);
1597  first = 0;
1598  } else if (inode && btrfs_ino(inode) < key.objectid) {
1599  btrfs_add_delayed_iput(inode);
1600  inode = find_next_inode(root, key.objectid);
1601  }
1602  if (inode && btrfs_ino(inode) == key.objectid) {
1603  end = key.offset +
1604  btrfs_file_extent_num_bytes(leaf, fi);
1605  WARN_ON(!IS_ALIGNED(key.offset,
1606  root->sectorsize));
1607  WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1608  end--;
1609  ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
1610  key.offset, end);
1611  if (!ret)
1612  continue;
1613 
1614  btrfs_drop_extent_cache(inode, key.offset, end,
1615  1);
1616  unlock_extent(&BTRFS_I(inode)->io_tree,
1617  key.offset, end);
1618  }
1619  }
1620 
1621  ret = get_new_location(rc->data_inode, &new_bytenr,
1622  bytenr, num_bytes);
1623  if (ret > 0) {
1624  WARN_ON(1);
1625  continue;
1626  }
1627  BUG_ON(ret < 0);
1628 
1629  btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
1630  dirty = 1;
1631 
1632  key.offset -= btrfs_file_extent_offset(leaf, fi);
1633  ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
1634  num_bytes, parent,
1635  btrfs_header_owner(leaf),
1636  key.objectid, key.offset, 1);
1637  BUG_ON(ret);
1638 
1639  ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1640  parent, btrfs_header_owner(leaf),
1641  key.objectid, key.offset, 1);
1642  BUG_ON(ret);
1643  }
1644  if (dirty)
1646  if (inode)
1647  btrfs_add_delayed_iput(inode);
1648  return 0;
1649 }
1650 
1651 static noinline_for_stack
1652 int memcmp_node_keys(struct extent_buffer *eb, int slot,
1653  struct btrfs_path *path, int level)
1654 {
1655  struct btrfs_disk_key key1;
1656  struct btrfs_disk_key key2;
1657  btrfs_node_key(eb, &key1, slot);
1658  btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
1659  return memcmp(&key1, &key2, sizeof(key1));
1660 }
1661 
1662 /*
1663  * try to replace tree blocks in fs tree with the new blocks
1664  * in reloc tree. tree blocks haven't been modified since the
1665  * reloc tree was create can be replaced.
1666  *
1667  * if a block was replaced, level of the block + 1 is returned.
1668  * if no block got replaced, 0 is returned. if there are other
1669  * errors, a negative error number is returned.
1670  */
1671 static noinline_for_stack
1672 int replace_path(struct btrfs_trans_handle *trans,
1673  struct btrfs_root *dest, struct btrfs_root *src,
1674  struct btrfs_path *path, struct btrfs_key *next_key,
1675  int lowest_level, int max_level)
1676 {
1677  struct extent_buffer *eb;
1678  struct extent_buffer *parent;
1679  struct btrfs_key key;
1680  u64 old_bytenr;
1681  u64 new_bytenr;
1682  u64 old_ptr_gen;
1683  u64 new_ptr_gen;
1685  u32 blocksize;
1686  int cow = 0;
1687  int level;
1688  int ret;
1689  int slot;
1690 
1691  BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1692  BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
1693 
1694  last_snapshot = btrfs_root_last_snapshot(&src->root_item);
1695 again:
1696  slot = path->slots[lowest_level];
1697  btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
1698 
1699  eb = btrfs_lock_root_node(dest);
1700  btrfs_set_lock_blocking(eb);
1701  level = btrfs_header_level(eb);
1702 
1703  if (level < lowest_level) {
1704  btrfs_tree_unlock(eb);
1705  free_extent_buffer(eb);
1706  return 0;
1707  }
1708 
1709  if (cow) {
1710  ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
1711  BUG_ON(ret);
1712  }
1713  btrfs_set_lock_blocking(eb);
1714 
1715  if (next_key) {
1716  next_key->objectid = (u64)-1;
1717  next_key->type = (u8)-1;
1718  next_key->offset = (u64)-1;
1719  }
1720 
1721  parent = eb;
1722  while (1) {
1723  level = btrfs_header_level(parent);
1724  BUG_ON(level < lowest_level);
1725 
1726  ret = btrfs_bin_search(parent, &key, level, &slot);
1727  if (ret && slot > 0)
1728  slot--;
1729 
1730  if (next_key && slot + 1 < btrfs_header_nritems(parent))
1731  btrfs_node_key_to_cpu(parent, next_key, slot + 1);
1732 
1733  old_bytenr = btrfs_node_blockptr(parent, slot);
1734  blocksize = btrfs_level_size(dest, level - 1);
1735  old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
1736 
1737  if (level <= max_level) {
1738  eb = path->nodes[level];
1739  new_bytenr = btrfs_node_blockptr(eb,
1740  path->slots[level]);
1741  new_ptr_gen = btrfs_node_ptr_generation(eb,
1742  path->slots[level]);
1743  } else {
1744  new_bytenr = 0;
1745  new_ptr_gen = 0;
1746  }
1747 
1748  if (new_bytenr > 0 && new_bytenr == old_bytenr) {
1749  WARN_ON(1);
1750  ret = level;
1751  break;
1752  }
1753 
1754  if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
1755  memcmp_node_keys(parent, slot, path, level)) {
1756  if (level <= lowest_level) {
1757  ret = 0;
1758  break;
1759  }
1760 
1761  eb = read_tree_block(dest, old_bytenr, blocksize,
1762  old_ptr_gen);
1763  BUG_ON(!eb);
1764  btrfs_tree_lock(eb);
1765  if (cow) {
1766  ret = btrfs_cow_block(trans, dest, eb, parent,
1767  slot, &eb);
1768  BUG_ON(ret);
1769  }
1770  btrfs_set_lock_blocking(eb);
1771 
1772  btrfs_tree_unlock(parent);
1773  free_extent_buffer(parent);
1774 
1775  parent = eb;
1776  continue;
1777  }
1778 
1779  if (!cow) {
1780  btrfs_tree_unlock(parent);
1781  free_extent_buffer(parent);
1782  cow = 1;
1783  goto again;
1784  }
1785 
1786  btrfs_node_key_to_cpu(path->nodes[level], &key,
1787  path->slots[level]);
1788  btrfs_release_path(path);
1789 
1790  path->lowest_level = level;
1791  ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
1792  path->lowest_level = 0;
1793  BUG_ON(ret);
1794 
1795  /*
1796  * swap blocks in fs tree and reloc tree.
1797  */
1798  btrfs_set_node_blockptr(parent, slot, new_bytenr);
1799  btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
1800  btrfs_mark_buffer_dirty(parent);
1801 
1802  btrfs_set_node_blockptr(path->nodes[level],
1803  path->slots[level], old_bytenr);
1804  btrfs_set_node_ptr_generation(path->nodes[level],
1805  path->slots[level], old_ptr_gen);
1806  btrfs_mark_buffer_dirty(path->nodes[level]);
1807 
1808  ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
1809  path->nodes[level]->start,
1810  src->root_key.objectid, level - 1, 0,
1811  1);
1812  BUG_ON(ret);
1813  ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
1814  0, dest->root_key.objectid, level - 1,
1815  0, 1);
1816  BUG_ON(ret);
1817 
1818  ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
1819  path->nodes[level]->start,
1820  src->root_key.objectid, level - 1, 0,
1821  1);
1822  BUG_ON(ret);
1823 
1824  ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
1825  0, dest->root_key.objectid, level - 1,
1826  0, 1);
1827  BUG_ON(ret);
1828 
1829  btrfs_unlock_up_safe(path, 0);
1830 
1831  ret = level;
1832  break;
1833  }
1834  btrfs_tree_unlock(parent);
1835  free_extent_buffer(parent);
1836  return ret;
1837 }
1838 
1839 /*
1840  * helper to find next relocated block in reloc tree
1841  */
1842 static noinline_for_stack
1843 int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1844  int *level)
1845 {
1846  struct extent_buffer *eb;
1847  int i;
1849  u32 nritems;
1850 
1851  last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1852 
1853  for (i = 0; i < *level; i++) {
1854  free_extent_buffer(path->nodes[i]);
1855  path->nodes[i] = NULL;
1856  }
1857 
1858  for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
1859  eb = path->nodes[i];
1860  nritems = btrfs_header_nritems(eb);
1861  while (path->slots[i] + 1 < nritems) {
1862  path->slots[i]++;
1863  if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
1864  last_snapshot)
1865  continue;
1866 
1867  *level = i;
1868  return 0;
1869  }
1870  free_extent_buffer(path->nodes[i]);
1871  path->nodes[i] = NULL;
1872  }
1873  return 1;
1874 }
1875 
1876 /*
1877  * walk down reloc tree to find relocated block of lowest level
1878  */
1879 static noinline_for_stack
1880 int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1881  int *level)
1882 {
1883  struct extent_buffer *eb = NULL;
1884  int i;
1885  u64 bytenr;
1886  u64 ptr_gen = 0;
1888  u32 blocksize;
1889  u32 nritems;
1890 
1891  last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1892 
1893  for (i = *level; i > 0; i--) {
1894  eb = path->nodes[i];
1895  nritems = btrfs_header_nritems(eb);
1896  while (path->slots[i] < nritems) {
1897  ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
1898  if (ptr_gen > last_snapshot)
1899  break;
1900  path->slots[i]++;
1901  }
1902  if (path->slots[i] >= nritems) {
1903  if (i == *level)
1904  break;
1905  *level = i + 1;
1906  return 0;
1907  }
1908  if (i == 1) {
1909  *level = i;
1910  return 0;
1911  }
1912 
1913  bytenr = btrfs_node_blockptr(eb, path->slots[i]);
1914  blocksize = btrfs_level_size(root, i - 1);
1915  eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
1916  BUG_ON(btrfs_header_level(eb) != i - 1);
1917  path->nodes[i - 1] = eb;
1918  path->slots[i - 1] = 0;
1919  }
1920  return 1;
1921 }
1922 
1923 /*
1924  * invalidate extent cache for file extents whose key in range of
1925  * [min_key, max_key)
1926  */
1927 static int invalidate_extent_cache(struct btrfs_root *root,
1928  struct btrfs_key *min_key,
1929  struct btrfs_key *max_key)
1930 {
1931  struct inode *inode = NULL;
1932  u64 objectid;
1933  u64 start, end;
1934  u64 ino;
1935 
1936  objectid = min_key->objectid;
1937  while (1) {
1938  cond_resched();
1939  iput(inode);
1940 
1941  if (objectid > max_key->objectid)
1942  break;
1943 
1944  inode = find_next_inode(root, objectid);
1945  if (!inode)
1946  break;
1947  ino = btrfs_ino(inode);
1948 
1949  if (ino > max_key->objectid) {
1950  iput(inode);
1951  break;
1952  }
1953 
1954  objectid = ino + 1;
1955  if (!S_ISREG(inode->i_mode))
1956  continue;
1957 
1958  if (unlikely(min_key->objectid == ino)) {
1959  if (min_key->type > BTRFS_EXTENT_DATA_KEY)
1960  continue;
1961  if (min_key->type < BTRFS_EXTENT_DATA_KEY)
1962  start = 0;
1963  else {
1964  start = min_key->offset;
1965  WARN_ON(!IS_ALIGNED(start, root->sectorsize));
1966  }
1967  } else {
1968  start = 0;
1969  }
1970 
1971  if (unlikely(max_key->objectid == ino)) {
1972  if (max_key->type < BTRFS_EXTENT_DATA_KEY)
1973  continue;
1974  if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
1975  end = (u64)-1;
1976  } else {
1977  if (max_key->offset == 0)
1978  continue;
1979  end = max_key->offset;
1980  WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1981  end--;
1982  }
1983  } else {
1984  end = (u64)-1;
1985  }
1986 
1987  /* the lock_extent waits for readpage to complete */
1988  lock_extent(&BTRFS_I(inode)->io_tree, start, end);
1989  btrfs_drop_extent_cache(inode, start, end, 1);
1990  unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
1991  }
1992  return 0;
1993 }
1994 
1995 static int find_next_key(struct btrfs_path *path, int level,
1996  struct btrfs_key *key)
1997 
1998 {
1999  while (level < BTRFS_MAX_LEVEL) {
2000  if (!path->nodes[level])
2001  break;
2002  if (path->slots[level] + 1 <
2003  btrfs_header_nritems(path->nodes[level])) {
2004  btrfs_node_key_to_cpu(path->nodes[level], key,
2005  path->slots[level] + 1);
2006  return 0;
2007  }
2008  level++;
2009  }
2010  return 1;
2011 }
2012 
2013 /*
2014  * merge the relocated tree blocks in reloc tree with corresponding
2015  * fs tree.
2016  */
2017 static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
2018  struct btrfs_root *root)
2019 {
2020  LIST_HEAD(inode_list);
2021  struct btrfs_key key;
2022  struct btrfs_key next_key;
2023  struct btrfs_trans_handle *trans;
2024  struct btrfs_root *reloc_root;
2025  struct btrfs_root_item *root_item;
2026  struct btrfs_path *path;
2027  struct extent_buffer *leaf;
2028  unsigned long nr;
2029  int level;
2030  int max_level;
2031  int replaced = 0;
2032  int ret;
2033  int err = 0;
2034  u32 min_reserved;
2035 
2036  path = btrfs_alloc_path();
2037  if (!path)
2038  return -ENOMEM;
2039  path->reada = 1;
2040 
2041  reloc_root = root->reloc_root;
2042  root_item = &reloc_root->root_item;
2043 
2044  if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2045  level = btrfs_root_level(root_item);
2046  extent_buffer_get(reloc_root->node);
2047  path->nodes[level] = reloc_root->node;
2048  path->slots[level] = 0;
2049  } else {
2050  btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2051 
2052  level = root_item->drop_level;
2053  BUG_ON(level == 0);
2054  path->lowest_level = level;
2055  ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
2056  path->lowest_level = 0;
2057  if (ret < 0) {
2058  btrfs_free_path(path);
2059  return ret;
2060  }
2061 
2062  btrfs_node_key_to_cpu(path->nodes[level], &next_key,
2063  path->slots[level]);
2064  WARN_ON(memcmp(&key, &next_key, sizeof(key)));
2065 
2066  btrfs_unlock_up_safe(path, 0);
2067  }
2068 
2069  min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2070  memset(&next_key, 0, sizeof(next_key));
2071 
2072  while (1) {
2073  trans = btrfs_start_transaction(root, 0);
2074  BUG_ON(IS_ERR(trans));
2075  trans->block_rsv = rc->block_rsv;
2076 
2077  ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved);
2078  if (ret) {
2079  BUG_ON(ret != -EAGAIN);
2080  ret = btrfs_commit_transaction(trans, root);
2081  BUG_ON(ret);
2082  continue;
2083  }
2084 
2085  replaced = 0;
2086  max_level = level;
2087 
2088  ret = walk_down_reloc_tree(reloc_root, path, &level);
2089  if (ret < 0) {
2090  err = ret;
2091  goto out;
2092  }
2093  if (ret > 0)
2094  break;
2095 
2096  if (!find_next_key(path, level, &key) &&
2097  btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
2098  ret = 0;
2099  } else {
2100  ret = replace_path(trans, root, reloc_root, path,
2101  &next_key, level, max_level);
2102  }
2103  if (ret < 0) {
2104  err = ret;
2105  goto out;
2106  }
2107 
2108  if (ret > 0) {
2109  level = ret;
2110  btrfs_node_key_to_cpu(path->nodes[level], &key,
2111  path->slots[level]);
2112  replaced = 1;
2113  }
2114 
2115  ret = walk_up_reloc_tree(reloc_root, path, &level);
2116  if (ret > 0)
2117  break;
2118 
2119  BUG_ON(level == 0);
2120  /*
2121  * save the merging progress in the drop_progress.
2122  * this is OK since root refs == 1 in this case.
2123  */
2124  btrfs_node_key(path->nodes[level], &root_item->drop_progress,
2125  path->slots[level]);
2126  root_item->drop_level = level;
2127 
2128  nr = trans->blocks_used;
2129  btrfs_end_transaction_throttle(trans, root);
2130 
2131  btrfs_btree_balance_dirty(root, nr);
2132 
2133  if (replaced && rc->stage == UPDATE_DATA_PTRS)
2134  invalidate_extent_cache(root, &key, &next_key);
2135  }
2136 
2137  /*
2138  * handle the case only one block in the fs tree need to be
2139  * relocated and the block is tree root.
2140  */
2141  leaf = btrfs_lock_root_node(root);
2142  ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
2143  btrfs_tree_unlock(leaf);
2144  free_extent_buffer(leaf);
2145  if (ret < 0)
2146  err = ret;
2147 out:
2148  btrfs_free_path(path);
2149 
2150  if (err == 0) {
2151  memset(&root_item->drop_progress, 0,
2152  sizeof(root_item->drop_progress));
2153  root_item->drop_level = 0;
2154  btrfs_set_root_refs(root_item, 0);
2155  btrfs_update_reloc_root(trans, root);
2156  }
2157 
2158  nr = trans->blocks_used;
2159  btrfs_end_transaction_throttle(trans, root);
2160 
2161  btrfs_btree_balance_dirty(root, nr);
2162 
2163  if (replaced && rc->stage == UPDATE_DATA_PTRS)
2164  invalidate_extent_cache(root, &key, &next_key);
2165 
2166  return err;
2167 }
2168 
2169 static noinline_for_stack
2170 int prepare_to_merge(struct reloc_control *rc, int err)
2171 {
2172  struct btrfs_root *root = rc->extent_root;
2173  struct btrfs_root *reloc_root;
2174  struct btrfs_trans_handle *trans;
2175  LIST_HEAD(reloc_roots);
2176  u64 num_bytes = 0;
2177  int ret;
2178 
2179  mutex_lock(&root->fs_info->reloc_mutex);
2180  rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2181  rc->merging_rsv_size += rc->nodes_relocated * 2;
2182  mutex_unlock(&root->fs_info->reloc_mutex);
2183 
2184 again:
2185  if (!err) {
2186  num_bytes = rc->merging_rsv_size;
2187  ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes);
2188  if (ret)
2189  err = ret;
2190  }
2191 
2192  trans = btrfs_join_transaction(rc->extent_root);
2193  if (IS_ERR(trans)) {
2194  if (!err)
2196  rc->block_rsv, num_bytes);
2197  return PTR_ERR(trans);
2198  }
2199 
2200  if (!err) {
2201  if (num_bytes != rc->merging_rsv_size) {
2202  btrfs_end_transaction(trans, rc->extent_root);
2204  rc->block_rsv, num_bytes);
2205  goto again;
2206  }
2207  }
2208 
2209  rc->merge_reloc_tree = 1;
2210 
2211  while (!list_empty(&rc->reloc_roots)) {
2212  reloc_root = list_entry(rc->reloc_roots.next,
2213  struct btrfs_root, root_list);
2214  list_del_init(&reloc_root->root_list);
2215 
2216  root = read_fs_root(reloc_root->fs_info,
2217  reloc_root->root_key.offset);
2218  BUG_ON(IS_ERR(root));
2219  BUG_ON(root->reloc_root != reloc_root);
2220 
2221  /*
2222  * set reference count to 1, so btrfs_recover_relocation
2223  * knows it should resumes merging
2224  */
2225  if (!err)
2226  btrfs_set_root_refs(&reloc_root->root_item, 1);
2227  btrfs_update_reloc_root(trans, root);
2228 
2229  list_add(&reloc_root->root_list, &reloc_roots);
2230  }
2231 
2232  list_splice(&reloc_roots, &rc->reloc_roots);
2233 
2234  if (!err)
2236  else
2237  btrfs_end_transaction(trans, rc->extent_root);
2238  return err;
2239 }
2240 
2241 static noinline_for_stack
2242 int merge_reloc_roots(struct reloc_control *rc)
2243 {
2244  struct btrfs_root *root;
2245  struct btrfs_root *reloc_root;
2246  LIST_HEAD(reloc_roots);
2247  int found = 0;
2248  int ret;
2249 again:
2250  root = rc->extent_root;
2251 
2252  /*
2253  * this serializes us with btrfs_record_root_in_transaction,
2254  * we have to make sure nobody is in the middle of
2255  * adding their roots to the list while we are
2256  * doing this splice
2257  */
2258  mutex_lock(&root->fs_info->reloc_mutex);
2259  list_splice_init(&rc->reloc_roots, &reloc_roots);
2260  mutex_unlock(&root->fs_info->reloc_mutex);
2261 
2262  while (!list_empty(&reloc_roots)) {
2263  found = 1;
2264  reloc_root = list_entry(reloc_roots.next,
2265  struct btrfs_root, root_list);
2266 
2267  if (btrfs_root_refs(&reloc_root->root_item) > 0) {
2268  root = read_fs_root(reloc_root->fs_info,
2269  reloc_root->root_key.offset);
2270  BUG_ON(IS_ERR(root));
2271  BUG_ON(root->reloc_root != reloc_root);
2272 
2273  ret = merge_reloc_root(rc, root);
2274  BUG_ON(ret);
2275  } else {
2276  list_del_init(&reloc_root->root_list);
2277  }
2278  ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
2279  BUG_ON(ret < 0);
2280  }
2281 
2282  if (found) {
2283  found = 0;
2284  goto again;
2285  }
2286  BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
2287  return 0;
2288 }
2289 
2290 static void free_block_list(struct rb_root *blocks)
2291 {
2292  struct tree_block *block;
2293  struct rb_node *rb_node;
2294  while ((rb_node = rb_first(blocks))) {
2295  block = rb_entry(rb_node, struct tree_block, rb_node);
2296  rb_erase(rb_node, blocks);
2297  kfree(block);
2298  }
2299 }
2300 
2301 static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
2302  struct btrfs_root *reloc_root)
2303 {
2304  struct btrfs_root *root;
2305 
2306  if (reloc_root->last_trans == trans->transid)
2307  return 0;
2308 
2309  root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset);
2310  BUG_ON(IS_ERR(root));
2311  BUG_ON(root->reloc_root != reloc_root);
2312 
2313  return btrfs_record_root_in_trans(trans, root);
2314 }
2315 
2316 static noinline_for_stack
2317 struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
2318  struct reloc_control *rc,
2319  struct backref_node *node,
2320  struct backref_edge *edges[], int *nr)
2321 {
2322  struct backref_node *next;
2323  struct btrfs_root *root;
2324  int index = 0;
2325 
2326  next = node;
2327  while (1) {
2328  cond_resched();
2329  next = walk_up_backref(next, edges, &index);
2330  root = next->root;
2331  BUG_ON(!root);
2332  BUG_ON(!root->ref_cows);
2333 
2334  if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2335  record_reloc_root_in_trans(trans, root);
2336  break;
2337  }
2338 
2339  btrfs_record_root_in_trans(trans, root);
2340  root = root->reloc_root;
2341 
2342  if (next->new_bytenr != root->node->start) {
2343  BUG_ON(next->new_bytenr);
2344  BUG_ON(!list_empty(&next->list));
2345  next->new_bytenr = root->node->start;
2346  next->root = root;
2347  list_add_tail(&next->list,
2348  &rc->backref_cache.changed);
2349  __mark_block_processed(rc, next);
2350  break;
2351  }
2352 
2353  WARN_ON(1);
2354  root = NULL;
2355  next = walk_down_backref(edges, &index);
2356  if (!next || next->level <= node->level)
2357  break;
2358  }
2359  if (!root)
2360  return NULL;
2361 
2362  *nr = index;
2363  next = node;
2364  /* setup backref node path for btrfs_reloc_cow_block */
2365  while (1) {
2366  rc->backref_cache.path[next->level] = next;
2367  if (--index < 0)
2368  break;
2369  next = edges[index]->node[UPPER];
2370  }
2371  return root;
2372 }
2373 
2374 /*
2375  * select a tree root for relocation. return NULL if the block
2376  * is reference counted. we should use do_relocation() in this
2377  * case. return a tree root pointer if the block isn't reference
2378  * counted. return -ENOENT if the block is root of reloc tree.
2379  */
2380 static noinline_for_stack
2381 struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
2382  struct backref_node *node)
2383 {
2384  struct backref_node *next;
2385  struct btrfs_root *root;
2386  struct btrfs_root *fs_root = NULL;
2387  struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2388  int index = 0;
2389 
2390  next = node;
2391  while (1) {
2392  cond_resched();
2393  next = walk_up_backref(next, edges, &index);
2394  root = next->root;
2395  BUG_ON(!root);
2396 
2397  /* no other choice for non-references counted tree */
2398  if (!root->ref_cows)
2399  return root;
2400 
2401  if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
2402  fs_root = root;
2403 
2404  if (next != node)
2405  return NULL;
2406 
2407  next = walk_down_backref(edges, &index);
2408  if (!next || next->level <= node->level)
2409  break;
2410  }
2411 
2412  if (!fs_root)
2413  return ERR_PTR(-ENOENT);
2414  return fs_root;
2415 }
2416 
2417 static noinline_for_stack
2418 u64 calcu_metadata_size(struct reloc_control *rc,
2419  struct backref_node *node, int reserve)
2420 {
2421  struct backref_node *next = node;
2422  struct backref_edge *edge;
2423  struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2424  u64 num_bytes = 0;
2425  int index = 0;
2426 
2427  BUG_ON(reserve && node->processed);
2428 
2429  while (next) {
2430  cond_resched();
2431  while (1) {
2432  if (next->processed && (reserve || next != node))
2433  break;
2434 
2435  num_bytes += btrfs_level_size(rc->extent_root,
2436  next->level);
2437 
2438  if (list_empty(&next->upper))
2439  break;
2440 
2441  edge = list_entry(next->upper.next,
2442  struct backref_edge, list[LOWER]);
2443  edges[index++] = edge;
2444  next = edge->node[UPPER];
2445  }
2446  next = walk_down_backref(edges, &index);
2447  }
2448  return num_bytes;
2449 }
2450 
2451 static int reserve_metadata_space(struct btrfs_trans_handle *trans,
2452  struct reloc_control *rc,
2453  struct backref_node *node)
2454 {
2455  struct btrfs_root *root = rc->extent_root;
2456  u64 num_bytes;
2457  int ret;
2458 
2459  num_bytes = calcu_metadata_size(rc, node, 1) * 2;
2460 
2461  trans->block_rsv = rc->block_rsv;
2462  ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes);
2463  if (ret) {
2464  if (ret == -EAGAIN)
2465  rc->commit_transaction = 1;
2466  return ret;
2467  }
2468 
2469  return 0;
2470 }
2471 
2472 static void release_metadata_space(struct reloc_control *rc,
2473  struct backref_node *node)
2474 {
2475  u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2;
2476  btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes);
2477 }
2478 
2479 /*
2480  * relocate a block tree, and then update pointers in upper level
2481  * blocks that reference the block to point to the new location.
2482  *
2483  * if called by link_to_upper, the block has already been relocated.
2484  * in that case this function just updates pointers.
2485  */
2486 static int do_relocation(struct btrfs_trans_handle *trans,
2487  struct reloc_control *rc,
2488  struct backref_node *node,
2489  struct btrfs_key *key,
2490  struct btrfs_path *path, int lowest)
2491 {
2492  struct backref_node *upper;
2493  struct backref_edge *edge;
2494  struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2495  struct btrfs_root *root;
2496  struct extent_buffer *eb;
2497  u32 blocksize;
2498  u64 bytenr;
2499  u64 generation;
2500  int nr;
2501  int slot;
2502  int ret;
2503  int err = 0;
2504 
2505  BUG_ON(lowest && node->eb);
2506 
2507  path->lowest_level = node->level + 1;
2508  rc->backref_cache.path[node->level] = node;
2509  list_for_each_entry(edge, &node->upper, list[LOWER]) {
2510  cond_resched();
2511 
2512  upper = edge->node[UPPER];
2513  root = select_reloc_root(trans, rc, upper, edges, &nr);
2514  BUG_ON(!root);
2515 
2516  if (upper->eb && !upper->locked) {
2517  if (!lowest) {
2518  ret = btrfs_bin_search(upper->eb, key,
2519  upper->level, &slot);
2520  BUG_ON(ret);
2521  bytenr = btrfs_node_blockptr(upper->eb, slot);
2522  if (node->eb->start == bytenr)
2523  goto next;
2524  }
2525  drop_node_buffer(upper);
2526  }
2527 
2528  if (!upper->eb) {
2529  ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2530  if (ret < 0) {
2531  err = ret;
2532  break;
2533  }
2534  BUG_ON(ret > 0);
2535 
2536  if (!upper->eb) {
2537  upper->eb = path->nodes[upper->level];
2538  path->nodes[upper->level] = NULL;
2539  } else {
2540  BUG_ON(upper->eb != path->nodes[upper->level]);
2541  }
2542 
2543  upper->locked = 1;
2544  path->locks[upper->level] = 0;
2545 
2546  slot = path->slots[upper->level];
2547  btrfs_release_path(path);
2548  } else {
2549  ret = btrfs_bin_search(upper->eb, key, upper->level,
2550  &slot);
2551  BUG_ON(ret);
2552  }
2553 
2554  bytenr = btrfs_node_blockptr(upper->eb, slot);
2555  if (lowest) {
2556  BUG_ON(bytenr != node->bytenr);
2557  } else {
2558  if (node->eb->start == bytenr)
2559  goto next;
2560  }
2561 
2562  blocksize = btrfs_level_size(root, node->level);
2563  generation = btrfs_node_ptr_generation(upper->eb, slot);
2564  eb = read_tree_block(root, bytenr, blocksize, generation);
2565  if (!eb) {
2566  err = -EIO;
2567  goto next;
2568  }
2569  btrfs_tree_lock(eb);
2570  btrfs_set_lock_blocking(eb);
2571 
2572  if (!node->eb) {
2573  ret = btrfs_cow_block(trans, root, eb, upper->eb,
2574  slot, &eb);
2575  btrfs_tree_unlock(eb);
2576  free_extent_buffer(eb);
2577  if (ret < 0) {
2578  err = ret;
2579  goto next;
2580  }
2581  BUG_ON(node->eb != eb);
2582  } else {
2583  btrfs_set_node_blockptr(upper->eb, slot,
2584  node->eb->start);
2585  btrfs_set_node_ptr_generation(upper->eb, slot,
2586  trans->transid);
2587  btrfs_mark_buffer_dirty(upper->eb);
2588 
2589  ret = btrfs_inc_extent_ref(trans, root,
2590  node->eb->start, blocksize,
2591  upper->eb->start,
2592  btrfs_header_owner(upper->eb),
2593  node->level, 0, 1);
2594  BUG_ON(ret);
2595 
2596  ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
2597  BUG_ON(ret);
2598  }
2599 next:
2600  if (!upper->pending)
2601  drop_node_buffer(upper);
2602  else
2603  unlock_node_buffer(upper);
2604  if (err)
2605  break;
2606  }
2607 
2608  if (!err && node->pending) {
2609  drop_node_buffer(node);
2610  list_move_tail(&node->list, &rc->backref_cache.changed);
2611  node->pending = 0;
2612  }
2613 
2614  path->lowest_level = 0;
2615  BUG_ON(err == -ENOSPC);
2616  return err;
2617 }
2618 
2619 static int link_to_upper(struct btrfs_trans_handle *trans,
2620  struct reloc_control *rc,
2621  struct backref_node *node,
2622  struct btrfs_path *path)
2623 {
2624  struct btrfs_key key;
2625 
2626  btrfs_node_key_to_cpu(node->eb, &key, 0);
2627  return do_relocation(trans, rc, node, &key, path, 0);
2628 }
2629 
2630 static int finish_pending_nodes(struct btrfs_trans_handle *trans,
2631  struct reloc_control *rc,
2632  struct btrfs_path *path, int err)
2633 {
2634  LIST_HEAD(list);
2635  struct backref_cache *cache = &rc->backref_cache;
2636  struct backref_node *node;
2637  int level;
2638  int ret;
2639 
2640  for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2641  while (!list_empty(&cache->pending[level])) {
2642  node = list_entry(cache->pending[level].next,
2643  struct backref_node, list);
2644  list_move_tail(&node->list, &list);
2645  BUG_ON(!node->pending);
2646 
2647  if (!err) {
2648  ret = link_to_upper(trans, rc, node, path);
2649  if (ret < 0)
2650  err = ret;
2651  }
2652  }
2653  list_splice_init(&list, &cache->pending[level]);
2654  }
2655  return err;
2656 }
2657 
2658 static void mark_block_processed(struct reloc_control *rc,
2659  u64 bytenr, u32 blocksize)
2660 {
2661  set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1,
2663 }
2664 
2665 static void __mark_block_processed(struct reloc_control *rc,
2666  struct backref_node *node)
2667 {
2668  u32 blocksize;
2669  if (node->level == 0 ||
2670  in_block_group(node->bytenr, rc->block_group)) {
2671  blocksize = btrfs_level_size(rc->extent_root, node->level);
2672  mark_block_processed(rc, node->bytenr, blocksize);
2673  }
2674  node->processed = 1;
2675 }
2676 
2677 /*
2678  * mark a block and all blocks directly/indirectly reference the block
2679  * as processed.
2680  */
2681 static void update_processed_blocks(struct reloc_control *rc,
2682  struct backref_node *node)
2683 {
2684  struct backref_node *next = node;
2685  struct backref_edge *edge;
2686  struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2687  int index = 0;
2688 
2689  while (next) {
2690  cond_resched();
2691  while (1) {
2692  if (next->processed)
2693  break;
2694 
2695  __mark_block_processed(rc, next);
2696 
2697  if (list_empty(&next->upper))
2698  break;
2699 
2700  edge = list_entry(next->upper.next,
2701  struct backref_edge, list[LOWER]);
2702  edges[index++] = edge;
2703  next = edge->node[UPPER];
2704  }
2705  next = walk_down_backref(edges, &index);
2706  }
2707 }
2708 
2709 static int tree_block_processed(u64 bytenr, u32 blocksize,
2710  struct reloc_control *rc)
2711 {
2712  if (test_range_bit(&rc->processed_blocks, bytenr,
2713  bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
2714  return 1;
2715  return 0;
2716 }
2717 
2718 static int get_tree_block_key(struct reloc_control *rc,
2719  struct tree_block *block)
2720 {
2721  struct extent_buffer *eb;
2722 
2723  BUG_ON(block->key_ready);
2724  eb = read_tree_block(rc->extent_root, block->bytenr,
2725  block->key.objectid, block->key.offset);
2726  BUG_ON(!eb);
2727  WARN_ON(btrfs_header_level(eb) != block->level);
2728  if (block->level == 0)
2729  btrfs_item_key_to_cpu(eb, &block->key, 0);
2730  else
2731  btrfs_node_key_to_cpu(eb, &block->key, 0);
2732  free_extent_buffer(eb);
2733  block->key_ready = 1;
2734  return 0;
2735 }
2736 
2737 static int reada_tree_block(struct reloc_control *rc,
2738  struct tree_block *block)
2739 {
2740  BUG_ON(block->key_ready);
2742  block->key.objectid, block->key.offset);
2743  return 0;
2744 }
2745 
2746 /*
2747  * helper function to relocate a tree block
2748  */
2749 static int relocate_tree_block(struct btrfs_trans_handle *trans,
2750  struct reloc_control *rc,
2751  struct backref_node *node,
2752  struct btrfs_key *key,
2753  struct btrfs_path *path)
2754 {
2755  struct btrfs_root *root;
2756  int release = 0;
2757  int ret = 0;
2758 
2759  if (!node)
2760  return 0;
2761 
2762  BUG_ON(node->processed);
2763  root = select_one_root(trans, node);
2764  if (root == ERR_PTR(-ENOENT)) {
2765  update_processed_blocks(rc, node);
2766  goto out;
2767  }
2768 
2769  if (!root || root->ref_cows) {
2770  ret = reserve_metadata_space(trans, rc, node);
2771  if (ret)
2772  goto out;
2773  release = 1;
2774  }
2775 
2776  if (root) {
2777  if (root->ref_cows) {
2778  BUG_ON(node->new_bytenr);
2779  BUG_ON(!list_empty(&node->list));
2780  btrfs_record_root_in_trans(trans, root);
2781  root = root->reloc_root;
2782  node->new_bytenr = root->node->start;
2783  node->root = root;
2784  list_add_tail(&node->list, &rc->backref_cache.changed);
2785  } else {
2786  path->lowest_level = node->level;
2787  ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2788  btrfs_release_path(path);
2789  if (ret > 0)
2790  ret = 0;
2791  }
2792  if (!ret)
2793  update_processed_blocks(rc, node);
2794  } else {
2795  ret = do_relocation(trans, rc, node, key, path, 1);
2796  }
2797 out:
2798  if (ret || node->level == 0 || node->cowonly) {
2799  if (release)
2800  release_metadata_space(rc, node);
2801  remove_backref_node(&rc->backref_cache, node);
2802  }
2803  return ret;
2804 }
2805 
2806 /*
2807  * relocate a list of blocks
2808  */
2809 static noinline_for_stack
2810 int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2811  struct reloc_control *rc, struct rb_root *blocks)
2812 {
2813  struct backref_node *node;
2814  struct btrfs_path *path;
2815  struct tree_block *block;
2816  struct rb_node *rb_node;
2817  int ret;
2818  int err = 0;
2819 
2820  path = btrfs_alloc_path();
2821  if (!path)
2822  return -ENOMEM;
2823 
2824  rb_node = rb_first(blocks);
2825  while (rb_node) {
2826  block = rb_entry(rb_node, struct tree_block, rb_node);
2827  if (!block->key_ready)
2828  reada_tree_block(rc, block);
2829  rb_node = rb_next(rb_node);
2830  }
2831 
2832  rb_node = rb_first(blocks);
2833  while (rb_node) {
2834  block = rb_entry(rb_node, struct tree_block, rb_node);
2835  if (!block->key_ready)
2836  get_tree_block_key(rc, block);
2837  rb_node = rb_next(rb_node);
2838  }
2839 
2840  rb_node = rb_first(blocks);
2841  while (rb_node) {
2842  block = rb_entry(rb_node, struct tree_block, rb_node);
2843 
2844  node = build_backref_tree(rc, &block->key,
2845  block->level, block->bytenr);
2846  if (IS_ERR(node)) {
2847  err = PTR_ERR(node);
2848  goto out;
2849  }
2850 
2851  ret = relocate_tree_block(trans, rc, node, &block->key,
2852  path);
2853  if (ret < 0) {
2854  if (ret != -EAGAIN || rb_node == rb_first(blocks))
2855  err = ret;
2856  goto out;
2857  }
2858  rb_node = rb_next(rb_node);
2859  }
2860 out:
2861  free_block_list(blocks);
2862  err = finish_pending_nodes(trans, rc, path, err);
2863 
2864  btrfs_free_path(path);
2865  return err;
2866 }
2867 
2868 static noinline_for_stack
2869 int prealloc_file_extent_cluster(struct inode *inode,
2870  struct file_extent_cluster *cluster)
2871 {
2872  u64 alloc_hint = 0;
2873  u64 start;
2874  u64 end;
2875  u64 offset = BTRFS_I(inode)->index_cnt;
2876  u64 num_bytes;
2877  int nr = 0;
2878  int ret = 0;
2879 
2880  BUG_ON(cluster->start != cluster->boundary[0]);
2881  mutex_lock(&inode->i_mutex);
2882 
2883  ret = btrfs_check_data_free_space(inode, cluster->end +
2884  1 - cluster->start);
2885  if (ret)
2886  goto out;
2887 
2888  while (nr < cluster->nr) {
2889  start = cluster->boundary[nr] - offset;
2890  if (nr + 1 < cluster->nr)
2891  end = cluster->boundary[nr + 1] - 1 - offset;
2892  else
2893  end = cluster->end - offset;
2894 
2895  lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2896  num_bytes = end + 1 - start;
2897  ret = btrfs_prealloc_file_range(inode, 0, start,
2898  num_bytes, num_bytes,
2899  end + 1, &alloc_hint);
2900  unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
2901  if (ret)
2902  break;
2903  nr++;
2904  }
2905  btrfs_free_reserved_data_space(inode, cluster->end +
2906  1 - cluster->start);
2907 out:
2908  mutex_unlock(&inode->i_mutex);
2909  return ret;
2910 }
2911 
2912 static noinline_for_stack
2913 int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2914  u64 block_start)
2915 {
2916  struct btrfs_root *root = BTRFS_I(inode)->root;
2917  struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2918  struct extent_map *em;
2919  int ret = 0;
2920 
2921  em = alloc_extent_map();
2922  if (!em)
2923  return -ENOMEM;
2924 
2925  em->start = start;
2926  em->len = end + 1 - start;
2927  em->block_len = em->len;
2928  em->block_start = block_start;
2929  em->bdev = root->fs_info->fs_devices->latest_bdev;
2931 
2932  lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2933  while (1) {
2934  write_lock(&em_tree->lock);
2935  ret = add_extent_mapping(em_tree, em);
2936  write_unlock(&em_tree->lock);
2937  if (ret != -EEXIST) {
2938  free_extent_map(em);
2939  break;
2940  }
2941  btrfs_drop_extent_cache(inode, start, end, 0);
2942  }
2943  unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
2944  return ret;
2945 }
2946 
2947 static int relocate_file_extent_cluster(struct inode *inode,
2948  struct file_extent_cluster *cluster)
2949 {
2950  u64 page_start;
2951  u64 page_end;
2952  u64 offset = BTRFS_I(inode)->index_cnt;
2953  unsigned long index;
2954  unsigned long last_index;
2955  struct page *page;
2956  struct file_ra_state *ra;
2957  gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
2958  int nr = 0;
2959  int ret = 0;
2960 
2961  if (!cluster->nr)
2962  return 0;
2963 
2964  ra = kzalloc(sizeof(*ra), GFP_NOFS);
2965  if (!ra)
2966  return -ENOMEM;
2967 
2968  ret = prealloc_file_extent_cluster(inode, cluster);
2969  if (ret)
2970  goto out;
2971 
2972  file_ra_state_init(ra, inode->i_mapping);
2973 
2974  ret = setup_extent_mapping(inode, cluster->start - offset,
2975  cluster->end - offset, cluster->start);
2976  if (ret)
2977  goto out;
2978 
2979  index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
2980  last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
2981  while (index <= last_index) {
2983  if (ret)
2984  goto out;
2985 
2986  page = find_lock_page(inode->i_mapping, index);
2987  if (!page) {
2989  ra, NULL, index,
2990  last_index + 1 - index);
2991  page = find_or_create_page(inode->i_mapping, index,
2992  mask);
2993  if (!page) {
2995  PAGE_CACHE_SIZE);
2996  ret = -ENOMEM;
2997  goto out;
2998  }
2999  }
3000 
3001  if (PageReadahead(page)) {
3003  ra, NULL, page, index,
3004  last_index + 1 - index);
3005  }
3006 
3007  if (!PageUptodate(page)) {
3008  btrfs_readpage(NULL, page);
3009  lock_page(page);
3010  if (!PageUptodate(page)) {
3011  unlock_page(page);
3012  page_cache_release(page);
3014  PAGE_CACHE_SIZE);
3015  ret = -EIO;
3016  goto out;
3017  }
3018  }
3019 
3020  page_start = (u64)page->index << PAGE_CACHE_SHIFT;
3021  page_end = page_start + PAGE_CACHE_SIZE - 1;
3022 
3023  lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
3024 
3025  set_page_extent_mapped(page);
3026 
3027  if (nr < cluster->nr &&
3028  page_start + offset == cluster->boundary[nr]) {
3029  set_extent_bits(&BTRFS_I(inode)->io_tree,
3030  page_start, page_end,
3032  nr++;
3033  }
3034 
3035  btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
3036  set_page_dirty(page);
3037 
3038  unlock_extent(&BTRFS_I(inode)->io_tree,
3039  page_start, page_end);
3040  unlock_page(page);
3041  page_cache_release(page);
3042 
3043  index++;
3044  balance_dirty_pages_ratelimited(inode->i_mapping);
3045  btrfs_throttle(BTRFS_I(inode)->root);
3046  }
3047  WARN_ON(nr != cluster->nr);
3048 out:
3049  kfree(ra);
3050  return ret;
3051 }
3052 
3053 static noinline_for_stack
3054 int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
3055  struct file_extent_cluster *cluster)
3056 {
3057  int ret;
3058 
3059  if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
3060  ret = relocate_file_extent_cluster(inode, cluster);
3061  if (ret)
3062  return ret;
3063  cluster->nr = 0;
3064  }
3065 
3066  if (!cluster->nr)
3067  cluster->start = extent_key->objectid;
3068  else
3069  BUG_ON(cluster->nr >= MAX_EXTENTS);
3070  cluster->end = extent_key->objectid + extent_key->offset - 1;
3071  cluster->boundary[cluster->nr] = extent_key->objectid;
3072  cluster->nr++;
3073 
3074  if (cluster->nr >= MAX_EXTENTS) {
3075  ret = relocate_file_extent_cluster(inode, cluster);
3076  if (ret)
3077  return ret;
3078  cluster->nr = 0;
3079  }
3080  return 0;
3081 }
3082 
3083 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3084 static int get_ref_objectid_v0(struct reloc_control *rc,
3085  struct btrfs_path *path,
3086  struct btrfs_key *extent_key,
3087  u64 *ref_objectid, int *path_change)
3088 {
3089  struct btrfs_key key;
3090  struct extent_buffer *leaf;
3091  struct btrfs_extent_ref_v0 *ref0;
3092  int ret;
3093  int slot;
3094 
3095  leaf = path->nodes[0];
3096  slot = path->slots[0];
3097  while (1) {
3098  if (slot >= btrfs_header_nritems(leaf)) {
3099  ret = btrfs_next_leaf(rc->extent_root, path);
3100  if (ret < 0)
3101  return ret;
3102  BUG_ON(ret > 0);
3103  leaf = path->nodes[0];
3104  slot = path->slots[0];
3105  if (path_change)
3106  *path_change = 1;
3107  }
3108  btrfs_item_key_to_cpu(leaf, &key, slot);
3109  if (key.objectid != extent_key->objectid)
3110  return -ENOENT;
3111 
3112  if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
3113  slot++;
3114  continue;
3115  }
3116  ref0 = btrfs_item_ptr(leaf, slot,
3117  struct btrfs_extent_ref_v0);
3118  *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
3119  break;
3120  }
3121  return 0;
3122 }
3123 #endif
3124 
3125 /*
3126  * helper to add a tree block to the list.
3127  * the major work is getting the generation and level of the block
3128  */
3129 static int add_tree_block(struct reloc_control *rc,
3130  struct btrfs_key *extent_key,
3131  struct btrfs_path *path,
3132  struct rb_root *blocks)
3133 {
3134  struct extent_buffer *eb;
3135  struct btrfs_extent_item *ei;
3136  struct btrfs_tree_block_info *bi;
3137  struct tree_block *block;
3138  struct rb_node *rb_node;
3139  u32 item_size;
3140  int level = -1;
3141  int generation;
3142 
3143  eb = path->nodes[0];
3144  item_size = btrfs_item_size_nr(eb, path->slots[0]);
3145 
3146  if (item_size >= sizeof(*ei) + sizeof(*bi)) {
3147  ei = btrfs_item_ptr(eb, path->slots[0],
3148  struct btrfs_extent_item);
3149  bi = (struct btrfs_tree_block_info *)(ei + 1);
3150  generation = btrfs_extent_generation(eb, ei);
3151  level = btrfs_tree_block_level(eb, bi);
3152  } else {
3153 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3154  u64 ref_owner;
3155  int ret;
3156 
3157  BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
3158  ret = get_ref_objectid_v0(rc, path, extent_key,
3159  &ref_owner, NULL);
3160  if (ret < 0)
3161  return ret;
3162  BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
3163  level = (int)ref_owner;
3164  /* FIXME: get real generation */
3165  generation = 0;
3166 #else
3167  BUG();
3168 #endif
3169  }
3170 
3171  btrfs_release_path(path);
3172 
3173  BUG_ON(level == -1);
3174 
3175  block = kmalloc(sizeof(*block), GFP_NOFS);
3176  if (!block)
3177  return -ENOMEM;
3178 
3179  block->bytenr = extent_key->objectid;
3180  block->key.objectid = extent_key->offset;
3181  block->key.offset = generation;
3182  block->level = level;
3183  block->key_ready = 0;
3184 
3185  rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
3186  if (rb_node)
3187  backref_tree_panic(rb_node, -EEXIST, block->bytenr);
3188 
3189  return 0;
3190 }
3191 
3192 /*
3193  * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
3194  */
3195 static int __add_tree_block(struct reloc_control *rc,
3196  u64 bytenr, u32 blocksize,
3197  struct rb_root *blocks)
3198 {
3199  struct btrfs_path *path;
3200  struct btrfs_key key;
3201  int ret;
3202 
3203  if (tree_block_processed(bytenr, blocksize, rc))
3204  return 0;
3205 
3206  if (tree_search(blocks, bytenr))
3207  return 0;
3208 
3209  path = btrfs_alloc_path();
3210  if (!path)
3211  return -ENOMEM;
3212 
3213  key.objectid = bytenr;
3215  key.offset = blocksize;
3216 
3217  path->search_commit_root = 1;
3218  path->skip_locking = 1;
3219  ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
3220  if (ret < 0)
3221  goto out;
3222  BUG_ON(ret);
3223 
3224  btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3225  ret = add_tree_block(rc, &key, path, blocks);
3226 out:
3227  btrfs_free_path(path);
3228  return ret;
3229 }
3230 
3231 /*
3232  * helper to check if the block use full backrefs for pointers in it
3233  */
3234 static int block_use_full_backref(struct reloc_control *rc,
3235  struct extent_buffer *eb)
3236 {
3237  u64 flags;
3238  int ret;
3239 
3240  if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
3241  btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
3242  return 1;
3243 
3245  eb->start, eb->len, NULL, &flags);
3246  BUG_ON(ret);
3247 
3248  if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
3249  ret = 1;
3250  else
3251  ret = 0;
3252  return ret;
3253 }
3254 
3255 static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3256  struct inode *inode, u64 ino)
3257 {
3258  struct btrfs_key key;
3259  struct btrfs_path *path;
3260  struct btrfs_root *root = fs_info->tree_root;
3261  struct btrfs_trans_handle *trans;
3262  unsigned long nr;
3263  int ret = 0;
3264 
3265  if (inode)
3266  goto truncate;
3267 
3268  key.objectid = ino;
3269  key.type = BTRFS_INODE_ITEM_KEY;
3270  key.offset = 0;
3271 
3272  inode = btrfs_iget(fs_info->sb, &key, root, NULL);
3273  if (IS_ERR(inode) || is_bad_inode(inode)) {
3274  if (!IS_ERR(inode))
3275  iput(inode);
3276  return -ENOENT;
3277  }
3278 
3279 truncate:
3280  path = btrfs_alloc_path();
3281  if (!path) {
3282  ret = -ENOMEM;
3283  goto out;
3284  }
3285 
3286  trans = btrfs_join_transaction(root);
3287  if (IS_ERR(trans)) {
3288  btrfs_free_path(path);
3289  ret = PTR_ERR(trans);
3290  goto out;
3291  }
3292 
3293  ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
3294 
3295  btrfs_free_path(path);
3296  nr = trans->blocks_used;
3297  btrfs_end_transaction(trans, root);
3298  btrfs_btree_balance_dirty(root, nr);
3299 out:
3300  iput(inode);
3301  return ret;
3302 }
3303 
3304 /*
3305  * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
3306  * this function scans fs tree to find blocks reference the data extent
3307  */
3308 static int find_data_references(struct reloc_control *rc,
3309  struct btrfs_key *extent_key,
3310  struct extent_buffer *leaf,
3311  struct btrfs_extent_data_ref *ref,
3312  struct rb_root *blocks)
3313 {
3314  struct btrfs_path *path;
3315  struct tree_block *block;
3316  struct btrfs_root *root;
3317  struct btrfs_file_extent_item *fi;
3318  struct rb_node *rb_node;
3319  struct btrfs_key key;
3320  u64 ref_root;
3321  u64 ref_objectid;
3322  u64 ref_offset;
3323  u32 ref_count;
3324  u32 nritems;
3325  int err = 0;
3326  int added = 0;
3327  int counted;
3328  int ret;
3329 
3330  ref_root = btrfs_extent_data_ref_root(leaf, ref);
3331  ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
3332  ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
3333  ref_count = btrfs_extent_data_ref_count(leaf, ref);
3334 
3335  /*
3336  * This is an extent belonging to the free space cache, lets just delete
3337  * it and redo the search.
3338  */
3339  if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
3340  ret = delete_block_group_cache(rc->extent_root->fs_info,
3341  NULL, ref_objectid);
3342  if (ret != -ENOENT)
3343  return ret;
3344  ret = 0;
3345  }
3346 
3347  path = btrfs_alloc_path();
3348  if (!path)
3349  return -ENOMEM;
3350  path->reada = 1;
3351 
3352  root = read_fs_root(rc->extent_root->fs_info, ref_root);
3353  if (IS_ERR(root)) {
3354  err = PTR_ERR(root);
3355  goto out;
3356  }
3357 
3358  key.objectid = ref_objectid;
3360  if (ref_offset > ((u64)-1 << 32))
3361  key.offset = 0;
3362  else
3363  key.offset = ref_offset;
3364 
3365  path->search_commit_root = 1;
3366  path->skip_locking = 1;
3367  ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3368  if (ret < 0) {
3369  err = ret;
3370  goto out;
3371  }
3372 
3373  leaf = path->nodes[0];
3374  nritems = btrfs_header_nritems(leaf);
3375  /*
3376  * the references in tree blocks that use full backrefs
3377  * are not counted in
3378  */
3379  if (block_use_full_backref(rc, leaf))
3380  counted = 0;
3381  else
3382  counted = 1;
3383  rb_node = tree_search(blocks, leaf->start);
3384  if (rb_node) {
3385  if (counted)
3386  added = 1;
3387  else
3388  path->slots[0] = nritems;
3389  }
3390 
3391  while (ref_count > 0) {
3392  while (path->slots[0] >= nritems) {
3393  ret = btrfs_next_leaf(root, path);
3394  if (ret < 0) {
3395  err = ret;
3396  goto out;
3397  }
3398  if (ret > 0) {
3399  WARN_ON(1);
3400  goto out;
3401  }
3402 
3403  leaf = path->nodes[0];
3404  nritems = btrfs_header_nritems(leaf);
3405  added = 0;
3406 
3407  if (block_use_full_backref(rc, leaf))
3408  counted = 0;
3409  else
3410  counted = 1;
3411  rb_node = tree_search(blocks, leaf->start);
3412  if (rb_node) {
3413  if (counted)
3414  added = 1;
3415  else
3416  path->slots[0] = nritems;
3417  }
3418  }
3419 
3420  btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3421  if (key.objectid != ref_objectid ||
3422  key.type != BTRFS_EXTENT_DATA_KEY) {
3423  WARN_ON(1);
3424  break;
3425  }
3426 
3427  fi = btrfs_item_ptr(leaf, path->slots[0],
3428  struct btrfs_file_extent_item);
3429 
3430  if (btrfs_file_extent_type(leaf, fi) ==
3432  goto next;
3433 
3434  if (btrfs_file_extent_disk_bytenr(leaf, fi) !=
3435  extent_key->objectid)
3436  goto next;
3437 
3438  key.offset -= btrfs_file_extent_offset(leaf, fi);
3439  if (key.offset != ref_offset)
3440  goto next;
3441 
3442  if (counted)
3443  ref_count--;
3444  if (added)
3445  goto next;
3446 
3447  if (!tree_block_processed(leaf->start, leaf->len, rc)) {
3448  block = kmalloc(sizeof(*block), GFP_NOFS);
3449  if (!block) {
3450  err = -ENOMEM;
3451  break;
3452  }
3453  block->bytenr = leaf->start;
3454  btrfs_item_key_to_cpu(leaf, &block->key, 0);
3455  block->level = 0;
3456  block->key_ready = 1;
3457  rb_node = tree_insert(blocks, block->bytenr,
3458  &block->rb_node);
3459  if (rb_node)
3460  backref_tree_panic(rb_node, -EEXIST,
3461  block->bytenr);
3462  }
3463  if (counted)
3464  added = 1;
3465  else
3466  path->slots[0] = nritems;
3467 next:
3468  path->slots[0]++;
3469 
3470  }
3471 out:
3472  btrfs_free_path(path);
3473  return err;
3474 }
3475 
3476 /*
3477  * hepler to find all tree blocks that reference a given data extent
3478  */
3479 static noinline_for_stack
3480 int add_data_references(struct reloc_control *rc,
3481  struct btrfs_key *extent_key,
3482  struct btrfs_path *path,
3483  struct rb_root *blocks)
3484 {
3485  struct btrfs_key key;
3486  struct extent_buffer *eb;
3487  struct btrfs_extent_data_ref *dref;
3488  struct btrfs_extent_inline_ref *iref;
3489  unsigned long ptr;
3490  unsigned long end;
3491  u32 blocksize = btrfs_level_size(rc->extent_root, 0);
3492  int ret;
3493  int err = 0;
3494 
3495  eb = path->nodes[0];
3496  ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
3497  end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
3498 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3499  if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
3500  ptr = end;
3501  else
3502 #endif
3503  ptr += sizeof(struct btrfs_extent_item);
3504 
3505  while (ptr < end) {
3506  iref = (struct btrfs_extent_inline_ref *)ptr;
3507  key.type = btrfs_extent_inline_ref_type(eb, iref);
3508  if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3509  key.offset = btrfs_extent_inline_ref_offset(eb, iref);
3510  ret = __add_tree_block(rc, key.offset, blocksize,
3511  blocks);
3512  } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3513  dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3514  ret = find_data_references(rc, extent_key,
3515  eb, dref, blocks);
3516  } else {
3517  BUG();
3518  }
3519  ptr += btrfs_extent_inline_ref_size(key.type);
3520  }
3521  WARN_ON(ptr > end);
3522 
3523  while (1) {
3524  cond_resched();
3525  eb = path->nodes[0];
3526  if (path->slots[0] >= btrfs_header_nritems(eb)) {
3527  ret = btrfs_next_leaf(rc->extent_root, path);
3528  if (ret < 0) {
3529  err = ret;
3530  break;
3531  }
3532  if (ret > 0)
3533  break;
3534  eb = path->nodes[0];
3535  }
3536 
3537  btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
3538  if (key.objectid != extent_key->objectid)
3539  break;
3540 
3541 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3542  if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
3543  key.type == BTRFS_EXTENT_REF_V0_KEY) {
3544 #else
3546  if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3547 #endif
3548  ret = __add_tree_block(rc, key.offset, blocksize,
3549  blocks);
3550  } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3551  dref = btrfs_item_ptr(eb, path->slots[0],
3552  struct btrfs_extent_data_ref);
3553  ret = find_data_references(rc, extent_key,
3554  eb, dref, blocks);
3555  } else {
3556  ret = 0;
3557  }
3558  if (ret) {
3559  err = ret;
3560  break;
3561  }
3562  path->slots[0]++;
3563  }
3564  btrfs_release_path(path);
3565  if (err)
3566  free_block_list(blocks);
3567  return err;
3568 }
3569 
3570 /*
3571  * hepler to find next unprocessed extent
3572  */
3573 static noinline_for_stack
3574 int find_next_extent(struct btrfs_trans_handle *trans,
3575  struct reloc_control *rc, struct btrfs_path *path,
3576  struct btrfs_key *extent_key)
3577 {
3578  struct btrfs_key key;
3579  struct extent_buffer *leaf;
3580  u64 start, end, last;
3581  int ret;
3582 
3583  last = rc->block_group->key.objectid + rc->block_group->key.offset;
3584  while (1) {
3585  cond_resched();
3586  if (rc->search_start >= last) {
3587  ret = 1;
3588  break;
3589  }
3590 
3591  key.objectid = rc->search_start;
3593  key.offset = 0;
3594 
3595  path->search_commit_root = 1;
3596  path->skip_locking = 1;
3597  ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
3598  0, 0);
3599  if (ret < 0)
3600  break;
3601 next:
3602  leaf = path->nodes[0];
3603  if (path->slots[0] >= btrfs_header_nritems(leaf)) {
3604  ret = btrfs_next_leaf(rc->extent_root, path);
3605  if (ret != 0)
3606  break;
3607  leaf = path->nodes[0];
3608  }
3609 
3610  btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3611  if (key.objectid >= last) {
3612  ret = 1;
3613  break;
3614  }
3615 
3616  if (key.type != BTRFS_EXTENT_ITEM_KEY ||
3617  key.objectid + key.offset <= rc->search_start) {
3618  path->slots[0]++;
3619  goto next;
3620  }
3621 
3623  key.objectid, &start, &end,
3624  EXTENT_DIRTY, NULL);
3625 
3626  if (ret == 0 && start <= key.objectid) {
3627  btrfs_release_path(path);
3628  rc->search_start = end + 1;
3629  } else {
3630  rc->search_start = key.objectid + key.offset;
3631  memcpy(extent_key, &key, sizeof(key));
3632  return 0;
3633  }
3634  }
3635  btrfs_release_path(path);
3636  return ret;
3637 }
3638 
3639 static void set_reloc_control(struct reloc_control *rc)
3640 {
3641  struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3642 
3643  mutex_lock(&fs_info->reloc_mutex);
3644  fs_info->reloc_ctl = rc;
3645  mutex_unlock(&fs_info->reloc_mutex);
3646 }
3647 
3648 static void unset_reloc_control(struct reloc_control *rc)
3649 {
3650  struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3651 
3652  mutex_lock(&fs_info->reloc_mutex);
3653  fs_info->reloc_ctl = NULL;
3654  mutex_unlock(&fs_info->reloc_mutex);
3655 }
3656 
3657 static int check_extent_flags(u64 flags)
3658 {
3659  if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3660  (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3661  return 1;
3662  if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
3663  !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3664  return 1;
3665  if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3667  return 1;
3668  return 0;
3669 }
3670 
3671 static noinline_for_stack
3672 int prepare_to_relocate(struct reloc_control *rc)
3673 {
3674  struct btrfs_trans_handle *trans;
3675  int ret;
3676 
3679  if (!rc->block_rsv)
3680  return -ENOMEM;
3681 
3682  /*
3683  * reserve some space for creating reloc trees.
3684  * btrfs_init_reloc_root will use them when there
3685  * is no reservation in transaction handle.
3686  */
3687  ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv,
3688  rc->extent_root->nodesize * 256);
3689  if (ret)
3690  return ret;
3691 
3692  memset(&rc->cluster, 0, sizeof(rc->cluster));
3693  rc->search_start = rc->block_group->key.objectid;
3694  rc->extents_found = 0;
3695  rc->nodes_relocated = 0;
3696  rc->merging_rsv_size = 0;
3697 
3698  rc->create_reloc_tree = 1;
3699  set_reloc_control(rc);
3700 
3701  trans = btrfs_join_transaction(rc->extent_root);
3702  BUG_ON(IS_ERR(trans));
3704  return 0;
3705 }
3706 
3707 static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3708 {
3709  struct rb_root blocks = RB_ROOT;
3710  struct btrfs_key key;
3711  struct btrfs_trans_handle *trans = NULL;
3712  struct btrfs_path *path;
3713  struct btrfs_extent_item *ei;
3714  unsigned long nr;
3715  u64 flags;
3716  u32 item_size;
3717  int ret;
3718  int err = 0;
3719  int progress = 0;
3720 
3721  path = btrfs_alloc_path();
3722  if (!path)
3723  return -ENOMEM;
3724  path->reada = 1;
3725 
3726  ret = prepare_to_relocate(rc);
3727  if (ret) {
3728  err = ret;
3729  goto out_free;
3730  }
3731 
3732  while (1) {
3733  progress++;
3734  trans = btrfs_start_transaction(rc->extent_root, 0);
3735  BUG_ON(IS_ERR(trans));
3736 restart:
3737  if (update_backref_cache(trans, &rc->backref_cache)) {
3738  btrfs_end_transaction(trans, rc->extent_root);
3739  continue;
3740  }
3741 
3742  ret = find_next_extent(trans, rc, path, &key);
3743  if (ret < 0)
3744  err = ret;
3745  if (ret != 0)
3746  break;
3747 
3748  rc->extents_found++;
3749 
3750  ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3751  struct btrfs_extent_item);
3752  item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
3753  if (item_size >= sizeof(*ei)) {
3754  flags = btrfs_extent_flags(path->nodes[0], ei);
3755  ret = check_extent_flags(flags);
3756  BUG_ON(ret);
3757 
3758  } else {
3759 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3760  u64 ref_owner;
3761  int path_change = 0;
3762 
3763  BUG_ON(item_size !=
3764  sizeof(struct btrfs_extent_item_v0));
3765  ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
3766  &path_change);
3767  if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
3769  else
3770  flags = BTRFS_EXTENT_FLAG_DATA;
3771 
3772  if (path_change) {
3773  btrfs_release_path(path);
3774 
3775  path->search_commit_root = 1;
3776  path->skip_locking = 1;
3777  ret = btrfs_search_slot(NULL, rc->extent_root,
3778  &key, path, 0, 0);
3779  if (ret < 0) {
3780  err = ret;
3781  break;
3782  }
3783  BUG_ON(ret > 0);
3784  }
3785 #else
3786  BUG();
3787 #endif
3788  }
3789 
3790  if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
3791  ret = add_tree_block(rc, &key, path, &blocks);
3792  } else if (rc->stage == UPDATE_DATA_PTRS &&
3793  (flags & BTRFS_EXTENT_FLAG_DATA)) {
3794  ret = add_data_references(rc, &key, path, &blocks);
3795  } else {
3796  btrfs_release_path(path);
3797  ret = 0;
3798  }
3799  if (ret < 0) {
3800  err = ret;
3801  break;
3802  }
3803 
3804  if (!RB_EMPTY_ROOT(&blocks)) {
3805  ret = relocate_tree_blocks(trans, rc, &blocks);
3806  if (ret < 0) {
3807  if (ret != -EAGAIN) {
3808  err = ret;
3809  break;
3810  }
3811  rc->extents_found--;
3812  rc->search_start = key.objectid;
3813  }
3814  }
3815 
3816  ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5);
3817  if (ret < 0) {
3818  if (ret != -ENOSPC) {
3819  err = ret;
3820  WARN_ON(1);
3821  break;
3822  }
3823  rc->commit_transaction = 1;
3824  }
3825 
3826  if (rc->commit_transaction) {
3827  rc->commit_transaction = 0;
3828  ret = btrfs_commit_transaction(trans, rc->extent_root);
3829  BUG_ON(ret);
3830  } else {
3831  nr = trans->blocks_used;
3834  }
3835  trans = NULL;
3836 
3837  if (rc->stage == MOVE_DATA_EXTENTS &&
3838  (flags & BTRFS_EXTENT_FLAG_DATA)) {
3839  rc->found_file_extent = 1;
3840  ret = relocate_data_extent(rc->data_inode,
3841  &key, &rc->cluster);
3842  if (ret < 0) {
3843  err = ret;
3844  break;
3845  }
3846  }
3847  }
3848  if (trans && progress && err == -ENOSPC) {
3849  ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
3850  rc->block_group->flags);
3851  if (ret == 0) {
3852  err = 0;
3853  progress = 0;
3854  goto restart;
3855  }
3856  }
3857 
3858  btrfs_release_path(path);
3860  GFP_NOFS);
3861 
3862  if (trans) {
3863  nr = trans->blocks_used;
3866  }
3867 
3868  if (!err) {
3869  ret = relocate_file_extent_cluster(rc->data_inode,
3870  &rc->cluster);
3871  if (ret < 0)
3872  err = ret;
3873  }
3874 
3875  rc->create_reloc_tree = 0;
3876  set_reloc_control(rc);
3877 
3878  backref_cache_cleanup(&rc->backref_cache);
3880 
3881  err = prepare_to_merge(rc, err);
3882 
3883  merge_reloc_roots(rc);
3884 
3885  rc->merge_reloc_tree = 0;
3886  unset_reloc_control(rc);
3888 
3889  /* get rid of pinned extents */
3890  trans = btrfs_join_transaction(rc->extent_root);
3891  if (IS_ERR(trans))
3892  err = PTR_ERR(trans);
3893  else
3895 out_free:
3897  btrfs_free_path(path);
3898  return err;
3899 }
3900 
3901 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3902  struct btrfs_root *root, u64 objectid)
3903 {
3904  struct btrfs_path *path;
3905  struct btrfs_inode_item *item;
3906  struct extent_buffer *leaf;
3907  int ret;
3908 
3909  path = btrfs_alloc_path();
3910  if (!path)
3911  return -ENOMEM;
3912 
3913  ret = btrfs_insert_empty_inode(trans, root, path, objectid);
3914  if (ret)
3915  goto out;
3916 
3917  leaf = path->nodes[0];
3918  item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3919  memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
3920  btrfs_set_inode_generation(leaf, item, 1);
3921  btrfs_set_inode_size(leaf, item, 0);
3922  btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
3923  btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
3926  btrfs_release_path(path);
3927 out:
3928  btrfs_free_path(path);
3929  return ret;
3930 }
3931 
3932 /*
3933  * helper to create inode for data relocation.
3934  * the inode is in data relocation tree and its link count is 0
3935  */
3936 static noinline_for_stack
3937 struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3939 {
3940  struct inode *inode = NULL;
3941  struct btrfs_trans_handle *trans;
3942  struct btrfs_root *root;
3943  struct btrfs_key key;
3944  unsigned long nr;
3945  u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
3946  int err = 0;
3947 
3948  root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
3949  if (IS_ERR(root))
3950  return ERR_CAST(root);
3951 
3952  trans = btrfs_start_transaction(root, 6);
3953  if (IS_ERR(trans))
3954  return ERR_CAST(trans);
3955 
3956  err = btrfs_find_free_objectid(root, &objectid);
3957  if (err)
3958  goto out;
3959 
3960  err = __insert_orphan_inode(trans, root, objectid);
3961  BUG_ON(err);
3962 
3963  key.objectid = objectid;
3964  key.type = BTRFS_INODE_ITEM_KEY;
3965  key.offset = 0;
3966  inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
3967  BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
3968  BTRFS_I(inode)->index_cnt = group->key.objectid;
3969 
3970  err = btrfs_orphan_add(trans, inode);
3971 out:
3972  nr = trans->blocks_used;
3973  btrfs_end_transaction(trans, root);
3974  btrfs_btree_balance_dirty(root, nr);
3975  if (err) {
3976  if (inode)
3977  iput(inode);
3978  inode = ERR_PTR(err);
3979  }
3980  return inode;
3981 }
3982 
3983 static struct reloc_control *alloc_reloc_control(void)
3984 {
3985  struct reloc_control *rc;
3986 
3987  rc = kzalloc(sizeof(*rc), GFP_NOFS);
3988  if (!rc)
3989  return NULL;
3990 
3991  INIT_LIST_HEAD(&rc->reloc_roots);
3992  backref_cache_init(&rc->backref_cache);
3993  mapping_tree_init(&rc->reloc_root_tree);
3995  return rc;
3996 }
3997 
3998 /*
3999  * function to relocate all extents in a block group.
4000  */
4002 {
4003  struct btrfs_fs_info *fs_info = extent_root->fs_info;
4004  struct reloc_control *rc;
4005  struct inode *inode;
4006  struct btrfs_path *path;
4007  int ret;
4008  int rw = 0;
4009  int err = 0;
4010 
4011  rc = alloc_reloc_control();
4012  if (!rc)
4013  return -ENOMEM;
4014 
4015  rc->extent_root = extent_root;
4016 
4017  rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
4018  BUG_ON(!rc->block_group);
4019 
4020  if (!rc->block_group->ro) {
4021  ret = btrfs_set_block_group_ro(extent_root, rc->block_group);
4022  if (ret) {
4023  err = ret;
4024  goto out;
4025  }
4026  rw = 1;
4027  }
4028 
4029  path = btrfs_alloc_path();
4030  if (!path) {
4031  err = -ENOMEM;
4032  goto out;
4033  }
4034 
4035  inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
4036  path);
4037  btrfs_free_path(path);
4038 
4039  if (!IS_ERR(inode))
4040  ret = delete_block_group_cache(fs_info, inode, 0);
4041  else
4042  ret = PTR_ERR(inode);
4043 
4044  if (ret && ret != -ENOENT) {
4045  err = ret;
4046  goto out;
4047  }
4048 
4049  rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
4050  if (IS_ERR(rc->data_inode)) {
4051  err = PTR_ERR(rc->data_inode);
4052  rc->data_inode = NULL;
4053  goto out;
4054  }
4055 
4056  printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
4057  (unsigned long long)rc->block_group->key.objectid,
4058  (unsigned long long)rc->block_group->flags);
4059 
4062 
4063  while (1) {
4064  mutex_lock(&fs_info->cleaner_mutex);
4065 
4067  ret = relocate_block_group(rc);
4068 
4069  mutex_unlock(&fs_info->cleaner_mutex);
4070  if (ret < 0) {
4071  err = ret;
4072  goto out;
4073  }
4074 
4075  if (rc->extents_found == 0)
4076  break;
4077 
4078  printk(KERN_INFO "btrfs: found %llu extents\n",
4079  (unsigned long long)rc->extents_found);
4080 
4081  if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
4083  invalidate_mapping_pages(rc->data_inode->i_mapping,
4084  0, -1);
4085  rc->stage = UPDATE_DATA_PTRS;
4086  }
4087  }
4088 
4089  filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
4090  rc->block_group->key.objectid,
4091  rc->block_group->key.objectid +
4092  rc->block_group->key.offset - 1);
4093 
4094  WARN_ON(rc->block_group->pinned > 0);
4095  WARN_ON(rc->block_group->reserved > 0);
4096  WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
4097 out:
4098  if (err && rw)
4099  btrfs_set_block_group_rw(extent_root, rc->block_group);
4100  iput(rc->data_inode);
4102  kfree(rc);
4103  return err;
4104 }
4105 
4106 static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
4107 {
4108  struct btrfs_trans_handle *trans;
4109  int ret, err;
4110 
4111  trans = btrfs_start_transaction(root->fs_info->tree_root, 0);
4112  if (IS_ERR(trans))
4113  return PTR_ERR(trans);
4114 
4115  memset(&root->root_item.drop_progress, 0,
4116  sizeof(root->root_item.drop_progress));
4117  root->root_item.drop_level = 0;
4118  btrfs_set_root_refs(&root->root_item, 0);
4119  ret = btrfs_update_root(trans, root->fs_info->tree_root,
4120  &root->root_key, &root->root_item);
4121 
4122  err = btrfs_end_transaction(trans, root->fs_info->tree_root);
4123  if (err)
4124  return err;
4125  return ret;
4126 }
4127 
4128 /*
4129  * recover relocation interrupted by system crash.
4130  *
4131  * this function resumes merging reloc trees with corresponding fs trees.
4132  * this is important for keeping the sharing of tree blocks
4133  */
4135 {
4136  LIST_HEAD(reloc_roots);
4137  struct btrfs_key key;
4138  struct btrfs_root *fs_root;
4139  struct btrfs_root *reloc_root;
4140  struct btrfs_path *path;
4141  struct extent_buffer *leaf;
4142  struct reloc_control *rc = NULL;
4143  struct btrfs_trans_handle *trans;
4144  int ret;
4145  int err = 0;
4146 
4147  path = btrfs_alloc_path();
4148  if (!path)
4149  return -ENOMEM;
4150  path->reada = -1;
4151 
4153  key.type = BTRFS_ROOT_ITEM_KEY;
4154  key.offset = (u64)-1;
4155 
4156  while (1) {
4157  ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key,
4158  path, 0, 0);
4159  if (ret < 0) {
4160  err = ret;
4161  goto out;
4162  }
4163  if (ret > 0) {
4164  if (path->slots[0] == 0)
4165  break;
4166  path->slots[0]--;
4167  }
4168  leaf = path->nodes[0];
4169  btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4170  btrfs_release_path(path);
4171 
4172  if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
4173  key.type != BTRFS_ROOT_ITEM_KEY)
4174  break;
4175 
4176  reloc_root = btrfs_read_fs_root_no_radix(root, &key);
4177  if (IS_ERR(reloc_root)) {
4178  err = PTR_ERR(reloc_root);
4179  goto out;
4180  }
4181 
4182  list_add(&reloc_root->root_list, &reloc_roots);
4183 
4184  if (btrfs_root_refs(&reloc_root->root_item) > 0) {
4185  fs_root = read_fs_root(root->fs_info,
4186  reloc_root->root_key.offset);
4187  if (IS_ERR(fs_root)) {
4188  ret = PTR_ERR(fs_root);
4189  if (ret != -ENOENT) {
4190  err = ret;
4191  goto out;
4192  }
4193  ret = mark_garbage_root(reloc_root);
4194  if (ret < 0) {
4195  err = ret;
4196  goto out;
4197  }
4198  }
4199  }
4200 
4201  if (key.offset == 0)
4202  break;
4203 
4204  key.offset--;
4205  }
4206  btrfs_release_path(path);
4207 
4208  if (list_empty(&reloc_roots))
4209  goto out;
4210 
4211  rc = alloc_reloc_control();
4212  if (!rc) {
4213  err = -ENOMEM;
4214  goto out;
4215  }
4216 
4217  rc->extent_root = root->fs_info->extent_root;
4218 
4219  set_reloc_control(rc);
4220 
4221  trans = btrfs_join_transaction(rc->extent_root);
4222  if (IS_ERR(trans)) {
4223  unset_reloc_control(rc);
4224  err = PTR_ERR(trans);
4225  goto out_free;
4226  }
4227 
4228  rc->merge_reloc_tree = 1;
4229 
4230  while (!list_empty(&reloc_roots)) {
4231  reloc_root = list_entry(reloc_roots.next,
4232  struct btrfs_root, root_list);
4233  list_del(&reloc_root->root_list);
4234 
4235  if (btrfs_root_refs(&reloc_root->root_item) == 0) {
4236  list_add_tail(&reloc_root->root_list,
4237  &rc->reloc_roots);
4238  continue;
4239  }
4240 
4241  fs_root = read_fs_root(root->fs_info,
4242  reloc_root->root_key.offset);
4243  if (IS_ERR(fs_root)) {
4244  err = PTR_ERR(fs_root);
4245  goto out_free;
4246  }
4247 
4248  err = __add_reloc_root(reloc_root);
4249  BUG_ON(err < 0); /* -ENOMEM or logic error */
4250  fs_root->reloc_root = reloc_root;
4251  }
4252 
4253  err = btrfs_commit_transaction(trans, rc->extent_root);
4254  if (err)
4255  goto out_free;
4256 
4257  merge_reloc_roots(rc);
4258 
4259  unset_reloc_control(rc);
4260 
4261  trans = btrfs_join_transaction(rc->extent_root);
4262  if (IS_ERR(trans))
4263  err = PTR_ERR(trans);
4264  else
4265  err = btrfs_commit_transaction(trans, rc->extent_root);
4266 out_free:
4267  kfree(rc);
4268 out:
4269  while (!list_empty(&reloc_roots)) {
4270  reloc_root = list_entry(reloc_roots.next,
4271  struct btrfs_root, root_list);
4272  list_del(&reloc_root->root_list);
4273  free_extent_buffer(reloc_root->node);
4274  free_extent_buffer(reloc_root->commit_root);
4275  kfree(reloc_root);
4276  }
4277  btrfs_free_path(path);
4278 
4279  if (err == 0) {
4280  /* cleanup orphan inode in data relocation tree */
4281  fs_root = read_fs_root(root->fs_info,
4283  if (IS_ERR(fs_root))
4284  err = PTR_ERR(fs_root);
4285  else
4286  err = btrfs_orphan_cleanup(fs_root);
4287  }
4288  return err;
4289 }
4290 
4291 /*
4292  * helper to add ordered checksum for data relocation.
4293  *
4294  * cloning checksum properly handles the nodatasum extents.
4295  * it also saves CPU time to re-calculate the checksum.
4296  */
4297 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4298 {
4299  struct btrfs_ordered_sum *sums;
4300  struct btrfs_sector_sum *sector_sum;
4301  struct btrfs_ordered_extent *ordered;
4302  struct btrfs_root *root = BTRFS_I(inode)->root;
4303  size_t offset;
4304  int ret;
4305  u64 disk_bytenr;
4306  LIST_HEAD(list);
4307 
4308  ordered = btrfs_lookup_ordered_extent(inode, file_pos);
4309  BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
4310 
4311  disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
4312  ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
4313  disk_bytenr + len - 1, &list, 0);
4314  if (ret)
4315  goto out;
4316 
4317  while (!list_empty(&list)) {
4318  sums = list_entry(list.next, struct btrfs_ordered_sum, list);
4319  list_del_init(&sums->list);
4320 
4321  sector_sum = sums->sums;
4322  sums->bytenr = ordered->start;
4323 
4324  offset = 0;
4325  while (offset < sums->len) {
4326  sector_sum->bytenr += ordered->start - disk_bytenr;
4327  sector_sum++;
4328  offset += root->sectorsize;
4329  }
4330 
4331  btrfs_add_ordered_sum(inode, ordered, sums);
4332  }
4333 out:
4334  btrfs_put_ordered_extent(ordered);
4335  return ret;
4336 }
4337 
4339  struct btrfs_root *root, struct extent_buffer *buf,
4340  struct extent_buffer *cow)
4341 {
4342  struct reloc_control *rc;
4343  struct backref_node *node;
4344  int first_cow = 0;
4345  int level;
4346  int ret;
4347 
4348  rc = root->fs_info->reloc_ctl;
4349  if (!rc)
4350  return;
4351 
4352  BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
4353  root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
4354 
4355  level = btrfs_header_level(buf);
4356  if (btrfs_header_generation(buf) <=
4357  btrfs_root_last_snapshot(&root->root_item))
4358  first_cow = 1;
4359 
4360  if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
4361  rc->create_reloc_tree) {
4362  WARN_ON(!first_cow && level == 0);
4363 
4364  node = rc->backref_cache.path[level];
4365  BUG_ON(node->bytenr != buf->start &&
4366  node->new_bytenr != buf->start);
4367 
4368  drop_node_buffer(node);
4369  extent_buffer_get(cow);
4370  node->eb = cow;
4371  node->new_bytenr = cow->start;
4372 
4373  if (!node->pending) {
4374  list_move_tail(&node->list,
4375  &rc->backref_cache.pending[level]);
4376  node->pending = 1;
4377  }
4378 
4379  if (first_cow)
4380  __mark_block_processed(rc, node);
4381 
4382  if (first_cow && level > 0)
4383  rc->nodes_relocated += buf->len;
4384  }
4385 
4386  if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) {
4387  ret = replace_file_extents(trans, rc, root, cow);
4388  BUG_ON(ret);
4389  }
4390 }
4391 
4392 /*
4393  * called before creating snapshot. it calculates metadata reservation
4394  * requried for relocating tree blocks in the snapshot
4395  */
4398  u64 *bytes_to_reserve)
4399 {
4400  struct btrfs_root *root;
4401  struct reloc_control *rc;
4402 
4403  root = pending->root;
4404  if (!root->reloc_root)
4405  return;
4406 
4407  rc = root->fs_info->reloc_ctl;
4408  if (!rc->merge_reloc_tree)
4409  return;
4410 
4411  root = root->reloc_root;
4412  BUG_ON(btrfs_root_refs(&root->root_item) == 0);
4413  /*
4414  * relocation is in the stage of merging trees. the space
4415  * used by merging a reloc tree is twice the size of
4416  * relocated tree nodes in the worst case. half for cowing
4417  * the reloc tree, half for cowing the fs tree. the space
4418  * used by cowing the reloc tree will be freed after the
4419  * tree is dropped. if we create snapshot, cowing the fs
4420  * tree may use more space than it frees. so we need
4421  * reserve extra space.
4422  */
4423  *bytes_to_reserve += rc->nodes_relocated;
4424 }
4425 
4426 /*
4427  * called after snapshot is created. migrate block reservation
4428  * and create reloc root for the newly created snapshot
4429  */
4431  struct btrfs_pending_snapshot *pending)
4432 {
4433  struct btrfs_root *root = pending->root;
4434  struct btrfs_root *reloc_root;
4435  struct btrfs_root *new_root;
4436  struct reloc_control *rc;
4437  int ret;
4438 
4439  if (!root->reloc_root)
4440  return 0;
4441 
4442  rc = root->fs_info->reloc_ctl;
4443  rc->merging_rsv_size += rc->nodes_relocated;
4444 
4445  if (rc->merge_reloc_tree) {
4446  ret = btrfs_block_rsv_migrate(&pending->block_rsv,
4447  rc->block_rsv,
4448  rc->nodes_relocated);
4449  if (ret)
4450  return ret;
4451  }
4452 
4453  new_root = pending->snap;
4454  reloc_root = create_reloc_root(trans, root->reloc_root,
4455  new_root->root_key.objectid);
4456  if (IS_ERR(reloc_root))
4457  return PTR_ERR(reloc_root);
4458 
4459  ret = __add_reloc_root(reloc_root);
4460  BUG_ON(ret < 0);
4461  new_root->reloc_root = reloc_root;
4462 
4463  if (rc->create_reloc_tree)
4464  ret = clone_backref_node(trans, rc, root, reloc_root);
4465  return ret;
4466 }