Linux Kernel 3.7.1
free-space-cache.c
1 /*
2  * Copyright (C) 2008 Red Hat. All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/math64.h>
23 #include <linux/ratelimit.h>
24 #include "ctree.h"
25 #include "free-space-cache.h"
26 #include "transaction.h"
27 #include "disk-io.h"
28 #include "extent_io.h"
29 #include "inode-map.h"
30 
31 #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
32 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
33 
34 static int link_free_space(struct btrfs_free_space_ctl *ctl,
35  struct btrfs_free_space *info);
36 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
37  struct btrfs_free_space *info);
38 
39 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
40  struct btrfs_path *path,
41  u64 offset)
42 {
43  struct btrfs_key key;
44  struct btrfs_key location;
45  struct btrfs_disk_key disk_key;
46  struct btrfs_free_space_header *header;
47  struct extent_buffer *leaf;
48  struct inode *inode = NULL;
49  int ret;
50 
51  key.objectid = BTRFS_FREE_SPACE_OBJECTID;
52  key.offset = offset;
53  key.type = 0;
54 
55  ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
56  if (ret < 0)
57  return ERR_PTR(ret);
58  if (ret > 0) {
59  btrfs_release_path(path);
60  return ERR_PTR(-ENOENT);
61  }
62 
63  leaf = path->nodes[0];
64  header = btrfs_item_ptr(leaf, path->slots[0],
65  struct btrfs_free_space_header);
66  btrfs_free_space_key(leaf, header, &disk_key);
67  btrfs_disk_key_to_cpu(&location, &disk_key);
68  btrfs_release_path(path);
69 
70  inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
71  if (!inode)
72  return ERR_PTR(-ENOENT);
73  if (IS_ERR(inode))
74  return inode;
75  if (is_bad_inode(inode)) {
76  iput(inode);
77  return ERR_PTR(-ENOENT);
78  }
79 
80  mapping_set_gfp_mask(inode->i_mapping,
81  mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
82 
83  return inode;
84 }
85 
86 struct inode *lookup_free_space_inode(struct btrfs_root *root,
87  struct btrfs_block_group_cache
88  *block_group, struct btrfs_path *path)
89 {
90  struct inode *inode = NULL;
91  u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
92 
93  spin_lock(&block_group->lock);
94  if (block_group->inode)
95  inode = igrab(block_group->inode);
96  spin_unlock(&block_group->lock);
97  if (inode)
98  return inode;
99 
100  inode = __lookup_free_space_inode(root, path,
101  block_group->key.objectid);
102  if (IS_ERR(inode))
103  return inode;
104 
105  spin_lock(&block_group->lock);
106  if (!((BTRFS_I(inode)->flags & flags) == flags)) {
107  printk(KERN_INFO "Old style space inode found, converting.\n");
108  BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
109  BTRFS_INODE_NODATACOW;
110  block_group->disk_cache_state = BTRFS_DC_CLEAR;
111  }
112 
113  if (!block_group->iref) {
114  block_group->inode = igrab(inode);
115  block_group->iref = 1;
116  }
117  spin_unlock(&block_group->lock);
118 
119  return inode;
120 }
121 
122 static int __create_free_space_inode(struct btrfs_root *root,
123  struct btrfs_trans_handle *trans,
124  struct btrfs_path *path, u64 ino, u64 offset)
125 {
126  struct btrfs_key key;
127  struct btrfs_disk_key disk_key;
128  struct btrfs_free_space_header *header;
129  struct btrfs_inode_item *inode_item;
130  struct extent_buffer *leaf;
131  u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
132  int ret;
133 
134  ret = btrfs_insert_empty_inode(trans, root, path, ino);
135  if (ret)
136  return ret;
137 
138  /* We inline crc's for the free disk space cache */
139  if (ino != BTRFS_FREE_INO_OBJECTID)
140  flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
141 
142  leaf = path->nodes[0];
143  inode_item = btrfs_item_ptr(leaf, path->slots[0],
144  struct btrfs_inode_item);
145  btrfs_item_key(leaf, &disk_key, path->slots[0]);
146  memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
147  sizeof(*inode_item));
148  btrfs_set_inode_generation(leaf, inode_item, trans->transid);
149  btrfs_set_inode_size(leaf, inode_item, 0);
150  btrfs_set_inode_nbytes(leaf, inode_item, 0);
151  btrfs_set_inode_uid(leaf, inode_item, 0);
152  btrfs_set_inode_gid(leaf, inode_item, 0);
153  btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
154  btrfs_set_inode_flags(leaf, inode_item, flags);
155  btrfs_set_inode_nlink(leaf, inode_item, 1);
156  btrfs_set_inode_transid(leaf, inode_item, trans->transid);
157  btrfs_set_inode_block_group(leaf, inode_item, offset);
158  btrfs_mark_buffer_dirty(leaf);
159  btrfs_release_path(path);
160 
161  key.objectid = BTRFS_FREE_SPACE_OBJECTID;
162  key.offset = offset;
163  key.type = 0;
164 
165  ret = btrfs_insert_empty_item(trans, root, path, &key,
166  sizeof(struct btrfs_free_space_header));
167  if (ret < 0) {
168  btrfs_release_path(path);
169  return ret;
170  }
171  leaf = path->nodes[0];
172  header = btrfs_item_ptr(leaf, path->slots[0],
173  struct btrfs_free_space_header);
174  memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
175  btrfs_set_free_space_key(leaf, header, &disk_key);
176  btrfs_mark_buffer_dirty(leaf);
177  btrfs_release_path(path);
178 
179  return 0;
180 }
181 
182 int create_free_space_inode(struct btrfs_root *root,
183  struct btrfs_trans_handle *trans,
184  struct btrfs_block_group_cache *block_group,
185  struct btrfs_path *path)
186 {
187  int ret;
188  u64 ino;
189 
190  ret = btrfs_find_free_objectid(root, &ino);
191  if (ret < 0)
192  return ret;
193 
194  return __create_free_space_inode(root, trans, path, ino,
195  block_group->key.objectid);
196 }
197 
198 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
199  struct btrfs_trans_handle *trans,
200  struct btrfs_path *path,
201  struct inode *inode)
202 {
203  struct btrfs_block_rsv *rsv;
204  u64 needed_bytes;
205  loff_t oldsize;
206  int ret = 0;
207 
208  rsv = trans->block_rsv;
209  trans->block_rsv = &root->fs_info->global_block_rsv;
210 
211  /* 1 for slack space, 1 for updating the inode */
212  needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
213  btrfs_calc_trans_metadata_size(root, 1);
214 
215  spin_lock(&trans->block_rsv->lock);
216  if (trans->block_rsv->reserved < needed_bytes) {
217  spin_unlock(&trans->block_rsv->lock);
218  trans->block_rsv = rsv;
219  return -ENOSPC;
220  }
221  spin_unlock(&trans->block_rsv->lock);
222 
223  oldsize = i_size_read(inode);
224  btrfs_i_size_write(inode, 0);
225  truncate_pagecache(inode, oldsize, 0);
226 
227  /*
228  * We don't need an orphan item because truncating the free space cache
229  * will never be split across transactions.
230  */
231  ret = btrfs_truncate_inode_items(trans, root, inode,
232  0, BTRFS_EXTENT_DATA_KEY);
233 
234  if (ret) {
235  trans->block_rsv = rsv;
236  btrfs_abort_transaction(trans, root, ret);
237  return ret;
238  }
239 
240  ret = btrfs_update_inode(trans, root, inode);
241  if (ret)
242  btrfs_abort_transaction(trans, root, ret);
243  trans->block_rsv = rsv;
244 
245  return ret;
246 }
247 
248 static int readahead_cache(struct inode *inode)
249 {
250  struct file_ra_state *ra;
251  unsigned long last_index;
252 
253  ra = kzalloc(sizeof(*ra), GFP_NOFS);
254  if (!ra)
255  return -ENOMEM;
256 
257  file_ra_state_init(ra, inode->i_mapping);
258  last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
259 
260  page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
261 
262  kfree(ra);
263 
264  return 0;
265 }
266 
267 struct io_ctl {
268  void *cur, *orig;
269  struct page *page;
270  struct page **pages;
271  struct btrfs_root *root;
272  unsigned long size;
273  int index;
274  int num_pages;
275  unsigned check_crcs:1;
276 };
277 
278 static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
279  struct btrfs_root *root)
280 {
281  memset(io_ctl, 0, sizeof(struct io_ctl));
282  io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
283  PAGE_CACHE_SHIFT;
284  io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
285  GFP_NOFS);
286  if (!io_ctl->pages)
287  return -ENOMEM;
288  io_ctl->root = root;
289  if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
290  io_ctl->check_crcs = 1;
291  return 0;
292 }
293 
294 static void io_ctl_free(struct io_ctl *io_ctl)
295 {
296  kfree(io_ctl->pages);
297 }
298 
299 static void io_ctl_unmap_page(struct io_ctl *io_ctl)
300 {
301  if (io_ctl->cur) {
302  kunmap(io_ctl->page);
303  io_ctl->cur = NULL;
304  io_ctl->orig = NULL;
305  }
306 }
307 
308 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
309 {
310  WARN_ON(io_ctl->cur);
311  BUG_ON(io_ctl->index >= io_ctl->num_pages);
312  io_ctl->page = io_ctl->pages[io_ctl->index++];
313  io_ctl->cur = kmap(io_ctl->page);
314  io_ctl->orig = io_ctl->cur;
315  io_ctl->size = PAGE_CACHE_SIZE;
316  if (clear)
317  memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
318 }
319 
320 static void io_ctl_drop_pages(struct io_ctl *io_ctl)
321 {
322  int i;
323 
324  io_ctl_unmap_page(io_ctl);
325 
326  for (i = 0; i < io_ctl->num_pages; i++) {
327  if (io_ctl->pages[i]) {
328  ClearPageChecked(io_ctl->pages[i]);
329  unlock_page(io_ctl->pages[i]);
330  page_cache_release(io_ctl->pages[i]);
331  }
332  }
333 }
334 
335 static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
336  int uptodate)
337 {
338  struct page *page;
339  gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
340  int i;
341 
342  for (i = 0; i < io_ctl->num_pages; i++) {
343  page = find_or_create_page(inode->i_mapping, i, mask);
344  if (!page) {
345  io_ctl_drop_pages(io_ctl);
346  return -ENOMEM;
347  }
348  io_ctl->pages[i] = page;
349  if (uptodate && !PageUptodate(page)) {
350  btrfs_readpage(NULL, page);
351  lock_page(page);
352  if (!PageUptodate(page)) {
353  printk(KERN_ERR "btrfs: error reading free "
354  "space cache\n");
355  io_ctl_drop_pages(io_ctl);
356  return -EIO;
357  }
358  }
359  }
360 
361  for (i = 0; i < io_ctl->num_pages; i++) {
362  clear_page_dirty_for_io(io_ctl->pages[i]);
363  set_page_extent_mapped(io_ctl->pages[i]);
364  }
365 
366  return 0;
367 }
368 
369 static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
370 {
371  __le64 *val;
372 
373  io_ctl_map_page(io_ctl, 1);
374 
375  /*
376  * Skip the csum areas. If we don't check crcs then we just have a
377  * 64bit chunk at the front of the first page.
378  */
379  if (io_ctl->check_crcs) {
380  io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
381  io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
382  } else {
383  io_ctl->cur += sizeof(u64);
384  io_ctl->size -= sizeof(u64) * 2;
385  }
386 
387  val = io_ctl->cur;
388  *val = cpu_to_le64(generation);
389  io_ctl->cur += sizeof(u64);
390 }
391 
392 static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
393 {
394  __le64 *gen;
395 
396  /*
397  * Skip the crc area. If we don't check crcs then we just have a 64bit
398  * chunk at the front of the first page.
399  */
400  if (io_ctl->check_crcs) {
401  io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
402  io_ctl->size -= sizeof(u64) +
403  (sizeof(u32) * io_ctl->num_pages);
404  } else {
405  io_ctl->cur += sizeof(u64);
406  io_ctl->size -= sizeof(u64) * 2;
407  }
408 
409  gen = io_ctl->cur;
410  if (le64_to_cpu(*gen) != generation) {
411  printk_ratelimited(KERN_ERR "btrfs: space cache generation "
412  "(%Lu) does not match inode (%Lu)\n", *gen,
413  generation);
414  io_ctl_unmap_page(io_ctl);
415  return -EIO;
416  }
417  io_ctl->cur += sizeof(u64);
418  return 0;
419 }
420 
421 static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
422 {
423  u32 *tmp;
424  u32 crc = ~(u32)0;
425  unsigned offset = 0;
426 
427  if (!io_ctl->check_crcs) {
428  io_ctl_unmap_page(io_ctl);
429  return;
430  }
431 
432  if (index == 0)
433  offset = sizeof(u32) * io_ctl->num_pages;
434 
435  crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
436  PAGE_CACHE_SIZE - offset);
437  btrfs_csum_final(crc, (char *)&crc);
438  io_ctl_unmap_page(io_ctl);
439  tmp = kmap(io_ctl->pages[0]);
440  tmp += index;
441  *tmp = crc;
442  kunmap(io_ctl->pages[0]);
443 }
444 
445 static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
446 {
447  u32 *tmp, val;
448  u32 crc = ~(u32)0;
449  unsigned offset = 0;
450 
451  if (!io_ctl->check_crcs) {
452  io_ctl_map_page(io_ctl, 0);
453  return 0;
454  }
455 
456  if (index == 0)
457  offset = sizeof(u32) * io_ctl->num_pages;
458 
459  tmp = kmap(io_ctl->pages[0]);
460  tmp += index;
461  val = *tmp;
462  kunmap(io_ctl->pages[0]);
463 
464  io_ctl_map_page(io_ctl, 0);
465  crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
466  PAGE_CACHE_SIZE - offset);
467  btrfs_csum_final(crc, (char *)&crc);
468  if (val != crc) {
469  printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
470  "space cache\n");
471  io_ctl_unmap_page(io_ctl);
472  return -EIO;
473  }
474 
475  return 0;
476 }
477 
478 static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
479  void *bitmap)
480 {
481  struct btrfs_free_space_entry *entry;
482 
483  if (!io_ctl->cur)
484  return -ENOSPC;
485 
486  entry = io_ctl->cur;
487  entry->offset = cpu_to_le64(offset);
488  entry->bytes = cpu_to_le64(bytes);
489  entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
490  BTRFS_FREE_SPACE_EXTENT;
491  io_ctl->cur += sizeof(struct btrfs_free_space_entry);
492  io_ctl->size -= sizeof(struct btrfs_free_space_entry);
493 
494  if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
495  return 0;
496 
497  io_ctl_set_crc(io_ctl, io_ctl->index - 1);
498 
499  /* No more pages to map */
500  if (io_ctl->index >= io_ctl->num_pages)
501  return 0;
502 
503  /* map the next page */
504  io_ctl_map_page(io_ctl, 1);
505  return 0;
506 }
507 
508 static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
509 {
510  if (!io_ctl->cur)
511  return -ENOSPC;
512 
513  /*
514  * If we aren't at the start of the current page, unmap this one and
515  * map the next one if there is any left.
516  */
517  if (io_ctl->cur != io_ctl->orig) {
518  io_ctl_set_crc(io_ctl, io_ctl->index - 1);
519  if (io_ctl->index >= io_ctl->num_pages)
520  return -ENOSPC;
521  io_ctl_map_page(io_ctl, 0);
522  }
523 
524  memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
525  io_ctl_set_crc(io_ctl, io_ctl->index - 1);
526  if (io_ctl->index < io_ctl->num_pages)
527  io_ctl_map_page(io_ctl, 0);
528  return 0;
529 }
530 
531 static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
532 {
533  /*
534  * If we're not on the boundary we know we've modified the page and we
535  * need to crc the page.
536  */
537  if (io_ctl->cur != io_ctl->orig)
538  io_ctl_set_crc(io_ctl, io_ctl->index - 1);
539  else
540  io_ctl_unmap_page(io_ctl);
541 
542  while (io_ctl->index < io_ctl->num_pages) {
543  io_ctl_map_page(io_ctl, 1);
544  io_ctl_set_crc(io_ctl, io_ctl->index - 1);
545  }
546 }
547 
548 static int io_ctl_read_entry(struct io_ctl *io_ctl,
549  struct btrfs_free_space *entry, u8 *type)
550 {
551  struct btrfs_free_space_entry *e;
552  int ret;
553 
554  if (!io_ctl->cur) {
555  ret = io_ctl_check_crc(io_ctl, io_ctl->index);
556  if (ret)
557  return ret;
558  }
559 
560  e = io_ctl->cur;
561  entry->offset = le64_to_cpu(e->offset);
562  entry->bytes = le64_to_cpu(e->bytes);
563  *type = e->type;
564  io_ctl->cur += sizeof(struct btrfs_free_space_entry);
565  io_ctl->size -= sizeof(struct btrfs_free_space_entry);
566 
567  if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
568  return 0;
569 
570  io_ctl_unmap_page(io_ctl);
571 
572  return 0;
573 }
574 
575 static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
576  struct btrfs_free_space *entry)
577 {
578  int ret;
579 
580  ret = io_ctl_check_crc(io_ctl, io_ctl->index);
581  if (ret)
582  return ret;
583 
584  memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
585  io_ctl_unmap_page(io_ctl);
586 
587  return 0;
588 }
589 
590 /*
591  * Since we attach pinned extents after the fact we can have contiguous sections
592  * of free space that are split up in entries. This poses a problem with the
593  * tree logging stuff since it could have allocated across what appears to be 2
594  * entries since we would have merged the entries when adding the pinned extents
595  * back to the free space cache. So run through the space cache that we just
596  * loaded and merge contiguous entries. This will make the log replay stuff not
597  * blow up and it will make for nicer allocator behavior.
598  */
599 static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
600 {
601  struct btrfs_free_space *e, *prev = NULL;
602  struct rb_node *n;
603 
604 again:
605  spin_lock(&ctl->tree_lock);
606  for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
607  e = rb_entry(n, struct btrfs_free_space, offset_index);
608  if (!prev)
609  goto next;
610  if (e->bitmap || prev->bitmap)
611  goto next;
612  if (prev->offset + prev->bytes == e->offset) {
613  unlink_free_space(ctl, prev);
614  unlink_free_space(ctl, e);
615  prev->bytes += e->bytes;
616  kmem_cache_free(btrfs_free_space_cachep, e);
617  link_free_space(ctl, prev);
618  prev = NULL;
619  spin_unlock(&ctl->tree_lock);
620  goto again;
621  }
622 next:
623  prev = e;
624  }
625  spin_unlock(&ctl->tree_lock);
626 }
627 
628 int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
629  struct btrfs_free_space_ctl *ctl,
630  struct btrfs_path *path, u64 offset)
631 {
632  struct btrfs_free_space_header *header;
633  struct extent_buffer *leaf;
634  struct io_ctl io_ctl;
635  struct btrfs_key key;
636  struct btrfs_free_space *e, *n;
637  struct list_head bitmaps;
638  u64 num_entries;
639  u64 num_bitmaps;
640  u64 generation;
641  u8 type;
642  int ret = 0;
643 
644  INIT_LIST_HEAD(&bitmaps);
645 
646  /* Nothing in the space cache, goodbye */
647  if (!i_size_read(inode))
648  return 0;
649 
650  key.objectid = BTRFS_FREE_SPACE_OBJECTID;
651  key.offset = offset;
652  key.type = 0;
653 
654  ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
655  if (ret < 0)
656  return 0;
657  else if (ret > 0) {
658  btrfs_release_path(path);
659  return 0;
660  }
661 
662  ret = -1;
663 
664  leaf = path->nodes[0];
665  header = btrfs_item_ptr(leaf, path->slots[0],
666  struct btrfs_free_space_header);
667  num_entries = btrfs_free_space_entries(leaf, header);
668  num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
669  generation = btrfs_free_space_generation(leaf, header);
670  btrfs_release_path(path);
671 
672  if (BTRFS_I(inode)->generation != generation) {
673  printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
674  " not match free space cache generation (%llu)\n",
675  (unsigned long long)BTRFS_I(inode)->generation,
676  (unsigned long long)generation);
677  return 0;
678  }
679 
680  if (!num_entries)
681  return 0;
682 
683  ret = io_ctl_init(&io_ctl, inode, root);
684  if (ret)
685  return ret;
686 
687  ret = readahead_cache(inode);
688  if (ret)
689  goto out;
690 
691  ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
692  if (ret)
693  goto out;
694 
695  ret = io_ctl_check_crc(&io_ctl, 0);
696  if (ret)
697  goto free_cache;
698 
699  ret = io_ctl_check_generation(&io_ctl, generation);
700  if (ret)
701  goto free_cache;
702 
703  while (num_entries) {
704  e = kmem_cache_zalloc(btrfs_free_space_cachep,
705  GFP_NOFS);
706  if (!e)
707  goto free_cache;
708 
709  ret = io_ctl_read_entry(&io_ctl, e, &type);
710  if (ret) {
711  kmem_cache_free(btrfs_free_space_cachep, e);
712  goto free_cache;
713  }
714 
715  if (!e->bytes) {
716  kmem_cache_free(btrfs_free_space_cachep, e);
717  goto free_cache;
718  }
719 
720  if (type == BTRFS_FREE_SPACE_EXTENT) {
721  spin_lock(&ctl->tree_lock);
722  ret = link_free_space(ctl, e);
723  spin_unlock(&ctl->tree_lock);
724  if (ret) {
725  printk(KERN_ERR "Duplicate entries in "
726  "free space cache, dumping\n");
727  kmem_cache_free(btrfs_free_space_cachep, e);
728  goto free_cache;
729  }
730  } else {
731  BUG_ON(!num_bitmaps);
732  num_bitmaps--;
733  e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
734  if (!e->bitmap) {
735  kmem_cache_free(
736  btrfs_free_space_cachep, e);
737  goto free_cache;
738  }
739  spin_lock(&ctl->tree_lock);
740  ret = link_free_space(ctl, e);
741  ctl->total_bitmaps++;
742  ctl->op->recalc_thresholds(ctl);
743  spin_unlock(&ctl->tree_lock);
744  if (ret) {
745  printk(KERN_ERR "Duplicate entries in "
746  "free space cache, dumping\n");
747  kmem_cache_free(btrfs_free_space_cachep, e);
748  goto free_cache;
749  }
750  list_add_tail(&e->list, &bitmaps);
751  }
752 
753  num_entries--;
754  }
755 
756  io_ctl_unmap_page(&io_ctl);
757 
758  /*
759  * We add the bitmaps at the end of the entries in order that
760  * the bitmap entries are added to the cache.
761  */
762  list_for_each_entry_safe(e, n, &bitmaps, list) {
763  list_del_init(&e->list);
764  ret = io_ctl_read_bitmap(&io_ctl, e);
765  if (ret)
766  goto free_cache;
767  }
768 
769  io_ctl_drop_pages(&io_ctl);
770  merge_space_tree(ctl);
771  ret = 1;
772 out:
773  io_ctl_free(&io_ctl);
774  return ret;
775 free_cache:
776  io_ctl_drop_pages(&io_ctl);
777  __btrfs_remove_free_space_cache(ctl);
778  goto out;
779 }
780 
781 int load_free_space_cache(struct btrfs_fs_info *fs_info,
782  struct btrfs_block_group_cache *block_group)
783 {
784  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
785  struct btrfs_root *root = fs_info->tree_root;
786  struct inode *inode;
787  struct btrfs_path *path;
788  int ret = 0;
789  bool matched;
790  u64 used = btrfs_block_group_used(&block_group->item);
791 
792  /*
793  * If this block group has been marked to be cleared for one reason or
794  * another then we can't trust the on disk cache, so just return.
795  */
796  spin_lock(&block_group->lock);
797  if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
798  spin_unlock(&block_group->lock);
799  return 0;
800  }
801  spin_unlock(&block_group->lock);
802 
803  path = btrfs_alloc_path();
804  if (!path)
805  return 0;
806  path->search_commit_root = 1;
807  path->skip_locking = 1;
808 
809  inode = lookup_free_space_inode(root, block_group, path);
810  if (IS_ERR(inode)) {
811  btrfs_free_path(path);
812  return 0;
813  }
814 
815  /* We may have converted the inode and made the cache invalid. */
816  spin_lock(&block_group->lock);
817  if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
818  spin_unlock(&block_group->lock);
819  btrfs_free_path(path);
820  goto out;
821  }
822  spin_unlock(&block_group->lock);
823 
824  ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
825  path, block_group->key.objectid);
826  btrfs_free_path(path);
827  if (ret <= 0)
828  goto out;
829 
830  spin_lock(&ctl->tree_lock);
831  matched = (ctl->free_space == (block_group->key.offset - used -
832  block_group->bytes_super));
833  spin_unlock(&ctl->tree_lock);
834 
835  if (!matched) {
836  __btrfs_remove_free_space_cache(ctl);
837  printk(KERN_ERR "block group %llu has wrong amount of free "
838  "space\n", block_group->key.objectid);
839  ret = -1;
840  }
841 out:
842  if (ret < 0) {
843  /* This cache is bogus, make sure it gets cleared */
844  spin_lock(&block_group->lock);
845  block_group->disk_cache_state = BTRFS_DC_CLEAR;
846  spin_unlock(&block_group->lock);
847  ret = 0;
848 
849  printk(KERN_ERR "btrfs: failed to load free space cache "
850  "for block group %llu\n", block_group->key.objectid);
851  }
852 
853  iput(inode);
854  return ret;
855 }
856 
857 /**
858  * __btrfs_write_out_cache - write out cached info to an inode
859  * @root - the root the inode belongs to
860  * @ctl - the free space cache we are going to write out
861  * @block_group - the block_group for this cache if it belongs to a block_group
862  * @trans - the trans handle
863  * @path - the path to use
864  * @offset - the offset for the key we'll insert
865  *
866  * This function writes out a free space cache struct to disk for quick recovery
867  * on mount.  This will return 0 if it was successful in writing the cache out,
868  * and -1 if it was not.
869  */
870 int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
871  struct btrfs_free_space_ctl *ctl,
872  struct btrfs_block_group_cache *block_group,
873  struct btrfs_trans_handle *trans,
874  struct btrfs_path *path, u64 offset)
875 {
876  struct btrfs_free_space_header *header;
877  struct extent_buffer *leaf;
878  struct rb_node *node;
879  struct list_head *pos, *n;
880  struct extent_state *cached_state = NULL;
881  struct btrfs_free_cluster *cluster = NULL;
882  struct extent_io_tree *unpin = NULL;
883  struct io_ctl io_ctl;
884  struct list_head bitmap_list;
885  struct btrfs_key key;
886  u64 start, extent_start, extent_end, len;
887  int entries = 0;
888  int bitmaps = 0;
889  int ret;
890  int err = -1;
891 
892  INIT_LIST_HEAD(&bitmap_list);
893 
894  if (!i_size_read(inode))
895  return -1;
896 
897  ret = io_ctl_init(&io_ctl, inode, root);
898  if (ret)
899  return -1;
900 
901  /* Get the cluster for this block_group if it exists */
902  if (block_group && !list_empty(&block_group->cluster_list))
903  cluster = list_entry(block_group->cluster_list.next,
904  struct btrfs_free_cluster,
905  block_group_list);
906 
907  /* Lock all pages first so we can lock the extent safely. */
908  io_ctl_prepare_pages(&io_ctl, inode, 0);
909 
910  lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
911  0, &cached_state);
912 
913  node = rb_first(&ctl->free_space_offset);
914  if (!node && cluster) {
915  node = rb_first(&cluster->root);
916  cluster = NULL;
917  }
918 
919  /* Make sure we can fit our crcs into the first page */
920  if (io_ctl.check_crcs &&
921  (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
922  WARN_ON(1);
923  goto out_nospc;
924  }
925 
926  io_ctl_set_generation(&io_ctl, trans->transid);
927 
928  /* Write out the extent entries */
929  while (node) {
930  struct btrfs_free_space *e;
931 
932  e = rb_entry(node, struct btrfs_free_space, offset_index);
933  entries++;
934 
935  ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
936  e->bitmap);
937  if (ret)
938  goto out_nospc;
939 
940  if (e->bitmap) {
941  list_add_tail(&e->list, &bitmap_list);
942  bitmaps++;
943  }
944  node = rb_next(node);
945  if (!node && cluster) {
946  node = rb_first(&cluster->root);
947  cluster = NULL;
948  }
949  }
950 
951  /*
952  * We want to add any pinned extents to our free space cache
953  * so we don't leak the space
954  */
955 
956  /*
957  * We shouldn't have switched the pinned extents yet so this is the
958  * right one
959  */
960  unpin = root->fs_info->pinned_extents;
961 
962  if (block_group)
963  start = block_group->key.objectid;
964 
965  while (block_group && (start < block_group->key.objectid +
966  block_group->key.offset)) {
967  ret = find_first_extent_bit(unpin, start,
968  &extent_start, &extent_end,
969  EXTENT_DIRTY, NULL);
970  if (ret) {
971  ret = 0;
972  break;
973  }
974 
975  /* This pinned extent is out of our range */
976  if (extent_start >= block_group->key.objectid +
977  block_group->key.offset)
978  break;
979 
980  extent_start = max(extent_start, start);
981  extent_end = min(block_group->key.objectid +
982  block_group->key.offset, extent_end + 1);
983  len = extent_end - extent_start;
984 
985  entries++;
986  ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
987  if (ret)
988  goto out_nospc;
989 
990  start = extent_end;
991  }
992 
993  /* Write out the bitmaps */
994  list_for_each_safe(pos, n, &bitmap_list) {
995  struct btrfs_free_space *entry =
996  list_entry(pos, struct btrfs_free_space, list);
997 
998  ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
999  if (ret)
1000  goto out_nospc;
1001  list_del_init(&entry->list);
1002  }
1003 
1004  /* Zero out the rest of the pages just to make sure */
1005  io_ctl_zero_remaining_pages(&io_ctl);
1006 
1007  ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
1008  0, i_size_read(inode), &cached_state);
1009  io_ctl_drop_pages(&io_ctl);
1010  unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1011  i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1012 
1013  if (ret)
1014  goto out;
1015 
1016 
1017  btrfs_wait_ordered_range(inode, 0, (u64)-1);
1018 
1019  key.objectid = BTRFS_FREE_SPACE_OBJECTID;
1020  key.offset = offset;
1021  key.type = 0;
1022 
1023  ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1024  if (ret < 0) {
1025  clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
1026  EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
1027  GFP_NOFS);
1028  goto out;
1029  }
1030  leaf = path->nodes[0];
1031  if (ret > 0) {
1032  struct btrfs_key found_key;
1033  BUG_ON(!path->slots[0]);
1034  path->slots[0]--;
1035  btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1036  if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
1037  found_key.offset != offset) {
1038  clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
1039  inode->i_size - 1,
1040  EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
1041  NULL, GFP_NOFS);
1042  btrfs_release_path(path);
1043  goto out;
1044  }
1045  }
1046 
1047  BTRFS_I(inode)->generation = trans->transid;
1048  header = btrfs_item_ptr(leaf, path->slots[0],
1049  struct btrfs_free_space_header);
1050  btrfs_set_free_space_entries(leaf, header, entries);
1051  btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1052  btrfs_set_free_space_generation(leaf, header, trans->transid);
1053  btrfs_mark_buffer_dirty(leaf);
1054  btrfs_release_path(path);
1055 
1056  err = 0;
1057 out:
1058  io_ctl_free(&io_ctl);
1059  if (err) {
1060  invalidate_inode_pages2(inode->i_mapping);
1061  BTRFS_I(inode)->generation = 0;
1062  }
1063  btrfs_update_inode(trans, root, inode);
1064  return err;
1065 
1066 out_nospc:
1067  list_for_each_safe(pos, n, &bitmap_list) {
1068  struct btrfs_free_space *entry =
1069  list_entry(pos, struct btrfs_free_space, list);
1070  list_del_init(&entry->list);
1071  }
1072  io_ctl_drop_pages(&io_ctl);
1073  unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1074  i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1075  goto out;
1076 }
1077 
1078 int btrfs_write_out_cache(struct btrfs_root *root,
1079  struct btrfs_trans_handle *trans,
1080  struct btrfs_block_group_cache *block_group,
1081  struct btrfs_path *path)
1082 {
1083  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1084  struct inode *inode;
1085  int ret = 0;
1086 
1087  root = root->fs_info->tree_root;
1088 
1089  spin_lock(&block_group->lock);
1090  if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1091  spin_unlock(&block_group->lock);
1092  return 0;
1093  }
1094  spin_unlock(&block_group->lock);
1095 
1096  inode = lookup_free_space_inode(root, block_group, path);
1097  if (IS_ERR(inode))
1098  return 0;
1099 
1100  ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
1101  path, block_group->key.objectid);
1102  if (ret) {
1103  spin_lock(&block_group->lock);
1104  block_group->disk_cache_state = BTRFS_DC_ERROR;
1105  spin_unlock(&block_group->lock);
1106  ret = 0;
1107 #ifdef DEBUG
1108  printk(KERN_ERR "btrfs: failed to write free space cache "
1109  "for block group %llu\n", block_group->key.objectid);
1110 #endif
1111  }
1112 
1113  iput(inode);
1114  return ret;
1115 }
1116 
1117 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1118  u64 offset)
1119 {
1120  BUG_ON(offset < bitmap_start);
1121  offset -= bitmap_start;
1122  return (unsigned long)(div_u64(offset, unit));
1123 }
1124 
1125 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
1126 {
1127  return (unsigned long)(div_u64(bytes, unit));
1128 }
1129 
1130 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1131  u64 offset)
1132 {
1133  u64 bitmap_start;
1134  u64 bytes_per_bitmap;
1135 
1136  bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1137  bitmap_start = offset - ctl->start;
1138  bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1139  bitmap_start *= bytes_per_bitmap;
1140  bitmap_start += ctl->start;
1141 
1142  return bitmap_start;
1143 }
1144 
1145 static int tree_insert_offset(struct rb_root *root, u64 offset,
1146  struct rb_node *node, int bitmap)
1147 {
1148  struct rb_node **p = &root->rb_node;
1149  struct rb_node *parent = NULL;
1150  struct btrfs_free_space *info;
1151 
1152  while (*p) {
1153  parent = *p;
1154  info = rb_entry(parent, struct btrfs_free_space, offset_index);
1155 
1156  if (offset < info->offset) {
1157  p = &(*p)->rb_left;
1158  } else if (offset > info->offset) {
1159  p = &(*p)->rb_right;
1160  } else {
1161  /*
1162  * we could have a bitmap entry and an extent entry
1163  * share the same offset. If this is the case, we want
1164  * the extent entry to always be found first if we do a
1165  * linear search through the tree, since we want to have
1166  * the quickest allocation time, and allocating from an
1167  * extent is faster than allocating from a bitmap. So
1168  * if we're inserting a bitmap and we find an entry at
1169  * this offset, we want to go right, or after this entry
1170  * logically. If we are inserting an extent and we've
1171  * found a bitmap, we want to go left, or before
1172  * logically.
1173  */
1174  if (bitmap) {
1175  if (info->bitmap) {
1176  WARN_ON_ONCE(1);
1177  return -EEXIST;
1178  }
1179  p = &(*p)->rb_right;
1180  } else {
1181  if (!info->bitmap) {
1182  WARN_ON_ONCE(1);
1183  return -EEXIST;
1184  }
1185  p = &(*p)->rb_left;
1186  }
1187  }
1188  }
1189 
1190  rb_link_node(node, parent, p);
1191  rb_insert_color(node, root);
1192 
1193  return 0;
1194 }
1195 
1196 /*
1197  * searches the tree for the given offset.
1198  *
1199  * fuzzy - If this is set, then we are trying to make an allocation, and we just
1200  * want a section that has at least bytes size and comes at or after the given
1201  * offset.
1202  */
1203 static struct btrfs_free_space *
1204 tree_search_offset(struct btrfs_free_space_ctl *ctl,
1205  u64 offset, int bitmap_only, int fuzzy)
1206 {
1207  struct rb_node *n = ctl->free_space_offset.rb_node;
1208  struct btrfs_free_space *entry, *prev = NULL;
1209 
1210  /* find entry that is closest to the 'offset' */
1211  while (1) {
1212  if (!n) {
1213  entry = NULL;
1214  break;
1215  }
1216 
1217  entry = rb_entry(n, struct btrfs_free_space, offset_index);
1218  prev = entry;
1219 
1220  if (offset < entry->offset)
1221  n = n->rb_left;
1222  else if (offset > entry->offset)
1223  n = n->rb_right;
1224  else
1225  break;
1226  }
1227 
1228  if (bitmap_only) {
1229  if (!entry)
1230  return NULL;
1231  if (entry->bitmap)
1232  return entry;
1233 
1234  /*
1235  * bitmap entry and extent entry may share same offset,
1236  * in that case, bitmap entry comes after extent entry.
1237  */
1238  n = rb_next(n);
1239  if (!n)
1240  return NULL;
1241  entry = rb_entry(n, struct btrfs_free_space, offset_index);
1242  if (entry->offset != offset)
1243  return NULL;
1244 
1245  WARN_ON(!entry->bitmap);
1246  return entry;
1247  } else if (entry) {
1248  if (entry->bitmap) {
1249  /*
1250  * if previous extent entry covers the offset,
1251  * we should return it instead of the bitmap entry
1252  */
1253  n = &entry->offset_index;
1254  while (1) {
1255  n = rb_prev(n);
1256  if (!n)
1257  break;
1258  prev = rb_entry(n, struct btrfs_free_space,
1259  offset_index);
1260  if (!prev->bitmap) {
1261  if (prev->offset + prev->bytes > offset)
1262  entry = prev;
1263  break;
1264  }
1265  }
1266  }
1267  return entry;
1268  }
1269 
1270  if (!prev)
1271  return NULL;
1272 
1273  /* find last entry before the 'offset' */
1274  entry = prev;
1275  if (entry->offset > offset) {
1276  n = rb_prev(&entry->offset_index);
1277  if (n) {
1278  entry = rb_entry(n, struct btrfs_free_space,
1279  offset_index);
1280  BUG_ON(entry->offset > offset);
1281  } else {
1282  if (fuzzy)
1283  return entry;
1284  else
1285  return NULL;
1286  }
1287  }
1288 
1289  if (entry->bitmap) {
1290  n = &entry->offset_index;
1291  while (1) {
1292  n = rb_prev(n);
1293  if (!n)
1294  break;
1295  prev = rb_entry(n, struct btrfs_free_space,
1296  offset_index);
1297  if (!prev->bitmap) {
1298  if (prev->offset + prev->bytes > offset)
1299  return prev;
1300  break;
1301  }
1302  }
1303  if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1304  return entry;
1305  } else if (entry->offset + entry->bytes > offset)
1306  return entry;
1307 
1308  if (!fuzzy)
1309  return NULL;
1310 
1311  while (1) {
1312  if (entry->bitmap) {
1313  if (entry->offset + BITS_PER_BITMAP *
1314  ctl->unit > offset)
1315  break;
1316  } else {
1317  if (entry->offset + entry->bytes > offset)
1318  break;
1319  }
1320 
1321  n = rb_next(&entry->offset_index);
1322  if (!n)
1323  return NULL;
1324  entry = rb_entry(n, struct btrfs_free_space, offset_index);
1325  }
1326  return entry;
1327 }
1328 
1329 static inline void
1330 __unlink_free_space(struct btrfs_free_space_ctl *ctl,
1331  struct btrfs_free_space *info)
1332 {
1333  rb_erase(&info->offset_index, &ctl->free_space_offset);
1334  ctl->free_extents--;
1335 }
1336 
1337 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1338  struct btrfs_free_space *info)
1339 {
1340  __unlink_free_space(ctl, info);
1341  ctl->free_space -= info->bytes;
1342 }
1343 
1344 static int link_free_space(struct btrfs_free_space_ctl *ctl,
1345  struct btrfs_free_space *info)
1346 {
1347  int ret = 0;
1348 
1349  BUG_ON(!info->bitmap && !info->bytes);
1350  ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1351  &info->offset_index, (info->bitmap != NULL));
1352  if (ret)
1353  return ret;
1354 
1355  ctl->free_space += info->bytes;
1356  ctl->free_extents++;
1357  return ret;
1358 }
1359 
1360 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1361 {
1362  struct btrfs_block_group_cache *block_group = ctl->private;
1363  u64 max_bytes;
1364  u64 bitmap_bytes;
1365  u64 extent_bytes;
1366  u64 size = block_group->key.offset;
1367  u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1368  int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1369 
1370  BUG_ON(ctl->total_bitmaps > max_bitmaps);
1371 
1372  /*
1373  * The goal is to keep the total amount of memory used per 1gb of space
1374  * at or below 32k, so we need to adjust how much memory we allow to be
1375  * used by extent based free space tracking
1376  */
1377  if (size < 1024 * 1024 * 1024)
1378  max_bytes = MAX_CACHE_BYTES_PER_GIG;
1379  else
1380  max_bytes = MAX_CACHE_BYTES_PER_GIG *
1381  div64_u64(size, 1024 * 1024 * 1024);
1382 
1383  /*
1384  * we want to account for 1 more bitmap than what we have so we can make
1385  * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1386  * we add more bitmaps.
1387  */
1388  bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1389 
1390  if (bitmap_bytes >= max_bytes) {
1391  ctl->extents_thresh = 0;
1392  return;
1393  }
1394 
1395  /*
1396  * we want the extent entry threshold to always be at most 1/2 the max
1397  * bytes we can have, or whatever is less than that.
1398  */
1399  extent_bytes = max_bytes - bitmap_bytes;
1400  extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1401 
1402  ctl->extents_thresh =
1403  div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1404 }
1405 
1406 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1407  struct btrfs_free_space *info,
1408  u64 offset, u64 bytes)
1409 {
1410  unsigned long start, count;
1411 
1412  start = offset_to_bit(info->offset, ctl->unit, offset);
1413  count = bytes_to_bits(bytes, ctl->unit);
1414  BUG_ON(start + count > BITS_PER_BITMAP);
1415 
1416  bitmap_clear(info->bitmap, start, count);
1417 
1418  info->bytes -= bytes;
1419 }
1420 
1421 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1422  struct btrfs_free_space *info, u64 offset,
1423  u64 bytes)
1424 {
1425  __bitmap_clear_bits(ctl, info, offset, bytes);
1426  ctl->free_space -= bytes;
1427 }
1428 
1429 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1430  struct btrfs_free_space *info, u64 offset,
1431  u64 bytes)
1432 {
1433  unsigned long start, count;
1434 
1435  start = offset_to_bit(info->offset, ctl->unit, offset);
1436  count = bytes_to_bits(bytes, ctl->unit);
1437  BUG_ON(start + count > BITS_PER_BITMAP);
1438 
1439  bitmap_set(info->bitmap, start, count);
1440 
1441  info->bytes += bytes;
1442  ctl->free_space += bytes;
1443 }
1444 
1445 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1446  struct btrfs_free_space *bitmap_info, u64 *offset,
1447  u64 *bytes)
1448 {
1449  unsigned long found_bits = 0;
1450  unsigned long bits, i;
1451  unsigned long next_zero;
1452 
1453  i = offset_to_bit(bitmap_info->offset, ctl->unit,
1454  max_t(u64, *offset, bitmap_info->offset));
1455  bits = bytes_to_bits(*bytes, ctl->unit);
1456 
1457  for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
1458  next_zero = find_next_zero_bit(bitmap_info->bitmap,
1459  BITS_PER_BITMAP, i);
1460  if ((next_zero - i) >= bits) {
1461  found_bits = next_zero - i;
1462  break;
1463  }
1464  i = next_zero;
1465  }
1466 
1467  if (found_bits) {
1468  *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1469  *bytes = (u64)(found_bits) * ctl->unit;
1470  return 0;
1471  }
1472 
1473  return -1;
1474 }
1475 
1476 static struct btrfs_free_space *
1477 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1478 {
1479  struct btrfs_free_space *entry;
1480  struct rb_node *node;
1481  int ret;
1482 
1483  if (!ctl->free_space_offset.rb_node)
1484  return NULL;
1485 
1486  entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1487  if (!entry)
1488  return NULL;
1489 
1490  for (node = &entry->offset_index; node; node = rb_next(node)) {
1491  entry = rb_entry(node, struct btrfs_free_space, offset_index);
1492  if (entry->bytes < *bytes)
1493  continue;
1494 
1495  if (entry->bitmap) {
1496  ret = search_bitmap(ctl, entry, offset, bytes);
1497  if (!ret)
1498  return entry;
1499  continue;
1500  }
1501 
1502  *offset = entry->offset;
1503  *bytes = entry->bytes;
1504  return entry;
1505  }
1506 
1507  return NULL;
1508 }
1509 
1510 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1511  struct btrfs_free_space *info, u64 offset)
1512 {
1513  info->offset = offset_to_bitmap(ctl, offset);
1514  info->bytes = 0;
1515  INIT_LIST_HEAD(&info->list);
1516  link_free_space(ctl, info);
1517  ctl->total_bitmaps++;
1518 
1519  ctl->op->recalc_thresholds(ctl);
1520 }
1521 
1522 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1523  struct btrfs_free_space *bitmap_info)
1524 {
1525  unlink_free_space(ctl, bitmap_info);
1526  kfree(bitmap_info->bitmap);
1527  kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1528  ctl->total_bitmaps--;
1529  ctl->op->recalc_thresholds(ctl);
1530 }
1531 
1532 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1533  struct btrfs_free_space *bitmap_info,
1534  u64 *offset, u64 *bytes)
1535 {
1536  u64 end;
1537  u64 search_start, search_bytes;
1538  int ret;
1539 
1540 again:
1541  end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1542 
1543  /*
1544  * We need to search for bits in this bitmap. We could only cover some
1545  * of the extent in this bitmap thanks to how we add space, so we need
1546  * to search for as much of it as we can and clear that amount, and then
1547  * go searching for the next bit.
1548  */
1549  search_start = *offset;
1550  search_bytes = ctl->unit;
1551  search_bytes = min(search_bytes, end - search_start + 1);
1552  ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1553  BUG_ON(ret < 0 || search_start != *offset);
1554 
1555  /* We may have found more bits than what we need */
1556  search_bytes = min(search_bytes, *bytes);
1557 
1558  /* Cannot clear past the end of the bitmap */
1559  search_bytes = min(search_bytes, end - search_start + 1);
1560 
1561  bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
1562  *offset += search_bytes;
1563  *bytes -= search_bytes;
1564 
1565  if (*bytes) {
1566  struct rb_node *next = rb_next(&bitmap_info->offset_index);
1567  if (!bitmap_info->bytes)
1568  free_bitmap(ctl, bitmap_info);
1569 
1570  /*
1571  * no entry after this bitmap, but we still have bytes to
1572  * remove, so something has gone wrong.
1573  */
1574  if (!next)
1575  return -EINVAL;
1576 
1577  bitmap_info = rb_entry(next, struct btrfs_free_space,
1578  offset_index);
1579 
1580  /*
1581  * if the next entry isn't a bitmap we need to return to let the
1582  * extent stuff do its work.
1583  */
1584  if (!bitmap_info->bitmap)
1585  return -EAGAIN;
1586 
1587  /*
1588  * Ok the next item is a bitmap, but it may not actually hold
1589  * the information for the rest of this free space stuff, so
1590  * look for it, and if we don't find it return so we can try
1591  * everything over again.
1592  */
1593  search_start = *offset;
1594  search_bytes = ctl->unit;
1595  ret = search_bitmap(ctl, bitmap_info, &search_start,
1596  &search_bytes);
1597  if (ret < 0 || search_start != *offset)
1598  return -EAGAIN;
1599 
1600  goto again;
1601  } else if (!bitmap_info->bytes)
1602  free_bitmap(ctl, bitmap_info);
1603 
1604  return 0;
1605 }
1606 
1607 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1608  struct btrfs_free_space *info, u64 offset,
1609  u64 bytes)
1610 {
1611  u64 bytes_to_set = 0;
1612  u64 end;
1613 
1614  end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1615 
1616  bytes_to_set = min(end - offset, bytes);
1617 
1618  bitmap_set_bits(ctl, info, offset, bytes_to_set);
1619 
1620  return bytes_to_set;
1621 
1622 }
1623 
1624 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1625  struct btrfs_free_space *info)
1626 {
1627  struct btrfs_block_group_cache *block_group = ctl->private;
1628 
1629  /*
1630  * If we are below the extents threshold then we can add this as an
1631  * extent, and don't have to deal with the bitmap
1632  */
1633  if (ctl->free_extents < ctl->extents_thresh) {
1634  /*
1635  * If this block group has some small extents we don't want to
1636  * use up all of our free slots in the cache with them, we want
1637  * to reserve them for larger extents, however if we have plenty
1638  * of cache left then go ahead and add them, no sense in adding
1639  * the overhead of a bitmap if we don't have to.
1640  */
1641  if (info->bytes <= block_group->sectorsize * 4) {
1642  if (ctl->free_extents * 2 <= ctl->extents_thresh)
1643  return false;
1644  } else {
1645  return false;
1646  }
1647  }
1648 
1649  /*
1650  * some block groups are so tiny they can't be enveloped by a bitmap, so
1651  * don't even bother to create a bitmap for this
1652  */
1653  if (BITS_PER_BITMAP * block_group->sectorsize >
1654  block_group->key.offset)
1655  return false;
1656 
1657  return true;
1658 }
1659 
1660 static struct btrfs_free_space_op free_space_op = {
1661  .recalc_thresholds = recalculate_thresholds,
1662  .use_bitmap = use_bitmap,
1663 };
1664 
1665 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1666  struct btrfs_free_space *info)
1667 {
1668  struct btrfs_free_space *bitmap_info;
1669  struct btrfs_block_group_cache *block_group = NULL;
1670  int added = 0;
1671  u64 bytes, offset, bytes_added;
1672  int ret;
1673 
1674  bytes = info->bytes;
1675  offset = info->offset;
1676 
1677  if (!ctl->op->use_bitmap(ctl, info))
1678  return 0;
1679 
1680  if (ctl->op == &free_space_op)
1681  block_group = ctl->private;
1682 again:
1683  /*
1684  * Since we link bitmaps right into the cluster we need to see if we
1685  * have a cluster here, and if so and it has our bitmap we need to add
1686  * the free space to that bitmap.
1687  */
1688  if (block_group && !list_empty(&block_group->cluster_list)) {
1689  struct btrfs_free_cluster *cluster;
1690  struct rb_node *node;
1691  struct btrfs_free_space *entry;
1692 
1693  cluster = list_entry(block_group->cluster_list.next,
1694  struct btrfs_free_cluster,
1695  block_group_list);
1696  spin_lock(&cluster->lock);
1697  node = rb_first(&cluster->root);
1698  if (!node) {
1699  spin_unlock(&cluster->lock);
1700  goto no_cluster_bitmap;
1701  }
1702 
1703  entry = rb_entry(node, struct btrfs_free_space, offset_index);
1704  if (!entry->bitmap) {
1705  spin_unlock(&cluster->lock);
1706  goto no_cluster_bitmap;
1707  }
1708 
1709  if (entry->offset == offset_to_bitmap(ctl, offset)) {
1710  bytes_added = add_bytes_to_bitmap(ctl, entry,
1711  offset, bytes);
1712  bytes -= bytes_added;
1713  offset += bytes_added;
1714  }
1715  spin_unlock(&cluster->lock);
1716  if (!bytes) {
1717  ret = 1;
1718  goto out;
1719  }
1720  }
1721 
1722 no_cluster_bitmap:
1723  bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1724  1, 0);
1725  if (!bitmap_info) {
1726  BUG_ON(added);
1727  goto new_bitmap;
1728  }
1729 
1730  bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1731  bytes -= bytes_added;
1732  offset += bytes_added;
1733  added = 0;
1734 
1735  if (!bytes) {
1736  ret = 1;
1737  goto out;
1738  } else
1739  goto again;
1740 
1741 new_bitmap:
1742  if (info && info->bitmap) {
1743  add_new_bitmap(ctl, info, offset);
1744  added = 1;
1745  info = NULL;
1746  goto again;
1747  } else {
1748  spin_unlock(&ctl->tree_lock);
1749 
1750  /* no pre-allocated info, allocate a new one */
1751  if (!info) {
1752  info = kmem_cache_zalloc(btrfs_free_space_cachep,
1753  GFP_NOFS);
1754  if (!info) {
1755  spin_lock(&ctl->tree_lock);
1756  ret = -ENOMEM;
1757  goto out;
1758  }
1759  }
1760 
1761  /* allocate the bitmap */
1762  info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1763  spin_lock(&ctl->tree_lock);
1764  if (!info->bitmap) {
1765  ret = -ENOMEM;
1766  goto out;
1767  }
1768  goto again;
1769  }
1770 
1771 out:
1772  if (info) {
1773  if (info->bitmap)
1774  kfree(info->bitmap);
1775  kmem_cache_free(btrfs_free_space_cachep, info);
1776  }
1777 
1778  return ret;
1779 }
1780 
1781 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1782  struct btrfs_free_space *info, bool update_stat)
1783 {
1784  struct btrfs_free_space *left_info;
1785  struct btrfs_free_space *right_info;
1786  bool merged = false;
1787  u64 offset = info->offset;
1788  u64 bytes = info->bytes;
1789 
1790  /*
1791  * first we want to see if there is free space adjacent to the range we
1792  * are adding, if there is remove that struct and add a new one to
1793  * cover the entire range
1794  */
1795  right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1796  if (right_info && rb_prev(&right_info->offset_index))
1797  left_info = rb_entry(rb_prev(&right_info->offset_index),
1798  struct btrfs_free_space, offset_index);
1799  else
1800  left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1801 
1802  if (right_info && !right_info->bitmap) {
1803  if (update_stat)
1804  unlink_free_space(ctl, right_info);
1805  else
1806  __unlink_free_space(ctl, right_info);
1807  info->bytes += right_info->bytes;
1808  kmem_cache_free(btrfs_free_space_cachep, right_info);
1809  merged = true;
1810  }
1811 
1812  if (left_info && !left_info->bitmap &&
1813  left_info->offset + left_info->bytes == offset) {
1814  if (update_stat)
1815  unlink_free_space(ctl, left_info);
1816  else
1817  __unlink_free_space(ctl, left_info);
1818  info->offset = left_info->offset;
1819  info->bytes += left_info->bytes;
1820  kmem_cache_free(btrfs_free_space_cachep, left_info);
1821  merged = true;
1822  }
1823 
1824  return merged;
1825 }
1826 
1827 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1828  u64 offset, u64 bytes)
1829 {
1830  struct btrfs_free_space *info;
1831  int ret = 0;
1832 
1833  info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1834  if (!info)
1835  return -ENOMEM;
1836 
1837  info->offset = offset;
1838  info->bytes = bytes;
1839 
1840  spin_lock(&ctl->tree_lock);
1841 
1842  if (try_merge_free_space(ctl, info, true))
1843  goto link;
1844 
1845  /*
1846  * There was no extent directly to the left or right of this new
1847  * extent then we know we're going to have to allocate a new extent, so
1848  * before we do that see if we need to drop this into a bitmap
1849  */
1850  ret = insert_into_bitmap(ctl, info);
1851  if (ret < 0) {
1852  goto out;
1853  } else if (ret) {
1854  ret = 0;
1855  goto out;
1856  }
1857 link:
1858  ret = link_free_space(ctl, info);
1859  if (ret)
1860  kmem_cache_free(btrfs_free_space_cachep, info);
1861 out:
1862  spin_unlock(&ctl->tree_lock);
1863 
1864  if (ret) {
1865  printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
1866  BUG_ON(ret == -EEXIST);
1867  }
1868 
1869  return ret;
1870 }
1871 
1872 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1873  u64 offset, u64 bytes)
1874 {
1875  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1876  struct btrfs_free_space *info;
1877  int ret = 0;
1878 
1879  spin_lock(&ctl->tree_lock);
1880 
1881 again:
1882  if (!bytes)
1883  goto out_lock;
1884 
1885  info = tree_search_offset(ctl, offset, 0, 0);
1886  if (!info) {
1887  /*
1888  * oops didn't find an extent that matched the space we wanted
1889  * to remove, look for a bitmap instead
1890  */
1891  info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1892  1, 0);
1893  if (!info) {
1894  /* the tree logging code might be calling us before we
1895  * have fully loaded the free space rbtree for this
1896  * block group. So it is possible the entry won't
1897  * be in the rbtree yet at all. The caching code
1898  * will make sure not to put it in the rbtree if
1899  * the logging code has pinned it.
1900  */
1901  goto out_lock;
1902  }
1903  }
1904 
1905  if (!info->bitmap) {
1906  unlink_free_space(ctl, info);
1907  if (offset == info->offset) {
1908  u64 to_free = min(bytes, info->bytes);
1909 
1910  info->bytes -= to_free;
1911  info->offset += to_free;
1912  if (info->bytes) {
1913  ret = link_free_space(ctl, info);
1914  WARN_ON(ret);
1915  } else {
1916  kmem_cache_free(btrfs_free_space_cachep, info);
1917  }
1918 
1919  offset += to_free;
1920  bytes -= to_free;
1921  goto again;
1922  } else {
1923  u64 old_end = info->bytes + info->offset;
1924 
1925  info->bytes = offset - info->offset;
1926  ret = link_free_space(ctl, info);
1927  WARN_ON(ret);
1928  if (ret)
1929  goto out_lock;
1930 
1931  /* Not enough bytes in this entry to satisfy us */
1932  if (old_end < offset + bytes) {
1933  bytes -= old_end - offset;
1934  offset = old_end;
1935  goto again;
1936  } else if (old_end == offset + bytes) {
1937  /* all done */
1938  goto out_lock;
1939  }
1940  spin_unlock(&ctl->tree_lock);
1941 
1942  ret = btrfs_add_free_space(block_group, offset + bytes,
1943  old_end - (offset + bytes));
1944  WARN_ON(ret);
1945  goto out;
1946  }
1947  }
1948 
1949  ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1950  if (ret == -EAGAIN)
1951  goto again;
1952  BUG_ON(ret); /* logic error */
1953 out_lock:
1954  spin_unlock(&ctl->tree_lock);
1955 out:
1956  return ret;
1957 }
1958 
1959 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1960  u64 bytes)
1961 {
1962  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1963  struct btrfs_free_space *info;
1964  struct rb_node *n;
1965  int count = 0;
1966 
1967  for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1968  info = rb_entry(n, struct btrfs_free_space, offset_index);
1969  if (info->bytes >= bytes && !block_group->ro)
1970  count++;
1971  printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1972  (unsigned long long)info->offset,
1973  (unsigned long long)info->bytes,
1974  (info->bitmap) ? "yes" : "no");
1975  }
1976  printk(KERN_INFO "block group has cluster?: %s\n",
1977  list_empty(&block_group->cluster_list) ? "no" : "yes");
1978  printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
1979  "\n", count);
1980 }
1981 
1982 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1983 {
1984  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1985 
1986  spin_lock_init(&ctl->tree_lock);
1987  ctl->unit = block_group->sectorsize;
1988  ctl->start = block_group->key.objectid;
1989  ctl->private = block_group;
1990  ctl->op = &free_space_op;
1991 
1992  /*
1993  * we only want to have 32k of ram per block group for keeping
1994  * track of free space, and if we pass 1/2 of that we want to
1995  * start converting things over to using bitmaps
1996  */
1997  ctl->extents_thresh = ((1024 * 32) / 2) /
1998  sizeof(struct btrfs_free_space);
1999 }
2000 
2001 /*
2002  * for a given cluster, put all of its extents back into the free
2003  * space cache. If the block group passed doesn't match the block group
2004  * pointed to by the cluster, someone else raced in and freed the
2005  * cluster already. In that case, we just return without changing anything
2006  */
2007 static int
2008 __btrfs_return_cluster_to_free_space(
2009  struct btrfs_block_group_cache *block_group,
2010  struct btrfs_free_cluster *cluster)
2011 {
2012  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2013  struct btrfs_free_space *entry;
2014  struct rb_node *node;
2015 
2016  spin_lock(&cluster->lock);
2017  if (cluster->block_group != block_group)
2018  goto out;
2019 
2020  cluster->block_group = NULL;
2021  cluster->window_start = 0;
2022  list_del_init(&cluster->block_group_list);
2023 
2024  node = rb_first(&cluster->root);
2025  while (node) {
2026  bool bitmap;
2027 
2028  entry = rb_entry(node, struct btrfs_free_space, offset_index);
2029  node = rb_next(&entry->offset_index);
2030  rb_erase(&entry->offset_index, &cluster->root);
2031 
2032  bitmap = (entry->bitmap != NULL);
2033  if (!bitmap)
2034  try_merge_free_space(ctl, entry, false);
2035  tree_insert_offset(&ctl->free_space_offset,
2036  entry->offset, &entry->offset_index, bitmap);
2037  }
2038  cluster->root = RB_ROOT;
2039 
2040 out:
2041  spin_unlock(&cluster->lock);
2042  btrfs_put_block_group(block_group);
2043  return 0;
2044 }
2045 
2046 void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
2047 {
2048  struct btrfs_free_space *info;
2049  struct rb_node *node;
2050 
2051  while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2052  info = rb_entry(node, struct btrfs_free_space, offset_index);
2053  if (!info->bitmap) {
2054  unlink_free_space(ctl, info);
2055  kmem_cache_free(btrfs_free_space_cachep, info);
2056  } else {
2057  free_bitmap(ctl, info);
2058  }
2059  if (need_resched()) {
2060  spin_unlock(&ctl->tree_lock);
2061  cond_resched();
2062  spin_lock(&ctl->tree_lock);
2063  }
2064  }
2065 }
2066 
2067 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2068 {
2069  spin_lock(&ctl->tree_lock);
2070  __btrfs_remove_free_space_cache_locked(ctl);
2071  spin_unlock(&ctl->tree_lock);
2072 }
2073 
2074 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2075 {
2076  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2077  struct btrfs_free_cluster *cluster;
2078  struct list_head *head;
2079 
2080  spin_lock(&ctl->tree_lock);
2081  while ((head = block_group->cluster_list.next) !=
2082  &block_group->cluster_list) {
2083  cluster = list_entry(head, struct btrfs_free_cluster,
2084  block_group_list);
2085 
2086  WARN_ON(cluster->block_group != block_group);
2087  __btrfs_return_cluster_to_free_space(block_group, cluster);
2088  if (need_resched()) {
2089  spin_unlock(&ctl->tree_lock);
2090  cond_resched();
2091  spin_lock(&ctl->tree_lock);
2092  }
2093  }
2094  __btrfs_remove_free_space_cache_locked(ctl);
2095  spin_unlock(&ctl->tree_lock);
2096 
2097 }
2098 
2099 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2100  u64 offset, u64 bytes, u64 empty_size)
2101 {
2102  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2103  struct btrfs_free_space *entry = NULL;
2104  u64 bytes_search = bytes + empty_size;
2105  u64 ret = 0;
2106 
2107  spin_lock(&ctl->tree_lock);
2108  entry = find_free_space(ctl, &offset, &bytes_search);
2109  if (!entry)
2110  goto out;
2111 
2112  ret = offset;
2113  if (entry->bitmap) {
2114  bitmap_clear_bits(ctl, entry, offset, bytes);
2115  if (!entry->bytes)
2116  free_bitmap(ctl, entry);
2117  } else {
2118  unlink_free_space(ctl, entry);
2119  entry->offset += bytes;
2120  entry->bytes -= bytes;
2121  if (!entry->bytes)
2122  kmem_cache_free(btrfs_free_space_cachep, entry);
2123  else
2124  link_free_space(ctl, entry);
2125  }
2126 
2127 out:
2128  spin_unlock(&ctl->tree_lock);
2129 
2130  return ret;
2131 }
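 /*
  * Annotation (not part of the original source): btrfs_find_space_for_alloc()
  * returns the logical start of a free range able to hold 'bytes' (the search
  * itself asks for bytes + empty_size), or 0 when nothing suitable is
  * tracked.  The winning entry is consumed in place: bits are cleared from a
  * bitmap entry, or an extent entry is shrunk from the front and relinked
  * (freed once it reaches zero bytes).
  */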
2132 
2133 /*
2134  * given a cluster, put all of its extents back into the free space
2135  * cache. If a block group is passed, this function will only free
2136  * a cluster that belongs to the passed block group.
2137  *
2138  * Otherwise, it'll get a reference on the block group pointed to by the
2139  * cluster and remove the cluster from it.
2140  */
2141 int btrfs_return_cluster_to_free_space(
2142  struct btrfs_block_group_cache *block_group,
2143  struct btrfs_free_cluster *cluster)
2144 {
2145  struct btrfs_free_space_ctl *ctl;
2146  int ret;
2147 
2148  /* first, get a safe pointer to the block group */
2149  spin_lock(&cluster->lock);
2150  if (!block_group) {
2151  block_group = cluster->block_group;
2152  if (!block_group) {
2153  spin_unlock(&cluster->lock);
2154  return 0;
2155  }
2156  } else if (cluster->block_group != block_group) {
2157  /* someone else has already freed it don't redo their work */
2158  spin_unlock(&cluster->lock);
2159  return 0;
2160  }
2161  atomic_inc(&block_group->count);
2162  spin_unlock(&cluster->lock);
2163 
2164  ctl = block_group->free_space_ctl;
2165 
2166  /* now return any extents the cluster had on it */
2167  spin_lock(&ctl->tree_lock);
2168  ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
2169  spin_unlock(&ctl->tree_lock);
2170 
2171  /* finally drop our ref */
2172  btrfs_put_block_group(block_group);
2173  return ret;
2174 }
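 /*
  * Annotation (not part of the original source): note the two-step locking
  * above.  cluster->lock is held only long enough to pin down, and take a
  * reference on, the owning block group; the actual hand-back of extents then
  * runs under ctl->tree_lock in __btrfs_return_cluster_to_free_space(), which
  * also drops the reference the cluster had been holding on the block group.
  */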
2175 
2176 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2177  struct btrfs_free_cluster *cluster,
2178  struct btrfs_free_space *entry,
2179  u64 bytes, u64 min_start)
2180 {
2181  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2182  int err;
2183  u64 search_start = cluster->window_start;
2184  u64 search_bytes = bytes;
2185  u64 ret = 0;
2186 
2187  search_start = min_start;
2188  search_bytes = bytes;
2189 
2190  err = search_bitmap(ctl, entry, &search_start, &search_bytes);
2191  if (err)
2192  return 0;
2193 
2194  ret = search_start;
2195  __bitmap_clear_bits(ctl, entry, ret, bytes);
2196 
2197  return ret;
2198 }
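 /*
  * Annotation (not part of the original source): this helper serves the
  * cluster allocator below.  It searches the given bitmap entry, starting at
  * min_start, for a run of at least 'bytes', clears those bits and returns
  * the run's logical offset, or 0 if no run is large enough.  The initial
  * assignments from cluster->window_start are overwritten immediately, so
  * only min_start actually seeds the search.
  */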
2199 
2200 /*
2201  * given a cluster, try to allocate 'bytes' from it, returns 0
2202  * if it couldn't find anything suitably large, or a logical disk offset
2203  * if things worked out
2204  */
2205 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2206  struct btrfs_free_cluster *cluster, u64 bytes,
2207  u64 min_start)
2208 {
2209  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2210  struct btrfs_free_space *entry = NULL;
2211  struct rb_node *node;
2212  u64 ret = 0;
2213 
2214  spin_lock(&cluster->lock);
2215  if (bytes > cluster->max_size)
2216  goto out;
2217 
2218  if (cluster->block_group != block_group)
2219  goto out;
2220 
2221  node = rb_first(&cluster->root);
2222  if (!node)
2223  goto out;
2224 
2225  entry = rb_entry(node, struct btrfs_free_space, offset_index);
2226  while(1) {
2227  if (entry->bytes < bytes ||
2228  (!entry->bitmap && entry->offset < min_start)) {
2229  node = rb_next(&entry->offset_index);
2230  if (!node)
2231  break;
2232  entry = rb_entry(node, struct btrfs_free_space,
2233  offset_index);
2234  continue;
2235  }
2236 
2237  if (entry->bitmap) {
2238  ret = btrfs_alloc_from_bitmap(block_group,
2239  cluster, entry, bytes,
2240  cluster->window_start);
2241  if (ret == 0) {
2242  node = rb_next(&entry->offset_index);
2243  if (!node)
2244  break;
2245  entry = rb_entry(node, struct btrfs_free_space,
2246  offset_index);
2247  continue;
2248  }
2249  cluster->window_start += bytes;
2250  } else {
2251  ret = entry->offset;
2252 
2253  entry->offset += bytes;
2254  entry->bytes -= bytes;
2255  }
2256 
2257  if (entry->bytes == 0)
2258  rb_erase(&entry->offset_index, &cluster->root);
2259  break;
2260  }
2261 out:
2262  spin_unlock(&cluster->lock);
2263 
2264  if (!ret)
2265  return 0;
2266 
2267  spin_lock(&ctl->tree_lock);
2268 
2269  ctl->free_space -= bytes;
2270  if (entry->bytes == 0) {
2271  ctl->free_extents--;
2272  if (entry->bitmap) {
2273  kfree(entry->bitmap);
2274  ctl->total_bitmaps--;
2275  ctl->op->recalc_thresholds(ctl);
2276  }
2277  kmem_cache_free(btrfs_free_space_cachep, entry);
2278  }
2279 
2280  spin_unlock(&ctl->tree_lock);
2281 
2282  return ret;
2283 }
2284 
2285 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2286  struct btrfs_free_space *entry,
2287  struct btrfs_free_cluster *cluster,
2288  u64 offset, u64 bytes,
2289  u64 cont1_bytes, u64 min_bytes)
2290 {
2291  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2292  unsigned long next_zero;
2293  unsigned long i;
2294  unsigned long want_bits;
2295  unsigned long min_bits;
2296  unsigned long found_bits;
2297  unsigned long start = 0;
2298  unsigned long total_found = 0;
2299  int ret;
2300 
2301  i = offset_to_bit(entry->offset, block_group->sectorsize,
2302  max_t(u64, offset, entry->offset));
2303  want_bits = bytes_to_bits(bytes, block_group->sectorsize);
2304  min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
2305 
2306 again:
2307  found_bits = 0;
2308  for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
2309  next_zero = find_next_zero_bit(entry->bitmap,
2310  BITS_PER_BITMAP, i);
2311  if (next_zero - i >= min_bits) {
2312  found_bits = next_zero - i;
2313  break;
2314  }
2315  i = next_zero;
2316  }
2317 
2318  if (!found_bits)
2319  return -ENOSPC;
2320 
2321  if (!total_found) {
2322  start = i;
2323  cluster->max_size = 0;
2324  }
2325 
2326  total_found += found_bits;
2327 
2328  if (cluster->max_size < found_bits * block_group->sectorsize)
2329  cluster->max_size = found_bits * block_group->sectorsize;
2330 
2331  if (total_found < want_bits || cluster->max_size < cont1_bytes) {
2332  i = next_zero + 1;
2333  goto again;
2334  }
2335 
2336  cluster->window_start = start * block_group->sectorsize +
2337  entry->offset;
2338  rb_erase(&entry->offset_index, &ctl->free_space_offset);
2339  ret = tree_insert_offset(&cluster->root, entry->offset,
2340  &entry->offset_index, 1);
2341  BUG_ON(ret); /* -EEXIST; Logic error */
2342 
2343  trace_btrfs_setup_cluster(block_group, cluster,
2344  total_found * block_group->sectorsize, 1);
2345  return 0;
2346 }
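 /*
  * Annotation (not part of the original source): the loop above walks the set
  * bits of the bitmap looking for runs of at least min_bits, accumulating
  * runs until the cluster holds want_bits in total and its largest run
  * reaches cont1_bytes.  On success the bitmap entry is moved from the free
  * space tree into the cluster's rbtree and window_start points at the first
  * run found.
  */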
2347 
2348 /*
2349  * This searches the block group for just extents to fill the cluster with.
2350  * Try to find a cluster with at least bytes total bytes, at least one
2351  * extent of cont1_bytes, and other clusters of at least min_bytes.
2352  */
2353 static noinline int
2354 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2355  struct btrfs_free_cluster *cluster,
2356  struct list_head *bitmaps, u64 offset, u64 bytes,
2357  u64 cont1_bytes, u64 min_bytes)
2358 {
2359  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2360  struct btrfs_free_space *first = NULL;
2361  struct btrfs_free_space *entry = NULL;
2362  struct btrfs_free_space *last;
2363  struct rb_node *node;
2364  u64 window_start;
2365  u64 window_free;
2366  u64 max_extent;
2367  u64 total_size = 0;
2368 
2369  entry = tree_search_offset(ctl, offset, 0, 1);
2370  if (!entry)
2371  return -ENOSPC;
2372 
2373  /*
2374  * We don't want bitmaps, so just move along until we find a normal
2375  * extent entry.
2376  */
2377  while (entry->bitmap || entry->bytes < min_bytes) {
2378  if (entry->bitmap && list_empty(&entry->list))
2379  list_add_tail(&entry->list, bitmaps);
2380  node = rb_next(&entry->offset_index);
2381  if (!node)
2382  return -ENOSPC;
2383  entry = rb_entry(node, struct btrfs_free_space, offset_index);
2384  }
2385 
2386  window_start = entry->offset;
2387  window_free = entry->bytes;
2388  max_extent = entry->bytes;
2389  first = entry;
2390  last = entry;
2391 
2392  for (node = rb_next(&entry->offset_index); node;
2393  node = rb_next(&entry->offset_index)) {
2394  entry = rb_entry(node, struct btrfs_free_space, offset_index);
2395 
2396  if (entry->bitmap) {
2397  if (list_empty(&entry->list))
2398  list_add_tail(&entry->list, bitmaps);
2399  continue;
2400  }
2401 
2402  if (entry->bytes < min_bytes)
2403  continue;
2404 
2405  last = entry;
2406  window_free += entry->bytes;
2407  if (entry->bytes > max_extent)
2408  max_extent = entry->bytes;
2409  }
2410 
2411  if (window_free < bytes || max_extent < cont1_bytes)
2412  return -ENOSPC;
2413 
2414  cluster->window_start = first->offset;
2415 
2416  node = &first->offset_index;
2417 
2418  /*
2419  * now we've found our entries, pull them out of the free space
2420  * cache and put them into the cluster rbtree
2421  */
2422  do {
2423  int ret;
2424 
2425  entry = rb_entry(node, struct btrfs_free_space, offset_index);
2426  node = rb_next(&entry->offset_index);
2427  if (entry->bitmap || entry->bytes < min_bytes)
2428  continue;
2429 
2430  rb_erase(&entry->offset_index, &ctl->free_space_offset);
2431  ret = tree_insert_offset(&cluster->root, entry->offset,
2432  &entry->offset_index, 0);
2433  total_size += entry->bytes;
2434  BUG_ON(ret); /* -EEXIST; Logic error */
2435  } while (node && entry != last);
2436 
2437  cluster->max_size = max_extent;
2438  trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
2439  return 0;
2440 }
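 /*
  * Annotation (not part of the original source): bitmap entries encountered
  * during the scan above are parked on the caller's 'bitmaps' list, so that
  * setup_cluster_bitmap() below can try them without another tree walk if
  * this extent-only pass returns -ENOSPC.
  */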
2441 
2442 /*
2443  * This specifically looks for bitmaps that may work in the cluster, we assume
2444  * that we have already failed to find extents that will work.
2445  */
2446 static noinline int
2447 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2448  struct btrfs_free_cluster *cluster,
2449  struct list_head *bitmaps, u64 offset, u64 bytes,
2450  u64 cont1_bytes, u64 min_bytes)
2451 {
2452  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2453  struct btrfs_free_space *entry;
2454  int ret = -ENOSPC;
2455  u64 bitmap_offset = offset_to_bitmap(ctl, offset);
2456 
2457  if (ctl->total_bitmaps == 0)
2458  return -ENOSPC;
2459 
2460  /*
2461  * The bitmap that covers offset won't be in the list unless offset
2462  * is just its start offset.
2463  */
2464  entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
2465  if (entry->offset != bitmap_offset) {
2466  entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
2467  if (entry && list_empty(&entry->list))
2468  list_add(&entry->list, bitmaps);
2469  }
2470 
2471  list_for_each_entry(entry, bitmaps, list) {
2472  if (entry->bytes < bytes)
2473  continue;
2474  ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2475  bytes, cont1_bytes, min_bytes);
2476  if (!ret)
2477  return 0;
2478  }
2479 
2480  /*
2481  * The bitmaps list has all the bitmaps that record free space
2482  * starting after offset, so no more search is required.
2483  */
2484  return -ENOSPC;
2485 }
2486 
2487 /*
2488  * here we try to find a cluster of blocks in a block group. The goal
2489  * is to find at least bytes+empty_size.
2490  * We might not find them all in one contiguous area.
2491  *
2492  * returns zero and sets up cluster if things worked out, otherwise
2493  * it returns -enospc
2494  */
2495 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2496  struct btrfs_root *root,
2497  struct btrfs_block_group_cache *block_group,
2498  struct btrfs_free_cluster *cluster,
2499  u64 offset, u64 bytes, u64 empty_size)
2500 {
2501  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2502  struct btrfs_free_space *entry, *tmp;
2503  LIST_HEAD(bitmaps);
2504  u64 min_bytes;
2505  u64 cont1_bytes;
2506  int ret;
2507 
2508  /*
2509  * Choose the minimum extent size we'll require for this
2510  * cluster. For SSD_SPREAD, don't allow any fragmentation.
2511  * For metadata, allow allocates with smaller extents. For
2512  * data, keep it dense.
2513  */
2514  if (btrfs_test_opt(root, SSD_SPREAD)) {
2515  cont1_bytes = min_bytes = bytes + empty_size;
2516  } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
2517  cont1_bytes = bytes;
2518  min_bytes = block_group->sectorsize;
2519  } else {
2520  cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
2521  min_bytes = block_group->sectorsize;
2522  }
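 /*
  * Annotation (not part of the original source): an example of the data case
  * above.  A request of bytes = 1 MiB with empty_size = 3 MiB gives
  * cont1_bytes = max(1 MiB, (4 MiB) >> 2) = 1 MiB, so at least one 1 MiB
  * contiguous extent is required, while min_bytes falls back to a single
  * sector for the remaining pieces of the cluster.
  */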
2523 
2524  spin_lock(&ctl->tree_lock);
2525 
2526  /*
2527  * If we know we don't have enough space to make a cluster don't even
2528  * bother doing all the work to try and find one.
2529  */
2530  if (ctl->free_space < bytes) {
2531  spin_unlock(&ctl->tree_lock);
2532  return -ENOSPC;
2533  }
2534 
2535  spin_lock(&cluster->lock);
2536 
2537  /* someone already found a cluster, hooray */
2538  if (cluster->block_group) {
2539  ret = 0;
2540  goto out;
2541  }
2542 
2543  trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
2544  min_bytes);
2545 
2546  INIT_LIST_HEAD(&bitmaps);
2547  ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2548  bytes + empty_size,
2549  cont1_bytes, min_bytes);
2550  if (ret)
2551  ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2552  offset, bytes + empty_size,
2553  cont1_bytes, min_bytes);
2554 
2555  /* Clear our temporary list */
2556  list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2557  list_del_init(&entry->list);
2558 
2559  if (!ret) {
2560  atomic_inc(&block_group->count);
2561  list_add_tail(&cluster->block_group_list,
2562  &block_group->cluster_list);
2563  cluster->block_group = block_group;
2564  } else {
2565  trace_btrfs_failed_cluster_setup(block_group);
2566  }
2567 out:
2568  spin_unlock(&cluster->lock);
2569  spin_unlock(&ctl->tree_lock);
2570 
2571  return ret;
2572 }
2573 
2574 /*
2575  * simple code to zero out a cluster
2576  */
2577 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2578 {
2579  spin_lock_init(&cluster->lock);
2580  spin_lock_init(&cluster->refill_lock);
2581  cluster->root = RB_ROOT;
2582  cluster->max_size = 0;
2583  INIT_LIST_HEAD(&cluster->block_group_list);
2584  cluster->block_group = NULL;
2585 }
2586 
2587 static int do_trimming(struct btrfs_block_group_cache *block_group,
2588  u64 *total_trimmed, u64 start, u64 bytes,
2589  u64 reserved_start, u64 reserved_bytes)
2590 {
2591  struct btrfs_space_info *space_info = block_group->space_info;
2592  struct btrfs_fs_info *fs_info = block_group->fs_info;
2593  int ret;
2594  int update = 0;
2595  u64 trimmed = 0;
2596 
2597  spin_lock(&space_info->lock);
2598  spin_lock(&block_group->lock);
2599  if (!block_group->ro) {
2600  block_group->reserved += reserved_bytes;
2601  space_info->bytes_reserved += reserved_bytes;
2602  update = 1;
2603  }
2604  spin_unlock(&block_group->lock);
2605  spin_unlock(&space_info->lock);
2606 
2607  ret = btrfs_error_discard_extent(fs_info->extent_root,
2608  start, bytes, &trimmed);
2609  if (!ret)
2610  *total_trimmed += trimmed;
2611 
2612  btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
2613 
2614  if (update) {
2615  spin_lock(&space_info->lock);
2616  spin_lock(&block_group->lock);
2617  if (block_group->ro)
2618  space_info->bytes_readonly += reserved_bytes;
2619  block_group->reserved -= reserved_bytes;
2620  space_info->bytes_reserved -= reserved_bytes;
2621  spin_unlock(&space_info->lock);
2622  spin_unlock(&block_group->lock);
2623  }
2624 
2625  return ret;
2626 }
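 /*
  * Annotation (not part of the original source): do_trimming() temporarily
  * accounts the range as reserved (unless the block group is read-only),
  * issues the discard, hands the reserved extent back to the free space
  * cache, and only then undoes the reservation, keeping the space_info
  * counters consistent while the discard is in flight.
  */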
2627 
2628 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
2629  u64 *total_trimmed, u64 start, u64 end, u64 minlen)
2630 {
2631  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2632  struct btrfs_free_space *entry;
2633  struct rb_node *node;
2634  int ret = 0;
2635  u64 extent_start;
2636  u64 extent_bytes;
2637  u64 bytes;
2638 
2639  while (start < end) {
2640  spin_lock(&ctl->tree_lock);
2641 
2642  if (ctl->free_space < minlen) {
2643  spin_unlock(&ctl->tree_lock);
2644  break;
2645  }
2646 
2647  entry = tree_search_offset(ctl, start, 0, 1);
2648  if (!entry) {
2649  spin_unlock(&ctl->tree_lock);
2650  break;
2651  }
2652 
2653  /* skip bitmaps */
2654  while (entry->bitmap) {
2655  node = rb_next(&entry->offset_index);
2656  if (!node) {
2657  spin_unlock(&ctl->tree_lock);
2658  goto out;
2659  }
2660  entry = rb_entry(node, struct btrfs_free_space,
2661  offset_index);
2662  }
2663 
2664  if (entry->offset >= end) {
2665  spin_unlock(&ctl->tree_lock);
2666  break;
2667  }
2668 
2669  extent_start = entry->offset;
2670  extent_bytes = entry->bytes;
2671  start = max(start, extent_start);
2672  bytes = min(extent_start + extent_bytes, end) - start;
2673  if (bytes < minlen) {
2674  spin_unlock(&ctl->tree_lock);
2675  goto next;
2676  }
2677 
2678  unlink_free_space(ctl, entry);
2679  kmem_cache_free(btrfs_free_space_cachep, entry);
2680 
2681  spin_unlock(&ctl->tree_lock);
2682 
2683  ret = do_trimming(block_group, total_trimmed, start, bytes,
2684  extent_start, extent_bytes);
2685  if (ret)
2686  break;
2687 next:
2688  start += bytes;
2689 
2690  if (fatal_signal_pending(current)) {
2691  ret = -ERESTARTSYS;
2692  break;
2693  }
2694 
2695  cond_resched();
2696  }
2697 out:
2698  return ret;
2699 }
2700 
2701 static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
2702  u64 *total_trimmed, u64 start, u64 end, u64 minlen)
2703 {
2704  struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2705  struct btrfs_free_space *entry;
2706  int ret = 0;
2707  int ret2;
2708  u64 bytes;
2709  u64 offset = offset_to_bitmap(ctl, start);
2710 
2711  while (offset < end) {
2712  bool next_bitmap = false;
2713 
2714  spin_lock(&ctl->tree_lock);
2715 
2716  if (ctl->free_space < minlen) {
2717  spin_unlock(&ctl->tree_lock);
2718  break;
2719  }
2720 
2721  entry = tree_search_offset(ctl, offset, 1, 0);
2722  if (!entry) {
2723  spin_unlock(&ctl->tree_lock);
2724  next_bitmap = true;
2725  goto next;
2726  }
2727 
2728  bytes = minlen;
2729  ret2 = search_bitmap(ctl, entry, &start, &bytes);
2730  if (ret2 || start >= end) {
2731  spin_unlock(&ctl->tree_lock);
2732  next_bitmap = true;
2733  goto next;
2734  }
2735 
2736  bytes = min(bytes, end - start);
2737  if (bytes < minlen) {
2738  spin_unlock(&ctl->tree_lock);
2739  goto next;
2740  }
2741 
2742  bitmap_clear_bits(ctl, entry, start, bytes);
2743  if (entry->bytes == 0)
2744  free_bitmap(ctl, entry);
2745 
2746  spin_unlock(&ctl->tree_lock);
2747 
2748  ret = do_trimming(block_group, total_trimmed, start, bytes,
2749  start, bytes);
2750  if (ret)
2751  break;
2752 next:
2753  if (next_bitmap) {
2754  offset += BITS_PER_BITMAP * ctl->unit;
2755  } else {
2756  start += bytes;
2757  if (start >= offset + BITS_PER_BITMAP * ctl->unit)
2758  offset += BITS_PER_BITMAP * ctl->unit;
2759  }
2760 
2761  if (fatal_signal_pending(current)) {
2762  ret = -ERESTARTSYS;
2763  break;
2764  }
2765 
2766  cond_resched();
2767  }
2768 
2769  return ret;
2770 }
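 /*
  * Annotation (not part of the original source): trim_no_bitmap() and
  * trim_bitmaps() mirror each other.  The former walks plain extent entries
  * in [start, end), the latter walks bitmap entries one
  * BITS_PER_BITMAP * ctl->unit window at a time.  Both drop tree_lock before
  * calling do_trimming() and bail out with -ERESTARTSYS if a fatal signal is
  * pending.
  */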
2771 
2772 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2773  u64 *trimmed, u64 start, u64 end, u64 minlen)
2774 {
2775  int ret;
2776 
2777  *trimmed = 0;
2778 
2779  ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
2780  if (ret)
2781  return ret;
2782 
2783  ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
2784 
2785  return ret;
2786 }
2787 
2788 /*
2789  * Find the left-most item in the cache tree, and then return the
2790  * smallest inode number in the item.
2791  *
2792  * Note: the returned inode number may not be the smallest one in
2793  * the tree, if the left-most item is a bitmap.
2794  */
2795 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2796 {
2797  struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2798  struct btrfs_free_space *entry = NULL;
2799  u64 ino = 0;
2800 
2801  spin_lock(&ctl->tree_lock);
2802 
2803  if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2804  goto out;
2805 
2806  entry = rb_entry(rb_first(&ctl->free_space_offset),
2807  struct btrfs_free_space, offset_index);
2808 
2809  if (!entry->bitmap) {
2810  ino = entry->offset;
2811 
2812  unlink_free_space(ctl, entry);
2813  entry->offset++;
2814  entry->bytes--;
2815  if (!entry->bytes)
2816  kmem_cache_free(btrfs_free_space_cachep, entry);
2817  else
2818  link_free_space(ctl, entry);
2819  } else {
2820  u64 offset = 0;
2821  u64 count = 1;
2822  int ret;
2823 
2824  ret = search_bitmap(ctl, entry, &offset, &count);
2825  /* Logic error; Should be empty if it can't find anything */
2826  BUG_ON(ret);
2827 
2828  ino = offset;
2829  bitmap_clear_bits(ctl, entry, offset, 1);
2830  if (entry->bytes == 0)
2831  free_bitmap(ctl, entry);
2832  }
2833 out:
2834  spin_unlock(&ctl->tree_lock);
2835 
2836  return ino;
2837 }
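 /*
  * Annotation (not part of the original source): btrfs_find_ino_for_alloc()
  * consumes exactly one inode number from the free-ino cache: the left-most
  * extent entry gives up its first offset, or a single bit is cleared from
  * the left-most bitmap.  A return value of 0 means the cache is currently
  * empty.
  */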
2838 
2839 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2840  struct btrfs_path *path)
2841 {
2842  struct inode *inode = NULL;
2843 
2844  spin_lock(&root->cache_lock);
2845  if (root->cache_inode)
2846  inode = igrab(root->cache_inode);
2847  spin_unlock(&root->cache_lock);
2848  if (inode)
2849  return inode;
2850 
2851  inode = __lookup_free_space_inode(root, path, 0);
2852  if (IS_ERR(inode))
2853  return inode;
2854 
2855  spin_lock(&root->cache_lock);
2856  if (!btrfs_fs_closing(root->fs_info))
2857  root->cache_inode = igrab(inode);
2858  spin_unlock(&root->cache_lock);
2859 
2860  return inode;
2861 }
2862 
2863 int create_free_ino_inode(struct btrfs_root *root,
2864  struct btrfs_trans_handle *trans,
2865  struct btrfs_path *path)
2866 {
2867  return __create_free_space_inode(root, trans, path,
2868  BTRFS_FREE_INO_OBJECTID, 0);
2869 }
2870 
2871 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2872 {
2873  struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2874  struct btrfs_path *path;
2875  struct inode *inode;
2876  int ret = 0;
2877  u64 root_gen = btrfs_root_generation(&root->root_item);
2878 
2879  if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2880  return 0;
2881 
2882  /*
2883  * If we're unmounting then just return, since this does a search on the
2884  * normal root and not the commit root and we could deadlock.
2885  */
2886  if (btrfs_fs_closing(fs_info))
2887  return 0;
2888 
2889  path = btrfs_alloc_path();
2890  if (!path)
2891  return 0;
2892 
2893  inode = lookup_free_ino_inode(root, path);
2894  if (IS_ERR(inode))
2895  goto out;
2896 
2897  if (root_gen != BTRFS_I(inode)->generation)
2898  goto out_put;
2899 
2900  ret = __load_free_space_cache(root, inode, ctl, path, 0);
2901 
2902  if (ret < 0)
2903  printk(KERN_ERR "btrfs: failed to load free ino cache for "
2904  "root %llu\n", root->root_key.objectid);
2905 out_put:
2906  iput(inode);
2907 out:
2908  btrfs_free_path(path);
2909  return ret;
2910 }
2911 
2912 int btrfs_write_out_ino_cache(struct btrfs_root *root,
2913  struct btrfs_trans_handle *trans,
2914  struct btrfs_path *path)
2915 {
2916  struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2917  struct inode *inode;
2918  int ret;
2919 
2920  if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2921  return 0;
2922 
2923  inode = lookup_free_ino_inode(root, path);
2924  if (IS_ERR(inode))
2925  return 0;
2926 
2927  ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2928  if (ret) {
2929  btrfs_delalloc_release_metadata(inode, inode->i_size);
2930 #ifdef DEBUG
2931  printk(KERN_ERR "btrfs: failed to write free ino cache "
2932  "for root %llu\n", root->root_key.objectid);
2933 #endif
2934  }
2935 
2936  iput(inode);
2937  return ret;
2938 }