#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/prefetch.h>

#define BUFFER_LRU_MAX 64
	return btrfs_sb(tree->mapping->host->i_sb);

	if (!extent_state_cache)

	if (!extent_buffer_cache)
		goto free_state_cache;
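/*
 * extent_io_exit(): before destroying the state/buffer caches, walk the
 * leak lists and report any extent_state or extent_buffer objects that
 * were never freed.
 */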
	while (!list_empty(&states)) {

		       "state %lu in tree %p refs %d\n",
		       (unsigned long long)state->start,
		       (unsigned long long)state->end,

	while (!list_empty(&buffers)) {

		       "refs %d\n", (unsigned long long)eb->start,

	if (extent_state_cache)

	if (extent_buffer_cache)
	spin_unlock_irqrestore(&leak_lock, flags);

	trace_alloc_extent_state(state, mask, _RET_IP_);

	spin_unlock_irqrestore(&leak_lock, flags);

	trace_free_extent_state(state, _RET_IP_);

		else if (offset > entry->end)

	rb_link_node(node, parent, p);

		else if (offset > entry->end)

	while (prev && offset > prev_entry->end) {

	ret = __etree_search(tree, offset, &prev, NULL);
	if (tree->ops && tree->ops->merge_extent_hook)
		tree->ops->merge_extent_hook(tree->mapping->host, new,

	if (other->end == state->start - 1 &&

			merge_cb(tree, state, other);

	if (other->start == state->end + 1 &&

			merge_cb(tree, state, other);
	if (tree->ops && tree->ops->set_bit_hook)
		tree->ops->set_bit_hook(tree->mapping->host, state, bits);

	if (tree->ops && tree->ops->clear_bit_hook)
		tree->ops->clear_bit_hook(tree->mapping->host, state, bits);

		       (unsigned long long)end,
		       (unsigned long long)start);

	set_state_bits(tree, state, bits);

		       "%llu %llu\n", (unsigned long long)found->start,
		       (unsigned long long)found->end,
		       (unsigned long long)start, (unsigned long long)end);

	merge_state(tree, state);
	if (tree->ops && tree->ops->split_extent_hook)
		tree->ops->split_extent_hook(tree->mapping->host, orig, split);

	split_cb(tree, orig, split);

	prealloc->end = split - 1;

	clear_state_cb(tree, state, bits);
	state->state &= ~bits_to_clear;

	if (state->state == 0) {

			merge_state(tree, state);

alloc_extent_state_atomic(struct extent_state *prealloc)
	btrfs_panic(tree_fs_info(tree), err, "Locking error: "
		    "Extent tree was modified by another "
		    "thread while locked.");

		     int bits, int wake, int delete,
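/*
 * clear_extent_bit() clears the given bits from the range [start, end],
 * splitting states that straddle the range boundaries; 'wake' wakes any
 * waiters and 'delete' removes fully-cleared states from the tree.
 */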
	prealloc = alloc_extent_state(mask);

	spin_lock(&tree->lock);

		cached = *cached_state;

			*cached_state = NULL;

	if (cached && cached->tree && cached->start <= start &&
	    cached->end > start) {

	node = tree_search(tree, start);

	if (state->start > end)

	last_end = state->end;

	if (!(state->state & bits)) {

	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);

	if (state->end <= end) {
		state = clear_state_bit(tree, state, &bits, wake);

	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);

		clear_state_bit(tree, prealloc, &bits, wake);

	state = clear_state_bit(tree, state, &bits, wake);

	if (last_end == (u64)-1)

	start = last_end + 1;
	if (start <= end && state && !need_resched())

	spin_unlock(&tree->lock);

	spin_unlock(&tree->lock);
	if (mask & __GFP_WAIT)

	spin_unlock(&tree->lock);

	spin_lock(&tree->lock);

	spin_lock(&tree->lock);
	node = tree_search(tree, start);

	if (state->start > end)

	if (state->state & bits) {
		start = state->start;

		wait_on_state(tree, state);

	start = state->end + 1;

	spin_unlock(&tree->lock);
	set_state_cb(tree, state, bits);
	if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {

	state->state |= bits_to_set;

	if (cached_ptr && !(*cached_ptr)) {

static void uncache_state(struct extent_state **cached_ptr)

	if (cached_ptr && (*cached_ptr)) {
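/*
 * __set_extent_bit() sets the given bits on [start, end].  When
 * exclusive_bits is non-zero (the lock_extent() path), a conflicting
 * state results in -EEXIST and *failed_start reports where the
 * conflict begins.
 */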
		   int bits, int exclusive_bits, u64 *failed_start,

	prealloc = alloc_extent_state(mask);

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&

	node = tree_search(tree, start);

	prealloc = alloc_extent_state_atomic(prealloc);

	err = insert_state(tree, prealloc, start, end, &bits);

	last_start = state->start;
	last_end = state->end;

	if (state->start == start && state->end <= end) {
		if (state->state & exclusive_bits) {
			*failed_start = state->start;

		set_state_bits(tree, state, &bits);
		cache_state(state, cached_state);
		merge_state(tree, state);
		if (last_end == (u64)-1)

		start = last_end + 1;

		if (start < end && state && state->start == start &&

	if (state->start < start) {
		if (state->state & exclusive_bits) {
			*failed_start = start;

		prealloc = alloc_extent_state_atomic(prealloc);

		if (state->end <= end) {
			set_state_bits(tree, state, &bits);
			cache_state(state, cached_state);
			merge_state(tree, state);
			if (last_end == (u64)-1)

			start = last_end + 1;

			if (start < end && state && state->start == start &&

	if (state->start > start) {

		if (end < last_start)

			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);

		err = insert_state(tree, prealloc, start, this_end,

		cache_state(prealloc, cached_state);

		start = this_end + 1;

	if (state->start <= end && state->end > end) {
		if (state->state & exclusive_bits) {
			*failed_start = start;

		prealloc = alloc_extent_state_atomic(prealloc);

		set_state_bits(tree, prealloc, &bits);
		cache_state(prealloc, cached_state);
		merge_state(tree, prealloc);

	spin_unlock(&tree->lock);

	spin_unlock(&tree->lock);
	if (mask & __GFP_WAIT)

	return __set_extent_bit(tree, start, end, bits, 0, failed_start,
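/*
 * convert_extent_bit() sets 'bits' and clears 'clear_bits' over
 * [start, end] in a single pass, splitting boundary states as needed.
 */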
		       int bits, int clear_bits,

	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&

	node = tree_search(tree, start);

	prealloc = alloc_extent_state_atomic(prealloc);

	err = insert_state(tree, prealloc, start, end, &bits);

	last_start = state->start;
	last_end = state->end;

	if (state->start == start && state->end <= end) {
		set_state_bits(tree, state, &bits);
		cache_state(state, cached_state);
		state = clear_state_bit(tree, state, &clear_bits, 0);
		if (last_end == (u64)-1)

		start = last_end + 1;
		if (start < end && state && state->start == start &&

	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);

		if (state->end <= end) {
			set_state_bits(tree, state, &bits);
			cache_state(state, cached_state);
			state = clear_state_bit(tree, state, &clear_bits, 0);
			if (last_end == (u64)-1)

			start = last_end + 1;
			if (start < end && state && state->start == start &&

	if (state->start > start) {

		if (end < last_start)

			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);

		err = insert_state(tree, prealloc, start, this_end,

		cache_state(prealloc, cached_state);

		start = this_end + 1;

	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);

		err = split_state(tree, state, prealloc, end + 1);

		set_state_bits(tree, prealloc, &bits);
		cache_state(prealloc, cached_state);
		clear_state_bit(tree, prealloc, &clear_bits, 0);

	spin_unlock(&tree->lock);

	spin_unlock(&tree->lock);
	if (mask & __GFP_WAIT)
		    int bits, gfp_t mask)

		      int bits, gfp_t mask)

			      NULL, cached_state, mask);

			      NULL, cached_state, mask);

			      cached_state, mask);

			      cached_state, mask);

		err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,

			start = failed_start;

	if (failed_start > start)
	while (index <= end_index) {

		set_page_writeback(page);

			    u64 start, int bits)

	node = tree_search(tree, start);

		if (state->end >= start && (state->state & bits))
			  u64 *start_ret, u64 *end_ret, int bits,

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->end == start - 1 && state->tree) {

				if (state->state & bits)

			*cached_state = NULL;

		*cached_state = NULL;

		cache_state(state, cached_state);
		*start_ret = state->start;
		*end_ret = state->end;

	spin_unlock(&tree->lock);
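/*
 * find_delalloc_range() walks the tree from *start looking for a
 * contiguous run of delalloc state, stopping once max_bytes has been
 * accumulated.
 */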
	spin_lock(&tree->lock);

	node = tree_search(tree, cur_start);

		if (found && (state->start != cur_start ||

			*start = state->start;
			*cached_state = state;

		cur_start = state->end + 1;

		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)

	spin_unlock(&tree->lock);
				       struct page *locked_page,

	struct page *pages[16];

	unsigned long nr_pages = end_index - index + 1;

	if (index == locked_page->index && end_index == index)

	while (nr_pages > 0) {
			     min_t(unsigned long, nr_pages,

		for (i = 0; i < ret; i++) {
			if (pages[i] != locked_page)
static noinline int lock_delalloc_pages(struct inode *inode,
					struct page *locked_page,

	unsigned long start_index = index;

	unsigned long pages_locked = 0;
	struct page *pages[16];

	if (index == locked_page->index && index == end_index)

	nrpages = end_index - index + 1;
	while (nrpages > 0) {
			     min_t(unsigned long,

		for (i = 0; i < ret; i++) {

			if (pages[i] != locked_page) {
				lock_page(pages[i]);
				if (!PageDirty(pages[i]) ||

	if (ret && pages_locked) {
		__unlock_for_delalloc(inode, locked_page,

			      ((u64)(start_index + pages_locked - 1)) <<
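/*
 * find_lock_delalloc_range() finds a delalloc range that includes the
 * page already locked by the caller, locks the remaining pages and the
 * extent range itself, then hands the range back to the caller.
 */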
static noinline u64 find_lock_delalloc_range(struct inode *inode,
					     struct page *locked_page,

	delalloc_start = *start;

	found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
				    max_bytes, &cached_state);
	if (!found || delalloc_end <= *start) {
		*start = delalloc_start;
		*end = delalloc_end;

	if (delalloc_start < *start)
		delalloc_start = *start;

	if (delalloc_end + 1 - delalloc_start > max_bytes && loops)

	ret = lock_delalloc_pages(inode, locked_page,
				  delalloc_start, delalloc_end);

		unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
		max_bytes = PAGE_CACHE_SIZE - offset;

	__unlock_for_delalloc(inode, locked_page,
			      delalloc_start, delalloc_end);

	*start = delalloc_start;
	*end = delalloc_end;
			u64 start, u64 end, struct page *locked_page,

	struct page *pages[16];

	unsigned long nr_pages = end_index - index + 1;

	while (nr_pages > 0) {
			     min_t(unsigned long,

		for (i = 0; i < ret; i++) {

				SetPagePrivate2(pages[i]);

			if (pages[i] == locked_page) {

			if (op & EXTENT_CLEAR_DIRTY)

				set_page_writeback(pages[i]);
		     u64 *start, u64 search_end, u64 max_bytes,
		     unsigned long bits, int contig)

	u64 total_bytes = 0;

	if (search_end <= cur_start) {

	spin_lock(&tree->lock);
	if (cur_start == 0 && bits == EXTENT_DIRTY) {

	node = tree_search(tree, cur_start);

		if (state->start > search_end)

		if (contig && found && state->start > last + 1)

		if (state->end >= cur_start && (state->state & bits) == bits) {
			total_bytes += min(search_end, state->end) + 1 -

			if (total_bytes >= max_bytes)

				*start = max(cur_start, state->start);

		} else if (contig && found) {

	spin_unlock(&tree->lock);
	spin_lock(&tree->lock);

	node = tree_search(tree, start);

	if (state->start != start) {

	spin_unlock(&tree->lock);

	spin_lock(&tree->lock);

	node = tree_search(tree, start);

	if (state->start != start) {

	spin_unlock(&tree->lock);
	spin_lock(&tree->lock);
	if (cached && cached->tree && cached->start <= start &&
	    cached->end > start)

		node = tree_search(tree, start);
	while (node && start <= end) {

		if (filled && state->start > start) {

		if (state->start > end)

		if (state->state & bits) {

		} else if (filled) {

		if (state->end == (u64)-1)

		start = state->end + 1;

	spin_unlock(&tree->lock);
static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)

	u64 end = start + PAGE_CACHE_SIZE - 1;

		SetPageUptodate(page);

static void check_page_locked(struct extent_io_tree *tree, struct page *page)

	u64 end = start + PAGE_CACHE_SIZE - 1;
	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;

static void repair_io_failure_callback(struct bio *bio, int err)

	bio->bi_private = &compl;
	bio->bi_end_io = repair_io_failure_callback;

			      &map_length, &bbio, mirror_num);

	sector = bbio->stripes[mirror_num-1].physical >> 9;

	dev = bbio->stripes[mirror_num-1].dev;

	bio->bi_bdev = dev->bdev;

	if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {

		      "(dev %s sector %llu)\n", page->mapping->host->i_ino,

		struct page *p = extent_buffer_page(eb, i);

					start, p, mirror_num);
static int clean_io_failure(u64 start, struct page *page)

	u64 private_failure;

	struct inode *inode = page->mapping->host;

				(u64)-1, 1, EXTENT_DIRTY, 0);

		pr_debug("clean_io_failure: freeing dummy error at %llu\n",

	spin_lock(&BTRFS_I(inode)->io_tree.lock);

	spin_unlock(&BTRFS_I(inode)->io_tree.lock);

	if (state && state->start == failrec->start) {
		map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree;

		if (num_copies > 1) {

	ret = free_io_failure(inode, failrec, did_repair);
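/*
 * bio_readpage_error() is called when a read fails verification: it
 * records an io_failure for the range and resubmits the read against
 * the next available mirror.
 */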
static int bio_readpage_error(struct bio *failed_bio, struct page *page,

	struct inode *inode = page->mapping->host;
	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;

		failrec = kzalloc(sizeof(*failrec), GFP_NOFS);

		failrec->len = end - start + 1;

		logical = start - em->start;

			extent_set_compress_type(&failrec->bio_flags,

		pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, "
			 "len=%llu\n", logical, start, failrec->len);

				(u64)(unsigned long)failrec);

		pr_debug("bio_readpage_error: (found) logical=%llu, "
			 "start=%llu, len=%llu, validation=%d\n",

				      &BTRFS_I(inode)->root->fs_info->mapping_tree,

	if (num_copies == 1) {

		pr_debug("bio_readpage_error: cannot repair, num_copies == 1. "
			 "state=%p, num_copies=%d, next_mirror %d, "
			 "failed_mirror %d\n", state, num_copies,

		free_io_failure(inode, failrec, 0);

	spin_lock(&tree->lock);

	spin_unlock(&tree->lock);

	if (failed_bio->bi_vcnt > 1) {

	if (!state || failrec->this_mirror > num_copies) {
		pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, "
			 "next_mirror %d, failed_mirror %d\n", state,

		free_io_failure(inode, failrec, 0);

		free_io_failure(inode, failrec, 0);

	bio->bi_private = state;
	bio->bi_end_io = failed_bio->bi_end_io;
	bio->bi_sector = failrec->logical >> 9;
	bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;

	pr_debug("bio_readpage_error: submitting new read[%#x] to "
		 "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode,

	ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
	int uptodate = (err == 0);

	tree = &BTRFS_I(page->mapping->host)->io_tree;

	if (tree->ops && tree->ops->writepage_end_io_hook) {
		ret = tree->ops->writepage_end_io_hook(page, start,
						       end, NULL, uptodate);

		ClearPageUptodate(page);

static void end_bio_extent_writepage(struct bio *bio, int err)

	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;

		struct page *page = bvec->bv_page;
		tree = &BTRFS_I(page->mapping->host)->io_tree;

		start = ((u64)page->index << PAGE_CACHE_SHIFT) +

		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)

		if (--bvec >= bio->bi_io_vec)

			check_page_writeback(tree, page);
	} while (bvec >= bio->bi_io_vec);
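/*
 * end_bio_extent_readpage() runs per completed read bio: each bvec is
 * verified via the readpage_end_io_hook, repaired or retried on another
 * mirror when possible, and its page is then marked uptodate (or
 * errored) and unlocked.
 */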
static void end_bio_extent_readpage(struct bio *bio, int err)

	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct bio_vec *bvec = bio->bi_io_vec;

		struct page *page = bvec->bv_page;

		pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
			 "mirror=%ld\n", (u64)bio->bi_sector, err,
			 (long int)bio->bi_bdev);
		tree = &BTRFS_I(page->mapping->host)->io_tree;

		start = ((u64)page->index << PAGE_CACHE_SHIFT) +

		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)

		if (++bvec <= bvec_end)

		spin_lock(&tree->lock);

		if (state && state->start == start) {

			cache_state(state, &cached);

		spin_unlock(&tree->lock);

		mirror = (int)(unsigned long)bio->bi_bdev;
		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
			ret = tree->ops->readpage_end_io_hook(page, start, end,

				clean_io_failure(start, page);

		if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
			ret = tree->ops->readpage_io_failed_hook(page, mirror);

			    test_bit(BIO_UPTODATE, &bio->bi_flags))

		} else if (!uptodate) {

			ret = bio_readpage_error(bio, page, start, end, mirror, NULL);

					test_bit(BIO_UPTODATE, &bio->bi_flags);

				uncache_state(&cached);

			SetPageUptodate(page);

			ClearPageUptodate(page);

			check_page_uptodate(tree, page);

			ClearPageUptodate(page);

			check_page_locked(tree, page);
	} while (bvec <= bvec_end);
	bio = bio_alloc(gfp_flags, nr_vecs);

	while (!bio && (nr_vecs /= 2))
		bio = bio_alloc(gfp_flags, nr_vecs);

		bio->bi_bdev = bdev;
		bio->bi_sector = first_sector;
			  int mirror_num, unsigned long bio_flags)

	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct page *page = bvec->bv_page;

	start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;

	bio->bi_private = NULL;

	if (tree->ops && tree->ops->submit_bio_hook)
		ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
						 mirror_num, bio_flags, start);

	if (bio_flagged(bio, BIO_EOPNOTSUPP))
static int merge_bio(struct extent_io_tree *tree, struct page *page,
		     unsigned long offset, size_t size, struct bio *bio,
		     unsigned long bio_flags)

	if (tree->ops && tree->ops->merge_bio_hook)
		ret = tree->ops->merge_bio_hook(page, offset, size, bio,
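/*
 * submit_extent_page() appends the page to the bio being built when it
 * is physically contiguous and the merge hook allows it; otherwise the
 * pending bio is submitted and a new one is started.
 */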
static int submit_extent_page(int rw, struct extent_io_tree *tree,
			      size_t size, unsigned long offset,
			      struct bio **bio_ret,
			      unsigned long max_pages,
			      bio_end_io_t end_io_func,
			      unsigned long prev_bio_flags,
			      unsigned long bio_flags)

	if (bio_ret && *bio_ret) {

			contig = bio->bi_sector == sector;

			contig = bio->bi_sector + (bio->bi_size >> 9) ==

		if (prev_bio_flags != bio_flags || !contig ||
		    merge_bio(tree, page, offset, page_size, bio, bio_flags) ||

			ret = submit_one_bio(rw, bio, mirror_num,

	if (this_compressed)

	bio->bi_end_io = end_io_func;
	bio->bi_private = tree;

		ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
	if (!PagePrivate(page)) {
		SetPagePrivate(page);

		set_page_private(page, (unsigned long)eb);

	if (!PagePrivate(page)) {
		SetPagePrivate(page);
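/*
 * __extent_read_full_page() reads a single page by walking its extent
 * maps: holes and the tail past i_size are zeroed in place, compressed
 * extents set the appropriate bio flags, and everything else is
 * submitted via submit_extent_page().
 */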
				   struct bio **bio, int mirror_num,
				   unsigned long *bio_flags)

	struct inode *inode = page->mapping->host;

	u64 page_end = start + PAGE_CACHE_SIZE - 1;

	u64 last_byte = i_size_read(inode);

	size_t pg_offset = 0;

	size_t disk_io_size;
	size_t blocksize = inode->i_sb->s_blocksize;
	unsigned long this_bio_flag = 0;

		if (!PageUptodate(page)) {
			if (cleancache_get_page(page) == 0) {

	if (page->index == last_byte >> PAGE_CACHE_SHIFT) {

		size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);

			iosize = PAGE_CACHE_SIZE - zero_offset;

			memset(userpage + zero_offset, 0, iosize);

	while (cur <= end) {
		if (cur >= last_byte) {

			iosize = PAGE_CACHE_SIZE - pg_offset;

			memset(userpage + pg_offset, 0, iosize);

		em = get_extent(inode, page, pg_offset, cur,

		if (IS_ERR_OR_NULL(em)) {

			extent_set_compress_type(&this_bio_flag,

		iosize = min(extent_map_end(em) - cur, end - cur + 1);
		cur_end = min(extent_map_end(em) - 1, end);
		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
		if (this_bio_flag & EXTENT_BIO_COMPRESSED) {

			disk_io_size = iosize;

			memset(userpage + pg_offset, 0, iosize);

			pg_offset += iosize;

			check_page_uptodate(tree, page);

			pg_offset += iosize;

			pg_offset += iosize;

		if (tree->ops && tree->ops->readpage_io_hook) {
			ret = tree->ops->readpage_io_hook(page, cur,

			ret = submit_extent_page(READ, tree, page,
					 sector, disk_io_size, pg_offset,
					 end_bio_extent_readpage, mirror_num,

			*bio_flags = this_bio_flag;

		pg_offset += iosize;

	if (!PageError(page))
		SetPageUptodate(page);
	struct bio *bio = NULL;
	unsigned long bio_flags = 0;

	ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,

		ret = submit_one_bio(READ, bio, mirror_num, bio_flags);

static noinline void update_nr_written(struct page *page,
				       unsigned long nr_written)

		page->mapping->writeback_index = page->index + nr_written;
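/*
 * __extent_writepage() writes out one page: delalloc ranges covering
 * the page are filled first, then the page is walked extent by extent
 * and write bios are submitted for the dirty ranges.
 */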
	struct inode *inode = page->mapping->host;

	u64 page_end = start + PAGE_CACHE_SIZE - 1;

	u64 last_byte = i_size_read(inode);

	size_t pg_offset = 0;

	loff_t i_size = i_size_read(inode);

	unsigned long nr_written = 0;
	bool fill_delalloc = true;

		write_flags = WRITE;

	trace___extent_writepage(page, inode, wbc);

	ClearPageError(page);

	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
	if (page->index > end_index ||
	   (page->index == end_index && !pg_offset)) {
		page->mapping->a_ops->invalidatepage(page, 0);

	if (page->index == end_index) {

		memset(userpage + pg_offset, 0,
		       PAGE_CACHE_SIZE - pg_offset);

	if (!tree->ops || !tree->ops->fill_delalloc)
		fill_delalloc = false;

	delalloc_start = start;

		u64 delalloc_to_write = 0;

		update_nr_written(page, wbc, 0);

		while (delalloc_end < page_end) {
			nr_delalloc = find_lock_delalloc_range(inode, tree,

			if (nr_delalloc == 0) {
				delalloc_start = delalloc_end + 1;

			ret = tree->ops->fill_delalloc(inode, page,

			delalloc_to_write += (delalloc_end - delalloc_start +

			delalloc_start = delalloc_end + 1;

			if (delalloc_to_write < thresh * 2)
				thresh = delalloc_to_write;

	if (tree->ops && tree->ops->writepage_start_hook) {
		ret = tree->ops->writepage_start_hook(page, start,

			update_nr_written(page, wbc, nr_written);

	update_nr_written(page, wbc, nr_written + 1);
	if (last_byte <= start) {
		if (tree->ops && tree->ops->writepage_end_io_hook)
			tree->ops->writepage_end_io_hook(page, start,

	blocksize = inode->i_sb->s_blocksize;

	while (cur <= end) {
		if (cur >= last_byte) {
			if (tree->ops && tree->ops->writepage_end_io_hook)
				tree->ops->writepage_end_io_hook(page, cur,

		if (IS_ERR_OR_NULL(em)) {

		iosize = min(extent_map_end(em) - cur, end - cur + 1);
		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);

			if (!compressed && tree->ops &&
			    tree->ops->writepage_end_io_hook)
				tree->ops->writepage_end_io_hook(page, cur,

			else if (compressed) {

			pg_offset += iosize;

				   EXTENT_DIRTY, 0, NULL)) {

			pg_offset += iosize;

		if (tree->ops && tree->ops->writepage_io_hook) {
			ret = tree->ops->writepage_io_hook(page, cur,

			unsigned long max_nr = end_index + 1;

			set_range_writeback(tree, cur, cur + iosize - 1);
			if (!PageWriteback(page)) {

				       "writeback, cur %llu end %llu\n",
				       page->index, (unsigned long long)cur,
				       (unsigned long long)end);

			ret = submit_extent_page(write_flags, tree, page,
						 sector, iosize, pg_offset,
						 bdev, &epd->bio, max_nr,
						 end_bio_extent_writepage,

		pg_offset += iosize;

		set_page_writeback(page);
static int eb_wait(void *word)

		flush_write_bio(epd);

			flush_write_bio(epd);

		wait_on_extent_buffer_writeback(eb);

	num_pages = num_extent_pages(eb->start, eb->len);

		struct page *p = extent_buffer_page(eb, i);

		if (!trylock_page(p)) {

			flush_write_bio(epd);
static void end_extent_buffer_writeback(struct extent_buffer *eb)

static void end_bio_extent_buffer_writepage(struct bio *bio, int err)

	int uptodate = err == 0;
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;

		struct page *page = bvec->bv_page;

			ClearPageUptodate(page);

			end_extent_buffer_writeback(eb);
	} while (bvec >= bio->bi_io_vec);
	unsigned long bio_flags = 0;

	num_pages = num_extent_pages(eb->start, eb->len);

		struct page *p = extent_buffer_page(eb, i);

		set_page_writeback(p);
		ret = submit_extent_page(rw, eb->tree, p, offset >> 9,
					 PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
					 -1, end_bio_extent_buffer_writepage,

			end_extent_buffer_writeback(eb);

		update_nr_written(p, wbc, 1);

		struct page *p = extent_buffer_page(eb, i);
	int nr_to_write_done = 0;

	pagevec_init(&pvec, 0);

	while (!done && !nr_to_write_done && (index <= end) &&

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (!PagePrivate(page))

			if (!PagePrivate(page)) {

			if (eb == prev_eb) {

			ret = lock_extent_buffer_for_io(eb, fs_info, &epd);

			ret = write_one_eb(eb, fs_info, wbc, &epd);

		pagevec_release(&pvec);

	if (!scanned && !done) {

	flush_write_bio(&epd);
			     void (*flush_fn)(void *))

	struct inode *inode = mapping->host;

	int nr_to_write_done = 0;

	pagevec_init(&pvec, 0);

	while (!done && !nr_to_write_done && (index <= end) &&

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			    tree->ops->write_cache_pages_lock_hook) {
				tree->ops->write_cache_pages_lock_hook(page,

				if (!trylock_page(page)) {

			if (PageWriteback(page))

				wait_on_page_writeback(page);

			if (PageWriteback(page) ||

			ret = (*writepage)(page, wbc, data);

			nr_to_write_done = wbc->nr_to_write <= 0;

		pagevec_release(&pvec);

	if (!scanned && !done) {
static noinline void flush_write_bio(void *data)

	flush_epd_write_bio(epd);

	ret = __extent_writepage(page, wbc, &epd);

	flush_epd_write_bio(&epd);
		.nr_to_write	= nr_pages * 2,
		.range_start	= start,
		.range_end	= end + 1,

	while (start <= end) {

			ret = __extent_writepage(page, &wbc_writepages, &epd);

			if (tree->ops && tree->ops->writepage_end_io_hook)
				tree->ops->writepage_end_io_hook(page, start,
						 start + PAGE_CACHE_SIZE - 1,

	flush_epd_write_bio(&epd);

	ret = extent_write_cache_pages(tree, mapping, wbc,
				       __extent_writepage, &epd,

	flush_epd_write_bio(&epd);
			  struct list_head *pages, unsigned nr_pages,

	struct bio *bio = NULL;

	unsigned long bio_flags = 0;
	struct page *pagepool[16];

	for (page_idx = 0; page_idx < nr_pages; page_idx++) {

		pagepool[nr++] = page;

		for (i = 0; i < nr; i++) {
			__extent_read_full_page(tree, pagepool[i], get_extent,
						&bio, 0, &bio_flags);

	for (i = 0; i < nr; i++) {
		__extent_read_full_page(tree, pagepool[i], get_extent,
					&bio, 0, &bio_flags);

	BUG_ON(!list_empty(pages));

		return submit_one_bio(READ, bio, 0, bio_flags);
			  struct page *page, unsigned long offset)

	u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
	u64 end = start + PAGE_CACHE_SIZE - 1;
	size_t blocksize = page->mapping->host->i_sb->s_blocksize;

	start += (offset + blocksize - 1) & ~(blocksize - 1);

	wait_on_page_writeback(page);
	u64 end = start + PAGE_CACHE_SIZE - 1;

	u64 end = start + PAGE_CACHE_SIZE - 1;

	if ((mask & __GFP_WAIT) &&
	    page->mapping->host->i_size > 16 * 1024 * 1024) {

		while (start <= end) {
			len = end - start + 1;

			    em->start != start) {

					    extent_map_end(em) - 1,

			start = extent_map_end(em);
static struct extent_map *get_extent_skip_holes(struct inode *inode,

		len = (len + sectorsize - 1) & ~(sectorsize - 1);
		em = get_extent(inode, NULL, 0, offset, len, 0);
		if (IS_ERR_OR_NULL(em))

		offset = extent_map_end(em);
	u64 last_for_get_extent = 0;

	u64 isize = i_size_read(inode);

	unsigned long emflags;

	start = ALIGN(start, BTRFS_I(inode)->root->sectorsize);
	len = ALIGN(len, BTRFS_I(inode)->root->sectorsize);

				       path, btrfs_ino(inode), -1, 0);

	btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
	found_type = btrfs_key_type(&found_key);

	if (found_key.objectid != btrfs_ino(inode) ||

		last_for_get_extent = isize;

		last_for_get_extent = last + 1;

		last_for_get_extent = isize;

	em = get_extent_skip_holes(inode, start, last_for_get_extent,

		u64 offset_in_extent;

		if (em->start >= max || extent_map_end(em) < off)

		offset_in_extent = em_start - em->start;
		em_end = extent_map_end(em);
		em_len = em_end - em_start;
		emflags = em->flags;

		off = extent_map_end(em);

		if ((em_start >= last) || em_len == (u64)-1 ||
		   (last == (u64)-1 && isize <= em_end)) {

		em = get_extent_skip_holes(inode, off, last_for_get_extent,
	unsigned long flags;

	spin_unlock_irqrestore(&leak_lock, flags);

	unsigned long flags;

	eb = kmem_cache_zalloc(extent_buffer_cache, mask);

	spin_unlock_irqrestore(&leak_lock, flags);

		struct page **pages;
		int num_pages = (len + PAGE_CACHE_SIZE - 1) >>

		pages = kzalloc(num_pages, mask);

			__free_extent_buffer(eb);

	unsigned long num_pages = num_extent_pages(src->start, src->len);

	unsigned long num_pages = num_extent_pages(0, len);

	btrfs_set_header_nritems(eb, 0);

	__free_extent_buffer(eb);
static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
					     unsigned long start_idx)

	unsigned long index;

	BUG_ON(extent_buffer_under_io(eb));

	num_pages = num_extent_pages(eb->start, eb->len);

	if (start_idx >= index)

		page = extent_buffer_page(eb, index);
		if (page && mapped) {
			spin_lock(&page->mapping->private_lock);

			if (PagePrivate(page) &&
			    page->private == (unsigned long)eb) {

				BUG_ON(PageWriteback(page));

				ClearPagePrivate(page);
				set_page_private(page, 0);

			spin_unlock(&page->mapping->private_lock);

	} while (index != start_idx);

static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)

	btrfs_release_extent_buffer_page(eb, 0);
	__free_extent_buffer(eb);
static void mark_extent_buffer_accessed(struct extent_buffer *eb)

	check_buffer_tree_ref(eb);

	num_pages = num_extent_pages(eb->start, eb->len);

		struct page *p = extent_buffer_page(eb, i);
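/*
 * alloc_extent_buffer() returns the extent_buffer covering
 * [start, start + len), either by finding an existing one in the radix
 * tree or by allocating a new one and attaching its pages.
 */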
					  u64 start, unsigned long len)

	unsigned long num_pages = num_extent_pages(start, len);

		mark_extent_buffer_accessed(eb);

	eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);

	for (i = 0; i < num_pages; i++, index++) {

		if (PagePrivate(p)) {

				mark_extent_buffer_accessed(exists);

			ClearPagePrivate(p);

		if (!PageUptodate(p))

				start >> PAGE_CACHE_SHIFT);

			radix_tree_preload_end();

		radix_tree_preload_end();
		mark_extent_buffer_accessed(exists);

	check_buffer_tree_ref(eb);

	radix_tree_preload_end();

	SetPageChecked(eb->pages[0]);

		p = extent_buffer_page(eb, i);
		ClearPageChecked(p);

	btrfs_release_extent_buffer(eb);
					 u64 start, unsigned long len)

		mark_extent_buffer_accessed(eb);

static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)

	__free_extent_buffer(eb);

				  eb->start >> PAGE_CACHE_SHIFT);

	btrfs_release_extent_buffer_page(eb, 0);

	    !extent_buffer_under_io(eb) &&

	release_extent_buffer(eb, GFP_NOFS);
	num_pages = num_extent_pages(eb->start, eb->len);

		page = extent_buffer_page(eb, i);
		if (!PageDirty(page))

		spin_lock_irq(&page->mapping->tree_lock);
		if (!PageDirty(page)) {

		spin_unlock_irq(&page->mapping->tree_lock);
		ClearPageError(page);

	check_buffer_tree_ref(eb);

	num_pages = num_extent_pages(eb->start, eb->len);
static int range_straddles_pages(u64 start, u64 len)

	if (len < PAGE_CACHE_SIZE)

	if (start & (PAGE_CACHE_SIZE - 1))

	if ((start + len) & (PAGE_CACHE_SIZE - 1))

	num_pages = num_extent_pages(eb->start, eb->len);

		page = extent_buffer_page(eb, i);

			ClearPageUptodate(page);

	num_pages = num_extent_pages(eb->start, eb->len);

		page = extent_buffer_page(eb, i);
		SetPageUptodate(page);
	int pg_uptodate = 1;

	unsigned long index;

	if (range_straddles_pages(start, end - start + 1)) {

	while (start <= end) {

		uptodate = PageUptodate(page);
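/*
 * read_extent_buffer_pages() locks the buffer's pages, issues reads for
 * any that are not uptodate, and optionally waits for them to complete.
 */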
	unsigned long start_i;

	int locked_pages = 0;
	int all_uptodate = 1;

	unsigned long num_reads = 0;
	struct bio *bio = NULL;
	unsigned long bio_flags = 0;

			  (eb->start >> PAGE_CACHE_SHIFT);

	num_pages = num_extent_pages(eb->start, eb->len);

		page = extent_buffer_page(eb, i);

			if (!trylock_page(page))

		if (!PageUptodate(page)) {

		page = extent_buffer_page(eb, i);
		if (!PageUptodate(page)) {
			ClearPageError(page);
			err = __extent_read_full_page(tree, page,
						      mirror_num, &bio_flags);

		err = submit_one_bio(READ, bio, mirror_num, bio_flags);

		page = extent_buffer_page(eb, i);
		wait_on_page_locked(page);
		if (!PageUptodate(page))

	while (locked_pages > 0) {
		page = extent_buffer_page(eb, i);
			unsigned long start,

	char *dst = (char *)dstv;

	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;

	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);

		page = extent_buffer_page(eb, i);

		cur = min(len, (PAGE_CACHE_SIZE - offset));

		memcpy(dst, kaddr + offset, cur);
			       unsigned long min_len, char **map,
			       unsigned long *map_start,
			       unsigned long *map_len)

	size_t offset = start & (PAGE_CACHE_SIZE - 1);

	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
	unsigned long end_i = (start_offset + start + min_len - 1) >>

	if (start + min_len > eb->len) {

		       "wanted %lu %lu\n", (unsigned long long)eb->start,
		       eb->len, start, min_len);

	p = extent_buffer_page(eb, i);

	*map_len = PAGE_CACHE_SIZE - offset;
			  unsigned long start,

	char *ptr = (char *)ptrv;

	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;

	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);

		page = extent_buffer_page(eb, i);

		cur = min(len, (PAGE_CACHE_SIZE - offset));

		ret = memcmp(ptr, kaddr + offset, cur);
			 unsigned long start, unsigned long len)

	char *src = (char *)srcv;

	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;

	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);

		page = extent_buffer_page(eb, i);

		cur = min(len, PAGE_CACHE_SIZE - offset);

		memcpy(kaddr + offset, src, cur);
			  unsigned long start, unsigned long len)

	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;

	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);

		page = extent_buffer_page(eb, i);

		cur = min(len, PAGE_CACHE_SIZE - offset);

		memset(kaddr + offset, c, cur);
			unsigned long dst_offset, unsigned long src_offset,

	unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;

	offset = (start_offset + dst_offset) &
		 ((unsigned long)PAGE_CACHE_SIZE - 1);

		page = extent_buffer_page(dst, i);

		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
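/*
 * move_pages()/copy_pages() are the page-level helpers behind
 * memcpy_extent_buffer() and memmove_extent_buffer(); they fall back to
 * a backwards copy or memmove() when source and destination overlap
 * within the same page.
 */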
static void move_pages(struct page *dst_page, struct page *src_page,
		       unsigned long dst_off, unsigned long src_off,

	if (dst_page == src_page) {
		memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);

		char *p = dst_kaddr + dst_off + len;
		char *s = src_kaddr + src_off + len;

static inline bool areas_overlap(unsigned long src, unsigned long dst,
				 unsigned long len)

	unsigned long distance = (src > dst) ? src - dst : dst - src;
	return distance < len;
static void copy_pages(struct page *dst_page, struct page *src_page,
		       unsigned long dst_off, unsigned long src_off,

	int must_memmove = 0;

	if (dst_page != src_page) {

		src_kaddr = dst_kaddr;
		if (areas_overlap(src_off, dst_off, len))

		memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);

		memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
			   unsigned long src_offset, unsigned long len)

	size_t dst_off_in_page;
	size_t src_off_in_page;

	unsigned long dst_i;
	unsigned long src_i;

	if (src_offset + len > dst->len) {

		       "len %lu dst len %lu\n", src_offset, len, dst->len);

	if (dst_offset + len > dst->len) {

		       "len %lu dst len %lu\n", dst_offset, len, dst->len);

		dst_off_in_page = (start_offset + dst_offset) &
				  ((unsigned long)PAGE_CACHE_SIZE - 1);
		src_off_in_page = (start_offset + src_offset) &
				  ((unsigned long)PAGE_CACHE_SIZE - 1);

		dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
		src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;

		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -

		cur = min_t(unsigned long, cur,
			    (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));

		copy_pages(extent_buffer_page(dst, dst_i),
			   extent_buffer_page(dst, src_i),
			   dst_off_in_page, src_off_in_page, cur);
			   unsigned long src_offset, unsigned long len)

	size_t dst_off_in_page;
	size_t src_off_in_page;
	unsigned long dst_end = dst_offset + len - 1;
	unsigned long src_end = src_offset + len - 1;

	unsigned long dst_i;
	unsigned long src_i;

	if (src_offset + len > dst->len) {

		       "len %lu len %lu\n", src_offset, len, dst->len);

	if (dst_offset + len > dst->len) {

		       "len %lu len %lu\n", dst_offset, len, dst->len);

	if (dst_offset < src_offset) {

		dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
		src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;

		dst_off_in_page = (start_offset + dst_end) &
				  ((unsigned long)PAGE_CACHE_SIZE - 1);
		src_off_in_page = (start_offset + src_end) &
				  ((unsigned long)PAGE_CACHE_SIZE - 1);

		cur = min_t(unsigned long, len, src_off_in_page + 1);
		cur = min(cur, dst_off_in_page + 1);
		move_pages(extent_buffer_page(dst, dst_i),
			   extent_buffer_page(dst, src_i),
			   dst_off_in_page - cur + 1,
			   src_off_in_page - cur + 1, cur);
	spin_lock(&page->mapping->private_lock);
	if (!PagePrivate(page)) {
		spin_unlock(&page->mapping->private_lock);

	spin_unlock(&page->mapping->private_lock);

	spin_unlock(&page->mapping->private_lock);

	return release_extent_buffer(eb, mask);