#define SCRUB_PAGES_PER_BIO		16
#define SCRUB_BIOS_PER_DEV		16
#define SCRUB_MAX_PAGES_PER_BLOCK	16
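/*
 * The three limits above bound the scrub code's memory and I/O footprint:
 * one bio carries at most SCRUB_PAGES_PER_BIO pages, each device keeps at
 * most SCRUB_BIOS_PER_DEV bios in flight (about 1 MB with 4K pages), and a
 * single scrub_block describes at most SCRUB_MAX_PAGES_PER_BLOCK pages,
 * enough for one 64K tree block.
 */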
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_dev *sdev,
				     struct btrfs_mapping_tree *map_tree,
				     u64 length, u64 logical,
				     struct scrub_block *sblocks_for_recheck);
static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
			       struct scrub_block *sblock, int is_metadata,
			       int have_csum, u8 *csum, u64 generation,
			       u16 csum_size);
static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
					 struct scrub_block *sblock,
					 int is_metadata, int have_csum,
					 const u8 *csum, u64 generation,
					 u16 csum_size);
static void scrub_complete_bio_end_io(struct bio *bio, int err);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good,
					     int force_write);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_get(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static int scrub_add_page_to_bio(struct scrub_dev *sdev,
				 struct scrub_page *spage);
static void scrub_bio_end_io(struct bio *bio, int err);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_free_csums(struct scrub_dev *sdev)
{
	/* ... */
}

static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
{
	/* ... */
	if (sdev->curr != -1) {
		/* ... */
		scrub_block_put(sbio->pagev[i]->sblock);
		/* ... */
	}
	/* ... */
	scrub_free_csums(sdev);
	/* ... */
}
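/*
 * scrub_setup_dev() below allocates the per-device scrub context together
 * with its fixed pool of SCRUB_BIOS_PER_DEV scrub_bios; the next_free
 * indices chain the pool into a simple free list (terminated by -1) that
 * scrub_add_page_to_bio() later draws from.
 */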
static noinline_for_stack
struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
{
	/* ... */
	sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
	/* ... */
	for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
		/* ... */
		sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
		/* ... */
		sdev->bios[i] = sbio;
		/* ... */
		sbio->work.func = scrub_bio_end_io_worker;

		if (i != SCRUB_BIOS_PER_DEV - 1)
			sdev->bios[i]->next_free = i + 1;
		else
			sdev->bios[i]->next_free = -1;
	}
	/* ... */
	scrub_free_dev(sdev);
	/* ... */
}
static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
				     void *ctx)
{
	/* ... */
	if (IS_ERR(local_root)) {
		ret = PTR_ERR(local_root);
		/* ... */
	}
	/* ... */
	eb = swarn->path->nodes[0];
	/* ... */
	isize = btrfs_inode_size(eb, inode_item);
	nlink = btrfs_inode_nlink(eb, inode_item);
	/* ... */
		ret = PTR_ERR(ipath);
	/* ... */
	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
		printk(KERN_WARNING "btrfs: %s at logical %llu on dev "
			"%s, sector %llu, root %llu, inode %llu, offset %llu, "
			"length %llu, links %u (path: %s)\n", swarn->errstr,
			/* ... */
			(unsigned long long)swarn->sector, root, inum, offset,
			/* ... */
			(char *)(unsigned long)ipath->fspath->val[i]);
	/* ... */
	printk(KERN_WARNING "btrfs: %s at logical %llu on dev "
		"%s, sector %llu, root %llu, inode %llu, offset %llu: path "
		"resolving failed with ret=%d\n", swarn->errstr,
		/* ... */
		(unsigned long long)swarn->sector, root, inum, offset, ret);
	/* ... */
}
static void scrub_print_warning(const char *errstr,
				struct scrub_block *sblock)
{
	/* ... */
	unsigned long ptr = 0;
	/* ... */
	extent_item_pos = swarn.logical - found_key.objectid;
	/* ... */
	item_size = btrfs_item_size_nr(eb, path->slots[0]);
	/* ... */
		ret = tree_backref_for_extent(&ptr, /* ... */
					      &ref_root, &ref_level);
		printk(KERN_WARNING
			"btrfs: %s at logical %llu on dev %s, "
			"sector %llu: metadata %s (level %d) in tree "
			"%llu\n", errstr, swarn.logical,
			/* ... */
			(unsigned long long)swarn.sector,
			ref_level ? "node" : "leaf",
			ret < 0 ? -1 : ref_level,
			ret < 0 ? -1 : ref_root);
	/* ... */
	iterate_extent_inodes(/* ... */
			      scrub_print_warning_inode, &swarn);
	/* ... */
}
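/*
 * Callback for the backref iteration in scrub_fixup_nodatasum() below:
 * invoked once per inode that references the corrupted block, it re-reads
 * the affected page through the inode's mapping so the regular read path
 * can fetch a good mirror, and reports whether the error was corrected.
 */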
static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx)
{
	/* ... */
	u64 end = offset + PAGE_SIZE - 1;
	/* ... */
	if (IS_ERR(local_root))
		return PTR_ERR(local_root);
	/* ... */
		return PTR_ERR(inode);
	/* ... */
	if (PageUptodate(page)) {
		/* ... */
		if (PageDirty(page)) {
			/* ... */
		}
		/* ... */
	}
	/* ... */
	map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree;
	/* ... */
	wait_on_page_locked(page);
	/* ... */
	if (ret == 0 && corrected) {
		/* ... */
	}
	/* ... */
}
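/*
 * Worker for data blocks that carry no checksum (nodatasum): such blocks
 * cannot be verified or rewritten in place by scrub itself, so this path
 * walks every inode referencing the logical address (using the readpage
 * callback above) and accounts the outcome as corrected or uncorrectable.
 */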
static void scrub_fixup_nodatasum(struct btrfs_work *work)
{
	/* ... */
	int uncorrectable = 0;
	/* ... */
	fs_info = fixup->root->fs_info;
	/* ... */
		++sdev->stat.malloc_errors;
	/* ... */
	ret = iterate_inodes_from_logical(fixup->logical, fs_info, path,
					  scrub_fixup_readpage, fixup);
	/* ... */
	++sdev->stat.corrected_errors;
	/* ... */
	if (trans && !IS_ERR(trans))
		/* ... */
	/* ... */
	++sdev->stat.uncorrectable_errors;
	/* ... */
		"btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
		(unsigned long long)fixup->logical,
		/* ... */);
	/* ... */
}
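/*
 * Central recovery path, in outline:
 *  1. re-read all mirrors of the failed block into sblocks_for_recheck;
 *  2. re-verify the failed mirror to classify the error (I/O, checksum,
 *     or generation/header mismatch) and bump the matching counter;
 *  3. prefer repairing the whole block from one fully good mirror, else
 *     repair page by page from whichever mirror has that page intact;
 *  4. re-check the result before reporting corrected_error, otherwise
 *     fall through to did_not_correct_error.
 */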
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
{
	/* ... */
	unsigned int failed_mirror_index;
	unsigned int is_metadata;
	unsigned int have_csum;
	/* ... */
	fs_info = sdev->dev->dev_root->fs_info;
	/* ... */
	logical = sblock_to_check->pagev[0].logical;
	generation = sblock_to_check->pagev[0].generation;
	/* ... */
	failed_mirror_index = sblock_to_check->pagev[0].mirror_num - 1;
	is_metadata = !(sblock_to_check->pagev[0].flags &
			BTRFS_EXTENT_FLAG_DATA);
	have_csum = sblock_to_check->pagev[0].have_csum;
	csum = sblock_to_check->pagev[0].csum;
	/* ... */
	sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
				      sizeof(*sblocks_for_recheck),
				      GFP_NOFS);
	if (!sblocks_for_recheck) {
		/* ... */
		sdev->stat.malloc_errors++;
		sdev->stat.read_errors++;
		sdev->stat.uncorrectable_errors++;
		/* ... */
	}

	/* ... */
	ret = scrub_setup_recheck_block(sdev, &fs_info->mapping_tree, length,
					logical, sblocks_for_recheck);
	if (ret) {
		/* ... */
		sdev->stat.read_errors++;
		sdev->stat.uncorrectable_errors++;
		/* ... */
	}
	/* ... */
	sblock_bad = sblocks_for_recheck + failed_mirror_index;

	/* ... */
	ret = scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
				  csum, generation, sdev->csum_size);
	if (ret) {
		/* ... */
		sdev->stat.read_errors++;
		sdev->stat.uncorrectable_errors++;
		/* ... */
	}
	/* ... */
		sdev->stat.unverified_errors++;
	/* ... */
		sdev->stat.read_errors++;
		/* ... */
		scrub_print_warning("i/o error", sblock_to_check);
	/* ... */
		sdev->stat.csum_errors++;
		/* ... */
		scrub_print_warning("checksum error", sblock_to_check);
	/* ... */
		sdev->stat.verify_errors++;
		/* ... */
		scrub_print_warning("checksum/header error",
				    sblock_to_check);
	/* ... */
		goto did_not_correct_error;
	/* ... */
	if (!is_metadata && !have_csum) {
		/* ... */
		fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
		if (!fixup_nodatasum)
			goto did_not_correct_error;
		/* ... */
		fixup_nodatasum->mirror_num = failed_mirror_index + 1;
		/* ... */
				   &fixup_nodatasum->work);
		/* ... */
	}

	/* ... */
	for (mirror_index = 0;
	     mirror_index < BTRFS_MAX_MIRRORS &&
	     sblocks_for_recheck[mirror_index].page_count > 0;
	     mirror_index++) {
		if (mirror_index == failed_mirror_index)
			continue;

		/* ... */
		ret = scrub_recheck_block(fs_info,
					  sblocks_for_recheck + mirror_index,
					  is_metadata, have_csum, csum,
					  generation, sdev->csum_size);
		/* ... */
			goto did_not_correct_error;
	}
	/* ... */
	for (mirror_index = 0;
	     mirror_index < BTRFS_MAX_MIRRORS &&
	     sblocks_for_recheck[mirror_index].page_count > 0;
	     mirror_index++) {
		struct scrub_block *sblock_other = sblocks_for_recheck +
						   mirror_index;
		/* ... */
		int force_write = is_metadata || have_csum;

		ret = scrub_repair_block_from_good_copy(sblock_bad,
							sblock_other,
							force_write);
		/* ... */
			goto corrected_error;
	}
	/* ... */
		goto did_not_correct_error;

	/* ... */
	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
		/* ... */
		for (mirror_index = 0;
		     mirror_index < BTRFS_MAX_MIRRORS &&
		     sblocks_for_recheck[mirror_index].page_count > 0;
		     mirror_index++) {
			struct scrub_block *sblock_other =
				sblocks_for_recheck + mirror_index;
			/* ... */
			ret = scrub_repair_page_from_good_copy(
				sblock_bad, sblock_other, page_num, 0);
			/* ... */
		}
	}
	/* ... */
	if (is_metadata || have_csum) {
		/* ... */
		ret = scrub_recheck_block(fs_info, sblock_bad,
					  is_metadata, have_csum, csum,
					  generation, sdev->csum_size);
		/* ... */
			goto corrected_error;
		/* ... */
			goto did_not_correct_error;
	}
	/* ... */
corrected_error:
	/* ... */
	sdev->stat.corrected_errors++;
	/* ... */
		"btrfs: fixed up error at logical %llu on dev %s\n",
		(unsigned long long)logical,
		/* ... */);
	/* ... */
did_not_correct_error:
	/* ... */
	sdev->stat.uncorrectable_errors++;
	/* ... */
		"btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
		(unsigned long long)logical,
		/* ... */);

	/* ... */
	if (sblocks_for_recheck) {
		/* ... */
			if (sblock->pagev[page_index].page)
				__free_page(sblock->pagev[page_index].page);
		/* ... */
		kfree(sblocks_for_recheck);
	}
	/* ... */
}
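/*
 * Builds one scrub_block per mirror for the given logical range: the
 * mapping tree is asked where each mirror lives, and pages are allocated
 * so the copies can be read back independently.
 */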
static int scrub_setup_recheck_block(struct scrub_dev *sdev,
				     struct btrfs_mapping_tree *map_tree,
				     u64 length, u64 logical,
				     struct scrub_block *sblocks_for_recheck)
{
	/* ... */
	u64 mapped_length = sublen;
	/* ... */
	if (ret || !bbio || mapped_length < sublen) {
		/* ... */
	}
	/* ... */
		if (mirror_index >= BTRFS_MAX_MIRRORS)
			/* ... */
		sblock = sblocks_for_recheck + mirror_index;
		page = sblock->pagev + page_index;
		/* ... */
			sdev->stat.malloc_errors++;
	/* ... */
}
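/*
 * Synchronously re-reads every page of one mirror, one single-page bio at
 * a time (completed via scrub_complete_bio_end_io), then defers to
 * scrub_recheck_block_checksum() to decide whether the copy is good.
 */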
static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
			       struct scrub_block *sblock, int is_metadata,
			       int have_csum, u8 *csum, u64 generation,
			       u16 csum_size)
{
	/* ... */
	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		/* ... */
		if (page->dev->bdev == NULL) {
			/* ... */
		}
		/* ... */
		bio->bi_bdev = page->dev->bdev;
		bio->bi_sector = page->physical >> 9;
		bio->bi_end_io = scrub_complete_bio_end_io;
		/* ... */
		if (PAGE_SIZE != ret) {
			/* ... */
		}
		/* ... */
		if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
			/* ... */
	}

	/* ... */
	scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
				     have_csum, csum, generation,
				     csum_size);
	/* ... */
}
static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
					 struct scrub_block *sblock,
					 int is_metadata, int have_csum,
					 const u8 *csum, u64 generation,
					 u16 csum_size)
{
	/* ... */
	void *mapped_buffer;
	/* ... */
	for (page_num = 0;;) {
		if (page_num == 0 && is_metadata)
			/* ... */
	}
	/* ... */
	if (memcmp(calculated_csum, csum, csum_size))
		/* ... */
}
static void scrub_complete_bio_end_io(struct bio *bio, int err)
{
	/* ... */
}
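/*
 * Repair is page-wise: a page from the good mirror is written over the
 * bad mirror's physical location; force_write makes that copy
 * unconditional instead of limited to pages known to be bad.
 */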
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
		struct scrub_block *sblock_good, int force_write)
{
	/* ... */
	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
		/* ... */
		ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
				sblock_good, page_num, force_write);
	}
	/* ... */
}
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
		struct scrub_block *sblock_good, int page_num, int force_write)
{
	/* ... */
		bio->bi_bdev = page_bad->dev->bdev;
		bio->bi_sector = page_bad->physical >> 9;
		bio->bi_end_io = scrub_complete_bio_end_io;
		/* ... */
		if (PAGE_SIZE != ret) {
			/* ... */
		}
		/* ... */
		if (!bio_flagged(bio, BIO_UPTODATE)) {
			/* ... */
		}
	/* ... */
}
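/*
 * Verification dispatcher: the extent flags recorded in pagev[0] select
 * data, tree-block or superblock verification, and any failure is handed
 * to scrub_handle_errored_block() for recovery.
 */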
static void scrub_checksum(struct scrub_block *sblock)
{
	/* ... */
	flags = sblock->pagev[0].flags;
	/* ... */
	if (flags & BTRFS_EXTENT_FLAG_DATA)
		ret = scrub_checksum_data(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
		ret = scrub_checksum_tree_block(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_SUPER)
		(void)scrub_checksum_super(sblock);
	/* ... */
	if (ret)
		scrub_handle_errored_block(sblock);
}
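/*
 * Data blocks are verified by computing the crc32c of the page contents
 * and comparing it against the csum-tree value stashed in pagev[0].csum;
 * blocks without a checksum are accepted as-is.
 */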
static int scrub_checksum_data(struct scrub_block *sblock)
{
	/* ... */
	if (!sblock->pagev[0].have_csum)
		return 0;

	on_disk_csum = sblock->pagev[0].csum;
	page = sblock->pagev[0].page;
	/* ... */
}
static int scrub_checksum_tree_block(struct scrub_block *sblock)
{
	/* ... */
	void *mapped_buffer;
	/* ... */
	page = sblock->pagev[0].page;
	/* ... */
	return fail || crc_fail;
}
static int scrub_checksum_super(struct scrub_block *sblock)
{
	/* ... */
	void *mapped_buffer;
	/* ... */
	page = sblock->pagev[0].page;
	/* ... */
	if (fail_cor + fail_gen) {
		/* ... */
		++sdev->stat.super_errors;
		/* ... */
	}
	/* ... */
	return fail_cor + fail_gen;
}
static void scrub_block_get(struct scrub_block *sblock)
{
	/* ... */
}

static void scrub_block_put(struct scrub_block *sblock)
{
	/* ... */
	if (sblock->pagev[i].page)
		/* ... */
	/* ... */
}
static void scrub_submit(struct scrub_dev *sdev)
{
	/* ... */
	if (sdev->curr == -1)
		return;
	/* ... */
}
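/*
 * Queues one scrub_page: grab a free scrub_bio from the pool (sdev->curr
 * names the bio currently being filled; the loop below waits when the
 * pool is empty), append the page, and submit once the bio is full or the
 * next page is not physically contiguous.
 */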
static int scrub_add_page_to_bio(struct scrub_dev *sdev,
				 struct scrub_page *spage)
{
	/* ... */
	while (sdev->curr == -1) {
		/* ... */
		if (sdev->curr != -1) {
			/* ... */
			sdev->bios[sdev->curr]->next_free = -1;
			sdev->bios[sdev->curr]->page_count = 0;
			/* ... */
		}
		/* ... */
	}
	/* ... */
		bio->bi_private = sbio;
		bio->bi_end_io = scrub_bio_end_io;
		bio->bi_bdev = sdev->dev->bdev;
		bio->bi_sector = spage->physical >> 9;
	/* ... */
	if (ret != PAGE_SIZE) {
		/* ... */
	}
	/* ... */
	scrub_block_get(sblock);
	/* ... */
}
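/*
 * scrub_pages() wraps a logical range into a scrub_block, allocating one
 * scrub_page per PAGE_SIZE chunk and queueing each page above. The block
 * is reference-counted, so the final scrub_block_put() only frees it once
 * all of its pages have completed.
 */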
static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
		       u64 physical, u64 flags, u64 gen, int mirror_num,
		       u8 *csum, int force)
{
	/* ... */
	sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
	/* ... */
		sdev->stat.malloc_errors++;
	/* ... */
	for (index = 0; len > 0; index++) {
		/* ... */
			sdev->stat.malloc_errors++;
		/* ... */
	}
	/* ... */
	for (index = 0; index < sblock->page_count; index++) {
		/* ... */
		ret = scrub_add_page_to_bio(sdev, spage);
		/* ... */
			scrub_block_put(sblock);
		/* ... */
	}
	/* ... */
	scrub_block_put(sblock);
	/* ... */
}
static void scrub_bio_end_io(struct bio *bio, int err)
{
	struct scrub_bio *sbio = bio->bi_private;
	/* ... */
}
static void scrub_bio_end_io_worker(struct btrfs_work *work)
{
	/* ... */
			spage->sblock->no_io_error_seen = 0;
	/* ... */
		scrub_block_complete(sblock);
		scrub_block_put(sblock);
	/* ... */
}
static void scrub_block_complete(struct scrub_block *sblock)
{
	if (!sblock->no_io_error_seen)
		scrub_handle_errored_block(sblock);
	else
		scrub_checksum(sblock);
}
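/*
 * Looks up the data checksum for @logical in the csum list preloaded for
 * the current stripe; entries that fall behind the scrub position are
 * dropped and accounted as csum_discards.
 */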
static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
			   u8 *csum)
{
	/* ... */
	unsigned long num_sectors;
	/* ... */
		if (sum->bytenr > logical)
			/* ... */
		/* ... */
		++sdev->stat.csum_discards;
	/* ... */
	for (i = 0; i < num_sectors; ++i) {
		if (sum->sums[i].bytenr == logical) {
			/* ... */
		}
	}
	if (ret && i == num_sectors - 1) {
		/* ... */
	}
	/* ... */
}
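/*
 * scrub_extent() accounts the extent in the statistics, then carves it
 * into bite-sized pieces, attaching the data checksum (when one exists)
 * before handing each piece to scrub_pages().
 */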
static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
			u64 physical, u64 flags, u64 gen, int mirror_num)
{
	/* ... */
	if (flags & BTRFS_EXTENT_FLAG_DATA) {
		/* ... */
		sdev->stat.data_extents_scrubbed++;
		sdev->stat.data_bytes_scrubbed += len;
		/* ... */
	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		/* ... */
		sdev->stat.tree_extents_scrubbed++;
		sdev->stat.tree_bytes_scrubbed += len;
		/* ... */
	}
	/* ... */
		if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* ... */
			have_csum = scrub_find_csum(sdev, logical, l, csum);
			if (have_csum == 0)
				++sdev->stat.no_csum;
		}
		/* ... */
		ret = scrub_pages(sdev, logical, l, physical, flags, gen,
				  mirror_num, have_csum ? csum : NULL, 0);
	/* ... */
}
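/*
 * Walks the extent tree for one stripe of one chunk: key_start/key_end
 * bracket the stripe for readahead, and every extent item that falls
 * inside the stripe is clipped to its boundaries and scrubbed.
 */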
static noinline_for_stack
int scrub_stripe(struct scrub_dev *sdev, struct map_lookup *map, int num,
		 u64 base, u64 length)
{
	/* ... */
	key_start.objectid = logical;
	/* ... */
	key_start.offset = (u64)0;
	key_end.objectid = base + offset + nstripes * increment;
	/* ... */
	key_end.offset = (u64)0;
	/* ... */
	key_start.offset = logical;
	/* ... */
	key_end.offset = base + offset + nstripes * increment;
	/* ... */
	if (!IS_ERR(reada1))
		btrfs_reada_wait(reada1);
	if (!IS_ERR(reada2))
		btrfs_reada_wait(reada2);
	/* ... */
	physical = map->stripes[num].physical;
	/* ... */
	for (i = 0; i < nstripes; ++i) {
		/* ... */
		key.objectid = logical;
		/* ... */
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			/* ... */
		}
		/* ... */
		btrfs_item_key_to_cpu(l, &key, slot);
		/* ... */
		if (key.objectid + key.offset <= logical)
			/* ... */
		/* ... */
		flags = btrfs_extent_flags(l, extent);
		generation = btrfs_extent_generation(l, extent);
		/* ... */
		if (key.objectid < logical &&
		    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
			printk(KERN_ERR
			       "btrfs scrub: tree block %llu spanning "
			       "stripes, ignored. logical=%llu\n",
			       (unsigned long long)key.objectid,
			       (unsigned long long)logical);
			/* ... */
		}
		/* ... */
		if (key.objectid < logical) {
			key.offset -= logical - key.objectid;
			key.objectid = logical;
		}
		if (key.objectid + key.offset >
		    /* ... */
		/* ... */
		ret = scrub_extent(sdev, key.objectid, key.offset,
				   key.objectid - logical + physical,
				   flags, generation, mirror_num);
		/* ... */
		logical += increment;
		/* ... */
		sdev->stat.last_physical = physical;
	}
	/* ... */
	return ret < 0 ? ret : 0;
}
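/*
 * Maps a chunk back to this device: look up the cached extent map for the
 * chunk and scrub exactly the stripe whose physical offset matches the
 * device extent that was found.
 */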
static noinline_for_stack
int scrub_chunk(struct scrub_dev *sdev, u64 chunk_tree, u64 chunk_objectid,
		u64 chunk_offset, u64 length, u64 dev_offset)
{
	struct btrfs_mapping_tree *map_tree =
		&sdev->dev->dev_root->fs_info->mapping_tree;
	/* ... */
	if (em->start != chunk_offset)
		/* ... */
	if (em->len < length)
		/* ... */
	/* ... */
		if (map->stripes[i].dev == sdev->dev &&
		    map->stripes[i].physical == dev_offset) {
			ret = scrub_stripe(sdev, map, i, chunk_offset, length);
			/* ... */
		}
	/* ... */
}
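/*
 * Iterates the device tree's DEV_EXTENT items for the target device in
 * the range [start, end) and scrubs the chunk that owns each one.
 */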
static noinline_for_stack
int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
{
	/* ... */
	key.objectid = sdev->dev->devid;
	/* ... */
	while (1) {
		/* ... */
		if (path->slots[0] >=
		    btrfs_header_nritems(path->nodes[0])) {
			/* ... */
		}
		/* ... */
		slot = path->slots[0];
		/* ... */
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.objectid != sdev->dev->devid)
			break;
		/* ... */
		if (found_key.offset >= end)
			break;
		/* ... */
		if (found_key.offset < key.offset)
			break;
		/* ... */
		length = btrfs_dev_extent_length(l, dev_extent);

		if (found_key.offset + length <= start) {
			/* ... */
		}
		/* ... */
		chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
		chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
		/* ... */
		ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
				  chunk_offset, length, found_key.offset);
		/* ... */
	}
	/* ... */
	return ret < 0 ? ret : 0;
}
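/*
 * Superblocks live outside any chunk, so all mirror copies of the super
 * are queued separately here with the force flag set on scrub_pages().
 */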
static noinline_for_stack
int scrub_supers(struct scrub_dev *sdev)
{
	/* ... */
	gen = root->fs_info->last_trans_committed;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		/* ... */
		ret = scrub_pages(sdev, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
				  BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
		/* ... */
	}
	/* ... */
}
static noinline_for_stack
int scrub_workers_get(struct btrfs_root *root)
{
	/* ... */
}

static noinline_for_stack
void scrub_workers_put(struct btrfs_root *root)
{
	/* ... */
}
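/*
 * Entry point for scrubbing one device: after sanity-checking the
 * node/leaf/sector size assumptions baked into the code above, it sets up
 * the worker threads and the scrub_dev, scrubs the supers and then every
 * chunk, and finally copies the counters out into @progress.
 */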
int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
		    struct btrfs_scrub_progress *progress, int readonly)
{
	/* ... */
	if (btrfs_fs_closing(root->fs_info))
		return -EINVAL;
	/* ... */
		printk(KERN_ERR
		       "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
		       /* ... */);
	/* ... */
		printk(KERN_ERR
		       "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
		       /* ... */);
	/* ... */
		printk(KERN_ERR
		       "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n",
		       root->sectorsize, (unsigned long long)PAGE_SIZE);
	/* ... */
	ret = scrub_workers_get(root);
	/* ... */
		scrub_workers_put(root);
	/* ... */
		scrub_workers_put(root);
	/* ... */
		scrub_workers_put(root);
	/* ... */
	sdev = scrub_setup_dev(dev);
	if (IS_ERR(sdev)) {
		/* ... */
		scrub_workers_put(root);
		return PTR_ERR(sdev);
	}
	/* ... */
	ret = scrub_supers(sdev);
	/* ... */
	ret = scrub_enumerate_chunks(sdev, start, end);
	/* ... */
	if (progress)
		memcpy(progress, &sdev->stat, sizeof(*progress));
	/* ... */
	scrub_free_dev(sdev);
	scrub_workers_put(root);
	/* ... */
}
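/*
 * The progress query takes the same counters: a live scrub's statistics
 * are simply snapshotted into the caller's btrfs_scrub_progress.
 */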
int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
			 struct btrfs_scrub_progress *progress)
{
	/* ... */
	memcpy(progress, &sdev->stat, sizeof(*progress));
	/* ... */
}