#include <linux/slab.h>
#include <linux/module.h>

#define NR_RAID10_BIOS 256

#define IO_BLOCKED ((struct bio *)1)
#define IO_MADE_GOOD ((struct bio *)2)

#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
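/*
 * Editor's note (hedged): IO_BLOCKED and IO_MADE_GOOD appear to be
 * sentinel values stored in r10bio->devs[].bio in place of a real bio
 * pointer, so BIO_SPECIAL() must be checked before those slots are
 * dereferenced or bio_put().  The "<= 2" test relies on 0, 1 and 2 never
 * being valid bio pointers.
 */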
static int max_queued_requests = 1024;

static void allow_barrier(struct r10conf *conf);
static void lower_barrier(struct r10conf *conf);
static int enough(struct r10conf *conf, int ignore);
static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
static void end_reshape_write(struct bio *bio, int error);
static void end_reshape(struct r10conf *conf);
static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
	return kzalloc(size, gfp_flags);

static void r10bio_pool_free(void *r10_bio, void *data)

#define RESYNC_BLOCK_SIZE (64*1024)
#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
#define RESYNC_WINDOW (1024*1024)
#define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE)
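/*
 * Editor's note (hedged): with 4K pages these resync constants appear to
 * work out as follows -- each resync r10bio covers a 64K block backed by
 * RESYNC_PAGES = 16 pages per bio, RESYNC_WINDOW is a 1MB window, and
 * RESYNC_DEPTH = 32MB / 64K = 512 resync requests allowed in flight.
 */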
static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
	r10_bio = r10bio_pool_alloc(gfp_flags, conf);
	for (j = nalloc ; j-- ; ) {
		r10_bio->devs[j].bio = bio;
		r10_bio->devs[j].repl_bio = bio;
	for (j = 0 ; j < nalloc; j++) {
		struct bio *rbio = r10_bio->devs[j].repl_bio;
		bio = r10_bio->devs[j].bio;
				     &conf->mddev->recovery)) {
				struct bio *rbio = r10_bio->devs[0].bio;
				page = rbio->bi_io_vec[i].bv_page;
			bio->bi_io_vec[i].bv_page = page;
			rbio->bi_io_vec[i].bv_page = page;
		safe_put_page(bio->bi_io_vec[i-1].bv_page);
	safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page);
	for ( ; j < nalloc; j++) {
		if (r10_bio->devs[j].bio)
		if (r10_bio->devs[j].repl_bio)
	r10bio_pool_free(r10_bio, conf);
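/*
 * Editor's note (hedged): r10buf_pool_alloc appears to build the buffers
 * used for sync/recovery -- one bio (plus an optional replacement bio) per
 * copy, each backed by RESYNC_PAGES pages.  During recovery the data pages
 * seem to be shared with devs[0].bio rather than allocated per copy, and
 * the error path above unwinds any partially allocated pages and bios.
 */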
static void r10buf_pool_free(void *__r10_bio, void *data)
	for (j=0; j < conf->copies; j++) {
		struct bio *bio = r10bio->devs[j].bio;
			safe_put_page(bio->bi_io_vec[i].bv_page);
			bio->bi_io_vec[i].bv_page = NULL;
		bio = r10bio->devs[j].repl_bio;
	r10bio_pool_free(r10bio, conf);
static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio)
	for (i = 0; i < conf->copies; i++) {
		struct bio **bio = &r10_bio->devs[i].bio;
		bio = &r10_bio->devs[i].repl_bio;

static void free_r10bio(struct r10bio *r10_bio)
	put_all_bios(conf, r10_bio);

static void put_buf(struct r10bio *r10_bio)

static void reschedule_retry(struct r10bio *r10_bio)

static void raid_end_bio_io(struct r10bio *r10_bio)
	if (bio->bi_phys_segments) {
		bio->bi_phys_segments--;
		done = (bio->bi_phys_segments == 0);
	free_r10bio(r10_bio);
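/*
 * Editor's note (hedged): raid10 appears to reuse the master bio's
 * bi_phys_segments field as a count of outstanding sub-requests when a
 * request has to be split (for example around known bad blocks); the
 * original bio is only completed once that count drops to zero.
 */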
static inline void update_head_pos(int slot, struct r10bio *r10_bio)

static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
			 struct bio *bio, int *slotp, int *replp)
	for (slot = 0; slot < conf->copies; slot++) {
		if (r10_bio->devs[slot].bio == bio)
		if (r10_bio->devs[slot].repl_bio == bio) {
	update_head_pos(slot, r10_bio);
static void raid10_end_read_request(struct bio *bio, int error)
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct r10bio *r10_bio = bio->bi_private;

	update_head_pos(slot, r10_bio);
		raid_end_bio_io(r10_bio);
		rdev_dec_pending(rdev, conf->mddev);
			   "md/raid10:%s: %s: rescheduling sector %llu\n",
			   (unsigned long long)r10_bio->sector);
		reschedule_retry(r10_bio);
static void close_write(struct r10bio *r10_bio)

static void one_write_done(struct r10bio *r10_bio)
			reschedule_retry(r10_bio);
			close_write(r10_bio);
				reschedule_retry(r10_bio);
				raid_end_bio_io(r10_bio);
static void raid10_end_write_request(struct bio *bio, int error)
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct r10bio *r10_bio = bio->bi_private;

	dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
			&rdev->mddev->recovery);
		if (is_badblock(rdev,
				r10_bio->devs[slot].addr,
				&first_bad, &bad_sectors)) {
	one_write_done(r10_bio);
		rdev_dec_pending(rdev, conf->mddev);
static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio)
	chunk = r10bio->sector >> geo->chunk_shift;
	sector = r10bio->sector & geo->chunk_mask;
	chunk *= geo->near_copies;
		stripe *= geo->far_copies;
	sector += stripe << geo->chunk_shift;
	for (n = 0; n < geo->near_copies; n++) {
		for (f = 1; f < geo->far_copies; f++) {
			d += geo->near_copies;
			if (d >= geo->raid_disks)
				d -= geo->raid_disks;
		if (dev >= geo->raid_disks) {
			sector += (geo->chunk_mask + 1);
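/*
 * Editor's note (hedged): __raid10_find_phys appears to map a logical
 * sector to its physical copies.  Roughly, the logical chunk number is
 * multiplied by near_copies and laid out round-robin across raid_disks,
 * giving the first (near) copy; each additional far copy then lives
 * near_copies disks further on (wrapping), offset by geo->stride sectors,
 * or by a chunk within the stripe for far_offset layouts.  Example under
 * that reading: 4 disks with near_copies=2, far_copies=1 is the classic
 * "n2" layout -- chunk 0 on disks 0 and 1, chunk 1 on disks 2 and 3, etc.
 */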
static void raid10_find_phys(struct r10conf *conf, struct r10bio *r10bio)
	struct geom *geo = &conf->geo;
		     conf->mddev->reshape_backwards)) {
	__raid10_find_phys(geo, r10bio);

	struct geom *geo = &conf->geo;
	offset = sector & geo->chunk_mask;
	if (geo->far_offset) {
		chunk = sector >> geo->chunk_shift;
		dev -= fc * geo->near_copies;
			dev += geo->raid_disks;
		while (sector >= geo->stride) {
			sector -= geo->stride;
			if (dev < geo->near_copies)
				dev += geo->raid_disks - geo->near_copies;
				dev -= geo->near_copies;
		chunk = sector >> geo->chunk_shift;
	vchunk = chunk * geo->raid_disks + dev;
	return (vchunk << geo->chunk_shift) + offset;
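/*
 * Editor's note (hedged): the block above, starting at the second
 * "struct geom *geo = &conf->geo;", appears to come from raid10_find_virt,
 * the inverse of __raid10_find_phys -- given a physical sector on one disk
 * it strips the far-copy stride, walks the device index back to the first
 * near copy, rebuilds the virtual chunk from chunk * raid_disks + dev
 * (scaled back down by near_copies), and returns
 * (vchunk << chunk_shift) + offset.
 */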
				 struct bvec_merge_data *bvm,
				 struct bio_vec *biovec)
	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
	unsigned int chunk_sectors;
	unsigned int bio_sectors = bvm->bi_size >> 9;
	struct geom *geo = &conf->geo;

	chunk_sectors = (conf->geo.chunk_mask & conf->prev.chunk_mask) + 1;
		     conf->mddev->reshape_backwards))
	if (geo->near_copies < geo->raid_disks) {
		max = (chunk_sectors - ((sector & (chunk_sectors - 1))
					+ bio_sectors)) << 9;
		if (max <= biovec->bv_len && bio_sectors == 0)
			return biovec->bv_len;
			max = biovec->bv_len;
		struct r10bio r10_bio;
		struct r10dev devs[conf->copies];
		struct r10bio *r10_bio = &on_stack.r10_bio;
		if (max <= biovec->bv_len && bio_sectors == 0)
			return biovec->bv_len;
		raid10_find_phys(conf, r10_bio);
		for (s = 0; s < conf->copies; s++) {
			int disk = r10_bio->devs[s].devnum;
				bdev_get_queue(rdev->bdev);
			if (q->merge_bvec_fn) {
				bvm->bi_sector = r10_bio->devs[s].addr
				bvm->bi_bdev = rdev->bdev;
				max = min(max, q->merge_bvec_fn(
				bdev_get_queue(rdev->bdev);
			if (q->merge_bvec_fn) {
				bvm->bi_sector = r10_bio->devs[s].addr
				bvm->bi_bdev = rdev->bdev;
				max = min(max, q->merge_bvec_fn(
			struct r10bio *r10_bio,
	int best_good_sectors;
	struct geom *geo = &conf->geo;

	raid10_find_phys(conf, r10_bio);
	best_good_sectors = 0;
	for (slot = 0; slot < conf->copies ; slot++) {
		dev_sector = r10_bio->devs[slot].addr;
		if (is_badblock(rdev, dev_sector, sectors,
				&first_bad, &bad_sectors)) {
			if (first_bad <= dev_sector) {
				bad_sectors -= (dev_sector - first_bad);
				if (!do_balance && sectors > bad_sectors)
					sectors = bad_sectors;
				if (best_good_sectors > sectors)
					first_bad - dev_sector;
				if (good_sectors > best_good_sectors) {
					best_good_sectors = good_sectors;
		if (geo->far_copies > 1)
			new_distance = r10_bio->devs[slot].addr;
			new_distance = abs(r10_bio->devs[slot].addr -
					   conf->mirrors[disk].head_position);
		if (new_distance < best_dist) {
			best_dist = new_distance;
	if (slot >= conf->copies) {
		rdev_dec_pending(rdev, conf->mddev);
	*max_sectors = best_good_sectors;
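/*
 * Editor's note (hedged): read_balance appears to choose which copy to
 * read from.  Known-bad-block ranges shrink or disqualify a candidate
 * (tracked in best_good_sectors and reported back via *max_sectors), and
 * among the remaining copies the code seems to prefer the device whose
 * head position is closest to the target sector; with far_copies > 1 it
 * falls back to preferring the lowest device address instead.
 */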
	     (i < conf->geo.raid_disks || i < conf->prev.raid_disks)
		ret |= bdi_congested(&q->backing_dev_info, bits);

static int raid10_congested(void *data, int bits)
	struct mddev *mddev = data;

static void flush_pending_writes(struct r10conf *conf)
		struct bio *next = bio->bi_next;
			     !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
static void raise_barrier(struct r10conf *conf, int force)

static void lower_barrier(struct r10conf *conf)

static void wait_barrier(struct r10conf *conf)
				    !bio_list_empty(current->bio_list)),

static void allow_barrier(struct r10conf *conf)
	unsigned long flags;
	spin_unlock_irqrestore(&conf->resync_lock, flags);

static void freeze_array(struct r10conf *conf)
			    flush_pending_writes(conf));

static void unfreeze_array(struct r10conf *conf)
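/*
 * Editor's note (hedged): the barrier machinery above appears to work as
 * follows -- normal I/O brackets each request with wait_barrier() and
 * allow_barrier(), resync/recovery raises and lowers the barrier to keep
 * new I/O out of its window, and freeze_array() waits for all in-flight
 * requests (flushing queued writes) so error handling can run against a
 * quiescent array before unfreeze_array() lets I/O resume.
 */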
static sector_t choose_data_offset(struct r10bio *r10_bio,

static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
	struct mddev *mddev = plug->cb.data;
	if (from_schedule || current->bio_list) {
	bio = bio_list_get(&plug->pending);
		struct bio *next = bio->bi_next;
		bio->bi_next = NULL;
static void make_request(struct mddev *mddev, struct bio * bio)
	struct r10bio *r10_bio;
	struct bio *read_bio;
	int chunk_sects = chunk_mask + 1;
	const int rw = bio_data_dir(bio);
	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
	const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
	const unsigned long do_discard = (bio->bi_rw
	unsigned long flags;
	struct blk_plug_cb *cb;
	int sectors_handled;

	if (unlikely((bio->bi_sector & chunk_mask) + (bio->bi_size >> 9)
		     && (conf->geo.near_copies < conf->geo.raid_disks
			 || conf->prev.near_copies < conf->prev.raid_disks))) {
		struct bio_pair *bp;
		if ((bio->bi_vcnt != 1 && bio->bi_vcnt != 0) ||
			       chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
		make_request(mddev, &bp->bio1);
		make_request(mddev, &bp->bio2);
		printk("md/raid10:%s: make_request bug: can't convert block across chunks"
		       " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
		       (unsigned long long)bio->bi_sector, bio->bi_size >> 10);

	sectors = bio->bi_size >> 9;
	allow_barrier(conf);
	    bio_data_dir(bio) == WRITE &&
	r10_bio->mddev = mddev;
	r10_bio->sector = bio->bi_sector;
		bio->bi_phys_segments = 0;
		clear_bit(BIO_SEG_VALID, &bio->bi_flags);
		rdev = read_balance(conf, r10_bio, &max_sectors);
			raid_end_bio_io(r10_bio);
		r10_bio->devs[slot].bio = read_bio;
		read_bio->bi_sector = r10_bio->devs[slot].addr +
			choose_data_offset(r10_bio, rdev);
		read_bio->bi_bdev = rdev->bdev;
		read_bio->bi_end_io = raid10_end_read_request;
		read_bio->bi_rw = READ | do_sync;
		read_bio->bi_private = r10_bio;
		if (max_sectors < r10_bio->sectors) {
			sectors_handled = (r10_bio->sectors + max_sectors
			r10_bio->sectors = max_sectors;
			if (bio->bi_phys_segments == 0)
				bio->bi_phys_segments = 2;
				bio->bi_phys_segments++;
			reschedule_retry(r10_bio);
			r10_bio->sectors = ((bio->bi_size >> 9)
			r10_bio->sector = bio->bi_sector + sectors_handled;
	raid10_find_phys(conf, r10_bio);
	blocked_rdev = NULL;
	max_sectors = r10_bio->sectors;
	for (i = 0; i < conf->copies; i++) {
		int d = r10_bio->devs[i].devnum;
			conf->mirrors[d].replacement);
			blocked_rdev = rdev;
			blocked_rdev = rrdev;
		if (!rdev && !rrdev) {
			is_bad = is_badblock(rdev, dev_sector,
					     &first_bad, &bad_sectors);
				blocked_rdev = rdev;
			if (is_bad && first_bad <= dev_sector) {
				bad_sectors -= (dev_sector - first_bad);
				if (bad_sectors < max_sectors)
					max_sectors = bad_sectors;
				int good_sectors = first_bad - dev_sector;
				if (good_sectors < max_sectors)
					max_sectors = good_sectors;
		r10_bio->devs[i].bio = bio;
		r10_bio->devs[i].repl_bio = bio;
		for (j = 0; j < i; j++) {
			if (r10_bio->devs[j].bio) {
				d = r10_bio->devs[j].devnum;
				rdev_dec_pending(conf->mirrors[d].rdev, mddev);
			if (r10_bio->devs[j].repl_bio) {
				d = r10_bio->devs[j].devnum;
				rdev = conf->mirrors[d].replacement;
				rdev_dec_pending(rdev, mddev);
		allow_barrier(conf);
	if (max_sectors < r10_bio->sectors) {
		r10_bio->sectors = max_sectors;
		if (bio->bi_phys_segments == 0)
			bio->bi_phys_segments = 2;
			bio->bi_phys_segments++;
	sectors_handled = r10_bio->sector + max_sectors - bio->bi_sector;
	for (i = 0; i < conf->copies; i++) {
		int d = r10_bio->devs[i].devnum;
		if (r10_bio->devs[i].bio) {
			r10_bio->devs[i].bio = mbio;
			mbio->bi_sector = (r10_bio->devs[i].addr+
					   choose_data_offset(r10_bio,
			mbio->bi_bdev = rdev->bdev;
			mbio->bi_end_io = raid10_end_write_request;
			mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
			mbio->bi_private = r10_bio;
				bio_list_add(&plug->pending, mbio);
			spin_unlock_irqrestore(&conf->device_lock, flags);
		if (r10_bio->devs[i].repl_bio) {
			r10_bio->devs[i].repl_bio = mbio;
			mbio->bi_sector = (r10_bio->devs[i].addr +
			mbio->bi_bdev = rdev->bdev;
			mbio->bi_end_io = raid10_end_write_request;
			mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
			mbio->bi_private = r10_bio;
			spin_unlock_irqrestore(&conf->device_lock, flags);
			if (!mddev_check_plugged(mddev))
	if (sectors_handled < (bio->bi_size >> 9)) {
		one_write_done(r10_bio);
		r10_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
		r10_bio->sector = bio->bi_sector + sectors_handled;
	one_write_done(r10_bio);
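/*
 * Editor's note (hedged): the write path above appears to issue one mbio
 * per copy (plus one per active replacement), count them in
 * r10_bio->remaining, and queue them via the plug callback or
 * conf->pending_bio_list.  Where a copy has a bad-block range or a blocked
 * rdev, the request is trimmed to max_sectors and the remainder handled
 * through a fresh r10_bio, with bi_phys_segments tracking how many pieces
 * the original bio was split into.
 */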
	if (conf->geo.near_copies < conf->geo.raid_disks)
	if (conf->geo.near_copies > 1)
	if (conf->geo.far_copies > 1) {
		if (conf->geo.far_offset)
	for (i = 0; i < conf->geo.raid_disks; i++)
static int _enough(struct r10conf *conf, struct geom *geo, int ignore)
			if (conf->mirrors[this].rdev &&
			this = (this+1) % geo->raid_disks;
		first = (first + geo->near_copies) % geo->raid_disks;
	} while (first != 0);

static int enough(struct r10conf *conf, int ignore)
	return _enough(conf, &conf->geo, ignore) &&
		_enough(conf, &conf->prev, ignore);
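/*
 * Editor's note (hedged): _enough() appears to check each set of
 * conf->copies consecutive disks (stepping the start by near_copies) and
 * require at least one working member in every set, optionally ignoring
 * one disk (e.g. one about to be failed); enough() applies the test to
 * both the current and the previous geometry so it stays valid during a
 * reshape.
 */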
static void error(struct mddev *mddev, struct md_rdev *rdev)
	unsigned long flags;
	spin_unlock_irqrestore(&conf->device_lock, flags);
	       "md/raid10:%s: Disk failure on %s, disabling device.\n"
	       "md/raid10:%s: Operation continuing on %d devices.\n",
	       mdname(mddev), conf->geo.raid_disks - mddev->degraded);

static void print_conf(struct r10conf *conf)
		conf->geo.raid_disks);
	for (i = 0; i < conf->geo.raid_disks; i++) {

static void close_sync(struct r10conf *conf)
	allow_barrier(conf);
static int raid10_spare_active(struct mddev *mddev)
	unsigned long flags;

	for (i = 0; i < conf->geo.raid_disks; i++) {
			sysfs_notify_dirent_safe(
				tmp->rdev->sysfs_state);
			sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
		} else if (tmp->rdev
			sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
	spin_unlock_irqrestore(&conf->device_lock, flags);

static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
	int last = conf->geo.raid_disks - 1;

	if (q->merge_bvec_fn) {
	for ( ; mirror <= last ; mirror++) {
	raise_barrier(conf, 0);
	lower_barrier(conf);
	if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
	if (rdev == p->rdev)
	    number < conf->geo.raid_disks &&

static void end_sync_read(struct bio *bio, int error)
	struct r10bio *r10_bio = bio->bi_private;

	d = find_bio_disk(conf, r10_bio, bio, NULL, NULL);
	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
			   &conf->mirrors[d].rdev->corrected_errors);
		reschedule_retry(r10_bio);
static void end_sync_request(struct r10bio *r10_bio)
	struct mddev *mddev = r10_bio->mddev;
			reschedule_retry(r10_bio);
			struct r10bio *r10_bio2 = (struct r10bio *)r10_bio->master_bio;
			reschedule_retry(r10_bio);

static void end_sync_write(struct bio *bio, int error)
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct r10bio *r10_bio = bio->bi_private;
	struct mddev *mddev = r10_bio->mddev;

	d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
		rdev = conf->mirrors[d].replacement;
				&rdev->mddev->recovery);
	} else if (is_badblock(rdev,
			       r10_bio->devs[slot].addr,
			       &first_bad, &bad_sectors))
	rdev_dec_pending(rdev, mddev);
	end_sync_request(r10_bio);
static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
	struct bio *tbio, *fbio;

	for (i=0; i<conf->copies; i++)
		if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags))
	fbio = r10_bio->devs[i].bio;

	for (i=0 ; i < conf->copies ; i++) {
		tbio = r10_bio->devs[i].bio;
		if (tbio->bi_end_io != end_sync_read)
		if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) {
			for (j = 0; j < vcnt; j++)
					   fbio->bi_io_vec[j].bv_len))
		tbio->bi_vcnt = vcnt;
		tbio->bi_size = r10_bio->sectors << 9;
		tbio->bi_phys_segments = 0;
		tbio->bi_flags &= ~(BIO_POOL_MASK - 1);
		tbio->bi_flags |= 1 << BIO_UPTODATE;
		tbio->bi_next = NULL;
		tbio->bi_rw = WRITE;
		tbio->bi_private = r10_bio;
		tbio->bi_sector = r10_bio->devs[i].addr;
		for (j=0; j < vcnt ; j++) {
			tbio->bi_io_vec[j].bv_offset = 0;
		tbio->bi_end_io = end_sync_write;

		d = r10_bio->devs[i].devnum;
		md_sync_acct(conf->mirrors[d].rdev->bdev, tbio->bi_size >> 9);
		tbio->bi_sector += conf->mirrors[d].rdev->data_offset;
		tbio->bi_bdev = conf->mirrors[d].rdev->bdev;

	for (i = 0; i < conf->copies; i++) {
		tbio = r10_bio->devs[i].repl_bio;
		if (!tbio || !tbio->bi_end_io)
		if (r10_bio->devs[i].bio->bi_end_io != end_sync_write
		    && r10_bio->devs[i].bio != fbio)
			for (j = 0; j < vcnt; j++)
		d = r10_bio->devs[i].devnum;
		md_sync_acct(conf->mirrors[d].replacement->bdev,
			     tbio->bi_size >> 9);
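/*
 * Editor's note (hedged): sync_request_write appears to take the first
 * copy that read back successfully (fbio) as the reference, compare every
 * other successfully read copy against it page by page, and rewrite any
 * copy that failed to read or whose data differs; active replacement
 * devices are written with the reference data as well.
 */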
static void fix_recovery_read_error(struct r10bio *r10_bio)
	struct mddev *mddev = r10_bio->mddev;
	struct bio *bio = r10_bio->devs[0].bio;
	int sectors = r10_bio->sectors;
	int dr = r10_bio->devs[0].devnum;
	int dw = r10_bio->devs[1].devnum;

		rdev = conf->mirrors[dr].rdev;
		addr = r10_bio->devs[0].addr + sect,
				  bio->bi_io_vec[idx].bv_page,
			rdev = conf->mirrors[dw].rdev;
			addr = r10_bio->devs[1].addr + sect;
					  bio->bi_io_vec[idx].bv_page,
						&rdev->mddev->recovery);
			if (rdev != conf->mirrors[dw].rdev) {
				addr = r10_bio->devs[1].addr + sect;
					       "md/raid10:%s: recovery aborted"
					       " due to read error\n",
					conf->mirrors[dw].recovery_disabled
static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
	struct bio *wbio, *wbio2;

		fix_recovery_read_error(r10_bio);
		end_sync_request(r10_bio);
	d = r10_bio->devs[1].devnum;
	wbio = r10_bio->devs[1].bio;
	wbio2 = r10_bio->devs[1].repl_bio;
	if (wbio->bi_end_io) {
		md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
	if (wbio2 && wbio2->bi_end_io) {
		md_sync_acct(conf->mirrors[d].replacement->bdev,
			     wbio2->bi_size >> 9);
static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
	unsigned long hours_since_last;

	hours_since_last = (cur_time_mon.tv_sec -
	if (hours_since_last >= 8 * sizeof(read_errors))
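/*
 * Editor's note (hedged): the read-error accounting here appears to decay
 * over time -- the stored count seems to be halved for every hour since
 * the last error, and once more hours have passed than the counter has
 * bits (8 * sizeof(read_errors)) it is simply reset, so only a sustained
 * burst of errors pushes a device over max_read_errors.
 */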
			    int sectors, struct page *page, int rw)
	if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
	if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
			&rdev->mddev->recovery);

static void fix_read_error(struct r10conf *conf, struct mddev *mddev,
			   struct r10bio *r10_bio)
	int sectors = r10_bio->sectors;

	check_decay_read_errors(mddev, rdev);
		       "md/raid10:%s: %s: Raid device exceeded "
		       "read_error threshold [cur %d:max %d]\n",
		       "md/raid10:%s: %s: Failing raid device\n",
			d = r10_bio->devs[sl].devnum;
			    is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
					&first_bad, &bad_sectors) == 0) {
						  r10_bio->devs[sl].addr +
				rdev_dec_pending(rdev, mddev);
		} while (!success && sl != r10_bio->read_slot);
			d = r10_bio->devs[sl].devnum;
			if (r10_sync_page_io(rdev,
					     r10_bio->devs[sl].addr +
				       "md/raid10:%s: read correction "
				       " (%d sectors at %llu on %s)\n",
				       (unsigned long long)(
					       choose_data_offset(r10_bio,
			rdev_dec_pending(rdev, mddev);
			d = r10_bio->devs[sl].devnum;
			switch (r10_sync_page_io(rdev,
						 r10_bio->devs[sl].addr +
				       "md/raid10:%s: unable to read back "
				       " (%d sectors at %llu on %s)\n",
				       (unsigned long long)(
					       choose_data_offset(r10_bio, rdev)),
				       "md/raid10:%s: read error corrected"
				       " (%d sectors at %llu on %s)\n",
				       (unsigned long long)(
					       choose_data_offset(r10_bio, rdev)),
			rdev_dec_pending(rdev, mddev);
static void bi_complete(struct bio *bio, int error)

static int submit_bio_wait(int rw, struct bio *bio)
	init_completion(&event);
	bio->bi_private = &event;
	bio->bi_end_io = bi_complete;
	return test_bit(BIO_UPTODATE, &bio->bi_flags);
static int narrow_write_error(struct r10bio *r10_bio, int i)
	struct mddev *mddev = r10_bio->mddev;
	int sect_to_write = r10_bio->sectors;

	block_sectors = 1 << rdev->badblocks.shift;
	sector = r10_bio->sector;
	sectors = ((r10_bio->sector + block_sectors)
	while (sect_to_write) {
		if (sectors > sect_to_write)
			sectors = sect_to_write;
		md_trim_bio(wbio, sector - bio->bi_sector, sectors);
		wbio->bi_sector = (r10_bio->devs[i].addr+
				   choose_data_offset(r10_bio, rdev) +
				   (sector - r10_bio->sector));
		wbio->bi_bdev = rdev->bdev;
		if (submit_bio_wait(WRITE, wbio) == 0)
		sectors = block_sectors;
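/*
 * Editor's note (hedged): narrow_write_error appears to retry a failed
 * write in bad-block-sized pieces (1 << rdev->badblocks.shift sectors,
 * aligned to that granularity).  Each piece is cloned from the original
 * bio, trimmed, and submitted synchronously; pieces that still fail are
 * recorded as bad blocks instead of failing the whole device.
 */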
static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
	unsigned long do_sync;

	if (mddev->ro == 0) {
		fix_read_error(conf, mddev, r10_bio);
		unfreeze_array(conf);
	rdev_dec_pending(rdev, mddev);

	rdev = read_balance(conf, r10_bio, &max_sectors);
		       " read error for block %llu\n",
		       (unsigned long long)r10_bio->sector);
		raid_end_bio_io(r10_bio);
		   "md/raid10:%s: %s: redirecting "
		   "sector %llu to another mirror\n",
		   (unsigned long long)r10_bio->sector);
		    r10_bio->sector - bio->bi_sector,
	bio->bi_sector = r10_bio->devs[slot].addr
		+ choose_data_offset(r10_bio, rdev);
	bio->bi_bdev = rdev->bdev;
	bio->bi_rw = READ | do_sync;
	bio->bi_private = r10_bio;
	bio->bi_end_io = raid10_end_read_request;
	if (max_sectors < r10_bio->sectors) {
		int sectors_handled =
			r10_bio->sector + max_sectors
		r10_bio->sectors = max_sectors;
		if (mbio->bi_phys_segments == 0)
			mbio->bi_phys_segments = 2;
			mbio->bi_phys_segments++;
		r10_bio->sectors = (mbio->bi_size >> 9)
		r10_bio->mddev = mddev;
		r10_bio->sector = mbio->bi_sector
static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
		for (m = 0; m < conf->copies; m++) {
			int dev = r10_bio->devs[m].devnum;
				     &r10_bio->devs[m].bio->bi_flags)) {
						r10_bio->devs[m].addr,
						r10_bio->devs[m].addr,
			if (r10_bio->devs[m].repl_bio == NULL)
				     &r10_bio->devs[m].repl_bio->bi_flags)) {
						r10_bio->devs[m].addr,
						r10_bio->devs[m].addr,
		for (m = 0; m < conf->copies; m++) {
			int dev = r10_bio->devs[m].devnum;
			struct bio *bio = r10_bio->devs[m].bio;
					r10_bio->devs[m].addr,
				rdev_dec_pending(rdev, conf->mddev);
			} else if (bio != NULL &&
				   !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
				if (!narrow_write_error(r10_bio, m)) {
				rdev_dec_pending(rdev, conf->mddev);
			bio = r10_bio->devs[m].repl_bio;
					r10_bio->devs[m].addr,
				rdev_dec_pending(rdev, conf->mddev);
		close_write(r10_bio);
		raid_end_bio_io(r10_bio);
	struct mddev *mddev = thread->mddev;
	struct r10bio *r10_bio;
	unsigned long flags;

		flush_pending_writes(conf);
		if (list_empty(head)) {
			spin_unlock_irqrestore(&conf->device_lock, flags);
		spin_unlock_irqrestore(&conf->device_lock, flags);

		mddev = r10_bio->mddev;
			handle_write_completed(conf, r10_bio);
			reshape_request_write(mddev, r10_bio);
			sync_request_write(mddev, r10_bio);
			recovery_request_write(mddev, r10_bio);
			handle_read_error(mddev, r10_bio);
static int init_resync(struct r10conf *conf)
	for (i = 0; i < conf->geo.raid_disks; i++)
		if (conf->mirrors[i].replacement)
			     int *skipped, int go_faster)
	struct r10bio *r10_bio;
	struct bio *biolist = NULL, *bio;
	int chunks_skipped = 0;

	if (init_resync(conf))
	if (sector_nr >= max_sector) {
		else for (i = 0; i < conf->geo.raid_disks; i++) {
		for (i = 0; i < conf->geo.raid_disks; i++)
			if (conf->mirrors[i].replacement)
		return sectors_skipped;
		return reshape_request(mddev, sector_nr, skipped);
	if (chunks_skipped >= conf->geo.raid_disks) {
		return (max_sector - sector_nr) + sectors_skipped;
	if (conf->geo.near_copies < conf->geo.raid_disks &&
	    max_sector > (sector_nr | chunk_mask))
		max_sector = (sector_nr | chunk_mask) + 1;

		for (i = 0 ; i < conf->geo.raid_disks; i++) {
			sect = raid10_find_virt(conf, sector_nr, i);
			if (sync_blocks < max_sync)
				max_sync = sync_blocks;
				chunks_skipped = -1;
			raise_barrier(conf, rb2 != NULL);
			r10_bio->mddev = mddev;
			raid10_find_phys(conf, r10_bio);
			for (j = 0; j < conf->geo.raid_disks; j++)
					    &sync_blocks, still_degraded);
			for (j=0; j<conf->copies;j++) {
				int d = r10_bio->devs[j].devnum;
				sector = r10_bio->devs[j].addr;
				if (is_badblock(rdev, sector, max_sync,
						&first_bad, &bad_sectors)) {
					if (first_bad > sector)
						max_sync = first_bad - sector;
						bad_sectors -= (sector
					if (max_sync > bad_sectors)
						max_sync = bad_sectors;
				bio = r10_bio->devs[0].bio;
				bio->bi_next = biolist;
				bio->bi_private = r10_bio;
				bio->bi_end_io = end_sync_read;
				from_addr = r10_bio->devs[j].addr;
				bio->bi_bdev = rdev->bdev;
				for (k=0; k<conf->copies; k++)
					if (r10_bio->devs[k].devnum == i)
				to_addr = r10_bio->devs[k].addr;
				r10_bio->devs[0].devnum = d;
				r10_bio->devs[0].addr = from_addr;
				r10_bio->devs[1].devnum = i;
				r10_bio->devs[1].addr = to_addr;
					rdev = mirror->rdev;
					bio = r10_bio->devs[1].bio;
					bio->bi_next = biolist;
					bio->bi_private = r10_bio;
					bio->bi_end_io = end_sync_write;
					bio->bi_sector = to_addr
					bio->bi_bdev = rdev->bdev;
					r10_bio->devs[1].bio->bi_end_io = NULL;
				bio = r10_bio->devs[1].repl_bio;
					bio->bi_end_io = NULL;
					bio->bi_next = biolist;
					bio->bi_private = r10_bio;
					bio->bi_end_io = end_sync_write;
					bio->bi_bdev = rdev->bdev;
				for (k = 0; k < conf->copies; k++)
					if (r10_bio->devs[k].devnum == i)
						     &mirror->rdev->flags)
						    r10_bio->devs[k].addr,
						    r10_bio->devs[k].addr,
					       "working devices for recovery.\n",
		if (biolist == NULL) {
			struct r10bio *rb2 = r10_bio;
		return sync_blocks + sectors_skipped;
		if (sync_blocks < max_sync)
			max_sync = sync_blocks;
	raise_barrier(conf, 0);
	r10_bio->sector = sector_nr;
	raid10_find_phys(conf, r10_bio);
	r10_bio->sectors = (sector_nr | chunk_mask) - sector_nr + 1;

	for (i = 0; i < conf->copies; i++) {
		int d = r10_bio->devs[i].devnum;
		if (r10_bio->devs[i].repl_bio)
			r10_bio->devs[i].repl_bio->bi_end_io = NULL;
		bio = r10_bio->devs[i].bio;
		bio->bi_end_io = NULL;
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
		sector = r10_bio->devs[i].addr;
		if (is_badblock(conf->mirrors[d].rdev,
				&first_bad, &bad_sectors)) {
			if (first_bad > sector)
				max_sync = first_bad - sector;
				bad_sectors -= (sector - first_bad);
			if (max_sync > bad_sectors)
				max_sync = bad_sectors;
		bio->bi_next = biolist;
		bio->bi_private = r10_bio;
		bio->bi_end_io = end_sync_read;
		bio->bi_sector = sector +
			conf->mirrors[d].rdev->data_offset;
		bio->bi_bdev = conf->mirrors[d].rdev->bdev;
			      &conf->mirrors[d].replacement->flags))
		bio = r10_bio->devs[i].repl_bio;
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
		sector = r10_bio->devs[i].addr;
		bio->bi_next = biolist;
		bio->bi_private = r10_bio;
		bio->bi_end_io = end_sync_write;
		bio->bi_sector = sector +
			conf->mirrors[d].replacement->data_offset;
		bio->bi_bdev = conf->mirrors[d].replacement->bdev;

		for (i=0; i<conf->copies; i++) {
			int d = r10_bio->devs[i].devnum;
			if (r10_bio->devs[i].bio->bi_end_io)
				rdev_dec_pending(conf->mirrors[d].rdev,
			if (r10_bio->devs[i].repl_bio &&
			    r10_bio->devs[i].repl_bio->bi_end_io)

	for (bio = biolist; bio ; bio=bio->bi_next) {
		bio->bi_flags &= ~(BIO_POOL_MASK - 1);
			bio->bi_flags |= 1 << BIO_UPTODATE;
		bio->bi_phys_segments = 0;

	if (sector_nr + max_sync < max_sector)
		max_sector = sector_nr + max_sync;
		if (sector_nr + (len>>9) > max_sector)
			len = (max_sector - sector_nr) << 9;
		for (bio= biolist ; bio ; bio=bio->bi_next) {
			page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
				bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
				for (bio2 = biolist;
				     bio2 && bio2 != bio;
				     bio2 = bio2->bi_next) {
					bio2->bi_size -= len;
					bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
		nr_sectors += len>>9;
		sector_nr += len>>9;

		biolist = biolist->bi_next;
		bio->bi_next = NULL;
		r10_bio = bio->bi_private;
		if (bio->bi_end_io == end_sync_read) {
			md_sync_acct(bio->bi_bdev, nr_sectors);
	if (sectors_skipped)
	if (sector_nr + max_sync < max_sector)
		max_sector = sector_nr + max_sync;
	sectors_skipped += (max_sector - sector_nr);
	sector_nr = max_sector;
raid10_size(struct mddev *mddev, sector_t sectors, int raid_disks)
		raid_disks = min(conf->geo.raid_disks,
				 conf->prev.raid_disks);
	size = sectors >> conf->geo.chunk_shift;
	size = size * raid_disks;
	return size << conf->geo.chunk_shift;

	size = size >> conf->geo.chunk_shift;
	size = size * conf->geo.raid_disks;
	size = size * conf->copies;
	if (conf->geo.far_offset)
		conf->geo.stride = 1 << conf->geo.chunk_shift;
		conf->geo.stride = size << conf->geo.chunk_shift;
static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
	fc = (layout >> 8) & 255;
	fo = layout & (1<<16);
	geo->raid_disks = disks;
	geo->near_copies = nc;
	geo->far_copies = fc;
	geo->far_offset = fo;
	geo->chunk_mask = chunk - 1;
	geo->chunk_shift = ffz(~chunk);
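/*
 * Editor's note (hedged): the md raid10 layout word appears to be decoded
 * here with near_copies in the low byte, far_copies in the next byte, and
 * bit 16 selecting the "far offset" variant.  For example, layout 0x102
 * would mean near_copies = 2, far_copies = 1, far_offset = 0 (the common
 * "n2" arrangement); ffz(~chunk) recovers log2 of the power-of-two chunk
 * size.
 */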
static struct r10conf *setup_conf(struct mddev *mddev)
	copies = setup_geo(&geo, mddev, geo_new);
	       "at least PAGE_SIZE(%ld) and be a power of 2.\n",
	if (copies < 2 || copies > mddev->raid_disks) {
		printk(KERN_ERR "md/raid10:%s: unsupported raid10 layout: 0x%8x\n",
					   r10bio_pool_free, conf);
	if (conf->prev.far_offset)
		conf->prev.stride = 1 << conf->prev.chunk_shift;
	conf->mddev = mddev;
	return ERR_PTR(err);
static int run(struct mddev *mddev)
	int i, disk_idx, chunk_size;
	bool discard_supported = false;

	conf = setup_conf(mddev);
		return PTR_ERR(conf);
	if (conf->geo.raid_disks % conf->geo.near_copies)
		 (conf->geo.raid_disks / conf->geo.near_copies));
		if (disk_idx >= conf->geo.raid_disks &&
		    disk_idx >= conf->prev.raid_disks)
		disk = conf->mirrors + disk_idx;
		q = bdev_get_queue(rdev->bdev);
		if (q->merge_bvec_fn)
		if (first || diff < min_offset_diff)
			min_offset_diff = diff;
		if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
			discard_supported = true;
	if (discard_supported)
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
	if (!enough(conf, -1)) {
		printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n",
	if (conf->geo.far_copies != 1 &&
	    conf->geo.far_offset == 0)
	if (conf->prev.far_copies != 1 &&
	    conf->prev.far_offset == 0)
	     i < conf->geo.raid_disks
		     || i < conf->prev.raid_disks;
	       " -- starting background reconstruction\n",
	       "md/raid10:%s: active with %d out of %d devices\n",
	       mdname(mddev), conf->geo.raid_disks - mddev->degraded,
	       conf->geo.raid_disks);

	size = raid10_size(mddev, 0, 0);
		int stripe = conf->geo.raid_disks *
		mddev->queue->backing_dev_info.congested_fn = raid10_congested;
		mddev->queue->backing_dev_info.congested_data = mddev;
		stripe /= conf->geo.near_copies;
		if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
			mddev->queue->backing_dev_info.ra_pages = 2 * stripe;

		unsigned long before_length, after_length;
		before_length = ((1 << conf->prev.chunk_shift) *
				 conf->prev.far_copies);
		after_length = ((1 << conf->geo.chunk_shift) *
				conf->geo.far_copies);
		if (max(before_length, after_length) > min_offset_diff) {
			printk("md/raid10: offset difference not enough to continue reshape\n");
static int stop(struct mddev *mddev)
	raise_barrier(conf, 0);
	lower_barrier(conf);

static void raid10_quiesce(struct mddev *mddev, int state)
		raise_barrier(conf, 0);
		lower_barrier(conf);

static int raid10_resize(struct mddev *mddev, sector_t sectors)
	if (conf->geo.far_copies > 1 && !conf->geo.far_offset)
	oldsize = raid10_size(mddev, 0, 0);
	size = raid10_size(mddev, sectors, 0);
	calc_sectors(conf, sectors);
static void *raid10_takeover_raid0(struct mddev *mddev)
	conf = setup_conf(mddev);
	if (!IS_ERR(conf)) {
			if (rdev->raid_disk >= 0)
				rdev->new_raid_disk = rdev->raid_disk * 2;

static void *raid10_takeover(struct mddev *mddev)
	struct r0conf *raid0_conf;

	if (mddev->level == 0) {
		raid0_conf = mddev->private;
			       " with more than one zone.\n",
		return raid10_takeover_raid0(mddev);

static int raid10_check_reshape(struct mddev *mddev)
	if (conf->geo.far_copies != 1 && !conf->geo.far_offset)
	if (geo.far_copies > 1 && !geo.far_offset)
	if (!enough(conf, -1))
static int calc_degraded(struct r10conf *conf)
	int degraded, degraded2;

	for (i = 0; i < conf->prev.raid_disks; i++) {
	if (conf->geo.raid_disks == conf->prev.raid_disks)
	for (i = 0; i < conf->geo.raid_disks; i++) {
	if (conf->geo.raid_disks <= conf->prev.raid_disks)
	if (degraded2 > degraded)
static int raid10_start_reshape(struct mddev *mddev)
	unsigned long before_length, after_length;

	before_length = ((1 << conf->prev.chunk_shift) *
			 conf->prev.far_copies);
	after_length = ((1 << conf->geo.chunk_shift) *
			conf->geo.far_copies);
	if (first || diff < min_offset_diff)
		min_offset_diff = diff;
	if (max(before_length, after_length) > min_offset_diff)
	if (spares < mddev->delta_disks)
		sector_t size = raid10_size(mddev, 0, 0);
		if (size < mddev->array_sectors) {
			printk(KERN_ERR "md/raid10:%s: array size must be reduced before number of disks\n",
				 raid10_size(mddev, 0,
					     conf->geo.raid_disks),
		if (rdev->raid_disk < 0 &&
			if (raid10_add_disk(mddev, rdev) == 0) {
				    conf->prev.raid_disks)
				if (sysfs_link_rdev(mddev, rdev))
	mddev->degraded = calc_degraded(conf);
	mddev->reshape_position = MaxSector;
	spin_unlock_irq(&conf->device_lock);
	s = (s | geo->chunk_mask) + 1;
	s >>= geo->chunk_shift;
	s *= geo->near_copies;
	s *= geo->far_copies;
	s <<= geo->chunk_shift;

	s >>= geo->chunk_shift;
	s *= geo->near_copies;
	s *= geo->far_copies;
	s <<= geo->chunk_shift;
	struct r10bio *r10_bio;
	struct bio *bio, *read_bio;
	int sectors_done = 0;

	if (sector_nr == 0) {
			sector_nr = (raid10_size(mddev, 0, 0)
		sector_nr = last & ~(sector_t)(conf->geo.chunk_mask
					       & conf->prev.chunk_mask);
		last = sector_nr | (conf->geo.chunk_mask
				    & conf->prev.chunk_mask);
	allow_barrier(conf);
	raise_barrier(conf, sectors_done != 0);
	r10_bio->mddev = mddev;
	r10_bio->sector = sector_nr;
	r10_bio->sectors = last - sector_nr + 1;
	rdev = read_balance(conf, r10_bio, &max_sectors);
		return sectors_done;
	read_bio->bi_bdev = rdev->bdev;
	read_bio->bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
	read_bio->bi_private = r10_bio;
	read_bio->bi_end_io = end_sync_read;
	read_bio->bi_rw = READ;
	read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
	read_bio->bi_flags |= 1 << BIO_UPTODATE;
	read_bio->bi_vcnt = 0;
	read_bio->bi_idx = 0;
	read_bio->bi_size = 0;
	__raid10_find_phys(&conf->geo, r10_bio);
	read_bio->bi_next = NULL;
	for (s = 0; s < conf->copies*2; s++) {
		int d = r10_bio->devs[s/2].devnum;
			rdev2 = conf->mirrors[d].replacement;
			b = r10_bio->devs[s/2].repl_bio;
			b = r10_bio->devs[s/2].bio;
		b->bi_bdev = rdev2->bdev;
		b->bi_private = r10_bio;
		b->bi_end_io = end_reshape_write;
		b->bi_flags &= ~(BIO_POOL_MASK - 1);
		b->bi_flags |= 1 << BIO_UPTODATE;
		struct page *page = r10_bio->devs[0].bio->bi_io_vec[s/(PAGE_SIZE>>9)].bv_page;
		int len = (max_sectors - s) << 9;
		for (bio = blist; bio ; bio = bio->bi_next) {
			     bio2 && bio2 != bio;
			     bio2 = bio2->bi_next) {
				bio2->bi_size -= len;
				bio2->bi_flags &= ~(1<<BIO_SEG_VALID);
		sector_nr += len >> 9;
		nr_sectors += len >> 9;
	md_sync_acct(read_bio->bi_bdev, r10_bio->sectors);
	read_bio->bi_next = NULL;
	if (sector_nr <= last)
	return sectors_done;
static void end_reshape_request(struct r10bio *r10_bio);
static int handle_reshape_read_error(struct mddev *mddev,
				     struct r10bio *r10_bio);
static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
	if (handle_reshape_read_error(mddev, r10_bio) < 0) {
	for (s = 0; s < conf->copies*2; s++) {
		int d = r10_bio->devs[s/2].devnum;
			rdev = conf->mirrors[d].replacement;
			b = r10_bio->devs[s/2].repl_bio;
			b = r10_bio->devs[s/2].bio;
		md_sync_acct(b->bi_bdev, r10_bio->sectors);
	end_reshape_request(r10_bio);
static void end_reshape(struct r10conf *conf)
	if (conf->mddev->queue) {
		int stripe = conf->geo.raid_disks *
		stripe /= conf->geo.near_copies;
		if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
			conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
static int handle_reshape_read_error(struct mddev *mddev,
				     struct r10bio *r10_bio)
	int sectors = r10_bio->sectors;
		struct r10bio r10_bio;
		struct r10dev devs[conf->copies];
	struct r10bio *r10b = &on_stack.r10_bio;
	struct bio_vec *bvec = r10_bio->master_bio->bi_io_vec;

	__raid10_find_phys(&conf->prev, r10b);
			if (slot >= conf->copies)
			if (slot == first_slot)
static void end_reshape_write(struct bio *bio, int error)
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct r10bio *r10_bio = bio->bi_private;
	struct mddev *mddev = r10_bio->mddev;

	d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
		rdev = conf->mirrors[d].replacement;
	rdev_dec_pending(rdev, mddev);
	end_reshape_request(r10_bio);

static void end_reshape_request(struct r10bio *r10_bio)
static void raid10_finish_reshape(struct mddev *mddev)
	sector_t size = raid10_size(mddev, 0, 0);
		for (d = conf->geo.raid_disks ;
			rdev = conf->mirrors[d].replacement;
	.error_handler	= error,
	.hot_add_disk	= raid10_add_disk,
	.hot_remove_disk= raid10_remove_disk,
	.spare_active	= raid10_spare_active,
	.quiesce	= raid10_quiesce,
	.size		= raid10_size,
	.resize		= raid10_resize,
	.takeover	= raid10_takeover,
	.check_reshape	= raid10_check_reshape,
	.start_reshape	= raid10_start_reshape,
	.finish_reshape	= raid10_finish_reshape,
static int __init raid_init(void)

static void raid_exit(void)