34 #include <linux/slab.h>
37 #include <linux/module.h>
47 #define NR_RAID1_BIOS 256
54 #define IO_BLOCKED ((struct bio *)1)
59 #define IO_MADE_GOOD ((struct bio *)2)
61 #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
67 static int max_queued_requests = 1024;
69 static void allow_barrier(struct r1conf *conf);
70 static void lower_barrier(struct r1conf *conf);
72 static void *r1bio_pool_alloc(gfp_t gfp_flags, void *data)
78 return kzalloc(size, gfp_flags);
81 static void r1bio_pool_free(void *r1_bio, void *data)
86 #define RESYNC_BLOCK_SIZE (64*1024)
88 #define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
89 #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
90 #define RESYNC_WINDOW (2048*1024)
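/*
 * r1buf_pool_alloc: allocate an r1bio plus one bio per mirror and the
 * RESYNC_PAGES pages used as resync buffers.  For a user-requested
 * check/repair every bio gets its own pages; otherwise only the first
 * bio's pages are allocated and the remaining bios share them
 * (see the assignment at lines 138-139 below).
 */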
92 static void *r1buf_pool_alloc(gfp_t gfp_flags, void *data)
100 r1_bio = r1bio_pool_alloc(gfp_flags, pi);
111 r1_bio->bios[j] = bio;
124 bio = r1_bio->bios[j];
130 bio->bi_io_vec[i].bv_page = page;
138 r1_bio->bios[j]->bi_io_vec[i].bv_page =
139 r1_bio->bios[0]->bi_io_vec[i].bv_page;
148 for (i = 0; i < r1_bio->bios[j]->bi_vcnt; i++)
152 while (++j < pi->raid_disks)
154 r1bio_pool_free(r1_bio, data);
158 static void r1buf_pool_free(void *__r1_bio, void *data)
167 r1bio->bios[j]->bi_io_vec[i].bv_page !=
168 r1bio->bios[0]->bi_io_vec[i].bv_page)
169 safe_put_page(r1bio->bios[j]->bi_io_vec[i].bv_page);
174 r1bio_pool_free(r1bio, data);
177 static void put_all_bios(struct r1conf *conf, struct r1bio *r1_bio)
182 struct bio **bio = r1_bio->bios + i;
189 static void free_r1bio(struct r1bio *r1_bio)
193 put_all_bios(conf, r1_bio);
197 static void put_buf(struct r1bio *r1_bio)
203 struct bio *bio = r1_bio->bios[i];
213 static void reschedule_retry(struct r1bio *r1_bio)
233 static void call_bio_endio(struct r1bio *r1_bio)
239 if (bio->bi_phys_segments) {
242 bio->bi_phys_segments--;
243 done = (bio->bi_phys_segments == 0);
260 static void raid_end_bio_io(struct r1bio *r1_bio)
266 pr_debug("raid1: sync end %s on sectors %llu-%llu\n",
267 (bio_data_dir(bio) == WRITE) ? "write" : "read",
268 (unsigned long long) bio->bi_sector,
269 (unsigned long long) bio->bi_sector +
270 (bio->bi_size >> 9) - 1);
272 call_bio_endio(r1_bio);
280 static inline void update_head_pos(int disk, struct r1bio *r1_bio)
284 conf->mirrors[disk].head_position =
291 static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
297 for (mirror = 0; mirror < raid_disks * 2; mirror++)
298 if (r1_bio->bios[mirror] == bio)
301 BUG_ON(mirror == raid_disks * 2);
302 update_head_pos(mirror, r1_bio);
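/*
 * raid1_end_read_request: completion handler for a read sent to one mirror.
 * On success the original bio is completed; on failure the r1bio is either
 * completed anyway (if no other mirror can service it) or handed back to
 * raid1d via reschedule_retry() so the read can be retried elsewhere.
 */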
307 static void raid1_end_read_request(struct bio *bio, int error)
309 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
310 struct r1bio *r1_bio = bio->bi_private;
318 update_head_pos(mirror, r1_bio);
337 raid_end_bio_io(r1_bio);
338 rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
346 "rescheduling sector %llu\n",
350 (unsigned long long)r1_bio->sector);
352 reschedule_retry(r1_bio);
357 static void close_write(struct r1bio *r1_bio)
376 static void r1_bio_write_done(struct r1bio *r1_bio)
382 reschedule_retry(r1_bio);
386 reschedule_retry(r1_bio);
388 raid_end_bio_io(r1_bio);
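/*
 * raid1_end_write_request: completion handler for a write to one mirror.
 * A failure marks the rdev (WriteErrorSeen / bad blocks) rather than
 * failing the bio immediately; the master bio completes only after every
 * mirror has reported, via r1_bio_write_done().
 */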
392 static void raid1_end_write_request(struct bio *bio, int error)
394 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
395 struct r1bio *r1_bio = bio->bi_private;
398 struct bio *to_put = NULL;
400 mirror = find_bio_disk(r1_bio, bio);
407 &conf->mirrors[mirror].rdev->flags);
409 &conf->mirrors[mirror].rdev->flags))
411 conf->mddev->recovery);
433 if (is_badblock(conf->mirrors[mirror].rdev,
435 &first_bad, &bad_sectors)) {
457 pr_debug("raid1: behind end write sectors"
459 (unsigned long long) mbio->bi_sector,
460 (unsigned long long) mbio->bi_sector +
461 (mbio->bi_size >> 9) - 1);
462 call_bio_endio(r1_bio);
467 rdev_dec_pending(conf->mirrors[mirror].rdev,
474 r1_bio_write_done(r1_bio);
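/*
 * read_balance: pick the mirror to read from.  Sequential reads stick to
 * the disk that served the previous request; otherwise the disk with the
 * nearest head position (or, on non-rotational devices, the fewest pending
 * requests) wins.  Known bad blocks can shrink *max_sectors so the caller
 * only reads the part that is good on the chosen disk.
 */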
495 static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors)
499 int best_good_sectors;
500 int best_disk, best_dist_disk, best_pending_disk;
504 unsigned int min_pending;
507 int choose_next_idle;
520 best_pending_disk = -1;
522 best_good_sectors = 0;
524 choose_next_idle = 0;
532 for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
536 unsigned int pending;
552 if (is_badblock(rdev, this_sector, sectors,
553 &first_bad, &bad_sectors)) {
554 if (first_bad < this_sector)
557 best_good_sectors = first_bad - this_sector;
567 if (is_badblock(rdev, this_sector, sectors,
568 &first_bad, &bad_sectors)) {
572 if (first_bad <= this_sector) {
577 bad_sectors -= (this_sector - first_bad);
578 if (choose_first && sectors > bad_sectors)
579 sectors = bad_sectors;
580 if (best_good_sectors > sectors)
584 sector_t good_sectors = first_bad - this_sector;
585 if (good_sectors > best_good_sectors) {
586 best_good_sectors = good_sectors;
596 nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
597 has_nonrot_disk |= nonrot;
599 dist = abs(this_sector - conf->mirrors[disk].head_position);
605 if (conf->mirrors[disk].next_seq_sect == this_sector
607 int opt_iosize = bdev_io_opt(rdev->bdev) >> 9;
624 if (nonrot && opt_iosize > 0 &&
629 choose_next_idle = 1;
640 if (choose_next_idle)
643 if (min_pending > pending) {
644 min_pending = pending;
645 best_pending_disk = disk;
648 if (dist < best_dist) {
650 best_dist_disk = disk;
660 if (best_disk == -1) {
662 best_disk = best_pending_disk;
664 best_disk = best_dist_disk;
667 if (best_disk >= 0) {
676 rdev_dec_pending(rdev, conf->mddev);
679 sectors = best_good_sectors;
681 if (conf->mirrors[best_disk].next_seq_sect != this_sector)
682 conf->mirrors[best_disk].seq_start = this_sector;
693 struct bvec_merge_data *bvm,
694 struct bio_vec *biovec)
699 int max = biovec->bv_len;
704 for (disk = 0; disk < conf->raid_disks * 2; disk++) {
709 bdev_get_queue(rdev->bdev);
710 if (q->merge_bvec_fn) {
711 bvm->bi_sector = sector +
713 bvm->bi_bdev = rdev->bdev;
714 max = min(max, q->merge_bvec_fn(
746 ret |= bdi_congested(&q->backing_dev_info, bits);
748 ret &= bdi_congested(&q->backing_dev_info, bits);
756 static int raid1_congested(void *data, int bits)
758 struct mddev *mddev = data;
764 static void flush_pending_writes(struct r1conf *conf)
782 struct bio *next = bio->bi_next;
785 !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
817 #define RESYNC_DEPTH 32
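/*
 * Barrier handling: raise_barrier()/lower_barrier() bracket a resync
 * window, wait_barrier()/allow_barrier() bracket normal I/O, and
 * freeze_array()/unfreeze_array() quiesce all in-flight requests so that
 * error handling can run with the array idle.
 */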
819 static void raise_barrier(struct r1conf *conf)
838 static void lower_barrier(struct r1conf *conf)
848 static void wait_barrier(struct r1conf *conf)
866 !bio_list_empty(current->bio_list)),
875 static void allow_barrier(struct r1conf *conf)
884 static void freeze_array(struct r1conf *conf)
904 flush_pending_writes(conf));
907 static void unfreeze_array(struct r1conf *conf)
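/*
 * alloc_behind_pages: copy the pages of a write bio into privately
 * allocated pages so the original bio can be completed while the copy is
 * still being written out to write-mostly (write-behind) devices.
 */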
920 static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio)
923 struct bio_vec *bvec;
924 struct bio_vec *bvecs = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
929 bio_for_each_segment(bvec, bio, i) {
934 memcpy(kmap(bvecs[i].bv_page) + bvec->bv_offset,
935 kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
945 for (i = 0; i < bio->bi_vcnt; i++)
946 if (bvecs[i].bv_page)
949 pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
953 struct blk_plug_cb cb;
958 static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
962 struct mddev *mddev = plug->cb.data;
966 if (from_schedule || current->bio_list) {
977 bio = bio_list_get(&plug->pending);
982 struct bio *next = bio->bi_next;
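/*
 * make_request: entry point for normal I/O.  Reads go to a single mirror
 * chosen by read_balance(); writes are cloned to every usable mirror.
 * Bad blocks or suspended ranges may force the request to be split, in
 * which case bi_phys_segments counts the outstanding pieces.
 */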
990 static void make_request(struct mddev *mddev, struct bio *bio)
994 struct r1bio *r1_bio;
995 struct bio *read_bio;
999 const int rw = bio_data_dir(bio);
1000 const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
1002 const unsigned long do_discard = (bio->bi_rw
1005 struct blk_plug_cb *cb;
1008 int sectors_handled;
1019 if (bio_data_dir(bio) == WRITE &&
1020 bio->bi_sector + bio->bi_size/512 > mddev->suspend_lo &&
1031 if (bio->bi_sector + bio->bi_size/512 <= mddev->suspend_lo ||
1051 r1_bio->sectors = bio->bi_size >> 9;
1053 r1_bio->mddev = mddev;
1054 r1_bio->sector = bio->bi_sector;
1063 bio->bi_phys_segments = 0;
1064 clear_bit(BIO_SEG_VALID, &bio->bi_flags);
1073 rdisk = read_balance(conf, r1_bio, &max_sectors);
1077 raid_end_bio_io(r1_bio);
1080 mirror = conf->mirrors + rdisk;
1097 r1_bio->bios[rdisk] = read_bio;
1099 read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
1100 read_bio->bi_bdev = mirror->rdev->bdev;
1101 read_bio->bi_end_io = raid1_end_read_request;
1102 read_bio->bi_rw = READ | do_sync;
1103 read_bio->bi_private = r1_bio;
1105 if (max_sectors < r1_bio->sectors) {
1110 sectors_handled = (r1_bio->sector + max_sectors
1112 r1_bio->sectors = max_sectors;
1114 if (bio->bi_phys_segments == 0)
1115 bio->bi_phys_segments = 2;
1117 bio->bi_phys_segments++;
1124 reschedule_retry(r1_bio);
1129 r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
1131 r1_bio->mddev = mddev;
1132 r1_bio->sector = bio->bi_sector + sectors_handled;
1160 blocked_rdev = NULL;
1162 max_sectors = r1_bio->sectors;
1163 for (i = 0; i < disks; i++) {
1167 blocked_rdev = rdev;
1173 if (i < conf->raid_disks)
1184 is_bad = is_badblock(rdev, r1_bio->sector,
1186 &first_bad, &bad_sectors);
1191 blocked_rdev = rdev;
1194 if (is_bad && first_bad <= r1_bio->sector) {
1196 bad_sectors -= (r1_bio->sector - first_bad);
1197 if (bad_sectors < max_sectors)
1201 max_sectors = bad_sectors;
1202 rdev_dec_pending(rdev, mddev);
1216 int good_sectors = first_bad - r1_bio->sector;
1217 if (good_sectors < max_sectors)
1218 max_sectors = good_sectors;
1221 r1_bio->bios[i] = bio;
1229 for (j = 0; j < i; j++)
1230 if (r1_bio->bios[j])
1231 rdev_dec_pending(conf->mirrors[j].rdev, mddev);
1233 allow_barrier(conf);
1239 if (max_sectors < r1_bio->sectors) {
1243 r1_bio->sectors = max_sectors;
1245 if (bio->bi_phys_segments == 0)
1246 bio->bi_phys_segments = 2;
1248 bio->bi_phys_segments++;
1251 sectors_handled = r1_bio->sector + max_sectors - bio->bi_sector;
1257 for (i = 0; i < disks; i++) {
1259 if (!r1_bio->bios[i])
1273 !waitqueue_active(&bitmap->behind_wait))
1274 alloc_behind_pages(mbio, r1_bio);
1283 struct bio_vec *bvec;
1293 __bio_for_each_segment(bvec, mbio, j, 0)
1294 bvec->bv_page = r1_bio->behind_bvecs[j].bv_page;
1299 r1_bio->bios[i] = mbio;
1301 mbio->bi_sector = (r1_bio->sector +
1303 mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
1304 mbio->bi_end_io = raid1_end_write_request;
1305 mbio->bi_rw = WRITE | do_flush_fua | do_sync | do_discard;
1306 mbio->bi_private = r1_bio;
1317 bio_list_add(&plug->pending, mbio);
1318 plug->pending_cnt++;
1323 spin_unlock_irqrestore(&conf->device_lock, flags);
1330 if (sectors_handled < (bio->bi_size >> 9)) {
1331 r1_bio_write_done(r1_bio);
1337 r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
1339 r1_bio->mddev = mddev;
1340 r1_bio->sector = bio->bi_sector + sectors_handled;
1344 r1_bio_write_done(r1_bio);
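/*
 * error: md calls this when a member device fails.  The rdev is marked
 * Faulty unless it is the last operational mirror, in which case the
 * failure is ignored (but further recovery from this device is disabled).
 */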
1368 static void error(struct mddev *mddev, struct md_rdev *rdev)
1392 unsigned long flags;
1396 spin_unlock_irqrestore(&conf->device_lock, flags);
1405 "md/raid1:%s: Disk failure on %s, disabling device.\n"
1406 "md/raid1:%s: Operation continuing on %d devices.\n",
1411 static void print_conf(struct r1conf *conf)
1436 static void close_sync(struct r1conf *conf)
1439 allow_barrier(conf);
1445 static int raid1_spare_active(struct mddev *mddev)
1450 unsigned long flags;
1474 sysfs_notify_dirent_safe(
1487 spin_unlock_irqrestore(&conf->device_lock, flags);
1494 static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1510 if (q->merge_bvec_fn) {
1515 for (mirror = first; mirror <= last; mirror++) {
1554 raise_barrier(conf);
1555 lower_barrier(conf);
1559 if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
1560 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
1565 static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
1572 if (rdev != p->rdev)
1576 if (rdev == p->rdev) {
1605 raise_barrier(conf);
1609 lower_barrier(conf);
1622 static void end_sync_read(struct bio *bio, int error)
1624 struct r1bio *r1_bio = bio->bi_private;
1626 update_head_pos(r1_bio->read_disk, r1_bio);
1633 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
1637 reschedule_retry(r1_bio);
1640 static void end_sync_write(struct bio *bio, int error)
1642 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1643 struct r1bio *r1_bio = bio->bi_private;
1644 struct mddev *mddev = r1_bio->mddev;
1650 mirror = find_bio_disk(r1_bio, bio);
1655 long sectors_to_go = r1_bio->sectors;
1661 sectors_to_go -= sync_blocks;
1662 } while (sectors_to_go > 0);
1664 &conf->mirrors[mirror].rdev->flags);
1666 &conf->mirrors[mirror].rdev->flags))
1670 } else if (is_badblock(conf->mirrors[mirror].rdev,
1673 &first_bad, &bad_sectors) &&
1677 &first_bad, &bad_sectors)
1685 reschedule_retry(r1_bio);
1694 int sectors, struct page *page, int rw)
1696 if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
1704 rdev->mddev->recovery);
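/*
 * fix_sync_read_error: a read issued for resync failed.  Try the same
 * sectors on the other mirrors; if one succeeds, write the good data back
 * to the devices that failed, otherwise record the sectors as bad (or
 * abort the recovery if even that is not possible).
 */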
1712 static int fix_sync_read_error(struct r1bio *r1_bio)
1725 struct mddev *mddev = r1_bio->mddev;
1729 int sectors = r1_bio->sectors;
1742 if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
1749 bio->bi_io_vec[idx].bv_page,
1758 } while (!success && d != r1_bio->read_disk);
1769 " for block %llu\n",
1772 (unsigned long long)r1_bio->sector);
1801 if (r1_bio->bios[d]->bi_end_io != end_sync_read)
1804 if (r1_sync_page_io(rdev, sect, s,
1805 bio->bi_io_vec[idx].bv_page,
1808 rdev_dec_pending(rdev, mddev);
1816 if (r1_bio->bios[d]->bi_end_io != end_sync_read)
1819 if (r1_sync_page_io(rdev, sect, s,
1820 bio->bi_io_vec[idx].bv_page,
1829 set_bit(BIO_UPTODATE, &bio->bi_flags);
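/*
 * process_checks: for a user-requested "check"/"repair", compare the data
 * read from each mirror with the first successful read and schedule a
 * write-back to any mirror whose copy differs or whose read failed.
 */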
1833 static int process_checks(struct r1bio *r1_bio)
1842 struct mddev *mddev = r1_bio->mddev;
1848 for (primary = 0; primary < conf->raid_disks * 2; primary++)
1849 if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
1850 test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
1852 rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
1860 struct bio *sbio = r1_bio->bios[i];
1863 if (r1_bio->bios[i]->bi_end_io != end_sync_read)
1866 if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
1867 for (j = vcnt; j-- ; ) {
1869 p = pbio->bi_io_vec[j].bv_page;
1870 s = sbio->bi_io_vec[j].bv_page;
1873 sbio->bi_io_vec[j].bv_len))
1881 && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
1883 sbio->bi_end_io = NULL;
1884 rdev_dec_pending(conf->mirrors[i].rdev, mddev);
1888 sbio->bi_vcnt = vcnt;
1889 sbio->bi_size = r1_bio->sectors << 9;
1891 sbio->bi_phys_segments = 0;
1892 sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
1893 sbio->bi_flags |= 1 << BIO_UPTODATE;
1894 sbio->bi_next = NULL;
1895 sbio->bi_sector = r1_bio->sector +
1896 conf->mirrors[i].rdev->data_offset;
1897 sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
1898 size = sbio->bi_size;
1899 for (j = 0; j < vcnt ; j++) {
1901 bi = &sbio->bi_io_vec[j];
1916 static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
1921 struct bio *bio, *wbio;
1927 if (!fix_sync_read_error(r1_bio))
1931 if (process_checks(r1_bio) < 0)
1937 for (i = 0; i < disks ; i++) {
1938 wbio = r1_bio->bios[i];
1939 if (wbio->bi_end_io == NULL ||
1940 (wbio->bi_end_io == end_sync_read &&
1945 wbio->bi_rw = WRITE;
1946 wbio->bi_end_io = end_sync_write;
1948 md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
1958 reschedule_retry(r1_bio);
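/*
 * fix_read_error: try to repair a failed read on a live array by reading
 * the affected sectors from another mirror and rewriting them (then
 * re-reading to verify) on the device that returned the error.  Devices
 * that cannot be repaired get bad blocks recorded or are failed.
 */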
1974 static void fix_read_error(struct r1conf *conf, int read_disk,
1977 struct mddev *mddev = conf->mddev;
2002 is_badblock(rdev, sect, s,
2003 &first_bad, &bad_sectors) == 0 &&
2012 } while (!success && d != read_disk);
2023 while (d != read_disk) {
2030 r1_sync_page_io(rdev, sect, s,
2034 while (d != read_disk) {
2042 if (r1_sync_page_io(rdev, sect, s,
2046 "md/raid1:%s: read error corrected "
2047 "(%d sectors at %llu on %s)\n",
2049 (unsigned long long)(sect +
2060 static void bi_complete(struct bio *bio, int error)
2065 static int submit_bio_wait(int rw, struct bio *bio)
2070 init_completion(&event);
2071 bio->bi_private = &event;
2072 bio->bi_end_io = bi_complete;
2076 return test_bit(BIO_UPTODATE, &bio->bi_flags);
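/*
 * narrow_write_error: re-issue a failed write in chunks sized by the
 * rdev's badblocks shift so that only the chunks which really fail are
 * recorded as bad blocks on the device.
 */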
2079 static int narrow_write_error(struct r1bio *r1_bio, int i)
2081 struct mddev *mddev = r1_bio->mddev;
2085 struct bio_vec *vec;
2101 int sect_to_write = r1_bio->sectors;
2107 block_sectors = 1 << rdev->badblocks.shift;
2109 sectors = ((sector + block_sectors)
2117 while (vec[idx].bv_page == NULL)
2124 while (sect_to_write) {
2126 if (sectors > sect_to_write)
2127 sectors = sect_to_write;
2131 memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
2132 wbio->bi_sector = r1_bio->sector;
2133 wbio->bi_rw = WRITE;
2134 wbio->bi_vcnt = vcnt;
2135 wbio->bi_size = r1_bio->sectors << 9;
2140 wbio->bi_bdev = rdev->bdev;
2141 if (submit_bio_wait(WRITE, wbio) == 0)
2150 sectors = block_sectors;
2155 static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
2159 for (m = 0; m < conf->raid_disks * 2 ; m++) {
2161 struct bio *bio = r1_bio->bios[m];
2162 if (bio->bi_end_io == NULL)
2164 if (test_bit(BIO_UPTODATE, &bio->bi_flags) &&
2168 if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
2178 static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
2187 rdev_dec_pending(rdev, conf->mddev);
2188 } else if (r1_bio->bios[m] != NULL) {
2193 if (!narrow_write_error(r1_bio, m)) {
2199 rdev_dec_pending(conf->mirrors[m].rdev,
2203 close_write(r1_bio);
2204 raid_end_bio_io(r1_bio);
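/*
 * handle_read_error: called from raid1d for a read that failed.  The array
 * is frozen, fix_read_error() is attempted on a read-write array, and the
 * request is then re-issued to a different mirror picked by read_balance().
 */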
2207 static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
2211 struct mddev *mddev = conf->mddev;
2225 if (mddev->ro == 0) {
2229 unfreeze_array(conf);
2237 disk = read_balance(conf, r1_bio, &max_sectors);
2240 " read error for block %llu\n",
2241 mdname(mddev), b, (unsigned long long)r1_bio->sector);
2242 raid_end_bio_io(r1_bio);
2244 const unsigned long do_sync
2255 rdev = conf->mirrors[disk].rdev;
2257 "md/raid1:%s: redirecting sector %llu"
2258 " to other mirror: %s\n",
2260 (unsigned long long)r1_bio->sector,
2263 bio->bi_bdev = rdev->bdev;
2264 bio->bi_end_io = raid1_end_read_request;
2265 bio->bi_rw = READ | do_sync;
2266 bio->bi_private = r1_bio;
2267 if (max_sectors < r1_bio->sectors) {
2270 int sectors_handled = (r1_bio->sector + max_sectors
2272 r1_bio->sectors = max_sectors;
2274 if (mbio->bi_phys_segments == 0)
2275 mbio->bi_phys_segments = 2;
2277 mbio->bi_phys_segments++;
2285 r1_bio->sectors = (mbio->bi_size >> 9)
2289 r1_bio->mddev = mddev;
2290 r1_bio->sector = mbio->bi_sector + sectors_handled;
2300 struct mddev *mddev = thread->mddev;
2301 struct r1bio *r1_bio;
2302 unsigned long flags;
2312 flush_pending_writes(conf);
2315 if (list_empty(head)) {
2316 spin_unlock_irqrestore(&conf->device_lock, flags);
2322 spin_unlock_irqrestore(&conf->device_lock, flags);
2324 mddev = r1_bio->mddev;
2329 handle_sync_write_finished(conf, r1_bio);
2331 sync_request_write(mddev, r1_bio);
2334 handle_write_finished(conf, r1_bio);
2336 handle_read_error(conf, r1_bio);
2351 static int init_resync(struct r1conf *conf)
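/*
 * sync_request: perform one chunk of resync/recovery/check starting at
 * sector_nr.  Reads come from an in-sync disk (all of them for a
 * check/repair pass) and writes go to the devices being rebuilt or
 * corrected; the bitmap and bad-block lists decide how much of the
 * window can be skipped.
 */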
2375 static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped, int go_faster)
2378 struct r1bio *r1_bio;
2384 int write_targets = 0, read_targets = 0;
2386 int still_degraded = 0;
2391 if (init_resync(conf))
2395 if (sector_nr >= max_sector) {
2417 return max_sector - sector_nr;
2438 raise_barrier(conf);
2452 r1_bio->mddev = mddev;
2453 r1_bio->sector = sector_nr;
2459 bio = r1_bio->bios[i];
2462 bio->bi_next = NULL;
2463 bio->bi_flags &= ~(BIO_POOL_MASK-1);
2464 bio->bi_flags |= 1 << BIO_UPTODATE;
2468 bio->bi_phys_segments = 0;
2470 bio->bi_end_io = NULL;
2471 bio->bi_private = NULL;
2476 if (i < conf->raid_disks)
2480 bio->bi_end_io = end_sync_write;
2487 if (is_badblock(rdev, sector_nr, good_sectors,
2488 &first_bad, &bad_sectors)) {
2489 if (first_bad > sector_nr)
2490 good_sectors = first_bad - sector_nr;
2492 bad_sectors -= (sector_nr - first_bad);
2494 min_bad > bad_sectors)
2495 min_bad = bad_sectors;
2498 if (sector_nr < first_bad) {
2507 bio->bi_end_io = end_sync_read;
2519 bio->bi_end_io = end_sync_write;
2523 if (bio->bi_end_io) {
2526 bio->bi_bdev = rdev->bdev;
2527 bio->bi_private = r1_bio;
2535 if (read_targets == 0 && min_bad > 0) {
2541 if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
2564 if (min_bad > 0 && min_bad < good_sectors) {
2567 good_sectors = min_bad;
2572 write_targets += read_targets-1;
2574 if (write_targets == 0 || read_targets == 0) {
2580 max_sector = sector_nr + min_bad;
2581 rv = max_sector - sector_nr;
2589 if (max_sector > sector_nr + good_sectors)
2590 max_sector = sector_nr + good_sectors;
2596 if (sector_nr + (len>>9) > max_sector)
2597 len = (max_sector - sector_nr) << 9;
2600 if (sync_blocks == 0) {
2602 &sync_blocks, still_degraded) &&
2607 if ((len >> 9) > sync_blocks)
2608 len = sync_blocks<<9;
2611 for (i = 0 ; i < conf->raid_disks * 2; i++) {
2612 bio = r1_bio->bios[i];
2613 if (bio->bi_end_io) {
2614 page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
2617 bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
2620 bio = r1_bio->bios[i];
2621 if (bio->bi_end_io == NULL)
2625 bio->bi_size -= len;
2626 bio->bi_flags &= ~(1<< BIO_SEG_VALID);
2632 nr_sectors += len>>9;
2633 sector_nr += len>>9;
2634 sync_blocks -= (len>>9);
2635 } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
2644 for (i = 0; i < conf->raid_disks * 2 && read_targets; i++) {
2645 bio = r1_bio->bios[i];
2646 if (bio->bi_end_io == end_sync_read) {
2648 md_sync_acct(bio->bi_bdev, nr_sectors);
2655 md_sync_acct(bio->bi_bdev, nr_sectors);
2662 static sector_t raid1_size(struct mddev *mddev, sector_t sectors, int raid_disks)
2670 static struct r1conf *setup_conf(struct mddev *mddev)
2715 disk = conf->mirrors + disk_idx;
2720 q = bdev_get_queue(rdev->bdev);
2721 if (q->merge_bvec_fn)
2728 conf->mddev = mddev;
2743 if (i < conf->raid_disks &&
2762 (disk->rdev->saved_raid_disk < 0))
2771 "md/raid1:%s: couldn't allocate thread\n",
2787 return ERR_PTR(err);
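/*
 * run: personality initialisation.  Verifies the array really is level 1,
 * builds the r1conf via setup_conf(), wires up the congestion callback and
 * discard support, and reports how many mirrors are active.
 */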
2790 static int stop(struct mddev *mddev);
2791 static int run(struct mddev *mddev)
2797 bool discard_supported = false;
2799 if (mddev->level != 1) {
2800 printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n",
2801 mdname(mddev), mddev->level);
2805 printk(KERN_ERR "md/raid1:%s: reshape_position set but not supported\n",
2815 conf = setup_conf(mddev);
2820 return PTR_ERR(conf);
2827 if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
2828 discard_supported = true;
2843 " -- starting background reconstruction\n",
2846 "md/raid1:%s: active with %d out of %d mirrors\n",
2860 mddev->queue->backing_dev_info.congested_fn = raid1_congested;
2861 mddev->queue->backing_dev_info.congested_data = mddev;
2864 if (discard_supported)
2865 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
2868 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
2878 static int stop(struct mddev *mddev)
2881 struct bitmap *bitmap = mddev->bitmap;
2884 if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
2885 printk(KERN_INFO "md/raid1:%s: behind writes in progress - waiting to stop.\n",
2892 raise_barrier(conf);
2893 lower_barrier(conf);
2905 static int raid1_resize(struct mddev *mddev, sector_t sectors)
2914 sector_t newsize = raid1_size(mddev, sectors, 0);
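/*
 * raid1_reshape: change the number of mirrors.  A new mempool and mirror
 * array are allocated, the barrier is raised while they are swapped in,
 * and the md thread is woken so any newly added slots get rebuilt.
 */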
2936 static int raid1_reshape(struct mddev *mddev)
2954 unsigned long flags;
2973 if (raid_disks < conf->raid_disks) {
2978 if (cnt > raid_disks)
2989 r1bio_pool_free, newpoolinfo);
2994 newmirrors = kzalloc(sizeof(struct raid1_info) * raid_disks * 2,
3002 raise_barrier(conf);
3008 for (d = d2 = 0; d < conf->raid_disks; d++) {
3011 sysfs_unlink_rdev(mddev, rdev);
3013 sysfs_unlink_rdev(mddev, rdev);
3014 if (sysfs_link_rdev(mddev, rdev))
3016 "md/raid1:%s: cannot register rd%d\n",
3029 spin_unlock_irqrestore(&conf->device_lock, flags);
3033 lower_barrier(conf);
3042 static void raid1_quiesce(struct mddev *mddev, int state)
3051 raise_barrier(conf);
3054 lower_barrier(conf);
3059 static void *raid1_takeover(struct mddev *mddev)
3069 conf = setup_conf(mddev);
3086 .error_handler = error,
3087 .hot_add_disk = raid1_add_disk,
3088 .hot_remove_disk= raid1_remove_disk,
3089 .spare_active = raid1_spare_active,
3091 .resize = raid1_resize,
3093 .check_reshape = raid1_reshape,
3094 .quiesce = raid1_quiesce,
3095 .takeover = raid1_takeover,
3098 static int __init raid_init(void)
3103 static void raid_exit(void)