#include <linux/module.h>
#include <linux/slab.h>

#define DM_MSG_PREFIX "raid1"

#define MAX_RECOVERY 1	/* Maximum amount of recovery that can be in flight */

#define DM_RAID1_HANDLE_ERRORS	0x01
#define errors_handled(p)	((p)->features & DM_RAID1_HANDLE_ERRORS)
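/*
 * The mirror daemon (kmirrord) performs all deferred I/O for the
 * mirror set.  The helpers below wake it, either immediately or via
 * a short timer so that requeued work does not busy-loop.
 */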
static void wakeup_mirrord(void *context)
{
	struct mirror_set *ms = context;

	queue_work(ms->kmirrord_wq, &ms->kmirrord_work);
}

static void delayed_wake_fn(unsigned long data)
{
	struct mirror_set *ms = (struct mirror_set *) data;

	clear_bit(0, &ms->timer_pending);
	wakeup_mirrord(ms);
}

static void delayed_wake(struct mirror_set *ms)
{
	if (test_and_set_bit(0, &ms->timer_pending))
		return;

	ms->timer.expires = jiffies + HZ / 5;
	ms->timer.data = (unsigned long) ms;
	ms->timer.function = delayed_wake_fn;
	add_timer(&ms->timer);
}

static void wakeup_all_recovery_waiters(void *context)
{
	wake_up_all(&_kmirrord_recovery_stopped);
}
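/*
 * queue_bio() adds a bio to the read or write list and wakes the
 * daemon only when the list was previously empty, so a burst of
 * bios triggers a single wakeup.
 */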
static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw)
{
	unsigned long flags;
	int should_wake = 0;
	struct bio_list *bl;

	bl = (rw == WRITE) ? &ms->writes : &ms->reads;
	spin_lock_irqsave(&ms->lock, flags);
	should_wake = !(bl->head);
	bio_list_add(bl, bio);
	spin_unlock_irqrestore(&ms->lock, flags);

	if (should_wake)
		wakeup_mirrord(ms);
}
static void dispatch_bios(void *context, struct bio_list *bio_list)
{
	struct mirror_set *ms = context;
	struct bio *bio;

	while ((bio = bio_list_pop(bio_list)))
		queue_bio(ms, bio, WRITE);
}
#define MIN_READ_RECORDS 20

static struct kmem_cache *_dm_raid1_read_record_cache;

/*
 * Every mirror should look like this one.
 */
#define DEFAULT_MIRROR 0
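/*
 * We squirrel the mirror struct away inside bi_next for read/write
 * buffers.  This is safe since the bio doesn't get submitted to the
 * lower levels of the block layer with this field set.
 */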
static struct mirror *bio_get_m(struct bio *bio)
{
	return (struct mirror *) bio->bi_next;
}

static void bio_set_m(struct bio *bio, struct mirror *m)
{
	bio->bi_next = (struct bio *) m;
}

static struct mirror *get_default_mirror(struct mirror_set *ms)
{
	return &ms->mirror[atomic_read(&ms->default_mirror)];
}

static void set_default_mirror(struct mirror *m)
{
	struct mirror_set *ms = m->ms;
	struct mirror *m0 = &(ms->mirror[0]);

	atomic_set(&ms->default_mirror, m - m0);
}
static void fail_mirror(struct mirror *m, enum dm_raid1_error error_type)
{
	struct mirror_set *ms = m->ms;
	struct mirror *new;

	ms->leg_failure = 1;

	/*
	 * error_count is used for nothing more than a
	 * simple way to tell if a device has encountered
	 * errors.
	 */
	atomic_inc(&m->error_count);

	if (test_and_set_bit(error_type, &m->error_type))
		return;

	if (!errors_handled(ms))
		return;

	if (m != get_default_mirror(ms))
		goto out;

	if (!ms->in_sync) {
		/*
		 * Better to issue requests to same failing device
		 * than to risk returning corrupt data.
		 */
		DMERR("Primary mirror (%s) failed while out-of-sync: "
		      "Reads may fail.", m->dev->name);
		goto out;
	}

	new = get_valid_mirror(ms);
	if (new)
		set_default_mirror(new);
	else
		DMWARN("All sides of mirror have failed.");

out:
	schedule_work(&ms->trigger_event);
}
static int mirror_flush(struct dm_target *ti)
{
	struct mirror_set *ms = ti->private;
	unsigned long error_bits;

	unsigned int i;
	struct dm_io_region io[ms->nr_mirrors];
	struct mirror *m;
	struct dm_io_request io_req = {
		.bi_rw = WRITE_FLUSH,
		.mem.type = DM_IO_KMEM,
		.mem.ptr.addr = NULL,
		.client = ms->io_client,
	};

	for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++) {
		io[i].bdev = m->dev->bdev;
		io[i].sector = 0;
		io[i].count = 0;
	}

	error_bits = -1;
	dm_io(&io_req, ms->nr_mirrors, io, &error_bits);
	if (unlikely(error_bits != 0)) {
		for (i = 0; i < ms->nr_mirrors; i++)
			if (test_bit(i, &error_bits))
				fail_mirror(ms->mirror + i,
					    DM_RAID1_FLUSH_ERROR);
		return -EIO;
	}

	return 0;
}
static void recovery_complete(int read_err, unsigned long write_err,
			      void *context)
{
	struct dm_region *reg = context;
	struct mirror_set *ms = dm_rh_region_context(reg);
	int m, bit = 0;

	if (read_err) {
		/* Read error means the failure of default mirror. */
		DMERR_LIMIT("Unable to read primary mirror during recovery");
		fail_mirror(get_default_mirror(ms), DM_RAID1_SYNC_ERROR);
	}

	if (write_err) {
		DMERR_LIMIT("Write error during recovery (error = 0x%lx)",
			    write_err);
		/*
		 * Bits correspond to devices (excluding default mirror).
		 * The default mirror cannot change during recovery.
		 */
		for (m = 0; m < ms->nr_mirrors; m++) {
			if (&ms->mirror[m] == get_default_mirror(ms))
				continue;
			if (test_bit(bit, &write_err))
				fail_mirror(ms->mirror + m,
					    DM_RAID1_SYNC_ERROR);
			bit++;
		}
	}

	dm_rh_recovery_end(reg, !(read_err || write_err));
}
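/*
 * recover() copies one out-of-sync region from the default mirror to
 * every other leg using kcopyd; recovery_complete() above marks any
 * legs that failed during the copy.
 */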
static int recover(struct mirror_set *ms, struct dm_region *reg)
{
	int r;
	unsigned i;
	struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest;
	struct mirror *m;
	unsigned long flags = 0;
	region_t key = dm_rh_get_region_key(reg);
	sector_t region_size = dm_rh_get_region_size(ms->rh);

	/* fill in the source */
	m = get_default_mirror(ms);
	from.bdev = m->dev->bdev;
	from.sector = m->offset + dm_rh_region_to_sector(ms->rh, key);
	if (key == (ms->nr_regions - 1)) {
		/* The final region may be smaller than region_size. */
		from.count = ms->ti->len & (region_size - 1);
		if (!from.count)
			from.count = region_size;
	} else
		from.count = region_size;

	/* fill in the destinations */
	for (i = 0, dest = to; i < ms->nr_mirrors; i++) {
		if (&ms->mirror[i] == get_default_mirror(ms))
			continue;

		m = ms->mirror + i;
		dest->bdev = m->dev->bdev;
		dest->sector = m->offset + dm_rh_region_to_sector(ms->rh, key);
		dest->count = from.count;
		dest++;
	}

	/* hand to kcopyd */
	if (!errors_handled(ms))
		set_bit(DM_KCOPYD_IGNORE_ERROR, &flags);

	r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to,
			   flags, recovery_complete, reg);

	return r;
}
static void do_recovery(struct mirror_set *ms)
{
	struct dm_region *reg;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
	int r;

	/* Start quiescing some regions. */
	dm_rh_recovery_prepare(ms->rh);

	/* Copy any already quiesced regions. */
	while ((reg = dm_rh_recovery_start(ms->rh))) {
		r = recover(ms, reg);
		if (r)
			dm_rh_recovery_end(reg, 0);
	}

	/* Update the in sync flag. */
	if (!ms->in_sync &&
	    (log->type->get_sync_count(log) == ms->nr_regions)) {
		/* the sync is complete */
		dm_table_event(ms->ti->table);
		ms->in_sync = 1;
	}
}
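/*-----------------------------------------------------------------
 * Reads
 *---------------------------------------------------------------*/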
static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector)
{
	struct mirror *m = get_default_mirror(ms);

	do {
		if (likely(!atomic_read(&m->error_count)))
			return m;

		if (m-- == ms->mirror)
			m += ms->nr_mirrors;
	} while (m != get_default_mirror(ms));

	return NULL;
}
static int default_ok(struct mirror *m)
{
	struct mirror *default_mirror = get_default_mirror(m->ms);

	return !atomic_read(&default_mirror->error_count);
}
static int mirror_available(struct mirror_set *ms, struct bio *bio)
{
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
	region_t region = dm_rh_bio_to_region(ms->rh, bio);

	if (log->type->in_sync(log, region, 0))
		return choose_mirror(ms, bio->bi_sector) ? 1 : 0;

	return 0;
}
/*
 * remap a buffer to a particular mirror.
 */
static sector_t map_sector(struct mirror *m, struct bio *bio)
{
	if (unlikely(!bio->bi_size))
		return 0;
	return m->offset + dm_target_offset(m->ms->ti, bio->bi_sector);
}

static void map_bio(struct mirror *m, struct bio *bio)
{
	bio->bi_bdev = m->dev->bdev;
	bio->bi_sector = map_sector(m, bio);
}

static void map_region(struct dm_io_region *io, struct mirror *m,
		       struct bio *bio)
{
	io->bdev = m->dev->bdev;
	io->sector = map_sector(m, bio);
	io->count = bio->bi_size >> 9;
}
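/*
 * hold_bio() parks a bio while the device is suspending; if the
 * device is already suspended the bio is completed immediately,
 * either requeued (noflush suspend) or failed.
 */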
static void hold_bio(struct mirror_set *ms, struct bio *bio)
{
	/*
	 * Lock is required to avoid race condition during suspend
	 * process.
	 */
	spin_lock_irq(&ms->lock);

	if (atomic_read(&ms->suspend)) {
		spin_unlock_irq(&ms->lock);

		/* If device is suspended, complete the bio. */
		if (dm_noflush_suspending(ms->ti))
			bio_endio(bio, DM_ENDIO_REQUEUE);
		else
			bio_endio(bio, -EIO);
		return;
	}

	/* Hold bio until the suspend is complete. */
	bio_list_add(&ms->holds, bio);
	spin_unlock_irq(&ms->lock);
}
static void read_callback(unsigned long error, void *context)
{
	struct bio *bio = context;
	struct mirror *m;

	m = bio_get_m(bio);
	bio_set_m(bio, NULL);

	if (likely(!error)) {
		bio_endio(bio, 0);
		return;
	}

	fail_mirror(m, DM_RAID1_READ_ERROR);

	if (likely(default_ok(m)) || mirror_available(m->ms, bio)) {
		DMWARN_LIMIT("Read failure on mirror device %s.  "
			     "Trying alternative device.",
			     m->dev->name);
		queue_bio(m->ms, bio, bio_rw(bio));
		return;
	}

	DMERR_LIMIT("Read failure on mirror device %s.  Failing I/O.",
		    m->dev->name);
	bio_endio(bio, -EIO);
}
/* Asynchronous read. */
static void read_async_bio(struct mirror *m, struct bio *bio)
{
	struct dm_io_region io;
	struct dm_io_request io_req = {
		.bi_rw = READ,
		.mem.type = DM_IO_BVEC,
		.mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
		.notify.fn = read_callback,
		.notify.context = bio,
		.client = m->ms->io_client,
	};

	map_region(&io, m, bio);
	bio_set_m(bio, m);
	BUG_ON(dm_io(&io_req, 1, &io, NULL));
}
static void do_reads(struct mirror_set *ms, struct bio_list *reads)
{
	region_t region;
	struct bio *bio;
	struct mirror *m;

	while ((bio = bio_list_pop(reads))) {
		region = dm_rh_bio_to_region(ms->rh, bio);
		m = get_default_mirror(ms);

		/*
		 * We can only read balance if the region is in sync.
		 */
		if (likely(region_in_sync(ms, region, 1)))
			m = choose_mirror(ms, bio->bi_sector);
		else if (m && atomic_read(&m->error_count))
			m = NULL;

		if (likely(m))
			read_async_bio(m, bio);
		else
			bio_endio(bio, -EIO);
	}
}
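/*-----------------------------------------------------------------
 * Writes.
 *
 * We do different things with the write io depending on the
 * state of the region that it's in:
 *
 * SYNC:	increment pending, use kcopyd to write to *all* mirrors
 * RECOVERING:	delay the io until recovery completes
 * NOSYNC:	increment pending, just write to the default mirror
 *---------------------------------------------------------------*/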
static void write_callback(unsigned long error, void *context)
{
	unsigned i;
	struct bio *bio = (struct bio *) context;
	struct mirror_set *ms;
	int should_wake = 0;
	unsigned long flags;

	ms = bio_get_m(bio)->ms;
	bio_set_m(bio, NULL);

	/*
	 * The pending count is decremented in the targets end_io
	 * function, so writes to SYNC and NOSYNC regions share this
	 * path.
	 */
	if (likely(!error)) {
		bio_endio(bio, 0);
		return;
	}

	for (i = 0; i < ms->nr_mirrors; i++)
		if (test_bit(i, &error))
			fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR);

	/*
	 * Raising an event can block, so hand the bio to the main
	 * thread via the failures list.
	 */
	spin_lock_irqsave(&ms->lock, flags);
	if (!ms->failures.head)
		should_wake = 1;
	bio_list_add(&ms->failures, bio);
	spin_unlock_irqrestore(&ms->lock, flags);
	if (should_wake)
		wakeup_mirrord(ms);
}
static void do_write(struct mirror_set *ms, struct bio *bio)
{
	unsigned int i;
	struct dm_io_region io[ms->nr_mirrors], *dest = io;
	struct mirror *m;
	struct dm_io_request io_req = {
		.bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA),
		.mem.type = DM_IO_BVEC,
		.mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
		.notify.fn = write_callback,
		.notify.context = bio,
		.client = ms->io_client,
	};

	if (bio->bi_rw & REQ_DISCARD) {
		io_req.bi_rw |= REQ_DISCARD;
		io_req.mem.type = DM_IO_KMEM;
		io_req.mem.ptr.addr = NULL;
	}

	for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++)
		map_region(dest++, m, bio);

	/*
	 * Use default mirror because we only need it to retrieve the
	 * reference to the mirror set in write_callback().
	 */
	bio_set_m(bio, get_default_mirror(ms));

	BUG_ON(dm_io(&io_req, ms->nr_mirrors, io, NULL));
}
static void do_writes(struct mirror_set *ms, struct bio_list *writes)
{
	int state;
	struct bio *bio;
	struct bio_list sync, nosync, recover, *this_list = NULL;
	struct bio_list requeue;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
	region_t region;

	if (!writes->head)
		return;

	/*
	 * Classify each write.
	 */
	bio_list_init(&sync);
	bio_list_init(&nosync);
	bio_list_init(&recover);
	bio_list_init(&requeue);

	while ((bio = bio_list_pop(writes))) {
		if ((bio->bi_rw & REQ_FLUSH) ||
		    (bio->bi_rw & REQ_DISCARD)) {
			bio_list_add(&sync, bio);
			continue;
		}

		region = dm_rh_bio_to_region(ms->rh, bio);

		if (log->type->is_remote_recovering &&
		    log->type->is_remote_recovering(log, region)) {
			bio_list_add(&requeue, bio);
			continue;
		}

		state = dm_rh_get_state(ms->rh, region, 1);
		switch (state) {
		case DM_RH_CLEAN:
		case DM_RH_DIRTY:
			this_list = &sync;
			break;

		case DM_RH_NOSYNC:
			this_list = &nosync;
			break;

		case DM_RH_RECOVERING:
			this_list = &recover;
			break;
		}

		bio_list_add(this_list, bio);
	}

	/*
	 * Add bios that are delayed due to remote recovery
	 * back on to the write queue.
	 */
	if (unlikely(requeue.head)) {
		spin_lock_irq(&ms->lock);
		bio_list_merge(&ms->writes, &requeue);
		spin_unlock_irq(&ms->lock);
		delayed_wake(ms);
	}

	/*
	 * Increment the pending counts for any regions that will
	 * be written to (writes to recover regions are going to
	 * be delayed).
	 */
	dm_rh_inc_pending(ms->rh, &sync);
	dm_rh_inc_pending(ms->rh, &nosync);

	/*
	 * If the flush fails on a previous call and succeeds here,
	 * we must not reset the log_failure variable.  We need
	 * userspace interaction when the log has been marked errored.
	 */
	ms->log_failure = dm_rh_flush(ms->rh) ? 1 : ms->log_failure;

	/*
	 * Dispatch the writes.
	 */
	if (unlikely(ms->log_failure) && errors_handled(ms)) {
		spin_lock_irq(&ms->lock);
		bio_list_merge(&ms->failures, &sync);
		spin_unlock_irq(&ms->lock);
		wakeup_mirrord(ms);
	} else
		while ((bio = bio_list_pop(&sync)))
			do_write(ms, bio);

	while ((bio = bio_list_pop(&recover)))
		dm_rh_delay(ms->rh, bio);

	while ((bio = bio_list_pop(&nosync))) {
		if (unlikely(ms->leg_failure) && errors_handled(ms)) {
			spin_lock_irq(&ms->lock);
			bio_list_add(&ms->failures, bio);
			spin_unlock_irq(&ms->lock);
			wakeup_mirrord(ms);
		} else {
			map_bio(get_default_mirror(ms), bio);
			generic_make_request(bio);
		}
	}
}
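/*
 * Failed writes are collected on ms->failures and handled here by
 * the daemon, off the fast path.
 */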
static void do_failures(struct mirror_set *ms, struct bio_list *failures)
{
	struct bio *bio;

	if (likely(!failures->head))
		return;

	while ((bio = bio_list_pop(failures))) {
		if (!ms->log_failure) {
			ms->in_sync = 0;
			dm_rh_mark_nosync(ms->rh, bio);
		}

		/*
		 * If all the legs are dead, fail the I/O.  If we have
		 * been told to handle errors, hold the bio and wait
		 * for userspace to deal with the problem.  Otherwise
		 * pretend that the I/O succeeded.
		 */
		if (!get_valid_mirror(ms))
			bio_endio(bio, -EIO);
		else if (errors_handled(ms))
			hold_bio(ms, bio);
		else
			bio_endio(bio, 0);
	}
}
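/*-----------------------------------------------------------------
 * kmirrord
 *---------------------------------------------------------------*/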
static void trigger_event(struct work_struct *work)
{
	struct mirror_set *ms =
		container_of(work, struct mirror_set, trigger_event);

	dm_table_event(ms->ti->table);
}

static void do_mirror(struct work_struct *work)
{
	struct mirror_set *ms = container_of(work, struct mirror_set,
					     kmirrord_work);
	struct bio_list reads, writes, failures;
	unsigned long flags;

	spin_lock_irqsave(&ms->lock, flags);
	reads = ms->reads;
	writes = ms->writes;
	failures = ms->failures;
	bio_list_init(&ms->reads);
	bio_list_init(&ms->writes);
	bio_list_init(&ms->failures);
	spin_unlock_irqrestore(&ms->lock, flags);

	dm_rh_update_states(ms->rh, errors_handled(ms));
	do_recovery(ms);
	do_reads(ms, &reads);
	do_writes(ms, &writes);
	do_failures(ms, &failures);
}
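/*-----------------------------------------------------------------
 * Target functions
 *---------------------------------------------------------------*/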
static struct mirror_set *alloc_context(unsigned int nr_mirrors,
					uint32_t region_size,
					struct dm_target *ti,
					struct dm_dirty_log *dl)
{
	size_t len;
	struct mirror_set *ms = NULL;

	len = sizeof(*ms) + (sizeof(ms->mirror[0]) * nr_mirrors);

	ms = kzalloc(len, GFP_KERNEL);
	if (!ms) {
		ti->error = "Cannot allocate mirror context";
		return NULL;
	}

	spin_lock_init(&ms->lock);
	bio_list_init(&ms->reads);
	bio_list_init(&ms->writes);
	bio_list_init(&ms->failures);
	bio_list_init(&ms->holds);

	ms->ti = ti;
	ms->nr_mirrors = nr_mirrors;
	ms->nr_regions = dm_sector_div_up(ti->len, region_size);
	ms->in_sync = 0;
	ms->log_failure = 0;
	ms->leg_failure = 0;
	atomic_set(&ms->suspend, 0);
	atomic_set(&ms->default_mirror, DEFAULT_MIRROR);

	ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS,
						_dm_raid1_read_record_cache);
	if (!ms->read_record_pool) {
		ti->error = "Error creating mirror read_record_pool";
		kfree(ms);
		return NULL;
	}

	ms->io_client = dm_io_client_create();
	if (IS_ERR(ms->io_client)) {
		ti->error = "Error creating dm_io client";
		mempool_destroy(ms->read_record_pool);
		kfree(ms);
		return NULL;
	}

	ms->rh = dm_region_hash_create(ms, dispatch_bios, wakeup_mirrord,
				       wakeup_all_recovery_waiters,
				       ms->ti->begin, MAX_RECOVERY,
				       dl, region_size, ms->nr_regions);
	if (IS_ERR(ms->rh)) {
		ti->error = "Error creating dirty region hash";
		dm_io_client_destroy(ms->io_client);
		mempool_destroy(ms->read_record_pool);
		kfree(ms);
		return NULL;
	}

	return ms;
}
static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
		      unsigned int mirror, char **argv)
{
	unsigned long long offset;
	char dummy;

	if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1) {
		ti->error = "Invalid offset";
		return -EINVAL;
	}

	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
			  &ms->mirror[mirror].dev)) {
		ti->error = "Device lookup failure";
		return -ENXIO;
	}

	ms->mirror[mirror].ms = ms;
	atomic_set(&(ms->mirror[mirror].error_count), 0);
	ms->mirror[mirror].error_type = 0;
	ms->mirror[mirror].offset = offset;

	return 0;
}
static struct dm_dirty_log *create_dirty_log(struct dm_target *ti,
					     unsigned argc, char **argv,
					     unsigned *args_used)
{
	unsigned param_count;
	struct dm_dirty_log *dl;
	char dummy;

	if (argc < 2) {
		ti->error = "Insufficient mirror log arguments";
		return NULL;
	}

	if (sscanf(argv[1], "%u%c", &param_count, &dummy) != 1) {
		ti->error = "Invalid mirror log argument count";
		return NULL;
	}

	*args_used = 2 + param_count;

	if (argc < *args_used) {
		ti->error = "Insufficient mirror log arguments";
		return NULL;
	}

	dl = dm_dirty_log_create(argv[0], ti, mirror_flush, param_count,
				 argv + 2);
	if (!dl) {
		ti->error = "Error creating mirror dirty log";
		return NULL;
	}

	return dl;
}
static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
			  unsigned *args_used)
{
	unsigned num_features;
	struct dm_target *ti = ms->ti;
	char dummy;

	*args_used = 0;

	if (!argc)
		return 0;

	if (sscanf(argv[0], "%u%c", &num_features, &dummy) != 1) {
		ti->error = "Invalid number of features";
		return -EINVAL;
	}

	argc--;
	argv++;
	(*args_used)++;

	if (num_features > argc) {
		ti->error = "Not enough arguments to support feature count";
		return -EINVAL;
	}

	if (!strcmp("handle_errors", argv[0]))
		ms->features |= DM_RAID1_HANDLE_ERRORS;
	else {
		ti->error = "Unrecognised feature requested";
		return -EINVAL;
	}

	(*args_used)++;

	return 0;
}
static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	int r;
	unsigned int nr_mirrors, m, args_used;
	struct mirror_set *ms;
	struct dm_dirty_log *dl;
	char dummy;

	dl = create_dirty_log(ti, argc, argv, &args_used);
	if (!dl)
		return -EINVAL;

	argv += args_used;
	argc -= args_used;

	if (!argc || sscanf(argv[0], "%u%c", &nr_mirrors, &dummy) != 1 ||
	    nr_mirrors < 2 || nr_mirrors > DM_KCOPYD_MAX_REGIONS + 1) {
		ti->error = "Invalid number of mirrors";
		dm_dirty_log_destroy(dl);
		return -EINVAL;
	}

	argv++, argc--;

	if (argc < nr_mirrors * 2) {
		ti->error = "Too few mirror arguments";
		dm_dirty_log_destroy(dl);
		return -EINVAL;
	}

	ms = alloc_context(nr_mirrors, dl->type->get_region_size(dl), ti, dl);
	if (!ms) {
		dm_dirty_log_destroy(dl);
		return -ENOMEM;
	}

	/* Get the mirror parameter sets */
	for (m = 0; m < nr_mirrors; m++) {
		r = get_mirror(ms, ti, m, argv);
		if (r) {
			free_context(ms, ti, m);
			return r;
		}
		argv += 2;
		argc -= 2;
	}

	ti->private = ms;

	r = dm_set_target_max_io_len(ti, dm_rh_get_region_size(ms->rh));
	if (r)
		goto err_free_context;

	ti->num_flush_requests = 1;
	ti->num_discard_requests = 1;

	ms->kmirrord_wq = alloc_workqueue("kmirrord",
					  WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
	if (!ms->kmirrord_wq) {
		DMERR("couldn't start kmirrord");
		r = -ENOMEM;
		goto err_free_context;
	}
	INIT_WORK(&ms->kmirrord_work, do_mirror);
	init_timer(&ms->timer);
	ms->timer_pending = 0;
	INIT_WORK(&ms->trigger_event, trigger_event);

	r = parse_features(ms, argc, argv, &args_used);
	if (r)
		goto err_destroy_wq;

	argv += args_used;
	argc -= args_used;

	if (argc) {
		ti->error = "Too many mirror arguments";
		r = -EINVAL;
		goto err_destroy_wq;
	}

	ms->kcopyd_client = dm_kcopyd_client_create();
	if (IS_ERR(ms->kcopyd_client)) {
		r = PTR_ERR(ms->kcopyd_client);
		goto err_destroy_wq;
	}

	wakeup_mirrord(ms);
	return 0;

err_destroy_wq:
	destroy_workqueue(ms->kmirrord_wq);
err_free_context:
	free_context(ms, ti, ms->nr_mirrors);
	return r;
}
static void mirror_dtr(struct dm_target *ti)
{
	struct mirror_set *ms = (struct mirror_set *) ti->private;

	del_timer_sync(&ms->timer);
	flush_workqueue(ms->kmirrord_wq);
	flush_work(&ms->trigger_event);
	dm_kcopyd_client_destroy(ms->kcopyd_client);
	destroy_workqueue(ms->kmirrord_wq);
	free_context(ms, ti, ms->nr_mirrors);
}
/*
 * Mirror mapping function
 */
static int mirror_map(struct dm_target *ti, struct bio *bio,
		      union map_info *map_context)
{
	int r, rw = bio_rw(bio);
	struct mirror *m;
	struct mirror_set *ms = ti->private;
	struct dm_raid1_read_record *read_record = NULL;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);

	if (rw == WRITE) {
		/* Save region for mirror_end_io() handler */
		map_context->ll = dm_rh_bio_to_region(ms->rh, bio);
		queue_bio(ms, bio, rw);
		return DM_MAPIO_SUBMITTED;
	}

	r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0);
	if (r < 0 && r != -EWOULDBLOCK)
		return r;

	/* If region is not in-sync queue the bio. */
	if (!r || (r == -EWOULDBLOCK)) {
		if (rw == READA)
			return -EWOULDBLOCK;

		queue_bio(ms, bio, rw);
		return DM_MAPIO_SUBMITTED;
	}

	/*
	 * The region is in-sync and we can perform reads directly.
	 * Store enough information so we can retry if it fails.
	 */
	m = choose_mirror(ms, bio->bi_sector);
	if (unlikely(!m))
		return -EIO;

	read_record = mempool_alloc(ms->read_record_pool, GFP_NOIO);
	if (likely(read_record)) {
		dm_bio_record(&read_record->details, bio);
		map_context->ptr = read_record;
		read_record->m = m;
	}

	map_bio(m, bio);

	return DM_MAPIO_REMAPPED;
}
static int mirror_end_io(struct dm_target *ti, struct bio *bio,
			 int error, union map_info *map_context)
{
	int rw = bio_rw(bio);
	struct mirror_set *ms = (struct mirror_set *) ti->private;
	struct mirror *m = NULL;
	struct dm_bio_details *bd = NULL;
	struct dm_raid1_read_record *read_record = map_context->ptr;

	/*
	 * We need to dec pending if this was a write.
	 */
	if (rw == WRITE) {
		if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)))
			dm_rh_dec(ms->rh, map_context->ll);
		return error;
	}

	if (error == -EOPNOTSUPP)
		goto out;

	if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD))
		goto out;

	if (unlikely(error)) {
		if (!read_record) {
			/*
			 * There wasn't enough memory to record necessary
			 * information for a retry or there was no other
			 * mirror in-sync.
			 */
			DMERR_LIMIT("Mirror read failed.");
			return -EIO;
		}

		m = read_record->m;

		DMERR("Mirror read failed from %s. Trying alternative device.",
		      m->dev->name);

		fail_mirror(m, DM_RAID1_READ_ERROR);

		/*
		 * A failed read is requeued for another attempt using
		 * an intact mirror.
		 */
		if (default_ok(m) || mirror_available(ms, bio)) {
			bd = &read_record->details;

			dm_bio_restore(bd, bio);
			mempool_free(read_record, ms->read_record_pool);
			map_context->ptr = NULL;
			queue_bio(ms, bio, rw);
			return DM_ENDIO_INCOMPLETE;
		}
		DMERR("All replicated volumes dead, failing I/O");
	}

out:
	if (read_record) {
		mempool_free(read_record, ms->read_record_pool);
		map_context->ptr = NULL;
	}

	return error;
}
static void mirror_presuspend(struct dm_target *ti)
{
	struct mirror_set *ms = (struct mirror_set *) ti->private;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);

	struct bio_list holds;
	struct bio *bio;

	atomic_set(&ms->suspend, 1);

	/*
	 * Process bios in the hold list.  After this, no bio has a
	 * chance to be added to the hold list because ms->suspend is
	 * set.
	 */
	spin_lock_irq(&ms->lock);
	holds = ms->holds;
	bio_list_init(&ms->holds);
	spin_unlock_irq(&ms->lock);

	while ((bio = bio_list_pop(&holds)))
		hold_bio(ms, bio);

	/*
	 * We must finish up all the work that we've
	 * generated (i.e. recovery work).
	 */
	dm_rh_stop_recovery(ms->rh);

	wait_event(_kmirrord_recovery_stopped,
		   !dm_rh_recovery_in_flight(ms->rh));

	if (log->type->presuspend && log->type->presuspend(log))
		/* FIXME: need better error handling */
		DMWARN("log presuspend failed");

	/*
	 * Now that recovery is complete/stopped and the delayed bios
	 * are queued, wait for the worker thread to finish so that
	 * all of our I/O has been pushed.
	 */
	flush_workqueue(ms->kmirrord_wq);
}
static void mirror_postsuspend(struct dm_target *ti)
{
	struct mirror_set *ms = ti->private;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);

	if (log->type->postsuspend && log->type->postsuspend(log))
		/* FIXME: need better error handling */
		DMWARN("log postsuspend failed");
}
static void mirror_resume(struct dm_target *ti)
{
	struct mirror_set *ms = ti->private;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);

	atomic_set(&ms->suspend, 0);
	if (log->type->resume && log->type->resume(log))
		/* FIXME: need better error handling */
		DMWARN("log resume failed");
	dm_rh_start_recovery(ms->rh);
}
static char device_status_char(struct mirror *m)
{
	if (!atomic_read(&(m->error_count)))
		return 'A';

	return (test_bit(DM_RAID1_FLUSH_ERROR, &(m->error_type))) ? 'F' :
		(test_bit(DM_RAID1_WRITE_ERROR, &(m->error_type))) ? 'D' :
		(test_bit(DM_RAID1_SYNC_ERROR, &(m->error_type))) ? 'S' :
		(test_bit(DM_RAID1_READ_ERROR, &(m->error_type))) ? 'R' : 'U';
}
static int mirror_status(struct dm_target *ti, status_type_t type,
			 unsigned status_flags, char *result, unsigned maxlen)
{
	unsigned int m, sz = 0;
	struct mirror_set *ms = (struct mirror_set *) ti->private;
	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
	char buffer[ms->nr_mirrors + 1];

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%d ", ms->nr_mirrors);
		for (m = 0; m < ms->nr_mirrors; m++) {
			DMEMIT("%s ", ms->mirror[m].dev->name);
			buffer[m] = device_status_char(&(ms->mirror[m]));
		}
		buffer[m] = '\0';

		DMEMIT("%llu/%llu 1 %s ",
		       (unsigned long long)log->type->get_sync_count(log),
		       (unsigned long long)ms->nr_regions, buffer);

		sz += log->type->status(log, type, result+sz, maxlen-sz);

		break;

	case STATUSTYPE_TABLE:
		sz = log->type->status(log, type, result, maxlen);

		DMEMIT("%d", ms->nr_mirrors);
		for (m = 0; m < ms->nr_mirrors; m++)
			DMEMIT(" %s %llu", ms->mirror[m].dev->name,
			       (unsigned long long)ms->mirror[m].offset);

		if (ms->features & DM_RAID1_HANDLE_ERRORS)
			DMEMIT(" 1 handle_errors");
	}

	return 0;
}
static int mirror_iterate_devices(struct dm_target *ti,
				  iterate_devices_callout_fn fn, void *data)
{
	struct mirror_set *ms = ti->private;
	int ret = 0;
	unsigned i;

	for (i = 0; !ret && i < ms->nr_mirrors; i++)
		ret = fn(ti, ms->mirror[i].dev,
			 ms->mirror[i].offset, ti->len, data);

	return ret;
}
static struct target_type mirror_target = {
	.name	 = "mirror",
	.version = {1, 12, 1},
	.module	 = THIS_MODULE,
	.ctr	 = mirror_ctr,
	.dtr	 = mirror_dtr,
	.map	 = mirror_map,
	.end_io	 = mirror_end_io,
	.presuspend = mirror_presuspend,
	.postsuspend = mirror_postsuspend,
	.resume	 = mirror_resume,
	.status	 = mirror_status,
	.iterate_devices = mirror_iterate_devices,
};
static int __init dm_mirror_init(void)
{
	int r;

	_dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0);
	if (!_dm_raid1_read_record_cache) {
		DMERR("Can't allocate dm_raid1_read_record cache");
		r = -ENOMEM;
		goto bad_cache;
	}

	r = dm_register_target(&mirror_target);
	if (r < 0) {
		DMERR("Failed to register mirror target");
		goto bad_target;
	}

	return 0;

bad_target:
	kmem_cache_destroy(_dm_raid1_read_record_cache);
bad_cache:
	return r;
}
static void __exit dm_mirror_exit(void)
{
	dm_unregister_target(&mirror_target);
	kmem_cache_destroy(_dm_raid1_read_record_cache);
}

/* Module hooks */
module_init(dm_mirror_init);
module_exit(dm_mirror_exit);