#include <linux/sysctl.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/raid/md_u.h>
#include <linux/slab.h>
static void autostart_arrays(int part);
static void md_print_devices(void);
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
static inline int speed_min(struct mddev *mddev)
static inline int speed_max(struct mddev *mddev)
.data		= &sysctl_speed_limit_min,
.maxlen		= sizeof(int),
.procname	= "speed_limit_max",
.data		= &sysctl_speed_limit_max,
.maxlen		= sizeof(int),
.child		= raid_dir_table,
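/*
 * Sketch (illustrative, not verbatim from the elided table lines): a
 * complete ctl_table entry for one of the speed-limit knobs would pair
 * the fields above with a mode and handler, e.g.:
 *
 *	{
 *		.procname	= "speed_limit_min",
 *		.data		= &sysctl_speed_limit_min,
 *		.maxlen		= sizeof(int),
 *		.mode		= S_IRUGO|S_IWUSR,
 *		.proc_handler	= proc_dointvec,
 *	},
 *
 * The .mode and .proc_handler values are assumptions based on common
 * sysctl usage; the elided lines of this table may differ.
 */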
static const struct block_device_operations md_fops;
static int start_readonly;
return bio_alloc(gfp_mask, nr_iovecs);
return bio_clone(bio, gfp_mask);
struct bio_vec *bvec;
if (offset == 0 && size == bio->bi_size)
clear_bit(BIO_SEG_VALID, &bio->bi_flags);
while (bio->bi_idx < bio->bi_vcnt &&
       bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
	offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
if (bio->bi_idx < bio->bi_vcnt) {
	bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
	bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
	(bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
bio->bi_vcnt -= bio->bi_idx;
bio_for_each_segment(bvec, bio, i) {
	if (sofar + bvec->bv_len > size)
		bvec->bv_len = size - sofar;
	if (bvec->bv_len == 0) {
	sofar += bvec->bv_len;
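/*
 * Worked example (illustrative) for the trim logic above: with three
 * 4k segments, offset = 2k and size = 6k, no whole leading segment is
 * consumed (2k < 4k), so segment 0 is advanced in place (bv_offset +=
 * 2k, bv_len -= 2k); the tail loop then clips segment 2 to zero so the
 * remaining bv_len values sum to exactly the requested 6k.
 */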
static void md_new_event_inintr(struct mddev *mddev)
#define for_each_mddev(_mddev,_tmp)					\
	for (({ spin_lock(&all_mddevs_lock);				\
		_tmp = all_mddevs.next;					\
	     ({ if (_tmp != &all_mddevs)				\
			mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
		spin_unlock(&all_mddevs_lock);				\
		if (_mddev) mddev_put(_mddev);				\
		_mddev = list_entry(_tmp, struct mddev, all_mddevs);	\
		_tmp != &all_mddevs;});					\
	     ({ spin_lock(&all_mddevs_lock);				\
		_tmp = _tmp->next;})					\
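/*
 * Usage sketch (illustrative, assuming the elided lines of the macro):
 * for_each_mddev() walks every array while holding a reference to the
 * current one, taking all_mddevs_lock only around the list step so the
 * loop body may sleep:
 *
 *	struct mddev *mddev;
 *	struct list_head *tmp;
 *
 *	for_each_mddev(mddev, tmp)
 *		pr_info("md: visiting %s\n", mdname(mddev));
 */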
static void md_make_request(struct request_queue *q, struct bio *bio)
const int rw = bio_data_dir(bio);
sectors = bio_sectors(bio);
mddev->pers->make_request(mddev, bio);
cpu = part_stat_lock();
part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
static void md_end_flush(struct bio *bio, int err)
rdev_dec_pending(rdev, mddev);
bi->bi_end_io = md_end_flush;
bi->bi_private = rdev;
bi->bi_bdev = rdev->bdev;
rdev_dec_pending(rdev, mddev);
static void md_submit_flush_data(struct work_struct *ws)
if (bio->bi_size == 0)
mddev->pers->make_request(mddev, bio);
struct mddev *mddev = cb->data;
static inline struct mddev *mddev_get(struct mddev *mddev)
static void mddev_delayed_delete(struct work_struct *ws);
static void mddev_put(struct mddev *mddev)
struct bio_set *bs = NULL;
spin_unlock(&all_mddevs_lock);
INIT_LIST_HEAD(&mddev->disks);
static struct mddev *mddev_find(dev_t unit)
struct mddev *mddev, *new = NULL;
spin_lock(&all_mddevs_lock);
if (mddev->unit == unit) {
spin_unlock(&all_mddevs_lock);
spin_unlock(&all_mddevs_lock);
static int next_minor = 512;
int start = next_minor;
if (next_minor == start) {
spin_unlock(&all_mddevs_lock);
if (mddev->unit == dev) {
new->md_minor = MINOR(dev);
spin_unlock(&all_mddevs_lock);
spin_unlock(&all_mddevs_lock);
new->md_minor = MINOR(unit);
static inline int mddev_lock(struct mddev *mddev)
static inline int mddev_is_locked(struct mddev *mddev)
static inline int mddev_trylock(struct mddev *mddev)
static void mddev_unlock(struct mddev *mddev)
if (mddev->kobj.sd) {
	if (to_remove != &md_redundancy_group)
mddev->pers->sync_request == NULL) {
spin_lock(&pers_lock);
spin_unlock(&pers_lock);
static struct md_rdev *find_rdev_nr(struct mddev *mddev, int nr)
if (rdev->bdev->bd_dev == dev)
if (rdev->bdev->bd_dev == dev)
sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
static int alloc_disk_sb(struct md_rdev *rdev)
static void super_written(struct bio *bio, int error)
struct md_rdev *rdev = bio->bi_private;
struct mddev *mddev = rdev->mddev;
if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
	printk("md: super_written gets error=%d, uptodate=%d\n",
	       error, test_bit(BIO_UPTODATE, &bio->bi_flags));
bio->bi_private = rdev;
bio->bi_end_io = super_written;
static void bi_complete(struct bio *bio, int error)
bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
bio->bi_sector = sector + rdev->sb_start;
(rdev->mddev->reshape_backwards ==
 (sector >= rdev->mddev->reshape_position)))
init_completion(&event);
bio->bi_private = &event;
bio->bi_end_io = bi_complete;
ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
static int read_disk_sb(struct md_rdev *rdev, int size)
if (!tmp1 || !tmp2) {
csum = (csum & 0xffff) + (csum >> 16);
return (csum & 0xffff) + (csum >> 16);
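/*
 * Worked example (illustrative): the two folds above reduce a 32-bit
 * sum to 16 bits with end-around carry.  For csum = 0x0001ffff the
 * first fold yields 0xffff + 0x0001 = 0x10000 and the second yields
 * 0x0000 + 0x0001 = 0x0001, so no carry bit is ever lost.
 */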
unsigned int disk_csum, csum;
csum = (newcsum & 0xffffffff) + (newcsum >> 32);
sb->sb_csum = md_csum_fold(disk_csum);
unsigned long long new_offset);
mdname(mddev), mddev->pers->name);
rdev->sb_start = calc_dev_sboffset(rdev);
if (ret)
	return ret;
if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
if (!uuid_equal(refsb, sb)) {
if (!sb_equal(refsb, sb)) {
" but different superblock to %s\n",
ev2 = md_event(refsb);
rdev->sectors = (2ULL << 32) - 2;
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
__u64 ev1 = md_event(sb);
} else if (mddev->pers == NULL) {
} else if (mddev->bitmap) {
if (ev1 < mddev->bitmap->events_cleared)
static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
int active = 0, working = 0, failed = 0, spare = 0, nr_disks = 0;
memset(sb, 0, sizeof(*sb));
desc_nr = next_spare++;
else if (is_active) {
sb->sb_csum = calc_sb_csum(sb);
static unsigned long long
if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
if (rdev->mddev->bitmap_info.offset)
rdev->sb_start = calc_dev_sboffset(rdev);
if (!num_sectors || num_sectors > rdev->sb_start)
if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
	num_sectors = (2ULL << 32) - 2;
super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset)
	return new_offset == 0;
unsigned long long newcsum;
for (i = 0; size >= 4; size -= 4)
csum = (newcsum & 0xffffffff) + (newcsum >> 32);
static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
switch (minor_version) {
sb_start = i_size_read(rdev->bdev->bd_inode) >> 9;
ret = read_disk_sb(rdev, 4096);
if (ret)
	return ret;
if (calc_sb_1_csum(sb) != sb->sb_csum) {
	printk("md: invalid superblock checksum on %s\n",
	printk("md: data_size too small on %s\n",
bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue) - 1;
bb_sector = (long long)offset;
for (i = 0; i < (sectors << (9-3)); i++, bbp++) {
	int count = bb & (0x3ff);
	sector, count, 1) == 0)
" superblock to %s\n",
if (minor_version) {
	sectors = (i_size_read(rdev->bdev->bd_inode) >> 9);
static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
mddev->bitmap_info.default_space = (4096-1024) >> 9;
} else if (mddev->pers == NULL) {
} else if (mddev->bitmap) {
if (ev1 < mddev->bitmap->events_cleared)
static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
struct badblocks *bb = &rdev->badblocks;
seq = read_seqbegin(&bb->lock);
for (i = 0; i < bb->count; i++) {
	u64 internal_bb = p[i];
if (read_seqretry(&bb->lock, seq))
if (rdev2->desc_nr+1 > max_dev)
	max_dev = rdev2->desc_nr+1;
rdev->sb_size = max_dev * 2 + 256;
bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue) - 1;
for (i = 0; i < max_dev; i++)
sb->sb_csum = calc_sb_1_csum(sb);
static unsigned long long
if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9;
if (!num_sectors || num_sectors > max_sectors)
	num_sectors = max_sectors;
} else if (rdev->mddev->bitmap_info.offset) {
sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
if (!num_sectors || num_sectors > max_sectors)
	num_sectors = max_sectors;
sb->sb_csum = calc_sb_1_csum(sb);
super_1_allow_new_offset(struct md_rdev *rdev,
if (rdev->mddev->minor_version == 0)
if (rdev->sb_start + (32+4)*2 > new_offset)
bitmap = rdev->mddev->bitmap;
if (bitmap && !rdev->mddev->bitmap_info.file &&
    bitmap->storage.file_pages * (PAGE_SIZE>>9) > new_offset)
.load_super	  = super_90_load,
.validate_super	  = super_90_validate,
.sync_super	  = super_90_sync,
.rdev_size_change = super_90_rdev_size_change,
.allow_new_offset = super_90_allow_new_offset,
.load_super	  = super_1_load,
.validate_super	  = super_1_validate,
.sync_super	  = super_1_sync,
.rdev_size_change = super_1_rdev_size_change,
.allow_new_offset = super_1_allow_new_offset,
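/*
 * Dispatch sketch (illustrative): the super_types[] ops tables above
 * keep the rest of the driver metadata-format agnostic.  Callers index
 * by the array's major version, e.g.:
 *
 *	err = super_types[mddev->major_version].
 *		load_super(rdev, refdev, mddev->minor_version);
 *
 * so supporting a new on-disk format only means adding another entry.
 */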
static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
if (rdev->bdev->bd_contains ==
    rdev2->bdev->bd_contains) {
if (list_empty(&mddev->disks))
rdev->bdev->bd_disk) < 0)
if (!reference || !bdev_get_integrity(reference->bdev))
bdev_get_integrity(reference->bdev)) != 0) {
struct blk_integrity *bi_rdev;
struct blk_integrity *bi_mddev;
bi_rdev = bdev_get_integrity(rdev->bdev);
bi_mddev = blk_get_integrity(mddev->gendisk);
rdev->bdev->bd_disk) >= 0)
static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
if (find_rdev(mddev, rdev->bdev->bd_dev))
if (mddev->level > 0)
while (find_rdev_nr(mddev, choice))
if (find_rdev_nr(mddev, rdev->desc_nr))
while ((s = strchr(b, '/')) != NULL)
rdev->mddev = mddev;
ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
static void md_delayed_delete(struct work_struct *ws)
static void unbind_rdev_from_array(struct md_rdev *rdev)
bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
shared ? (struct md_rdev *)lock_rdev : rdev);
return PTR_ERR(bdev);
static void unlock_rdev(struct md_rdev *rdev)
static void export_rdev(struct md_rdev *rdev)
static void kick_rdev_from_array(struct md_rdev *rdev)
unbind_rdev_from_array(rdev);
static void export_array(struct mddev *mddev)
kick_rdev_from_array(rdev);
if (!list_empty(&mddev->disks))
printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number,
"md: SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
" FD:%d SD:%d CSUM:%08x E:%08lx\n",
"md: SB: (V:%u) (F:0x%08x) Array-ID:<%pU>\n"
"md: Name: \"%s\" CT:%llu\n",
"md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
"md: Dev:%08x UUID: %pU\n"
"md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
"md: (MaxDev:%u)\n",
switch (major_version) {
static void md_print_devices(void)
struct mddev *mddev;
printk("md: **********************************\n");
printk("md: * <COMPLETE RAID STATE PRINTOUT> *\n");
printk("md: **********************************\n");
printk("%s: ", mdname(mddev));
print_rdev(rdev, mddev->major_version);
printk("md: **********************************\n");
static void sync_sbs(struct mddev *mddev, int nospares)
sync_super(mddev, rdev);
static void md_update_sb(struct mddev *mddev, int force_change)
int any_badblocks_changed = 0;
any_badblocks_changed++;
sync_sbs(mddev, nospares);
pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
	 mdname(mddev), mddev->in_sync);
pr_debug("md: (write) %s's sb offset: %llu\n",
	 (unsigned long long)rdev->sb_start);
pr_debug("md: %s (skipping faulty)\n",
pr_debug("(skipping incremental s/r ");
if (mddev->in_sync != sync_req ||
if (any_badblocks_changed)
static int cmd_match(const char *cmd, const char *str)
while (*cmd && *str && *cmd == *str) {
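/*
 * Semantics note (illustrative): cmd_match() treats the strings as
 * equal when they match up to an optional trailing newline, so both
 * "idle" and "idle\n" written to a sysfs attribute match "idle",
 * while "idle2" does not.
 */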
len += sprintf(page+len, "%sfaulty", sep);
len += sprintf(page+len, "%sin_sync", sep);
len += sprintf(page+len, "%swrite_mostly", sep);
len += sprintf(page+len, "%sblocked", sep);
len += sprintf(page+len, "%sspare", sep);
len += sprintf(page+len, "%swrite_error", sep);
len += sprintf(page+len, "%swant_replacement", sep);
len += sprintf(page+len, "%sreplacement", sep);
return len + sprintf(page+len, "\n");
state_store(struct md_rdev *rdev, const char *buf, size_t len)
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
} else if (cmd_match(buf, "remove")) {
	struct mddev *mddev = rdev->mddev;
	kick_rdev_from_array(rdev);
	md_update_sb(mddev, 1);
} else if (cmd_match(buf, "writemostly")) {
} else if (cmd_match(buf, "-writemostly")) {
} else if (cmd_match(buf, "blocked")) {
} else if (cmd_match(buf, "-blocked")) {
} else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
} else if (cmd_match(buf, "write_error")) {
} else if (cmd_match(buf, "-write_error")) {
} else if (cmd_match(buf, "want_replacement")) {
} else if (cmd_match(buf, "-want_replacement")) {
} else if (cmd_match(buf, "replacement")) {
	if (rdev->mddev->pers)
} else if (cmd_match(buf, "-replacement")) {
	if (rdev->mddev->pers)
return err ? err : len;
errors_show(struct md_rdev *rdev, char *page)
errors_store(struct md_rdev *rdev, const char *buf, size_t len)
if (*buf && (*e == 0 || *e == '\n')) {
slot_show(struct md_rdev *rdev, char *page)
	return sprintf(page, "none\n");
slot_store(struct md_rdev *rdev, const char *buf, size_t len)
if (strncmp(buf, "none", 4) == 0)
else if (e == buf || (*e && *e != '\n'))
if (rdev->mddev->pers && slot == -1) {
	if (rdev->mddev->pers->hot_remove_disk == NULL)
	err = rdev->mddev->pers->
		hot_remove_disk(rdev->mddev, rdev);
	sysfs_unlink_rdev(rdev->mddev, rdev);
} else if (rdev->mddev->pers) {
	if (rdev->mddev->pers->hot_add_disk == NULL)
	if (slot >= rdev->mddev->raid_disks &&
	    slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
	err = rdev->mddev->pers->
		hot_add_disk(rdev->mddev, rdev);
	if (sysfs_link_rdev(rdev->mddev, rdev))
	if (slot >= rdev->mddev->raid_disks &&
	    slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
offset_show(struct md_rdev *rdev, char *page)
offset_store(struct md_rdev *rdev, const char *buf, size_t len)
unsigned long long offset;
return sprintf(page, "%llu\n",
const char *buf, size_t len)
unsigned long long new_offset;
struct mddev *mddev = rdev->mddev;
rdev_size_show(struct md_rdev *rdev, char *page)
return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
unsigned long long blocks;
if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
if (new != blocks * 2)
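/*
 * Overflow-check sketch (illustrative): the value is written in 1K
 * blocks but stored in 512-byte sectors, so it is doubled.  Rejecting
 * inputs with the top bit set, and verifying that the sector_t result
 * halves back to the original block count, catches both 64-bit
 * overflow of blocks * 2 and truncation on a 32-bit sector_t.
 */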
rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
struct mddev *my_mddev = rdev->mddev;
if (strict_blocks_to_sectors(buf, &sectors) < 0)
rdev_size_change(rdev, sectors);
} else if (!sectors)
	sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) -
if (sectors > oldsectors && my_mddev->external) {
struct mddev *mddev;
mddev_unlock(my_mddev);
if (rdev->bdev == rdev2->bdev &&
mddev_unlock(mddev);
mddev_lock(my_mddev);
static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
	return sprintf(page, "none\n");
return sprintf(page, "%llu\n", recovery_start);
static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
unsigned long long recovery_start;
if (cmd_match(buf, "none"))
if (rdev->mddev->pers &&
badblocks_show(struct badblocks *bb, char *page, int unack);
badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
return badblocks_show(&rdev->badblocks, page, 0);
static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
int rv = badblocks_store(&rdev->badblocks, page, len, 0);
return badblocks_show(&rdev->badblocks, page, 1);
static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
return badblocks_store(&rdev->badblocks, page, len, 1);
static struct attribute *rdev_default_attrs[] = {
&rdev_new_offset.attr,
&rdev_recovery_start.attr,
&rdev_bad_blocks.attr,
&rdev_unack_bad_blocks.attr,
struct mddev *mddev = rdev->mddev;
rv = mddev ? mddev_lock(mddev) : -EBUSY;
if (rdev->mddev == NULL)
rv = entry->show(rdev, page);
mddev_unlock(mddev);
const char *page, size_t length)
struct mddev *mddev = rdev->mddev;
rv = mddev ? mddev_lock(mddev) : -EBUSY;
if (rdev->mddev == NULL)
rv = entry->store(rdev, page, length);
mddev_unlock(mddev);
static void rdev_free(struct kobject *ko)
static const struct sysfs_ops rdev_sysfs_ops = {
	.show  = rdev_attr_show,
	.store = rdev_attr_store,
.release	= rdev_free,
.sysfs_ops	= &rdev_sysfs_ops,
.default_attrs	= rdev_default_attrs,
static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
err = alloc_disk_sb(rdev);
err = lock_rdev(rdev, newdev, super_format == -2);
"md: %s has zero or unknown size, marking faulty!\n",
if (super_format >= 0) {
	err = super_types[super_format].
		load_super(rdev, NULL, super_minor);
"md: %s does not have a valid v%d.%d "
"superblock, not importing!\n",
super_format, super_minor);
"md: could not read %s's sb, not importing!\n",
if (super_format == -1)
return ERR_PTR(err);
static void analyze_sbs(struct mddev *mddev)
switch (super_types[mddev->major_version].
	load_super(rdev, freshest, mddev->minor_version)) {
"md: fatal superblock inconsistency in %s"
" -- removing from array\n",
kick_rdev_from_array(rdev);
validate_super(mddev, freshest);
"md: %s: %s: only %d devices permitted\n",
mdname(mddev), bdevname(rdev->bdev, b),
kick_rdev_from_array(rdev);
if (rdev != freshest)
	validate_super(mddev, rdev)) {
kick_rdev_from_array(rdev);
rdev->desc_nr = i++;
rdev->raid_disk = rdev->desc_nr;
rdev->raid_disk = -1;
unsigned long result = 0;
while (isdigit(*cp) || (*cp == '.' && decimals < 0)) {
else if (decimals < scale) {
result = result * 10 + value;
while (decimals < scale) {
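/*
 * Worked example (illustrative): with scale = 3 this decimal parser
 * turns "0.2" into 200: the digit loop leaves result = 2 with one
 * decimal place seen, and the trailing loop multiplies by 10 until
 * three decimal places are represented.  safe_delay_store() below
 * uses this so a delay of "0.2" seconds becomes 200 msec.
 */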
static void md_safemode_timeout(unsigned long data);
safe_delay_show(struct mddev *mddev, char *page)
return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
md_safemode_timeout((unsigned long)mddev);
level_show(struct mddev *mddev, char *page)
else if (mddev->clevel[0])
level_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers == NULL) {
	if (len >= sizeof(mddev->clevel))
	if (mddev->clevel[len-1] == '\n')
if (!mddev->pers->quiesce) {
	mdname(mddev), mddev->pers->name);
if (len == 0 || len >= sizeof(clevel))
if (clevel[len-1] == '\n')
if (request_module("md-%s", clevel) != 0)
	request_module("md-level-%s", clevel);
spin_lock(&pers_lock);
pers = find_pers(level, clevel);
if (!pers || !try_module_get(pers->owner)) {
	spin_unlock(&pers_lock);
spin_unlock(&pers_lock);
if (pers == mddev->pers) {
	module_put(pers->owner);
module_put(pers->owner);
mdname(mddev), clevel);
priv = pers->takeover(mddev);
	mddev->new_level = mddev->level;
	mddev->new_layout = mddev->layout;
	mddev->new_chunk_sectors = mddev->chunk_sectors;
	mddev->raid_disks -= mddev->delta_disks;
	mddev->delta_disks = 0;
	mddev->reshape_backwards = 0;
	module_put(pers->owner);
	mdname(mddev), clevel);
	return PTR_ERR(priv);
mddev->pers->stop(mddev);
if (mddev->pers->sync_request == NULL &&
"md: cannot register extra attributes for %s\n",
mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action");
if (mddev->pers->sync_request != NULL &&
if (mddev->to_remove == NULL)
	mddev->to_remove = &md_redundancy_group;
if (mddev->pers->sync_request == NULL &&
mddev->safemode_delay = 0;
mddev->safemode = 0;
sysfs_unlink_rdev(mddev, rdev);
if (sysfs_link_rdev(mddev, rdev))
" for %s after level change\n",
module_put(mddev->pers->owner);
mddev->private = priv;
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
mddev->level = mddev->new_level;
mddev->layout = mddev->new_layout;
mddev->chunk_sectors = mddev->new_chunk_sectors;
mddev->delta_disks = 0;
mddev->reshape_backwards = 0;
mddev->degraded = 0;
if (mddev->pers->sync_request == NULL) {
layout_show(struct mddev *mddev, char *page)
return sprintf(page, "%d (%d)\n",
layout_store(struct mddev *mddev, const char *buf, size_t len)
if (!*buf || (*e && *e != '\n'))
if (mddev->pers->check_reshape == NULL)
err = mddev->pers->check_reshape(mddev);
raid_disks_show(struct mddev *mddev, char *page)
static int update_raid_disks(struct mddev *mddev, int raid_disks);
raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
if (!*buf || (*e && *e != '\n'))
rv = update_raid_disks(mddev, n);
return rv ? rv : len;
chunk_size_show(struct mddev *mddev, char *page)
return sprintf(page, "%d (%d)\n",
chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
if (!*buf || (*e && *e != '\n'))
if (mddev->pers->check_reshape == NULL)
err = mddev->pers->check_reshape(mddev);
resync_start_show(struct mddev *mddev, char *page)
	return sprintf(page, "none\n");
resync_start_store(struct mddev *mddev, const char *buf, size_t len)
if (cmd_match(buf, "none"))
else if (!*buf || (*e && *e != '\n'))
static char *array_states[] = {
	"clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
	"write-pending", "active-idle", NULL };
static int match_word(const char *word, char **list)
for (n = 0; list[n]; n++)
	if (cmd_match(word, list[n]))
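/*
 * Usage note (illustrative): match_word() returns the index of the
 * matching entry, so writing "readonly" to array_state yields 3 in
 * the array_states[] table above, which the caller interprets as the
 * corresponding enum array_state value.
 */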
array_state_show(struct mddev *mddev, char *page)
if (list_empty(&mddev->disks) &&
return sprintf(page, "%s\n", array_states[st]);
static int do_md_stop(struct mddev *mddev, int ro, struct block_device *bdev);
static int md_set_readonly(struct mddev *mddev, struct block_device *bdev);
static int do_md_run(struct mddev *mddev);
static int restart_array(struct mddev *mddev);
array_state_store(struct mddev *mddev, const char *buf, size_t len)
enum array_state st = match_word(buf, array_states);
err = do_md_stop(mddev, 0, NULL);
err = do_md_stop(mddev, 2, NULL);
err = md_set_readonly(mddev, NULL);
err = do_md_run(mddev);
err = md_set_readonly(mddev, NULL);
else if (mddev->ro == 1)
	err = restart_array(mddev);
err = do_md_run(mddev);
restart_array(mddev);
restart_array(mddev);
err = do_md_run(mddev);
max_corrected_read_errors_show(struct mddev *mddev, char *page) {
max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
if (*buf && (*e == 0 || *e == '\n')) {
max_corrected_read_errors_store);
null_show(struct mddev *mddev, char *page)
new_dev_store(struct mddev *mddev, const char *buf, size_t len)
if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
if (*e && *e != '\n')
dev = MKDEV(major, minor);
if (major != MAJOR(dev) ||
    minor != MINOR(dev))
if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
rdev = md_import_device(dev, -2, -1);
rdev = md_import_device(dev, -1, -1);
return PTR_ERR(rdev);
err = bind_rdev_to_array(rdev, mddev);
return err ? err : len;
bitmap_store(struct mddev *mddev, const char *buf, size_t len)
unsigned long chunk, end_chunk;
if (buf == end)
	break;
if (buf == end)
	break;
if (*end && !isspace(*end))
	break;
size_show(struct mddev *mddev, char *page)
return sprintf(page, "%llu\n",
static int update_size(struct mddev *mddev, sector_t num_sectors);
size_store(struct mddev *mddev, const char *buf, size_t len)
int err = strict_blocks_to_sectors(buf, &sectors);
err = update_size(mddev, sectors);
md_update_sb(mddev, 1);
return err ? err : len;
metadata_show(struct mddev *mddev, char *page)
return sprintf(page, "%d.%d\n",
return sprintf(page, "none\n");
metadata_store(struct mddev *mddev, const char *buf, size_t len)
else if (!list_empty(&mddev->disks))
if (cmd_match(buf, "none")) {
if (strncmp(buf, "external:", 9) == 0) {
if (e == buf || *e != '.')
if (e == buf || (*e && *e != '\n'))
if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
action_show(struct mddev *mddev, char *page)
char *type = "idle";
return sprintf(page, "%s\n", type);
static void reap_sync_thread(struct mddev *mddev);
action_store(struct mddev *mddev, const char *page, size_t len)
if (!mddev->pers || !mddev->pers->sync_request)
if (cmd_match(page, "frozen"))
if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
	reap_sync_thread(mddev);
else if (cmd_match(page, "resync"))
else if (cmd_match(page, "recover")) {
} else if (cmd_match(page, "reshape")) {
	if (mddev->pers->start_reshape == NULL)
	err = mddev->pers->start_reshape(mddev);
	if (cmd_match(page, "check"))
	else if (!cmd_match(page, "repair"))
if (mddev->ro == 2) {
mismatch_cnt_show(struct mddev *mddev, char *page)
return sprintf(page, "%llu\n",
	(unsigned long long)
sync_min_show(struct mddev *mddev, char *page)
return sprintf(page, "%d (%s)\n", speed_min(mddev),
sync_min_store(struct mddev *mddev, const char *buf, size_t len)
if (strncmp(buf, "system", 6) == 0) {
if (buf == e || (*e && *e != '\n') || min <= 0)
sync_max_show(struct mddev *mddev, char *page)
return sprintf(page, "%d (%s)\n", speed_max(mddev),
sync_max_store(struct mddev *mddev, const char *buf, size_t len)
if (strncmp(buf, "system", 6) == 0) {
if (buf == e || (*e && *e != '\n') || max <= 0)
degraded_show(struct mddev *mddev, char *page)
sync_force_parallel_show(struct mddev *mddev, char *page)
sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
if (n != 0 && n != 1)
sync_force_parallel_show, sync_force_parallel_store);
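/*
 * Note on sync_speed_show() below (illustrative): db is the sector
 * delta accumulated since the last mark and dt the elapsed seconds,
 * so db/dt/2 converts sectors per second into KB/sec; e.g. 409600
 * sectors over 10 seconds is reported as 20480.
 */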
sync_speed_show(struct mddev *mddev, char *page)
unsigned long resync, dt, db;
	return sprintf(page, "none\n");
return sprintf(page, "%lu\n", db/dt/2);
sync_completed_show(struct mddev *mddev, char *page)
unsigned long long max_sectors, resync;
	return sprintf(page, "none\n");
	return sprintf(page, "delayed\n");
return sprintf(page, "%llu / %llu\n", resync, max_sectors);
min_sync_show(struct mddev *mddev, char *page)
return sprintf(page, "%llu\n",
min_sync_store(struct mddev *mddev, const char *buf, size_t len)
unsigned long long min;
max_sync_show(struct mddev *mddev, char *page)
	return sprintf(page, "max\n");
return sprintf(page, "%llu\n",
max_sync_store(struct mddev *mddev, const char *buf, size_t len)
if (strncmp(buf, "max", 3) == 0)
unsigned long long max;
if (max < mddev->resync_min)
if (max < mddev->resync_max &&
suspend_lo_show(struct mddev *mddev, char *page)
suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers == NULL ||
    mddev->pers->quiesce == NULL)
if (buf == e || (*e && *e != '\n'))
mddev->pers->quiesce(mddev, 2);
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
suspend_hi_show(struct mddev *mddev, char *page)
suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
if (mddev->pers == NULL ||
    mddev->pers->quiesce == NULL)
if (buf == e || (*e && *e != '\n'))
mddev->pers->quiesce(mddev, 2);
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
reshape_position_show(struct mddev *mddev, char *page)
return sprintf(page, "%llu\n",
reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
if (buf == e || (*e && *e != '\n'))
reshape_position_store);
reshape_direction_show(struct mddev *mddev, char *page)
mddev->reshape_backwards ? "backwards" : "forwards");
reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
if (cmd_match(buf, "forwards"))
else if (cmd_match(buf, "backwards"))
reshape_direction_store);
array_size_show(struct mddev *mddev, char *page)
return sprintf(page, "%llu\n",
return sprintf(page, "default\n");
array_size_store(struct mddev *mddev, const char *buf, size_t len)
if (strncmp(buf, "default", 7) == 0) {
	sectors = mddev->pers->size(mddev, 0, 0);
if (strict_blocks_to_sectors(buf, &sectors) < 0)
static struct attribute *md_default_attrs[] = {
&md_raid_disks.attr,
&md_chunk_size.attr,
&md_resync_start.attr,
&md_new_device.attr,
&md_safe_delay.attr,
&md_array_state.attr,
&md_reshape_position.attr,
&md_reshape_direction.attr,
&md_array_size.attr,
&max_corr_read_errors.attr,
static struct attribute *md_redundancy_attrs[] = {
&md_mismatches.attr,
&md_sync_speed.attr,
&md_sync_force_parallel.attr,
&md_sync_completed.attr,
&md_suspend_lo.attr,
&md_suspend_hi.attr,
.attrs = md_redundancy_attrs,
struct mddev *mddev = container_of(kobj, struct mddev, kobj);
spin_lock(&all_mddevs_lock);
spin_unlock(&all_mddevs_lock);
spin_unlock(&all_mddevs_lock);
rv = mddev_lock(mddev);
rv = entry->show(mddev, page);
mddev_unlock(mddev);
const char *page, size_t length)
struct mddev *mddev = container_of(kobj, struct mddev, kobj);
spin_lock(&all_mddevs_lock);
spin_unlock(&all_mddevs_lock);
spin_unlock(&all_mddevs_lock);
rv = mddev_lock(mddev);
rv = entry->store(mddev, page, length);
mddev_unlock(mddev);
static void md_free(struct kobject *ko)
struct mddev *mddev = container_of(ko, struct mddev, kobj);
static const struct sysfs_ops md_sysfs_ops = {
	.show  = md_attr_show,
	.store = md_attr_store,
.sysfs_ops	= &md_sysfs_ops,
.default_attrs	= md_default_attrs,
static void mddev_delayed_delete(struct work_struct *ws)
static int md_alloc(dev_t dev, char *name)
struct mddev *mddev = mddev_find(dev);
struct gendisk *disk;
struct mddev *mddev2;
spin_lock(&all_mddevs_lock);
spin_unlock(&all_mddevs_lock);
spin_unlock(&all_mddevs_lock);
mddev->queue->queuedata = mddev;
disk->first_minor = unit << shift;
strcpy(disk->disk_name, name);
else if (partitioned)
	sprintf(disk->disk_name, "md_d%d", unit);
	sprintf(disk->disk_name, "md%d", unit);
disk->fops = &md_fops;
disk->private_data = mddev;
disk->flags |= GENHD_FL_EXT_DEVT;
&disk_to_dev(disk)->kobj, "%s", "md");
if (mddev->kobj.sd &&
if (!error && mddev->kobj.sd) {
mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
md_alloc(dev, NULL);
char buf[DISK_NAME_LEN];
while (len && val[len-1] == '\n')
if (len >= DISK_NAME_LEN)
if (strncmp(buf, "md_", 3) != 0)
return md_alloc(0, buf);
static void md_safemode_timeout(unsigned long data)
struct mddev *mddev = (struct mddev *) data;
static int start_dirty_degraded;
if (list_empty(&mddev->disks))
request_module("md-level-%d", mddev->level);
else if (mddev->clevel[0])
	request_module("md-%s", mddev->clevel);
printk("md: %s: data overlaps metadata\n",
printk("md: %s: metadata overlaps data\n",
spin_lock(&pers_lock);
if (!pers || !try_module_get(pers->owner)) {
	spin_unlock(&pers_lock);
spin_unlock(&pers_lock);
module_put(pers->owner);
rdev->bdev->bd_contains ==
rdev2->bdev->bd_contains) {
"%s: WARNING: %s appears to be"
" on the same physical disk as"
"True protection against single-disk"
" failure might be compromised.\n");
if (start_readonly && mddev->ro == 0)
err = mddev->pers->run(mddev);
" but 'external_size' not in effect?\n", __func__);
"md: invalid array_size %llu > default size %llu\n",
(unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
mddev->pers->stop(mddev);
if (err == 0 && mddev->pers->sync_request &&
mdname(mddev), err);
mddev->pers->stop(mddev);
module_put(mddev->pers->owner);
if (mddev->pers->sync_request) {
	if (mddev->kobj.sd &&
"md: cannot register extra attributes for %s\n",
mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
} else if (mddev->ro == 2)
if (sysfs_link_rdev(mddev, rdev))
md_update_sb(mddev, 0);
static int do_md_run(struct mddev *mddev)
static int restart_array(struct mddev *mddev)
struct gendisk *disk = mddev->gendisk;
if (list_empty(&mddev->disks))
static int deny_bitmap_write_access(struct file *file)
spin_lock(&inode->i_lock);
spin_unlock(&inode->i_lock);
spin_unlock(&inode->i_lock);
struct inode *inode = file->f_mapping->host;
spin_lock(&inode->i_lock);
spin_unlock(&inode->i_lock);
static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
reap_sync_thread(mddev);
md_update_sb(mddev, 1);
__md_stop_writes(mddev);
mddev_unlock(mddev);
static void __md_stop(struct mddev *mddev)
mddev->pers->stop(mddev);
mddev->to_remove = &md_redundancy_group;
module_put(mddev->pers->owner);
static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
printk("md: %s still in use.\n", mdname(mddev));
__md_stop_writes(mddev);
static int do_md_stop(struct mddev *mddev, int mode,
struct gendisk *disk = mddev->gendisk;
printk("md: %s still in use.\n", mdname(mddev));
__md_stop_writes(mddev);
mddev->queue->backing_dev_info.congested_fn = NULL;
if (rdev->raid_disk >= 0)
	sysfs_unlink_rdev(mddev, rdev);
set_capacity(disk, 0);
if (mddev->bitmap_info.file) {
	fput(mddev->bitmap_info.file);
	mddev->bitmap_info.file = NULL;
mddev->bitmap_info.offset = 0;
export_array(mddev);
mddev->hold_active = 0;
sysfs_notify_dirent_safe(mddev->sysfs_state);
static void autorun_array(struct mddev *mddev)
if (list_empty(&mddev->disks))
err = do_md_run(mddev);
do_md_stop(mddev, 0, NULL);
static void autorun_devices(int part)
struct mddev *mddev;
while (!list_empty(&pending_raid_disks)) {
INIT_LIST_HEAD(&candidates);
if (super_90_load(rdev, rdev0, 0) >= 0) {
	list_move(&rdev->same_set, &candidates);
dev = MKDEV(mdp_major,
if (rdev0->preferred_minor != unit) {
	bdevname(rdev0->bdev, b), rdev0->preferred_minor);
md_probe(dev, NULL, NULL);
mddev = mddev_find(dev);
if (!mddev || !mddev->gendisk) {
"md: cannot allocate memory for md drive.\n");
if (mddev_lock(mddev))
|| !list_empty(&mddev->disks)) {
"md: %s already running, cannot run %s\n",
mdname(mddev), bdevname(rdev0->bdev, b));
mddev_unlock(mddev);
list_del_init(&rdev->same_set);
if (bind_rdev_to_array(rdev, mddev))
autorun_array(mddev);
mddev_unlock(mddev);
list_del_init(&rdev->same_set);
static int get_version(void __user *arg)
static int get_array_info(struct mddev *mddev, void __user *arg)
int nr, working, insync, failed, spare;
nr = working = insync = failed = spare = 0;
static int get_bitmap_file(struct mddev *mddev, void __user *arg)
static int get_disk_info(struct mddev *mddev, void __user *arg)
rdev = find_rdev_nr_rcu(mddev, info.number);
"md: md_import_device returned %ld\n",
return PTR_ERR(rdev);
if (!list_empty(&mddev->disks)) {
"md: %s has different UUID to %s\n",
err = bind_rdev_to_array(rdev, mddev);
if (!mddev->pers->hot_add_disk) {
"%s: personality does not support diskops!\n",
rdev = md_import_device(dev, -1, -1);
"md: md_import_device returned %ld\n",
return PTR_ERR(rdev);
validate_super(mddev, rdev);
err = bind_rdev_to_array(rdev, mddev);
if (!err && !mddev->pers->hot_remove_disk) {
validate_super(mddev, rdev);
err = mddev->pers->hot_add_disk(mddev, rdev);
unbind_rdev_from_array(rdev);
md_update_sb(mddev, 1);
rdev = md_import_device(dev, -1, 0);
"md: error, md_import_device() returned %ld\n",
return PTR_ERR(rdev);
rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
rdev->sb_start = calc_dev_sboffset(rdev);
err = bind_rdev_to_array(rdev, mddev);
static int hot_remove_disk(struct mddev *mddev, dev_t dev)
rdev = find_rdev(mddev, dev);
kick_rdev_from_array(rdev);
md_update_sb(mddev, 1);
static int hot_add_disk(struct mddev *mddev, dev_t dev)
" version-0 superblocks.\n",
if (!mddev->pers->hot_add_disk) {
"%s: personality does not support diskops!\n",
rdev = md_import_device(dev, -1, 0);
"md: error, md_import_device() returned %ld\n",
rdev->sb_start = calc_dev_sboffset(rdev);
rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
"md: can not hot-add faulty %s disk to %s!\n",
err = bind_rdev_to_array(rdev, mddev);
md_update_sb(mddev, 1);
static int set_bitmap_file(struct mddev *mddev, int fd)
if (!mddev->pers->quiesce)
err = deny_bitmap_write_access(mddev->bitmap_info.file);
} else if (mddev->bitmap == NULL)
mddev->pers->quiesce(mddev, 1);
if (fd < 0 || err) {
mddev->pers->quiesce(mddev, 0);
"md: superblock version %d not known\n",
WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
static int update_size(struct mddev *mddev, sector_t num_sectors)
int fit = (num_sectors == 0);
if (mddev->pers->resize == NULL)
if (fit && (num_sectors == 0 || num_sectors > avail))
	num_sectors = avail;
if (avail < num_sectors)
rv = mddev->pers->resize(mddev, num_sectors);
static int update_raid_disks(struct mddev *mddev, int raid_disks)
if (mddev->pers->check_reshape == NULL)
if (raid_disks <= 0 ||
rv = mddev->pers->check_reshape(mddev);
((state^info->state) & 0xfffffe00)
if (mddev->pers->check_reshape == NULL)
rv = mddev->pers->check_reshape(mddev);
rv = update_raid_disks(mddev, info->raid_disks);
if (mddev->pers->quiesce == NULL)
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
if (mddev->bitmap->storage.file)
mddev->pers->quiesce(mddev, 1);
mddev->pers->quiesce(mddev, 0);
md_update_sb(mddev, 1);
static int set_disk_faulty(struct mddev *mddev, dev_t dev)
if (mddev->pers == NULL)
rdev = find_rdev_rcu(mddev, dev);
struct mddev *mddev = bdev->bd_disk->private_data;
unsigned int cmd, unsigned long arg)
struct mddev *mddev = NULL;
err = get_version(argp);
autostart_arrays(arg);
mddev = bdev->bd_disk->private_data;
err = get_array_info(mddev, argp);
err = get_disk_info(mddev, argp);
err = set_disk_faulty(mddev, new_decode_dev(arg));
err = mddev_lock(mddev);
"md: ioctl lock interrupted, reason %d, cmd %d\n",
memset(&info, 0, sizeof(info));
err = update_array_info(mddev, &info);
" array info. %d\n", err);
if (!list_empty(&mddev->disks)) {
"md: array %s already has disks!\n",
"md: array %s already initialised!\n",
err = set_array_info(mddev, &info);
" array info. %d\n", err);
err = get_bitmap_file(mddev, argp);
err = restart_array(mddev);
err = do_md_stop(mddev, 0, bdev);
err = md_set_readonly(mddev, bdev);
if (get_user(ro, (int __user *)(arg))) {
err = restart_array(mddev);
if (mddev->ro == 2) {
err = add_new_disk(mddev, &info);
err = hot_remove_disk(mddev, new_decode_dev(arg));
err = hot_add_disk(mddev, new_decode_dev(arg));
err = do_md_run(mddev);
err = set_bitmap_file(mddev, (int)arg);
mddev_unlock(mddev);
#ifdef CONFIG_COMPAT
unsigned int cmd, unsigned long arg)
arg = (unsigned long)compat_ptr(arg);
return md_ioctl(bdev, mode, cmd, arg);
struct mddev *mddev = mddev_find(bdev->bd_dev);
struct mddev *mddev = disk->private_data;
static int md_media_changed(struct gendisk *disk)
struct mddev *mddev = disk->private_data;
static int md_revalidate(struct gendisk *disk)
struct mddev *mddev = disk->private_data;
static const struct block_device_operations md_fops =
.release	= md_release,
#ifdef CONFIG_COMPAT
.compat_ioctl	= md_compat_ioctl,
.getgeo		= md_getgeo,
.media_changed	= md_media_changed,
.revalidate_disk = md_revalidate,
thread->run(thread);
pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
struct mddev *mddev, const char *name)
mdname(thread->mddev),
if (IS_ERR(thread->tsk)) {
pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
spin_lock(&pers_lock);
spin_unlock(&pers_lock);
if (!mddev->pers || !mddev->pers->error_handler)
mddev->pers->error_handler(mddev, rdev);
md_new_event_inintr(mddev);
static void status_unused(struct seq_file *seq)
static void status_resync(struct seq_file *seq, struct mddev *mddev)
unsigned long dt, db;
unsigned int per_milli;
if (sizeof(sector_t) > sizeof(unsigned long)) {
	while (max_sectors/2 > (1ULL<<(scale+32)))
res = (resync>>scale)*1000;
int i, x = per_milli/50, y = 20-x;
for (i = 0; i < x; i++)
for (i = 0; i < y; i++)
"resync" : "recovery"))),
per_milli/10, per_milli % 10,
(unsigned long long) resync/2,
(unsigned long long) max_sectors/2);
rt = max_sectors - resync;
seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
	   ((unsigned long)rt % 60)/6);
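/*
 * Worked example (illustrative): rt is an ETA in seconds, and printing
 * rt/60 with (rt%60)/6 renders tenths of a minute, so rt = 90 shows as
 * "finish=1.5min".  Likewise per_milli is progress in thousandths:
 * per_milli = 427 draws 8 of the 20 bar marks (427/50) and prints
 * "42.7%".
 */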
static void *md_seq_start(struct seq_file *seq, loff_t *pos)
struct mddev *mddev;
spin_lock(&all_mddevs_lock);
spin_unlock(&all_mddevs_lock);
spin_unlock(&all_mddevs_lock);
static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
struct mddev *next_mddev, *mddev = v;
spin_lock(&all_mddevs_lock);
next_mddev = (void*)2;
spin_unlock(&all_mddevs_lock);
static void md_seq_stop(struct seq_file *seq, void *v)
struct mddev *mddev = v;
if (mddev && v != (void*)1 && v != (void*)2)
static int md_seq_show(struct seq_file *seq, void *v)
struct mddev *mddev = v;
if (v == (void*)1) {
spin_lock(&pers_lock);
spin_unlock(&pers_lock);
if (v == (void*)2) {
if (mddev_lock(mddev) < 0)
seq_printf(seq, "%s : %sactive", mdname(mddev),
	   mddev->pers ? "" : "in");
if (!list_empty(&mddev->disks)) {
(unsigned long long)
(unsigned long long)sectors / 2);
mddev->pers->status(seq, mddev);
if (mddev->pers->sync_request) {
	status_resync(seq, mddev);
mddev_unlock(mddev);
.start	= md_seq_start,
.next	= md_seq_next,
.stop	= md_seq_stop,
.show	= md_seq_show,
static int md_seq_open(struct inode *inode, struct file *file)
error = seq_open(file, &md_seq_ops);
static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
poll_wait(filp, &md_event_waiters, wait);
.open	= md_seq_open,
.poll	= mdstat_poll,
spin_lock(&pers_lock);
spin_unlock(&pers_lock);
spin_lock(&pers_lock);
list_del_init(&p->list);
spin_unlock(&pers_lock);
static int is_mddev_idle(struct mddev *mddev, int init)
struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
	      (int)part_stat_read(&disk->part0, sectors[1]) -
if (init || curr_events - rdev->last_events > 64) {
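/*
 * Heuristic note (illustrative): curr_events totals sectors read and
 * written on the whole underlying disk minus the array's own sync I/O,
 * so if it has advanced more than 64 sectors past last_events some
 * non-resync I/O is in flight and the device is not idle; the resync
 * then throttles itself back toward speed_limit_min.
 */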
if (bio_data_dir(bi) != WRITE)
if (mddev->ro == 2) {
if (!mddev->pers->sync_request)
md_update_sb(mddev, 0);
#define SYNC_MARKS	10
#define SYNC_MARK_STEP	(3*HZ)
struct mddev *mddev = thread->mddev;
struct mddev *mddev2;
unsigned int currspeed = 0,
desc = "data-check";
desc = "requested-resync";
if (mddev2 == mddev)
&& match_mddev_units(mddev, mddev2)) {
" until %s has finished (they"
" share one or more physical units)\n",
desc, mdname(mddev), mdname(mddev2));
" %d KB/sec/disk.\n", speed_min(mddev));
"(but not more than %d KB/sec) for %s.\n",
speed_max(mddev), desc);
is_mddev_idle(mddev, 1);
mark_cnt[m] = io_sectors;
window/2, (unsigned long long)max_sectors/2);
"md: resuming %s of %s from checkpoint.\n",
desc, mdname(mddev));
while (j < max_sectors) {
> (max_sectors >> 4)) ||
sectors = mddev->pers->sync_request(mddev, j, &skipped,
	currspeed < speed_min(mddev));
if (last_check == 0)
if (last_check + window > io_sectors || j == max_sectors)
last_check = io_sectors;
int next = (last_mark+1) % SYNC_MARKS;
if (currspeed > speed_min(mddev)) {
	if ((currspeed > speed_max(mddev)) ||
	    !is_mddev_idle(mddev, 0)) {
mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
"md: checkpointing %s of %s.\n",
desc, mdname(mddev));
"md: md_do_sync() got signal ... exiting\n");
static int remove_and_add_spares(struct mddev *mddev)
if (rdev->raid_disk >= 0 &&
if (mddev->pers->hot_remove_disk(
	mddev, rdev) == 0) {
	sysfs_unlink_rdev(mddev, rdev);
hot_add_disk(mddev, rdev) == 0) {
	if (sysfs_link_rdev(mddev, rdev))
static void reap_sync_thread(struct mddev *mddev)
if (mddev->pers->spare_active(mddev)) {
mddev->pers->finish_reshape)
	mddev->pers->finish_reshape(mddev);
if (!mddev->degraded ||
md_update_sb(mddev, 1);
sysfs_notify_dirent_safe(mddev->sysfs_action);
if (mddev->suspended)
if (signal_pending(current)) {
	if (mddev->pers->sync_request && !mddev->external) {
		mddev->safemode = 2;
if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
(mddev->external == 0 && mddev->safemode == 1) ||
(mddev->safemode == 2 && !atomic_read(&mddev->writes_pending)
 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
if (mddev_trylock(mddev)) {
if (mddev->pers->hot_remove_disk(
	mddev, rdev) == 0) {
	sysfs_unlink_rdev(mddev, rdev);
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
if (!mddev->external) {
	spin_lock_irq(&mddev->write_lock);
	if (mddev->safemode &&
	if (mddev->safemode == 1)
		mddev->safemode = 0;
	spin_unlock_irq(&mddev->write_lock);
sysfs_notify_dirent_safe(mddev->sysfs_state);
md_update_sb(mddev, 0);
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
if (mddev->sync_thread) {
	reap_sync_thread(mddev);
mddev->curr_resync_completed = 0;
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
if (mddev->reshape_position != MaxSector) {
	if (mddev->pers->check_reshape == NULL ||
	    mddev->pers->check_reshape(mddev) != 0)
	set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
} else if ((spares = remove_and_add_spares(mddev))) {
	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
} else if (mddev->recovery_cp < MaxSector) {
	set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
} else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
if (mddev->pers->sync_request) {
if (!mddev->sync_thread) {
	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
	clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
sysfs_notify_dirent_safe(mddev->sysfs_action);
if (!mddev->sync_thread) {
	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
if (mddev->sysfs_action)
	sysfs_notify_dirent_safe(mddev->sysfs_action);
mddev_unlock(mddev);
rdev_dec_pending(rdev, mddev);
sector_t *first_bad, int *bad_sectors)
if (bb->shift > 0) {
	target += (1<<bb->shift) - 1;
	target >>= bb->shift;
	sectors = target - s;
seq = read_seqbegin(&bb->lock);
while (hi - lo > 1) {
	int mid = (lo + hi) / 2;
if (rv != -1 && BB_ACK(p[lo]))
*bad_sectors = BB_LEN(p[lo]);
if (read_seqretry(&bb->lock, seq))
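/*
 * Lookup sketch (illustrative): the bad-block table is kept sorted by
 * start sector, so the lo/hi loop above is a plain binary search for
 * the last entry beginning at or before the target, and the seqlock
 * retry re-runs the lockless search if a writer raced with it.
 */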
next += (1<<bb->shift) - 1;
while (hi - lo > 1) {
	int mid = (lo + hi) / 2;
if (s == a && s + sectors >= e)
ack = ack && acknowledged;
if (e < s + sectors)
if (sectors && hi < bb->count) {
if (a <= s + sectors) {
	if (e <= s + sectors) {
ack = ack && acknowledged;
if (sectors == 0 && hi < bb->count) {
int lolen = BB_LEN(p[lo]);
int hilen = BB_LEN(p[hi]);
int newlen = lolen + hilen - (s - a);
(bb->count - hi - 1) * 8);
(bb->count - hi) * 8);
p[hi] = BB_MAKE(s, this_sectors, acknowledged);
sectors -= this_sectors;
bb->unacked_exist = 1;
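/*
 * Encoding sketch (an assumption based on the BB_* helpers used here):
 * each table entry packs start sector, length and an "acknowledged"
 * flag into one 64-bit word, roughly:
 *
 *	p[i]  = BB_MAKE(start, len, ack);
 *	start = BB_OFFSET(p[i]);
 *	len   = BB_LEN(p[i]);
 *
 * which is why ranges longer than the maximum encodable length are
 * split across several entries in the loop above.
 */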
if (bb->shift > 0) {
	s += (1<<bb->shift) - 1;
	target >>= bb->shift;
	sectors = target - s;
while (hi - lo > 1) {
	int mid = (lo + hi) / 2;
memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
p[lo] = BB_MAKE(target, end - target, ack);
p[lo] = BB_MAKE(start, s - start, ack);
memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
bb->count -= (hi - lo - 1);
return md_clear_badblocks(&rdev->badblocks,
if (bb->page == NULL || bb->changed)
if (bb->changed == 0 && bb->unacked_exist) {
for (i = 0; i < bb->count; i++) {
bb->unacked_exist = 0;
badblocks_show(struct badblocks *bb, char *page, int unack)
seq = read_seqbegin(&bb->lock);
while (len < PAGE_SIZE && i < bb->count) {
	unsigned int length = BB_LEN(p[i]);
	(unsigned long long)s << bb->shift,
	length << bb->shift);
if (unack && len == 0)
	bb->unacked_exist = 0;
if (read_seqretry(&bb->lock, seq))
badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
unsigned long long sector;
if (page[0] == '-') {
switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
	if (newline != '\n')
md_clear_badblocks(bb, sector, length);
if (md_set_badblocks(bb, sector, length, !unack))
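/*
 * Input format note (illustrative): the attribute accepts
 * "<sector> <length>\n" to record a bad range, with a leading '-' to
 * clear one, e.g.:
 *
 *	echo "12345 8" > bad_blocks
 *	echo "-12345 8" > bad_blocks
 *
 * and unack selects whether the new range is stored as already
 * acknowledged.
 */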
unsigned long code, void *x)
struct mddev *mddev;
if (mddev_trylock(mddev)) {
	__md_stop_writes(mddev);
mddev_unlock(mddev);
.notifier_call = md_notify_reboot,
static void md_geninit(void)
proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
static int __init md_init(void)
md_probe, NULL, NULL);
md_probe, NULL, NULL);
node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
if (node_detected_dev) {
	node_detected_dev->dev = dev;
", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
static void autostart_arrays(int part)
int i_scanned, i_passed;
while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
	node_detected_dev = list_entry(all_detected_devices.next,
	dev = node_detected_dev->dev;
	kfree(node_detected_dev);
	rdev = md_import_device(dev, 0, 90);
	list_add(&rdev->same_set, &pending_raid_disks);
i_scanned, i_passed);
autorun_devices(part);
static __exit void md_exit(void)
struct mddev *mddev;
export_array(mddev);
return sprintf(buffer, "%d", start_readonly);
static int set_ro(const char *val, struct kernel_param *kp)
if (*val && (*e == '\0' || *e == '\n')) {
	start_readonly = num;
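/*
 * Usage note (an assumption about the surrounding module-param glue):
 * start_readonly is exposed as the module parameter "start_ro", so
 * booting with md_mod.start_ro=1, or writing 1 to
 * /sys/module/md_mod/parameters/start_ro, makes newly started arrays
 * come up read-auto until the first write.
 */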