27 #include <linux/kernel.h>
28 #include <linux/module.h>
31 #include <linux/rbtree.h>
32 #include <linux/sched.h>
33 #include <linux/slab.h>
39 #include <linux/device.h>
/* The string "mtdswap". NOTE(review): presumably a name/log prefix; its users are not visible here. */
42 #define MTDSWAP_PREFIX "mtdswap"
/* NOTE(review): the unit (blocks vs. percent) is not visible in this excerpt; confirm at the use site. */
47 #define CLEAN_BLOCK_THRESHOLD 20
/*
 * NOTE(review): "TRESHOLD" is a misspelling of "THRESHOLD". It is left as is
 * because renaming would have to touch every user of the macro.
 */
53 #define LOW_FRAG_GC_TRESHOLD 5
/*
 * Wear-levelling tuning values.
 * mtdswap_choose_wl_tree() tests its local `max` against MAX_ERASE_DIFF, and
 * mtdswap_wlfreq() returns COLLECT_NONDIRTY_FREQ2 plus a computed term.
 * COLLECT_NONDIRTY_BASE is an alias for MAX_ERASE_DIFF.
 */
65 #define MAX_ERASE_DIFF 4000
66 #define COLLECT_NONDIRTY_BASE MAX_ERASE_DIFF
67 #define COLLECT_NONDIRTY_FREQ1 6
68 #define COLLECT_NONDIRTY_FREQ2 4
/*
 * Values taken from the top of the unsigned int range: UINT_MAX,
 * UINT_MAX - 1 and UINT_MAX - 2. PAGE_UNDEF and BLOCK_UNDEF share one value.
 * NOTE(review): presumably sentinels for the page/block maps; confirm.
 */
70 #define PAGE_UNDEF UINT_MAX
71 #define BLOCK_UNDEF UINT_MAX
72 #define BLOCK_ERROR (UINT_MAX - 1)
73 #define BLOCK_MAX (UINT_MAX - 2)
/*
 * Erase block flags, one bit each (bits 0-4). EBLOCK_IDX_SHIFT (5) is the
 * first bit position above them.
 * NOTE(review): presumably an index is packed above the flag bits; confirm
 * against the code that uses EBLOCK_IDX_SHIFT.
 */
75 #define EBLOCK_BAD (1 << 0)
76 #define EBLOCK_NOMAGIC (1 << 1)
77 #define EBLOCK_BITFLIP (1 << 2)
78 #define EBLOCK_FAILED (1 << 3)
79 #define EBLOCK_READERR (1 << 4)
80 #define EBLOCK_IDX_SHIFT 5
92 #define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \
94 #define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \
/* Magic numbers: the dirty value is the clean value plus one (0x2096). */
150 #define MTDSWAP_MAGIC_CLEAN 0x2095
151 #define MTDSWAP_MAGIC_DIRTY (MTDSWAP_MAGIC_CLEAN + 1)
/* Marker type selectors matching the two magic values above. */
152 #define MTDSWAP_TYPE_CLEAN 0
153 #define MTDSWAP_TYPE_DIRTY 1
/* Size in bytes of struct mtdswap_oobdata (declared outside this excerpt). */
154 #define MTDSWAP_OOBSIZE sizeof(struct mtdswap_oobdata)
/* NOTE(review): presumably upper bounds on erase and I/O retry loops; the loops are not visible here. */
156 #define MTDSWAP_ERASE_RETRIES 3
157 #define MTDSWAP_IO_RETRIES 3
/* MIN_ERASE_BLOCKS is MIN_SPARE_EBLOCKS + 1, i.e. 3. */
172 #define MIN_SPARE_EBLOCKS 2
173 #define MIN_ERASE_BLOCKS (MIN_SPARE_EBLOCKS + 1)
/*
 * Accessors for the entries of d->trees[].
 *
 * `d` is a pointer expression to the device structure and `name` is the
 * suffix of an MTDSWAP_* tree index (it is token-pasted onto "MTDSWAP_").
 * The pointer argument is parenthesized in every expansion so that callers
 * may pass any expression (for example `d + 1`), not just a plain identifier.
 *
 * TREE_ROOT     - address of the tree's root member
 * TREE_EMPTY    - true when the root's rb_node pointer is NULL
 * TREE_NONEMPTY - negation of TREE_EMPTY
 * TREE_COUNT    - the tree's count member (an lvalue)
 */
#define TREE_ROOT(d, name) (&(d)->trees[MTDSWAP_ ## name].root)
#define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL)
#define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name))
#define TREE_COUNT(d, name) ((d)->trees[MTDSWAP_ ## name].count)

/* Read dev->priv and cast it to struct mtdswap_dev *; dev may be any pointer expression. */
#define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)(dev)->priv)
/*
 * 128-byte string buffer, empty by default. The add_mtd path points `parts`
 * at it and splits it on ',' with strsep(), comparing values against
 * mtd->index.
 * NOTE(review): presumably a list of MTD partition numbers; the module
 * parameter description fragment shows the form partitions="1,3,5".
 */
182 static char partitions[128] =
"";
185 "partitions=\"1,3,5\"");
/*
 * Defaults to 10. The add_mtd path clamps it to at most 100 and computes
 * spare_cnt as eavailable * spare_eblocks / 100, so it acts as a percentage.
 */
187 static unsigned int spare_eblocks = 10;
190 "garbage collection (default 10%)");
195 "Include builtin swap header (default 0, without header)");
/*
 * Forward declaration: mtdswap_gc() is called (with background == 0, from
 * the loop on mtdswap_enough_free_pages()) earlier in the file than its
 * definition appears.
 */
197 static int mtdswap_gc(
struct mtdswap_dev *
d,
unsigned int background);
201 return (loff_t)(eb - d->
eb_data) * d->
mtd->erasesize;
211 oldidx = tp - &d->
trees[0];
213 d->
trees[oldidx].count--;
233 rb_link_node(&eb->
rb, parent, p);
244 mtdswap_eb_detach(d, eb);
246 __mtdswap_rb_add(root, eb);
251 static struct rb_node *mtdswap_rb_index(
struct rb_root *root,
unsigned int idx)
258 while (i < idx && p) {
273 mtdswap_eb_detach(d, eb);
277 if (!mtd_can_have_bb(d->
mtd))
280 offset = mtdswap_eb_offset(d, eb);
281 dev_warn(d->
dev,
"Marking bad block at %08llx\n", offset);
285 dev_warn(d->
dev,
"Mark block bad failed for block at %08llx "
286 "error %d\n", offset, ret);
300 if (curr_write == eb) {
309 return mtdswap_handle_badblock(d, eb);
317 if (mtd_is_bitflip(ret))
321 dev_warn(d->
dev,
"Read OOB failed %d for block at %08llx\n",
327 dev_warn(d->
dev,
"Read OOB return short read (%zd bytes not "
328 "%zd) for block at %08llx\n",
343 offset = mtdswap_eb_offset(d, eb);
349 ops.
ooblen = 2 * d->
mtd->ecclayout->oobavail;
355 ret = mtdswap_read_oob(d, offset, &ops);
357 if (ret && !mtd_is_bitflip(ret))
366 if (mtd_is_bitflip(ret))
399 offset = mtdswap_eb_offset(d, eb);
403 offset = mtdswap_eb_offset(d, eb) + d->
mtd->writesize;
406 ret = mtd_write_oob(d->
mtd, offset, &ops);
409 dev_warn(d->
dev,
"Write OOB failed for block at %08llx "
410 "error %d\n", offset, ret);
411 if (ret == -
EIO || mtd_is_eccerr(ret))
412 mtdswap_handle_write_error(d, eb);
417 dev_warn(d->
dev,
"Short OOB write for block at %08llx: "
431 static void mtdswap_check_counts(
struct mtdswap_dev *d)
436 unsigned int i,
cnt, median;
439 for (i = 0; i < d->
eblks; i++) {
445 __mtdswap_rb_add(&hist_root, eb);
452 medrb = mtdswap_rb_index(&hist_root, cnt / 2);
457 for (i = 0; i < d->
eblks; i++) {
470 static void mtdswap_scan_eblks(
struct mtdswap_dev *d)
476 for (i = 0; i < d->
eblks; i++) {
479 status = mtdswap_read_markers(d, eb);
502 mtdswap_check_counts(d);
504 for (i = 0; i < d->
eblks; i++) {
511 mtdswap_rb_add(d, eb, idx);
531 if (weight == maxweight)
533 else if (weight == 0)
535 else if (weight > (maxweight/2))
565 erase.callback = mtdswap_erase_callback;
566 erase.addr = mtdswap_eb_offset(d, eb);
574 "erase of erase block %#llx on %s failed",
575 erase.addr, mtd->
name);
580 dev_err(d->
dev,
"Cannot erase erase block %#llx on %s\n",
581 erase.addr, mtd->
name);
583 mtdswap_handle_badblock(d, eb);
590 dev_err(d->
dev,
"Interrupted erase block %#llx erassure on %s",
591 erase.addr, mtd->
name);
598 "erase of erase block %#llx on %s failed",
599 erase.addr, mtd->
name);
604 mtdswap_handle_badblock(d, eb);
611 static int mtdswap_map_free_block(
struct mtdswap_dev *d,
unsigned int page,
631 }
while (ret == -
EIO || mtd_is_eccerr(ret));
639 mtdswap_store_eb(d, old_eb);
652 static unsigned int mtdswap_free_page_cnt(
struct mtdswap_dev *d)
658 static unsigned int mtdswap_enough_free_pages(
struct mtdswap_dev *d)
664 unsigned int page,
unsigned int *bp,
int gc_context)
674 while (!mtdswap_enough_free_pages(d))
675 if (mtdswap_gc(d, 0) > 0)
678 ret = mtdswap_map_free_block(d, page, bp);
681 if (ret == -
EIO || mtd_is_eccerr(ret)) {
693 if (ret == -
EIO || mtd_is_eccerr(ret)) {
697 mtdswap_handle_write_error(d, eb);
702 dev_err(d->
dev,
"Write to MTD device failed: %d (%zd written)",
708 dev_err(d->
dev,
"Short write to MTD device: %zd written",
724 static int mtdswap_move_block(
struct mtdswap_dev *d,
unsigned int oldblock,
725 unsigned int *newblock)
734 page = d->
revmap[oldblock];
741 if (ret < 0 && !mtd_is_bitflip(ret)) {
745 dev_err(d->
dev,
"Read Error: %d (block %u)\n", ret,
755 dev_err(d->
dev,
"Short read: %zd (block %u)\n", retlen,
761 ret = mtdswap_write_block(d, d->
page_buf, page, newblock, 1);
784 unsigned int i,
block, eblk_base, newblock;
794 block = eblk_base +
i;
798 ret = mtdswap_move_block(d, block, &newblock);
799 if (ret < 0 && !errcode)
806 static int __mtdswap_choose_gc_tree(
struct mtdswap_dev *d)
822 static int mtdswap_wlfreq(
unsigned int maxdiff)
845 y = (x * h + base / 2) / base;
847 return COLLECT_NONDIRTY_FREQ2 +
y;
850 static int mtdswap_choose_wl_tree(
struct mtdswap_dev *d)
852 static unsigned int pick_cnt;
853 unsigned int i, idx = -1, wear,
max;
869 if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) {
878 static int mtdswap_choose_gc_tree(
struct mtdswap_dev *d,
879 unsigned int background)
887 idx = mtdswap_choose_wl_tree(d);
891 return __mtdswap_choose_gc_tree(d);
895 unsigned int background)
905 idx = mtdswap_choose_gc_tree(d, background);
918 static unsigned int mtdswap_test_patt(
unsigned int i)
920 return i % 2 ? 0x55555555 : 0xAAAAAAAA;
923 static unsigned int mtdswap_eblk_passes(
struct mtdswap_dev *d,
927 unsigned int test,
i,
j, patt, mtd_pages;
929 unsigned int *
p1 = (
unsigned int *)d->
page_buf;
930 unsigned char *p2 = (
unsigned char *)d->
oob_buf;
940 base = mtdswap_eb_offset(d, eb);
943 for (test = 0; test < 2; test++) {
945 for (i = 0; i < mtd_pages; i++) {
946 patt = mtdswap_test_patt(test + i);
949 ret = mtd_write_oob(mtd, pos, &ops);
957 for (i = 0; i < mtd_pages; i++) {
962 patt = mtdswap_test_patt(test + i);
967 for (j = 0; j < mtd->
ecclayout->oobavail; j++)
968 if (p2[j] != (
unsigned char)patt)
974 ret = mtdswap_erase_block(d, eb);
983 mtdswap_handle_badblock(d, eb);
987 static int mtdswap_gc(
struct mtdswap_dev *d,
unsigned int background)
995 eb = mtdswap_pick_gc_eblk(d, background);
999 ret = mtdswap_gc_eblock(d, eb);
1004 mtdswap_handle_badblock(d, eb);
1009 ret = mtdswap_erase_block(d, eb);
1011 (ret || !mtdswap_eblk_passes(d, eb)))
1019 else if (ret != -
EIO && !mtd_is_eccerr(ret))
1031 ret = mtdswap_gc(d, 1);
1037 static void mtdswap_cleanup(
struct mtdswap_dev *d)
1057 unsigned int badcnt;
1061 if (mtd_can_have_bb(mtd))
1070 unsigned long page,
char *buf)
1073 unsigned int newblock,
mapped;
1094 mtdswap_store_eb(d, eb);
1099 ret = mtdswap_write_block(d, buf, page, &newblock, 0);
1112 static int mtdswap_auto_header(
struct mtdswap_dev *d,
char *buf)
1118 hd->
info.version = 1;
1120 hd->
info.nr_badpages = 0;
1128 unsigned long page,
char *buf)
1132 unsigned int realblock,
retries;
1142 return mtdswap_auto_header(d, buf);
1166 if (mtd_is_bitflip(ret)) {
1201 for (page = first; page < first + nr_pages; page++) {
1206 mtdswap_store_eb(d, eb);
1219 static int mtdswap_show(
struct seq_file *
s,
void *data)
1226 unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped,
pages;
1228 char *
name[] = {
"clean",
"used",
"low",
"high",
"dirty",
"bitflip",
1253 for (i = 0; i < d->
eblks; i++)
1257 bb_cnt = mtdswap_badblocks(d->
mtd, use_size);
1261 for (i = 0; i <
pages; i++)
1271 if (
min[i] != max[i])
1272 seq_printf(
s,
"%s:\t%5d erase blocks, erased min %d, "
1276 seq_printf(
s,
"%s:\t%5d erase blocks, all erased %d "
1281 seq_printf(
s,
"bad:\t%5u erase blocks\n", bb_cnt);
1284 seq_printf(
s,
"current erase block: %u pages used, %u free, "
1285 "erased %u times\n",
1312 .open = mtdswap_open,
1318 static int mtdswap_add_debugfs(
struct mtdswap_dev *d)
1320 struct gendisk *gd = d->
mbd_dev->disk;
1321 struct device *dev = disk_to_dev(gd);
1331 dev_err(dev,
"failed to initialize debugfs\n");
1340 dev_err(d->
dev,
"debugfs_create_file failed\n");
1349 static int mtdswap_init(
struct mtdswap_dev *d,
unsigned int eblocks,
1350 unsigned int spare_cnt)
1353 unsigned int i, eblk_bytes,
pages, blocks;
1369 goto page_data_fail;
1380 for (i = 0; i <
pages; i++)
1383 for (i = 0; i < blocks; i++)
1394 mtdswap_scan_eblks(d);
1418 unsigned int eblocks, eavailable, bad_blocks, spare_cnt;
1419 uint64_t swap_size, use_size, size_limit;
1423 parts = &partitions[0];
1427 while ((this_opt =
strsep(&parts,
",")) !=
NULL) {
1431 if (mtd->
index == part)
1435 if (mtd->
index != part)
1459 "%d available, %zu needed.\n",
1464 if (spare_eblocks > 100)
1465 spare_eblocks = 100;
1467 use_size = mtd->
size;
1470 if (mtd->
size > size_limit) {
1473 use_size = size_limit;
1476 eblocks = mtd_div_by_eb(use_size, mtd);
1478 bad_blocks = mtdswap_badblocks(mtd, use_size);
1479 eavailable = eblocks - bad_blocks;
1488 spare_cnt = div_u64((
uint64_t)eavailable * spare_eblocks, 100);
1493 if (spare_cnt > eavailable - 1)
1494 spare_cnt = eavailable - 1;
1499 printk(
KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, "
1500 "%u spare, %u bad blocks\n",
1524 if (mtdswap_init(d, eblocks, spare_cnt) < 0)
1530 d->
dev = disk_to_dev(mbd_dev->
disk);
1532 ret = mtdswap_add_debugfs(d);
1534 goto debugfs_failed;
1564 .flush = mtdswap_flush,
1565 .readsect = mtdswap_readsect,
1566 .writesect = mtdswap_writesect,
1567 .discard = mtdswap_discard,
1568 .background = mtdswap_background,
1569 .add_mtd = mtdswap_add_mtd,
1570 .remove_dev = mtdswap_remove_dev,
1574 static int __init mtdswap_modinit(
void)
1579 static void __exit mtdswap_modexit(
void)