25 #include <linux/bitops.h>
27 #include <linux/string.h>
29 #include <linux/slab.h>
30 #include <asm/kmap_types.h>
/*
 * NOTE(review): this file appears to be a fragmentary extraction of the
 * DRBD bitmap implementation; many interior lines (braces, locals, whole
 * statements) are missing, and the leading decimal on some lines is a
 * leftover original line number.  Comments describe only what the visible
 * tokens show; everything else is flagged as an assumption.
 */

/* Debug helper: report a bitmap operation attempted while the bitmap is
 * held by someone else; the macro supplies the caller's name via __func__. */
115 #define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
116 static void __bm_print_lock_info(
struct drbd_conf *mdev,
const char *
func)
/* body truncated: emits who locked the bitmap and for what reason */
121 dev_err(
DEV,
"FIXME %s in %s, bitmap locked for '%s' by %s\n",
/* drbd_bm_lock() fragment (assumed from the messages): complain when there
 * is no bitmap, warn when the trylock failed (held elsewhere), complain
 * when the bitmap was already marked locked. */
137 dev_err(
DEV,
"FIXME no bitmap in drbd_bm_lock!?\n");
143 if (trylock_failed) {
144 dev_warn(
DEV,
"%s going to '%s' but bitmap already locked for '%s' by %s\n",
155 dev_err(
DEV,
"FIXME bitmap already locked in bm_lock\n");
/* drbd_bm_unlock() fragment: mirror-image sanity checks on unlock. */
166 dev_err(
DEV,
"FIXME no bitmap in drbd_bm_unlock!?\n");
171 dev_err(
DEV,
"FIXME bitmap not locked in bm_unlock\n");
/* Layout of the per-page page_private() word (see bm_store_page_idx below):
 * the low 24 bits hold the page's index into bm_pages, the high bits
 * (28..31) are flag bits manipulated by the bm_*_page_* helpers. */
188 #define BM_PAGE_IDX_MASK ((1UL<<24)-1)
/* page is currently locked for I/O (see bm_page_lock_io/bm_page_unlock_io) */
190 #define BM_PAGE_IO_LOCK 31
/* last I/O on this page failed (set in the bio completion on error) */
192 #define BM_PAGE_IO_ERROR 30
/* page content changed and must be written out */
195 #define BM_PAGE_NEED_WRITEOUT 29
/* page may be written out lazily (see bm_set_page_lazy_writeout users) */
198 #define BM_PAGE_LAZY_WRITEOUT 28
/* Remember this page's index into bm_pages in the low bits of
 * page_private(); the flag helpers share the same word. */
205 static void bm_store_page_idx(
struct page *
page,
unsigned long idx)
208 set_page_private(page, idx);
/* Inverse of bm_store_page_idx(); body truncated in this extract,
 * presumably returns page_private(page) & BM_PAGE_IDX_MASK — confirm. */
211 static unsigned long bm_page_to_idx(
struct page *
page)
/* Serialize I/O on one bitmap page; body truncated in this extract. */
219 static void bm_page_lock_io(
struct drbd_conf *mdev,
int page_nr)
/* Release the per-page I/O lock bit kept in page_private() of
 * b->bm_pages[page_nr]; rest of the body (waking waiters?) truncated. */
226 static void bm_page_unlock_io(
struct drbd_conf *mdev,
int page_nr)
229 void *addr = &page_private(b->
bm_pages[page_nr]);
/* Flag accessors on the page_private() word.  Bodies are truncated in this
 * extract; each presumably sets/clears/tests its correspondingly named
 * BM_PAGE_* bit — confirm against the full source. */
237 static void bm_set_page_unchanged(
struct page *
page)
244 static void bm_set_page_need_writeout(
struct page *page)
249 static int bm_test_page_unchanged(
struct page *page)
/* volatile: re-read page_private each time — it may change concurrently
 * (the completion path warns about pages "changed during IO") */
251 volatile const unsigned long *addr = &page_private(page);
255 static void bm_set_page_io_err(
struct page *page)
260 static void bm_clear_page_io_err(
struct page *page)
265 static void bm_set_page_lazy_writeout(
struct page *page)
270 static int bm_test_page_lazy_writeout(
struct page *page)
/* Word number -> index of the bitmap page holding it: one page holds
 * 1 << (PAGE_SHIFT - LN2_BPL + 3) longs (PAGE_SIZE bytes / bytes-per-long). */
276 static unsigned int bm_word_to_page_idx(
struct drbd_bitmap *b,
unsigned long long_nr)
279 unsigned int page_nr = long_nr >> (
PAGE_SHIFT - LN2_BPL + 3);
/* Bit number -> page index: 1 << (PAGE_SHIFT + 3) bits per page.
 * bitnr is u64, so the shift happens in 64 bit for very large bitmaps. */
284 static unsigned int bm_bit_to_page_idx(
struct drbd_bitmap *b,
u64 bitnr)
287 unsigned int page_nr = bitnr >> (
PAGE_SHIFT + 3);
/* Map bitmap page 'idx' into the kernel address space; body truncated
 * (asm/kmap_types.h is included above, so presumably kmap based — confirm). */
292 static unsigned long *__bm_map_pidx(
struct drbd_bitmap *b,
unsigned int idx)
/* Wrapper that currently just forwards to __bm_map_pidx(). */
298 static unsigned long *bm_map_pidx(
struct drbd_bitmap *b,
unsigned int idx)
300 return __bm_map_pidx(b, idx);
/* Undo __bm_map_pidx(); body truncated in this extract. */
303 static void __bm_unmap(
unsigned long *p_addr)
/* Wrapper mirroring bm_map_pidx(). */
308 static void bm_unmap(
unsigned long *p_addr)
310 return __bm_unmap(p_addr);
/* sectors-to-words conversion for one extent; exact semantics of
 * BM_EXT_SHIFT/BM_BLOCK_SHIFT not visible here — confirm in full source */
314 #define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
/* word offset within its page ("Modulo Longs Per Page") */
321 #define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1))
/* number of longs (words) per page */
324 #define LWPP (PAGE_SIZE/sizeof(long))
/* Free the first 'number' entries of the pages array; the (truncated)
 * error message below suggests a warning when an entry is NULL. */
333 static void bm_free_pages(
struct page **
pages,
unsigned long number)
339 for (i = 0; i < number; i++) {
342 "a NULL pointer; i=%lu n=%lu\n",
/* Free 'ptr'; 'v' presumably selects vfree vs kfree (callers pass their
 * 'vmalloced' flag) — body truncated in this extract, confirm. */
351 static void bm_vk_free(
void *
ptr,
int v)
/*
 * (Re)allocate the array of page pointers so it holds 'want' pages,
 * reusing the first min(have, want) old pages.  On allocation failure
 * everything allocated so far is rolled back.  The allocation of
 * new_pages and of the individual pages is truncated in this extract.
 */
362 static struct page **bm_realloc_pages(
struct drbd_bitmap *b,
unsigned long want)
364 struct page **old_pages = b->
bm_pages;
365 struct page **new_pages, *
page;
366 unsigned int i,
bytes, vmalloced = 0;
/* size of the pointer array itself */
380 bytes =
sizeof(
struct page *)*want;
/* growing: keep the old pages, allocate only the new tail */
390 for (i = 0; i <
have; i++)
391 new_pages[i] = old_pages[i];
392 for (; i < want; i++) {
/* page allocation failed: free what this call allocated and bail */
395 bm_free_pages(new_pages + have, i - have);
396 bm_vk_free(new_pages, vmalloced);
/* record each page's index so bm_page_to_idx() can recover it */
401 bm_store_page_idx(page, i);
/* shrinking (or equal): just copy the surviving pointers */
405 for (i = 0; i < want; i++)
406 new_pages[i] = old_pages[i];
/* fragment, presumably drbd_bm_capacity(): report the device capacity the
 * bitmap currently covers — function header truncated, confirm */
443 return mdev->
bitmap->bm_dev_capacity;
/* fragment, presumably bitmap cleanup: release all bitmap pages */
451 bm_free_pages(mdev->
bitmap->bm_pages, mdev->
bitmap->bm_number_of_pages);
/* bits per bitmap page, plus masks for sub-page and sub-word bit offsets */
462 #define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3))
463 #define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1)
464 #define BITS_PER_LONG_MASK (BITS_PER_LONG - 1)
/* bm_clear_surplus() fragment (name from callers below): count — and
 * presumably clear — bits set beyond the logical end of the bitmap in the
 * last word(s).  cpu_to_lel(): the bitmap words are little-endian. */
468 unsigned long *p_addr, *bm;
478 mask = cpu_to_lel(mask);
487 cleared = hweight_long(*bm & ~mask);
495 cleared += hweight_long(*bm);
/* second fragment, presumably the mirror bm_set_surplus() — confirm */
505 unsigned long *p_addr, *bm;
514 mask = cpu_to_lel(mask);
/*
 * Count all set bits in the bitmap.  Full pages are summed word by word
 * with hweight_long(); in the last page only last_word whole words count,
 * the final partial word is masked (little-endian mask) before counting.
 * The loop bounds over pages are truncated in this extract.
 */
537 static unsigned long bm_count_bits(
struct drbd_bitmap *b)
539 unsigned long *p_addr;
540 unsigned long bits = 0;
542 int idx,
i, last_word;
/* all but the last page: every word counts */
546 p_addr = __bm_map_pidx(b, idx);
547 for (i = 0; i <
LWPP; i++)
548 bits += hweight_long(p_addr[i]);
/* last page: whole words, then the masked partial word */
554 p_addr = __bm_map_pidx(b, idx);
555 for (i = 0; i < last_word; i++)
556 bits += hweight_long(p_addr[i]);
557 p_addr[last_word] &= cpu_to_lel(mask);
558 bits += hweight_long(p_addr[last_word]);
/* zero a padding word past the end (guarding condition truncated) */
561 p_addr[last_word+1] = 0;
/*
 * bm_memset() fragment (name from callers in drbd_bm_resize below): fill
 * bitmap words [offset, end) with byte 'c', page by page, marking each
 * touched page need-writeout.  The check below guards against a chunk
 * running past the mapped page.
 */
569 unsigned long *p_addr, *bm;
580 while (offset < end) {
/* map the page containing this word offset */
582 idx = bm_word_to_page_idx(b, offset);
583 p_addr = bm_map_pidx(b, idx);
584 bm = p_addr +
MLPP(offset);
/* sanity: do_now words must fit before the end of the mapped page */
585 if (bm+do_now > p_addr + LWPP) {
587 p_addr, bm, (
int)do_now);
589 memset(bm, c, do_now *
sizeof(
long));
591 bm_set_page_need_writeout(b->
bm_pages[idx]);
/*
 * drbd_bm_resize() fragment: grow or shrink the bitmap to cover
 * 'capacity' sectors.  Visible: word/page bookkeeping, rejecting growth
 * beyond the on-disk bitmap area, initializing the newly covered region
 * (0xff when set_new_bits, else 0x00), freeing surplus pages on shrink,
 * clearing surplus bits, and recounting bm_set.  Large parts of the body
 * are truncated in this extract.
 */
607 unsigned long bits, words, owords, obits;
608 unsigned long want,
have, onpages;
609 struct page **npages, **opages =
NULL;
610 int err = 0, growing;
611 int opages_vmalloced;
617 dev_info(
DEV,
"drbd_bm_resize called with capacity == %llu\n",
618 (
unsigned long long)capacity);
/* capacity == 0 (assumed from context): free everything */
637 bm_free_pages(opages, onpages);
638 bm_vk_free(opages, opages_vmalloced);
/* words rounded up so the bitmap ends on a 64-bit boundary */
648 words =
ALIGN(bits, 64) >> LN2_BPL;
/* refuse to grow beyond what fits in the on-disk bitmap area */
653 if (bits > bits_on_disk) {
655 dev_info(
DEV,
"bits_on_disk = %llu\n", bits_on_disk);
670 npages = bm_realloc_pages(b, want);
683 growing = bits > obits;
684 if (opages && growing && set_new_bits)
/* set_new_bits: new words all-ones (presumably "out of sync") ... */
695 bm_memset(b, owords, 0xff, words-owords);
696 b->
bm_set += bits - obits;
/* ... otherwise the new region starts all-zero */
698 bm_memset(b, owords, 0x00, words-owords);
/* shrinking: release pages no longer needed */
704 bm_free_pages(opages + want, have - want);
707 (
void)bm_clear_surplus(b);
710 if (opages != npages)
711 bm_vk_free(opages, opages_vmalloced);
/* no old pages to account against: recount from scratch */
713 b->
bm_set = bm_count_bits(b);
714 dev_info(
DEV,
"resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
740 spin_unlock_irqrestore(&b->
bm_lock, flags);
/*
 * Fragment of a word-merge routine (function header truncated): OR a
 * little-endian buffer into bitmap words [offset, end), page by page,
 * marking touched pages need-writeout; surplus bits past the bitmap end
 * are cleared again afterwards and bm_set adjusted.
 */
782 unsigned long *p_addr, *bm;
787 end = offset + number;
797 while (offset < end) {
799 idx = bm_word_to_page_idx(b, offset);
800 p_addr = bm_map_pidx(b, idx);
801 bm = p_addr +
MLPP(offset);
/* bits before the merge, so only the delta is accounted into bm_set */
804 bits = hweight_long(*bm);
805 word = *bm | *buffer++;
810 bm_set_page_need_writeout(b->
bm_pages[idx]);
/* bits that spilled past the logical end must not stay counted */
818 b->
bm_set -= bm_clear_surplus(b);
/*
 * Fragment of a word-window copy routine (header truncated): validates
 * the window [offset, offset+number) against bm_words — reporting it via
 * dev_err on failure — then walks it page by page like the routines above.
 */
829 unsigned long *p_addr, *bm;
832 end = offset + number;
/* range check failed: report the offending window */
841 dev_err(
DEV,
"offset=%lu number=%lu bm_words=%lu\n",
842 (
unsigned long) offset,
843 (
unsigned long) number,
846 while (offset < end) {
848 p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, offset));
849 bm = p_addr +
MLPP(offset);
868 (
void)bm_clear_surplus(b);
/* flags for bm_rw(); BM_WRITE_ALL_PAGES forces writeout of unchanged
 * pages too (see the bm_rw fragment below); COPY_PAGES semantics are not
 * visible in this extract — confirm */
891 #define BM_AIO_COPY_PAGES 1
892 #define BM_WRITE_ALL_PAGES 2
/* kref release callback for the async bitmap-I/O context (ctx->kref) */
897 static void bm_aio_ctx_destroy(
struct kref *
kref)
/*
 * Completion callback for one bitmap-page bio.  Recovers the page index
 * stored in page_private at submit time, sanity-checks that the page did
 * not change during I/O, records or clears the per-page error bit,
 * unlocks the page and drops the context reference.  Error/counter
 * bookkeeping is partially truncated in this extract.
 */
906 static void bm_async_io_complete(
struct bio *bio,
int error)
911 unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page);
912 int uptodate = bio_flagged(bio, BIO_UPTODATE);
/* !uptodate without an error code is still a failure; the normalizing
 * assignment is truncated in this extract */
919 if (!error && !uptodate)
923 !bm_test_page_unchanged(b->
bm_pages[idx]))
924 dev_warn(
DEV,
"bitmap page idx %u changed during IO!\n", idx);
/* remember the failure on the page itself */
930 bm_set_page_io_err(b->
bm_pages[idx]);
934 dev_err(
DEV,
"IO ERROR %d on bitmap page idx %u\n",
/* success: clear any stale error bit */
937 bm_clear_page_io_err(b->
bm_pages[idx]);
941 bm_page_unlock_io(mdev, idx);
951 kref_put(&ctx->
kref, &bm_aio_ctx_destroy);
/*
 * bm_page_io_async() fragment (name from the caller in bm_rw below):
 * compute the on-disk sector of this bitmap page within the metadata
 * area, lock the page for I/O, mark it unchanged before submission, and
 * set up the bio.  Bio allocation and submission are truncated here.
 */
964 mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset;
/* clamp the transfer so it cannot run past the last metadata sector */
971 (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9);
974 bm_page_lock_io(mdev, page_nr);
/* mark unchanged *before* the write; a concurrent modification flips it
 * back and is then caught by the warning in bm_async_io_complete */
977 bm_set_page_unchanged(b->
bm_pages[page_nr]);
987 bm_store_page_idx(page, page_nr);
991 bio->bi_bdev = mdev->ldev->md_bdev;
992 bio->bi_sector = on_disk_sector;
996 bio->bi_private =
ctx;
997 bio->bi_end_io = bm_async_io_complete;
/*
 * bm_rw() fragment: read or write the bitmap page by page, optionally
 * only a prefix (lazy_writeout_upper_idx).  On WRITE, unchanged pages
 * are skipped unless BM_WRITE_ALL_PAGES; lazy writeout additionally
 * skips pages not marked lazy.  Afterwards: flush metadata on write, or
 * recount set bits on read.  Much of the body (submission loop header,
 * wait for completion) is truncated in this extract.
 */
1045 dev_err(
DEV,
"ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
/* stop early when only a prefix of the pages was requested */
1060 if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
/* skip pages unchanged since the last writeout ... */
1064 bm_test_page_unchanged(b->
bm_pages[i])) {
/* ... and, in lazy mode, pages not marked for lazy writeout */
1070 if (lazy_writeout_upper_idx &&
1071 !bm_test_page_lazy_writeout(b->
bm_pages[i])) {
1077 bm_page_io_async(ctx, i,
rw);
/* drop the submission reference; completions hold their own */
1093 kref_put(&ctx->
kref, &bm_aio_ctx_destroy);
1095 dev_info(
DEV,
"bitmap %s of %u pages took %lu jiffies\n",
1096 rw == WRITE ?
"WRITE" :
"READ",
1097 count, jiffies - now);
1100 dev_alert(
DEV,
"we had at least one MD IO ERROR during bitmap IO\n");
/* make sure the written bitmap reaches stable storage */
1110 drbd_md_flush(mdev);
/* after a read: derive bm_set from the just-loaded contents */
1112 b->
bm_set = bm_count_bits(b);
1113 dev_info(
DEV,
"recounting of set bits took additional %lu jiffies\n",
1118 dev_info(
DEV,
"%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
1121 kref_put(&ctx->
kref, &bm_aio_ctx_destroy);
/* Read the whole bitmap from disk. */
1131 return bm_rw(mdev,
READ, 0, 0);
/* Write the changed bitmap pages to disk. */
1142 return bm_rw(mdev, WRITE, 0, 0);
/* Write ALL bitmap pages, even unchanged ones. */
1153 return bm_rw(mdev, WRITE, BM_WRITE_ALL_PAGES, 0);
/*
 * drbd_bm_write_page() fragment: write a single bitmap page, skipping
 * the I/O entirely when the page is unchanged.  Context setup and the
 * wait for completion are truncated in this extract.
 */
1200 if (bm_test_page_unchanged(mdev->
bitmap->bm_pages[idx])) {
1219 dev_err(
DEV,
"ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
1234 kref_put(&ctx->
kref, &bm_aio_ctx_destroy);
/*
 * Bit search: find the next set (or, with find_zero_bit, next clear) bit
 * at or after bm_fo.  __bm_find_next() walks page by page via
 * __bm_map_pidx; the actual find_*_bit calls are truncated here.
 */
1246 static unsigned long __bm_find_next(
struct drbd_conf *mdev,
unsigned long bm_fo,
1247 const int find_zero_bit)
1250 unsigned long *p_addr;
1262 p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo));
/* translate the in-page hit back to a global bit number */
1273 bm_fo = bit_offset +
i;
/* Locked wrapper: takes bm_lock around __bm_find_next(). */
1286 static unsigned long bm_find_next(
struct drbd_conf *mdev,
1287 unsigned long bm_fo,
const int find_zero_bit)
1299 i = __bm_find_next(mdev, bm_fo, find_zero_bit);
1301 spin_unlock_irq(&b->bm_lock);
/* next set bit, with locking */
1307 return bm_find_next(mdev, bm_fo, 0);
/* next clear bit, with locking */
1312 unsigned long drbd_bm_find_next_zero(
struct drbd_conf *mdev,
unsigned long bm_fo)
1314 return bm_find_next(mdev, bm_fo, 1);
/* lock-free variants: caller must already hold the bitmap lock */
1323 return __bm_find_next(mdev, bm_fo, 0);
1329 return __bm_find_next(mdev, bm_fo, 1);
/*
 * Set (val != 0) or clear (val == 0) bits [s, e] — inclusive of e.
 * Pages are mapped lazily: a page is remapped only when the loop crosses
 * a page boundary, and the previous page is then marked lazy- or
 * need-writeout (the selecting condition is truncated here).  Returns
 * the net change in set-bit count — negative when clearing; see
 * drbd_bm_clear_bits() negating the result below.
 */
1338 static int __bm_change_bits_to(
struct drbd_conf *mdev,
const unsigned long s,
1339 unsigned long e,
int val)
1342 unsigned long *p_addr =
NULL;
1343 unsigned long bitnr;
/* -1U: "no page mapped yet" sentinel */
1344 unsigned int last_page_nr = -1
U;
1346 int changed_total = 0;
1349 dev_err(
DEV,
"ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n",
1353 for (bitnr = s; bitnr <=
e; bitnr++) {
1354 unsigned int page_nr = bm_bit_to_page_idx(b, bitnr);
1355 if (page_nr != last_page_nr) {
/* leaving the previous page: mark it for (lazy) writeout */
1359 bm_set_page_lazy_writeout(b->
bm_pages[last_page_nr]);
1361 bm_set_page_need_writeout(b->
bm_pages[last_page_nr]);
1364 p_addr = __bm_map_pidx(b, page_nr);
1365 last_page_nr = page_nr;
/* the final page gets the same writeout marking after the loop */
1375 bm_set_page_lazy_writeout(b->
bm_pages[last_page_nr]);
1377 bm_set_page_need_writeout(b->
bm_pages[last_page_nr]);
1379 b->
bm_set += changed_total;
1380 return changed_total;
/* Locked wrapper around __bm_change_bits_to(); uses irqsave locking
 * (flags + spin_unlock_irqrestore visible below). */
1387 static int bm_change_bits_to(
struct drbd_conf *mdev,
const unsigned long s,
1388 const unsigned long e,
int val)
1390 unsigned long flags;
1401 c = __bm_change_bits_to(mdev, s, e, val);
1403 spin_unlock_irqrestore(&b->bm_lock, flags);
/* drbd_bm_set_bits(): number of bits newly set */
1410 return bm_change_bits_to(mdev, s, e, 1);
/* drbd_bm_clear_bits(): negated so the caller sees a positive count of
 * bits cleared */
1416 return -bm_change_bits_to(mdev, s, e, 0);
/* Set whole words [first_word, last_word) of one page in one go; only the
 * loop header and hweight accounting are visible — the actual store
 * (presumably paddr[i] = ~0UL) is truncated, confirm. */
1421 static inline void bm_set_full_words_within_one_page(
struct drbd_bitmap *b,
1422 int page_nr,
int first_word,
int last_word)
1427 for (i = first_word; i < last_word; i++) {
/* bits already set, so only the delta is accounted into bm_set */
1428 bits = hweight_long(paddr[i]);
/*
 * Bulk-set fragment (header truncated): set a large range [s, e] fast.
 * Visible strategy: unaligned head bits via __bm_change_bits_to(), then
 * full words page by page via bm_set_full_words_within_one_page(), then
 * the unaligned tail bits bit by bit again.
 */
1462 __bm_change_bits_to(mdev, s, e, 1);
/* head: bits before the first word-aligned bit sl */
1473 __bm_change_bits_to(mdev, s, sl-1, 1);
1480 first_word =
MLPP(sl >> LN2_BPL);
1484 for (page_nr = first_page; page_nr < last_page; page_nr++) {
1485 bm_set_full_words_within_one_page(mdev->
bitmap, page_nr, first_word, last_word);
1492 last_word =
MLPP(el >> LN2_BPL);
/* remaining full words in the last page */
1501 bm_set_full_words_within_one_page(mdev->
bitmap, last_page, first_word, last_word);
/* tail: bits from the last aligned bit el up to e */
1509 __bm_change_bits_to(mdev, el, e, 1);
/*
 * drbd_bm_test_bit() fragment (assumed from structure): test one bit
 * under bm_lock; bitnr == bm_bits is a special case handled separately.
 * The actual bit test is truncated in this extract.
 */
1522 unsigned long flags;
1524 unsigned long *p_addr;
1533 if (bitnr < b->bm_bits) {
1534 p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr));
1537 }
else if (bitnr == b->
bm_bits) {
1544 spin_unlock_irqrestore(&b->
bm_lock, flags);
/*
 * drbd_bm_count_bits() fragment (header truncated): count the set bits
 * in [s, e] inclusive, remapping pages only when the loop crosses a page
 * boundary, under bm_lock with irqsave.
 */
1551 unsigned long flags;
1553 unsigned long *p_addr =
NULL;
1554 unsigned long bitnr;
/* -1U: "no page mapped yet" sentinel */
1555 unsigned int page_nr = -1
U;
1568 for (bitnr = s; bitnr <=
e; bitnr++) {
1569 unsigned int idx = bm_bit_to_page_idx(b, bitnr);
1570 if (page_nr != idx) {
1574 p_addr = bm_map_pidx(b, idx);
/* in-page offset = global bitnr minus the page's first bit number */
1579 c += (0 != test_bit_le(bitnr - (page_nr << (
PAGE_SHIFT+3)), p_addr));
1584 spin_unlock_irqrestore(&b->
bm_lock, flags);
/*
 * drbd_bm_e_weight() fragment: count set bits in a run of words starting
 * at word offset s (per-extent weight); complains via dev_err when s is
 * out of range.  Loop bounds are truncated in this extract.
 */
1607 unsigned long flags;
1608 unsigned long *p_addr, *bm;
1622 p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
1623 bm = p_addr +
MLPP(s);
1625 count += hweight_long(*bm++);
1628 dev_err(
DEV,
"start offset (%d) too large in drbd_bm_e_weight\n", s);
1630 spin_unlock_irqrestore(&b->
bm_lock, flags);
/*
 * drbd_bm_ALe_set_all() fragment: set all bits covered by one AL extent
 * (word-wise), counting previously set bits, then re-clear surplus bits
 * past the bitmap end; complains when s is out of range.
 */
1639 unsigned long *p_addr, *bm;
1659 p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
1660 bm = p_addr +
MLPP(s);
1662 count += hweight_long(*bm);
/* surplus bits set past the logical end must not stay in bm_set */
1669 b->
bm_set -= bm_clear_surplus(b);
1671 dev_err(
DEV,
"start offset (%lu) too large in drbd_bm_ALe_set_all\n", s);