26 #include <linux/module.h>
28 #include <asm/uaccess.h>
38 #include <linux/slab.h>
40 #define __KERNEL_SYSCALLS__
43 #include <linux/random.h>
44 #include <linux/string.h>
57 static int drbd_do_handshake(
struct drbd_conf *mdev);
58 static int drbd_do_auth(
struct drbd_conf *mdev);
64 #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
89 tmp = page_chain_next(page);
99 set_page_private(page, 0);
109 static struct page *page_chain_tail(
struct page *
page,
int *len)
113 while ((tmp = page_chain_next(page)))
120 static int page_chain_free(
struct page *page)
131 static void page_chain_add(
struct page **
head,
132 struct page *chain_first,
struct page *chain_last)
136 tmp = page_chain_tail(chain_first,
NULL);
137 BUG_ON(tmp != chain_last);
141 set_page_private(chain_last, (
unsigned long)*head);
145 static struct page *drbd_pp_first_pages_or_try_alloc(
struct drbd_conf *mdev,
int number)
147 struct page *page =
NULL;
148 struct page *tmp =
NULL;
166 for (i = 0; i < number; i++) {
170 set_page_private(tmp, (
unsigned long)page);
181 tmp = page_chain_tail(page,
NULL);
202 if (drbd_ee_has_active_page(e))
204 list_move(le, to_be_freed);
208 static void drbd_kick_lo_and_reclaim_net(
struct drbd_conf *mdev)
214 reclaim_net_ee(mdev, &reclaimed);
235 struct page *page =
NULL;
240 if (
atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers)
241 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
243 while (page ==
NULL) {
246 drbd_kick_lo_and_reclaim_net(mdev);
248 if (
atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) {
249 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
275 static void drbd_pp_free(
struct drbd_conf *mdev,
struct page *page,
int is_net)
284 i = page_chain_free(page);
287 tmp = page_chain_tail(page, &i);
296 is_net ?
"pp_in_use_by_net" :
"pp_in_use", i);
322 struct page *page =
NULL;
331 dev_err(
DEV,
"alloc_ee: Allocation of an EE failed\n");
362 drbd_pp_free(mdev, e->
pages, is_net);
373 int is_net = list == &mdev->
net_ee;
376 list_splice_init(list, &work_list);
396 static int drbd_process_done_ee(
struct drbd_conf *mdev)
404 reclaim_net_ee(mdev, &reclaimed);
405 list_splice_init(&mdev->
done_ee, &work_list);
417 ok = e->w.cb(mdev, &e->w, !ok) && ok;
431 while (!list_empty(head)) {
449 static int drbd_accept(
struct drbd_conf *mdev,
const char **
what,
452 struct sock *
sk = sock->
sk;
456 err = sock->
ops->listen(sock, 5);
460 *what =
"sock_create_lite";
467 err = sock->
ops->accept(sock, *newsock, 0);
473 (*newsock)->ops = sock->
ops;
474 __module_get((*newsock)->ops->owner);
480 static int drbd_recv_short(
struct drbd_conf *mdev,
struct socket *sock,
490 .msg_iov = (
struct iovec *)&iov,
503 static int drbd_recv(
struct drbd_conf *mdev,
void *buf,
size_t size)
512 .msg_iov = (
struct iovec *)&iov,
534 dev_err(
DEV,
"sock_recvmsg returned %d\n", rv);
536 }
else if (rv == 0) {
561 static void drbd_setbufsize(
struct socket *sock,
unsigned int snd,
566 sock->
sk->sk_sndbuf = snd;
570 sock->
sk->sk_rcvbuf = rcv;
581 int disconnect_on_error = 1;
583 if (!get_net_conf(mdev))
586 what =
"sock_create_kern";
594 sock->
sk->sk_rcvtimeo =
595 sock->
sk->sk_sndtimeo = mdev->
net_conf->try_connect_int*
HZ;
596 drbd_setbufsize(sock, mdev->
net_conf->sndbuf_size,
609 src_in6.sin6_port = 0;
613 what =
"bind before connect";
614 err = sock->
ops->bind(sock,
622 disconnect_on_error = 0;
624 err = sock->
ops->connect(sock,
641 disconnect_on_error = 0;
644 dev_err(
DEV,
"%s failed, err = %d\n", what, err);
646 if (disconnect_on_error)
659 if (!get_net_conf(mdev))
662 what =
"sock_create_kern";
671 timeo += (
random32() & 1) ? timeo / 7 : -timeo / 7;
674 s_listen->sk->sk_rcvtimeo = timeo;
675 s_listen->sk->sk_sndtimeo = timeo;
676 drbd_setbufsize(s_listen, mdev->
net_conf->sndbuf_size,
679 what =
"bind before listen";
680 err = s_listen->ops->bind(s_listen,
686 err = drbd_accept(mdev, &what, s_listen, &s_estab);
693 dev_err(
DEV,
"%s failed, err = %d\n", what, err);
702 static int drbd_send_fp(
struct drbd_conf *mdev,
715 rr = drbd_recv_short(mdev, sock, h,
sizeof(*h), 0);
728 static int drbd_socket_okay(
struct drbd_conf *mdev,
struct socket **sock)
738 if (rr > 0 || rr == -
EAGAIN) {
755 static int drbd_connect(
struct drbd_conf *mdev)
774 s = drbd_try_connect(mdev);
791 dev_err(
DEV,
"Logic error in drbd_connect()\n");
792 goto out_release_sockets;
798 ok = drbd_socket_okay(mdev, &sock);
799 ok = drbd_socket_okay(mdev, &msock) && ok;
805 s = drbd_wait_for_connect(mdev);
807 try = drbd_recv_fp(mdev, s);
808 drbd_socket_okay(mdev, &sock);
809 drbd_socket_okay(mdev, &msock);
835 goto out_release_sockets;
840 goto out_release_sockets;
844 ok = drbd_socket_okay(mdev, &sock);
845 ok = drbd_socket_okay(mdev, &msock) && ok;
865 sock->
sk->sk_sndtimeo =
866 sock->
sk->sk_rcvtimeo = mdev->
net_conf->ping_timeo*4*
HZ/10;
873 drbd_tcp_nodelay(sock);
874 drbd_tcp_nodelay(msock);
877 mdev->
meta.socket = msock;
882 h = drbd_do_handshake(mdev);
888 switch (drbd_do_auth(mdev)) {
890 dev_err(
DEV,
"Authentication of peer failed\n");
893 dev_err(
DEV,
"Authentication of peer failed, trying again.\n");
936 static int drbd_recv_header(
struct drbd_conf *mdev,
enum drbd_packets *cmd,
unsigned int *packet_size)
941 r = drbd_recv(mdev, h,
sizeof(*h));
944 dev_warn(
DEV,
"short read expecting header on sock: r=%d\n", r);
955 dev_err(
DEV,
"magic?? on data m: 0x%08x c: %d l: %d\n",
966 static void drbd_flush(
struct drbd_conf *mdev)
974 dev_info(
DEV,
"local disk flush failed with status %d\n", rv);
1016 if (epoch_size != 0 &&
1019 if (!(ev & EV_CLEANUP)) {
1065 static char *write_ordering_str[] = {
1075 if (wo ==
WO_drain_io && mdev->ldev->dc.no_disk_drain)
1100 const unsigned rw,
const int fault_type)
1104 struct page *page = e->
pages;
1107 unsigned n_bios = 0;
1120 bio = bio_alloc(
GFP_NOIO, nr_pages);
1122 dev_err(
DEV,
"submit_ee: Allocation of a bio failed\n");
1127 bio->bi_bdev = mdev->ldev->backing_bdev;
1129 bio->bi_private =
e;
1132 bio->bi_next =
bios;
1142 if (bio->bi_vcnt == 0) {
1144 "bio_add_page failed for len=%u, "
1145 "bi_vcnt=0 (bi_sector=%llu)\n",
1146 len, (
unsigned long long)bio->bi_sector);
1162 bios = bios->bi_next;
1163 bio->bi_next =
NULL;
1165 drbd_generic_make_request(mdev, fault_type, bio);
1172 bios = bios->bi_next;
1205 dev_warn(
DEV,
"Allocation of an epoch failed, slowing down\n");
1260 unsigned long *
data;
1266 rr = drbd_recv(mdev, dig_in, dgs);
1270 "short read receiving data digest: read %d expected %d\n",
1283 if (
sector + (data_size>>9) > capacity) {
1284 dev_err(
DEV,
"request from peer beyond end of local disk: "
1285 "capacity: %llus < sector: %llus + size: %u\n",
1286 (
unsigned long long)capacity,
1287 (
unsigned long long)
sector, data_size);
1306 rr = drbd_recv(mdev, data, len);
1308 dev_err(
DEV,
"Fault injection: Corrupting data on receive\n");
1309 data[0] = data[0] ^ (
unsigned long)-1;
1315 dev_warn(
DEV,
"short read receiving data: read %d expected %d\n",
1324 if (
memcmp(dig_in, dig_vv, dgs)) {
1325 dev_err(
DEV,
"Digest integrity check FAILED: %llus +%u\n",
1326 (
unsigned long long)
sector, data_size);
1328 dgs, dig_in, dig_vv, e);
1340 static int drbd_drain_block(
struct drbd_conf *mdev,
int data_size)
1349 page = drbd_pp_alloc(mdev, 1, 1);
1358 "short read receiving data: read %d expected %d\n",
1365 drbd_pp_free(mdev, page, 0);
1372 struct bio_vec *bvec;
1374 int dgs, rr,
i, expect;
1382 rr = drbd_recv(mdev, dig_in, dgs);
1386 "short read receiving data reply digest: read %d expected %d\n",
1399 D_ASSERT(sector == bio->bi_sector);
1401 bio_for_each_segment(bvec, bio, i) {
1402 expect =
min_t(
int, data_size, bvec->bv_len);
1403 rr = drbd_recv(mdev,
1404 kmap(bvec->bv_page)+bvec->bv_offset,
1410 "read %d expected %d\n",
1419 if (
memcmp(dig_in, dig_vv, dgs)) {
1420 dev_err(
DEV,
"Digest integrity check FAILED. Broken NICs?\n");
1457 e = read_in_block(mdev,
ID_SYNCER, sector, data_size);
1467 e->
w.cb = e_end_resync_block;
1470 list_add(&e->
w.list, &mdev->
sync_ee);
1478 dev_err(
DEV,
"submit failed, triggering re-connect\n");
1499 req = _ar_id_to_req(mdev, p->
block_id, sector);
1502 dev_err(
DEV,
"Got a corrupt block_id/sector pair(1).\n");
1509 ok = recv_dless_read(mdev, req, sector, data_size);
1520 static int receive_RSDataReply(
struct drbd_conf *mdev,
enum drbd_packets cmd,
unsigned int data_size)
1533 ok = recv_resync_read(mdev, sector, data_size);
1536 dev_err(
DEV,
"Can not write resync data to local disk.\n");
1538 ok = drbd_drain_block(mdev, data_size);
1575 if (mdev->
net_conf->two_primaries) {
1584 drbd_may_finish_epoch(mdev, e->
epoch,
EV_PUT + (cancel ? EV_CLEANUP : 0));
1589 static int e_send_discard_ack(
struct drbd_conf *mdev,
struct drbd_work *w,
int unused)
1646 static int drbd_wait_peer_seq(
struct drbd_conf *mdev,
const u32 packet_seq)
1657 if (signal_pending(
current)) {
1665 if (timeout == 0 && p_seq == mdev->
peer_seq) {
1667 dev_err(
DEV,
"ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n");
1672 if (mdev->
peer_seq+1 == packet_seq)
1681 static unsigned long wire_flags_to_bio(
struct drbd_conf *mdev,
u32 dpf)
1706 return drbd_drain_block(mdev, data_size);
1715 e = read_in_block(mdev, p->
block_id, sector, data_size);
1721 e->
w.cb = e_end_block;
1724 rw |= wire_flags_to_bio(mdev, dp_flags);
1725 if (e->
pages == NULL) {
1740 if (!mdev->
net_conf->two_primaries) {
1745 const int size = e->
size;
1792 goto out_interrupted;
1796 hlist_add_head(&e->
collision, ee_hash_slot(mdev, sector));
1798 #define OVERLAPS overlaps(i->sector, i->size, sector, size)
1799 slot = tl_hash_slot(mdev, sector);
1802 int have_unacked = 0;
1803 int have_conflict = 0;
1811 dev_alert(
DEV,
"%s[%u] Concurrent local write detected!"
1812 " new: %llus +%u; pending: %llus +%u\n",
1814 (
unsigned long long)sector, size,
1826 if (first && discard && have_unacked) {
1827 dev_alert(
DEV,
"Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
1828 (
unsigned long long)sector);
1830 e->
w.cb = e_send_discard_ack;
1844 if (signal_pending(
current)) {
1850 goto out_interrupted;
1856 dev_alert(
DEV,
"Concurrent write! [W AFTERWARDS] "
1857 "sec=%llus\n", (
unsigned long long)sector);
1858 }
else if (discard) {
1875 switch (mdev->
net_conf->wire_protocol) {
1903 dev_err(
DEV,
"submit failed, triggering re-connect\n");
1912 drbd_may_finish_epoch(mdev, e->
epoch,
EV_PUT + EV_CLEANUP);
1931 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
1932 unsigned long db, dt, dbdt;
1941 spin_lock_irq(&mdev->
al_lock);
1946 spin_unlock_irq(&mdev->
al_lock);
1951 spin_unlock_irq(&mdev->
al_lock);
1953 curr_events = (
int)part_stat_read(&disk->part0,
sectors[0]) +
1954 (
int)part_stat_read(&disk->part0,
sectors[1]) -
1985 static int receive_DataRequest(
struct drbd_conf *mdev,
enum drbd_packets cmd,
unsigned int digest_size)
1992 unsigned int fault_type;
1999 dev_err(
DEV,
"%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2000 (
unsigned long long)sector, size);
2003 if (sector + (size>>9) > capacity) {
2004 dev_err(
DEV,
"%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2005 (
unsigned long long)sector, size);
2026 dev_err(
DEV,
"unexpected command (%s) in receive_DataRequest\n",
2030 dev_err(
DEV,
"Can not satisfy peer's read request, "
2031 "no local data.\n");
2034 return drbd_drain_block(mdev, digest_size);
2088 goto submit_for_resync;
2105 dev_info(
DEV,
"Online Verify start sector: %llu\n",
2106 (
unsigned long long)sector);
2113 dev_err(
DEV,
"unexpected command (%s) in receive_DataRequest\n",
2159 dev_err(
DEV,
"submit failed, triggering re-connect\n");
2173 int self, peer, rv = -100;
2174 unsigned long ch_self, ch_peer;
2176 self = mdev->ldev->md.uuid[
UI_BITMAP] & 1;
2182 switch (mdev->
net_conf->after_sb_0p) {
2191 if (
self == 0 && peer == 1) {
2195 if (
self == 1 && peer == 0) {
2201 if (
self == 0 && peer == 1) {
2205 if (
self == 1 && peer == 0) {
2210 dev_warn(
DEV,
"Discard younger/older primary did not find a decision\n"
2211 "Using discard-least-changes instead\n");
2213 if (ch_peer == 0 && ch_self == 0) {
2218 if (ch_peer == 0) { rv = 1;
break; }
2219 if (ch_self == 0) { rv = -1;
break; }
2224 if (ch_self < ch_peer)
2226 else if (ch_self > ch_peer)
2247 switch (mdev->
net_conf->after_sb_1p) {
2258 hg = drbd_asb_recover_0p(mdev);
2265 rv = drbd_asb_recover_0p(mdev);
2270 hg = drbd_asb_recover_0p(mdev);
2282 dev_warn(
DEV,
"Successfully gave up primary role.\n");
2296 switch (mdev->
net_conf->after_sb_2p) {
2307 rv = drbd_asb_recover_0p(mdev);
2312 hg = drbd_asb_recover_0p(mdev);
2323 dev_warn(
DEV,
"Successfully gave up primary role.\n");
2337 dev_info(
DEV,
"%s uuid info vanished while I was looking!\n", text);
2340 dev_info(
DEV,
"%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2346 (
unsigned long long)bits,
2347 (
unsigned long long)flags);
2393 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((
u64)1)) == (mdev->
p_uuid[UI_HISTORY_START + 1] & ~((
u64)1))) {
2394 dev_info(
DEV,
"was SyncSource, missed the resync finished event, corrected myself:\n");
2397 drbd_uuid_dump(mdev,
"self", mdev->ldev->md.uuid,
2401 dev_info(
DEV,
"was SyncSource (peer failed to write sync_uuid)\n");
2408 if (mdev->ldev->md.uuid[UI_BITMAP] == (
u64)0 && mdev->
p_uuid[UI_BITMAP] != (
u64)0) {
2413 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((
u64)1)) == (mdev->
p_uuid[UI_BITMAP] & ~((
u64)1)) &&
2414 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((
u64)1)) == (mdev->
p_uuid[UI_HISTORY_START] & ~((
u64)1))) {
2415 dev_info(
DEV,
"was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2424 dev_info(
DEV,
"was SyncTarget (failed to write sync_uuid)\n");
2457 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((
u64)1)) ==
2458 (mdev->
p_uuid[UI_HISTORY_START + 1] & ~((
u64)1)) :
2469 dev_info(
DEV,
"Lost last syncUUID packet, corrected:\n");
2494 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((
u64)1)) ==
2503 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2504 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2506 dev_info(
DEV,
"Last syncUUID did not get through, corrected:\n");
2507 drbd_uuid_dump(mdev,
"self", mdev->ldev->md.uuid,
2518 self = mdev->ldev->md.uuid[
i] & ~((
u64)1);
2526 if (
self == peer &&
self != ((
u64)0))
2531 self = mdev->ldev->md.uuid[
i] & ~((
u64)1);
2557 drbd_uuid_dump(mdev,
"self", mdev->ldev->md.uuid, mdev->
comm_bm_set, 0);
2558 drbd_uuid_dump(mdev,
"peer", mdev->
p_uuid,
2561 hg = drbd_uuid_compare(mdev, &rule_nr);
2563 dev_info(
DEV,
"uuid_compare()=%d by rule %d\n", hg, rule_nr);
2566 dev_alert(
DEV,
"Unrelated data, aborting!\n");
2570 dev_alert(
DEV,
"To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
2576 int f = (hg == -100) ||
abs(hg) == 2;
2580 dev_info(
DEV,
"Becoming sync %s due to disk states.\n",
2581 hg > 0 ?
"source" :
"target");
2587 if (hg == 100 || (hg == -100 && mdev->
net_conf->always_asbp)) {
2590 int forced = (hg == -100);
2594 hg = drbd_asb_recover_0p(mdev);
2597 hg = drbd_asb_recover_1p(mdev);
2600 hg = drbd_asb_recover_2p(mdev);
2603 if (
abs(hg) < 100) {
2604 dev_warn(
DEV,
"Split-Brain detected, %d primaries, "
2605 "automatically solved. Sync from %s node\n",
2606 pcount, (hg < 0) ?
"peer" :
"this");
2609 " UUIDs where ambiguous.\n");
2622 dev_warn(
DEV,
"Split-Brain detected, manually solved. "
2623 "Sync from %s node\n",
2624 (hg < 0) ?
"peer" :
"this");
2632 dev_alert(
DEV,
"Split-Brain detected but unresolved, dropping connection!\n");
2638 dev_err(
DEV,
"I shall become SyncSource, but I am inconsistent!\n");
2644 switch (mdev->
net_conf->rr_conflict) {
2649 dev_err(
DEV,
"I shall become SyncTarget, but I am primary!\n");
2652 dev_warn(
DEV,
"Becoming SyncTarget, violating the stable-data"
2659 dev_info(
DEV,
"dry-run connect: No resync, would become Connected immediately.\n");
2661 dev_info(
DEV,
"dry-run connect: Would become %s, doing a %s resync.",
2663 abs(hg) >= 2 ?
"full" :
"bit-map based");
2668 dev_info(
DEV,
"Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
2676 }
else if (hg < 0) {
2681 dev_info(
DEV,
"No resync, but %lu bits in bitmap!\n",
2713 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
2714 int p_want_lose, p_two_primaries,
cf;
2730 if (p_proto != mdev->
net_conf->wire_protocol) {
2731 dev_err(
DEV,
"incompatible communication protocols\n");
2735 if (cmp_after_sb(p_after_sb_0p, mdev->
net_conf->after_sb_0p)) {
2736 dev_err(
DEV,
"incompatible after-sb-0pri settings\n");
2740 if (cmp_after_sb(p_after_sb_1p, mdev->
net_conf->after_sb_1p)) {
2741 dev_err(
DEV,
"incompatible after-sb-1pri settings\n");
2745 if (cmp_after_sb(p_after_sb_2p, mdev->
net_conf->after_sb_2p)) {
2746 dev_err(
DEV,
"incompatible after-sb-2pri settings\n");
2750 if (p_want_lose && mdev->
net_conf->want_lose) {
2751 dev_err(
DEV,
"both sides have the 'want_lose' flag set\n");
2755 if (p_two_primaries != mdev->
net_conf->two_primaries) {
2756 dev_err(
DEV,
"incompatible setting of the two-primaries options\n");
2761 unsigned char *my_alg = mdev->
net_conf->integrity_alg;
2763 if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size)
2767 if (
strcmp(p_integrity_alg, my_alg)) {
2768 dev_err(
DEV,
"incompatible setting of the data-integrity-alg\n");
2772 my_alg[0] ? my_alg : (
unsigned char *)
"<not-used>");
2788 const char *alg,
const char *
name)
2797 dev_err(
DEV,
"Can not allocate \"%s\" as %s (reason: %ld)\n",
2798 alg, name, PTR_ERR(tfm));
2801 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2802 crypto_free_hash(tfm);
2803 dev_err(
DEV,
"\"%s\" is not a digest (%s)\n", alg, name);
2809 static int receive_SyncParam(
struct drbd_conf *mdev,
enum drbd_packets cmd,
unsigned int packet_size)
2817 int *rs_plan_s =
NULL;
2820 exp_max_sz = apv <= 87 ?
sizeof(
struct p_rs_param)
2821 : apv == 88 ?
sizeof(struct p_rs_param)
2823 : apv <= 94 ? sizeof(struct p_rs_param_89)
2824 : sizeof(struct p_rs_param_95);
2826 if (packet_size > exp_max_sz) {
2827 dev_err(
DEV,
"SyncParam packet too long: received %u, expected <= %u bytes\n",
2828 packet_size, exp_max_sz);
2834 data_size = packet_size - header_size;
2835 }
else if (apv <= 94) {
2837 data_size = packet_size - header_size;
2841 data_size = packet_size - header_size;
2848 if (drbd_recv(mdev, &p->
head.payload, header_size) != header_size)
2857 "peer wants %u, accepting only up to %u byte\n",
2862 if (drbd_recv(mdev, p->
verify_alg, data_size) != data_size)
2881 dev_err(
DEV,
"Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
2887 if (IS_ERR(verify_tfm)) {
2895 dev_err(
DEV,
"Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
2901 if (IS_ERR(csums_tfm)) {
2915 if (fifo_size != mdev->
rs_plan_s.size && fifo_size > 0) {
2916 rs_plan_s = kzalloc(
sizeof(
int) * fifo_size,
GFP_KERNEL);
2918 dev_err(
DEV,
"kmalloc of fifo_buffer failed");
2940 if (fifo_size != mdev->
rs_plan_s.size) {
2953 crypto_free_hash(csums_tfm);
2955 crypto_free_hash(verify_tfm);
2961 static void warn_if_differ_considerably(
struct drbd_conf *mdev,
2965 if (a == 0 || b == 0)
2967 d = (a >
b) ? (a - b) : (b -
a);
2968 if (d > (a>>3) || d > (b>>3))
2969 dev_warn(
DEV,
"Considerable difference in %s: %llus vs. %llus\n", s,
2970 (
unsigned long long)a, (
unsigned long long)b);
2977 sector_t p_size, p_usize, my_usize;
2985 dev_err(
DEV,
"some backing storage is needed\n");
2995 warn_if_differ_considerably(mdev,
"lower level device sizes",
2996 p_size, drbd_get_max_capacity(mdev->ldev));
2997 warn_if_differ_considerably(mdev,
"user requested size",
2998 p_usize, mdev->ldev->dc.disk_size);
3006 my_usize = mdev->ldev->dc.disk_size;
3008 if (mdev->ldev->dc.disk_size != p_usize) {
3009 mdev->ldev->dc.disk_size = p_usize;
3010 dev_info(
DEV,
"Peer sets u_size to %lu sectors\n",
3011 (
unsigned long)mdev->ldev->dc.disk_size);
3020 dev_err(
DEV,
"The peer's disk size is too small!\n");
3022 mdev->ldev->dc.disk_size = my_usize;
3038 drbd_set_my_capacity(mdev, p_size);
3045 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3046 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3055 drbd_get_capacity(mdev->
this_bdev) || ldsc) {
3065 dev_info(
DEV,
"Resync of new storage suppressed with --assume-clean\n");
3080 int i, updated_uuids = 0;
3093 (mdev->
ed_uuid & ~((
u64)1)) != (p_uuid[UI_CURRENT] & ~((
u64)1))) {
3094 dev_err(
DEV,
"Can only connect to data with current UUID=%016llX\n",
3095 (
unsigned long long)mdev->
ed_uuid);
3101 int skip_initial_sync =
3106 if (skip_initial_sync) {
3107 dev_info(
DEV,
"Accepted new current UUID, preparing to skip initial sync\n");
3109 "clear_n_write from receive_uuids",
3123 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3132 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3160 ms.conn = c_tab[
ps.conn];
3165 ms.peer_isp = (
ps.aftr_isp |
ps.user_isp);
3186 val = convert_state(
val);
3206 real_peer_disk = peer_state.disk;
3265 if (peer_state.conn ==
C_AHEAD)
3289 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
3297 dev_err(
DEV,
"Disk attach process on the peer node was aborted.\n");
3314 ns.peer = peer_state.role;
3315 ns.pdsk = real_peer_disk;
3316 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
3325 dev_err(
DEV,
"Aborting Connect, can not thaw IO with an only Consistent peer\n");
3332 rv = _drbd_set_state(mdev,
ns, cs_flags, NULL);
3394 receive_bitmap_plain(
struct drbd_conf *mdev,
unsigned int data_size,
3398 unsigned want = num_words *
sizeof(
long);
3401 if (want != data_size) {
3402 dev_err(
DEV,
"%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
3407 err = drbd_recv(mdev, buffer, want);
3431 recv_bm_rle_bits(
struct drbd_conf *mdev,
3442 int toggle = DCBP_get_start(p);
3446 bitstream_init(&bs, p->
code, len, DCBP_get_pad_bits(p));
3448 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3452 for (have = bits; have > 0; s += rl, toggle = !
toggle) {
3453 bits = vli_decode_bits(&rl, look_ahead);
3460 dev_err(
DEV,
"bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
3467 dev_err(
DEV,
"bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3468 have, bits, look_ahead,
3469 (
unsigned int)(bs.cur.b - p->
code),
3470 (
unsigned int)bs.buf_len);
3473 look_ahead >>=
bits;
3476 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3479 look_ahead |= tmp <<
have;
3484 bm_xfer_ctx_bit_to_word_offset(c);
3501 return recv_bm_rle_bits(mdev, p, c);
3531 r = (total >
UINT_MAX/1000) ? (total / (plain/1000))
3532 : (1000 * total / plain);
3538 dev_info(
DEV,
"%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3539 "total %u; compression: %u.%u%%\n",
3543 total, r/10, r % 10);
3570 dev_err(
DEV,
"failed to allocate one page buffer in %s\n", __func__);
3581 err = receive_bitmap_plain(mdev, data_size, buffer, &c);
3588 dev_err(
DEV,
"ReportCBitmap packet too large\n");
3593 memcpy(p, h,
sizeof(*h));
3594 if (drbd_recv(mdev, p->
head.payload, data_size) != data_size)
3596 if (data_size <= (
sizeof(*p) -
sizeof(p->
head))) {
3597 dev_err(
DEV,
"ReportCBitmap packet too small (l:%u)\n", data_size);
3600 err = decode_bitmap_c(mdev, p, &c);
3602 dev_warn(
DEV,
"receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
3614 if (!drbd_recv_header(mdev, &cmd, &data_size))
3632 dev_info(
DEV,
"unexpected cstate (%s) in receive_bitmap\n",
3648 static char sink[128];
3651 dev_warn(
DEV,
"skipping unknown optional packet type %d, l: %d!\n",
3656 want =
min_t(
int, size,
sizeof(sink));
3657 r = drbd_recv(mdev, sink, want);
3668 drbd_tcp_quickack(mdev->data.socket);
3673 static int receive_out_of_sync(
struct drbd_conf *mdev,
enum drbd_packets cmd,
unsigned int data_size)
3683 dev_err(
DEV,
"ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3700 static struct data_cmd drbd_cmd_handler[] = {
3702 [P_DATA_REPLY] = { 1,
sizeof(
struct p_data), receive_DataReply },
3703 [P_RS_DATA_REPLY] = { 1,
sizeof(
struct p_data), receive_RSDataReply } ,
3704 [P_BARRIER] = { 0,
sizeof(
struct p_barrier), receive_Barrier } ,
3705 [P_BITMAP] = { 1,
sizeof(
struct p_header80), receive_bitmap } ,
3706 [P_COMPRESSED_BITMAP] = { 1,
sizeof(
struct p_header80), receive_bitmap } ,
3707 [P_UNPLUG_REMOTE] = { 0,
sizeof(
struct p_header80), receive_UnplugRemote },
3708 [P_DATA_REQUEST] = { 0,
sizeof(
struct p_block_req), receive_DataRequest },
3709 [P_RS_DATA_REQUEST] = { 0,
sizeof(
struct p_block_req), receive_DataRequest },
3710 [P_SYNC_PARAM] = { 1,
sizeof(
struct p_header80), receive_SyncParam },
3711 [P_SYNC_PARAM89] = { 1,
sizeof(
struct p_header80), receive_SyncParam },
3712 [P_PROTOCOL] = { 1,
sizeof(
struct p_protocol), receive_protocol },
3713 [P_UUIDS] = { 0,
sizeof(
struct p_uuids), receive_uuids },
3714 [P_SIZES] = { 0,
sizeof(
struct p_sizes), receive_sizes },
3715 [P_STATE] = { 0,
sizeof(
struct p_state), receive_state },
3716 [P_STATE_CHG_REQ] = { 0,
sizeof(
struct p_req_state), receive_req_state },
3717 [P_SYNC_UUID] = { 0,
sizeof(
struct p_rs_uuid), receive_sync_uuid },
3718 [P_OV_REQUEST] = { 0,
sizeof(
struct p_block_req), receive_DataRequest },
3719 [P_OV_REPLY] = { 1,
sizeof(
struct p_block_req), receive_DataRequest },
3720 [P_CSUM_RS_REQUEST] = { 1,
sizeof(
struct p_block_req), receive_DataRequest },
3721 [P_DELAY_PROBE] = { 0,
sizeof(
struct p_delay_probe93), receive_skip },
3722 [P_OUT_OF_SYNC] = { 0,
sizeof(
struct p_block_desc), receive_out_of_sync },
3725 [P_MAX_CMD] = { 0, 0, NULL },
3735 static void drbdd(
struct drbd_conf *mdev)
3738 unsigned int packet_size;
3745 if (!drbd_recv_header(mdev, &cmd, &packet_size))
3749 dev_err(
DEV,
"unknown packet type %d, l: %d!\n", cmd, packet_size);
3754 if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) {
3755 dev_err(
DEV,
"No payload expected %s l:%d\n", cmdname(cmd), packet_size);
3760 rv = drbd_recv(mdev, &header->
h80.payload, shs);
3763 dev_warn(
DEV,
"short read while reading sub header: rv=%d\n", rv);
3768 rv = drbd_cmd_handler[
cmd].
function(mdev, cmd, packet_size - shs);
3772 cmdname(cmd), packet_size);
3791 init_completion(&barr.
done);
3792 drbd_queue_work(&mdev->
data.work, &barr.
w);
3809 dev_err(
DEV,
"ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n",
3833 static void drbd_disconnect(
struct drbd_conf *mdev)
3851 drbd_thread_stop(&mdev->
asender);
3888 drbd_process_done_ee(mdev);
3893 if (!is_susp(mdev->
state))
3902 fp = mdev->ldev->dc.fencing;
3943 dev_info(
DEV,
"net_ee not empty, killed %u entries\n", i);
3946 dev_info(
DEV,
"pp_in_use_by_net = %d, expected 0\n", i);
3970 static int drbd_send_handshake(
struct drbd_conf *mdev)
3977 dev_err(
DEV,
"interrupted during initial handshake\n");
3981 if (mdev->
data.socket == NULL) {
3986 memset(p, 0,
sizeof(*p));
4002 static int drbd_do_handshake(
struct drbd_conf *mdev)
4011 rv = drbd_send_handshake(mdev);
4015 rv = drbd_recv_header(mdev, &cmd, &length);
4020 dev_err(
DEV,
"expected HandShake packet, received: %s (0x%04x)\n",
4025 if (length != expect) {
4026 dev_err(
DEV,
"expected HandShake length: %u, received: %u\n",
4031 rv = drbd_recv(mdev, &p->
head.payload, expect);
4035 dev_warn(
DEV,
"short read receiving handshake packet: l=%u\n", rv);
4057 "I support %d-%d, peer supports %d-%d\n",
4063 #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
4064 static int drbd_do_auth(
struct drbd_conf *mdev)
4066 dev_err(
DEV,
"This kernel was build without CONFIG_CRYPTO_HMAC.\n");
4067 dev_err(
DEV,
"You need to disable 'cram-hmac-alg' in drbd.conf.\n");
4071 #define CHALLENGE_LEN 64
4079 static int drbd_do_auth(
struct drbd_conf *mdev)
4081 char my_challenge[CHALLENGE_LEN];
4084 char *right_response =
NULL;
4085 char *peers_ch =
NULL;
4087 unsigned int resp_size;
4097 (
u8 *)mdev->
net_conf->shared_secret, key_len);
4099 dev_err(
DEV,
"crypto_hash_setkey() failed with %d\n", rv);
4110 rv = drbd_recv_header(mdev, &cmd, &length);
4115 dev_err(
DEV,
"expected AuthChallenge packet, received: %s (0x%04x)\n",
4121 if (length > CHALLENGE_LEN * 2) {
4122 dev_err(
DEV,
"expected AuthChallenge payload too big.\n");
4128 if (peers_ch == NULL) {
4129 dev_err(
DEV,
"kmalloc of peers_ch failed\n");
4134 rv = drbd_recv(mdev, peers_ch, length);
4138 dev_warn(
DEV,
"short read AuthChallenge: l=%u\n", rv);
4145 if (response == NULL) {
4146 dev_err(
DEV,
"kmalloc of response failed\n");
4152 sg_set_buf(&
sg, peers_ch, length);
4154 rv = crypto_hash_digest(&
desc, &
sg,
sg.length, response);
4156 dev_err(
DEV,
"crypto_hash_digest() failed with %d\n", rv);
4165 rv = drbd_recv_header(mdev, &cmd, &length);
4170 dev_err(
DEV,
"expected AuthResponse packet, received: %s (0x%04x)\n",
4176 if (length != resp_size) {
4177 dev_err(
DEV,
"expected AuthResponse payload of wrong size\n");
4182 rv = drbd_recv(mdev, response , resp_size);
4184 if (rv != resp_size) {
4186 dev_warn(
DEV,
"short read receiving AuthResponse: l=%u\n", rv);
4192 if (right_response == NULL) {
4193 dev_err(
DEV,
"kmalloc of right_response failed\n");
4198 sg_set_buf(&
sg, my_challenge, CHALLENGE_LEN);
4200 rv = crypto_hash_digest(&
desc, &
sg,
sg.length, right_response);
4202 dev_err(
DEV,
"crypto_hash_digest() failed with %d\n", rv);
4207 rv = !
memcmp(response, right_response, resp_size);
4210 dev_info(
DEV,
"Peer authenticated using %d bytes of '%s' HMAC\n",
4211 resp_size, mdev->
net_conf->cram_hmac_alg);
4218 kfree(right_response);
4227 unsigned int minor = mdev_to_minor(mdev);
4235 h = drbd_connect(mdev);
4237 drbd_disconnect(mdev);
4241 dev_warn(
DEV,
"Discarding network configuration.\n");
4247 if (get_net_conf(mdev)) {
4253 drbd_disconnect(mdev);
4271 dev_err(
DEV,
"Requested state change failed by peer: %s (%d)\n",
4281 return drbd_send_ping_ack(mdev);
4288 mdev->
meta.socket->sk->sk_rcvtimeo = mdev->
net_conf->ping_int*
HZ;
4323 struct hlist_head *slot = tl_hash_slot(mdev, sector);
4328 if ((
unsigned long)req == (
unsigned long)
id) {
4329 if (req->
sector != sector) {
4330 dev_err(
DEV,
"_ack_id_to_req: found req %p but it has "
4331 "wrong sector (%llus versus %llus)\n", req,
4332 (
unsigned long long)req->
sector,
4333 (
unsigned long long)sector);
4345 static int validate_req_change_req_state(
struct drbd_conf *mdev,
4353 req = validator(mdev,
id, sector);
4357 dev_err(
DEV,
"%s: failed to find req %p, sector %llus\n", func,
4358 (
void *)(
unsigned long)
id, (
unsigned long long)sector);
4405 return validate_req_change_req_state(mdev, p->
block_id, sector,
4406 _ack_id_to_req, __func__ , what);
4426 req = _ack_id_to_req(mdev, p->
block_id, sector);
4440 dev_err(
DEV,
"%s: failed to find req %p, sector %llus\n", __func__,
4441 (
void *)(
unsigned long)p->
block_id, (
unsigned long long)sector);
4459 dev_err(
DEV,
"Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4462 return validate_req_change_req_state(mdev, p->
block_id, sector,
4539 if ((mdev->
ov_left & 0x200) == 0x200)
4546 drbd_queue_work_front(&mdev->
data.work, w);
4567 static struct asender_cmd *get_asender_cmd(
int cmd)
4574 [P_PING_ACK] = {
sizeof(
struct p_header80), got_PingAck },
4575 [P_RECV_ACK] = {
sizeof(
struct p_block_ack), got_BlockAck },
4576 [P_WRITE_ACK] = {
sizeof(
struct p_block_ack), got_BlockAck },
4577 [P_RS_WRITE_ACK] = {
sizeof(
struct p_block_ack), got_BlockAck },
4578 [P_DISCARD_ACK] = {
sizeof(
struct p_block_ack), got_BlockAck },
4579 [P_NEG_ACK] = {
sizeof(
struct p_block_ack), got_NegAck },
4580 [P_NEG_DREPLY] = {
sizeof(
struct p_block_ack), got_NegDReply },
4581 [P_NEG_RS_DREPLY] = {
sizeof(
struct p_block_ack), got_NegRSDReply},
4582 [P_OV_RESULT] = {
sizeof(
struct p_block_ack), got_OVResult },
4583 [P_BARRIER_ACK] = {
sizeof(
struct p_barrier_ack), got_BarrierAck },
4585 [P_RS_IS_IN_SYNC] = {
sizeof(
struct p_block_ack), got_IsInSync },
4587 [P_RS_CANCEL] = {
sizeof(
struct p_block_ack), got_NegRSDReply},
4588 [P_MAX_CMD] = { 0, NULL },
4590 if (cmd >
P_MAX_CMD || asender_tbl[cmd].process == NULL)
4592 return &asender_tbl[
cmd];
4606 int ping_timeout_active = 0;
4613 while (get_t_state(thi) ==
Running) {
4616 ERR_IF(!drbd_send_ping(mdev))
goto reconnect;
4617 mdev->
meta.socket->sk->sk_rcvtimeo =
4619 ping_timeout_active = 1;
4626 drbd_tcp_cork(mdev->
meta.socket);
4630 if (!drbd_process_done_ee(mdev))
4635 empty = list_empty(&mdev->
done_ee);
4645 drbd_tcp_uncork(mdev->
meta.socket);
4651 rv = drbd_recv_short(mdev, mdev->
meta.socket,
4652 buf, expect-received, 0);
4670 }
else if (rv == 0) {
4671 dev_err(
DEV,
"meta connection shut down by peer.\n");
4673 }
else if (rv == -
EAGAIN) {
4679 if (ping_timeout_active) {
4680 dev_err(
DEV,
"PingAck did not arrive in time.\n");
4685 }
else if (rv == -
EINTR) {
4688 dev_err(
DEV,
"sock_recvmsg returned %d\n", rv);
4692 if (received == expect && cmd == NULL) {
4694 dev_err(
DEV,
"magic?? on meta m: 0x%08x c: %d l: %d\n",
4703 dev_err(
DEV,
"unknown command?? on meta m: 0x%08x c: %d l: %d\n",
4713 if (received == expect) {
4721 if (cmd == get_asender_cmd(P_PING_ACK))
4722 ping_timeout_active = 0;