28 #include <linux/module.h>
30 #include <linux/types.h>
31 #include <linux/slab.h>
34 #include <linux/sysctl.h>
35 #include <linux/random.h>
37 #include <linux/socket.h>
/*
 * Log mask prefix: the bitwise OR of ML_DLM and ML_DLM_RECOVERY.
 * NOTE(review): presumably picked up by the mlog() macro used throughout
 * this file -- confirm against the masklog header.
 */
52 #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_RECOVERY)

/*
 * Forward declarations for file-local (static) recovery helpers.  Each of
 * these is defined further down in this file.
 */

/* Local cleanup for a node that went down; takes the dead node's number. */
55 static void dlm_do_local_recovery_cleanup(
struct dlm_ctxt *
dlm,
u8 dead_node);
/*
 * Recovery thread function; receives an opaque data pointer and returns
 * an int status.
 */
57 static int dlm_recovery_thread(
void *
data);
/* Selects which node masters the current recovery; returns an int status. */
63 static int dlm_pick_recovery_master(
struct dlm_ctxt *
dlm);
/* Remasters the locks of dead_node; returns an int status. */
64 static int dlm_remaster_locks(
struct dlm_ctxt *
dlm,
u8 dead_node);
/* Sets up the per-recovery node data for dead_node; returns an int status. */
65 static int dlm_init_recovery_area(
struct dlm_ctxt *
dlm,
u8 dead_node);
/*
 * Sends a lock request to node request_from as part of recovering
 * dead_node; returns an int status.
 */
66 static int dlm_request_all_locks(
struct dlm_ctxt *
dlm,
67 u8 request_from,
u8 dead_node);
/* Tears down the per-recovery node data (dlm->reco.node_data). */
68 static void dlm_destroy_recovery_area(
struct dlm_ctxt *
dlm,
u8 dead_node);
72 const char *lockname,
int namelen,
75 static int dlm_send_mig_lockres_msg(
struct dlm_ctxt *
dlm,
80 static int dlm_process_recovery_data(
struct dlm_ctxt *
dlm,
/*
 * Forward declarations for the recovery message senders defined later in
 * this file.  Each returns an int status.
 */

/* Sends the finalize-recovery message to the nodes it iterates over. */
83 static int dlm_send_finalize_reco_message(
struct dlm_ctxt *
dlm);
/*
 * Sends the "data done" message for dead_node to node send_to.
 */
84 static int dlm_send_all_done_msg(
struct dlm_ctxt *
dlm,
85 u8 dead_node,
u8 send_to);
/*
 * Sends the begin-recovery message for dead_node; the definition logs that
 * it does not send to the dead node or to the local node.
 */
86 static int dlm_send_begin_reco_message(
struct dlm_ctxt *
dlm,
u8 dead_node);
87 static void dlm_move_reco_locks_to_list(
struct dlm_ctxt *
dlm,
/*
 * Forward declaration: for lock resources owned by dead_node, the
 * definition below changes the owner to new_master via
 * dlm_change_lockres_owner().
 */
89 static void dlm_finish_local_lockres_recovery(
struct dlm_ctxt *
dlm,
90 u8 dead_node,
u8 new_master);
/*
 * Callbacks for the $RECOVERY lock.  dlm_pick_recovery_master() passes
 * dlm_reco_ast and dlm_reco_bast together with the dlm context as astdata,
 * and passes dlm_reco_unlock_ast to dlmunlock().  The visible part of each
 * definition only logs that the callback fired.
 */
91 static void dlm_reco_ast(
void *astdata);
92 static void dlm_reco_bast(
void *astdata,
int blocked_type);
93 static void dlm_reco_unlock_ast(
void *astdata,
enum dlm_status st);
97 static int dlm_lockres_master_requery(
struct dlm_ctxt *
dlm,
/* Forward declaration; the definition follows immediately below. */
101 static u64 dlm_get_next_mig_cookie(
void);
/*
 * File-scope migration cookie counter, starting at 1.
 * dlm_get_next_mig_cookie() accesses it while holding dlm_mig_cookie_lock
 * and compares it against ~0ULL.
 */
105 static u64 dlm_mig_cookie = 1;
107 static u64 dlm_get_next_mig_cookie(
void)
110 spin_lock(&dlm_mig_cookie_lock);
112 if (dlm_mig_cookie == (~0ULL))
116 spin_unlock(&dlm_mig_cookie_lock);
120 static inline void dlm_set_reco_dead_node(
struct dlm_ctxt *
dlm,
124 if (dlm->
reco.dead_node != dead_node)
125 mlog(0,
"%s: changing dead_node from %u to %u\n",
126 dlm->
name, dlm->
reco.dead_node, dead_node);
127 dlm->
reco.dead_node = dead_node;
130 static inline void dlm_set_reco_master(
struct dlm_ctxt *dlm,
134 mlog(0,
"%s: changing new_master from %u to %u\n",
135 dlm->
name, dlm->
reco.new_master, master);
136 dlm->
reco.new_master = master;
139 static inline void __dlm_reset_recovery(
struct dlm_ctxt *dlm)
147 static inline void dlm_reset_recovery(
struct dlm_ctxt *dlm)
150 __dlm_reset_recovery(dlm);
165 list_splice_init(&dlm->
work_list, &tmp_list);
171 mlog(0,
"%s: work thread has %d work items\n", dlm->
name, tot);
174 workfunc = item->
func;
175 list_del_init(&item->
list);
183 workfunc(item, item->
data);
208 mlog(0,
"starting dlm recovery thread...\n");
224 mlog(0,
"waiting for dlm recovery thread to exit\n");
255 static void dlm_print_reco_node_status(
struct dlm_ctxt *dlm)
260 mlog(
ML_NOTICE,
"%s(%d): recovery info, state=%s, dead=%u, master=%u\n",
263 dlm->
reco.dead_node, dlm->
reco.new_master);
266 char *
st =
"unknown";
267 switch (ndata->
state) {
287 st =
"finalize-sent";
302 #define DLM_RECO_THREAD_TIMEOUT_MS (5 * 1000)
304 static int dlm_recovery_thread(
void *
data)
310 mlog(0,
"dlm thread running for %s...\n", dlm->
name);
314 status = dlm_do_recovery(dlm);
328 mlog(0,
"quitting DLM recovery thread\n");
333 static int dlm_reco_master_ready(
struct dlm_ctxt *dlm)
371 "domain %s\n", node, dlm->
name);
384 if (dlm_is_node_recovered(dlm, node))
388 "domain %s\n", node, dlm->
name);
392 dlm_is_node_recovered(dlm, node),
396 dlm_is_node_recovered(dlm, node));
405 static int dlm_in_recovery(
struct dlm_ctxt *dlm)
417 if (dlm_in_recovery(dlm)) {
418 mlog(0,
"%s: reco thread %d in recovery: "
419 "state=%d, master=%u, dead=%u\n",
421 dlm->
reco.state, dlm->
reco.new_master,
422 dlm->
reco.dead_node);
427 static void dlm_begin_recovery(
struct dlm_ctxt *dlm)
437 static void dlm_end_recovery(
struct dlm_ctxt *dlm)
447 static void dlm_print_recovery_master(
struct dlm_ctxt *dlm)
450 "dead node %u in domain %s\n", dlm->
reco.new_master,
455 static int dlm_do_recovery(
struct dlm_ctxt *dlm)
465 mlog(0,
"new master %u died while recovering %u!\n",
466 dlm->
reco.new_master, dlm->
reco.dead_node);
479 dlm_set_reco_dead_node(dlm, bit);
482 mlog(
ML_ERROR,
"dead_node %u no longer in recovery map!\n",
483 dlm->
reco.dead_node);
493 mlog(0,
"%s(%d):recovery thread found node %u in the recovery map!\n",
495 dlm->
reco.dead_node);
500 dlm_begin_recovery(dlm);
510 ret = dlm_pick_recovery_master(dlm);
515 mlog(0,
"another node will master this recovery session.\n");
518 dlm_print_recovery_master(dlm);
523 dlm_end_recovery(dlm);
529 dlm_print_recovery_master(dlm);
531 status = dlm_remaster_locks(dlm, dlm->
reco.dead_node);
534 mlog(
ML_ERROR,
"%s: Error %d remastering locks for node %u, "
535 "retrying.\n", dlm->
name, status, dlm->
reco.dead_node);
541 mlog(0,
"DONE mastering recovery of %s:%u here(this=%u)!\n",
543 dlm_reset_recovery(dlm);
545 dlm_end_recovery(dlm);
551 static int dlm_remaster_locks(
struct dlm_ctxt *dlm,
u8 dead_node)
562 status = dlm_init_recovery_area(dlm, dead_node);
565 "retrying\n", dlm->
name);
568 }
while (status != 0);
576 mlog(0,
"%s: Requesting lock info from node %u\n", dlm->
name,
585 status = dlm_request_all_locks(dlm, ndata->
node_num,
599 mlog(0,
"waited 1 sec for %u, "
605 mlog(0,
"%s: node %u returned "
606 "%d during recovery, retrying "
607 "after a short wait\n",
613 }
while (status != 0);
615 spin_lock(&dlm_reco_state_lock);
616 switch (ndata->
state) {
623 mlog(0,
"node %u died after requesting "
624 "recovery info for node %u\n",
631 mlog(0,
"now receiving recovery data from "
632 "node %u for dead node %u\n",
636 mlog(0,
"already receiving recovery data from "
637 "node %u for dead node %u\n",
641 mlog(0,
"already DONE receiving recovery data "
642 "from node %u for dead node %u\n",
646 spin_unlock(&dlm_reco_state_lock);
649 mlog(0,
"%s: Done requesting all lock info\n", dlm->
name);
658 spin_lock(&dlm_reco_state_lock);
660 mlog(0,
"checking recovery state of node %u\n",
662 switch (ndata->
state) {
666 "node %u: state=%d\n",
671 mlog(0,
"node %u died after "
672 "requesting recovery info for "
678 mlog(0,
"%s: node %u still in state %s\n",
681 "receiving" :
"requested");
685 mlog(0,
"%s: node %u state is done\n",
689 mlog(0,
"%s: node %u state is finalize\n",
694 spin_unlock(&dlm_reco_state_lock);
696 mlog(0,
"pass #%d, all_nodes_done?: %s\n", ++pass,
697 all_nodes_done?
"yes":
"no");
698 if (all_nodes_done) {
704 mlog(0,
"all nodes are done! send finalize\n");
705 ret = dlm_send_finalize_reco_message(dlm);
710 dlm_finish_local_lockres_recovery(dlm, dead_node,
713 mlog(0,
"should be done with recovery!\n");
715 mlog(0,
"finishing recovery of %s at %lu, "
716 "dead=%u, this=%u, new=%u\n", dlm->
name,
717 jiffies, dlm->
reco.dead_node,
734 dlm_destroy_recovery_area(dlm, dead_node);
739 static int dlm_init_recovery_area(
struct dlm_ctxt *dlm,
u8 dead_node)
757 ndata = kzalloc(
sizeof(*ndata),
GFP_NOFS);
759 dlm_destroy_recovery_area(dlm, dead_node);
764 spin_lock(&dlm_reco_state_lock);
766 spin_unlock(&dlm_reco_state_lock);
773 static void dlm_destroy_recovery_area(
struct dlm_ctxt *dlm,
u8 dead_node)
778 spin_lock(&dlm_reco_state_lock);
779 list_splice_init(&dlm->
reco.node_data, &tmplist);
780 spin_unlock(&dlm_reco_state_lock);
783 list_del_init(&ndata->
list);
788 static int dlm_request_all_locks(
struct dlm_ctxt *dlm,
u8 request_from,
797 mlog(0,
"dlm_request_all_locks: dead node is %u, sending request "
798 "to %u\n", dead_node, request_from);
807 &
lr,
sizeof(
lr), request_from,
NULL);
811 mlog(
ML_ERROR,
"%s: Error %d send LOCK_REQUEST to node %u "
812 "to recover dead node %u\n", dlm->
name, ret,
813 request_from, dead_node);
832 mlog(
ML_ERROR,
"%s: node %u sent dead_node=%u, but local "
835 dlm_print_reco_node_status(dlm);
858 dlm_init_work_item(dlm,
item, dlm_request_all_locks_worker,
buf);
877 u8 dead_node, reco_master;
878 int skip_all_done = 0;
881 dead_node = item->
u.
ral.dead_node;
882 reco_master = item->
u.
ral.reco_master;
885 mlog(0,
"%s: recovery worker started, dead=%u, master=%u\n",
886 dlm->
name, dead_node, reco_master);
888 if (dead_node != dlm->
reco.dead_node ||
889 reco_master != dlm->
reco.new_master) {
894 "recovery master %u died, thread=(dead=%u,mas=%u)"
895 " current=(dead=%u,mas=%u)\n", dlm->
name,
896 reco_master, dead_node, reco_master,
897 dlm->
reco.dead_node, dlm->
reco.new_master);
900 "master=%u), request(dead=%u, master=%u)\n",
902 dlm->
reco.new_master, dead_node, reco_master);
913 dlm_move_reco_locks_to_list(dlm, &
resources, dead_node);
924 "recovery state for dead node %u, ret=%d\n", dlm->
name,
925 reco_master, dead_node, ret);
936 if (!skip_all_done) {
937 ret = dlm_send_all_done_msg(dlm, dead_node, reco_master);
940 "recovery all-done for dead node %u, ret=%d\n",
941 dlm->
name, reco_master, dead_node, ret);
949 static int dlm_send_all_done_msg(
struct dlm_ctxt *dlm,
u8 dead_node,
u8 send_to)
954 memset(&done_msg, 0,
sizeof(done_msg));
957 mlog(0,
"sending DATA DONE message to %u, "
958 "my node=%u, dead node=%u\n", send_to, done_msg.node_idx,
962 sizeof(done_msg), send_to, &tmpret);
964 mlog(
ML_ERROR,
"%s: Error %d send RECO_DATA_DONE to node %u "
965 "to recover dead node %u\n", dlm->
name, ret, send_to,
987 mlog(0,
"got DATA DONE: dead_node=%u, reco.dead_node=%u, "
988 "node_idx=%u, this node=%u\n", done->
dead_node,
992 "Got DATA DONE: dead_node=%u, reco.dead_node=%u, "
993 "node_idx=%u, this node=%u\n", done->
dead_node,
996 spin_lock(&dlm_reco_state_lock);
1001 switch (ndata->
state) {
1017 mlog(0,
"node %u is DONE sending "
1026 spin_unlock(&dlm_reco_state_lock);
1033 mlog(
ML_ERROR,
"failed to find recovery node data for node "
1037 mlog(0,
"leaving reco data done handler, ret=%d\n", ret);
1041 static void dlm_move_reco_locks_to_list(
struct dlm_ctxt *dlm,
1052 if (dlm_is_recovery_lock(res->
lockname.name,
1056 if (lock->
ml.node == dead_node) {
1057 mlog(0,
"AHA! there was "
1058 "a $RECOVERY lock for dead "
1060 dead_node, dlm->
name);
1061 list_del_init(&lock->
list);
1070 if (res->
owner == dead_node) {
1071 mlog(0,
"found lockres owned by dead node while "
1072 "doing recovery for node %u. sending it.\n",
1076 mlog(0,
"found UNKNOWN owner while doing recovery "
1077 "for node %u. sending it.\n", dead_node);
1086 int total_locks = 0;
1090 for (i=0; i<3; i++) {
1106 int mres_total_locks =
be32_to_cpu(mres->total_locks);
1107 int sz, ret = 0, status = 0;
1108 u8 orig_flags = mres->flags,
1109 orig_master = mres->master;
1112 if (!mres->num_locks)
1119 orig_flags = mres->flags;
1120 BUG_ON(total_locks > mres_total_locks);
1121 if (total_locks == mres_total_locks)
1124 mlog(0,
"%s:%.*s: sending mig lockres (%s) to %u\n",
1125 dlm->name, res->lockname.len, res->lockname.name,
1131 sz, send_to, &status);
1135 mlog(
ML_ERROR,
"%s: res %.*s, Error %d send MIG_LOCKRES to "
1136 "node %u (%s)\n", dlm->name, mres->lockname_len,
1137 mres->lockname, ret, send_to,
1139 "migration" :
"recovery"));
1148 "myself!\n", send_to);
1155 dlm_init_migratable_lockres(mres, res->lockname.name,
1156 res->lockname.len, mres_total_locks,
1157 mig_cookie, orig_flags, orig_master);
1162 const char *lockname,
int namelen,
1177 static void dlm_prepare_lvb_for_migration(
struct dlm_lock *lock,
1192 if (dlm_lvb_is_empty(mres->
lvb)) {
1201 mlog(
ML_ERROR,
"Mismatched lvb in lock cookie=%u:%llu, name=%.*s, "
1213 static int dlm_add_lock_to_array(
struct dlm_lock *lock,
1219 ml = &(mres->
ml[lock_num]);
1221 ml->
type = lock->
ml.type;
1227 dlm_prepare_lvb_for_migration(lock, mres, queue);
1229 ml->
node = lock->
ml.node;
1237 static void dlm_add_dummy_lock(
struct dlm_ctxt *dlm,
1242 dummy.ml.cookie = 0;
1251 static inline int dlm_is_dummy_lock(
struct dlm_ctxt *dlm,
1260 *nodenum = ml->
node;
1278 mlog(0,
"sending to %u\n", send_to);
1280 total_locks = dlm_num_locks_in_lockres(res);
1283 mlog(0,
"argh. lockres has %d locks. this will "
1284 "require more than one network packet to "
1285 "migrate\n", total_locks);
1286 mig_cookie = dlm_get_next_mig_cookie();
1289 dlm_init_migratable_lockres(mres, res->
lockname.name,
1291 mig_cookie, flags, res->
owner);
1295 queue = dlm_list_idx_to_ptr(res, i);
1299 if (!dlm_add_lock_to_array(lock, mres, i))
1304 ret = dlm_send_mig_lockres_msg(dlm, mres, send_to,
1310 if (total_locks == 0) {
1312 mlog(0,
"%s:%.*s: sending dummy lock to %u, %s\n",
1316 dlm_add_dummy_lock(dlm, mres);
1319 ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks);
1325 mlog(
ML_ERROR,
"%s: dlm_send_mig_lockres_msg returned %d\n",
1329 mlog(0,
"%s: node %u went down while sending %s "
1330 "lockres %.*s\n", dlm->
name, send_to,
1369 real_master = mres->
master;
1375 mlog(0,
"%s message received from node %u\n",
1377 "recovery" :
"migration", mres->
master);
1379 mlog(0,
"all done flag. all lockres data received!\n");
1383 item = kzalloc(
sizeof(*item),
GFP_NOFS);
1394 spin_lock(&res->spinlock);
1401 mlog(0,
"lock %.*s is already migrating\n",
1407 "lock %.*s, but marked as recovering!\n",
1410 spin_unlock(&res->spinlock);
1415 spin_unlock(&res->spinlock);
1425 dlm_lockres_get(res);
1440 dlm_lockres_get(res);
1455 spin_lock(&res->spinlock);
1457 spin_unlock(&res->spinlock);
1465 spin_lock(&res->spinlock);
1472 mlog(0,
"recovery has passed me a lockres with an "
1473 "unknown owner.. will need to requery: "
1478 dlm_change_lockres_owner(dlm, res, dlm->
node_num);
1480 spin_unlock(&res->spinlock);
1485 dlm_init_work_item(dlm, item, dlm_mig_lockres_worker,
buf);
1486 item->
u.
ml.lockres =
res;
1487 item->
u.
ml.real_master = real_master;
1488 item->
u.
ml.extra_ref = extra_refs;
1512 static void dlm_mig_lockres_worker(
struct dlm_work_item *item,
void *data)
1524 res = item->
u.
ml.lockres;
1525 real_master = item->
u.
ml.real_master;
1526 extra_ref = item->
u.
ml.extra_ref;
1532 ret = dlm_lockres_master_requery(dlm, res, &real_master);
1534 mlog(0,
"dlm_lockres_master_requery ret=%d\n",
1539 mlog(0,
"lockres %.*s not claimed. "
1540 "this node will take it.\n",
1546 mlog(0,
"master needs to respond to sender "
1547 "that node %u still owns %.*s\n",
1555 ret = dlm_process_recovery_data(dlm, res, mres);
1557 mlog(0,
"dlm_process_recovery_data returned %d\n", ret);
1559 mlog(0,
"dlm_process_recovery_data succeeded\n");
1580 static int dlm_lockres_master_requery(
struct dlm_ctxt *dlm,
1617 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
1630 mlog(0,
"lock master is %u\n", *real_master);
1639 u8 nodenum,
u8 *real_master)
1645 memset(&req, 0,
sizeof(req));
1651 &req,
sizeof(req), nodenum, &status);
1654 mlog(
ML_ERROR,
"Error %d when sending message %u (key "
1660 *real_master = (
u8) (status & 0xff);
1661 mlog(0,
"node %u responded to master requery with %u\n",
1662 nodenum, *real_master);
1694 master = res->
owner;
1750 static int dlm_process_recovery_data(
struct dlm_ctxt *dlm,
1763 unsigned int added = 0;
1766 mlog(0,
"running %d locks for this lockres\n", mres->
num_locks);
1768 ml = &(mres->
ml[
i]);
1770 if (dlm_is_dummy_lock(dlm, ml, &from)) {
1773 mlog(0,
"%s:%.*s: dummy lock for %u\n",
1786 queue = dlm_list_num_to_pointer(res, ml->
list);
1798 tmpq = dlm_list_idx_to_ptr(res, j);
1814 "with cookie %u:%llu, node %u, "
1815 "list %u, flags 0x%x, type %d, "
1816 "conv %d, highest blocked %d\n",
1825 if (lock->
ml.node != ml->
node) {
1826 c = lock->
ml.cookie;
1828 "cookie %u:%llu, name %.*s, node %u\n",
1835 "node %u, list %u, flags 0x%x, type %d, "
1836 "conv %d, highest blocked %d\n",
1845 if (tmpq != queue) {
1847 mlog(0,
"Lock cookie %u:%llu was on list %u "
1848 "instead of list %u for %.*s\n",
1863 list_move_tail(&lock->
list, queue);
1867 mlog(0,
"just reordered a local lock!\n");
1878 lksb = newlock->
lksb;
1891 if (!dlm_lvb_is_empty(mres->
lvb)) {
1907 if (!dlm_lvb_is_empty(res->
lvb) &&
1912 "lvb! type=%d\n", dlm->
name,
1918 printk(
"]\nmigrated lvb=[");
1949 if (lock->
ml.cookie == ml->
cookie) {
1950 c = lock->
ml.cookie;
1952 "exists on this lockres!\n", dlm->
name,
1958 "node=%u, cookie=%u:%llu, queue=%d\n",
1972 mlog(0,
"%s:%.*s: added lock for node %u, "
1973 "setting refmap bit\n", dlm->
name,
1980 mlog(0,
"done running all the locks\n");
2009 "Recovering res %s:%.*s, is already on recovery list!\n",
2015 dlm_lockres_get(res);
2020 queue = dlm_list_idx_to_ptr(res, i);
2026 mlog(0,
"node died with convert pending "
2027 "on %.*s. move back to granted list.\n",
2034 mlog(0,
"node died with lock pending "
2035 "on %.*s. remove from blocked list and skip.\n",
2055 mlog(0,
"node died with unlock pending "
2056 "on %.*s. remove from blocked list and skip.\n",
2066 mlog(0,
"node died with cancel pending "
2067 "on %.*s. move back to granted list.\n",
2082 static void dlm_finish_local_lockres_recovery(
struct dlm_ctxt *dlm,
2083 u8 dead_node,
u8 new_master)
2093 if (res->
owner == dead_node) {
2094 mlog(0,
"%s: res %.*s, Changing owner from %u to %u\n",
2096 res->
owner, new_master);
2101 dlm_change_lockres_owner(dlm, res, new_master);
2116 bucket = dlm_lockres_hash(dlm, i);
2121 if (res->
owner != dead_node &&
2132 mlog(0,
"%s: res %.*s, Changing owner from %u to %u\n",
2134 res->
owner, new_master);
2136 dlm_change_lockres_owner(dlm, res, new_master);
2146 static inline int dlm_lvb_needs_invalidation(
struct dlm_lock *lock,
int local)
2157 static void dlm_revalidate_lvb(
struct dlm_ctxt *dlm,
2162 int blank_lvb = 0, local = 0;
2172 search_node = dead_node;
2181 queue = dlm_list_idx_to_ptr(res, i);
2183 if (lock->
ml.node == search_node) {
2184 if (dlm_lvb_needs_invalidation(lock, local)) {
2194 mlog(0,
"clearing %.*s lvb, dead node %u had EX\n",
2200 static void dlm_free_dead_locks(
struct dlm_ctxt *dlm,
2204 unsigned int freed = 0;
2218 if (lock->
ml.node == dead_node) {
2219 list_del_init(&lock->
list);
2227 if (lock->
ml.node == dead_node) {
2228 list_del_init(&lock->
list);
2236 if (lock->
ml.node == dead_node) {
2237 list_del_init(&lock->
list);
2246 mlog(0,
"%s:%.*s: freed %u locks for dead node %u, "
2247 "dropping ref from lockres\n", dlm->
name,
2250 mlog(
ML_ERROR,
"%s:%.*s: freed %u locks for dead node %u, "
2251 "but ref was not set\n", dlm->
name,
2257 mlog(0,
"%s:%.*s: dead node %u had a ref, but had "
2258 "no locks and had not purged before dying\n", dlm->
name,
2274 static void dlm_do_local_recovery_cleanup(
struct dlm_ctxt *dlm,
u8 dead_node)
2301 bucket = dlm_lockres_hash(dlm, i);
2305 if (dlm_is_recovery_lock(res->
lockname.name,
2309 if (lock->
ml.node == dead_node) {
2310 mlog(0,
"AHA! there was "
2311 "a $RECOVERY lock for dead "
2313 dead_node, dlm->
name);
2314 list_del_init(&lock->
list);
2324 dlm_revalidate_lvb(dlm, res, dead_node);
2325 if (res->
owner == dead_node) {
2328 "recovery as it is being freed\n",
2336 dlm_free_dead_locks(dlm, res, dead_node);
2345 static void __dlm_hb_node_down(
struct dlm_ctxt *dlm,
int idx)
2349 if (dlm->
reco.new_master == idx) {
2350 mlog(0,
"%s: recovery master %d just died\n",
2356 mlog(0,
"%s: dead master %d had reached "
2357 "finalize1 state, clearing\n", dlm->
name, idx);
2359 __dlm_reset_recovery(dlm);
2365 mlog(0,
"Clearing join state for node %u\n", idx);
2371 mlog(0,
"for domain %s, node %d is already dead. "
2372 "another node likely did recovery already.\n",
2381 mlog(0,
"node %u already removed from domain!\n", idx);
2389 dlm_do_local_recovery_cleanup(dlm, idx);
2394 mlog(0,
"node %u being removed from domain map!\n", idx);
2402 mlog(0,
"domain %s, node %u already added "
2403 "to recovery map!\n", dlm->
name, idx);
2423 __dlm_hb_node_down(dlm, idx);
2445 static void dlm_reco_ast(
void *astdata)
2448 mlog(0,
"ast for recovery lock fired!, this=%u, dlm=%s\n",
2451 static void dlm_reco_bast(
void *astdata,
int blocked_type)
2454 mlog(0,
"bast for recovery lock fired!, this=%u, dlm=%s\n",
2457 static void dlm_reco_unlock_ast(
void *astdata,
enum dlm_status st)
2459 mlog(0,
"unlockast for recovery lock fired!\n");
2474 static int dlm_pick_recovery_master(
struct dlm_ctxt *dlm)
2480 mlog(0,
"starting recovery of %s at %lu, dead=%u, this=%u\n",
2483 memset(&lksb, 0,
sizeof(lksb));
2487 dlm_reco_ast, dlm, dlm_reco_bast);
2489 mlog(0,
"%s: dlmlock($RECOVERY) returned %d, lksb=%d\n",
2493 mlog(0,
"dlm=%s dlmlock says I got it (this=%u)\n",
2498 if (dlm_reco_master_ready(dlm)) {
2499 mlog(0,
"%s: got reco EX lock, but %u will "
2500 "do the recovery\n", dlm->
name,
2501 dlm->
reco.new_master);
2510 mlog(0,
"%s: got reco EX lock, but "
2511 "node got recovered already\n", dlm->
name);
2514 "but no dead node!\n",
2525 mlog(0,
"%s: dead=%u, this=%u, sending "
2526 "begin_reco now\n", dlm->
name,
2528 status = dlm_send_begin_reco_message(dlm,
2529 dlm->
reco.dead_node);
2535 dlm_set_reco_master(dlm, dlm->
node_num);
2541 ret =
dlmunlock(dlm, &lksb, 0, dlm_reco_unlock_ast, dlm);
2543 mlog(0,
"got DLM_DENIED, trying LKM_CANCEL\n");
2556 mlog(0,
"dlm=%s dlmlock says another node got it (this=%u)\n",
2562 dlm_reco_master_ready(dlm),
2564 if (!dlm_reco_master_ready(dlm)) {
2565 mlog(0,
"%s: reco master taking awhile\n",
2570 mlog(0,
"%s: reco master %u is ready to recover %u\n",
2574 mlog(0,
"dlm=%s dlmlock says master node died (this=%u)\n",
2598 static int dlm_send_begin_reco_message(
struct dlm_ctxt *dlm,
u8 dead_node)
2606 mlog(0,
"%s: dead node is %u\n", dlm->
name, dead_node);
2614 memset(&br, 0,
sizeof(br));
2616 br.dead_node = dead_node;
2618 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
2620 if (nodenum == dead_node) {
2621 mlog(0,
"not sending begin reco to dead node "
2626 mlog(0,
"not sending begin reco to self\n");
2631 mlog(0,
"attempting to send begin reco msg to %d\n",
2634 &br,
sizeof(br), nodenum, &status);
2642 "begin reco msg (%d)\n", dlm->
name, nodenum, ret);
2652 mlog(0,
"%s: trying to start recovery of node "
2653 "%u, but node %u is waiting for last recovery "
2654 "to complete, backoff for a bit\n", dlm->
name,
2655 dead_node, nodenum);
2666 "returned %d\n", dlm->
name, nodenum, ret);
2697 mlog(0,
"%s: node %u wants to recover node %u (%u:%u) "
2698 "but this node is in finalize state, waiting on finalize2\n",
2700 dlm->
reco.dead_node, dlm->
reco.new_master);
2706 mlog(0,
"%s: node %u wants to recover node %u (%u:%u)\n",
2708 dlm->
reco.dead_node, dlm->
reco.new_master);
2715 mlog(0,
"%s: new_master %u died, changing "
2716 "to %u\n", dlm->
name, dlm->
reco.new_master,
2719 mlog(0,
"%s: new_master %u NOT DEAD, changing "
2720 "to %u\n", dlm->
name, dlm->
reco.new_master,
2727 "node %u changing it to %u\n", dlm->
name,
2730 dlm_set_reco_master(dlm, br->
node_idx);
2731 dlm_set_reco_dead_node(dlm, br->
dead_node);
2733 mlog(0,
"recovery master %u sees %u as dead, but this "
2734 "node has not yet. marking %u as dead\n",
2738 mlog(0,
"%u not in domain/live_nodes map "
2739 "so setting it in reco map manually\n",
2751 mlog(0,
"%s: recovery started by node %u, for %u (%u:%u)\n",
2753 dlm->
reco.dead_node, dlm->
reco.new_master);
2759 #define DLM_FINALIZE_STAGE2 0x01
2760 static int dlm_send_finalize_reco_message(
struct dlm_ctxt *dlm)
2769 mlog(0,
"finishing recovery for node %s:%u, "
2770 "stage %d\n", dlm->
name, dlm->
reco.dead_node, stage);
2777 memset(&fr, 0,
sizeof(fr));
2783 while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
2787 &fr,
sizeof(fr), nodenum, &status);
2791 mlog(
ML_ERROR,
"Error %d when sending message %u (key "
2799 "node finished recovery.\n", nodenum);
2830 mlog(0,
"%s: node %u finalizing recovery stage%d of "
2837 mlog(
ML_ERROR,
"node %u sent recovery finalize msg, but node "
2838 "%u is supposed to be the new master, dead=%u\n",
2843 mlog(
ML_ERROR,
"node %u sent recovery finalize msg for dead "
2844 "node %u, but node %u is supposed to be dead\n",
2854 "new master %u for dead node %u, but "
2855 "this node has already received it!\n",
2857 dlm_print_reco_node_status(dlm);
2866 "new master %u for dead node %u, but "
2867 "this node did not have finalize1!\n",
2869 dlm_print_reco_node_status(dlm);
2874 dlm_reset_recovery(dlm);
2881 mlog(0,
"%s: recovery done, reco master was %u, dead now %u, master now %u\n",