27 #include <linux/module.h>
28 #include <linux/types.h>
29 #include <linux/slab.h>
48 #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)
57 static inline void byte_set_bit(
u8 nr,
u8 map[])
59 map[nr >> 3] |= (1
UL << (nr & 7));
62 static inline int byte_test_bit(
u8 nr,
u8 map[])
64 return ((1
UL << (nr & 7)) & (map[nr >> 3])) != 0;
/*
 * Fill the byte-granular bitmap dmap[] for the first sz bit positions.
 *
 * dmap is first cleared over (sz + 7) >> 3 bytes, i.e. sz bits rounded
 * up to whole bytes, and bits are then set one position at a time with
 * byte_set_bit().
 *
 * NOTE(review): presumably a position is set in dmap only when the same
 * position is set in smap -- confirm.
 */
67 static inline void byte_copymap(
u8 dmap[],
unsigned long smap[],
75 memset(dmap, 0, ((sz + 7) >> 3));
76 for (nn = 0 ; nn < sz; nn++)
78 byte_set_bit(nn, dmap);
81 static void dlm_free_pagevec(
void **vec,
int pages)
88 static void **dlm_alloc_pagevec(
int pages)
96 for (i = 0; i <
pages; i++)
100 mlog(0,
"Allocated DLM hash pagevec; %d pages (%lu expected), %lu buckets per page\n",
105 dlm_free_pagevec(vec, i);
143 #define DLM_DOMAIN_BACKOFF_MS 200
152 void *
data,
void **ret_data);
158 static void dlm_unregister_domain_handlers(
struct dlm_ctxt *
dlm);
179 bucket = dlm_lockres_hash(dlm, q->hash);
182 dlm_lockres_get(res);
198 mlog(0,
"%.*s\n", len, name);
202 bucket = dlm_lockres_hash(dlm, hash);
207 if (res->
lockname.name[0] != name[0])
213 dlm_lockres_get(res);
232 mlog(0,
"%.*s\n", len, name);
263 static struct dlm_ctxt * __dlm_lookup_domain_full(
const char *domain,
int len)
/*
 * Look up a domain context by NUL-terminated name.
 *
 * Forwards to __dlm_lookup_domain_full(), passing strlen(domain) as the
 * name length, and returns whatever that lookup returns.
 *
 * NOTE(review): dlm_wait_on_domain_helper() calls this with
 * dlm_domain_lock held; presumably that is required of every caller --
 * confirm.
 */
284 static struct dlm_ctxt * __dlm_lookup_domain(
const char *domain)
288 return __dlm_lookup_domain_full(domain,
strlen(domain));
295 static int dlm_wait_on_domain_helper(
const char *domain)
300 spin_lock(&dlm_domain_lock);
302 tmp = __dlm_lookup_domain(domain);
308 spin_unlock(&dlm_domain_lock);
312 static void dlm_free_ctxt_mem(
struct dlm_ctxt *
dlm)
314 dlm_destroy_debugfs_subroot(dlm);
/*
 * Release callback passed to kref_put() for dlm->dlm_refs.
 *
 * Unlinks the context from its list, then drops dlm_domain_lock while
 * the context memory is freed through dlm_free_ctxt_mem(), and takes
 * the lock again afterwards.
 *
 * NOTE(review): the unlock-before-lock order implies this is entered
 * with dlm_domain_lock already held -- confirm at the kref_put() call
 * site.
 */
331 static void dlm_ctxt_release(
struct kref *
kref)
341 list_del_init(&dlm->
list);
343 spin_unlock(&dlm_domain_lock);
345 mlog(0,
"freeing memory from domain %s\n", dlm->
name);
349 dlm_free_ctxt_mem(dlm);
351 spin_lock(&dlm_domain_lock);
357 kref_put(&dlm->
dlm_refs, dlm_ctxt_release);
361 static void __dlm_get(
struct dlm_ctxt *dlm)
404 static void dlm_destroy_dlm_worker(
struct dlm_ctxt *dlm)
/*
 * Teardown steps for a domain context that is going away.
 *
 * In order: unregister the domain message handlers, shut down the
 * debug state, destroy the dlm worker, and then unlink the context
 * from its list while holding dlm_domain_lock.
 */
413 static void dlm_complete_dlm_shutdown(
struct dlm_ctxt *dlm)
415 dlm_unregister_domain_handlers(dlm);
416 dlm_debug_shutdown(dlm);
419 dlm_destroy_dlm_worker(dlm);
424 spin_lock(&dlm_domain_lock);
425 list_del_init(&dlm->
list);
426 spin_unlock(&dlm_domain_lock);
432 static int dlm_migrate_all_locks(
struct dlm_ctxt *dlm)
434 int i,
num,
n, ret = 0;
440 mlog(0,
"Migrating locks from domain %s\n", dlm->
name);
447 bucket = dlm_lockres_hash(dlm, i);
448 iter = bucket->
first;
453 dlm_lockres_get(res);
481 mlog(0,
"%s: %d lock resources in hash last pass\n",
485 mlog(0,
"DONE Migrating locks from domain %s\n", dlm->
name);
489 static int dlm_no_joining_node(
struct dlm_ctxt *dlm)
500 static int dlm_begin_exit_domain_handler(
struct o2net_msg *
msg,
u32 len,
501 void *
data,
void **ret_data)
511 mlog(0,
"%s: Node %u sent a begin exit domain message\n", dlm->
name, node);
522 static void dlm_mark_domain_leaving(
struct dlm_ctxt *dlm)
527 spin_lock(&dlm_domain_lock);
531 mlog(0,
"Node %d is joining, we wait on it.\n",
534 spin_unlock(&dlm_domain_lock);
542 spin_unlock(&dlm_domain_lock);
545 static void __dlm_print_nodes(
struct dlm_ctxt *dlm)
547 int node = -1, num = 0;
557 printk(
") %u nodes\n", num);
560 static int dlm_exit_domain_handler(
struct o2net_msg *msg,
u32 len,
void *data,
567 mlog(0,
"%p %u %p", msg, len, data);
578 __dlm_print_nodes(dlm);
596 mlog(0,
"%s: Sending domain exit message %u to node %u\n", dlm->
name,
599 memset(&leave_msg, 0,
sizeof(leave_msg));
603 sizeof(leave_msg), node,
NULL);
605 mlog(
ML_ERROR,
"Error %d sending domain exit message %u "
606 "to node %u on domain %s\n", status, msg_type, node,
612 static void dlm_begin_exit_domain(
struct dlm_ctxt *dlm)
641 static void dlm_leave_domain(
struct dlm_ctxt *dlm)
671 "to node %d\n", status, node);
740 mlog(0,
"shutting down domain %s\n", dlm->
name);
741 dlm_begin_exit_domain(dlm);
746 while (dlm_migrate_all_locks(dlm)) {
749 mlog(0,
"%s: more migration to do\n", dlm->
name);
760 dlm_mark_domain_leaving(dlm);
761 dlm_leave_domain(dlm);
764 dlm_complete_dlm_shutdown(dlm);
770 static int dlm_query_join_proto_check(
char *
proto_type,
int node,
777 if (!dlm_protocol_compare(ours, &proto)) {
779 "node %u wanted to join with %s locking protocol "
780 "%u.%u, we respond with %u.%u\n",
789 "Node %u wanted to join with %s locking "
790 "protocol %u.%u, but we have %u.%u, disallowing\n",
824 static void dlm_query_join_wire_to_packet(
u32 wire,
833 static int dlm_query_join_handler(
struct o2net_msg *msg,
u32 len,
void *data,
846 mlog(0,
"node %u wants to join domain %s\n", query->
node_idx,
855 mlog(0,
"node %u is not in our live map yet\n",
864 spin_lock(&dlm_domain_lock);
877 if (!byte_test_bit(nodenum, query->
node_map)) {
878 mlog(0,
"disallow join as node %u does not "
879 "have node %u in its nodemap\n",
906 mlog(0,
"node %u trying to join, but recovery "
907 "is ongoing.\n", bit);
910 mlog(0,
"node %u trying to join, but it "
911 "still needs recovery.\n", bit);
914 mlog(0,
"node %u trying to join, but it "
915 "is still in the domain! needs recovery?\n",
925 if (dlm_query_join_proto_check(
"DLM", bit,
929 }
else if (dlm_query_join_proto_check(
"fs", bit,
937 __dlm_set_joining_node(dlm, query->
node_idx);
944 spin_unlock(&dlm_domain_lock);
947 mlog(0,
"We respond with %u\n", packet.
code);
949 dlm_query_join_packet_to_wire(&packet, &response);
953 static int dlm_assert_joined_handler(
struct o2net_msg *msg,
u32 len,
void *data,
961 mlog(0,
"node %u asserts join on domain %s\n", assert->
node_idx,
964 spin_lock(&dlm_domain_lock);
980 __dlm_print_nodes(dlm);
987 spin_unlock(&dlm_domain_lock);
992 static int dlm_match_regions(
struct dlm_ctxt *dlm,
994 char *local,
int locallen)
998 int localnr,
i,
j, foundit;
1003 mlog(
ML_ERROR,
"Domain %s: Joining node %d has global "
1004 "heartbeat enabled but local node %d does not\n",
1013 "heartbeat enabled but joining node %d does not\n",
1030 for (i = 0; i < localnr; ++
i) {
1043 "in local node %d but not in joining node %d\n",
1056 for (j = 0; j < localnr; ++
j) {
1066 "in joining node %d but not in local node %d\n",
1078 static int dlm_send_regions(
struct dlm_ctxt *dlm,
unsigned long *node_map)
1112 mlog(0,
"Sending regions to node %d\n", i);
1131 static int dlm_query_region_handler(
struct o2net_msg *msg,
u32 len,
1132 void *data,
void **ret_data)
1142 mlog(0,
"Node %u queries hb regions on domain %s\n", qr->
qr_node,
1154 spin_lock(&dlm_domain_lock);
1157 mlog(
ML_ERROR,
"Node %d queried hb regions on domain %s "
1165 mlog(
ML_ERROR,
"Node %d queried hb regions on domain %s "
1174 mlog(
ML_ERROR,
"Node %d queried hb regions on domain %s "
1175 "but active dlm protocol is %d.%d\n", qr->
qr_node,
1181 status = dlm_match_regions(dlm, qr, local,
sizeof(qr->
qr_regions));
1186 spin_unlock(&dlm_domain_lock);
1201 mlog(0,
"Node %3d, %pI4:%u\n", qn->
qn_nodes[j].ni_nodenum,
1202 &(qn->
qn_nodes[j].ni_ipv4_address),
1209 if (qn->
qn_nodes[j].ni_nodenum == i) {
1215 if (!local && !remote)
1218 if ((local && !remote) || (!local && remote))
1228 if (remote && !local)
1230 "registered in joining node %d but not in "
1236 if (local && !remote)
1238 "registered in local node %d but not in "
1243 BUG_ON((!local && !remote));
1253 static int dlm_send_nodeinfo(
struct dlm_ctxt *dlm,
unsigned long *node_map)
1289 i + 1)) < O2NM_MAX_NODES) {
1293 mlog(0,
"Sending nodeinfo to node %d\n", i);
1311 static int dlm_query_nodeinfo_handler(
struct o2net_msg *msg,
u32 len,
1312 void *data,
void **ret_data)
1316 int locked = 0, status = -
EINVAL;
1323 spin_lock(&dlm_domain_lock);
1326 mlog(
ML_ERROR,
"Node %d queried nodes on domain %s before "
1334 mlog(
ML_ERROR,
"Node %d queried nodes on domain %s but "
1344 "but active dlm protocol is %d.%d\n", qn->
qn_nodenum,
1350 status = dlm_match_nodes(dlm, qn);
1355 spin_unlock(&dlm_domain_lock);
1360 static int dlm_cancel_join_handler(
struct o2net_msg *msg,
u32 len,
void *data,
1368 mlog(0,
"node %u cancels join on domain %s\n", cancel->
node_idx,
1371 spin_lock(&dlm_domain_lock);
1372 dlm = __dlm_lookup_domain_full(cancel->
domain, cancel->
name_len);
1384 spin_unlock(&dlm_domain_lock);
1389 static int dlm_send_one_join_cancel(
struct dlm_ctxt *dlm,
1395 memset(&cancel_msg, 0,
sizeof(cancel_msg));
1396 cancel_msg.node_idx = dlm->
node_num;
1398 memcpy(cancel_msg.domain, dlm->
name, cancel_msg.name_len);
1401 &cancel_msg,
sizeof(cancel_msg), node,
1404 mlog(
ML_ERROR,
"Error %d when sending message %u (key 0x%x) to "
1415 static int dlm_send_join_cancels(
struct dlm_ctxt *dlm,
1416 unsigned long *node_map,
1417 unsigned int map_size)
1423 sizeof(
unsigned long))) {
1425 "map_size %u != BITS_TO_LONGS(O2NM_MAX_NODES) %u\n",
1433 node + 1)) < O2NM_MAX_NODES) {
1437 tmpstat = dlm_send_one_join_cancel(dlm, node);
1440 "node %d\n", tmpstat, node);
1451 static int dlm_request_join(
struct dlm_ctxt *dlm,
1460 mlog(0,
"querying node %d\n", node);
1462 memset(&join_msg, 0,
sizeof(join_msg));
1465 memcpy(join_msg.domain, dlm->
name, join_msg.name_len);
1470 byte_copymap(join_msg.node_map, dlm->
live_nodes_map, O2NM_MAX_NODES);
1473 sizeof(join_msg), node, &join_resp);
1475 mlog(
ML_ERROR,
"Error %d when sending message %u (key 0x%x) to "
1480 dlm_query_join_wire_to_packet(join_resp, &packet);
1491 *response = packet.
code;
1494 "This node requested DLM locking protocol %u.%u and "
1495 "filesystem locking protocol %u.%u. At least one of "
1496 "the protocol versions on node %d is not compatible, "
1504 *response = packet.
code;
1506 *response = packet.
code;
1511 "Node %d responds JOIN_OK with DLM locking protocol "
1512 "%u.%u and fs locking protocol %u.%u\n",
1524 mlog(0,
"status %d, node %d response is %d\n", status, node,
1531 static int dlm_send_one_join_assert(
struct dlm_ctxt *dlm,
1537 mlog(0,
"Sending join assert to node %u\n", node);
1539 memset(&assert_msg, 0,
sizeof(assert_msg));
1540 assert_msg.node_idx = dlm->
node_num;
1542 memcpy(assert_msg.domain, dlm->
name, assert_msg.name_len);
1545 &assert_msg,
sizeof(assert_msg), node,
1548 mlog(
ML_ERROR,
"Error %d when sending message %u (key 0x%x) to "
1555 static void dlm_send_join_asserts(
struct dlm_ctxt *dlm,
1556 unsigned long *node_map)
1563 node + 1)) < O2NM_MAX_NODES) {
1571 status = dlm_send_one_join_assert(dlm, node);
1579 "join on node %d\n", status, node);
1585 }
while (status && live);
1594 static int dlm_should_restart_join(
struct dlm_ctxt *dlm,
1601 mlog(0,
"Latest response of disallow -- should restart\n");
1613 mlog(0,
"Node maps changed -- should restart\n");
1618 static int dlm_try_to_join_domain(
struct dlm_ctxt *dlm)
1620 int status = 0, tmpstat,
node;
1641 __dlm_set_joining_node(dlm, dlm->
node_num);
1647 node + 1)) < O2NM_MAX_NODES) {
1651 status = dlm_request_join(dlm, node, &response);
1662 if (dlm_should_restart_join(dlm, ctxt, response)) {
1668 mlog(0,
"Yay, done querying nodes!\n");
1701 spin_lock(&dlm_domain_lock);
1704 spin_unlock(&dlm_domain_lock);
1711 __dlm_print_nodes(dlm);
1718 tmpstat = dlm_send_join_cancels(dlm,
1727 mlog(0,
"returning %d\n", status);
1731 static void dlm_unregister_domain_handlers(
struct dlm_ctxt *dlm)
1738 static int dlm_register_domain_handlers(
struct dlm_ctxt *dlm)
1742 mlog(0,
"registering handlers.\n");
1801 dlm_exit_domain_handler,
1864 dlm_begin_exit_domain_handler,
1871 dlm_unregister_domain_handlers(dlm);
/*
 * Join the domain named dlm->name.
 *
 * Registers the domain handlers and initialises the debug state, then
 * calls dlm_try_to_join_domain() repeatedly for as long as it returns
 * -EAGAIN.  Between attempts a pending signal is checked, and a
 * back-off value taken from the low two bits of jiffies is added to
 * total_backoff.
 *
 * NOTE(review): DLM_JOIN_TIMEOUT_MSECS (90000) presumably bounds
 * total_backoff before the join is abandoned -- confirm.
 * NOTE(review): backoff is declared unsigned int but is logged with
 * "%d"; "%u" would match the type.
 * NOTE(review): the trailing unregister / debug-shutdown /
 * destroy-worker calls presumably form the failure unwind -- confirm.
 */
1876 static int dlm_join_domain(
struct dlm_ctxt *dlm)
1879 unsigned int backoff;
1880 unsigned int total_backoff = 0;
1884 mlog(0,
"Join domain %s\n", dlm->
name);
1886 status = dlm_register_domain_handlers(dlm);
1892 status = dlm_debug_init(dlm);
1918 status = dlm_try_to_join_domain(dlm);
1923 #define DLM_JOIN_TIMEOUT_MSECS 90000
1925 if (signal_pending(
current)) {
1934 "%s after %u msecs\n", dlm->
name,
1946 backoff = (
unsigned int)(jiffies & 0x3);
1948 total_backoff += backoff;
1949 mlog(0,
"backoff %d\n", backoff);
1952 }
while (status == -
EAGAIN);
1964 dlm_unregister_domain_handlers(dlm);
1965 dlm_debug_shutdown(dlm);
1968 dlm_destroy_dlm_worker(dlm);
1974 static struct dlm_ctxt *dlm_alloc_ctxt(
const char *domain,
2024 ret = dlm_create_debugfs_subroot(dlm);
2038 INIT_LIST_HEAD(&dlm->
list);
2040 INIT_LIST_HEAD(&dlm->
reco.resources);
2041 INIT_LIST_HEAD(&dlm->
reco.received);
2042 INIT_LIST_HEAD(&dlm->
reco.node_data);
2046 dlm->
reco.state = 0;
2051 mlog(0,
"dlm->recovery_map=%p, &(dlm->recovery_map[0])=%p\n",
2090 mlog(0,
"context init: refcount %u\n",
2141 mlog(0,
"register called for domain \"%s\"\n", domain);
2145 if (signal_pending(
current)) {
2153 dlm = __dlm_lookup_domain(domain);
2158 mlog(0,
"This ctxt is not joined yet!\n");
2160 dlm_wait_on_domain_helper(
2168 "Requested locking protocol version is not "
2169 "compatible with already registered domain "
2170 "\"%s\"\n", domain);
2188 new_ctxt = dlm_alloc_ctxt(domain, key);
2212 ret = dlm_join_domain(dlm);
2225 dlm_free_ctxt_mem(new_ctxt);
2236 static void dlm_unregister_net_handlers(
void)
2241 static int dlm_register_net_handlers(
void)
2247 dlm_query_join_handler,
2254 dlm_assert_joined_handler,
2261 dlm_cancel_join_handler,
2268 dlm_query_region_handler,
2276 dlm_query_nodeinfo_handler,
2280 dlm_unregister_net_handlers();
/*
 * Module initialisation entry point (__init).
 *
 * Logs an ML_ERROR message when the o2dlm_mle, o2dlm_lockname or
 * o2dlm_lock slab caches cannot be created, then registers the network
 * handlers via dlm_register_net_handlers() and creates the debugfs root
 * via dlm_create_debugfs_root().
 *
 * NOTE(review): the dlm_unregister_net_handlers() call after the
 * debugfs step presumably unwinds a debugfs failure -- confirm.
 * NOTE(review): "Count not create o2dlm_lock slabcache" reads like a
 * typo for "Could not create"; the log string should be corrected.
 */
2340 static int __init dlm_init(
void)
2348 mlog(
ML_ERROR,
"Could not create o2dlm_mle slabcache\n");
2355 "o2dlm_lockname slabcaches\n");
2361 mlog(
ML_ERROR,
"Count not create o2dlm_lock slabcache\n");
2365 status = dlm_register_net_handlers();
2367 mlog(
ML_ERROR,
"Unable to register network handlers\n");
2371 status = dlm_create_debugfs_root();
2377 dlm_unregister_net_handlers();
2384 static void __exit dlm_exit (
void)
2386 dlm_destroy_debugfs_root();
2387 dlm_unregister_net_handlers();