/*
 * Log prefix for this file: pr_fmt() prepends KMSG_COMPONENT and ": " to
 * the format string it is given, so a format "x" becomes "IPVS: x".
 * NOTE(review): the pr_err()/pr_info() calls in this file are expected to
 * pick this up through the kernel printk helpers, which is presumably why
 * both macros sit ahead of the #include lines -- keep that ordering.
 */
34 #define KMSG_COMPONENT "IPVS"
35 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
37 #include <linux/module.h>
38 #include <linux/slab.h>
40 #include <linux/net.h>
45 #include <linux/igmp.h>
46 #include <linux/udp.h>
49 #include <linux/wait.h>
50 #include <linux/kernel.h>
52 #include <asm/unaligned.h>
/*
 * Fixed parameters of the sync channel.
 * IP_VS_SYNC_GROUP: 0xe0000051 reads as 224.0.0.81 when taken as an IPv4
 *   address in host byte order; presumably the multicast group the sync
 *   sockets join/connect to -- confirm at the point of use.
 * IP_VS_SYNC_PORT: port number 8848; presumably the UDP port of the sync
 *   sockets -- confirm at the point of use.
 * SYNC_PROTO_VER: sync protocol version number (1).
 */
59 #define IP_VS_SYNC_GROUP 0xe0000051
60 #define IP_VS_SYNC_PORT 8848
62 #define SYNC_PROTO_VER 1
/*
 * STYPE_INET6 is a bit position (0) and STYPE_F_INET6 the matching
 * single-bit mask (0x1).
 * NOTE(review): presumably tested against the "type" field of a sync
 * connection entry to tell IPv6 entries from IPv4 ones -- confirm.
 */
179 #define STYPE_INET6 0
180 #define STYPE_F_INET6 (1 << STYPE_INET6)
/*
 * SVER_MASK selects the low 12 bits of a value and SVER_SHIFT (12) moves
 * the bits above them down to bit 0.
 * NOTE(review): presumably these split a combined 16-bit version/size
 * field into "size" (low 12 bits) and "version" (high bits) -- confirm
 * against the message parsing code.
 */
182 #define SVER_SHIFT 12
183 #define SVER_MASK 0x0fff
/*
 * Option type codes for the optional parameters of a sync connection
 * entry. The names suggest: sequence data (1), persistence-engine data (2),
 * persistence-engine name (3); IPVS_OPT_PARAM is 7.
 * NOTE(review): exact wire meaning of each code is not visible here --
 * confirm against the option parsing loop.
 */
185 #define IPVS_OPT_SEQ_DATA 1
186 #define IPVS_OPT_PE_DATA 2
187 #define IPVS_OPT_PE_NAME 3
188 #define IPVS_OPT_PARAM 7
/*
 * Bit flags derived from the type codes above as 1 << (code - 1):
 * SEQ_DATA -> 0x01, PE_DATA -> 0x02, PE_NAME -> 0x04, PARAM -> 0x40.
 * NOTE(review): presumably the first three are accumulated in an
 * "options seen" bitmask to detect duplicates, while IPVS_OPT_F_PARAM is
 * applied as a mask on the received option type -- confirm.
 */
190 #define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
191 #define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
192 #define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
193 #define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
/*
 * Sizes of a version-0 sync connection entry:
 * SIMPLE_CONN_SIZE is the bare struct ip_vs_sync_conn_v0;
 * FULL_CONN_SIZE is that struct followed by one
 * struct ip_vs_sync_conn_options.
 */
203 #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
204 #define FULL_CONN_SIZE \
205 (sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
/*
 * SYNC_MESG_HEADER_LEN: presumably the length in bytes of the sync message
 *   header (4) -- confirm against the message struct.
 * MAX_CONNS_PER_SYNCBUFF: upper bound (255) on connection entries in one
 *   sync buffer; NOTE(review): looks like the largest value an 8-bit
 *   counter can hold -- confirm against the nr_conns field type.
 */
242 #define SYNC_MESG_HEADER_LEN 4
243 #define MAX_CONNS_PER_SYNCBUFF 255
323 ip_vs_sync_buff_create(
struct netns_ipvs *ipvs)
335 sb->
mesg->reserved = 0;
339 sb->
mesg->nr_conns = 0;
354 static inline void sb_queue_tail(
struct netns_ipvs *ipvs,
370 ip_vs_sync_buff_release(sb);
405 ip_vs_sync_buff_create_v0(
struct netns_ipvs *ipvs)
437 static int ip_vs_sync_conn_needed(
struct netns_ipvs *ipvs,
443 unsigned int sync_refresh_period;
451 if (!((1 << cp->
state) &
462 if (!((1 << cp->
state) &
476 sync_refresh_period = sysctl_sync_refresh_period(ipvs);
477 if (sync_refresh_period > 0) {
478 long diff = n - orig;
484 if (
abs(diff) <
min_t(
long, sync_refresh_period, min_diff)) {
487 if (retries >= sysctl_sync_retries(ipvs))
490 (sync_refresh_period >> 3)))
495 sync_period = sysctl_sync_period(ipvs);
496 if (sync_period > 0) {
498 pkts % sync_period != sysctl_sync_threshold(ipvs))
500 }
else if (sync_refresh_period <= 0 &&
501 pkts != sysctl_sync_threshold(ipvs))
507 return n == orig ||
force;
531 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
540 id = select_master_thread_id(ipvs, cp);
547 sb_queue_tail(ipvs, ms);
553 buff = ip_vs_sync_buff_create_v0(ipvs);
556 pr_err(
"ip_vs_sync_buff_create failed.\n");
590 sb_queue_tail(ipvs, ms);
601 pkts = sysctl_sync_threshold(ipvs);
620 unsigned int len, pe_name_len,
pad;
623 if (sysctl_sync_ver(ipvs) == 0) {
631 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
650 id = select_master_thread_id(ipvs, cp);
653 #ifdef CONFIG_IP_VS_IPV6
666 len += pe_name_len + 2;
676 sb_queue_tail(ipvs, ms);
684 buff = ip_vs_sync_buff_create(ipvs);
687 pr_err(
"ip_vs_sync_buff_create failed.\n");
695 buff->
head += pad + len;
696 m->
size += pad + len;
716 #ifdef CONFIG_IP_VS_IPV6
747 *(p++) = pe_name_len;
748 memcpy(p, cp->
pe->name, pe_name_len);
763 pkts = sysctl_sync_threshold(ipvs);
773 __u8 *pe_data,
unsigned int pe_data_len,
774 __u8 *pe_name,
unsigned int pe_name_len)
776 #ifdef CONFIG_IP_VS_IPV6
778 ip_vs_conn_fill_param(net, af, sc->
v6.protocol,
785 ip_vs_conn_fill_param(net, af, sc->
v4.protocol,
795 memcpy(buff, pe_name, pe_name_len);
799 IP_VS_DBG(3,
"BACKUP, no %s engine found/loaded\n",
811 module_put(p->
pe->module);
829 unsigned long timeout,
__u32 fwmark,
846 spin_lock(&cp->
lock);
848 !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
860 spin_unlock(&cp->
lock);
873 param->
vport, protocol, fwmark, flags);
881 IP_VS_DBG(2,
"BACKUP, add new conn. failed\n");
908 if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->
timeout_table)
919 static void ip_vs_process_message_v0(
struct net *net,
const char *
buffer,
944 if (p > buffer+buflen) {
945 IP_VS_ERR_RL(
"BACKUP v0, Dropping buffer bogus conn options\n");
954 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
957 IP_VS_DBG(2,
"BACKUP v0, Unsupported protocol %u\n",
962 IP_VS_DBG(2,
"BACKUP v0, Invalid %s state %u\n",
969 IP_VS_DBG(2,
"BACKUP v0, Invalid template state %u\n",
991 static inline int ip_vs_proc_seqopt(
__u8 *p,
unsigned int plen,
1000 IP_VS_DBG(2,
"BACKUP, bogus conn options length\n");
1004 IP_VS_DBG(2,
"BACKUP, conn options found twice\n");
1013 static int ip_vs_proc_str(
__u8 *p,
unsigned int plen,
unsigned int *
data_len,
1017 if (plen > maxlen) {
1018 IP_VS_DBG(2,
"BACKUP, bogus par.data len > %d\n", maxlen);
1021 if (*opt_flags & flag) {
1022 IP_VS_DBG(2,
"BACKUP, Par.data found twice 0x%x\n", flag);
1033 static inline int ip_vs_proc_sync_conn(
struct net *net,
__u8 *p,
__u8 *msg_end)
1040 unsigned int af,
state, pe_data_len=0, pe_name_len=0;
1048 #ifdef CONFIG_IP_VS_IPV6
1052 IP_VS_DBG(3,
"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
1056 }
else if (!s->
v4.type) {
1066 while (p < msg_end) {
1075 if (!plen || ((p + plen) > msg_end))
1080 if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
1085 if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
1092 if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
1101 IP_VS_DBG(3,
"BACKUP, Unknown mandatory param %d found\n",
1102 ptype & ~IPVS_OPT_F_PARAM);
1115 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
1118 IP_VS_DBG(3,
"BACKUP, Unsupported protocol %u\n",
1124 IP_VS_DBG(3,
"BACKUP, Invalid %s state %u\n",
1132 IP_VS_DBG(3,
"BACKUP, Invalid template state %u\n",
1137 if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data,
1138 pe_data_len, pe_name, pe_name_len)) {
1144 ip_vs_proc_conn(net, ¶m, flags, state, s->
v4.protocol, af,
1149 #ifdef CONFIG_IP_VS_IPV6
1151 ip_vs_proc_conn(net, ¶m, flags, state, s->
v6.protocol, af,
1160 IP_VS_DBG(2,
"BACKUP, Single msg dropped err:%d\n", retc);
1169 static void ip_vs_process_message(
struct net *net,
__u8 *buffer,
1170 const size_t buflen)
1178 IP_VS_DBG(2,
"BACKUP, message header too short\n");
1184 if (buflen != m2->
size) {
1185 IP_VS_DBG(2,
"BACKUP, bogus message size\n");
1195 && (m2->
spare == 0)) {
1206 if (p +
sizeof(s->
v4) > buffer+buflen) {
1214 if (msg_end > buffer+buflen) {
1215 IP_VS_ERR_RL(
"BACKUP, Dropping buffer, msg > buffer\n");
1219 IP_VS_ERR_RL(
"BACKUP, Dropping buffer, Unknown version %d\n",
1224 retc = ip_vs_proc_sync_conn(net, p, msg_end);
1226 IP_VS_ERR_RL(
"BACKUP, Dropping buffer, Err: %d in decoding\n",
1231 msg_end = p + ((size + 3) & ~3);
1235 ip_vs_process_message_v0(net, buffer, buflen);
1244 static void set_sock_size(
struct sock *
sk,
int mode,
int val)
1266 static void set_mcast_loop(
struct sock *sk,
u_char loop)
1292 static int set_mcast_if(
struct sock *sk,
char *ifname)
1296 struct net *net = sock_net(sk);
1302 if (sk->sk_bound_dev_if && dev->
ifindex != sk->sk_bound_dev_if)
1318 static int set_sync_mesg_maxlen(
struct net *net,
int sync_state)
1329 num = (dev->
mtu -
sizeof(
struct iphdr) -
1334 IP_VS_DBG(7,
"setting the maximum length of sync sending "
1343 IP_VS_DBG(7,
"setting the maximum length of sync receiving "
1359 struct net *net = sock_net(sk);
1364 memset(&mreq, 0,
sizeof(mreq));
1370 if (sk->sk_bound_dev_if && dev->
ifindex != sk->sk_bound_dev_if)
1373 mreq.imr_ifindex = dev->
ifindex;
1383 static int bind_mcastif_addr(
struct socket *
sock,
char *ifname)
1385 struct net *net = sock_net(sock->
sk);
1396 pr_err(
"You probably need to specify IP address on "
1397 "multicast interface.\n");
1399 IP_VS_DBG(7,
"binding socket with (%s) %pI4\n",
1404 sin.sin_addr.s_addr =
addr;
1407 return sock->
ops->bind(sock, (
struct sockaddr*)&sin,
sizeof(sin));
1413 static struct socket *make_send_sock(
struct net *net,
int id)
1428 pr_err(
"Error during creation of socket; terminating\n");
1429 return ERR_PTR(result);
1436 sk_change_net(sock->
sk, net);
1439 pr_err(
"Error setting outbound mcast interface\n");
1443 set_mcast_loop(sock->
sk, 0);
1444 set_mcast_ttl(sock->
sk, 1);
1445 result = sysctl_sync_sock_size(ipvs);
1447 set_sock_size(sock->
sk, 1, result);
1451 pr_err(
"Error binding address of the mcast interface\n");
1455 result = sock->
ops->connect(sock, (
struct sockaddr *) &mcast_addr,
1458 pr_err(
"Error connecting to the multicast addr\n");
1466 return ERR_PTR(result);
1473 static struct socket *make_receive_sock(
struct net *net,
int id)
1488 pr_err(
"Error during creation of socket; terminating\n");
1489 return ERR_PTR(result);
1496 sk_change_net(sock->
sk, net);
1499 result = sysctl_sync_sock_size(ipvs);
1501 set_sock_size(sock->
sk, 0, result);
1503 result = sock->
ops->bind(sock, (
struct sockaddr *) &mcast_addr,
1506 pr_err(
"Error binding to the multicast addr\n");
1511 result = join_mcast_group(sock->
sk,
1515 pr_err(
"Error joining to the multicast group\n");
1523 return ERR_PTR(result);
1528 ip_vs_send_async(
struct socket *sock,
const char *buffer,
const size_t length)
1535 iov.iov_base = (
void *)buffer;
1555 ret = ip_vs_send_async(sock, (
char *)msg, msize);
1556 if (ret >= 0 || ret == -
EAGAIN)
1558 pr_err(
"ip_vs_send_async error %d\n", ret);
1563 ip_vs_receive(
struct socket *sock,
char *buffer,
const size_t buflen)
1573 iov.iov_len = (
size_t)buflen;
1607 sb = sb_dequeue(ipvs, ms);
1614 static int sync_thread_master(
void *data)
1619 struct sock *sk = tinfo->
sock->sk;
1622 pr_info(
"sync thread started: state = MASTER, mcast_ifn = %s, "
1623 "syncid = %d, id = %d\n",
1627 sb = next_sync_buff(ipvs, ms);
1634 while (ip_vs_send_sync_msg(tinfo->
sock, sb->
mesg) < 0) {
1638 sock_writeable(sk) ||
1644 ip_vs_sync_buff_release(sb);
1650 ip_vs_sync_buff_release(sb);
1653 while ((sb = sb_dequeue(ipvs, ms)))
1654 ip_vs_sync_buff_release(sb);
1658 sb = get_curr_sync_buff(ipvs, ms, 0);
1660 ip_vs_sync_buff_release(sb);
1670 static int sync_thread_backup(
void *data)
1676 pr_info(
"sync thread started: state = BACKUP, mcast_ifn = %s, "
1677 "syncid = %d, id = %d\n",
1682 !skb_queue_empty(&tinfo->
sock->sk->sk_receive_queue)
1686 while (!skb_queue_empty(&(tinfo->
sock->sk->sk_receive_queue))) {
1687 len = ip_vs_receive(tinfo->
sock, tinfo->
buf,
1691 pr_err(
"receiving message error\n");
1698 ip_vs_process_message(tinfo->
net, tinfo->
buf, len);
1724 IP_VS_DBG(7,
"Each ip_vs_sync_conn entry needs %Zd bytes\n",
1740 name =
"ipvs-m:%d:%d";
1741 threadfn = sync_thread_master;
1749 name =
"ipvs-b:%d:%d";
1750 threadfn = sync_thread_backup;
1762 for (
id = 0;
id <
count;
id++, ms++) {
1767 master_wakeup_work_handler);
1771 array = kzalloc(count *
sizeof(
struct task_struct *),
1776 set_sync_mesg_maxlen(net, state);
1779 for (
id = 0;
id <
count;
id++) {
1781 sock = make_send_sock(net,
id);
1783 sock = make_receive_sock(net,
id);
1785 result = PTR_ERR(sock);
1803 result = PTR_ERR(task);
1836 while (count-- > 0) {
1883 pr_info(
"stopping master sync thread %d ...\n",
1902 pr_info(
"stopping backup sync thread %d ...\n",
1903 task_pid_nr(array[
id]));
1938 if (retc && retc != -
ESRCH)
1939 pr_err(
"Failed to stop Master Daemon\n");
1942 if (retc && retc != -
ESRCH)
1943 pr_err(
"Failed to stop Backup Daemon\n");