27 #define KMSG_COMPONENT "IPVS"
28 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
30 #include <linux/module.h>
31 #include <linux/kernel.h>
33 #include <linux/tcp.h>
35 #include <linux/icmp.h>
36 #include <linux/slab.h>
46 #include <linux/netfilter.h>
47 #include <linux/netfilter_ipv4.h>
49 #ifdef CONFIG_IP_VS_IPV6
51 #include <linux/netfilter_ipv6.h>
64 #ifdef CONFIG_IP_VS_PROTO_TCP
68 #ifdef CONFIG_IP_VS_DEBUG
73 #ifdef IP_VS_GENERIC_NETNS
80 #define icmp_id(icmph) (((icmph)->un).echo.id)
81 #define icmpv6_id(icmph) (icmph->icmp6_dataun.u_echo.identifier)
98 #ifdef CONFIG_IP_VS_IPV6
111 INIT_LIST_HEAD(&table[rows]);
118 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
125 u64_stats_update_begin(&s->
syncp);
127 u64_stats_update_end(&s->
syncp);
131 u64_stats_update_begin(&s->
syncp);
133 u64_stats_update_end(&s->
syncp);
137 u64_stats_update_begin(&s->
syncp);
139 u64_stats_update_end(&s->
syncp);
148 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
155 u64_stats_update_begin(&s->
syncp);
157 u64_stats_update_end(&s->
syncp);
161 u64_stats_update_begin(&s->
syncp);
163 u64_stats_update_end(&s->
syncp);
167 u64_stats_update_begin(&s->
syncp);
169 u64_stats_update_end(&s->
syncp);
196 if (
likely(pd->
pp->state_transition))
197 pd->
pp->state_transition(cp, direction, skb, pd);
201 ip_vs_conn_fill_param_persist(
const struct ip_vs_service *svc,
207 ip_vs_conn_fill_param(svc->
net, svc->
af, protocol, caddr, cport, vaddr,
210 if (p->
pe && p->
pe->fill_param)
211 return p->
pe->fill_param(p, skb);
239 ip_vs_fill_iphdr(svc->
af, skb_network_header(skb), &iph);
242 #ifdef CONFIG_IP_VS_IPV6
244 ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->
netmask);
247 snet.ip = iph.saddr.ip & svc->
netmask;
251 IP_VS_DBG_ADDR(svc->
af, &iph.saddr),
ntohs(src_port),
252 IP_VS_DBG_ADDR(svc->
af, &iph.daddr),
ntohs(dst_port),
253 IP_VS_DBG_ADDR(svc->
af, &snet));
269 int protocol = iph.protocol;
273 if (dst_port == svc->
port) {
295 if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
296 vaddr, vport, &
param) < 0) {
310 dest = svc->
scheduler->schedule(svc, skb);
312 IP_VS_DBG(1,
"p-schedule: no dest found.\n");
341 if (dport == svc->
port && dest->
port)
351 ip_vs_conn_fill_param(svc->
net, svc->
af, iph.protocol, &iph.saddr,
352 src_port, &iph.daddr, dst_port, &
param);
364 ip_vs_control_add(cp, ct);
367 ip_vs_conn_stats(cp, svc);
405 ip_vs_fill_iphdr(svc->
af, skb_network_header(skb), &iph);
406 pptr = skb_header_pointer(skb, iph.
len,
sizeof(_ports), _ports);
418 "Not scheduling FTPDATA");
428 "Not scheduling reply for existing connection");
429 __ip_vs_conn_put(cp);
437 return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
446 pr_err(
"Schedule: port zero only supported "
447 "in persistent services, "
448 "check your ipvs configuration\n");
452 dest = svc->
scheduler->schedule(svc, skb);
454 IP_VS_DBG(1,
"Schedule: no dest found.\n");
473 flags, dest, skb->
mark);
481 "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
488 ip_vs_conn_stats(cp, svc);
509 ip_vs_fill_iphdr(svc->
af, skb_network_header(skb), &iph);
511 pptr = skb_header_pointer(skb, iph.
len,
sizeof(_ports), _ports);
513 ip_vs_service_put(svc);
520 #ifdef CONFIG_IP_VS_IPV6
530 ipvs = net_ipvs(net);
539 ip_vs_service_put(svc);
542 IP_VS_DBG(6,
"%s(): create a cache_bypass entry\n", __func__);
547 &iph.
daddr, pptr[1], &p);
556 ip_vs_in_stats(cp, skb);
578 ip_vs_service_put(svc);
582 ip_vs_service_put(svc);
591 #ifdef CONFIG_IP_VS_IPV6
594 struct net *net = dev_net(skb_dst(skb)->
dev);
608 static int sysctl_snat_reroute(
struct sk_buff *skb)
610 struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
657 #ifdef CONFIG_IP_VS_IPV6
658 static inline int ip_vs_gather_frags_v6(
struct sk_buff *skb,
u_int32_t user)
665 static int ip_vs_route_me_harder(
int af,
struct sk_buff *skb)
667 #ifdef CONFIG_IP_VS_IPV6
688 struct iphdr *iph = ip_hdr(skb);
689 unsigned int icmp_offset = iph->ihl*4;
690 struct icmphdr *icmph = (
struct icmphdr *)(skb_network_header(skb) +
692 struct iphdr *ciph = (
struct iphdr *)(icmph + 1);
712 ports[1] = cp->
vport;
714 ports[0] = cp->
dport;
724 "Forwarding altered outgoing ICMP");
727 "Forwarding altered incoming ICMP");
730 #ifdef CONFIG_IP_VS_IPV6
734 struct ipv6hdr *iph = ipv6_hdr(skb);
735 unsigned int icmp_offset =
sizeof(
struct ipv6hdr);
754 ports[1] = cp->
vport;
756 ports[0] = cp->
dport;
761 skb->
len - icmp_offset,
763 skb->
csum_start = skb_network_header(skb) - skb->
head + icmp_offset;
769 (
void *)ciph - (
void *)iph,
770 "Forwarding altered outgoing ICMPv6");
773 (
void *)ciph - (
void *)iph,
774 "Forwarding altered incoming ICMPv6");
781 static int handle_response_icmp(
int af,
struct sk_buff *skb,
785 unsigned int offset,
unsigned int ihl)
787 unsigned int verdict =
NF_DROP;
790 pr_err(
"shouldn't reach here, because the box is on the "
791 "half connection in the tun/dr module.\n");
798 IP_VS_DBG_ADDR(af, snet));
804 offset += 2 *
sizeof(
__u16);
808 #ifdef CONFIG_IP_VS_IPV6
810 ip_vs_nat_icmp_v6(skb, pp, cp, 1);
815 if (ip_vs_route_me_harder(af, skb))
819 ip_vs_out_stats(cp, skb);
829 __ip_vs_conn_put(cp);
839 static int ip_vs_out_icmp(
struct sk_buff *skb,
int *related,
840 unsigned int hooknum)
844 struct iphdr _ciph, *cih;
854 if (ip_is_fragment(ip_hdr(skb))) {
855 if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
860 offset = ihl = iph->ihl * 4;
861 ic = skb_header_pointer(skb, offset,
sizeof(_icmph), &_icmph);
865 IP_VS_DBG(12,
"Outgoing ICMP (%d,%d) %pI4->%pI4\n",
884 offset +=
sizeof(_icmph);
885 cih = skb_header_pointer(skb, offset,
sizeof(_ciph), &_ciph);
899 "Checking outgoing ICMP for");
901 offset += cih->ihl * 4;
903 ip_vs_fill_iphdr(
AF_INET, cih, &ciph);
914 #ifdef CONFIG_IP_VS_IPV6
915 static int ip_vs_out_icmp_v6(
struct sk_buff *skb,
int *related,
916 unsigned int hooknum)
932 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
937 offset =
sizeof(
struct ipv6hdr);
938 ic = skb_header_pointer(skb, offset,
sizeof(_icmph), &_icmph);
942 IP_VS_DBG(12,
"Outgoing ICMPv6 (%d,%d) %pI6->%pI6\n",
961 offset +=
sizeof(_icmph);
962 cih = skb_header_pointer(skb, offset,
sizeof(_ciph), &_ciph);
976 "Checking outgoing ICMPv6 for");
978 offset +=
sizeof(
struct ipv6hdr);
980 ip_vs_fill_iphdr(
AF_INET6, cih, &ciph);
988 pp, offset,
sizeof(
struct ipv6hdr));
995 static inline int is_sctp_abort(
const struct sk_buff *skb,
int nh_len)
999 sizeof(schunk), &schunk);
1007 static inline int is_tcp_reset(
const struct sk_buff *skb,
int nh_len)
1011 th = skb_header_pointer(skb, nh_len,
sizeof(_tcph), &_tcph);
1034 #ifdef CONFIG_IP_VS_IPV6
1036 ipv6_hdr(skb)->saddr = cp->
vaddr.
in6;
1040 ip_hdr(skb)->saddr = cp->
vaddr.
ip;
1059 if (ip_vs_route_me_harder(af, skb))
1064 ip_vs_out_stats(cp, skb);
1087 ip_vs_out(
unsigned int hooknum,
struct sk_buff *skb,
int af)
1115 if (!net_ipvs(net)->
enable)
1118 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1119 #ifdef CONFIG_IP_VS_IPV6
1123 int verdict = ip_vs_out_icmp_v6(skb, &related,
1128 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1134 int verdict = ip_vs_out_icmp(skb, &related, hooknum);
1138 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1147 #ifdef CONFIG_IP_VS_IPV6
1150 if (ip_vs_gather_frags_v6(skb,
1151 ip_vs_defrag_user(hooknum)))
1155 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1159 if (ip_vs_gather_frags(skb,
1160 ip_vs_defrag_user(hooknum)))
1163 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1172 return handle_response(af, skb, pd, cp, iph.len);
1173 if (sysctl_nat_icmp_send(net) &&
1179 pptr = skb_header_pointer(skb, iph.len,
1180 sizeof(_ports), _ports);
1194 && !is_tcp_reset(skb, iph.len))
1196 && !is_sctp_abort(skb,
1198 #ifdef CONFIG_IP_VS_IPV6
1201 dev_net(skb_dst(skb)->
dev);
1219 "ip_vs_out: packet continues traversal as normal");
1229 ip_vs_reply4(
unsigned int hooknum,
struct sk_buff *skb,
1231 int (*okfn)(
struct sk_buff *))
1233 return ip_vs_out(hooknum, skb,
AF_INET);
1241 ip_vs_local_reply4(
unsigned int hooknum,
struct sk_buff *skb,
1243 int (*okfn)(
struct sk_buff *))
1245 unsigned int verdict;
1249 verdict = ip_vs_out(hooknum, skb,
AF_INET);
1254 #ifdef CONFIG_IP_VS_IPV6
1262 ip_vs_reply6(
unsigned int hooknum,
struct sk_buff *skb,
1264 int (*okfn)(
struct sk_buff *))
1266 return ip_vs_out(hooknum, skb,
AF_INET6);
1274 ip_vs_local_reply6(
unsigned int hooknum,
struct sk_buff *skb,
1276 int (*okfn)(
struct sk_buff *))
1278 unsigned int verdict;
1282 verdict = ip_vs_out(hooknum, skb,
AF_INET6);
1296 ip_vs_in_icmp(
struct sk_buff *skb,
int *related,
unsigned int hooknum)
1298 struct net *net =
NULL;
1301 struct iphdr _ciph, *cih;
1312 if (ip_is_fragment(ip_hdr(skb))) {
1313 if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
1318 offset = ihl = iph->ihl * 4;
1319 ic = skb_header_pointer(skb, offset,
sizeof(_icmph), &_icmph);
1323 IP_VS_DBG(12,
"Incoming ICMP (%d,%d) %pI4->%pI4\n",
1342 offset +=
sizeof(_icmph);
1343 cih = skb_header_pointer(skb, offset,
sizeof(_ciph), &_ciph);
1355 if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
1357 offset += cih->ihl * 4;
1358 cih = skb_header_pointer(skb, offset,
sizeof(_ciph), &_ciph);
1375 "Checking incoming ICMP for");
1378 offset += cih->ihl * 4;
1380 ip_vs_fill_iphdr(
AF_INET, cih, &ciph);
1393 IP_VS_DBG(1,
"Incoming ICMP: failed checksum from %pI4!\n",
1408 __skb_pull(skb, ihl +
sizeof(_icmph));
1409 offset2 -= ihl +
sizeof(_icmph);
1410 skb_reset_network_header(skb);
1411 IP_VS_DBG(12,
"ICMP for IPIP %pI4->%pI4: mtu=%u\n",
1412 &ip_hdr(skb)->
saddr, &ip_hdr(skb)->
daddr, mtu);
1427 if (mtu > 68 +
sizeof(
struct iphdr))
1428 mtu -=
sizeof(
struct iphdr);
1434 __skb_pull(skb, offset2);
1435 skb_reset_network_header(skb);
1436 IP_VS_DBG(12,
"Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n",
1441 ip_vs_out_stats(cp, skb);
1450 ip_vs_in_stats(cp, skb);
1452 offset += 2 *
sizeof(
__u16);
1456 __ip_vs_conn_put(cp);
1461 #ifdef CONFIG_IP_VS_IPV6
1463 ip_vs_in_icmp_v6(
struct sk_buff *skb,
int *related,
unsigned int hooknum)
1465 struct net *net =
NULL;
1474 unsigned int offset, verdict;
1480 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
1484 iph = ipv6_hdr(skb);
1485 offset =
sizeof(
struct ipv6hdr);
1486 ic = skb_header_pointer(skb, offset,
sizeof(_icmph), &_icmph);
1490 IP_VS_DBG(12,
"Incoming ICMPv6 (%d,%d) %pI6->%pI6\n",
1509 offset +=
sizeof(_icmph);
1510 cih = skb_header_pointer(skb, offset,
sizeof(_ciph), &_ciph);
1526 "Checking incoming ICMPv6 for");
1528 offset +=
sizeof(
struct ipv6hdr);
1530 ip_vs_fill_iphdr(
AF_INET6, cih, &ciph);
1537 ip_vs_in_stats(cp, skb);
1540 offset += 2 *
sizeof(
__u16);
1541 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum);
1543 __ip_vs_conn_put(cp);
1555 ip_vs_in(
unsigned int hooknum,
struct sk_buff *skb,
int af)
1577 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1579 " ignored in hook %u\n",
1581 IP_VS_DBG_ADDR(af, &iph.
daddr), hooknum);
1586 if (!net_ipvs(net)->
enable)
1589 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1594 struct sock *sk = skb->
sk;
1601 #ifdef CONFIG_IP_VS_IPV6
1605 int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum);
1609 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1615 int verdict = ip_vs_in_icmp(skb, &related, hooknum);
1619 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1642 "ip_vs_in: packet continues traversal as normal");
1647 ipvs = net_ipvs(net);
1652 if (sysctl_expire_nodest_conn(ipvs)) {
1658 __ip_vs_conn_put(cp);
1662 ip_vs_in_stats(cp, skb);
1682 pkts = sysctl_sync_threshold(ipvs);
1698 ip_vs_remote_request4(
unsigned int hooknum,
struct sk_buff *skb,
1701 int (*okfn)(
struct sk_buff *))
1703 return ip_vs_in(hooknum, skb,
AF_INET);
1711 ip_vs_local_request4(
unsigned int hooknum,
struct sk_buff *skb,
1713 int (*okfn)(
struct sk_buff *))
1715 unsigned int verdict;
1719 verdict = ip_vs_in(hooknum, skb,
AF_INET);
1724 #ifdef CONFIG_IP_VS_IPV6
1731 ip_vs_remote_request6(
unsigned int hooknum,
struct sk_buff *skb,
1734 int (*okfn)(
struct sk_buff *))
1736 return ip_vs_in(hooknum, skb,
AF_INET6);
1744 ip_vs_local_request6(
unsigned int hooknum,
struct sk_buff *skb,
1746 int (*okfn)(
struct sk_buff *))
1748 unsigned int verdict;
1752 verdict = ip_vs_in(hooknum, skb,
AF_INET6);
1770 ip_vs_forward_icmp(
unsigned int hooknum,
struct sk_buff *skb,
1772 int (*okfn)(
struct sk_buff *))
1782 if (!net_ipvs(net)->
enable)
1785 return ip_vs_in_icmp(skb, &r, hooknum);
1788 #ifdef CONFIG_IP_VS_IPV6
1790 ip_vs_forward_icmp_v6(
unsigned int hooknum,
struct sk_buff *skb,
1792 int (*okfn)(
struct sk_buff *))
1802 if (!net_ipvs(net)->
enable)
1805 return ip_vs_in_icmp_v6(skb, &r, hooknum);
1813 .hook = ip_vs_reply4,
1823 .hook = ip_vs_remote_request4,
1831 .hook = ip_vs_local_reply4,
1839 .hook = ip_vs_local_request4,
1848 .hook = ip_vs_forward_icmp,
1856 .hook = ip_vs_reply4,
1862 #ifdef CONFIG_IP_VS_IPV6
1865 .hook = ip_vs_reply6,
1875 .hook = ip_vs_remote_request6,
1883 .hook = ip_vs_local_reply6,
1891 .hook = ip_vs_local_request6,
1900 .hook = ip_vs_forward_icmp_v6,
1908 .hook = ip_vs_reply6,
1919 static int __net_init __ip_vs_init(
struct net *net)
1936 goto estimator_fail;
1975 static void __net_exit __ip_vs_cleanup(
struct net *net)
1983 IP_VS_DBG(2,
"ipvs netns %d released\n", net_ipvs(net)->
gen);
1987 static void __net_exit __ip_vs_dev_cleanup(
struct net *net)
1990 net_ipvs(net)->enable = 0;
1997 .init = __ip_vs_init,
1998 .exit = __ip_vs_cleanup,
1999 .id = &ip_vs_net_id,
2004 .exit = __ip_vs_dev_cleanup,
2010 static int __init ip_vs_init(
void)
2016 pr_err(
"can't setup control.\n");
2024 pr_err(
"can't setup connection table.\n");
2025 goto cleanup_protocol;
2038 pr_err(
"can't register hooks.\n");
2044 pr_err(
"can't register netlink/ioctl.\n");
2067 static void __exit ip_vs_cleanup(
void)