25 #define KMSG_COMPONENT "IPVS"
26 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
30 #include <linux/net.h>
31 #include <linux/kernel.h>
32 #include <linux/module.h>
35 #include <linux/slab.h>
38 #include <linux/random.h>
44 #ifndef CONFIG_IP_VS_TAB_BITS
45 #define CONFIG_IP_VS_TAB_BITS 12
76 #define CT_LOCKARRAY_BITS 5
77 #define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
78 #define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
89 static inline void ct_read_lock(
unsigned int key)
94 static inline void ct_read_unlock(
unsigned int key)
99 static inline void ct_write_lock(
unsigned int key)
104 static inline void ct_write_unlock(
unsigned int key)
109 static inline void ct_read_lock_bh(
unsigned int key)
114 static inline void ct_read_unlock_bh(
unsigned int key)
119 static inline void ct_write_lock_bh(
unsigned int key)
124 static inline void ct_write_unlock_bh(
unsigned int key)
133 static unsigned int ip_vs_conn_hashkey(
struct net *
net,
int af,
unsigned int proto,
137 #ifdef CONFIG_IP_VS_IPV6
139 return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
141 ((
size_t)net>>8)) & ip_vs_conn_tab_mask;
145 ((
size_t)net>>8)) & ip_vs_conn_tab_mask;
155 return p->
pe->hashkey_raw(p, ip_vs_conn_rnd, inverse) &
166 return ip_vs_conn_hashkey(p->
net, p->
af, p->
protocol, addr, port);
169 static unsigned int ip_vs_conn_hashkey_conn(
const struct ip_vs_conn *
cp)
173 ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->
af, cp->
protocol,
182 return ip_vs_conn_hashkey_param(&p,
false);
189 static inline int ip_vs_conn_hash(
struct ip_vs_conn *cp)
198 hash = ip_vs_conn_hashkey_conn(cp);
201 spin_lock(&cp->
lock);
204 hlist_add_head(&cp->
c_list, &ip_vs_conn_tab[hash]);
209 pr_err(
"%s(): request for already hashed, called from %pF\n",
210 __func__, __builtin_return_address(0));
214 spin_unlock(&cp->
lock);
215 ct_write_unlock(hash);
225 static inline int ip_vs_conn_unhash(
struct ip_vs_conn *cp)
231 hash = ip_vs_conn_hashkey_conn(cp);
234 spin_lock(&cp->
lock);
244 spin_unlock(&cp->
lock);
245 ct_write_unlock(hash);
264 hash = ip_vs_conn_hashkey_param(p,
false);
269 if (cp->
af == p->
af &&
275 ip_vs_conn_net_eq(cp, p->
net)) {
278 ct_read_unlock(hash);
283 ct_read_unlock(hash);
292 cp = __ip_vs_conn_in_get(p);
293 if (!cp &&
atomic_read(&ip_vs_conn_no_cport_cnt)) {
295 cport_zero_p.
cport = 0;
296 cp = __ip_vs_conn_in_get(&cport_zero_p);
303 cp ?
"hit" :
"not hit");
309 ip_vs_conn_fill_param_proto(
int af,
const struct sk_buff *
skb,
311 unsigned int proto_off,
int inverse,
315 struct net *net = skb_net(skb);
317 pptr = skb_header_pointer(skb, proto_off,
sizeof(_ports), _ports);
323 pptr[0], &iph->
daddr, pptr[1], p);
326 pptr[1], &iph->
saddr, pptr[0], p);
333 unsigned int proto_off,
int inverse)
337 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p))
351 hash = ip_vs_conn_hashkey_param(p,
false);
356 if (!ip_vs_conn_net_eq(cp, p->
net))
359 if (p->
pe == cp->
pe && p->
pe->ct_match(p, cp))
364 if (cp->
af == p->
af &&
380 ct_read_unlock(hash);
386 cp ?
"hit" :
"not hit");
404 hash = ip_vs_conn_hashkey_param(p,
true);
409 if (cp->
af == p->
af &&
414 ip_vs_conn_net_eq(cp, p->
net)) {
422 ct_read_unlock(hash);
428 ret ?
"hit" :
"not hit");
436 unsigned int proto_off,
int inverse)
440 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p))
456 __ip_vs_conn_put(cp);
465 if (ip_vs_conn_unhash(cp)) {
466 spin_lock(&cp->
lock);
472 spin_unlock(&cp->
lock);
484 static inline void ip_vs_bind_xmit(
struct ip_vs_conn *cp)
509 #ifdef CONFIG_IP_VS_IPV6
510 static inline void ip_vs_bind_xmit_v6(
struct ip_vs_conn *cp)
550 unsigned int conn_flags;
579 "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
585 ip_vs_fwd_tag(cp), cp->
state,
625 spin_lock(&cp->
lock);
627 spin_unlock(&cp->
lock);
636 ip_vs_bind_dest(cp, dest);
637 spin_unlock(&cp->
lock);
641 #ifdef CONFIG_IP_VS_IPV6
643 ip_vs_bind_xmit_v6(cp);
660 static inline void ip_vs_unbind_dest(
struct ip_vs_conn *cp)
668 "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d "
674 ip_vs_fwd_tag(cp), cp->
state,
679 if (!(cp->
flags & IP_VS_CONN_F_TEMPLATE)) {
694 if (ip_vs_dest_totalconns(dest) < dest->
l_threshold)
697 if (ip_vs_dest_totalconns(dest) * 4 < dest->
u_threshold * 3)
712 static int expire_quiescent_template(
struct netns_ipvs *ipvs,
731 struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
736 if ((dest ==
NULL) ||
738 expire_quiescent_template(ipvs, dest)) {
740 "protocol %s s:%s:%d v:%s:%d "
743 IP_VS_DBG_ADDR(ct->
af, &ct->
caddr),
745 IP_VS_DBG_ADDR(ct->
af, &ct->
vaddr),
747 IP_VS_DBG_ADDR(ct->
af, &ct->
daddr),
754 if (ip_vs_conn_unhash(ct)) {
772 static void ip_vs_conn_expire(
unsigned long data)
775 struct net *net = ip_vs_conn_net(cp);
802 if (timer_pending(&cp->
timer))
807 ip_vs_control_del(cp);
824 ip_vs_unbind_dest(cp);
837 IP_VS_DBG(7,
"delayed: conn->refcnt-1=%d conn->n_control=%d\n",
868 cp = kmem_cache_zalloc(ip_vs_conn_cachep,
GFP_ATOMIC);
874 INIT_HLIST_NODE(&cp->
c_list);
876 ip_vs_conn_net_set(cp, p->
net);
879 ip_vs_addr_copy(p->
af, &cp->caddr, p->
caddr);
880 cp->cport = p->
cport;
881 ip_vs_addr_copy(p->
af, &cp->vaddr, p->
vaddr);
882 cp->vport = p->
vport;
889 if (flags & IP_VS_CONN_F_TEMPLATE && p->
pe) {
912 ip_vs_bind_dest(cp, dest);
920 #ifdef CONFIG_IP_VS_IPV6
922 ip_vs_bind_xmit_v6(cp);
937 if (ip_vs_conntrack_enabled(ipvs))
949 #ifdef CONFIG_PROC_FS
950 struct ip_vs_iter_state {
955 static void *ip_vs_conn_array(
struct seq_file *seq, loff_t
pos)
959 struct ip_vs_iter_state *iter = seq->
private;
963 ct_read_lock_bh(idx);
966 iter->l = &ip_vs_conn_tab[
idx];
970 ct_read_unlock_bh(idx);
976 static void *ip_vs_conn_seq_start(
struct seq_file *seq, loff_t *pos)
978 struct ip_vs_iter_state *iter = seq->
private;
984 static void *ip_vs_conn_seq_next(
struct seq_file *seq,
void *
v, loff_t *pos)
987 struct ip_vs_iter_state *iter = seq->
private;
994 return ip_vs_conn_array(seq, 0);
997 if ((e = cp->
c_list.next))
1000 idx = l - ip_vs_conn_tab;
1001 ct_read_unlock_bh(idx);
1003 while (++idx < ip_vs_conn_tab_size) {
1004 ct_read_lock_bh(idx);
1006 iter->l = &ip_vs_conn_tab[
idx];
1009 ct_read_unlock_bh(idx);
1015 static void ip_vs_conn_seq_stop(
struct seq_file *seq,
void *v)
1017 struct ip_vs_iter_state *iter = seq->
private;
1021 ct_read_unlock_bh(l - ip_vs_conn_tab);
1024 static int ip_vs_conn_seq_show(
struct seq_file *seq,
void *v)
1029 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n");
1032 struct net *net = seq_file_net(seq);
1036 if (!ip_vs_conn_net_eq(cp, net))
1041 memcpy(pe_data + 1, cp->
pe->name, len);
1042 pe_data[len + 1] =
' ';
1044 len += cp->
pe->show_pe_data(cp, pe_data + len);
1046 pe_data[len] =
'\0';
1048 #ifdef CONFIG_IP_VS_IPV6
1051 "%pI6 %04X %-11s %7lu%s\n",
1057 (cp->
timer.expires-jiffies)/
HZ, pe_data);
1061 "%-3s %08X %04X %08X %04X"
1062 " %08X %04X %-11s %7lu%s\n",
1068 (cp->
timer.expires-jiffies)/
HZ, pe_data);
1074 .
start = ip_vs_conn_seq_start,
1075 .next = ip_vs_conn_seq_next,
1076 .stop = ip_vs_conn_seq_stop,
1077 .show = ip_vs_conn_seq_show,
1083 sizeof(
struct ip_vs_iter_state));
1088 .open = ip_vs_conn_open,
1094 static const char *ip_vs_origin_name(
unsigned int flags)
1096 if (flags & IP_VS_CONN_F_SYNC)
1102 static int ip_vs_conn_sync_seq_show(
struct seq_file *seq,
void *v)
1107 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
1110 struct net *net = seq_file_net(seq);
1112 if (!ip_vs_conn_net_eq(cp, net))
1115 #ifdef CONFIG_IP_VS_IPV6
1117 seq_printf(seq,
"%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %-6s %7lu\n",
1123 ip_vs_origin_name(cp->
flags),
1124 (cp->
timer.expires-jiffies)/
HZ);
1128 "%-3s %08X %04X %08X %04X "
1129 "%08X %04X %-11s %-6s %7lu\n",
1135 ip_vs_origin_name(cp->
flags),
1136 (cp->
timer.expires-jiffies)/
HZ);
1142 .
start = ip_vs_conn_seq_start,
1143 .next = ip_vs_conn_seq_next,
1144 .stop = ip_vs_conn_seq_stop,
1145 .show = ip_vs_conn_sync_seq_show,
1148 static int ip_vs_conn_sync_open(
struct inode *inode,
struct file *file)
1150 return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
1151 sizeof(
struct ip_vs_iter_state));
1156 .open = ip_vs_conn_sync_open,
1168 static inline int todrop_entry(
struct ip_vs_conn *cp)
1174 static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
1175 static char todrop_counter[9] = {0};
1187 if (i > 8 || i < 0)
return 0;
1189 if (!todrop_rate[i])
return 0;
1190 if (--todrop_counter[i] > 0)
return 0;
1192 todrop_counter[
i] = todrop_rate[
i];
1205 for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) {
1206 unsigned int hash =
net_random() & ip_vs_conn_tab_mask;
1212 ct_write_lock_bh(hash);
1215 if (cp->
flags & IP_VS_CONN_F_TEMPLATE)
1218 if (!ip_vs_conn_net_eq(cp, net))
1227 if (todrop_entry(cp))
1235 if (!todrop_entry(cp))
1246 ct_write_unlock_bh(hash);
1254 static void ip_vs_conn_flush(
struct net *net)
1261 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
1267 ct_write_lock_bh(idx);
1270 if (!ip_vs_conn_net_eq(cp, net))
1279 ct_write_unlock_bh(idx);
1306 ip_vs_conn_flush(net);
1316 ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
1317 ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
1322 ip_vs_conn_tab =
vmalloc(ip_vs_conn_tab_size *
sizeof(*ip_vs_conn_tab));
1323 if (!ip_vs_conn_tab)
1330 if (!ip_vs_conn_cachep) {
1331 vfree(ip_vs_conn_tab);
1335 pr_info(
"Connection hash table configured "
1336 "(size=%d, memory=%ldKbytes)\n",
1337 ip_vs_conn_tab_size,
1338 (
long)(ip_vs_conn_tab_size*
sizeof(
struct list_head))/1024);
1339 IP_VS_DBG(0,
"Each connection entry needs %Zd bytes at least\n",
1342 for (idx = 0; idx < ip_vs_conn_tab_size; idx++)
1359 vfree(ip_vs_conn_tab);