Linux Kernel 3.7.1
ip_vs_proto_tcp.c
/*
 * ip_vs_proto_tcp.c:	TCP load balancing support for IPVS
 *
 * Authors:	Wensong Zhang <[email protected]>
 *		Julian Anastasov <[email protected]>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Changes:	Hans Schillstrom <[email protected]>
 *
 *		Network name space (netns) aware.
 *		Global data moved to netns i.e struct netns_ipvs
 *		tcp_timeouts table has copy per netns in a hash table per
 *		protocol ip_vs_proto_data and is handled by netns
 */

#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/tcp.h>			/* for tcphdr */
#include <net/ip.h>
#include <net/tcp.h>			/* for csum_tcpudp_magic */
#include <net/ip6_checksum.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

#include <net/ip_vs.h>

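/*
 * tcp_conn_schedule(): decide whether a new TCP packet should be
 * scheduled to a real server.  Returns 1 to let the caller continue
 * (NF_ACCEPT), or 0 with *verdict set (typically NF_DROP) when the
 * packet has been handled here.  Only SYN packets (and SYN+ACK, for
 * Active FTP) that match a configured virtual service are scheduled.
 */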
static int
tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
		  int *verdict, struct ip_vs_conn **cpp)
{
	struct net *net;
	struct ip_vs_service *svc;
	struct tcphdr _tcph, *th;
	struct ip_vs_iphdr iph;

	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);

	th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
	if (th == NULL) {
		*verdict = NF_DROP;
		return 0;
	}
	net = skb_net(skb);
	/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
	if (th->syn &&
	    (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
				     &iph.daddr, th->dest))) {
		int ignored;

		if (ip_vs_todrop(net_ipvs(net))) {
			/*
			 * It seems that we are very loaded.
			 * We have to drop this packet :(
			 */
			ip_vs_service_put(svc);
			*verdict = NF_DROP;
			return 0;
		}

		/*
		 * Let the virtual server select a real server for the
		 * incoming connection, and create a connection entry.
		 */
		*cpp = ip_vs_schedule(svc, skb, pd, &ignored);
		if (!*cpp && ignored <= 0) {
			if (!ignored)
				*verdict = ip_vs_leave(svc, skb, pd);
			else {
				ip_vs_service_put(svc);
				*verdict = NF_DROP;
			}
			return 0;
		}
		ip_vs_service_put(svc);
	}
	/* NF_ACCEPT */
	return 1;
}


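/*
 * Incremental TCP checksum updates: when only the address and port
 * (fast update) or the address and length (partial update, for
 * CHECKSUM_PARTIAL skbs) change, the checksum is adjusted from the
 * old and new values instead of being recomputed over the whole
 * segment (RFC 1624 style incremental update).
 */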
static inline void
tcp_fast_csum_update(int af, struct tcphdr *tcph,
		     const union nf_inet_addr *oldip,
		     const union nf_inet_addr *newip,
		     __be16 oldport, __be16 newport)
{
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		tcph->check =
			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
					 ip_vs_check_diff2(oldport, newport,
						~csum_unfold(tcph->check))));
	else
#endif
		tcph->check =
			csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
					 ip_vs_check_diff2(oldport, newport,
						~csum_unfold(tcph->check))));
}


static inline void
tcp_partial_csum_update(int af, struct tcphdr *tcph,
			const union nf_inet_addr *oldip,
			const union nf_inet_addr *newip,
			__be16 oldlen, __be16 newlen)
{
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		tcph->check =
			~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
					 ip_vs_check_diff2(oldlen, newlen,
						csum_unfold(tcph->check))));
	else
#endif
		tcph->check =
			~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
					ip_vs_check_diff2(oldlen, newlen,
						csum_unfold(tcph->check))));
}


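/*
 * tcp_snat_handler(): rewrite the source port of an outgoing (server
 * to client) packet to the virtual port, then fix the TCP checksum.
 * Three strategies are used: a partial-checksum update for
 * CHECKSUM_PARTIAL skbs, a fast incremental update when only address
 * and port changed, or a full recalculation after an application
 * helper has mangled the payload.
 */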
static int
tcp_snat_handler(struct sk_buff *skb,
		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
	struct tcphdr *tcph;
	unsigned int tcphoff;
	int oldlen;
	int payload_csum = 0;

#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6)
		tcphoff = sizeof(struct ipv6hdr);
	else
#endif
		tcphoff = ip_hdrlen(skb);
	oldlen = skb->len - tcphoff;

	/* csum_check requires unshared skb */
	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		int ret;

		/* Some checks before mangling */
		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
			return 0;

		/* Call application helper if needed */
		if (!(ret = ip_vs_app_pkt_out(cp, skb)))
			return 0;
		/* ret=2: csum update is needed after payload mangling */
		if (ret == 1)
			oldlen = skb->len - tcphoff;
		else
			payload_csum = 1;
	}

	tcph = (void *)skb_network_header(skb) + tcphoff;
	tcph->source = cp->vport;

	/* Adjust TCP checksums */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
					htons(oldlen),
					htons(skb->len - tcphoff));
	} else if (!payload_csum) {
		/* Only port and addr are changed, do fast csum update */
		tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
				     cp->dport, cp->vport);
		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->ip_summed = (cp->app && pp->csum_check) ?
					 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
	} else {
		/* full checksum calculation */
		tcph->check = 0;
		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
#ifdef CONFIG_IP_VS_IPV6
		if (cp->af == AF_INET6)
			tcph->check = csum_ipv6_magic(&cp->vaddr.in6,
						      &cp->caddr.in6,
						      skb->len - tcphoff,
						      cp->protocol, skb->csum);
		else
#endif
			tcph->check = csum_tcpudp_magic(cp->vaddr.ip,
							cp->caddr.ip,
							skb->len - tcphoff,
							cp->protocol,
							skb->csum);
		skb->ip_summed = CHECKSUM_UNNECESSARY;

		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
			  pp->name, tcph->check,
			  (char*)&(tcph->check) - (char*)tcph);
	}
	return 1;
}


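/*
 * tcp_dnat_handler(): the mirror of tcp_snat_handler for incoming
 * (client to server) packets: rewrite the destination port to the
 * real server's port and update the checksum using the same three
 * strategies (partial, fast incremental, or full recalculation).
 */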
static int
tcp_dnat_handler(struct sk_buff *skb,
		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
	struct tcphdr *tcph;
	unsigned int tcphoff;
	int oldlen;
	int payload_csum = 0;

#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6)
		tcphoff = sizeof(struct ipv6hdr);
	else
#endif
		tcphoff = ip_hdrlen(skb);
	oldlen = skb->len - tcphoff;

	/* csum_check requires unshared skb */
	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		int ret;

		/* Some checks before mangling */
		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
			return 0;

		/*
		 *	Attempt ip_vs_app call.
		 *	It will fix ip_vs_conn and iph ack_seq stuff
		 */
		if (!(ret = ip_vs_app_pkt_in(cp, skb)))
			return 0;
		/* ret=2: csum update is needed after payload mangling */
		if (ret == 1)
			oldlen = skb->len - tcphoff;
		else
			payload_csum = 1;
	}

	tcph = (void *)skb_network_header(skb) + tcphoff;
	tcph->dest = cp->dport;

	/*
	 *	Adjust TCP checksums
	 */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		tcp_partial_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
					htons(oldlen),
					htons(skb->len - tcphoff));
	} else if (!payload_csum) {
		/* Only port and addr are changed, do fast csum update */
		tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
				     cp->vport, cp->dport);
		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->ip_summed = (cp->app && pp->csum_check) ?
					 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
	} else {
		/* full checksum calculation */
		tcph->check = 0;
		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
#ifdef CONFIG_IP_VS_IPV6
		if (cp->af == AF_INET6)
			tcph->check = csum_ipv6_magic(&cp->caddr.in6,
						      &cp->daddr.in6,
						      skb->len - tcphoff,
						      cp->protocol, skb->csum);
		else
#endif
			tcph->check = csum_tcpudp_magic(cp->caddr.ip,
							cp->daddr.ip,
							skb->len - tcphoff,
							cp->protocol,
							skb->csum);
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
	return 1;
}


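/*
 * tcp_csum_check(): verify the TCP checksum of a received packet.
 * For CHECKSUM_NONE the sum over the TCP segment is computed first
 * and control deliberately falls through to the CHECKSUM_COMPLETE
 * case, where it is validated against the pseudo-header.  Returns 1
 * if the checksum is acceptable, 0 otherwise.
 */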
static int
tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
{
	unsigned int tcphoff;

#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		tcphoff = sizeof(struct ipv6hdr);
	else
#endif
		tcphoff = ip_hdrlen(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_NONE:
		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
	case CHECKSUM_COMPLETE:
#ifdef CONFIG_IP_VS_IPV6
		if (af == AF_INET6) {
			if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
					    &ipv6_hdr(skb)->daddr,
					    skb->len - tcphoff,
					    ipv6_hdr(skb)->nexthdr,
					    skb->csum)) {
				IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
						 "Failed checksum for");
				return 0;
			}
		} else
#endif
			if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
					      ip_hdr(skb)->daddr,
					      skb->len - tcphoff,
					      ip_hdr(skb)->protocol,
					      skb->csum)) {
				IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
						 "Failed checksum for");
				return 0;
			}
		break;
	default:
		/* No need to checksum. */
		break;
	}

	return 1;
}


#define TCP_DIR_INPUT		0
#define TCP_DIR_OUTPUT		4
#define TCP_DIR_INPUT_ONLY	8

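/*
 * tcp_state_off[] maps a packet direction to an offset into the state
 * tables below: each direction owns a group of four rows (syn, fin,
 * ack, rst), so INPUT starts at row 0, OUTPUT at row 4 and INPUT_ONLY
 * at row 8.
 */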
static const int tcp_state_off[IP_VS_DIR_LAST] = {
	[IP_VS_DIR_INPUT]		=	TCP_DIR_INPUT,
	[IP_VS_DIR_OUTPUT]		=	TCP_DIR_OUTPUT,
	[IP_VS_DIR_INPUT_ONLY]		=	TCP_DIR_INPUT_ONLY,
};

/*
 *	Timeout table[state]
 */
static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
	[IP_VS_TCP_S_NONE]		=	2*HZ,
	[IP_VS_TCP_S_ESTABLISHED]	=	15*60*HZ,
	[IP_VS_TCP_S_SYN_SENT]		=	2*60*HZ,
	[IP_VS_TCP_S_SYN_RECV]		=	1*60*HZ,
	[IP_VS_TCP_S_FIN_WAIT]		=	2*60*HZ,
	[IP_VS_TCP_S_TIME_WAIT]		=	2*60*HZ,
	[IP_VS_TCP_S_CLOSE]		=	10*HZ,
	[IP_VS_TCP_S_CLOSE_WAIT]	=	60*HZ,
	[IP_VS_TCP_S_LAST_ACK]		=	30*HZ,
	[IP_VS_TCP_S_LISTEN]		=	2*60*HZ,
	[IP_VS_TCP_S_SYNACK]		=	120*HZ,
	[IP_VS_TCP_S_LAST]		=	2*HZ,
};

static const char *const tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
	[IP_VS_TCP_S_NONE]		=	"NONE",
	[IP_VS_TCP_S_ESTABLISHED]	=	"ESTABLISHED",
	[IP_VS_TCP_S_SYN_SENT]		=	"SYN_SENT",
	[IP_VS_TCP_S_SYN_RECV]		=	"SYN_RECV",
	[IP_VS_TCP_S_FIN_WAIT]		=	"FIN_WAIT",
	[IP_VS_TCP_S_TIME_WAIT]		=	"TIME_WAIT",
	[IP_VS_TCP_S_CLOSE]		=	"CLOSE",
	[IP_VS_TCP_S_CLOSE_WAIT]	=	"CLOSE_WAIT",
	[IP_VS_TCP_S_LAST_ACK]		=	"LAST_ACK",
	[IP_VS_TCP_S_LISTEN]		=	"LISTEN",
	[IP_VS_TCP_S_SYNACK]		=	"SYNACK",
	[IP_VS_TCP_S_LAST]		=	"BUG!",
};

#define sNO IP_VS_TCP_S_NONE
#define sES IP_VS_TCP_S_ESTABLISHED
#define sSS IP_VS_TCP_S_SYN_SENT
#define sSR IP_VS_TCP_S_SYN_RECV
#define sFW IP_VS_TCP_S_FIN_WAIT
#define sTW IP_VS_TCP_S_TIME_WAIT
#define sCL IP_VS_TCP_S_CLOSE
#define sCW IP_VS_TCP_S_CLOSE_WAIT
#define sLA IP_VS_TCP_S_LAST_ACK
#define sLI IP_VS_TCP_S_LISTEN
#define sSA IP_VS_TCP_S_SYNACK

struct tcp_states_t {
	int next_state[IP_VS_TCP_S_LAST];
};

static const char * tcp_state_name(int state)
{
	if (state >= IP_VS_TCP_S_LAST)
		return "ERR!";
	return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
}

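/*
 * State transition tables: each row corresponds to an observed TCP
 * flag (syn, fin, ack, rst) within a direction group, each column to
 * the current connection state, and the cell gives the next state.
 * tcp_states_dos is the hardened variant used when secure_tcp is
 * enabled (see tcp_timeout_change).
 */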
static struct tcp_states_t tcp_states [] = {
/*	INPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},

/*	OUTPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},

/*	INPUT-ONLY */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};

static struct tcp_states_t tcp_states_dos [] = {
/*	INPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},

/*	OUTPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},

/*	INPUT-ONLY */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};

static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags)
{
	int on = (flags & 1);		/* secure_tcp */

	/*
	** FIXME: change secure_tcp to independent sysctl var
	** or make it per-service or per-app because it is valid
	** for most if not for all of the applications. Something
	** like "capabilities" (flags) for each object.
	*/
	pd->tcp_state_table = (on ? tcp_states_dos : tcp_states);
}

static inline int tcp_state_idx(struct tcphdr *th)
{
	if (th->rst)
		return 3;
	if (th->syn)
		return 0;
	if (th->fin)
		return 1;
	if (th->ack)
		return 2;
	return -1;
}

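/*
 * set_tcp_state(): look up the next connection state from the active
 * state table using the direction offset plus the flag index from
 * tcp_state_idx(), adjust the destination's active/inactive
 * connection counters when the connection enters or leaves
 * ESTABLISHED, and refresh the connection timeout from the per-netns
 * timeout table.
 */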
static inline void
set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
	      int direction, struct tcphdr *th)
{
	int state_idx;
	int new_state = IP_VS_TCP_S_CLOSE;
	int state_off = tcp_state_off[direction];

	/*
	 *    Update state offset to INPUT_ONLY if necessary
	 *    or delete NO_OUTPUT flag if output packet detected
	 */
	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
		if (state_off == TCP_DIR_OUTPUT)
			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
		else
			state_off = TCP_DIR_INPUT_ONLY;
	}

	if ((state_idx = tcp_state_idx(th)) < 0) {
		IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
		goto tcp_state_out;
	}

	new_state =
		pd->tcp_state_table[state_off+state_idx].next_state[cp->state];

  tcp_state_out:
	if (new_state != cp->state) {
		struct ip_vs_dest *dest = cp->dest;

		IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
			      "%s:%d state: %s->%s conn->refcnt:%d\n",
			      pd->pp->name,
			      ((state_off == TCP_DIR_OUTPUT) ?
			       "output " : "input "),
			      th->syn ? 'S' : '.',
			      th->fin ? 'F' : '.',
			      th->ack ? 'A' : '.',
			      th->rst ? 'R' : '.',
			      IP_VS_DBG_ADDR(cp->af, &cp->daddr),
			      ntohs(cp->dport),
			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
			      ntohs(cp->cport),
			      tcp_state_name(cp->state),
			      tcp_state_name(new_state),
			      atomic_read(&cp->refcnt));

		if (dest) {
			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
			    (new_state != IP_VS_TCP_S_ESTABLISHED)) {
				atomic_dec(&dest->activeconns);
				atomic_inc(&dest->inactconns);
				cp->flags |= IP_VS_CONN_F_INACTIVE;
			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
				   (new_state == IP_VS_TCP_S_ESTABLISHED)) {
				atomic_inc(&dest->activeconns);
				atomic_dec(&dest->inactconns);
				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
			}
		}
	}

	if (likely(pd))
		cp->timeout = pd->timeout_table[cp->state = new_state];
	else	/* What to do ? */
		cp->timeout = tcp_timeouts[cp->state = new_state];
}

/*
 *	Handle state transitions
 */
static void
tcp_state_transition(struct ip_vs_conn *cp, int direction,
		     const struct sk_buff *skb,
		     struct ip_vs_proto_data *pd)
{
	struct tcphdr _tcph, *th;

#ifdef CONFIG_IP_VS_IPV6
	int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
#else
	int ihl = ip_hdrlen(skb);
#endif

	th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
	if (th == NULL)
		return;

	spin_lock(&cp->lock);
	set_tcp_state(pd, cp, direction, th);
	spin_unlock(&cp->lock);
}

static inline __u16 tcp_app_hashkey(__be16 port)
{
	return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
		& TCP_APP_TAB_MASK;
}


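/*
 * Application helper (e.g. FTP) registration: helpers are kept in a
 * per-netns hash table keyed by port via tcp_app_hashkey(), and the
 * per-protocol appcnt tracks how many helpers are registered.
 */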
static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
{
	struct ip_vs_app *i;
	__u16 hash;
	__be16 port = inc->port;
	int ret = 0;
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);

	hash = tcp_app_hashkey(port);

	spin_lock_bh(&ipvs->tcp_app_lock);
	list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
		if (i->port == port) {
			ret = -EEXIST;
			goto out;
		}
	}
	list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
	atomic_inc(&pd->appcnt);

  out:
	spin_unlock_bh(&ipvs->tcp_app_lock);
	return ret;
}


static void
tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);

	spin_lock_bh(&ipvs->tcp_app_lock);
	atomic_dec(&pd->appcnt);
	list_del(&inc->p_list);
	spin_unlock_bh(&ipvs->tcp_app_lock);
}


static int
tcp_app_conn_bind(struct ip_vs_conn *cp)
{
	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
	int hash;
	struct ip_vs_app *inc;
	int result = 0;

	/* Default binding: bind app only for NAT */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
		return 0;

	/* Lookup application incarnations and bind the right one */
	hash = tcp_app_hashkey(cp->vport);

	spin_lock(&ipvs->tcp_app_lock);
	list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
		if (inc->port == cp->vport) {
			if (unlikely(!ip_vs_app_inc_get(inc)))
				break;
			spin_unlock(&ipvs->tcp_app_lock);

			IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
				      "%s:%u to app %s on port %u\n",
				      __func__,
				      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
				      ntohs(cp->cport),
				      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
				      ntohs(cp->vport),
				      inc->name, ntohs(inc->port));

			cp->app = inc;
			if (inc->init_conn)
				result = inc->init_conn(inc, cp);
			goto out;
		}
	}
	spin_unlock(&ipvs->tcp_app_lock);

  out:
	return result;
}


/*
 *	Set LISTEN timeout. (ip_vs_conn_put will setup timer)
 */
void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
{
	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);

	spin_lock(&cp->lock);
	cp->state = IP_VS_TCP_S_LISTEN;
	cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
			   : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
	spin_unlock(&cp->lock);
}

/* ---------------------------------------------
 *   timeouts is netns related now.
 * ---------------------------------------------
 */
static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
{
	struct netns_ipvs *ipvs = net_ipvs(net);

	ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
	spin_lock_init(&ipvs->tcp_app_lock);
	pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
							sizeof(tcp_timeouts));
	if (!pd->timeout_table)
		return -ENOMEM;
	pd->tcp_state_table = tcp_states;
	return 0;
}

static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
{
	kfree(pd->timeout_table);
}


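/*
 * Protocol handler definition: ip_vs_protocol_tcp ties the TCP
 * support together and is registered with the IPVS core, which then
 * calls these hooks for scheduling, NAT packet mangling, checksum
 * verification and state tracking of TCP connections.
 */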
struct ip_vs_protocol ip_vs_protocol_tcp = {
	.name =			"TCP",
	.protocol =		IPPROTO_TCP,
	.num_states =		IP_VS_TCP_S_LAST,
	.dont_defrag =		0,
	.init =			NULL,
	.exit =			NULL,
	.init_netns =		__ip_vs_tcp_init,
	.exit_netns =		__ip_vs_tcp_exit,
	.register_app =		tcp_register_app,
	.unregister_app =	tcp_unregister_app,
	.conn_schedule =	tcp_conn_schedule,
	.conn_in_get =		ip_vs_conn_in_get_proto,
	.conn_out_get =		ip_vs_conn_out_get_proto,
	.snat_handler =		tcp_snat_handler,
	.dnat_handler =		tcp_dnat_handler,
	.csum_check =		tcp_csum_check,
	.state_name =		tcp_state_name,
	.state_transition =	tcp_state_transition,
	.app_conn_bind =	tcp_app_conn_bind,
	.debug_packet =		ip_vs_tcpudp_debug_packet,
	.timeout_change =	tcp_timeout_change,
};