Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
ip_vs_xmit.c
Go to the documentation of this file.
1 /*
2  * ip_vs_xmit.c: various packet transmitters for IPVS
3  *
4  * Authors: Wensong Zhang <[email protected]>
5  * Julian Anastasov <[email protected]>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version
10  * 2 of the License, or (at your option) any later version.
11  *
12  * Changes:
13  *
14  * Description of forwarding methods:
15  * - all transmitters are called from LOCAL_IN (remote clients) and
16  * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD
17  * - not all connections have destination server, for example,
18  * connections in backup server when fwmark is used
19  * - bypass connections use daddr from packet
20  * LOCAL_OUT rules:
21  * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
22  * - skb->pkt_type is not set yet
23  * - the only place where we can see skb->sk != NULL
24  */
25 
26 #define KMSG_COMPONENT "IPVS"
27 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
28 
29 #include <linux/kernel.h>
30 #include <linux/slab.h>
31 #include <linux/tcp.h> /* for tcphdr */
32 #include <net/ip.h>
33 #include <net/tcp.h> /* for csum_tcpudp_magic */
34 #include <net/udp.h>
35 #include <net/icmp.h> /* for icmp_send */
36 #include <net/route.h> /* for ip_route_output */
37 #include <net/ipv6.h>
38 #include <net/ip6_route.h>
39 #include <net/addrconf.h>
40 #include <linux/icmpv6.h>
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv4.h>
43 
44 #include <net/ip_vs.h>
45 
/* Route lookup mode flags accepted by the output-route helpers below */
enum {
	IP_VS_RT_MODE_LOCAL	= 1 << 0,	/* Allow local dest */
	IP_VS_RT_MODE_NON_LOCAL	= 1 << 1,	/* Allow non-local dest */
	IP_VS_RT_MODE_RDR	= 1 << 2,	/* Allow redirect from remote
						 * daddr to local
						 */
	IP_VS_RT_MODE_CONNECT	= 1 << 3,	/* Always bind route to saddr */
	IP_VS_RT_MODE_KNOWN_NH	= 1 << 4,	/* Route via remote addr */
};
55 
56 /*
57  * Destination cache to speed up outgoing route lookup
58  */
59 static inline void
60 __ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
61  u32 dst_cookie)
62 {
63  struct dst_entry *old_dst;
64 
65  old_dst = dest->dst_cache;
66  dest->dst_cache = dst;
67  dest->dst_rtos = rtos;
68  dest->dst_cookie = dst_cookie;
69  dst_release(old_dst);
70 }
71 
72 static inline struct dst_entry *
73 __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
74 {
75  struct dst_entry *dst = dest->dst_cache;
76 
77  if (!dst)
78  return NULL;
79  if ((dst->obsolete || rtos != dest->dst_rtos) &&
80  dst->ops->check(dst, dest->dst_cookie) == NULL) {
81  dest->dst_cache = NULL;
82  dst_release(dst);
83  return NULL;
84  }
85  dst_hold(dst);
86  return dst;
87 }
88 
89 static inline bool
90 __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
91 {
92  if (IP6CB(skb)->frag_max_size) {
93  /* frag_max_size tell us that, this packet have been
94  * defragmented by netfilter IPv6 conntrack module.
95  */
96  if (IP6CB(skb)->frag_max_size > mtu)
97  return true; /* largest fragment violate MTU */
98  }
99  else if (skb->len > mtu && !skb_is_gso(skb)) {
100  return true; /* Packet size violate MTU size */
101  }
102  return false;
103 }
104 
105 /* Get route to daddr, update *saddr, optionally bind route to saddr */
106 static struct rtable *do_output_route4(struct net *net, __be32 daddr,
107  u32 rtos, int rt_mode, __be32 *saddr)
108 {
109  struct flowi4 fl4;
110  struct rtable *rt;
111  int loop = 0;
112 
113  memset(&fl4, 0, sizeof(fl4));
114  fl4.daddr = daddr;
115  fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
116  fl4.flowi4_tos = rtos;
117  fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
119 
120 retry:
121  rt = ip_route_output_key(net, &fl4);
122  if (IS_ERR(rt)) {
123  /* Invalid saddr ? */
124  if (PTR_ERR(rt) == -EINVAL && *saddr &&
125  rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
126  *saddr = 0;
127  flowi4_update_output(&fl4, 0, rtos, daddr, 0);
128  goto retry;
129  }
130  IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
131  return NULL;
132  } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
133  ip_rt_put(rt);
134  *saddr = fl4.saddr;
135  flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr);
136  loop++;
137  goto retry;
138  }
139  *saddr = fl4.saddr;
140  return rt;
141 }
142 
143 /* Get route to destination or remote server */
144 static struct rtable *
145 __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
146  __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr)
147 {
148  struct net *net = dev_net(skb_dst(skb)->dev);
149  struct rtable *rt; /* Route to the other host */
150  struct rtable *ort; /* Original route */
151  int local;
152 
153  if (dest) {
154  spin_lock(&dest->dst_lock);
155  if (!(rt = (struct rtable *)
156  __ip_vs_dst_check(dest, rtos))) {
157  rt = do_output_route4(net, dest->addr.ip, rtos,
158  rt_mode, &dest->dst_saddr.ip);
159  if (!rt) {
160  spin_unlock(&dest->dst_lock);
161  return NULL;
162  }
163  __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
164  IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
165  "rtos=%X\n",
166  &dest->addr.ip, &dest->dst_saddr.ip,
167  atomic_read(&rt->dst.__refcnt), rtos);
168  }
169  daddr = dest->addr.ip;
170  if (ret_saddr)
171  *ret_saddr = dest->dst_saddr.ip;
172  spin_unlock(&dest->dst_lock);
173  } else {
174  __be32 saddr = htonl(INADDR_ANY);
175 
176  /* For such unconfigured boxes avoid many route lookups
177  * for performance reasons because we do not remember saddr
178  */
179  rt_mode &= ~IP_VS_RT_MODE_CONNECT;
180  rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
181  if (!rt)
182  return NULL;
183  if (ret_saddr)
184  *ret_saddr = saddr;
185  }
186 
187  local = rt->rt_flags & RTCF_LOCAL;
188  if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
189  rt_mode)) {
190  IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
191  (rt->rt_flags & RTCF_LOCAL) ?
192  "local":"non-local", &daddr);
193  ip_rt_put(rt);
194  return NULL;
195  }
196  if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
197  !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
198  IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
199  "requires NAT method, dest: %pI4\n",
200  &ip_hdr(skb)->daddr, &daddr);
201  ip_rt_put(rt);
202  return NULL;
203  }
204  if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
205  IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
206  "to non-local address, dest: %pI4\n",
207  &ip_hdr(skb)->saddr, &daddr);
208  ip_rt_put(rt);
209  return NULL;
210  }
211 
212  return rt;
213 }
214 
215 /* Reroute packet to local IPv4 stack after DNAT */
216 static int
217 __ip_vs_reroute_locally(struct sk_buff *skb)
218 {
219  struct rtable *rt = skb_rtable(skb);
220  struct net_device *dev = rt->dst.dev;
221  struct net *net = dev_net(dev);
222  struct iphdr *iph = ip_hdr(skb);
223 
224  if (rt_is_input_route(rt)) {
225  unsigned long orefdst = skb->_skb_refdst;
226 
227  if (ip_route_input(skb, iph->daddr, iph->saddr,
228  iph->tos, skb->dev))
229  return 0;
230  refdst_drop(orefdst);
231  } else {
232  struct flowi4 fl4 = {
233  .daddr = iph->daddr,
234  .saddr = iph->saddr,
235  .flowi4_tos = RT_TOS(iph->tos),
236  .flowi4_mark = skb->mark,
237  };
238 
239  rt = ip_route_output_key(net, &fl4);
240  if (IS_ERR(rt))
241  return 0;
242  if (!(rt->rt_flags & RTCF_LOCAL)) {
243  ip_rt_put(rt);
244  return 0;
245  }
246  /* Drop old route. */
247  skb_dst_drop(skb);
248  skb_dst_set(skb, &rt->dst);
249  }
250  return 1;
251 }
252 
253 #ifdef CONFIG_IP_VS_IPV6
254 
255 static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
256 {
257  return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
258 }
259 
260 static struct dst_entry *
261 __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
262  struct in6_addr *ret_saddr, int do_xfrm)
263 {
264  struct dst_entry *dst;
265  struct flowi6 fl6 = {
266  .daddr = *daddr,
267  };
268 
269  dst = ip6_route_output(net, NULL, &fl6);
270  if (dst->error)
271  goto out_err;
272  if (!ret_saddr)
273  return dst;
274  if (ipv6_addr_any(&fl6.saddr) &&
275  ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
276  &fl6.daddr, 0, &fl6.saddr) < 0)
277  goto out_err;
278  if (do_xfrm) {
279  dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
280  if (IS_ERR(dst)) {
281  dst = NULL;
282  goto out_err;
283  }
284  }
285  *ret_saddr = fl6.saddr;
286  return dst;
287 
288 out_err:
289  dst_release(dst);
290  IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
291  return NULL;
292 }
293 
294 /*
295  * Get route to destination or remote server
296  */
297 static struct rt6_info *
298 __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
299  struct in6_addr *daddr, struct in6_addr *ret_saddr,
300  int do_xfrm, int rt_mode)
301 {
302  struct net *net = dev_net(skb_dst(skb)->dev);
303  struct rt6_info *rt; /* Route to the other host */
304  struct rt6_info *ort; /* Original route */
305  struct dst_entry *dst;
306  int local;
307 
308  if (dest) {
309  spin_lock(&dest->dst_lock);
310  rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
311  if (!rt) {
312  u32 cookie;
313 
314  dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
315  &dest->dst_saddr.in6,
316  do_xfrm);
317  if (!dst) {
318  spin_unlock(&dest->dst_lock);
319  return NULL;
320  }
321  rt = (struct rt6_info *) dst;
322  cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
323  __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
324  IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
325  &dest->addr.in6, &dest->dst_saddr.in6,
326  atomic_read(&rt->dst.__refcnt));
327  }
328  if (ret_saddr)
329  *ret_saddr = dest->dst_saddr.in6;
330  spin_unlock(&dest->dst_lock);
331  } else {
332  dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
333  if (!dst)
334  return NULL;
335  rt = (struct rt6_info *) dst;
336  }
337 
338  local = __ip_vs_is_local_route6(rt);
339  if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
340  rt_mode)) {
341  IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
342  local ? "local":"non-local", daddr);
343  dst_release(&rt->dst);
344  return NULL;
345  }
346  if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
347  !((ort = (struct rt6_info *) skb_dst(skb)) &&
348  __ip_vs_is_local_route6(ort))) {
349  IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
350  "requires NAT method, dest: %pI6\n",
351  &ipv6_hdr(skb)->daddr, daddr);
352  dst_release(&rt->dst);
353  return NULL;
354  }
355  if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
356  ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
358  IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
359  "to non-local address, dest: %pI6\n",
360  &ipv6_hdr(skb)->saddr, daddr);
361  dst_release(&rt->dst);
362  return NULL;
363  }
364 
365  return rt;
366 }
367 #endif
368 
369 
370 /*
371  * Release dest->dst_cache before a dest is removed
372  */
373 void
375 {
376  struct dst_entry *old_dst;
377 
378  old_dst = dest->dst_cache;
379  dest->dst_cache = NULL;
380  dst_release(old_dst);
381  dest->dst_saddr.ip = 0;
382 }
383 
/*
 * Prepare an encapsulated skb for transmission and evaluate to a
 * netfilter verdict.  For connections flagged IP_VS_CONN_F_NFCT the
 * verdict comes from ip_vs_confirm_conntrack(); only on NF_ACCEPT is the
 * skb's netfilter state reset and its checksum state adjusted.
 */
#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __verdict = NF_ACCEPT;				\
								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__verdict = ip_vs_confirm_conntrack(skb);	\
	if (__verdict == NF_ACCEPT) {				\
		nf_reset(skb);					\
		skb_forward_csum(skb);				\
	}							\
	__verdict;						\
})
397 
/*
 * Final step of the NAT transmitters.
 *
 * NOTE: this macro contains a hidden 'return NF_ACCEPT' that leaves the
 * calling function when @local is non-zero.  Otherwise the skb is passed
 * to the LOCAL_OUT hook of protocol family @pf and on to dst_output.
 * Connections flagged IP_VS_CONN_F_NFCT get their conntrack updated; all
 * others are marked untracked.
 */
#define IP_VS_XMIT_NAT(pf, skb, cp, local)			\
do {								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		ip_vs_update_conntrack(skb, cp, 1);		\
	else							\
		ip_vs_notrack(skb);				\
	if (local)						\
		return NF_ACCEPT;				\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
411 
/*
 * Final step of the non-NAT transmitters.
 *
 * NOTE: this macro contains a hidden 'return NF_ACCEPT' that leaves the
 * calling function when @local is non-zero.  Otherwise the skb is passed
 * to the LOCAL_OUT hook of protocol family @pf and on to dst_output.
 * Connections without IP_VS_CONN_F_NFCT are marked untracked first.
 */
#define IP_VS_XMIT(pf, skb, cp, local)				\
do {								\
	(skb)->ipvs_property = 1;				\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
		ip_vs_notrack(skb);				\
	if (local)						\
		return NF_ACCEPT;				\
	skb_forward_csum(skb);					\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,		\
		skb_dst(skb)->dev, dst_output);			\
} while (0)
423 
424 
425 /*
426  * NULL transmitter (do nothing except return NF_ACCEPT)
427  */
428 int
429 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
430  struct ip_vs_protocol *pp)
431 {
432  /* we do not touch skb and do not need pskb ptr */
433  IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
434 }
435 
436 
437 /*
438  * Bypass transmitter
439  * Let packets bypass the destination when the destination is not
440  * available, it may be only used in transparent cache cluster.
441  */
442 int
443 ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
444  struct ip_vs_protocol *pp)
445 {
446  struct rtable *rt; /* Route to the other host */
447  struct iphdr *iph = ip_hdr(skb);
448  int mtu;
449 
450  EnterFunction(10);
451 
452  if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos),
453  IP_VS_RT_MODE_NON_LOCAL, NULL)))
454  goto tx_error_icmp;
455 
456  /* MTU checking */
457  mtu = dst_mtu(&rt->dst);
458  if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
459  !skb_is_gso(skb)) {
460  ip_rt_put(rt);
462  IP_VS_DBG_RL("%s(): frag needed\n", __func__);
463  goto tx_error;
464  }
465 
466  /*
467  * Call ip_send_check because we are not sure it is called
468  * after ip_defrag. Is copy-on-write needed?
469  */
470  if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
471  ip_rt_put(rt);
472  return NF_STOLEN;
473  }
474  ip_send_check(ip_hdr(skb));
475 
476  /* drop old route */
477  skb_dst_drop(skb);
478  skb_dst_set(skb, &rt->dst);
479 
480  /* Another hack: avoid icmp_send in ip_fragment */
481  skb->local_df = 1;
482 
483  IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
484 
485  LeaveFunction(10);
486  return NF_STOLEN;
487 
488  tx_error_icmp:
489  dst_link_failure(skb);
490  tx_error:
491  kfree_skb(skb);
492  LeaveFunction(10);
493  return NF_STOLEN;
494 }
495 
496 #ifdef CONFIG_IP_VS_IPV6
497 int
498 ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
499  struct ip_vs_protocol *pp)
500 {
501  struct rt6_info *rt; /* Route to the other host */
502  struct ipv6hdr *iph = ipv6_hdr(skb);
503  int mtu;
504 
505  EnterFunction(10);
506 
507  if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0,
508  IP_VS_RT_MODE_NON_LOCAL)))
509  goto tx_error_icmp;
510 
511  /* MTU checking */
512  mtu = dst_mtu(&rt->dst);
513  if (__mtu_check_toobig_v6(skb, mtu)) {
514  if (!skb->dev) {
515  struct net *net = dev_net(skb_dst(skb)->dev);
516 
517  skb->dev = net->loopback_dev;
518  }
519  icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
520  dst_release(&rt->dst);
521  IP_VS_DBG_RL("%s(): frag needed\n", __func__);
522  goto tx_error;
523  }
524 
525  /*
526  * Call ip_send_check because we are not sure it is called
527  * after ip_defrag. Is copy-on-write needed?
528  */
529  skb = skb_share_check(skb, GFP_ATOMIC);
530  if (unlikely(skb == NULL)) {
531  dst_release(&rt->dst);
532  return NF_STOLEN;
533  }
534 
535  /* drop old route */
536  skb_dst_drop(skb);
537  skb_dst_set(skb, &rt->dst);
538 
539  /* Another hack: avoid icmp_send in ip_fragment */
540  skb->local_df = 1;
541 
542  IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
543 
544  LeaveFunction(10);
545  return NF_STOLEN;
546 
547  tx_error_icmp:
548  dst_link_failure(skb);
549  tx_error:
550  kfree_skb(skb);
551  LeaveFunction(10);
552  return NF_STOLEN;
553 }
554 #endif
555 
556 /*
557  * NAT transmitter (only for outside-to-inside nat forwarding)
558  * Not used for related ICMP
559  */
560 int
561 ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
562  struct ip_vs_protocol *pp)
563 {
564  struct rtable *rt; /* Route to the other host */
565  int mtu;
566  struct iphdr *iph = ip_hdr(skb);
567  int local;
568 
569  EnterFunction(10);
570 
571  /* check if it is a connection of no-client-port */
572  if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
573  __be16 _pt, *p;
574  p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
575  if (p == NULL)
576  goto tx_error;
577  ip_vs_conn_fill_cport(cp, *p);
578  IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
579  }
580 
581  if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
582  RT_TOS(iph->tos),
584  IP_VS_RT_MODE_NON_LOCAL |
585  IP_VS_RT_MODE_RDR, NULL)))
586  goto tx_error_icmp;
587  local = rt->rt_flags & RTCF_LOCAL;
588  /*
589  * Avoid duplicate tuple in reply direction for NAT traffic
590  * to local address when connection is sync-ed
591  */
592 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
593  if (cp->flags & IP_VS_CONN_F_SYNC && local) {
594  enum ip_conntrack_info ctinfo;
595  struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
596 
597  if (ct && !nf_ct_is_untracked(ct)) {
598  IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
599  "ip_vs_nat_xmit(): "
600  "stopping DNAT to local address");
601  goto tx_error_put;
602  }
603  }
604 #endif
605 
606  /* From world but DNAT to loopback address? */
607  if (local && ipv4_is_loopback(cp->daddr.ip) &&
608  rt_is_input_route(skb_rtable(skb))) {
609  IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
610  "stopping DNAT to loopback address");
611  goto tx_error_put;
612  }
613 
614  /* MTU checking */
615  mtu = dst_mtu(&rt->dst);
616  if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
617  !skb_is_gso(skb)) {
619  IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
620  "ip_vs_nat_xmit(): frag needed for");
621  goto tx_error_put;
622  }
623 
624  /* copy-on-write the packet before mangling it */
625  if (!skb_make_writable(skb, sizeof(struct iphdr)))
626  goto tx_error_put;
627 
628  if (skb_cow(skb, rt->dst.dev->hard_header_len))
629  goto tx_error_put;
630 
631  /* mangle the packet */
632  if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
633  goto tx_error_put;
634  ip_hdr(skb)->daddr = cp->daddr.ip;
635  ip_send_check(ip_hdr(skb));
636 
637  if (!local) {
638  /* drop old route */
639  skb_dst_drop(skb);
640  skb_dst_set(skb, &rt->dst);
641  } else {
642  ip_rt_put(rt);
643  /*
644  * Some IPv4 replies get local address from routes,
645  * not from iph, so while we DNAT after routing
646  * we need this second input/output route.
647  */
648  if (!__ip_vs_reroute_locally(skb))
649  goto tx_error;
650  }
651 
652  IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
653 
654  /* FIXME: when application helper enlarges the packet and the length
655  is larger than the MTU of outgoing device, there will be still
656  MTU problem. */
657 
658  /* Another hack: avoid icmp_send in ip_fragment */
659  skb->local_df = 1;
660 
661  IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
662 
663  LeaveFunction(10);
664  return NF_STOLEN;
665 
666  tx_error_icmp:
667  dst_link_failure(skb);
668  tx_error:
669  kfree_skb(skb);
670  LeaveFunction(10);
671  return NF_STOLEN;
672  tx_error_put:
673  ip_rt_put(rt);
674  goto tx_error;
675 }
676 
677 #ifdef CONFIG_IP_VS_IPV6
678 int
679 ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
680  struct ip_vs_protocol *pp)
681 {
682  struct rt6_info *rt; /* Route to the other host */
683  int mtu;
684  int local;
685 
686  EnterFunction(10);
687 
688  /* check if it is a connection of no-client-port */
689  if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
690  __be16 _pt, *p;
691  p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
692  sizeof(_pt), &_pt);
693  if (p == NULL)
694  goto tx_error;
695  ip_vs_conn_fill_cport(cp, *p);
696  IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
697  }
698 
699  if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
700  0, (IP_VS_RT_MODE_LOCAL |
701  IP_VS_RT_MODE_NON_LOCAL |
702  IP_VS_RT_MODE_RDR))))
703  goto tx_error_icmp;
704  local = __ip_vs_is_local_route6(rt);
705  /*
706  * Avoid duplicate tuple in reply direction for NAT traffic
707  * to local address when connection is sync-ed
708  */
709 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
710  if (cp->flags & IP_VS_CONN_F_SYNC && local) {
711  enum ip_conntrack_info ctinfo;
712  struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
713 
714  if (ct && !nf_ct_is_untracked(ct)) {
715  IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
716  "ip_vs_nat_xmit_v6(): "
717  "stopping DNAT to local address");
718  goto tx_error_put;
719  }
720  }
721 #endif
722 
723  /* From world but DNAT to loopback address? */
724  if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
725  ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
726  IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
727  "ip_vs_nat_xmit_v6(): "
728  "stopping DNAT to loopback address");
729  goto tx_error_put;
730  }
731 
732  /* MTU checking */
733  mtu = dst_mtu(&rt->dst);
734  if (__mtu_check_toobig_v6(skb, mtu)) {
735  if (!skb->dev) {
736  struct net *net = dev_net(skb_dst(skb)->dev);
737 
738  skb->dev = net->loopback_dev;
739  }
740  icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
741  IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
742  "ip_vs_nat_xmit_v6(): frag needed for");
743  goto tx_error_put;
744  }
745 
746  /* copy-on-write the packet before mangling it */
747  if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
748  goto tx_error_put;
749 
750  if (skb_cow(skb, rt->dst.dev->hard_header_len))
751  goto tx_error_put;
752 
753  /* mangle the packet */
754  if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
755  goto tx_error;
756  ipv6_hdr(skb)->daddr = cp->daddr.in6;
757 
758  if (!local || !skb->dev) {
759  /* drop the old route when skb is not shared */
760  skb_dst_drop(skb);
761  skb_dst_set(skb, &rt->dst);
762  } else {
763  /* destined to loopback, do we need to change route? */
764  dst_release(&rt->dst);
765  }
766 
767  IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
768 
769  /* FIXME: when application helper enlarges the packet and the length
770  is larger than the MTU of outgoing device, there will be still
771  MTU problem. */
772 
773  /* Another hack: avoid icmp_send in ip_fragment */
774  skb->local_df = 1;
775 
776  IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
777 
778  LeaveFunction(10);
779  return NF_STOLEN;
780 
781 tx_error_icmp:
782  dst_link_failure(skb);
783 tx_error:
784  LeaveFunction(10);
785  kfree_skb(skb);
786  return NF_STOLEN;
787 tx_error_put:
788  dst_release(&rt->dst);
789  goto tx_error;
790 }
791 #endif
792 
793 
794 /*
795  * IP Tunneling transmitter
796  *
797  * This function encapsulates the packet in a new IP packet, its
798  * destination will be set to cp->daddr. Most code of this function
799  * is taken from ipip.c.
800  *
801  * It is used in VS/TUN cluster. The load balancer selects a real
802  * server from a cluster based on a scheduling algorithm,
803  * encapsulates the request packet and forwards it to the selected
804  * server. For example, all real servers are configured with
805  * "ifconfig tunl0 <Virtual IP Address> up". When the server receives
806  * the encapsulated packet, it will decapsulate the packet, processe
807  * the request and return the response packets directly to the client
808  * without passing the load balancer. This can greatly increase the
809  * scalability of virtual server.
810  *
811  * Used for ANY protocol
812  */
813 int
814 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
815  struct ip_vs_protocol *pp)
816 {
817  struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
818  struct rtable *rt; /* Route to the other host */
819  __be32 saddr; /* Source for tunnel */
820  struct net_device *tdev; /* Device to other host */
821  struct iphdr *old_iph = ip_hdr(skb);
822  u8 tos = old_iph->tos;
823  __be16 df;
824  struct iphdr *iph; /* Our new IP header */
825  unsigned int max_headroom; /* The extra header space needed */
826  int mtu;
827  int ret;
828 
829  EnterFunction(10);
830 
831  if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
833  IP_VS_RT_MODE_NON_LOCAL |
835  &saddr)))
836  goto tx_error_icmp;
837  if (rt->rt_flags & RTCF_LOCAL) {
838  ip_rt_put(rt);
839  IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
840  }
841 
842  tdev = rt->dst.dev;
843 
844  mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
845  if (mtu < 68) {
846  IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
847  goto tx_error_put;
848  }
849  if (rt_is_output_route(skb_rtable(skb)))
850  skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
851 
852  /* Copy DF, reset fragment offset and MF */
853  df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
854 
855  if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
857  IP_VS_DBG_RL("%s(): frag needed\n", __func__);
858  goto tx_error_put;
859  }
860 
861  /*
862  * Okay, now see if we can stuff it in the buffer as-is.
863  */
864  max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
865 
866  if (skb_headroom(skb) < max_headroom
867  || skb_cloned(skb) || skb_shared(skb)) {
868  struct sk_buff *new_skb =
869  skb_realloc_headroom(skb, max_headroom);
870  if (!new_skb) {
871  ip_rt_put(rt);
872  kfree_skb(skb);
873  IP_VS_ERR_RL("%s(): no memory\n", __func__);
874  return NF_STOLEN;
875  }
876  consume_skb(skb);
877  skb = new_skb;
878  old_iph = ip_hdr(skb);
879  }
880 
881  skb->transport_header = skb->network_header;
882 
883  /* fix old IP header checksum */
884  ip_send_check(old_iph);
885 
886  skb_push(skb, sizeof(struct iphdr));
887  skb_reset_network_header(skb);
888  memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
889 
890  /* drop old route */
891  skb_dst_drop(skb);
892  skb_dst_set(skb, &rt->dst);
893 
894  /*
895  * Push down and install the IPIP header.
896  */
897  iph = ip_hdr(skb);
898  iph->version = 4;
899  iph->ihl = sizeof(struct iphdr)>>2;
900  iph->frag_off = df;
901  iph->protocol = IPPROTO_IPIP;
902  iph->tos = tos;
903  iph->daddr = cp->daddr.ip;
904  iph->saddr = saddr;
905  iph->ttl = old_iph->ttl;
906  ip_select_ident(iph, &rt->dst, NULL);
907 
908  /* Another hack: avoid icmp_send in ip_fragment */
909  skb->local_df = 1;
910 
911  ret = IP_VS_XMIT_TUNNEL(skb, cp);
912  if (ret == NF_ACCEPT)
913  ip_local_out(skb);
914  else if (ret == NF_DROP)
915  kfree_skb(skb);
916 
917  LeaveFunction(10);
918 
919  return NF_STOLEN;
920 
921  tx_error_icmp:
922  dst_link_failure(skb);
923  tx_error:
924  kfree_skb(skb);
925  LeaveFunction(10);
926  return NF_STOLEN;
927 tx_error_put:
928  ip_rt_put(rt);
929  goto tx_error;
930 }
931 
932 #ifdef CONFIG_IP_VS_IPV6
933 int
934 ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
935  struct ip_vs_protocol *pp)
936 {
937  struct rt6_info *rt; /* Route to the other host */
938  struct in6_addr saddr; /* Source for tunnel */
939  struct net_device *tdev; /* Device to other host */
940  struct ipv6hdr *old_iph = ipv6_hdr(skb);
941  struct ipv6hdr *iph; /* Our new IP header */
942  unsigned int max_headroom; /* The extra header space needed */
943  int mtu;
944  int ret;
945 
946  EnterFunction(10);
947 
948  if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
949  &saddr, 1, (IP_VS_RT_MODE_LOCAL |
950  IP_VS_RT_MODE_NON_LOCAL))))
951  goto tx_error_icmp;
952  if (__ip_vs_is_local_route6(rt)) {
953  dst_release(&rt->dst);
954  IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
955  }
956 
957  tdev = rt->dst.dev;
958 
959  mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
960  if (mtu < IPV6_MIN_MTU) {
961  IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
962  IPV6_MIN_MTU);
963  goto tx_error_put;
964  }
965  if (skb_dst(skb))
966  skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
967 
968  /* MTU checking: Notice that 'mtu' have been adjusted before hand */
969  if (__mtu_check_toobig_v6(skb, mtu)) {
970  if (!skb->dev) {
971  struct net *net = dev_net(skb_dst(skb)->dev);
972 
973  skb->dev = net->loopback_dev;
974  }
975  icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
976  IP_VS_DBG_RL("%s(): frag needed\n", __func__);
977  goto tx_error_put;
978  }
979 
980  /*
981  * Okay, now see if we can stuff it in the buffer as-is.
982  */
983  max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
984 
985  if (skb_headroom(skb) < max_headroom
986  || skb_cloned(skb) || skb_shared(skb)) {
987  struct sk_buff *new_skb =
988  skb_realloc_headroom(skb, max_headroom);
989  if (!new_skb) {
990  dst_release(&rt->dst);
991  kfree_skb(skb);
992  IP_VS_ERR_RL("%s(): no memory\n", __func__);
993  return NF_STOLEN;
994  }
995  consume_skb(skb);
996  skb = new_skb;
997  old_iph = ipv6_hdr(skb);
998  }
999 
1000  skb->transport_header = skb->network_header;
1001 
1002  skb_push(skb, sizeof(struct ipv6hdr));
1003  skb_reset_network_header(skb);
1004  memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1005 
1006  /* drop old route */
1007  skb_dst_drop(skb);
1008  skb_dst_set(skb, &rt->dst);
1009 
1010  /*
1011  * Push down and install the IPIP header.
1012  */
1013  iph = ipv6_hdr(skb);
1014  iph->version = 6;
1015  iph->nexthdr = IPPROTO_IPV6;
1016  iph->payload_len = old_iph->payload_len;
1017  be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
1018  iph->priority = old_iph->priority;
1019  memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
1020  iph->daddr = cp->daddr.in6;
1021  iph->saddr = saddr;
1022  iph->hop_limit = old_iph->hop_limit;
1023 
1024  /* Another hack: avoid icmp_send in ip_fragment */
1025  skb->local_df = 1;
1026 
1027  ret = IP_VS_XMIT_TUNNEL(skb, cp);
1028  if (ret == NF_ACCEPT)
1029  ip6_local_out(skb);
1030  else if (ret == NF_DROP)
1031  kfree_skb(skb);
1032 
1033  LeaveFunction(10);
1034 
1035  return NF_STOLEN;
1036 
1037 tx_error_icmp:
1038  dst_link_failure(skb);
1039 tx_error:
1040  kfree_skb(skb);
1041  LeaveFunction(10);
1042  return NF_STOLEN;
1043 tx_error_put:
1044  dst_release(&rt->dst);
1045  goto tx_error;
1046 }
1047 #endif
1048 
1049 
1050 /*
1051  * Direct Routing transmitter
1052  * Used for ANY protocol
1053  */
1054 int
1055 ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1056  struct ip_vs_protocol *pp)
1057 {
1058  struct rtable *rt; /* Route to the other host */
1059  struct iphdr *iph = ip_hdr(skb);
1060  int mtu;
1061 
1062  EnterFunction(10);
1063 
1064  if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1065  RT_TOS(iph->tos),
1067  IP_VS_RT_MODE_NON_LOCAL |
1069  goto tx_error_icmp;
1070  if (rt->rt_flags & RTCF_LOCAL) {
1071  ip_rt_put(rt);
1072  IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
1073  }
1074 
1075  /* MTU checking */
1076  mtu = dst_mtu(&rt->dst);
1077  if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
1078  !skb_is_gso(skb)) {
1080  ip_rt_put(rt);
1081  IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1082  goto tx_error;
1083  }
1084 
1085  /*
1086  * Call ip_send_check because we are not sure it is called
1087  * after ip_defrag. Is copy-on-write needed?
1088  */
1089  if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
1090  ip_rt_put(rt);
1091  return NF_STOLEN;
1092  }
1093  ip_send_check(ip_hdr(skb));
1094 
1095  /* drop old route */
1096  skb_dst_drop(skb);
1097  skb_dst_set(skb, &rt->dst);
1098 
1099  /* Another hack: avoid icmp_send in ip_fragment */
1100  skb->local_df = 1;
1101 
1102  IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
1103 
1104  LeaveFunction(10);
1105  return NF_STOLEN;
1106 
1107  tx_error_icmp:
1108  dst_link_failure(skb);
1109  tx_error:
1110  kfree_skb(skb);
1111  LeaveFunction(10);
1112  return NF_STOLEN;
1113 }
1114 
#ifdef CONFIG_IP_VS_IPV6
/*
 *      Direct Routing transmitter, IPv6 variant
 *
 *      Obtains an output route for cp->dest / cp->daddr.in6, verifies the
 *      packet against the route MTU, swaps the skb's route for the new one
 *      and passes the packet to IP_VS_XMIT.  All return statements spelled
 *      out below yield NF_STOLEN; the error labels free the skb.
 *      pp is not referenced directly in this function.
 */
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		 struct ip_vs_protocol *pp)
{
	const int allowed = IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL;
	struct rt6_info *out_rt;	/* Route to the other host */
	int pmtu;

	EnterFunction(10);

	out_rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
				       0, allowed);
	if (out_rt == NULL)
		goto tx_error_icmp;

	if (__ip_vs_is_local_route6(out_rt)) {
		/*
		 * Reference released first, then the macro is invoked with
		 * local == 1.
		 * NOTE(review): presumably the macro returns from this
		 * function in that case - it is defined elsewhere, confirm.
		 */
		dst_release(&out_rt->dst);
		IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
	}

	/* Refuse packets that exceed the MTU of the chosen route */
	pmtu = dst_mtu(&out_rt->dst);
	if (__mtu_check_toobig_v6(skb, pmtu)) {
		if (skb->dev == NULL) {
			/* give icmpv6_send a device: loopback of the dst's netns */
			struct net *ns = dev_net(skb_dst(skb)->dev);

			skb->dev = ns->loopback_dev;
		}
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, pmtu);
		dst_release(&out_rt->dst);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * skb_share_check() may hand back a different skb.  Unlike
	 * ip_vs_dr_xmit(), no ip_send_check() call is made afterwards.
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(!skb)) {
		dst_release(&out_rt->dst);
		return NF_STOLEN;
	}

	/* Replace the route currently attached to the skb */
	skb_dst_drop(skb);
	skb_dst_set(skb, &out_rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	/* route lookup failed */
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif
1178 
1179 
1180 /*
1181  * ICMP packet transmitter
1182  * called by the ip_vs_in_icmp
1183  */
1184 int
1185 ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1186  struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
1187 {
1188  struct rtable *rt; /* Route to the other host */
1189  int mtu;
1190  int rc;
1191  int local;
1192  int rt_mode;
1193 
1194  EnterFunction(10);
1195 
1196  /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
1197  forwarded directly here, because there is no need to
1198  translate address/port back */
1199  if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1200  if (cp->packet_xmit)
1201  rc = cp->packet_xmit(skb, cp, pp);
1202  else
1203  rc = NF_ACCEPT;
1204  /* do not touch skb anymore */
1205  atomic_inc(&cp->in_pkts);
1206  goto out;
1207  }
1208 
1209  /*
1210  * mangle and send the packet here (only for VS/NAT)
1211  */
1212 
1213  /* LOCALNODE from FORWARD hook is not supported */
1214  rt_mode = (hooknum != NF_INET_FORWARD) ?
1215  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1216  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1217  if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1218  RT_TOS(ip_hdr(skb)->tos),
1219  rt_mode, NULL)))
1220  goto tx_error_icmp;
1221  local = rt->rt_flags & RTCF_LOCAL;
1222 
1223  /*
1224  * Avoid duplicate tuple in reply direction for NAT traffic
1225  * to local address when connection is sync-ed
1226  */
1227 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
1228  if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1229  enum ip_conntrack_info ctinfo;
1230  struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1231 
1232  if (ct && !nf_ct_is_untracked(ct)) {
1233  IP_VS_DBG(10, "%s(): "
1234  "stopping DNAT to local address %pI4\n",
1235  __func__, &cp->daddr.ip);
1236  goto tx_error_put;
1237  }
1238  }
1239 #endif
1240 
1241  /* From world but DNAT to loopback address? */
1242  if (local && ipv4_is_loopback(cp->daddr.ip) &&
1243  rt_is_input_route(skb_rtable(skb))) {
1244  IP_VS_DBG(1, "%s(): "
1245  "stopping DNAT to loopback %pI4\n",
1246  __func__, &cp->daddr.ip);
1247  goto tx_error_put;
1248  }
1249 
1250  /* MTU checking */
1251  mtu = dst_mtu(&rt->dst);
1252  if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
1253  !skb_is_gso(skb)) {
1255  IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1256  goto tx_error_put;
1257  }
1258 
1259  /* copy-on-write the packet before mangling it */
1260  if (!skb_make_writable(skb, offset))
1261  goto tx_error_put;
1262 
1263  if (skb_cow(skb, rt->dst.dev->hard_header_len))
1264  goto tx_error_put;
1265 
1266  ip_vs_nat_icmp(skb, pp, cp, 0);
1267 
1268  if (!local) {
1269  /* drop the old route when skb is not shared */
1270  skb_dst_drop(skb);
1271  skb_dst_set(skb, &rt->dst);
1272  } else {
1273  ip_rt_put(rt);
1274  /*
1275  * Some IPv4 replies get local address from routes,
1276  * not from iph, so while we DNAT after routing
1277  * we need this second input/output route.
1278  */
1279  if (!__ip_vs_reroute_locally(skb))
1280  goto tx_error;
1281  }
1282 
1283  /* Another hack: avoid icmp_send in ip_fragment */
1284  skb->local_df = 1;
1285 
1286  IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
1287 
1288  rc = NF_STOLEN;
1289  goto out;
1290 
1291  tx_error_icmp:
1292  dst_link_failure(skb);
1293  tx_error:
1294  dev_kfree_skb(skb);
1295  rc = NF_STOLEN;
1296  out:
1297  LeaveFunction(10);
1298  return rc;
1299  tx_error_put:
1300  ip_rt_put(rt);
1301  goto tx_error;
1302 }
1303 
#ifdef CONFIG_IP_VS_IPV6
/*
 *	ICMPv6 packet transmitter
 *
 *	For connections whose forwarding method is not IP_VS_CONN_F_MASQ the
 *	packet is handed to cp->packet_xmit (or accepted when no transmitter
 *	is set) and that verdict is returned.  For VS/NAT the packet is made
 *	writable up to @offset, rewritten by ip_vs_nat_icmp_v6() and sent
 *	through IP_VS_XMIT_NAT.
 *
 *	@hooknum selects the allowed route modes: from NF_INET_FORWARD only
 *	non-local destinations are permitted.
 *
 *	Returns the verdict of cp->packet_xmit / NF_ACCEPT on the non-NAT
 *	path, NF_STOLEN on the paths written out below for NAT.
 */
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
{
	struct rt6_info	*rt;	/* Route to the other host */
	int mtu;
	int rc;
	int local;
	int rt_mode;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	/* LOCALNODE from FORWARD hook is not supported */
	rt_mode = (hooknum != NF_INET_FORWARD) ?
		  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
	rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
				   0, rt_mode);
	if (!rt)
		goto tx_error_icmp;

	local = __ip_vs_is_local_route6(rt);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG(10, "%s(): "
				  "stopping DNAT to local address %pI6\n",
				  __func__, &cp->daddr.in6);
			goto tx_error_put;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
	    ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
		IP_VS_DBG(1, "%s(): "
			  "stopping DNAT to loopback %pI6\n",
			  __func__, &cp->daddr.in6);
		goto tx_error_put;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (__mtu_check_toobig_v6(skb, mtu)) {
		if (!skb->dev) {
			/* give icmpv6_send a device: loopback of the dst's netns */
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error_put;

	/* make room for the link-layer header of the output device */
	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	ip_vs_nat_icmp_v6(skb, pp, cp, 0);

	if (!local || !skb->dev) {
		/* drop the old route when skb is not shared */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	} else {
		/* destined to loopback, do we need to change route? */
		dst_release(&rt->dst);
	}

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	/*
	 * NOTE(review): the macro is defined outside this section; it may
	 * return to the caller by itself when local is non-zero - confirm.
	 */
	IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);

	rc = NF_STOLEN;
	goto out;

tx_error_icmp:
	/* no route could be obtained */
	dst_link_failure(skb);
tx_error:
	dev_kfree_skb(skb);
	rc = NF_STOLEN;
out:
	LeaveFunction(10);
	return rc;
tx_error_put:
	/* error after the route lookup: release rt, then free the skb */
	dst_release(&rt->dst);
	goto tx_error;
}
#endif