Linux Kernel 3.7.1
ip6_output.c
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<[email protected]>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

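/* __ip6_local_out() fills in the IPv6 payload length and runs the
 * NF_INET_LOCAL_OUT netfilter hook; a return value of 1 means the hook
 * accepted the packet, and ip6_local_out() then hands it to dst_output().
 */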
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

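/* Last step before the packet hits the device: resolve the neighbour
 * attached to the route and transmit. Multicast packets are cloned and
 * looped back to local listeners first when the sending socket requests
 * loopback (sk_mc_loop).
 */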
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct rt6_info *rt;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				 skb->len);
	}

	rt = (struct rt6_info *) dst;
	neigh = rt->n;
	if (neigh)
		return dst_neigh_output(dst, neigh, skb);

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

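/* Fragment the packet if it exceeds the path MTU (and is not GSO), or if
 * the destination requires fragmentation of every packet (dst_allfrag).
 */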
static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}

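/* Output entry point once routing is done: drop everything if IPv6 is
 * administratively disabled on the device, otherwise run the
 * NF_INET_POST_ROUTING hook and finish output.
 */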
int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			consume_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is for us performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	hdr->saddr = *saddr;
	hdr->daddr = *daddr;

	return 0;
}

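/* Deliver a packet carrying a Router Alert option to every raw socket that
 * registered for this alert value via the global ip6_ra_chain list.
 * Returns 1 if at least one socket consumed the packet.
 */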
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

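/* Decide what to do with a packet addressed to a proxied (NDP proxy)
 * address: 1 means deliver locally, 0 means keep forwarding, -1 means
 * drop and signal a link failure to the sender.
 */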
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

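/* Forwarding path: validate the packet, decrement the hop limit, emit
 * ICMPv6 errors (time exceeded, packet too big, redirects) where needed,
 * and re-queue the packet through the NF_INET_FORWARD hook.
 */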
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be a mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
	    (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

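/* Propagate per-packet metadata (type, priority, mark, dst, netfilter and
 * security state) from the original packet to each fragment.
 */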
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

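/* Find the offset at which a Fragment header must be inserted: after any
 * Hop-by-Hop, Routing, and (for Mobile IPv6) Destination Options headers
 * that must precede it per the RFC 2460 extension-header ordering.
 * *nexthdr is left pointing at the byte to rewrite to NEXTHDR_FRAGMENT.
 */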
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

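/* Pick the 32-bit fragment identification: per-destination via the
 * inet_peer cache when available, otherwise from a global counter that
 * skips zero.
 */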
void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
{
	static atomic_t ipv6_fragmentation_id;
	int old, new;

	if (rt && !(rt->dst.flags & DST_NOPEER)) {
		struct inet_peer *peer;
		struct net *net;

		net = dev_net(rt->dst.dev);
		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
		if (peer) {
			fhdr->identification = htonl(inet_getid(peer, 0));
			inet_putpeer(peer);
			return;
		}
	}
	do {
		old = atomic_read(&ipv6_fragmentation_id);
		new = old + 1;
		if (!new)
			new = 1;
	} while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
	fhdr->identification = htonl(new);
}

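/* Split an oversized packet into fragments. The fast path reuses an
 * existing frag_list as ready-made fragments; the slow path allocates a
 * fresh skb per fragment and copies the data.
 */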
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->local_df && skb->len > mtu) ||
		     (IP6CB(skb)->frag_max_size &&
		      IP6CB(skb)->frag_max_size > mtu)) {
		if (skb->sk && dst_allfrag(skb_dst(skb)))
			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh, rt);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->dst);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      hroom + troom, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh, rt);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

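/* Helper for ip6_sk_dst_check(): returns nonzero when the cached route
 * cannot be verified to match the flow's destination address.
 */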
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the not connected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

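/* Core of the dst lookup helpers below: route the flow, select a source
 * address if none was given, and (with optimistic DAD) fall back to the
 * default router's dst entry while our own source address is still
 * tentative and the neighbour is unresolved.
 */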
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	n = rt->n;
	if (n && !(n->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst,
				      bool can_sleep)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool can_sleep)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	dst = ip6_sk_dst_check(sk, dst, fl6);

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

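/* UFO path of ip6_append_data(): build one large skb and let the device
 * segment it; gso_size is the per-fragment payload size, rounded down to
 * a multiple of 8 as required for IPv6 fragments.
 */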
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(&fhdr, rt);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

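/* Recompute mtu and maxfraglen for ip6_append_data(): the first fragment
 * reserves the dst's header_len, while later fragments treat that header
 * space as data and use the path MTU instead.
 */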
static void ip6_append_data_mtu(int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (skb == NULL) {
			/* first fragment, reserve header_len */
			*mtu = *mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = dst_mtu(rt->dst.path);
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

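/* Queue data on the socket's write queue, building packets of at most mtu
 * bytes; the queue is later flushed as one datagram (plus fragments) by
 * ip6_push_pending_frames(). Corking state lives in inet->cork and
 * np->cork.
 */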
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_cork *cork;
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int dst_exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;

	if (flags&MSG_PROBE)
		return 0;
	cork = &inet->cork.base;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->dst);
		cork->dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		if (rt->dst.flags & DST_XFRM_TUNNEL)
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(&rt->dst);
		else
			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		cork->fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			cork->flags |= IPCORK_ALLFRAG;
		cork->length = 0;
		exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
		dst_exthdrlen = rt->dst.header_len;
	} else {
		rt = (struct rt6_info *)cork->dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		dst_exthdrlen = 0;
		mtu = cork->fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM) {
		err = sock_tx_timestamp(sk, &tx_flags);
		if (err)
			goto error;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
			ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
			return -EMSGSIZE;
		}

		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags, rt);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (skb == NULL || skb_prev == NULL)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;

			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			struct page_frag *pfrag = sk_page_frag(sk);

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.base.dst) {
		dst_release(inet->cork.base.dst);
		inet->cork.base.dst = NULL;
		inet->cork.base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

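/* Merge the queued skbs into one packet, prepend the IPv6 header and any
 * extension headers saved at cork time, then send it via ip6_local_out().
 */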
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32*)hdr = fl6->flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	ip6_cork_release(inet, np);
	goto out;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}