Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
ip6_gre.c
Go to the documentation of this file.
1 /*
2  * GRE over IPv6 protocol decoder.
3  *
4  * Authors: Dmitry Kozlov ([email protected])
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  *
11  */
12 
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <linux/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
23 #include <linux/in.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/mroute.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
35 #include <linux/hash.h>
36 #include <linux/if_tunnel.h>
37 #include <linux/ip6_tunnel.h>
38 
39 #include <net/sock.h>
40 #include <net/ip.h>
41 #include <net/icmp.h>
42 #include <net/protocol.h>
43 #include <net/addrconf.h>
44 #include <net/arp.h>
45 #include <net/checksum.h>
46 #include <net/dsfield.h>
47 #include <net/inet_ecn.h>
48 #include <net/xfrm.h>
49 #include <net/net_namespace.h>
50 #include <net/netns/generic.h>
51 #include <net/rtnetlink.h>
52 
53 #include <net/ipv6.h>
54 #include <net/ip6_fib.h>
55 #include <net/ip6_route.h>
56 #include <net/ip6_tunnel.h>
57 
58 
59 static bool log_ecn_error = true;
60 module_param(log_ecn_error, bool, 0644);
61 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
62 
63 #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
64 #define IPV6_TCLASS_SHIFT 20
65 
66 #define HASH_SIZE_SHIFT 5
67 #define HASH_SIZE (1 << HASH_SIZE_SHIFT)
68 
69 static int ip6gre_net_id __read_mostly;
70 struct ip6gre_net {
72 
74 };
75 
76 static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
77 static int ip6gre_tunnel_init(struct net_device *dev);
78 static void ip6gre_tunnel_setup(struct net_device *dev);
79 static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
80 static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
81 
82 /* Tunnel hash table */
83 
84 /*
85  4 hash tables:
86 
87  3: (remote,local)
88  2: (remote,*)
89  1: (*,local)
90  0: (*,*)
91 
92  We require exact key match i.e. if a key is present in packet
93  it will match only tunnel with the same key; if it is not present,
94  it will match only keyless tunnel.
95 
96  All keyless packets that do not match a configured keyless tunnel
97  will match the fallback tunnel.
98  */
99 
100 #define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
101 static u32 HASH_ADDR(const struct in6_addr *addr)
102 {
103  u32 hash = ipv6_addr_hash(addr);
104 
105  return hash_32(hash, HASH_SIZE_SHIFT);
106 }
107 
108 #define tunnels_r_l tunnels[3]
109 #define tunnels_r tunnels[2]
110 #define tunnels_l tunnels[1]
111 #define tunnels_wc tunnels[0]
112 /*
113  * Locking : hash tables are protected by RCU and RTNL
114  */
115 
116 #define for_each_ip_tunnel_rcu(start) \
117  for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
118 
119 /* often modified stats are per cpu, other are shared (netdev->stats) */
120 struct pcpu_tstats {
121  u64 rx_packets;
122  u64 rx_bytes;
123  u64 tx_packets;
124  u64 tx_bytes;
125  struct u64_stats_sync syncp;
126 };
127 
128 static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
129  struct rtnl_link_stats64 *tot)
130 {
131  int i;
132 
134  const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
136  unsigned int start;
137 
138  do {
139  start = u64_stats_fetch_begin_bh(&tstats->syncp);
140  rx_packets = tstats->rx_packets;
141  tx_packets = tstats->tx_packets;
142  rx_bytes = tstats->rx_bytes;
143  tx_bytes = tstats->tx_bytes;
144  } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
145 
146  tot->rx_packets += rx_packets;
147  tot->tx_packets += tx_packets;
148  tot->rx_bytes += rx_bytes;
149  tot->tx_bytes += tx_bytes;
150  }
151 
152  tot->multicast = dev->stats.multicast;
153  tot->rx_crc_errors = dev->stats.rx_crc_errors;
154  tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
155  tot->rx_length_errors = dev->stats.rx_length_errors;
156  tot->rx_frame_errors = dev->stats.rx_frame_errors;
157  tot->rx_errors = dev->stats.rx_errors;
158 
159  tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
160  tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
161  tot->tx_dropped = dev->stats.tx_dropped;
162  tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
163  tot->tx_errors = dev->stats.tx_errors;
164 
165  return tot;
166 }
167 
168 /* Given src, dst and key, find appropriate for input tunnel. */
169 
170 static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
171  const struct in6_addr *remote, const struct in6_addr *local,
172  __be32 key, __be16 gre_proto)
173 {
174  struct net *net = dev_net(dev);
175  int link = dev->ifindex;
176  unsigned int h0 = HASH_ADDR(remote);
177  unsigned int h1 = HASH_KEY(key);
178  struct ip6_tnl *t, *cand = NULL;
179  struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
180  int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
182  int score, cand_score = 4;
183 
184  for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
185  if (!ipv6_addr_equal(local, &t->parms.laddr) ||
186  !ipv6_addr_equal(remote, &t->parms.raddr) ||
187  key != t->parms.i_key ||
188  !(t->dev->flags & IFF_UP))
189  continue;
190 
191  if (t->dev->type != ARPHRD_IP6GRE &&
192  t->dev->type != dev_type)
193  continue;
194 
195  score = 0;
196  if (t->parms.link != link)
197  score |= 1;
198  if (t->dev->type != dev_type)
199  score |= 2;
200  if (score == 0)
201  return t;
202 
203  if (score < cand_score) {
204  cand = t;
205  cand_score = score;
206  }
207  }
208 
209  for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
210  if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
211  key != t->parms.i_key ||
212  !(t->dev->flags & IFF_UP))
213  continue;
214 
215  if (t->dev->type != ARPHRD_IP6GRE &&
216  t->dev->type != dev_type)
217  continue;
218 
219  score = 0;
220  if (t->parms.link != link)
221  score |= 1;
222  if (t->dev->type != dev_type)
223  score |= 2;
224  if (score == 0)
225  return t;
226 
227  if (score < cand_score) {
228  cand = t;
229  cand_score = score;
230  }
231  }
232 
233  for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
234  if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
235  (!ipv6_addr_equal(local, &t->parms.raddr) ||
236  !ipv6_addr_is_multicast(local))) ||
237  key != t->parms.i_key ||
238  !(t->dev->flags & IFF_UP))
239  continue;
240 
241  if (t->dev->type != ARPHRD_IP6GRE &&
242  t->dev->type != dev_type)
243  continue;
244 
245  score = 0;
246  if (t->parms.link != link)
247  score |= 1;
248  if (t->dev->type != dev_type)
249  score |= 2;
250  if (score == 0)
251  return t;
252 
253  if (score < cand_score) {
254  cand = t;
255  cand_score = score;
256  }
257  }
258 
259  for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
260  if (t->parms.i_key != key ||
261  !(t->dev->flags & IFF_UP))
262  continue;
263 
264  if (t->dev->type != ARPHRD_IP6GRE &&
265  t->dev->type != dev_type)
266  continue;
267 
268  score = 0;
269  if (t->parms.link != link)
270  score |= 1;
271  if (t->dev->type != dev_type)
272  score |= 2;
273  if (score == 0)
274  return t;
275 
276  if (score < cand_score) {
277  cand = t;
278  cand_score = score;
279  }
280  }
281 
282  if (cand != NULL)
283  return cand;
284 
285  dev = ign->fb_tunnel_dev;
286  if (dev->flags & IFF_UP)
287  return netdev_priv(dev);
288 
289  return NULL;
290 }
291 
292 static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
293  const struct __ip6_tnl_parm *p)
294 {
295  const struct in6_addr *remote = &p->raddr;
296  const struct in6_addr *local = &p->laddr;
297  unsigned int h = HASH_KEY(p->i_key);
298  int prio = 0;
299 
300  if (!ipv6_addr_any(local))
301  prio |= 1;
302  if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) {
303  prio |= 2;
304  h ^= HASH_ADDR(remote);
305  }
306 
307  return &ign->tunnels[prio][h];
308 }
309 
310 static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
311  const struct ip6_tnl *t)
312 {
313  return __ip6gre_bucket(ign, &t->parms);
314 }
315 
316 static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
317 {
318  struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
319 
321  rcu_assign_pointer(*tp, t);
322 }
323 
324 static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
325 {
326  struct ip6_tnl __rcu **tp;
327  struct ip6_tnl *iter;
328 
329  for (tp = ip6gre_bucket(ign, t);
330  (iter = rtnl_dereference(*tp)) != NULL;
331  tp = &iter->next) {
332  if (t == iter) {
333  rcu_assign_pointer(*tp, t->next);
334  break;
335  }
336  }
337 }
338 
339 static struct ip6_tnl *ip6gre_tunnel_find(struct net *net,
340  const struct __ip6_tnl_parm *parms,
341  int type)
342 {
343  const struct in6_addr *remote = &parms->raddr;
344  const struct in6_addr *local = &parms->laddr;
345  __be32 key = parms->i_key;
346  int link = parms->link;
347  struct ip6_tnl *t;
348  struct ip6_tnl __rcu **tp;
349  struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
350 
351  for (tp = __ip6gre_bucket(ign, parms);
352  (t = rtnl_dereference(*tp)) != NULL;
353  tp = &t->next)
354  if (ipv6_addr_equal(local, &t->parms.laddr) &&
355  ipv6_addr_equal(remote, &t->parms.raddr) &&
356  key == t->parms.i_key &&
357  link == t->parms.link &&
358  type == t->dev->type)
359  break;
360 
361  return t;
362 }
363 
364 static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
365  const struct __ip6_tnl_parm *parms, int create)
366 {
367  struct ip6_tnl *t, *nt;
368  struct net_device *dev;
369  char name[IFNAMSIZ];
370  struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
371 
372  t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE);
373  if (t || !create)
374  return t;
375 
376  if (parms->name[0])
377  strlcpy(name, parms->name, IFNAMSIZ);
378  else
379  strcpy(name, "ip6gre%d");
380 
381  dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup);
382  if (!dev)
383  return NULL;
384 
385  dev_net_set(dev, net);
386 
387  nt = netdev_priv(dev);
388  nt->parms = *parms;
389  dev->rtnl_link_ops = &ip6gre_link_ops;
390 
391  nt->dev = dev;
392  ip6gre_tnl_link_config(nt, 1);
393 
394  if (register_netdevice(dev) < 0)
395  goto failed_free;
396 
397  /* Can use a lockless transmit, unless we generate output sequences */
398  if (!(nt->parms.o_flags & GRE_SEQ))
399  dev->features |= NETIF_F_LLTX;
400 
401  dev_hold(dev);
402  ip6gre_tunnel_link(ign, nt);
403  return nt;
404 
405 failed_free:
406  free_netdev(dev);
407  return NULL;
408 }
409 
410 static void ip6gre_tunnel_uninit(struct net_device *dev)
411 {
412  struct net *net = dev_net(dev);
413  struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
414 
415  ip6gre_tunnel_unlink(ign, netdev_priv(dev));
416  dev_put(dev);
417 }
418 
419 
420 static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
421  u8 type, u8 code, int offset, __be32 info)
422 {
423  const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
424  __be16 *p = (__be16 *)(skb->data + offset);
425  int grehlen = offset + 4;
426  struct ip6_tnl *t;
427  __be16 flags;
428 
429  flags = p[0];
431  if (flags&(GRE_VERSION|GRE_ROUTING))
432  return;
433  if (flags&GRE_KEY) {
434  grehlen += 4;
435  if (flags&GRE_CSUM)
436  grehlen += 4;
437  }
438  }
439 
440  /* If only 8 bytes returned, keyed message will be dropped here */
441  if (!pskb_may_pull(skb, grehlen))
442  return;
443  ipv6h = (const struct ipv6hdr *)skb->data;
444  p = (__be16 *)(skb->data + offset);
445 
446  t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
447  flags & GRE_KEY ?
448  *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
449  p[1]);
450  if (t == NULL)
451  return;
452 
453  switch (type) {
454  __u32 teli;
455  struct ipv6_tlv_tnl_enc_lim *tel;
456  __u32 mtu;
457  case ICMPV6_DEST_UNREACH:
458  net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
459  t->parms.name);
460  break;
461  case ICMPV6_TIME_EXCEED:
462  if (code == ICMPV6_EXC_HOPLIMIT) {
463  net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
464  t->parms.name);
465  }
466  break;
467  case ICMPV6_PARAMPROB:
468  teli = 0;
469  if (code == ICMPV6_HDR_FIELD)
470  teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
471 
472  if (teli && teli == info - 2) {
473  tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
474  if (tel->encap_limit == 0) {
475  net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
476  t->parms.name);
477  }
478  } else {
479  net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
480  t->parms.name);
481  }
482  break;
483  case ICMPV6_PKT_TOOBIG:
484  mtu = info - offset;
485  if (mtu < IPV6_MIN_MTU)
486  mtu = IPV6_MIN_MTU;
487  t->dev->mtu = mtu;
488  break;
489  }
490 
491  if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
492  t->err_count++;
493  else
494  t->err_count = 1;
495  t->err_time = jiffies;
496 }
497 
498 static int ip6gre_rcv(struct sk_buff *skb)
499 {
500  const struct ipv6hdr *ipv6h;
501  u8 *h;
502  __be16 flags;
503  __sum16 csum = 0;
504  __be32 key = 0;
505  u32 seqno = 0;
506  struct ip6_tnl *tunnel;
507  int offset = 4;
508  __be16 gre_proto;
509  int err;
510 
511  if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
512  goto drop;
513 
514  ipv6h = ipv6_hdr(skb);
515  h = skb->data;
516  flags = *(__be16 *)h;
517 
518  if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
519  /* - Version must be 0.
520  - We do not support routing headers.
521  */
522  if (flags&(GRE_VERSION|GRE_ROUTING))
523  goto drop;
524 
525  if (flags&GRE_CSUM) {
526  switch (skb->ip_summed) {
527  case CHECKSUM_COMPLETE:
528  csum = csum_fold(skb->csum);
529  if (!csum)
530  break;
531  /* fall through */
532  case CHECKSUM_NONE:
533  skb->csum = 0;
534  csum = __skb_checksum_complete(skb);
536  }
537  offset += 4;
538  }
539  if (flags&GRE_KEY) {
540  key = *(__be32 *)(h + offset);
541  offset += 4;
542  }
543  if (flags&GRE_SEQ) {
544  seqno = ntohl(*(__be32 *)(h + offset));
545  offset += 4;
546  }
547  }
548 
549  gre_proto = *(__be16 *)(h + 2);
550 
551  tunnel = ip6gre_tunnel_lookup(skb->dev,
552  &ipv6h->saddr, &ipv6h->daddr, key,
553  gre_proto);
554  if (tunnel) {
555  struct pcpu_tstats *tstats;
556 
557  if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
558  goto drop;
559 
560  if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) {
561  tunnel->dev->stats.rx_dropped++;
562  goto drop;
563  }
564 
565  secpath_reset(skb);
566 
567  skb->protocol = gre_proto;
568  /* WCCP version 1 and 2 protocol decoding.
569  * - Change protocol to IP
570  * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
571  */
572  if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
573  skb->protocol = htons(ETH_P_IP);
574  if ((*(h + offset) & 0xF0) != 0x40)
575  offset += 4;
576  }
577 
578  skb->mac_header = skb->network_header;
579  __pskb_pull(skb, offset);
580  skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
581  skb->pkt_type = PACKET_HOST;
582 
583  if (((flags&GRE_CSUM) && csum) ||
584  (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
585  tunnel->dev->stats.rx_crc_errors++;
586  tunnel->dev->stats.rx_errors++;
587  goto drop;
588  }
589  if (tunnel->parms.i_flags&GRE_SEQ) {
590  if (!(flags&GRE_SEQ) ||
591  (tunnel->i_seqno &&
592  (s32)(seqno - tunnel->i_seqno) < 0)) {
593  tunnel->dev->stats.rx_fifo_errors++;
594  tunnel->dev->stats.rx_errors++;
595  goto drop;
596  }
597  tunnel->i_seqno = seqno + 1;
598  }
599 
600  /* Warning: All skb pointers will be invalidated! */
601  if (tunnel->dev->type == ARPHRD_ETHER) {
602  if (!pskb_may_pull(skb, ETH_HLEN)) {
603  tunnel->dev->stats.rx_length_errors++;
604  tunnel->dev->stats.rx_errors++;
605  goto drop;
606  }
607 
608  ipv6h = ipv6_hdr(skb);
609  skb->protocol = eth_type_trans(skb, tunnel->dev);
610  skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
611  }
612 
613  __skb_tunnel_rx(skb, tunnel->dev);
614 
615  skb_reset_network_header(skb);
616 
617  err = IP6_ECN_decapsulate(ipv6h, skb);
618  if (unlikely(err)) {
619  if (log_ecn_error)
620  net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
621  &ipv6h->saddr,
622  ipv6_get_dsfield(ipv6h));
623  if (err > 1) {
624  ++tunnel->dev->stats.rx_frame_errors;
625  ++tunnel->dev->stats.rx_errors;
626  goto drop;
627  }
628  }
629 
630  tstats = this_cpu_ptr(tunnel->dev->tstats);
631  u64_stats_update_begin(&tstats->syncp);
632  tstats->rx_packets++;
633  tstats->rx_bytes += skb->len;
634  u64_stats_update_end(&tstats->syncp);
635 
636  netif_rx(skb);
637 
638  return 0;
639  }
641 
642 drop:
643  kfree_skb(skb);
644  return 0;
645 }
646 
650 };
651 
652 static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
653 {
654  memset(opt, 0, sizeof(struct ipv6_tel_txoption));
655 
657  opt->dst_opt[3] = 1;
658  opt->dst_opt[4] = encap_limit;
659  opt->dst_opt[5] = IPV6_TLV_PADN;
660  opt->dst_opt[6] = 1;
661 
662  opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
663  opt->ops.opt_nflen = 8;
664 }
665 
666 static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
667  struct net_device *dev,
668  __u8 dsfield,
669  struct flowi6 *fl6,
670  int encap_limit,
671  __u32 *pmtu)
672 {
673  struct net *net = dev_net(dev);
674  struct ip6_tnl *tunnel = netdev_priv(dev);
675  struct net_device *tdev; /* Device to other host */
676  struct ipv6hdr *ipv6h; /* Our new IP header */
677  unsigned int max_headroom; /* The extra header space needed */
678  int gre_hlen;
679  struct ipv6_tel_txoption opt;
680  int mtu;
681  struct dst_entry *dst = NULL, *ndst = NULL;
682  struct net_device_stats *stats = &tunnel->dev->stats;
683  int err = -1;
684  u8 proto;
685  int pkt_len;
686  struct sk_buff *new_skb;
687 
688  if (dev->type == ARPHRD_ETHER)
689  IPCB(skb)->flags = 0;
690 
691  if (dev->header_ops && dev->type == ARPHRD_IP6GRE) {
692  gre_hlen = 0;
693  ipv6h = (struct ipv6hdr *)skb->data;
694  fl6->daddr = ipv6h->daddr;
695  } else {
696  gre_hlen = tunnel->hlen;
697  fl6->daddr = tunnel->parms.raddr;
698  }
699 
700  if (!fl6->flowi6_mark)
701  dst = ip6_tnl_dst_check(tunnel);
702 
703  if (!dst) {
704  ndst = ip6_route_output(net, NULL, fl6);
705 
706  if (ndst->error)
707  goto tx_err_link_failure;
708  ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
709  if (IS_ERR(ndst)) {
710  err = PTR_ERR(ndst);
711  ndst = NULL;
712  goto tx_err_link_failure;
713  }
714  dst = ndst;
715  }
716 
717  tdev = dst->dev;
718 
719  if (tdev == dev) {
720  stats->collisions++;
721  net_warn_ratelimited("%s: Local routing loop detected!\n",
722  tunnel->parms.name);
723  goto tx_err_dst_release;
724  }
725 
726  mtu = dst_mtu(dst) - sizeof(*ipv6h);
727  if (encap_limit >= 0) {
728  max_headroom += 8;
729  mtu -= 8;
730  }
731  if (mtu < IPV6_MIN_MTU)
732  mtu = IPV6_MIN_MTU;
733  if (skb_dst(skb))
734  skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
735  if (skb->len > mtu) {
736  *pmtu = mtu;
737  err = -EMSGSIZE;
738  goto tx_err_dst_release;
739  }
740 
741  if (tunnel->err_count > 0) {
742  if (time_before(jiffies,
743  tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) {
744  tunnel->err_count--;
745 
746  dst_link_failure(skb);
747  } else
748  tunnel->err_count = 0;
749  }
750 
751  max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
752 
753  if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
754  (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
755  new_skb = skb_realloc_headroom(skb, max_headroom);
756  if (max_headroom > dev->needed_headroom)
757  dev->needed_headroom = max_headroom;
758  if (!new_skb)
759  goto tx_err_dst_release;
760 
761  if (skb->sk)
762  skb_set_owner_w(new_skb, skb->sk);
763  consume_skb(skb);
764  skb = new_skb;
765  }
766 
767  skb_dst_drop(skb);
768 
769  if (fl6->flowi6_mark) {
770  skb_dst_set(skb, dst);
771  ndst = NULL;
772  } else {
773  skb_dst_set_noref(skb, dst);
774  }
775 
776  skb->transport_header = skb->network_header;
777 
778  proto = NEXTHDR_GRE;
779  if (encap_limit >= 0) {
780  init_tel_txopt(&opt, encap_limit);
781  ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
782  }
783 
784  skb_push(skb, gre_hlen);
785  skb_reset_network_header(skb);
786 
787  /*
788  * Push down and install the IP header.
789  */
790  ipv6h = ipv6_hdr(skb);
791  *(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000);
792  dsfield = INET_ECN_encapsulate(0, dsfield);
793  ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
794  ipv6h->hop_limit = tunnel->parms.hop_limit;
795  ipv6h->nexthdr = proto;
796  ipv6h->saddr = fl6->saddr;
797  ipv6h->daddr = fl6->daddr;
798 
799  ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
800  ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ?
801  htons(ETH_P_TEB) : skb->protocol;
802 
803  if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
804  __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
805 
806  if (tunnel->parms.o_flags&GRE_SEQ) {
807  ++tunnel->o_seqno;
808  *ptr = htonl(tunnel->o_seqno);
809  ptr--;
810  }
811  if (tunnel->parms.o_flags&GRE_KEY) {
812  *ptr = tunnel->parms.o_key;
813  ptr--;
814  }
815  if (tunnel->parms.o_flags&GRE_CSUM) {
816  *ptr = 0;
817  *(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1),
818  skb->len - sizeof(struct ipv6hdr));
819  }
820  }
821 
822  nf_reset(skb);
823  pkt_len = skb->len;
824  err = ip6_local_out(skb);
825 
826  if (net_xmit_eval(err) == 0) {
827  struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats);
828 
829  tstats->tx_bytes += pkt_len;
830  tstats->tx_packets++;
831  } else {
832  stats->tx_errors++;
833  stats->tx_aborted_errors++;
834  }
835 
836  if (ndst)
837  ip6_tnl_dst_store(tunnel, ndst);
838 
839  return 0;
840 tx_err_link_failure:
841  stats->tx_carrier_errors++;
842  dst_link_failure(skb);
843 tx_err_dst_release:
844  dst_release(ndst);
845  return err;
846 }
847 
848 static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
849 {
850  struct ip6_tnl *t = netdev_priv(dev);
851  const struct iphdr *iph = ip_hdr(skb);
852  int encap_limit = -1;
853  struct flowi6 fl6;
854  __u8 dsfield;
855  __u32 mtu;
856  int err;
857 
858  if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
859  encap_limit = t->parms.encap_limit;
860 
861  memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
862  fl6.flowi6_proto = IPPROTO_IPIP;
863 
864  dsfield = ipv4_get_dsfield(iph);
865 
866  if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
867  fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
869  if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
870  fl6.flowi6_mark = skb->mark;
871 
872  err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
873  if (err != 0) {
874  /* XXX: send ICMP error even if DF is not set. */
875  if (err == -EMSGSIZE)
877  htonl(mtu));
878  return -1;
879  }
880 
881  return 0;
882 }
883 
884 static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
885 {
886  struct ip6_tnl *t = netdev_priv(dev);
887  struct ipv6hdr *ipv6h = ipv6_hdr(skb);
888  int encap_limit = -1;
889  __u16 offset;
890  struct flowi6 fl6;
891  __u8 dsfield;
892  __u32 mtu;
893  int err;
894 
895  if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
896  return -1;
897 
898  offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
899  if (offset > 0) {
900  struct ipv6_tlv_tnl_enc_lim *tel;
901  tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
902  if (tel->encap_limit == 0) {
904  ICMPV6_HDR_FIELD, offset + 2);
905  return -1;
906  }
907  encap_limit = tel->encap_limit - 1;
908  } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
909  encap_limit = t->parms.encap_limit;
910 
911  memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
912  fl6.flowi6_proto = IPPROTO_IPV6;
913 
914  dsfield = ipv6_get_dsfield(ipv6h);
915  if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
916  fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
917  if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
918  fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
919  if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
920  fl6.flowi6_mark = skb->mark;
921 
922  err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
923  if (err != 0) {
924  if (err == -EMSGSIZE)
925  icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
926  return -1;
927  }
928 
929  return 0;
930 }
931 
946 static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t,
947  const struct ipv6hdr *hdr)
948 {
949  return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
950 }
951 
952 static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
953 {
954  struct ip6_tnl *t = netdev_priv(dev);
955  int encap_limit = -1;
956  struct flowi6 fl6;
957  __u32 mtu;
958  int err;
959 
960  if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
961  encap_limit = t->parms.encap_limit;
962 
963  memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
964  fl6.flowi6_proto = skb->protocol;
965 
966  err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu);
967 
968  return err;
969 }
970 
971 static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
972  struct net_device *dev)
973 {
974  struct ip6_tnl *t = netdev_priv(dev);
975  struct net_device_stats *stats = &t->dev->stats;
976  int ret;
977 
978  if (!ip6_tnl_xmit_ctl(t))
979  return -1;
980 
981  switch (skb->protocol) {
982  case htons(ETH_P_IP):
983  ret = ip6gre_xmit_ipv4(skb, dev);
984  break;
985  case htons(ETH_P_IPV6):
986  ret = ip6gre_xmit_ipv6(skb, dev);
987  break;
988  default:
989  ret = ip6gre_xmit_other(skb, dev);
990  break;
991  }
992 
993  if (ret < 0)
994  goto tx_err;
995 
996  return NETDEV_TX_OK;
997 
998 tx_err:
999  stats->tx_errors++;
1000  stats->tx_dropped++;
1001  kfree_skb(skb);
1002  return NETDEV_TX_OK;
1003 }
1004 
1005 static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
1006 {
1007  struct net_device *dev = t->dev;
1008  struct __ip6_tnl_parm *p = &t->parms;
1009  struct flowi6 *fl6 = &t->fl.u.ip6;
1010  int addend = sizeof(struct ipv6hdr) + 4;
1011 
1012  if (dev->type != ARPHRD_ETHER) {
1013  memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
1014  memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
1015  }
1016 
1017  /* Set up flowi template */
1018  fl6->saddr = p->laddr;
1019  fl6->daddr = p->raddr;
1020  fl6->flowi6_oif = p->link;
1021  fl6->flowlabel = 0;
1022 
1023  if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
1024  fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
1026  fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
1027 
1029  p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
1030 
1031  if (p->flags&IP6_TNL_F_CAP_XMIT &&
1032  p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER)
1033  dev->flags |= IFF_POINTOPOINT;
1034  else
1035  dev->flags &= ~IFF_POINTOPOINT;
1036 
1037  dev->iflink = p->link;
1038 
1039  /* Precalculate GRE options length */
1040  if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1041  if (t->parms.o_flags&GRE_CSUM)
1042  addend += 4;
1043  if (t->parms.o_flags&GRE_KEY)
1044  addend += 4;
1045  if (t->parms.o_flags&GRE_SEQ)
1046  addend += 4;
1047  }
1048 
1049  if (p->flags & IP6_TNL_F_CAP_XMIT) {
1050  int strict = (ipv6_addr_type(&p->raddr) &
1052 
1053  struct rt6_info *rt = rt6_lookup(dev_net(dev),
1054  &p->raddr, &p->laddr,
1055  p->link, strict);
1056 
1057  if (rt == NULL)
1058  return;
1059 
1060  if (rt->dst.dev) {
1061  dev->hard_header_len = rt->dst.dev->hard_header_len + addend;
1062 
1063  if (set_mtu) {
1064  dev->mtu = rt->dst.dev->mtu - addend;
1065  if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1066  dev->mtu -= 8;
1067 
1068  if (dev->mtu < IPV6_MIN_MTU)
1069  dev->mtu = IPV6_MIN_MTU;
1070  }
1071  }
1072  dst_release(&rt->dst);
1073  }
1074 
1075  t->hlen = addend;
1076 }
1077 
1078 static int ip6gre_tnl_change(struct ip6_tnl *t,
1079  const struct __ip6_tnl_parm *p, int set_mtu)
1080 {
1081  t->parms.laddr = p->laddr;
1082  t->parms.raddr = p->raddr;
1083  t->parms.flags = p->flags;
1084  t->parms.hop_limit = p->hop_limit;
1085  t->parms.encap_limit = p->encap_limit;
1086  t->parms.flowinfo = p->flowinfo;
1087  t->parms.link = p->link;
1088  t->parms.proto = p->proto;
1089  t->parms.i_key = p->i_key;
1090  t->parms.o_key = p->o_key;
1091  t->parms.i_flags = p->i_flags;
1092  t->parms.o_flags = p->o_flags;
1093  ip6_tnl_dst_reset(t);
1094  ip6gre_tnl_link_config(t, set_mtu);
1095  return 0;
1096 }
1097 
1098 static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
1099  const struct ip6_tnl_parm2 *u)
1100 {
1101  p->laddr = u->laddr;
1102  p->raddr = u->raddr;
1103  p->flags = u->flags;
1104  p->hop_limit = u->hop_limit;
1105  p->encap_limit = u->encap_limit;
1106  p->flowinfo = u->flowinfo;
1107  p->link = u->link;
1108  p->i_key = u->i_key;
1109  p->o_key = u->o_key;
1110  p->i_flags = u->i_flags;
1111  p->o_flags = u->o_flags;
1112  memcpy(p->name, u->name, sizeof(u->name));
1113 }
1114 
1115 static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
1116  const struct __ip6_tnl_parm *p)
1117 {
1118  u->proto = IPPROTO_GRE;
1119  u->laddr = p->laddr;
1120  u->raddr = p->raddr;
1121  u->flags = p->flags;
1122  u->hop_limit = p->hop_limit;
1123  u->encap_limit = p->encap_limit;
1124  u->flowinfo = p->flowinfo;
1125  u->link = p->link;
1126  u->i_key = p->i_key;
1127  u->o_key = p->o_key;
1128  u->i_flags = p->i_flags;
1129  u->o_flags = p->o_flags;
1130  memcpy(u->name, p->name, sizeof(u->name));
1131 }
1132 
1133 static int ip6gre_tunnel_ioctl(struct net_device *dev,
1134  struct ifreq *ifr, int cmd)
1135 {
1136  int err = 0;
1137  struct ip6_tnl_parm2 p;
1138  struct __ip6_tnl_parm p1;
1139  struct ip6_tnl *t;
1140  struct net *net = dev_net(dev);
1141  struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1142 
1143  switch (cmd) {
1144  case SIOCGETTUNNEL:
1145  t = NULL;
1146  if (dev == ign->fb_tunnel_dev) {
1147  if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1148  err = -EFAULT;
1149  break;
1150  }
1151  ip6gre_tnl_parm_from_user(&p1, &p);
1152  t = ip6gre_tunnel_locate(net, &p1, 0);
1153  }
1154  if (t == NULL)
1155  t = netdev_priv(dev);
1156  ip6gre_tnl_parm_to_user(&p, &t->parms);
1157  if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1158  err = -EFAULT;
1159  break;
1160 
1161  case SIOCADDTUNNEL:
1162  case SIOCCHGTUNNEL:
1163  err = -EPERM;
1164  if (!capable(CAP_NET_ADMIN))
1165  goto done;
1166 
1167  err = -EFAULT;
1168  if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1169  goto done;
1170 
1171  err = -EINVAL;
1172  if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))
1173  goto done;
1174 
1175  if (!(p.i_flags&GRE_KEY))
1176  p.i_key = 0;
1177  if (!(p.o_flags&GRE_KEY))
1178  p.o_key = 0;
1179 
1180  ip6gre_tnl_parm_from_user(&p1, &p);
1181  t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL);
1182 
1183  if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1184  if (t != NULL) {
1185  if (t->dev != dev) {
1186  err = -EEXIST;
1187  break;
1188  }
1189  } else {
1190  t = netdev_priv(dev);
1191 
1192  ip6gre_tunnel_unlink(ign, t);
1193  synchronize_net();
1194  ip6gre_tnl_change(t, &p1, 1);
1195  ip6gre_tunnel_link(ign, t);
1196  netdev_state_change(dev);
1197  }
1198  }
1199 
1200  if (t) {
1201  err = 0;
1202 
1203  ip6gre_tnl_parm_to_user(&p, &t->parms);
1204  if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1205  err = -EFAULT;
1206  } else
1207  err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1208  break;
1209 
1210  case SIOCDELTUNNEL:
1211  err = -EPERM;
1212  if (!capable(CAP_NET_ADMIN))
1213  goto done;
1214 
1215  if (dev == ign->fb_tunnel_dev) {
1216  err = -EFAULT;
1217  if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1218  goto done;
1219  err = -ENOENT;
1220  ip6gre_tnl_parm_from_user(&p1, &p);
1221  t = ip6gre_tunnel_locate(net, &p1, 0);
1222  if (t == NULL)
1223  goto done;
1224  err = -EPERM;
1225  if (t == netdev_priv(ign->fb_tunnel_dev))
1226  goto done;
1227  dev = t->dev;
1228  }
1229  unregister_netdevice(dev);
1230  err = 0;
1231  break;
1232 
1233  default:
1234  err = -EINVAL;
1235  }
1236 
1237 done:
1238  return err;
1239 }
1240 
1241 static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1242 {
1243  struct ip6_tnl *tunnel = netdev_priv(dev);
1244  if (new_mtu < 68 ||
1245  new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1246  return -EINVAL;
1247  dev->mtu = new_mtu;
1248  return 0;
1249 }
1250 
1251 static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
1252  unsigned short type,
1253  const void *daddr, const void *saddr, unsigned int len)
1254 {
1255  struct ip6_tnl *t = netdev_priv(dev);
1256  struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
1257  __be16 *p = (__be16 *)(ipv6h+1);
1258 
1259  *(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000);
1260  ipv6h->hop_limit = t->parms.hop_limit;
1261  ipv6h->nexthdr = NEXTHDR_GRE;
1262  ipv6h->saddr = t->parms.laddr;
1263  ipv6h->daddr = t->parms.raddr;
1264 
1265  p[0] = t->parms.o_flags;
1266  p[1] = htons(type);
1267 
1268  /*
1269  * Set the source hardware address.
1270  */
1271 
1272  if (saddr)
1273  memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr));
1274  if (daddr)
1275  memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr));
1276  if (!ipv6_addr_any(&ipv6h->daddr))
1277  return t->hlen;
1278 
1279  return -t->hlen;
1280 }
1281 
/*
 * Link-layer header operations for ip6gre devices; only .create is provided.
 * ip6gre_tunnel_init() installs this table when the tunnel's remote address
 * is the unspecified address.
 */
1282  static const struct header_ops ip6gre_header_ops = {
1283  .create = ip6gre_header,
1284  };
1285 
1286 static const struct net_device_ops ip6gre_netdev_ops = {
1287  .ndo_init = ip6gre_tunnel_init,
1288  .ndo_uninit = ip6gre_tunnel_uninit,
1289  .ndo_start_xmit = ip6gre_tunnel_xmit,
1290  .ndo_do_ioctl = ip6gre_tunnel_ioctl,
1291  .ndo_change_mtu = ip6gre_tunnel_change_mtu,
1292  .ndo_get_stats64 = ip6gre_get_stats64,
1293 };
1294 
/*
 * ip6gre_dev_free - free the per-CPU statistics held in dev->tstats, then the
 * net_device itself.  Installed as dev->destructor by ip6gre_tunnel_setup()
 * and ip6gre_tap_setup(), and called directly on the ip6gre_init_net() error
 * path.
 */
1295  static void ip6gre_dev_free(struct net_device *dev)
1296  {
1297  free_percpu(dev->tstats);
1298  free_netdev(dev);
1299  }
1300 
1301 static void ip6gre_tunnel_setup(struct net_device *dev)
1302 {
1303  struct ip6_tnl *t;
1304 
1305  dev->netdev_ops = &ip6gre_netdev_ops;
1306  dev->destructor = ip6gre_dev_free;
1307 
1308  dev->type = ARPHRD_IP6GRE;
1309  dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4;
1310  dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4;
1311  t = netdev_priv(dev);
1312  if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1313  dev->mtu -= 8;
1314  dev->flags |= IFF_NOARP;
1315  dev->iflink = 0;
1316  dev->addr_len = sizeof(struct in6_addr);
1317  dev->features |= NETIF_F_NETNS_LOCAL;
1319 }
1320 
1321 static int ip6gre_tunnel_init(struct net_device *dev)
1322 {
1323  struct ip6_tnl *tunnel;
1324 
1325  tunnel = netdev_priv(dev);
1326 
1327  tunnel->dev = dev;
1328  strcpy(tunnel->parms.name, dev->name);
1329 
1330  memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
1331  memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
1332 
1333  if (ipv6_addr_any(&tunnel->parms.raddr))
1334  dev->header_ops = &ip6gre_header_ops;
1335 
1336  dev->tstats = alloc_percpu(struct pcpu_tstats);
1337  if (!dev->tstats)
1338  return -ENOMEM;
1339 
1340  return 0;
1341 }
1342 
1343 static void ip6gre_fb_tunnel_init(struct net_device *dev)
1344 {
1345  struct ip6_tnl *tunnel = netdev_priv(dev);
1346 
1347  tunnel->dev = dev;
1348  strcpy(tunnel->parms.name, dev->name);
1349 
1350  tunnel->hlen = sizeof(struct ipv6hdr) + 4;
1351 
1352  dev_hold(dev);
1353 }
1354 
1355 
1356 static struct inet6_protocol ip6gre_protocol __read_mostly = {
1357  .handler = ip6gre_rcv,
1358  .err_handler = ip6gre_err,
1359  .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1360 };
1361 
1362 static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,
1363  struct list_head *head)
1364 {
1365  int prio;
1366 
1367  for (prio = 0; prio < 4; prio++) {
1368  int h;
1369  for (h = 0; h < HASH_SIZE; h++) {
1370  struct ip6_tnl *t;
1371 
1372  t = rtnl_dereference(ign->tunnels[prio][h]);
1373 
1374  while (t != NULL) {
1375  unregister_netdevice_queue(t->dev, head);
1376  t = rtnl_dereference(t->next);
1377  }
1378  }
1379  }
1380 }
1381 
1382 static int __net_init ip6gre_init_net(struct net *net)
1383 {
1384  struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1385  int err;
1386 
1387  ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
1388  ip6gre_tunnel_setup);
1389  if (!ign->fb_tunnel_dev) {
1390  err = -ENOMEM;
1391  goto err_alloc_dev;
1392  }
1393  dev_net_set(ign->fb_tunnel_dev, net);
1394 
1395  ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
1396  ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
1397 
1398  err = register_netdev(ign->fb_tunnel_dev);
1399  if (err)
1400  goto err_reg_dev;
1401 
1402  rcu_assign_pointer(ign->tunnels_wc[0],
1403  netdev_priv(ign->fb_tunnel_dev));
1404  return 0;
1405 
1406 err_reg_dev:
1407  ip6gre_dev_free(ign->fb_tunnel_dev);
1408 err_alloc_dev:
1409  return err;
1410 }
1411 
1412 static void __net_exit ip6gre_exit_net(struct net *net)
1413 {
1414  struct ip6gre_net *ign;
1415  LIST_HEAD(list);
1416 
1417  ign = net_generic(net, ip6gre_net_id);
1418  rtnl_lock();
1419  ip6gre_destroy_tunnels(ign, &list);
1421  rtnl_unlock();
1422 }
1423 
1424 static struct pernet_operations ip6gre_net_ops = {
1425  .init = ip6gre_init_net,
1426  .exit = ip6gre_exit_net,
1427  .id = &ip6gre_net_id,
1428  .size = sizeof(struct ip6gre_net),
1429 };
1430 
1431 static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1432 {
1433  __be16 flags;
1434 
1435  if (!data)
1436  return 0;
1437 
1438  flags = 0;
1439  if (data[IFLA_GRE_IFLAGS])
1440  flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1441  if (data[IFLA_GRE_OFLAGS])
1442  flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1443  if (flags & (GRE_VERSION|GRE_ROUTING))
1444  return -EINVAL;
1445 
1446  return 0;
1447 }
1448 
1449 static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1450 {
1451  struct in6_addr daddr;
1452 
1453  if (tb[IFLA_ADDRESS]) {
1454  if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1455  return -EINVAL;
1456  if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1457  return -EADDRNOTAVAIL;
1458  }
1459 
1460  if (!data)
1461  goto out;
1462 
1463  if (data[IFLA_GRE_REMOTE]) {
1464  nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
1465  if (ipv6_addr_any(&daddr))
1466  return -EINVAL;
1467  }
1468 
1469 out:
1470  return ip6gre_tunnel_validate(tb, data);
1471 }
1472 
1473 
1474 static void ip6gre_netlink_parms(struct nlattr *data[],
1475  struct __ip6_tnl_parm *parms)
1476 {
1477  memset(parms, 0, sizeof(*parms));
1478 
1479  if (!data)
1480  return;
1481 
1482  if (data[IFLA_GRE_LINK])
1483  parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1484 
1485  if (data[IFLA_GRE_IFLAGS])
1486  parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1487 
1488  if (data[IFLA_GRE_OFLAGS])
1489  parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1490 
1491  if (data[IFLA_GRE_IKEY])
1492  parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1493 
1494  if (data[IFLA_GRE_OKEY])
1495  parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1496 
1497  if (data[IFLA_GRE_LOCAL])
1498  nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr));
1499 
1500  if (data[IFLA_GRE_REMOTE])
1501  nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
1502 
1503  if (data[IFLA_GRE_TTL])
1504  parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]);
1505 
1506  if (data[IFLA_GRE_ENCAP_LIMIT])
1507  parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);
1508 
1509  if (data[IFLA_GRE_FLOWINFO])
1510  parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]);
1511 
1512  if (data[IFLA_GRE_FLAGS])
1513  parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
1514 }
1515 
1516 static int ip6gre_tap_init(struct net_device *dev)
1517 {
1518  struct ip6_tnl *tunnel;
1519 
1520  tunnel = netdev_priv(dev);
1521 
1522  tunnel->dev = dev;
1523  strcpy(tunnel->parms.name, dev->name);
1524 
1525  ip6gre_tnl_link_config(tunnel, 1);
1526 
1527  dev->tstats = alloc_percpu(struct pcpu_tstats);
1528  if (!dev->tstats)
1529  return -ENOMEM;
1530 
1531  return 0;
1532 }
1533 
1534 static const struct net_device_ops ip6gre_tap_netdev_ops = {
1535  .ndo_init = ip6gre_tap_init,
1536  .ndo_uninit = ip6gre_tunnel_uninit,
1537  .ndo_start_xmit = ip6gre_tunnel_xmit,
1538  .ndo_set_mac_address = eth_mac_addr,
1539  .ndo_validate_addr = eth_validate_addr,
1540  .ndo_change_mtu = ip6gre_tunnel_change_mtu,
1541  .ndo_get_stats64 = ip6gre_get_stats64,
1542 };
1543 
1544 static void ip6gre_tap_setup(struct net_device *dev)
1545 {
1546 
1547  ether_setup(dev);
1548 
1549  dev->netdev_ops = &ip6gre_tap_netdev_ops;
1550  dev->destructor = ip6gre_dev_free;
1551 
1552  dev->iflink = 0;
1553  dev->features |= NETIF_F_NETNS_LOCAL;
1554 }
1555 
1556 static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
1557  struct nlattr *tb[], struct nlattr *data[])
1558 {
1559  struct ip6_tnl *nt;
1560  struct net *net = dev_net(dev);
1561  struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1562  int err;
1563 
1564  nt = netdev_priv(dev);
1565  ip6gre_netlink_parms(data, &nt->parms);
1566 
1567  if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
1568  return -EEXIST;
1569 
1570  if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1571  eth_hw_addr_random(dev);
1572 
1573  nt->dev = dev;
1574  ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
1575 
1576  /* Can use a lockless transmit, unless we generate output sequences */
1577  if (!(nt->parms.o_flags & GRE_SEQ))
1578  dev->features |= NETIF_F_LLTX;
1579 
1580  err = register_netdevice(dev);
1581  if (err)
1582  goto out;
1583 
1584  dev_hold(dev);
1585  ip6gre_tunnel_link(ign, nt);
1586 
1587 out:
1588  return err;
1589 }
1590 
1591 static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
1592  struct nlattr *data[])
1593 {
1594  struct ip6_tnl *t, *nt;
1595  struct net *net = dev_net(dev);
1596  struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
1597  struct __ip6_tnl_parm p;
1598 
1599  if (dev == ign->fb_tunnel_dev)
1600  return -EINVAL;
1601 
1602  nt = netdev_priv(dev);
1603  ip6gre_netlink_parms(data, &p);
1604 
1605  t = ip6gre_tunnel_locate(net, &p, 0);
1606 
1607  if (t) {
1608  if (t->dev != dev)
1609  return -EEXIST;
1610  } else {
1611  t = nt;
1612 
1613  ip6gre_tunnel_unlink(ign, t);
1614  ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
1615  ip6gre_tunnel_link(ign, t);
1616  netdev_state_change(dev);
1617  }
1618 
1619  return 0;
1620 }
1621 
1622 static size_t ip6gre_get_size(const struct net_device *dev)
1623 {
1624  return
1625  /* IFLA_GRE_LINK */
1626  nla_total_size(4) +
1627  /* IFLA_GRE_IFLAGS */
1628  nla_total_size(2) +
1629  /* IFLA_GRE_OFLAGS */
1630  nla_total_size(2) +
1631  /* IFLA_GRE_IKEY */
1632  nla_total_size(4) +
1633  /* IFLA_GRE_OKEY */
1634  nla_total_size(4) +
1635  /* IFLA_GRE_LOCAL */
1636  nla_total_size(sizeof(struct in6_addr)) +
1637  /* IFLA_GRE_REMOTE */
1638  nla_total_size(sizeof(struct in6_addr)) +
1639  /* IFLA_GRE_TTL */
1640  nla_total_size(1) +
1641  /* IFLA_GRE_TOS */
1642  nla_total_size(1) +
1643  /* IFLA_GRE_ENCAP_LIMIT */
1644  nla_total_size(1) +
1645  /* IFLA_GRE_FLOWINFO */
1646  nla_total_size(4) +
1647  /* IFLA_GRE_FLAGS */
1648  nla_total_size(4) +
1649  0;
1650 }
1651 
1652 static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1653 {
1654  struct ip6_tnl *t = netdev_priv(dev);
1655  struct __ip6_tnl_parm *p = &t->parms;
1656 
1657  if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1658  nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
1659  nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
1660  nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1661  nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1662  nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->laddr) ||
1663  nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->raddr) ||
1664  nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
1665  /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/
1666  nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
1667  nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
1668  nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags))
1669  goto nla_put_failure;
1670  return 0;
1671 
1672 nla_put_failure:
1673  return -EMSGSIZE;
1674 }
1675 
1676 static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
1677  [IFLA_GRE_LINK] = { .type = NLA_U32 },
1678  [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1679  [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1680  [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1681  [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1682  [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
1683  [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) },
1684  [IFLA_GRE_TTL] = { .type = NLA_U8 },
1685  [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
1686  [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 },
1687  [IFLA_GRE_FLAGS] = { .type = NLA_U32 },
1688 };
1689 
1690 static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
1691  .kind = "ip6gre",
1692  .maxtype = IFLA_GRE_MAX,
1693  .policy = ip6gre_policy,
1694  .priv_size = sizeof(struct ip6_tnl),
1695  .setup = ip6gre_tunnel_setup,
1696  .validate = ip6gre_tunnel_validate,
1697  .newlink = ip6gre_newlink,
1698  .changelink = ip6gre_changelink,
1699  .get_size = ip6gre_get_size,
1700  .fill_info = ip6gre_fill_info,
1701 };
1702 
1703 static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
1704  .kind = "ip6gretap",
1705  .maxtype = IFLA_GRE_MAX,
1706  .policy = ip6gre_policy,
1707  .priv_size = sizeof(struct ip6_tnl),
1708  .setup = ip6gre_tap_setup,
1709  .validate = ip6gre_tap_validate,
1710  .newlink = ip6gre_newlink,
1711  .changelink = ip6gre_changelink,
1712  .get_size = ip6gre_get_size,
1713  .fill_info = ip6gre_fill_info,
1714 };
1715 
1716 /*
1717  * And now the module code and kernel interface.
1718  */
1719 
1720 static int __init ip6gre_init(void)
1721 {
1722  int err;
1723 
1724  pr_info("GRE over IPv6 tunneling driver\n");
1725 
1726  err = register_pernet_device(&ip6gre_net_ops);
1727  if (err < 0)
1728  return err;
1729 
1730  err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE);
1731  if (err < 0) {
1732  pr_info("%s: can't add protocol\n", __func__);
1733  goto add_proto_failed;
1734  }
1735 
1736  err = rtnl_link_register(&ip6gre_link_ops);
1737  if (err < 0)
1738  goto rtnl_link_failed;
1739 
1740  err = rtnl_link_register(&ip6gre_tap_ops);
1741  if (err < 0)
1742  goto tap_ops_failed;
1743 
1744 out:
1745  return err;
1746 
1747 tap_ops_failed:
1748  rtnl_link_unregister(&ip6gre_link_ops);
1749 rtnl_link_failed:
1750  inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
1751 add_proto_failed:
1752  unregister_pernet_device(&ip6gre_net_ops);
1753  goto out;
1754 }
1755 
/*
 * ip6gre_fini - module exit point.  Undoes the registrations made by
 * ip6gre_init() in the reverse order: both rtnetlink link types, then the
 * IPPROTO_GRE handler, then the pernet operations.
 */
1756  static void __exit ip6gre_fini(void)
1757  {
1758  rtnl_link_unregister(&ip6gre_tap_ops);
1759  rtnl_link_unregister(&ip6gre_link_ops);
1760  inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
1761  unregister_pernet_device(&ip6gre_net_ops);
1762  }
1763 
/*
 * Module entry/exit hooks and metadata.  The two aliases name the "ip6gre"
 * rtnetlink link kind and the "ip6gre0" fallback device created in
 * ip6gre_init_net().
 */
1764  module_init(ip6gre_init);
1765  module_exit(ip6gre_fini);
1766  MODULE_LICENSE("GPL");
1767  MODULE_AUTHOR("D. Kozlov ([email protected])");
1768  MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
1769  MODULE_ALIAS_RTNL_LINK("ip6gre");
1770  MODULE_ALIAS_NETDEV("ip6gre0");