/*
 * net/ipv4/ipip.c — IP-in-IP (IPIP) protocol decoder / tunnel driver.
 * Source corresponds to Linux kernel 3.7.1 (recovered from a Doxygen
 * page scrape; original navigation chrome removed).
 */
1 /*
2  * Linux NET3: IP/IP protocol decoder.
3  *
4  * Authors:
5  * Sam Lantinga ([email protected]) 02/01/95
6  *
7  * Fixes:
8  * Alan Cox : Merged and made usable non modular (its so tiny its silly as
9  * a module taking up 2 pages).
10  * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  * to keep ip_forward happy.
12  * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
14  * David Woodhouse : Perform some basic ICMP handling.
15  * IPIP Routing without decapsulation.
16  * Carlos Picoto : GRE over IP support
17  * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18  * I do not want to merge them together.
19  *
20  * This program is free software; you can redistribute it and/or
21  * modify it under the terms of the GNU General Public License
22  * as published by the Free Software Foundation; either version
23  * 2 of the License, or (at your option) any later version.
24  *
25  */
26 
27 /* tunnel.c: an IP tunnel driver
28 
29  The purpose of this driver is to provide an IP tunnel through
30  which you can tunnel network traffic transparently across subnets.
31 
32  This was written by looking at Nick Holloway's dummy driver
33  Thanks for the great code!
34 
35  -Sam Lantinga ([email protected]) 02/01/95
36 
37  Minor tweaks:
38  Cleaned up the code a little and added some pre-1.3.0 tweaks.
39  dev->hard_header/hard_header_len changed to use no headers.
40  Comments/bracketing tweaked.
41  Made the tunnels use dev->name not tunnel: when error reporting.
42  Added tx_dropped stat
43 
44  -Alan Cox ([email protected]) 21 March 95
45 
46  Reworked:
47  Changed to tunnel to destination gateway in addition to the
48  tunnel's pointopoint address
49  Almost completely rewritten
50  Note: There is currently no firewall or ICMP handling done.
51 
52  -Sam Lantinga ([email protected]) 02/13/96
53 
54 */
55 
56 /* Things I wish I had known when writing the tunnel driver:
57 
58  When the tunnel_xmit() function is called, the skb contains the
59  packet to be sent (plus a great deal of extra info), and dev
60  contains the tunnel device that _we_ are.
61 
62  When we are passed a packet, we are expected to fill in the
63  source address with our source IP address.
64 
65  What is the proper way to allocate, copy and free a buffer?
66  After you allocate it, it is a "0 length" chunk of memory
67  starting at zero. If you want to add headers to the buffer
68  later, you'll have to call "skb_reserve(skb, amount)" with
69  the amount of memory you want reserved. Then, you call
70  "skb_put(skb, amount)" with the amount of space you want in
71  the buffer. skb_put() returns a pointer to the top (#0) of
72  that buffer. skb->len is set to the amount of space you have
73  "allocated" with skb_put(). You can then write up to skb->len
74  bytes to that buffer. If you need more, you can call skb_put()
75  again with the additional amount of space you need. You can
76  find out how much more space you can allocate by calling
77  "skb_tailroom(skb)".
78  Now, to add header space, call "skb_push(skb, header_len)".
79  This creates space at the beginning of the buffer and returns
80  a pointer to this new space. If later you need to strip a
81  header from a buffer, call "skb_pull(skb, header_len)".
82  skb_headroom() will return how much space is left at the top
83  of the buffer (before the main data). Remember, this headroom
84  space must be reserved before the skb_put() function is called.
85  */
86 
87 /*
88  This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
89 
90  For comments look at net/ipv4/ip_gre.c --ANK
91  */
92 
93 
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110 
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119 
/* Tunnels are kept in 16-bucket hash tables keyed on endpoint address. */
#define HASH_SIZE  16
/* Fold bits 4-7 of the (network-byte-order) address into bits 0-3. */
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

/* When true, rate-limited-log packets whose outer ECN marking conflicts
 * with the inner header; consulted in ipip_rcv(). Writable via sysfs (0644). */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126 
127 static int ipip_net_id __read_mostly;
128 struct ipip_net {
133  struct ip_tunnel __rcu **tunnels[4];
134 
136 };
137 
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);

/*
 * Locking : hash tables are protected by RCU and RTNL
 */

/* Walk one hash chain under RCU.  NOTE: relies on a local variable
 * 'struct ip_tunnel *t' being in scope at the expansion site; it is
 * used as the iteration cursor. */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
148 
/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_tstats {
	u64			rx_packets;	/* bumped in ipip_rcv() */
	u64			rx_bytes;
	u64			tx_packets;
	u64			tx_bytes;
	/* Guards 64-bit counter reads on 32-bit SMP; see ipip_get_stats64(). */
	struct u64_stats_sync	syncp;
};
157 
158 static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
159  struct rtnl_link_stats64 *tot)
160 {
161  int i;
162 
164  const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
166  unsigned int start;
167 
168  do {
169  start = u64_stats_fetch_begin_bh(&tstats->syncp);
170  rx_packets = tstats->rx_packets;
171  tx_packets = tstats->tx_packets;
172  rx_bytes = tstats->rx_bytes;
173  tx_bytes = tstats->tx_bytes;
174  } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
175 
176  tot->rx_packets += rx_packets;
177  tot->tx_packets += tx_packets;
178  tot->rx_bytes += rx_bytes;
179  tot->tx_bytes += tx_bytes;
180  }
181 
182  tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
183  tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
184  tot->tx_dropped = dev->stats.tx_dropped;
185  tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
186  tot->tx_errors = dev->stats.tx_errors;
187  tot->collisions = dev->stats.collisions;
188 
189  return tot;
190 }
191 
192 static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
193  __be32 remote, __be32 local)
194 {
195  unsigned int h0 = HASH(remote);
196  unsigned int h1 = HASH(local);
197  struct ip_tunnel *t;
198  struct ipip_net *ipn = net_generic(net, ipip_net_id);
199 
200  for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
201  if (local == t->parms.iph.saddr &&
202  remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
203  return t;
204 
206  if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
207  return t;
208 
210  if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
211  return t;
212 
213  t = rcu_dereference(ipn->tunnels_wc[0]);
214  if (t && (t->dev->flags&IFF_UP))
215  return t;
216  return NULL;
217 }
218 
219 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
221 {
222  __be32 remote = parms->iph.daddr;
223  __be32 local = parms->iph.saddr;
224  unsigned int h = 0;
225  int prio = 0;
226 
227  if (remote) {
228  prio |= 2;
229  h ^= HASH(remote);
230  }
231  if (local) {
232  prio |= 1;
233  h ^= HASH(local);
234  }
235  return &ipn->tunnels[prio][h];
236 }
237 
/* Hash-chain head for an already-created tunnel, via its parms. */
static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
		struct ip_tunnel *t)
{
	return __ipip_bucket(ipn, &t->parms);
}
243 
/*
 * Remove @t from its hash chain.  Caller holds RTNL (rtnl_dereference
 * for the walk); concurrent RCU readers are kept safe by publishing
 * the successor with rcu_assign_pointer() before @t goes away.
 */
static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	for (tp = ipip_bucket(ipn, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}
258 
259 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
260 {
261  struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
262 
264  rcu_assign_pointer(*tp, t);
265 }
266 
/*
 * Find a tunnel whose endpoints exactly match @parms; if none exists
 * and @create is set, allocate, initialise, register and link a new
 * tunnel device.  Caller holds RTNL.  Returns the tunnel, or NULL on
 * lookup miss (!create) or on any allocation/registration failure.
 */
static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	struct ip_tunnel *t, *nt;
	struct ip_tunnel __rcu **tp;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	/* Exact-match scan of the one bucket this (remote, local) pair
	 * can live in. */
	for (tp = __ipip_bucket(ipn, parms);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	/* Either the user-supplied name or a kernel-assigned "tunlN". */
	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "tunl%d");

	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	nt = netdev_priv(dev);
	nt->parms = *parms;

	if (ipip_tunnel_init(dev) < 0)
		goto failed_free;

	if (register_netdevice(dev) < 0)
		goto failed_free;

	/* Record the final name ("tunl%d" may have been expanded). */
	strcpy(nt->parms.name, dev->name);

	/* Reference dropped in ipip_tunnel_uninit(). */
	dev_hold(dev);
	ipip_tunnel_link(ipn, nt);
	return nt;

failed_free:
	ipip_dev_free(dev);
	return NULL;
}
317 
/* called with RTNL */
/*
 * ndo_uninit: detach the tunnel from the per-netns tables and drop
 * the reference taken when it was linked.  The fallback device only
 * ever occupies the wildcard slot, so it is cleared directly.
 */
static void ipip_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	if (dev == ipn->fb_tunnel_dev)
		RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
	else
		ipip_tunnel_unlink(ipn, netdev_priv(dev));
	dev_put(dev);
}
330 
/*
 * ICMP error handler for IPPROTO_IPIP (the xfrm_tunnel err_handler
 * hook).  skb->data points at the IPv4 header quoted in the ICMP
 * payload — i.e. the outer header *we* transmitted — so its
 * daddr/saddr identify the tunnel.  Updates PMTU / redirects the
 * route, or records a rate-limited error for the xmit path to report.
 * Returns 0 when handled or deliberately ignored, -ENOENT when no
 * matching tunnel exists.
 */
static int ipip_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	/* First pass: discard ICMP types/codes we never act on. */
	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	case ICMP_REDIRECT:
		break;
	}

	err = -ENOENT;
	/* Quoted header is in transmit order: daddr = remote endpoint. */
	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
	if (t == NULL)
		goto out;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 t->dev->ifindex, 0, IPPROTO_IPIP, 0);
		err = 0;
		goto out;
	}

	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
			      IPPROTO_IPIP, 0);
		err = 0;
		goto out;
	}

	/* NBMA tunnel (no fixed remote): nobody to blame. */
	if (t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	/* ttl==0 means "inherit"; a TTL-exceeded then reflects the inner
	 * packet's own TTL, not a tunnel loop — ignore it. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Error bookkeeping, consumed/decayed by ipip_tunnel_xmit(). */
	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:

	return err;
}
406 
/*
 * Receive handler for IPPROTO_IPIP, installed via xfrm4_tunnel_register().
 * On entry iph is the *outer* header (its saddr/daddr are the tunnel
 * endpoints); skb->data already points at the encapsulated packet.
 * Decapsulates, folds in ECN, updates per-cpu stats and re-injects the
 * inner packet via netif_rx().
 * Returns 0 when the packet was consumed (delivered or dropped),
 * -1 to let subsequent handlers have a go.
 */
static int ipip_rcv(struct sk_buff *skb)
{
	struct ip_tunnel *tunnel;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

	tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
	if (tunnel != NULL) {
		struct pcpu_tstats *tstats;

		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
			goto drop;

		secpath_reset(skb);

		/* Re-point header offsets at the inner IPv4 packet. */
		skb->mac_header = skb->network_header;
		skb_reset_network_header(skb);
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		__skb_tunnel_rx(skb, tunnel->dev);

		/* Fold the outer header's ECN marking into the inner one;
		 * err > 1 means the packet must be dropped. */
		err = IP_ECN_decapsulate(iph, skb);
		if (unlikely(err)) {
			if (log_ecn_error)
				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
						     &iph->saddr, iph->tos);
			if (err > 1) {
				++tunnel->dev->stats.rx_frame_errors;
				++tunnel->dev->stats.rx_errors;
				goto drop;
			}
		}

		tstats = this_cpu_ptr(tunnel->dev->tstats);
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);

		netif_rx(skb);
		return 0;
	}

	return -1;

drop:
	kfree_skb(skb);
	return 0;
}
457 
458 /*
459  * This function assumes it is being called from dev_queue_xmit()
460  * and that skb is filled properly by that function.
461  */
462 
463 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
464 {
465  struct ip_tunnel *tunnel = netdev_priv(dev);
466  struct pcpu_tstats *tstats;
467  const struct iphdr *tiph = &tunnel->parms.iph;
468  u8 tos = tunnel->parms.iph.tos;
469  __be16 df = tiph->frag_off;
470  struct rtable *rt; /* Route to the other host */
471  struct net_device *tdev; /* Device to other host */
472  const struct iphdr *old_iph = ip_hdr(skb);
473  struct iphdr *iph; /* Our new IP header */
474  unsigned int max_headroom; /* The extra header space needed */
475  __be32 dst = tiph->daddr;
476  struct flowi4 fl4;
477  int mtu;
478 
479  if (skb->protocol != htons(ETH_P_IP))
480  goto tx_error;
481 
482  if (tos & 1)
483  tos = old_iph->tos;
484 
485  if (!dst) {
486  /* NBMA tunnel */
487  if ((rt = skb_rtable(skb)) == NULL) {
488  dev->stats.tx_fifo_errors++;
489  goto tx_error;
490  }
491  dst = rt_nexthop(rt, old_iph->daddr);
492  }
493 
494  rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
495  dst, tiph->saddr,
496  0, 0,
497  IPPROTO_IPIP, RT_TOS(tos),
498  tunnel->parms.link);
499  if (IS_ERR(rt)) {
500  dev->stats.tx_carrier_errors++;
501  goto tx_error_icmp;
502  }
503  tdev = rt->dst.dev;
504 
505  if (tdev == dev) {
506  ip_rt_put(rt);
507  dev->stats.collisions++;
508  goto tx_error;
509  }
510 
511  df |= old_iph->frag_off & htons(IP_DF);
512 
513  if (df) {
514  mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
515 
516  if (mtu < 68) {
517  dev->stats.collisions++;
518  ip_rt_put(rt);
519  goto tx_error;
520  }
521 
522  if (skb_dst(skb))
523  skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
524 
525  if ((old_iph->frag_off & htons(IP_DF)) &&
526  mtu < ntohs(old_iph->tot_len)) {
528  htonl(mtu));
529  ip_rt_put(rt);
530  goto tx_error;
531  }
532  }
533 
534  if (tunnel->err_count > 0) {
535  if (time_before(jiffies,
536  tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
537  tunnel->err_count--;
538  dst_link_failure(skb);
539  } else
540  tunnel->err_count = 0;
541  }
542 
543  /*
544  * Okay, now see if we can stuff it in the buffer as-is.
545  */
546  max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
547 
548  if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
549  (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
550  struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
551  if (!new_skb) {
552  ip_rt_put(rt);
553  dev->stats.tx_dropped++;
554  dev_kfree_skb(skb);
555  return NETDEV_TX_OK;
556  }
557  if (skb->sk)
558  skb_set_owner_w(new_skb, skb->sk);
559  dev_kfree_skb(skb);
560  skb = new_skb;
561  old_iph = ip_hdr(skb);
562  }
563 
564  skb->transport_header = skb->network_header;
565  skb_push(skb, sizeof(struct iphdr));
566  skb_reset_network_header(skb);
567  memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
570  skb_dst_drop(skb);
571  skb_dst_set(skb, &rt->dst);
572 
573  /*
574  * Push down and install the IPIP header.
575  */
576 
577  iph = ip_hdr(skb);
578  iph->version = 4;
579  iph->ihl = sizeof(struct iphdr)>>2;
580  iph->frag_off = df;
581  iph->protocol = IPPROTO_IPIP;
582  iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
583  iph->daddr = fl4.daddr;
584  iph->saddr = fl4.saddr;
585 
586  if ((iph->ttl = tiph->ttl) == 0)
587  iph->ttl = old_iph->ttl;
588 
589  nf_reset(skb);
590  tstats = this_cpu_ptr(dev->tstats);
591  __IPTUNNEL_XMIT(tstats, &dev->stats);
592  return NETDEV_TX_OK;
593 
594 tx_error_icmp:
595  dst_link_failure(skb);
596 tx_error:
597  dev->stats.tx_errors++;
598  dev_kfree_skb(skb);
599  return NETDEV_TX_OK;
600 }
601 
/*
 * Derive the tunnel device's link-layer parameters (hard_header_len,
 * mtu, iflink) from the underlying device its traffic will leave
 * through, found by routing towards the configured remote endpoint,
 * or falling back to the explicitly bound link index.
 */
static void ipip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	if (iph->daddr) {
		struct rtable *rt;
		struct flowi4 fl4;

		rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
					   iph->daddr, iph->saddr,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(iph->tos),
					   tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		/* A fixed remote makes this a point-to-point device. */
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		/* Account for the extra outer IPv4 header on every packet. */
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
	}
	dev->iflink = tunnel->parms.link;
}
637 
/*
 * ndo_do_ioctl: the classic tunnel configuration interface.
 *   SIOCGETTUNNEL - copy a tunnel's parameters to userspace.
 *   SIOCADDTUNNEL - create a tunnel (CAP_NET_ADMIN).
 *   SIOCCHGTUNNEL - modify a tunnel, possibly re-hashing it when its
 *                   endpoints change (CAP_NET_ADMIN).
 *   SIOCDELTUNNEL - delete a tunnel; the fallback device itself may
 *                   not be deleted (CAP_NET_ADMIN).
 * Called under RTNL.  Returns 0 or a negative errno.
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device, look up by the parms userspace
		 * passed in; on a real tunnel, report that tunnel. */
		if (dev == ipn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Sanity-check the requested outer header template. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					/* Another tunnel already owns these
					 * endpoints. */
					err = -EEXIST;
					break;
				}
			} else {
				/* Endpoint change must not flip the
				 * point-to-point nature of the device. */
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				/* Re-hash: unlink, update endpoints, relink. */
				t = netdev_priv(dev);
				ipip_tunnel_unlink(ipn, t);
				synchronize_net();
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(ipn, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipip_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			/* Never delete the fallback device itself. */
			if (t->dev == ipn->fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
753 
754 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
755 {
756  if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
757  return -EINVAL;
758  dev->mtu = new_mtu;
759  return 0;
760 }
761 
/* net_device callbacks for IPIP tunnel devices (see each handler). */
static const struct net_device_ops ipip_netdev_ops = {
	.ndo_uninit	= ipip_tunnel_uninit,
	.ndo_start_xmit	= ipip_tunnel_xmit,
	.ndo_do_ioctl	= ipip_tunnel_ioctl,
	.ndo_change_mtu	= ipip_tunnel_change_mtu,
	.ndo_get_stats64 = ipip_get_stats64,
};
769 
/* netdev destructor: release the per-cpu stats, then the device. */
static void ipip_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}
775 
776 static void ipip_tunnel_setup(struct net_device *dev)
777 {
778  dev->netdev_ops = &ipip_netdev_ops;
779  dev->destructor = ipip_dev_free;
780 
781  dev->type = ARPHRD_TUNNEL;
782  dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
783  dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
784  dev->flags = IFF_NOARP;
785  dev->iflink = 0;
786  dev->addr_len = 4;
788  dev->features |= NETIF_F_LLTX;
790 }
791 
/*
 * Initialise a freshly allocated (non-fallback) tunnel device:
 * publish the endpoints as dev/broadcast addresses, derive link
 * parameters, and allocate per-cpu stats.
 * Returns 0 or -ENOMEM.
 */
static int ipip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->dev = dev;

	/* dev_addr/broadcast mirror the tunnel endpoints (addr_len == 4). */
	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	ipip_tunnel_bind_dev(dev);

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}
809 
/*
 * Initialise the per-namespace fallback device ("tunl0").  Its
 * endpoints stay zero, so it sits in the wildcard slot and catches
 * any IPIP packet no explicit tunnel matches.
 * Returns 0 or -ENOMEM.
 */
static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version = 4;
	iph->protocol = IPPROTO_IPIP;
	iph->ihl = 5;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	/* Reference dropped in ipip_tunnel_uninit(). */
	dev_hold(dev);
	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
	return 0;
}
831 
/* Protocol hooks for IPPROTO_IPIP, registered in ipip_init(). */
static struct xfrm_tunnel ipip_handler __read_mostly = {
	.handler	= ipip_rcv,
	.err_handler	= ipip_err,
	.priority	= 1,
};

/* One-shot banner printed at module load. */
static const char banner[] __initconst =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
840 
841 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
842 {
843  int prio;
844 
845  for (prio = 1; prio < 4; prio++) {
846  int h;
847  for (h = 0; h < HASH_SIZE; h++) {
848  struct ip_tunnel *t;
849 
850  t = rtnl_dereference(ipn->tunnels[prio][h]);
851  while (t != NULL) {
853  t = rtnl_dereference(t->next);
854  }
855  }
856  }
857 }
858 
/*
 * Per-namespace init: wire up the flat tunnels[] index (matching the
 * prio encoding in __ipip_bucket()), then create and register the
 * fallback device "tunl0".  Returns 0 or a negative errno.
 */
static int __net_init ipip_init_net(struct net *net)
{
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
	struct ip_tunnel *t;
	int err;

	ipn->tunnels[0] = ipn->tunnels_wc;
	ipn->tunnels[1] = ipn->tunnels_l;
	ipn->tunnels[2] = ipn->tunnels_r;
	ipn->tunnels[3] = ipn->tunnels_r_l;

	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					  "tunl0",
					  ipip_tunnel_setup);
	if (!ipn->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ipn->fb_tunnel_dev, net);

	err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;

	if ((err = register_netdev(ipn->fb_tunnel_dev)))
		goto err_reg_dev;

	t = netdev_priv(ipn->fb_tunnel_dev);

	strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
	return 0;

err_reg_dev:
	/* Frees tstats (possibly NULL) and the netdev itself. */
	ipip_dev_free(ipn->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
	return err;
}
897 
898 static void __net_exit ipip_exit_net(struct net *net)
899 {
900  struct ipip_net *ipn = net_generic(net, ipip_net_id);
901  LIST_HEAD(list);
902 
903  rtnl_lock();
904  ipip_destroy_tunnels(ipn, &list);
907  rtnl_unlock();
908 }
909 
/* Per-network-namespace lifecycle hooks and state size/id. */
static struct pernet_operations ipip_net_ops = {
	.init = ipip_init_net,
	.exit = ipip_exit_net,
	.id   = &ipip_net_id,
	.size = sizeof(struct ipip_net),
};
916 
/*
 * Module init: register per-netns state first, then the protocol
 * handler, so packets can never arrive before their namespace state
 * exists.  Unwinds the pernet registration on handler failure.
 */
static int __init ipip_init(void)
{
	int err;

	printk(banner);

	err = register_pernet_device(&ipip_net_ops);
	if (err < 0)
		return err;
	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
	if (err < 0) {
		unregister_pernet_device(&ipip_net_ops);
		pr_info("%s: can't register tunnel\n", __func__);
	}
	return err;
}
933 
/*
 * Module exit: mirror of ipip_init() in reverse order — stop new
 * packets by deregistering the protocol handler before tearing down
 * the per-netns state (and with it the devices).
 */
static void __exit ipip_fini(void)
{
	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
		pr_info("%s: can't deregister tunnel\n", __func__);

	unregister_pernet_device(&ipip_net_ops);
}

module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
/* Auto-load this module when the "tunl0" device is requested. */
MODULE_ALIAS_NETDEV("tunl0");