/* net/ipv4/ip_vti.c -- Linux kernel 3.7.1 */

/*
 *	Linux NET3: IP/IP protocol decoder modified to support
 *	virtual tunnel interface
 *
 *	Authors:
 *		Saurabh Mohan ([email protected]) 05/07/2012
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

/*
   This version of net/ipv4/ip_vti.c is cloned from net/ipv4/ipip.c

   For comments look at net/ipv4/ip_gre.c --ANK
 */


#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#define HASH_SIZE 16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&(HASH_SIZE-1))

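/*
 * Example (not part of ip_vti.c; a standalone userspace re-creation of
 * the HASH() fold above).  The macro XORs the address with itself shifted
 * right by 4 and keeps the low 4 bits, so the bucket index mixes the two
 * nibbles of the least-significant byte of the (big-endian) address.  The
 * addresses below are made up for illustration; __force is only a sparse
 * annotation in the kernel and is dropped here.
 */
#if 0	/* standalone demo, compile separately */
#include <stdio.h>
#include <stdint.h>

#define DEMO_HASH_SIZE 16
#define DEMO_HASH(addr) (((uint32_t)(addr) ^ ((uint32_t)(addr) >> 4)) & \
			 (DEMO_HASH_SIZE - 1))

int main(void)
{
	/* 10.0.0.1 and 10.0.0.18 as the u32 values a __be32 would hold */
	uint32_t addrs[] = { 0x0a000001, 0x0a000012 };
	unsigned i;

	for (i = 0; i < 2; i++)
		printf("addr 0x%08x -> bucket %u\n",
		       addrs[i], DEMO_HASH(addrs[i]));
	return 0;
}
#endif
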
static struct rtnl_link_ops vti_link_ops __read_mostly;

static int vti_net_id __read_mostly;
struct vti_net {
	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
	struct ip_tunnel __rcu *tunnels_wc[1];
	struct ip_tunnel __rcu **tunnels[4];

	struct net_device *fb_tunnel_dev;
};

static int vti_fb_tunnel_init(struct net_device *dev);
static int vti_tunnel_init(struct net_device *dev);
static void vti_tunnel_setup(struct net_device *dev);
static void vti_dev_free(struct net_device *dev);
static int vti_tunnel_bind_dev(struct net_device *dev);

/* Locking: hash tables are protected by RCU and RTNL */

#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

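/*
 * Sketch (hypothetical, not part of this file): how a lookup uses
 * for_each_ip_tunnel_rcu().  The macro assigns to a variable named 't'
 * that the caller must declare.  Readers traverse under rcu_read_lock()
 * (the receive path already runs in an RCU read-side section), while
 * writers relink chains only with rcu_assign_pointer() under RTNL, so a
 * walk never sees a half-updated pointer.
 */
#if 0	/* illustrative kernel-style sketch */
/* Hypothetical helper: first usable tunnel on one local-address chain.
 * Caller must hold rcu_read_lock(). */
static struct ip_tunnel *first_up_tunnel(struct vti_net *ipn, unsigned h)
{
	struct ip_tunnel *t;

	for_each_ip_tunnel_rcu(ipn->tunnels_l[h])
		if (t->dev->flags & IFF_UP)
			return t;	/* valid only inside the RCU section */
	return NULL;
}
#endif
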
/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_tstats {
	u64	rx_packets;
	u64	rx_bytes;
	u64	tx_packets;
	u64	tx_bytes;
	struct u64_stats_sync	syncp;
};

#define VTI_XMIT(stats1, stats2) do {				\
	int err;						\
	int pkt_len = skb->len;					\
	err = dst_output(skb);					\
	if (net_xmit_eval(err) == 0) {				\
		u64_stats_update_begin(&(stats1)->syncp);	\
		(stats1)->tx_bytes += pkt_len;			\
		(stats1)->tx_packets++;				\
		u64_stats_update_end(&(stats1)->syncp);		\
	} else {						\
		(stats2)->tx_errors++;				\
		(stats2)->tx_aborted_errors++;			\
	}							\
} while (0)

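/*
 * Demo (standalone, not part of ip_vti.c): the u64_stats_sync calls used
 * by VTI_XMIT() above and vti_get_stats64() below follow a sequence-count
 * protocol -- on 32-bit SMP kernels the writer makes the count odd while
 * updating and the reader retries if it saw an odd or changed count; on
 * 64-bit kernels the sync ops compile away because 64-bit loads are
 * atomic.  Below is a minimal single-threaded userspace analogue of that
 * begin/retry dance (the real kernel version also inserts memory
 * barriers); all names are invented for the demo.
 */
#if 0	/* standalone demo, compile separately */
#include <stdio.h>
#include <stdint.h>

struct demo_tstats {
	unsigned int seq;	/* even = stable, odd = update in flight */
	uint64_t tx_packets;
	uint64_t tx_bytes;
};

static void demo_update(struct demo_tstats *s, uint64_t len)
{
	s->seq++;		/* begin: count goes odd */
	s->tx_bytes += len;
	s->tx_packets++;
	s->seq++;		/* end: count goes even again */
}

static void demo_snapshot(const struct demo_tstats *s,
			  uint64_t *pkts, uint64_t *bytes)
{
	unsigned int start;

	do {
		start = s->seq;
		*pkts = s->tx_packets;
		*bytes = s->tx_bytes;
	} while ((start & 1) || start != s->seq);	/* retry on race */
}

int main(void)
{
	struct demo_tstats s = { 0, 0, 0 };
	uint64_t p, b;

	demo_update(&s, 1500);
	demo_snapshot(&s, &p, &b);
	printf("%llu packets, %llu bytes\n",
	       (unsigned long long)p, (unsigned long long)b);
	return 0;
}
#endif
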
static struct rtnl_link_stats64 *vti_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *tot)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, tx_packets, rx_bytes, tx_bytes;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes += rx_bytes;
		tot->tx_bytes += tx_bytes;
	}

	tot->multicast = dev->stats.multicast;
	tot->rx_crc_errors = dev->stats.rx_crc_errors;
	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_errors = dev->stats.rx_errors;
	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;

	return tot;
}

static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
					   __be32 remote, __be32 local)
{
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(local);
	struct ip_tunnel *t;
	struct vti_net *ipn = net_generic(net, vti_net_id);

	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;
	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_wc[0])
		if (t && (t->dev->flags&IFF_UP))
			return t;
	return NULL;
}

static struct ip_tunnel __rcu **__vti_bucket(struct vti_net *ipn,
					     struct ip_tunnel_parm *parms)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	unsigned h = 0;
	int prio = 0;

	if (remote) {
		prio |= 2;
		h ^= HASH(remote);
	}
	if (local) {
		prio |= 1;
		h ^= HASH(local);
	}
	return &ipn->tunnels[prio][h];
}

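/*
 * Demo (standalone, not part of ip_vti.c): __vti_bucket() encodes which
 * endpoint addresses are configured as a 2-bit priority -- bit 1 for a
 * remote address, bit 0 for a local one -- matching the tunnels[] setup
 * in vti_init_net() below (0 = wildcard, 1 = local only, 2 = remote only,
 * 3 = both).  Lookup then tries the most specific table first.
 */
#if 0	/* standalone demo, compile separately */
#include <stdio.h>

int main(void)
{
	static const char *table[] = {
		"tunnels_wc", "tunnels_l", "tunnels_r", "tunnels_r_l"
	};
	int remote, local;

	for (remote = 0; remote <= 1; remote++)
		for (local = 0; local <= 1; local++) {
			int prio = (remote ? 2 : 0) | (local ? 1 : 0);
			printf("remote=%d local=%d -> prio %d (%s)\n",
			       remote, local, prio, table[prio]);
		}
	return 0;
}
#endif
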
static inline struct ip_tunnel __rcu **vti_bucket(struct vti_net *ipn,
						  struct ip_tunnel *t)
{
	return __vti_bucket(ipn, &t->parms);
}

static void vti_tunnel_unlink(struct vti_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	for (tp = vti_bucket(ipn, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}

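/*
 * Demo (standalone, not part of ip_vti.c): vti_tunnel_unlink() uses the
 * classic pointer-to-pointer walk, so removing the head of a bucket is
 * the same operation as removing an interior node -- *tp is rewritten
 * wherever tp happens to point.  A plain C version of the same walk:
 */
#if 0	/* standalone demo, compile separately */
#include <stdio.h>

struct node { int v; struct node *next; };

static void unlink_node(struct node **head, struct node *t)
{
	struct node **tp, *iter;

	for (tp = head; (iter = *tp) != NULL; tp = &iter->next) {
		if (iter == t) {
			*tp = t->next;	/* kernel uses rcu_assign_pointer() */
			break;
		}
	}
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	struct node *head = &a, *p;

	unlink_node(&head, &b);
	for (p = head; p; p = p->next)
		printf("%d\n", p->v);	/* prints 1 then 3 */
	return 0;
}
#endif
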
static void vti_tunnel_link(struct vti_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp = vti_bucket(ipn, t);

	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}

static struct ip_tunnel *vti_tunnel_locate(struct net *net,
					   struct ip_tunnel_parm *parms,
					   int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	struct ip_tunnel *t, *nt;
	struct ip_tunnel __rcu **tp;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct vti_net *ipn = net_generic(net, vti_net_id);

	for (tp = __vti_bucket(ipn, parms);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "vti%d");

	dev = alloc_netdev(sizeof(*t), name, vti_tunnel_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	nt = netdev_priv(dev);
	nt->parms = *parms;
	dev->rtnl_link_ops = &vti_link_ops;

	vti_tunnel_bind_dev(dev);

	if (register_netdevice(dev) < 0)
		goto failed_free;

	dev_hold(dev);
	vti_tunnel_link(ipn, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}

static void vti_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct vti_net *ipn = net_generic(net, vti_net_id);

	vti_tunnel_unlink(ipn, netdev_priv(dev));
	dev_put(dev);
}

static int vti_err(struct sk_buff *skb, u32 info)
{

	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 */
	struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH. */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	err = -ENOENT;

	t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
	if (t == NULL)
		goto out;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 t->parms.link, 0, IPPROTO_IPIP, 0);
		err = 0;
		goto out;
	}

	err = 0;
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	return err;
}

/* We don't digest the packet therefore let the packet pass */
static int vti_rcv(struct sk_buff *skb)
{
	struct ip_tunnel *tunnel;
	const struct iphdr *iph = ip_hdr(skb);

	tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
	if (tunnel != NULL) {
		struct pcpu_tstats *tstats;

		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
			return -1;

		tstats = this_cpu_ptr(tunnel->dev->tstats);
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);

		skb->mark = 0;
		secpath_reset(skb);
		skb->dev = tunnel->dev;
		return 1;
	}

	return -1;
}

/* This function assumes it is being called from dev_queue_xmit()
 * and that skb is filled properly by that function.
 */

static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct pcpu_tstats *tstats;
	struct iphdr *tiph = &tunnel->parms.iph;
	u8 tos;
	struct rtable *rt;		/* Route to the other host */
	struct net_device *tdev;	/* Device to other host */
	struct iphdr *old_iph = ip_hdr(skb);
	__be32 dst = tiph->daddr;
	struct flowi4 fl4;

	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	tos = old_iph->tos;

	memset(&fl4, 0, sizeof(fl4));
	flowi4_init_output(&fl4, tunnel->parms.link,
			   be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos),
			   RT_SCOPE_UNIVERSE,
			   IPPROTO_IPIP, 0,
			   dst, tiph->saddr, 0, 0);
	rt = ip_route_output_key(dev_net(dev), &fl4);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error_icmp;
	}
	/* if there is no transform then this tunnel is not functional.
	 * Or if the xfrm is not mode tunnel.
	 */
	if (!rt->dst.xfrm ||
	    rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) {
		dev->stats.tx_carrier_errors++;
		goto tx_error_icmp;
	}
	tdev = rt->dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);
	nf_reset(skb);
	skb->dev = skb_dst(skb)->dev;

	tstats = this_cpu_ptr(dev->tstats);
	VTI_XMIT(tstats, &dev->stats);
	return NETDEV_TX_OK;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int vti_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	if (iph->daddr) {
		struct rtable *rt;
		struct flowi4 fl4;
		memset(&fl4, 0, sizeof(fl4));
		flowi4_init_output(&fl4, tunnel->parms.link,
				   be32_to_cpu(tunnel->parms.i_key),
				   RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
				   IPPROTO_IPIP, 0,
				   iph->daddr, iph->saddr, 0, 0);
		rt = ip_route_output_key(dev_net(dev), &fl4);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		dev->hard_header_len = tdev->hard_header_len +
				       sizeof(struct iphdr);
		dev->mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;
	return dev->mtu;
}

static int
vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct vti_net *ipn = net_generic(net, vti_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
					   sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = vti_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		p.i_flags |= GRE_KEY | VTI_ISVTI;
		p.o_flags |= GRE_KEY;
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5)
			goto done;

		t = vti_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				if (((dev->flags&IFF_POINTOPOINT) &&
				    !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) &&
				    p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
				vti_tunnel_unlink(ipn, t);
				synchronize_net();
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				t->parms.iph.protocol = IPPROTO_IPIP;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				vti_tunnel_link(ipn, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					vti_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			p.i_flags |= GRE_KEY | VTI_ISVTI;
			p.o_flags |= GRE_KEY;
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms,
					 sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
					   sizeof(p)))
				goto done;
			err = -ENOENT;

			t = vti_tunnel_locate(net, &p, 0);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t->dev == ipn->fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}

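/*
 * Sketch (hypothetical userspace program, not part of ip_vti.c): the
 * tunnel ioctls above live in the SIOCDEVPRIVATE range, so they reach
 * ndo_do_ioctl through an ordinary AF_INET socket, with ifr_name naming
 * the tunnel device and ifr_data pointing at a struct ip_tunnel_parm.
 * Querying the fallback device "ip_vti0" might look like this; header
 * choices are an assumption and may need adjusting per libc.
 */
#if 0	/* illustrative userspace sketch, compile separately */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/if.h>
#include <linux/if_tunnel.h>

int main(void)
{
	struct ip_tunnel_parm p;
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&p, 0, sizeof(p));
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "ip_vti0", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&p;	/* kernel copies the parms back here */
	if (ioctl(fd, SIOCGETTUNNEL, &ifr) == 0)
		printf("%s: i_flags=0x%x o_flags=0x%x\n",
		       ifr.ifr_name, p.i_flags, p.o_flags);
	close(fd);
	return 0;
}
#endif
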
static int vti_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	/* 68 is the minimum IPv4 MTU; 0xFFF8 is the largest 8-byte-aligned
	 * payload that still fits the 16-bit IP total-length field. */
	if (new_mtu < 68 || new_mtu > 0xFFF8)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops vti_netdev_ops = {
	.ndo_init	= vti_tunnel_init,
	.ndo_uninit	= vti_tunnel_uninit,
	.ndo_start_xmit	= vti_tunnel_xmit,
	.ndo_do_ioctl	= vti_tunnel_ioctl,
	.ndo_change_mtu	= vti_tunnel_change_mtu,
	.ndo_get_stats64 = vti_get_stats64,
};

static void vti_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}

static void vti_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &vti_netdev_ops;
	dev->destructor		= vti_dev_free;

	dev->type		= ARPHRD_TUNNEL;
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->features		|= NETIF_F_LLTX;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}

static int vti_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}

static int __net_init vti_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version = 4;
	iph->protocol = IPPROTO_IPIP;
	iph->ihl = 5;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	dev_hold(dev);
	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
	return 0;
}

static struct xfrm_tunnel vti_handler __read_mostly = {
	.handler	= vti_rcv,
	.err_handler	= vti_err,
	.priority	= 1,
};

static void vti_destroy_tunnels(struct vti_net *ipn, struct list_head *head)
{
	int prio;

	for (prio = 1; prio < 4; prio++) {
		int h;
		for (h = 0; h < HASH_SIZE; h++) {
			struct ip_tunnel *t;

			t = rtnl_dereference(ipn->tunnels[prio][h]);
			while (t != NULL) {
				unregister_netdevice_queue(t->dev, head);
				t = rtnl_dereference(t->next);
			}
		}
	}
}

static int __net_init vti_init_net(struct net *net)
{
	int err;
	struct vti_net *ipn = net_generic(net, vti_net_id);

	ipn->tunnels[0] = ipn->tunnels_wc;
	ipn->tunnels[1] = ipn->tunnels_l;
	ipn->tunnels[2] = ipn->tunnels_r;
	ipn->tunnels[3] = ipn->tunnels_r_l;

	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					  "ip_vti0",
					  vti_tunnel_setup);
	if (!ipn->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ipn->fb_tunnel_dev, net);

	err = vti_fb_tunnel_init(ipn->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;
	ipn->fb_tunnel_dev->rtnl_link_ops = &vti_link_ops;

	err = register_netdev(ipn->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;
	return 0;

err_reg_dev:
	vti_dev_free(ipn->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
	return err;
}

static void __net_exit vti_exit_net(struct net *net)
{
	struct vti_net *ipn = net_generic(net, vti_net_id);
	LIST_HEAD(list);

	rtnl_lock();
	vti_destroy_tunnels(ipn, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}

static struct pernet_operations vti_net_ops = {
	.init = vti_init_net,
	.exit = vti_exit_net,
	.id   = &vti_net_id,
	.size = sizeof(struct vti_net),
};

static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	return 0;
}

static void vti_netlink_parms(struct nlattr *data[],
			      struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_IPIP;

	if (!data)
		return;

	if (data[IFLA_VTI_LINK])
		parms->link = nla_get_u32(data[IFLA_VTI_LINK]);

	if (data[IFLA_VTI_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);

	if (data[IFLA_VTI_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);

	if (data[IFLA_VTI_LOCAL])
		parms->iph.saddr = nla_get_be32(data[IFLA_VTI_LOCAL]);

	if (data[IFLA_VTI_REMOTE])
		parms->iph.daddr = nla_get_be32(data[IFLA_VTI_REMOTE]);
}

static int vti_newlink(struct net *src_net, struct net_device *dev,
		       struct nlattr *tb[], struct nlattr *data[])
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct vti_net *ipn = net_generic(net, vti_net_id);
	int mtu;
	int err;

	nt = netdev_priv(dev);
	vti_netlink_parms(data, &nt->parms);

	if (vti_tunnel_locate(net, &nt->parms, 0))
		return -EEXIST;

	mtu = vti_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	err = register_netdevice(dev);
	if (err)
		goto out;

	dev_hold(dev);
	vti_tunnel_link(ipn, nt);

out:
	return err;
}

static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
			  struct nlattr *data[])
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct vti_net *ipn = net_generic(net, vti_net_id);
	struct ip_tunnel_parm p;
	int mtu;

	if (dev == ipn->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);
	vti_netlink_parms(data, &p);

	t = vti_tunnel_locate(net, &p, 0);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = nt;

		vti_tunnel_unlink(ipn, t);
		t->parms.iph.saddr = p.iph.saddr;
		t->parms.iph.daddr = p.iph.daddr;
		t->parms.i_key = p.i_key;
		t->parms.o_key = p.o_key;
		if (dev->type != ARPHRD_ETHER) {
			memcpy(dev->dev_addr, &p.iph.saddr, 4);
			memcpy(dev->broadcast, &p.iph.daddr, 4);
		}
		vti_tunnel_link(ipn, t);
		netdev_state_change(dev);
	}

	if (t->parms.link != p.link) {
		t->parms.link = p.link;
		mtu = vti_tunnel_bind_dev(dev);
		if (!tb[IFLA_MTU])
			dev->mtu = mtu;
		netdev_state_change(dev);
	}

	return 0;
}

static size_t vti_get_size(const struct net_device *dev)
{
	return
		/* IFLA_VTI_LINK */
		nla_total_size(4) +
		/* IFLA_VTI_IKEY */
		nla_total_size(4) +
		/* IFLA_VTI_OKEY */
		nla_total_size(4) +
		/* IFLA_VTI_LOCAL */
		nla_total_size(4) +
		/* IFLA_VTI_REMOTE */
		nla_total_size(4) +
		0;
}

static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	nla_put_u32(skb, IFLA_VTI_LINK, p->link);
	nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key);
	nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
	nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr);
	nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr);

	return 0;
}

static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
	[IFLA_VTI_LINK]		= { .type = NLA_U32 },
	[IFLA_VTI_IKEY]		= { .type = NLA_U32 },
	[IFLA_VTI_OKEY]		= { .type = NLA_U32 },
	[IFLA_VTI_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_VTI_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
};

static struct rtnl_link_ops vti_link_ops __read_mostly = {
	.kind		= "vti",
	.maxtype	= IFLA_VTI_MAX,
	.policy		= vti_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= vti_tunnel_setup,
	.validate	= vti_tunnel_validate,
	.newlink	= vti_newlink,
	.changelink	= vti_changelink,
	.get_size	= vti_get_size,
	.fill_info	= vti_fill_info,
};

static int __init vti_init(void)
{
	int err;

	pr_info("IPv4 over IPSec tunneling driver\n");

	err = register_pernet_device(&vti_net_ops);
	if (err < 0)
		return err;
	err = xfrm4_mode_tunnel_input_register(&vti_handler);
	if (err < 0) {
		unregister_pernet_device(&vti_net_ops);
		pr_info("vti init: can't register tunnel\n");
		return err;	/* don't continue with a dead receive path */
	}

	err = rtnl_link_register(&vti_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	return err;

rtnl_link_failed:
	xfrm4_mode_tunnel_input_deregister(&vti_handler);
	unregister_pernet_device(&vti_net_ops);
	return err;
}

static void __exit vti_fini(void)
{
	rtnl_link_unregister(&vti_link_ops);
	if (xfrm4_mode_tunnel_input_deregister(&vti_handler))
		pr_info("vti close: can't deregister tunnel\n");

	unregister_pernet_device(&vti_net_ops);
}

module_init(vti_init);
module_exit(vti_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("vti");
MODULE_ALIAS_NETDEV("ip_vti0");