#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/in6.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>
#include <linux/hash.h>
#include <linux/if_tunnel.h>
static bool log_ecn_error = true;

#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
#define IPV6_TCLASS_SHIFT 20

#define HASH_SIZE_SHIFT  5
#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);

#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))

static u32 HASH_ADDR(const struct in6_addr *addr)
{
	u32 hash = ipv6_addr_hash(addr);

	return hash_32(hash, HASH_SIZE_SHIFT);
}
#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]
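
/*
 * Four lookup classes, from most to least specific, as in the IPv4 GRE
 * driver: remote+local address, remote only, local only, and wildcard.
 */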
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
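
/* Hash chains are walked under rcu_read_lock(); updates are RTNL-serialized. */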
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
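
/*
 * The fetch_begin/fetch_retry pair rereads the per-cpu counters until the
 * u64_stats seqcount is stable, giving a consistent 64-bit snapshot on
 * 32-bit hosts without locking out the datapath.
 */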
	struct net *net = dev_net(dev);
	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
	int link = dev->ifindex;
	unsigned int h0 = HASH_ADDR(remote);
	unsigned int h1 = HASH_KEY(key);
	struct ip6_tnl *t, *cand = NULL;
	int score, cand_score = 4;

	for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
		if (!ipv6_addr_equal(local, &t->parms.laddr) ||
		    !ipv6_addr_equal(remote, &t->parms.raddr) ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IP6GRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
		if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IP6GRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
		if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
		     (!ipv6_addr_equal(local, &t->parms.raddr) ||
		      !ipv6_addr_is_multicast(local))) ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IP6GRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IP6GRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	if (cand)
		return cand;

	dev = ign->fb_tunnel_dev;
	if (dev->flags & IFF_UP)
		return netdev_priv(dev);

	return NULL;
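
/*
 * __ip6gre_bucket() mirrors the lookup classes above: hash on the key,
 * mix in the remote address when it is a usable unicast one, and pick
 * the prio slot according to which addresses are set.
 */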
	if (!ipv6_addr_any(local))
		prio |= 1;
	if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) {
		prio |= 2;
		h ^= HASH_ADDR(remote);
	}

	return __ip6gre_bucket(ign, &t->parms);
	for (tp = ip6gre_bucket(ign, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
static struct ip6_tnl *ip6gre_tunnel_find(struct net *net,
					  const struct __ip6_tnl_parm *parms,
					  int type)

	int link = parms->link;

	for (tp = __ip6gre_bucket(ign, parms);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next)
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_equal(remote, &t->parms.raddr) &&
		    key == t->parms.i_key &&
		    link == t->parms.link &&
		    type == t->dev->type)
			break;
static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
		const struct __ip6_tnl_parm *parms, int create)

	dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup);

	dev_net_set(dev, net);

	nt = netdev_priv(dev);

	ip6gre_tnl_link_config(nt, 1);

	ip6gre_tunnel_link(ign, nt);
static void ip6gre_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);

	ip6gre_tunnel_unlink(ign, netdev_priv(dev));
	dev_put(dev);
}
	int grehlen = offset + 4;

	if (!pskb_may_pull(skb, grehlen))
		return;

	t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
				 flags & GRE_KEY ?
				 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
				 p[1]);

	if (teli && teli == info - 2) {
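
/*
 * Receive side: once the IPv6 header is pulled, the GRE flags, key and
 * sequence number are parsed and checked against the matching tunnel's
 * i_flags; failures are accounted in the rx_*_errors counters below
 * before the decapsulated packet is handed back to the stack.
 */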
static int ip6gre_rcv(struct sk_buff *skb)

	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
		goto drop;

	ipv6h = ipv6_hdr(skb);

			csum = csum_fold(skb->csum);

			key = *(__be32 *)(h + offset);

	gre_proto = *(__be16 *)(h + 2);

	tunnel = ip6gre_tunnel_lookup(skb->dev,
					  &ipv6h->saddr, &ipv6h->daddr, key,
					  gre_proto);

			tunnel->dev->stats.rx_dropped++;

			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;

		__pskb_pull(skb, offset);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);

		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			tunnel->dev->stats.rx_crc_errors++;
			tunnel->dev->stats.rx_errors++;

		if (tunnel->parms.i_flags&GRE_SEQ) {
			if (!(flags&GRE_SEQ) ||

				tunnel->dev->stats.rx_fifo_errors++;
				tunnel->dev->stats.rx_errors++;

			if (!pskb_may_pull(skb, ETH_HLEN)) {
				tunnel->dev->stats.rx_length_errors++;
				tunnel->dev->stats.rx_errors++;

			ipv6h = ipv6_hdr(skb);

			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);

		__skb_tunnel_rx(skb, tunnel->dev);

		skb_reset_network_header(skb);

		err = IP6_ECN_decapsulate(ipv6h, skb);
		if (unlikely(err)) {
			if (log_ecn_error)
				net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
						     &ipv6h->saddr,
						     ipv6_get_dsfield(ipv6h));
			if (err > 1) {
				++tunnel->dev->stats.rx_frame_errors;
				++tunnel->dev->stats.rx_errors;

		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);
	opt->ops.opt_nflen = 8;
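
/*
 * ip6gre_xmit2() is the common transmit worker for the three protocol
 * entry points below: it routes the flowi6, enforces path MTU, prepends
 * the outer IPv6 header plus GRE header, and fills the optional
 * checksum, key and sequence-number fields according to o_flags.
 */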
	struct net *net = dev_net(dev);
	struct ip6_tnl *tunnel = netdev_priv(dev);
	unsigned int max_headroom;

	IPCB(skb)->flags = 0;

	gre_hlen = tunnel->hlen;

	if (!fl6->flowi6_mark)

			goto tx_err_link_failure;

		goto tx_err_link_failure;

		goto tx_err_dst_release;

	mtu = dst_mtu(dst) - sizeof(*ipv6h);
	if (encap_limit >= 0) {

		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
		if (skb->len > mtu) {

			goto tx_err_dst_release;

		dst_link_failure(skb);

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {

			goto tx_err_dst_release;

			skb_set_owner_w(new_skb, skb->sk);

	if (fl6->flowi6_mark) {
		skb_dst_set(skb, dst);

	if (encap_limit >= 0) {
		init_tel_txopt(&opt, encap_limit);

	skb_reset_network_header(skb);

	ipv6h = ipv6_hdr(skb);

	dsfield = INET_ECN_encapsulate(0, dsfield);

	((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;

	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
		if (tunnel->parms.o_flags&GRE_SEQ) {

		if (tunnel->parms.o_flags&GRE_KEY) {
			*ptr = tunnel->parms.o_key;

		if (tunnel->parms.o_flags&GRE_CSUM) {

	dst_link_failure(skb);
	struct ip6_tnl *t = netdev_priv(dev);
	const struct iphdr *iph = ip_hdr(skb);
	int encap_limit = -1;

		encap_limit = t->parms.encap_limit;

	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));

	dsfield = ipv4_get_dsfield(iph);

		fl6.flowi6_mark = skb->mark;

	err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
	struct ip6_tnl *t = netdev_priv(dev);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	int encap_limit = -1;

	if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))

		encap_limit = t->parms.encap_limit;

	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));

	dsfield = ipv6_get_dsfield(ipv6h);

		fl6.flowi6_mark = skb->mark;

	err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t,
	const struct ipv6hdr *hdr)
{
	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
	struct ip6_tnl *t = netdev_priv(dev);
	int encap_limit = -1;

		encap_limit = t->parms.encap_limit;

	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));

	err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu);
	struct ip6_tnl *t = netdev_priv(dev);

		ret = ip6gre_xmit_ipv4(skb, dev);

		ret = ip6gre_xmit_ipv6(skb, dev);

		ret = ip6gre_xmit_other(skb, dev);
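
/*
 * ip6gre_tunnel_xmit() picks one of the three helpers above by
 * skb->protocol (ETH_P_IP, ETH_P_IPV6, anything else), keeping the
 * inner dsfield/flowlabel handling per-family. Below,
 * ip6gre_tnl_link_config() re-derives the cached flowi6 template, the
 * GRE option length (addend) and, when set_mtu is nonzero, the device
 * MTU from the route to the peer.
 */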
static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)

	int addend = sizeof(struct ipv6hdr) + 4;

	fl6->flowi6_oif = p->link;

	if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (t->parms.o_flags&GRE_CSUM)
			addend += 4;
		if (t->parms.o_flags&GRE_KEY)
			addend += 4;
		if (t->parms.o_flags&GRE_SEQ)
			addend += 4;
	}

		int strict = (ipv6_addr_type(&p->raddr) &
			      (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));

			dev->mtu = rt->dst.dev->mtu - addend;
static int ip6gre_tnl_change(struct ip6_tnl *t,
	const struct __ip6_tnl_parm *p, int set_mtu)

	ip6gre_tnl_link_config(t, set_mtu);
static int ip6gre_tunnel_ioctl(struct net_device *dev,
	struct ifreq *ifr, int cmd)

	struct net *net = dev_net(dev);

		ip6gre_tnl_parm_from_user(&p1, &p);
		t = ip6gre_tunnel_locate(net, &p1, 0);

			t = netdev_priv(dev);
		ip6gre_tnl_parm_to_user(&p, &t->parms);

		ip6gre_tnl_parm_from_user(&p1, &p);

			if (t->dev != dev) {

			t = netdev_priv(dev);

			ip6gre_tunnel_unlink(ign, t);

			ip6gre_tnl_change(t, &p1, 1);
			ip6gre_tunnel_link(ign, t);

			ip6gre_tnl_parm_to_user(&p, &t->parms);

		ip6gre_tnl_parm_from_user(&p1, &p);
		t = ip6gre_tunnel_locate(net, &p1, 0);

		unregister_netdevice(dev);
static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)

	struct ip6_tnl *tunnel = netdev_priv(dev);
			unsigned short type,
			const void *daddr, const void *saddr, unsigned int len)

	struct ip6_tnl *t = netdev_priv(dev);

	*(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000);

	p[0] = t->parms.o_flags;

	if (!ipv6_addr_any(&ipv6h->daddr))
static const struct header_ops ip6gre_header_ops = {
	.create	= ip6gre_header,
};

static const struct net_device_ops ip6gre_netdev_ops = {
	.ndo_init		= ip6gre_tunnel_init,
	.ndo_uninit		= ip6gre_tunnel_uninit,
	.ndo_start_xmit		= ip6gre_tunnel_xmit,
	.ndo_do_ioctl		= ip6gre_tunnel_ioctl,
	.ndo_change_mtu		= ip6gre_tunnel_change_mtu,
	.ndo_get_stats64	= ip6gre_get_stats64,
};
static void ip6gre_dev_free(struct net_device *dev)

static void ip6gre_tunnel_setup(struct net_device *dev)

	t = netdev_priv(dev);

static int ip6gre_tunnel_init(struct net_device *dev)

	tunnel = netdev_priv(dev);

	if (ipv6_addr_any(&tunnel->parms.raddr))
static void ip6gre_fb_tunnel_init(struct net_device *dev)

	struct ip6_tnl *tunnel = netdev_priv(dev);
static struct inet6_protocol ip6gre_protocol __read_mostly = {
	.handler	= ip6gre_rcv,
	.err_handler	= ip6gre_err,
	.flags		= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
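
/*
 * Registered for IPPROTO_GRE in ip6gre_init(); NOPOLICY|FINAL matches
 * the other IPv6 tunnel protocols, so no xfrm policy check is done here.
 */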
static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,
				   struct list_head *head)

	for (prio = 0; prio < 4; prio++) {
static int __net_init ip6gre_init_net(struct net *net)

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
					  ip6gre_tunnel_setup);

static void __net_exit ip6gre_exit_net(struct net *net)

	ip6gre_destroy_tunnels(ign, &list);

static struct pernet_operations ip6gre_net_ops = {
	.init = ip6gre_init_net,
	.exit = ip6gre_exit_net,
	.id   = &ip6gre_net_id,
	.size = sizeof(struct ip6gre_net),
};
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);

		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);

static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[])

		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)

		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))

		if (ipv6_addr_any(&daddr))

	return ip6gre_tunnel_validate(tb, data);
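
/*
 * Each IFLA_GRE_* netlink attribute below maps onto one __ip6_tnl_parm
 * field; attributes left out by userspace keep the zeroed defaults.
 */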
static void ip6gre_netlink_parms(struct nlattr *data[],
				 struct __ip6_tnl_parm *parms)

	memset(parms, 0, sizeof(*parms));

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_REMOTE])

	if (data[IFLA_GRE_TTL])
		parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_ENCAP_LIMIT])
		parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);

	if (data[IFLA_GRE_FLOWINFO])
		parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]);

	if (data[IFLA_GRE_FLAGS])
		parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
static int ip6gre_tap_init(struct net_device *dev)

	tunnel = netdev_priv(dev);

	ip6gre_tnl_link_config(tunnel, 1);

static const struct net_device_ops ip6gre_tap_netdev_ops = {
	.ndo_init		= ip6gre_tap_init,
	.ndo_uninit		= ip6gre_tunnel_uninit,
	.ndo_start_xmit		= ip6gre_tunnel_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip6gre_tunnel_change_mtu,
	.ndo_get_stats64	= ip6gre_get_stats64,
};
static void ip6gre_tap_setup(struct net_device *dev)
static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
	struct nlattr *tb[], struct nlattr *data[])

	struct net *net = dev_net(dev);

	nt = netdev_priv(dev);
	ip6gre_netlink_parms(data, &nt->parms);

	if (ip6gre_tunnel_find(net, &nt->parms, dev->type))

		eth_hw_addr_random(dev);

	ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);

	if (!(nt->parms.o_flags & GRE_SEQ))

	ip6gre_tunnel_link(ign, nt);
	struct net *net = dev_net(dev);

	nt = netdev_priv(dev);
	ip6gre_netlink_parms(data, &p);

	t = ip6gre_tunnel_locate(net, &p, 0);

		ip6gre_tunnel_unlink(ign, t);
	ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
	ip6gre_tunnel_link(ign, t);
static size_t ip6gre_get_size(const struct net_device *dev)

		/* IFLA_GRE_LOCAL */
		nla_total_size(sizeof(struct in6_addr)) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(sizeof(struct in6_addr)) +
	struct ip6_tnl *t = netdev_priv(dev);
	struct __ip6_tnl_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->laddr) ||
	    nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->raddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
	    nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
	    nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
	    nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags))
		goto nla_put_failure;
static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
	.kind		= "ip6gre",
	.policy		= ip6gre_policy,
	.priv_size	= sizeof(struct ip6_tnl),
	.setup		= ip6gre_tunnel_setup,
	.validate	= ip6gre_tunnel_validate,
	.newlink	= ip6gre_newlink,
	.changelink	= ip6gre_changelink,
	.get_size	= ip6gre_get_size,
	.fill_info	= ip6gre_fill_info,
};

static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
	.kind		= "ip6gretap",
	.policy		= ip6gre_policy,
	.priv_size	= sizeof(struct ip6_tnl),
	.setup		= ip6gre_tap_setup,
	.validate	= ip6gre_tap_validate,
	.newlink	= ip6gre_newlink,
	.changelink	= ip6gre_changelink,
	.get_size	= ip6gre_get_size,
	.fill_info	= ip6gre_fill_info,
};
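
/*
 * Two rtnl link kinds share this plumbing: "ip6gre" creates ARPHRD_IP6GRE
 * layer-3 tunnels, "ip6gretap" Ethernet (TAP) ones. With an iproute2 that
 * knows these kinds:
 *
 *   ip link add ip6gre1 type ip6gre local 2001:db8::1 remote 2001:db8::2
 */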
static int __init ip6gre_init(void)

	pr_info("GRE over IPv6 tunneling driver\n");

		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;

		goto rtnl_link_failed;

		goto tap_ops_failed;

static void __exit ip6gre_fini(void)