13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <asm/uaccess.h>
22 #include <linux/netdevice.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/mroute.h>
29 #include <linux/in6.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
34 #include <linux/if_ether.h>
51 #if IS_ENABLED(CONFIG_IPV6)
/* When true, decapsulation logs packets whose outer-header ECN marking
 * is inconsistent with the inner packet (passed to the ECN decap check
 * in the receive path). NOTE(review): in the full file this is exposed
 * as a module parameter — the surrounding lines are not visible here. */
123 static bool log_ecn_error =
true;
/*
 * Hash an IPv4 address (remote endpoint) or GRE key into one of the 16
 * per-type tunnel hash buckets by folding the low two nibbles together.
 *
 * Fix: the macro argument is now parenthesized. The original expanded
 * `addr` bare, so a compound argument such as HASH(a + b) would bind the
 * (__force u32) cast to only part of the expression and hash the wrong
 * value. Behavior for simple lvalue/constant arguments is unchanged.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
/* Views into the per-net 2-D tunnel hash table, indexed by which of the
 * (remote, local) endpoints are specified for a tunnel:
 *   tunnels[3] — keyed by both remote and local address,
 *   tunnels[2] — remote address only,
 *   tunnels[1] — local address only,
 *   tunnels[0] — fully wildcarded.
 * Lookup walks from the most- to the least-specific table. */
163 #define tunnels_r_l tunnels[3]
164 #define tunnels_r tunnels[2]
165 #define tunnels_l tunnels[1]
166 #define tunnels_wc tunnels[0]
/* Walk one RCU-protected tunnel hash chain starting at @start.
 * Relies on a 'struct ip_tunnel *t' already declared in the caller's
 * scope (it is the implicit iteration variable), and must be invoked
 * inside an RCU read-side critical section so rcu_dereference() is
 * legal for each ->next link. */
171 #define for_each_ip_tunnel_rcu(start) \
172 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
194 start = u64_stats_fetch_begin_bh(&tstats->
syncp);
199 }
while (u64_stats_fetch_retry_bh(&tstats->
syncp, start));
229 return key == p->
i_key;
243 struct net *
net = dev_net(dev);
245 unsigned int h0 =
HASH(remote);
246 unsigned int h1 =
HASH(key);
251 int score, cand_score = 4;
254 if (local != t->
parms.iph.saddr ||
255 remote != t->
parms.iph.daddr ||
259 if (!ipgre_key_match(&t->
parms, flags, key))
263 t->
dev->type != dev_type)
267 if (t->
parms.link != link)
269 if (t->
dev->type != dev_type)
274 if (score < cand_score) {
281 if (remote != t->
parms.iph.daddr ||
285 if (!ipgre_key_match(&t->
parms, flags, key))
289 t->
dev->type != dev_type)
293 if (t->
parms.link != link)
295 if (t->
dev->type != dev_type)
300 if (score < cand_score) {
307 if ((local != t->
parms.iph.saddr &&
308 (local != t->
parms.iph.daddr ||
309 !ipv4_is_multicast(local))) ||
313 if (!ipgre_key_match(&t->
parms, flags, key))
317 t->
dev->type != dev_type)
321 if (t->
parms.link != link)
323 if (t->
dev->type != dev_type)
328 if (score < cand_score) {
335 if (t->
parms.i_key != key ||
340 t->
dev->type != dev_type)
344 if (t->
parms.link != link)
346 if (t->
dev->type != dev_type)
351 if (score < cand_score) {
362 return netdev_priv(dev);
373 unsigned int h =
HASH(key);
378 if (remote && !ipv4_is_multicast(remote)) {
389 return __ipgre_bucket(ign, &t->
parms);
405 for (tp = ipgre_bucket(ign, t);
415 static struct ip_tunnel *ipgre_tunnel_find(
struct net *net,
422 int link = parms->
link;
427 for (tp = __ipgre_bucket(ign, parms);
430 if (local == t->
parms.iph.saddr &&
431 remote == t->
parms.iph.daddr &&
432 key == t->
parms.i_key &&
433 link == t->
parms.link &&
434 type == t->
dev->type)
440 static struct ip_tunnel *ipgre_tunnel_locate(
struct net *net,
457 dev =
alloc_netdev(
sizeof(*t), name, ipgre_tunnel_setup);
461 dev_net_set(dev, net);
463 nt = netdev_priv(dev);
467 dev->
mtu = ipgre_tunnel_bind_dev(dev);
477 ipgre_tunnel_link(ign, nt);
485 static void ipgre_tunnel_uninit(
struct net_device *dev)
487 struct net *net = dev_net(dev);
490 ipgre_tunnel_unlink(ign, netdev_priv(dev));
513 int grehlen = (iph->ihl<<2) + 4;
514 const int type = icmp_hdr(skb)->type;
515 const int code = icmp_hdr(skb)->code;
532 if (skb_headlen(skb) < grehlen)
536 key = *(((
__be32 *)p) + (grehlen / 4) - 1);
582 if (t->
parms.iph.daddr == 0 ||
583 ipv4_is_multicast(t->
parms.iph.daddr))
597 ipgre_ecn_encapsulate(
u8 tos,
const struct iphdr *old_iph,
struct sk_buff *skb)
601 inner = old_iph->
tos;
603 inner = ipv6_get_dsfield((
const struct ipv6hdr *)old_iph);
604 return INET_ECN_encapsulate(tos, inner);
607 static int ipgre_rcv(
struct sk_buff *skb)
609 const struct iphdr *iph;
620 if (!pskb_may_pull(skb, 16))
637 csum = csum_fold(skb->
csum);
649 key = *(
__be32 *)(h + offset);
658 gre_proto = *(
__be16 *)(h + 2);
660 tunnel = ipgre_tunnel_lookup(skb->
dev,
675 if ((*(h + offset) & 0xF0) != 0x40)
680 __pskb_pull(skb, offset);
681 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
683 #ifdef CONFIG_NET_IPGRE_BROADCAST
684 if (ipv4_is_multicast(iph->
daddr)) {
686 if (rt_is_output_route(skb_rtable(skb)))
688 tunnel->
dev->stats.multicast++;
693 if (((flags&GRE_CSUM) && csum) ||
694 (!(flags&GRE_CSUM) && tunnel->
parms.i_flags&GRE_CSUM)) {
695 tunnel->
dev->stats.rx_crc_errors++;
696 tunnel->
dev->stats.rx_errors++;
699 if (tunnel->
parms.i_flags&GRE_SEQ) {
700 if (!(flags&GRE_SEQ) ||
702 tunnel->
dev->stats.rx_fifo_errors++;
703 tunnel->
dev->stats.rx_errors++;
711 if (!pskb_may_pull(skb,
ETH_HLEN)) {
712 tunnel->
dev->stats.rx_length_errors++;
713 tunnel->
dev->stats.rx_errors++;
719 skb_postpull_rcsum(skb, eth_hdr(skb),
ETH_HLEN);
722 __skb_tunnel_rx(skb, tunnel->
dev);
724 skb_reset_network_header(skb);
725 err = IP_ECN_decapsulate(iph, skb);
731 ++tunnel->
dev->stats.rx_frame_errors;
732 ++tunnel->
dev->stats.rx_errors;
738 u64_stats_update_begin(&tstats->
syncp);
741 u64_stats_update_end(&tstats->
syncp);
743 gro_cells_receive(&tunnel->
gro_cells, skb);
755 struct ip_tunnel *tunnel = netdev_priv(dev);
757 const struct iphdr *old_iph = ip_hdr(skb);
758 const struct iphdr *tiph;
765 unsigned int max_headroom;
775 IPCB(skb)->flags = 0;
781 gre_hlen = tunnel->
hlen;
782 tiph = &tunnel->
parms.iph;
785 if ((dst = tiph->
daddr) == 0) {
788 if (skb_dst(skb) ==
NULL) {
789 dev->
stats.tx_fifo_errors++;
794 rt = skb_rtable(skb);
795 dst = rt_nexthop(rt, old_iph->
daddr);
797 #if IS_ENABLED(CONFIG_IPV6)
801 bool do_tx_error_icmp;
804 neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->
daddr);
809 addr_type = ipv6_addr_type(addr6);
812 addr6 = &ipv6_hdr(skb)->daddr;
813 addr_type = ipv6_addr_type(addr6);
817 do_tx_error_icmp =
true;
819 do_tx_error_icmp =
false;
820 dst = addr6->s6_addr32[3];
822 neigh_release(neigh);
823 if (do_tx_error_icmp)
837 tos = ipv6_get_dsfield((
const struct ipv6hdr *)old_iph);
840 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->
saddr,
844 dev->
stats.tx_carrier_errors++;
851 dev->
stats.collisions++;
859 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
862 skb_dst(skb)->ops->update_pmtu(skb_dst(skb),
NULL, skb, mtu);
874 #if IS_ENABLED(CONFIG_IPV6)
878 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >=
IPV6_MIN_MTU) {
879 if ((tunnel->
parms.iph.daddr &&
880 !ipv4_is_multicast(tunnel->
parms.iph.daddr)) ||
881 rt6->rt6i_dst.plen == 128) {
883 dst_metric_set(skb_dst(skb),
RTAX_MTU, mtu);
900 dst_link_failure(skb);
907 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
908 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
914 dev->
stats.tx_dropped++;
919 skb_set_owner_w(new_skb, skb->
sk);
922 old_iph = ip_hdr(skb);
925 skb_reset_transport_header(skb);
927 skb_reset_network_header(skb);
932 skb_dst_set(skb, &rt->
dst);
940 iph->ihl =
sizeof(
struct iphdr) >> 2;
943 iph->
tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
944 iph->
daddr = fl4.daddr;
945 iph->
saddr = fl4.saddr;
947 if ((iph->
ttl = tiph->
ttl) == 0) {
950 #if IS_ENABLED(CONFIG_IPV6)
952 iph->
ttl = ((
const struct ipv6hdr *)old_iph)->hop_limit;
955 iph->
ttl = ip4_dst_hoplimit(&rt->
dst);
962 if (tunnel->
parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
965 if (tunnel->
parms.o_flags&GRE_SEQ) {
970 if (tunnel->
parms.o_flags&GRE_KEY) {
971 *ptr = tunnel->
parms.o_key;
974 if (tunnel->
parms.o_flags&GRE_CSUM) {
985 #if IS_ENABLED(CONFIG_IPV6)
987 dst_link_failure(skb);
990 dev->
stats.tx_errors++;
995 static int ipgre_tunnel_bind_dev(
struct net_device *dev)
999 const struct iphdr *iph;
1002 int addend =
sizeof(
struct iphdr) + 4;
1004 tunnel = netdev_priv(dev);
1005 iph = &tunnel->
parms.iph;
1013 rt = ip_route_output_gre(dev_net(dev), &fl4,
1015 tunnel->
parms.o_key,
1017 tunnel->
parms.link);
1027 if (!tdev && tunnel->
parms.link)
1037 if (tunnel->
parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1038 if (tunnel->
parms.o_flags&GRE_CSUM)
1040 if (tunnel->
parms.o_flags&GRE_KEY)
1042 if (tunnel->
parms.o_flags&GRE_SEQ)
1051 tunnel->
hlen = addend;
1062 struct net *net = dev_net(dev);
1073 t = ipgre_tunnel_locate(net, &p, 0);
1076 t = netdev_priv(dev);
1109 if (t->
dev != dev) {
1114 unsigned int nflags = 0;
1116 t = netdev_priv(dev);
1118 if (ipv4_is_multicast(p.
iph.daddr))
1120 else if (p.
iph.daddr)
1127 ipgre_tunnel_unlink(ign, t);
1135 ipgre_tunnel_link(ign, t);
1145 t->
parms.iph.frag_off = p.
iph.frag_off;
1148 dev->
mtu = ipgre_tunnel_bind_dev(dev);
1168 if ((t = ipgre_tunnel_locate(net, &p, 0)) ==
NULL)
1175 unregister_netdevice(dev);
1187 static int ipgre_tunnel_change_mtu(
struct net_device *dev,
int new_mtu)
1189 struct ip_tunnel *tunnel = netdev_priv(dev);
1227 unsigned short type,
1228 const void *
daddr,
const void *
saddr,
unsigned int len)
1235 p[0] = t->
parms.o_flags;
1252 static int ipgre_header_parse(
const struct sk_buff *skb,
unsigned char *haddr)
1254 const struct iphdr *iph = (
const struct iphdr *) skb_mac_header(skb);
1259 static const struct header_ops ipgre_header_ops = {
1260 .create = ipgre_header,
1261 .parse = ipgre_header_parse,
1264 #ifdef CONFIG_NET_IPGRE_BROADCAST
1265 static int ipgre_open(
struct net_device *dev)
1269 if (ipv4_is_multicast(t->
parms.iph.daddr)) {
1273 rt = ip_route_output_gre(dev_net(dev), &fl4,
1283 if (__in_dev_get_rtnl(dev) ==
NULL)
1291 static int ipgre_close(
struct net_device *dev)
1295 if (ipv4_is_multicast(t->
parms.iph.daddr) && t->
mlink) {
1296 struct in_device *in_dev;
1307 .ndo_init = ipgre_tunnel_init,
1308 .ndo_uninit = ipgre_tunnel_uninit,
1309 #ifdef CONFIG_NET_IPGRE_BROADCAST
1310 .ndo_open = ipgre_open,
1311 .ndo_stop = ipgre_close,
1313 .ndo_start_xmit = ipgre_tunnel_xmit,
1314 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1315 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1316 .ndo_get_stats64 = ipgre_get_stats64,
1319 static void ipgre_dev_free(
struct net_device *dev)
1321 struct ip_tunnel *tunnel = netdev_priv(dev);
1328 #define GRE_FEATURES (NETIF_F_SG | \
1329 NETIF_F_FRAGLIST | \
1333 static void ipgre_tunnel_setup(
struct net_device *dev)
1351 static int ipgre_tunnel_init(
struct net_device *dev)
1357 tunnel = netdev_priv(dev);
1358 iph = &tunnel->
parms.iph;
1367 #ifdef CONFIG_NET_IPGRE_BROADCAST
1368 if (ipv4_is_multicast(iph->
daddr)) {
1382 err = gro_cells_init(&tunnel->
gro_cells, dev);
1391 static void ipgre_fb_tunnel_init(
struct net_device *dev)
1393 struct ip_tunnel *tunnel = netdev_priv(dev);
1402 tunnel->
hlen =
sizeof(
struct iphdr) + 4;
1409 .handler = ipgre_rcv,
1410 .err_handler = ipgre_err,
1417 for (prio = 0; prio < 4; prio++) {
1432 static int __net_init ipgre_init_net(
struct net *net)
1438 ipgre_tunnel_setup);
1461 static void __net_exit ipgre_exit_net(
struct net *net)
1468 ipgre_destroy_tunnels(ign, &
list);
1474 .init = ipgre_init_net,
1475 .exit = ipgre_exit_net,
1476 .id = &ipgre_net_id,
1489 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1491 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1498 static int ipgre_tap_validate(
struct nlattr *tb[],
struct nlattr *data[])
1503 if (nla_len(tb[IFLA_ADDRESS]) !=
ETH_ALEN)
1505 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1513 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1519 return ipgre_tunnel_validate(tb, data);
1522 static void ipgre_netlink_parms(
struct nlattr *data[],
1525 memset(parms, 0,
sizeof(*parms));
1533 parms->
link = nla_get_u32(data[IFLA_GRE_LINK]);
1535 if (data[IFLA_GRE_IFLAGS])
1536 parms->
i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1538 if (data[IFLA_GRE_OFLAGS])
1539 parms->
o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1542 parms->
i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1545 parms->
o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1548 parms->
iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1550 if (data[IFLA_GRE_REMOTE])
1551 parms->
iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1554 parms->
iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1557 parms->
iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1563 static int ipgre_tap_init(
struct net_device *dev)
1567 tunnel = netdev_priv(dev);
1572 ipgre_tunnel_bind_dev(dev);
1582 .ndo_init = ipgre_tap_init,
1583 .ndo_uninit = ipgre_tunnel_uninit,
1584 .ndo_start_xmit = ipgre_tunnel_xmit,
1587 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1588 .ndo_get_stats64 = ipgre_get_stats64,
1591 static void ipgre_tap_setup(
struct net_device *dev)
1603 static int ipgre_newlink(
struct net *src_net,
struct net_device *dev,
struct nlattr *tb[],
1607 struct net *net = dev_net(dev);
1612 nt = netdev_priv(dev);
1613 ipgre_netlink_parms(data, &nt->
parms);
1615 if (ipgre_tunnel_find(net, &nt->
parms, dev->
type))
1619 eth_hw_addr_random(dev);
1621 mtu = ipgre_tunnel_bind_dev(dev);
1626 if (!(nt->
parms.o_flags & GRE_SEQ))
1634 ipgre_tunnel_link(ign, nt);
1644 struct net *net = dev_net(dev);
1652 nt = netdev_priv(dev);
1653 ipgre_netlink_parms(data, &p);
1655 t = ipgre_tunnel_locate(net, &p, 0);
1664 unsigned int nflags = 0;
1666 if (ipv4_is_multicast(p.iph.daddr))
1668 else if (p.iph.daddr)
1671 if ((dev->
flags ^ nflags) &
1676 ipgre_tunnel_unlink(ign, t);
1677 t->
parms.iph.saddr = p.iph.saddr;
1678 t->
parms.iph.daddr = p.iph.daddr;
1679 t->
parms.i_key = p.i_key;
1684 ipgre_tunnel_link(ign, t);
1688 t->
parms.o_key = p.o_key;
1689 t->
parms.iph.ttl = p.iph.ttl;
1690 t->
parms.iph.tos = p.iph.tos;
1691 t->
parms.iph.frag_off = p.iph.frag_off;
1693 if (t->
parms.link != p.link) {
1694 t->
parms.link = p.link;
1695 mtu = ipgre_tunnel_bind_dev(dev);
1704 static size_t ipgre_get_size(
const struct net_device *dev)
1735 if (nla_put_u32(skb, IFLA_GRE_LINK, p->
link) ||
1736 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->
i_flags) ||
1737 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->
o_flags) ||
1738 nla_put_be32(skb, IFLA_GRE_IKEY, p->
i_key) ||
1739 nla_put_be32(skb, IFLA_GRE_OKEY, p->
o_key) ||
1740 nla_put_be32(skb, IFLA_GRE_LOCAL, p->
iph.saddr) ||
1741 nla_put_be32(skb, IFLA_GRE_REMOTE, p->
iph.daddr) ||
1742 nla_put_u8(skb, IFLA_GRE_TTL, p->
iph.ttl) ||
1743 nla_put_u8(skb, IFLA_GRE_TOS, p->
iph.tos) ||
1746 goto nla_put_failure;
1769 .policy = ipgre_policy,
1771 .
setup = ipgre_tunnel_setup,
1772 .validate = ipgre_tunnel_validate,
1773 .newlink = ipgre_newlink,
1774 .changelink = ipgre_changelink,
1775 .get_size = ipgre_get_size,
1776 .fill_info = ipgre_fill_info,
1782 .policy = ipgre_policy,
1784 .
setup = ipgre_tap_setup,
1785 .validate = ipgre_tap_validate,
1786 .newlink = ipgre_newlink,
1787 .changelink = ipgre_changelink,
1788 .get_size = ipgre_get_size,
1789 .fill_info = ipgre_fill_info,
1796 static int __init ipgre_init(
void)
1800 pr_info(
"GRE over IPv4 tunneling driver\n");
1808 pr_info(
"%s: can't add protocol\n", __func__);
1809 goto add_proto_failed;
1814 goto rtnl_link_failed;
1818 goto tap_ops_failed;
1832 static void __exit ipgre_fini(
void)
1837 pr_info(
"%s: can't remove protocol\n", __func__);