/*
 * Format-string wrapper: expands to KBUILD_MODNAME, the literal ": ",
 * and then the caller-supplied format string, concatenated in that order.
 * NOTE(review): presumably consumed by the pr_*() logging helpers (e.g. the
 * pr_info() call in dp_init) so messages carry the module name - confirm.
 */
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 #include <linux/module.h>
23 #include <linux/if_arp.h>
24 #include <linux/if_vlan.h>
29 #include <linux/time.h>
31 #include <linux/genetlink.h>
32 #include <linux/kernel.h>
37 #include <linux/tcp.h>
38 #include <linux/udp.h>
39 #include <linux/ethtool.h>
40 #include <linux/wait.h>
41 #include <asm/div64.h>
43 #include <linux/netfilter_bridge.h>
44 #include <linux/netfilter_ipv4.h>
46 #include <linux/list.h>
/*
 * Interval used for flow rehashing, expressed as 10 * 60 * HZ.
 * NOTE(review): assuming HZ is timer ticks per second, this is ten minutes
 * in jiffies - confirm; the code that consumes this value is not visible here.
 */
70 #define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
92 static int queue_gso_packets(
struct net *,
int dp_ifindex,
struct sk_buff *,
94 static int queue_userspace_packet(
struct net *,
int dp_ifindex,
120 return vport->
ops->get_name(vport);
123 static int get_dpifindex(
struct datapath *dp)
132 ifindex = local->
ops->get_ifindex(local);
147 release_net(ovs_dp_get_net(dp));
164 head = vport_hash_bucket(dp, port_no);
165 hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) {
178 if (!IS_ERR(vport)) {
237 stats_counter = &stats->
n_hit;
243 u64_stats_update_begin(&stats->
sync);
245 u64_stats_update_end(&stats->
sync);
248 static struct genl_family dp_packet_genl_family = {
264 if (upcall_info->
portid == 0) {
269 dp_ifindex = get_dpifindex(dp);
275 if (!skb_is_gso(skb))
276 err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
278 err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
287 u64_stats_update_begin(&stats->
sync);
289 u64_stats_update_end(&stats->
sync);
294 static int queue_gso_packets(
struct net *net,
int dp_ifindex,
298 unsigned short gso_type = skb_shinfo(skb)->gso_type;
306 return PTR_ERR(segs);
311 err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
320 later_key = *upcall_info->
key;
323 later_info = *upcall_info;
324 later_info.
key = &later_key;
325 upcall_info = &later_info;
327 }
while ((skb = skb->
next));
337 }
while ((skb = nskb));
341 static int queue_userspace_packet(
struct net *net,
int dp_ifindex,
371 len += nla_total_size(skb->
len);
374 len += nla_total_size(8);
382 upcall =
genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
383 0, upcall_info->
cmd);
388 nla_nest_end(user_skb, nla);
392 nla_get_u64(upcall_info->
userdata));
398 err = genlmsg_unicast(net, user_skb, upcall_info->
portid);
406 static int flush_flows(
struct datapath *dp)
422 static int validate_actions(
const struct nlattr *
attr,
425 static int validate_sample(
const struct nlattr *
attr,
433 memset(attrs, 0,
sizeof(attrs));
444 if (!probability ||
nla_len(probability) !=
sizeof(
u32))
450 return validate_actions(actions, key, depth + 1);
453 static int validate_tp_port(
const struct sw_flow_key *flow_key)
456 if (flow_key->
ipv4.tp.src || flow_key->
ipv4.tp.dst)
459 if (flow_key->
ipv6.tp.src || flow_key->
ipv6.tp.dst)
466 static int validate_set(
const struct nlattr *a,
469 const struct nlattr *ovs_key = nla_data(a);
491 if (!flow_key->
ip.proto)
494 ipv4_key = nla_data(ovs_key);
507 return validate_tp_port(flow_key);
513 return validate_tp_port(flow_key);
522 static int validate_userspace(
const struct nlattr *attr)
532 attr, userspace_policy);
543 static int validate_actions(
const struct nlattr *attr,
563 int type = nla_type(a);
566 (action_lens[type] != nla_len(a) &&
567 action_lens[type] != (
u32)-1))
575 err = validate_userspace(a);
598 err = validate_set(a, key);
604 err = validate_sample(a, key, depth);
620 static void clear_stats(
struct sw_flow *flow)
647 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
654 memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);
656 skb_reset_mac_header(packet);
657 eth = eth_hdr(packet);
678 &flow->
key.phy.in_port,
695 OVS_CB(packet)->flow = flow;
728 static struct genl_ops dp_packet_genl_ops[] = {
731 .policy = packet_policy,
732 .doit = ovs_packet_cmd_execute
741 stats->
n_flows = ovs_flow_tbl_count(table);
752 start = u64_stats_fetch_begin_bh(&percpu_stats->
sync);
753 local_stats = *percpu_stats;
754 }
while (u64_stats_fetch_retry_bh(&percpu_stats->
sync, start));
756 stats->
n_hit += local_stats.n_hit;
757 stats->
n_missed += local_stats.n_missed;
758 stats->
n_lost += local_stats.n_lost;
770 .hdrsize =
sizeof(
struct ovs_header),
782 static int ovs_flow_cmd_fill_info(
struct sw_flow *flow,
struct datapath *dp,
786 const int skb_orig_len = skb->
len;
789 struct ovs_header *ovs_header;
796 lockdep_genl_is_held());
798 ovs_header =
genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
806 goto nla_put_failure;
810 nla_nest_end(skb, nla);
812 spin_lock_bh(&flow->
lock);
817 spin_unlock_bh(&flow->
lock);
821 goto nla_put_failure;
823 if (stats.n_packets &&
826 goto nla_put_failure;
830 goto nla_put_failure;
844 if (err < 0 && skb_orig_len)
847 return genlmsg_end(skb, ovs_header);
852 genlmsg_cancel(skb, ovs_header);
856 static struct sk_buff *ovs_flow_cmd_alloc_info(
struct sw_flow *flow)
862 lockdep_genl_is_held());
871 len += nla_total_size(1);
873 len += nla_total_size(8);
880 static struct sk_buff *ovs_flow_cmd_build_info(
struct sw_flow *flow,
887 skb = ovs_flow_cmd_alloc_info(flow);
891 retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd);
896 static int ovs_flow_cmd_new_or_set(
struct sk_buff *skb,
struct genl_info *info)
899 struct ovs_header *ovs_header = info->
userhdr;
918 error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0);
942 if (ovs_flow_tbl_need_to_expand(table)) {
946 if (!IS_ERR(new_table)) {
956 error = PTR_ERR(flow);
964 error = PTR_ERR(acts);
966 goto error_free_flow;
973 reply = ovs_flow_cmd_build_info(flow, dp, info->
snd_portid,
979 struct nlattr *acts_attrs;
994 lockdep_genl_is_held());
1003 error = PTR_ERR(new_acts);
1004 if (IS_ERR(new_acts))
1011 reply = ovs_flow_cmd_build_info(flow, dp, info->
snd_portid,
1016 spin_lock_bh(&flow->
lock);
1018 spin_unlock_bh(&flow->
lock);
1024 ovs_dp_flow_multicast_group.
id, info->
nlhdr,
1028 ovs_dp_flow_multicast_group.
id, PTR_ERR(reply));
1040 struct ovs_header *ovs_header = info->
userhdr;
1049 if (!a[OVS_FLOW_ATTR_KEY])
1055 dp = get_dp(sock_net(skb->
sk), ovs_header->
dp_ifindex);
1064 reply = ovs_flow_cmd_build_info(flow, dp, info->
snd_portid,
1067 return PTR_ERR(reply);
1069 return genlmsg_reply(reply, info);
1075 struct ovs_header *ovs_header = info->
userhdr;
1084 dp = get_dp(sock_net(skb->
sk), ovs_header->
dp_ifindex);
1088 if (!a[OVS_FLOW_ATTR_KEY])
1089 return flush_flows(dp);
1100 reply = ovs_flow_cmd_alloc_info(flow);
1106 err = ovs_flow_cmd_fill_info(flow, dp, reply, info->
snd_portid,
1119 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->
nlh));
1123 dp = get_dp(sock_net(skb->
sk), ovs_header->
dp_ifindex);
1133 bucket = cb->
args[0];
1139 if (ovs_flow_cmd_fill_info(flow, dp, skb,
1145 cb->
args[0] = bucket;
1151 static struct genl_ops dp_flow_genl_ops[] = {
1154 .policy = flow_policy,
1155 .doit = ovs_flow_cmd_new_or_set
1159 .policy = flow_policy,
1160 .doit = ovs_flow_cmd_del
1164 .policy = flow_policy,
1165 .doit = ovs_flow_cmd_get,
1166 .dumpit = ovs_flow_cmd_dump
1170 .policy = flow_policy,
1171 .doit = ovs_flow_cmd_new_or_set,
1180 static struct genl_family dp_datapath_genl_family = {
1182 .hdrsize =
sizeof(
struct ovs_header),
1193 static int ovs_dp_cmd_fill_info(
struct datapath *dp,
struct sk_buff *skb,
1196 struct ovs_header *ovs_header;
1200 ovs_header =
genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1211 goto nla_put_failure;
1213 get_dp_stats(dp, &dp_stats);
1215 goto nla_put_failure;
1217 return genlmsg_end(skb, ovs_header);
1220 genlmsg_cancel(skb, ovs_header);
1235 retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
1238 return ERR_PTR(retval);
1244 static struct datapath *lookup_datapath(
struct net *net,
1245 struct ovs_header *ovs_header,
1253 struct vport *vport;
1260 return dp ? dp : ERR_PTR(-
ENODEV);
1269 struct vport *vport;
1282 goto err_unlock_rtnl;
1284 ovs_dp_set_net(dp, hold_net(sock_net(skb->
sk)));
1295 goto err_destroy_table;
1302 goto err_destroy_percpu;
1309 parms.
name = nla_data(a[OVS_DP_ATTR_NAME]);
1314 parms.
upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
1316 vport = new_vport(&parms);
1317 if (IS_ERR(vport)) {
1318 err = PTR_ERR(vport);
1322 goto err_destroy_ports_array;
1325 reply = ovs_dp_cmd_build_info(dp, info->
snd_portid,
1327 err = PTR_ERR(reply);
1329 goto err_destroy_local_port;
1331 ovs_net =
net_generic(ovs_dp_get_net(dp), ovs_net_id);
1336 ovs_dp_datapath_multicast_group.
id, info->
nlhdr,
1340 err_destroy_local_port:
1342 err_destroy_ports_array:
1349 release_net(ovs_dp_get_net(dp));
1358 static void __dp_destroy(
struct datapath *dp)
1365 struct vport *vport;
1383 call_rcu(&dp->rcu, destroy_dp_rcu);
1392 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1397 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1399 err = PTR_ERR(reply);
1405 genl_notify(reply, genl_info_net(info), info->snd_portid,
1406 ovs_dp_datapath_multicast_group.
id, info->nlhdr,
1418 dp = lookup_datapath(sock_net(skb->
sk), info->
userhdr, info->
attrs);
1422 reply = ovs_dp_cmd_build_info(dp, info->
snd_portid,
1424 if (IS_ERR(reply)) {
1425 err = PTR_ERR(reply);
1427 ovs_dp_datapath_multicast_group.
id, err);
1432 ovs_dp_datapath_multicast_group.
id, info->
nlhdr,
1443 dp = lookup_datapath(sock_net(skb->
sk), info->
userhdr, info->
attrs);
1447 reply = ovs_dp_cmd_build_info(dp, info->
snd_portid,
1450 return PTR_ERR(reply);
1452 return genlmsg_reply(reply, info);
1457 struct ovs_net *ovs_net =
net_generic(sock_net(skb->
sk), ovs_net_id);
1476 static struct genl_ops dp_datapath_genl_ops[] = {
1479 .policy = datapath_policy,
1480 .doit = ovs_dp_cmd_new
1484 .policy = datapath_policy,
1485 .doit = ovs_dp_cmd_del
1489 .policy = datapath_policy,
1490 .doit = ovs_dp_cmd_get,
1491 .dumpit = ovs_dp_cmd_dump
1495 .policy = datapath_policy,
1496 .doit = ovs_dp_cmd_set,
1509 static struct genl_family dp_vport_genl_family = {
1511 .hdrsize =
sizeof(
struct ovs_header),
1523 static int ovs_vport_cmd_fill_info(
struct vport *vport,
struct sk_buff *skb,
1526 struct ovs_header *ovs_header;
1530 ovs_header =
genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1541 goto nla_put_failure;
1546 goto nla_put_failure;
1552 return genlmsg_end(skb, ovs_header);
1557 genlmsg_cancel(skb, ovs_header);
1572 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1575 return ERR_PTR(retval);
1581 static struct vport *lookup_vport(
struct net *net,
1582 struct ovs_header *ovs_header,
1586 struct vport *vport;
1597 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1600 return ERR_PTR(-
EFBIG);
1606 vport = ovs_vport_rtnl_rcu(dp, port_no);
1617 struct ovs_header *ovs_header = info->
userhdr;
1620 struct vport *vport;
1631 dp = get_dp(sock_net(skb->
sk), ovs_header->
dp_ifindex);
1636 if (a[OVS_VPORT_ATTR_PORT_NO]) {
1637 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1643 vport = ovs_vport_rtnl_rcu(dp, port_no);
1648 for (port_no = 1; ; port_no++) {
1653 vport = ovs_vport_rtnl(dp, port_no);
1659 parms.
name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1664 parms.
upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
1666 vport = new_vport(&parms);
1667 err = PTR_ERR(vport);
1673 if (IS_ERR(reply)) {
1674 err = PTR_ERR(reply);
1691 struct vport *vport;
1695 vport = lookup_vport(sock_net(skb->
sk), info->
userhdr, a);
1696 err = PTR_ERR(vport);
1709 if (a[OVS_VPORT_ATTR_UPCALL_PID])
1710 vport->
upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
1714 if (IS_ERR(reply)) {
1716 ovs_dp_vport_multicast_group.
id, PTR_ERR(reply));
1732 struct vport *vport;
1736 vport = lookup_vport(sock_net(skb->
sk), info->
userhdr, a);
1737 err = PTR_ERR(vport);
1741 if (vport->
port_no == OVSP_LOCAL) {
1748 err = PTR_ERR(reply);
1765 struct ovs_header *ovs_header = info->
userhdr;
1767 struct vport *vport;
1771 vport = lookup_vport(sock_net(skb->
sk), ovs_header, a);
1772 err = PTR_ERR(vport);
1778 err = PTR_ERR(reply);
1784 return genlmsg_reply(reply, info);
1793 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->
nlh));
1795 int bucket = cb->
args[0], skip = cb->
args[1];
1798 dp = get_dp(sock_net(skb->
sk), ovs_header->
dp_ifindex);
1804 struct vport *vport;
1808 hlist_for_each_entry_rcu(vport, n, &dp->
ports[i], dp_hash_node) {
1810 ovs_vport_cmd_fill_info(vport, skb,
1830 static struct genl_ops dp_vport_genl_ops[] = {
1833 .policy = vport_policy,
1834 .doit = ovs_vport_cmd_new
1838 .policy = vport_policy,
1839 .doit = ovs_vport_cmd_del
1843 .policy = vport_policy,
1844 .doit = ovs_vport_cmd_get,
1845 .dumpit = ovs_vport_cmd_dump
1849 .policy = vport_policy,
1850 .doit = ovs_vport_cmd_set,
1862 { &dp_datapath_genl_family,
1863 dp_datapath_genl_ops,
ARRAY_SIZE(dp_datapath_genl_ops),
1864 &ovs_dp_datapath_multicast_group },
1865 { &dp_vport_genl_family,
1866 dp_vport_genl_ops,
ARRAY_SIZE(dp_vport_genl_ops),
1867 &ovs_dp_vport_multicast_group },
1868 { &dp_flow_genl_family,
1869 dp_flow_genl_ops,
ARRAY_SIZE(dp_flow_genl_ops),
1870 &ovs_dp_flow_multicast_group },
1871 { &dp_packet_genl_family,
1872 dp_packet_genl_ops,
ARRAY_SIZE(dp_packet_genl_ops),
1876 static void dp_unregister_genl(
int n_families)
1880 for (i = 0; i < n_families; i++)
1884 static int dp_register_genl(
void)
1891 for (i = 0; i <
ARRAY_SIZE(dp_genl_families); i++) {
1910 dp_unregister_genl(n_registered);
1922 struct ovs_net *ovs_net =
net_generic(net, ovs_net_id);
1929 if (!IS_ERR(new_table)) {
1941 static int __net_init ovs_init_net(
struct net *net)
1943 struct ovs_net *ovs_net =
net_generic(net, ovs_net_id);
1945 INIT_LIST_HEAD(&ovs_net->
dps);
1949 static void __net_exit ovs_exit_net(
struct net *net)
1951 struct ovs_net *ovs_net =
net_generic(net, ovs_net_id);
1961 .
init = ovs_init_net,
1962 .exit = ovs_exit_net,
1964 .size =
sizeof(
struct ovs_net),
1967 static int __init dp_init(
void)
1974 pr_info(
"Open vSwitch switching datapath\n");
1982 goto error_flow_exit;
1986 goto error_vport_exit;
1990 goto error_netns_exit;
1992 err = dp_register_genl();
1994 goto error_unreg_notifier;
2000 error_unreg_notifier:
2012 static void dp_cleanup(
void)
2015 dp_unregister_genl(
ARRAY_SIZE(dp_genl_families));