#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/errno.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <linux/module.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/random.h>
#include <linux/pci.h>
#define MAX_GRO_SKBS 8
#define GRO_MAX_HEAD (MAX_HEADER + 128)

#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
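/*
 * Illustration (not part of dev.c): a standalone userspace sketch of how a
 * packet_type protocol number can be bucketed with PTYPE_HASH_MASK. The
 * assumption here is that the bucket index is the low four bits of the
 * host-order protocol value; extract the guarded block and build it on its
 * own to try it.
 */
#if 0	/* userspace sketch, kept out of the kernel build */
#include <arpa/inet.h>	/* htons()/ntohs() */
#include <stdio.h>

#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

int main(void)
{
	unsigned short proto = htons(0x0800);	/* ETH_P_IP in network byte order */
	unsigned int bucket = ntohs(proto) & PTYPE_HASH_MASK;

	printf("protocol 0x%04x -> bucket %u of %d\n",
	       ntohs(proto), bucket, PTYPE_HASH_SIZE);
	return 0;
}
#endif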
static inline void dev_base_seq_inc(struct net *net)
static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
struct net *net = dev_net(dev);
hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
		   dev_index_hash(net, dev->ifindex));
dev_base_seq_inc(net);
static void unlist_netdevice(struct net_device *dev)
dev_base_seq_inc(dev_net(dev));
#ifdef CONFIG_LOCKDEP
static const unsigned short netdev_lock_type[] =
static const char *const netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
	 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
	 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
static inline unsigned short netdev_lock_pos(unsigned short dev_type)
for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
	if (netdev_lock_type[i] == dev_type)
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
i = netdev_lock_pos(dev_type);
	netdev_lock_name[i]);
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
i = netdev_lock_pos(dev->type);
	&netdev_addr_lock_key[i],
	netdev_lock_name[i]);
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
spin_lock(&ptype_lock);
list_add_rcu(&pt->list, head);
spin_unlock(&ptype_lock);
spin_lock(&ptype_lock);
list_del_rcu(&pt->list);
pr_warn("dev_remove_pack: %p not found\n", pt);
spin_unlock(&ptype_lock);
static int netdev_boot_setup_add(char *name, struct ifmap *map)
if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
	memset(s[i].name, 0, sizeof(s[i].name));
	memcpy(&s[i].map, map, sizeof(s[i].map));
return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
sprintf(name, "%s%d", prefix, unit);
if (!strcmp(name, s[i].name))
	return s[i].map.base_addr;
memset(&map, 0, sizeof(map));
return netdev_boot_setup_add(str, &map);
hlist_for_each_entry_rcu(dev, p, head, name_hlist)
hlist_for_each_entry_rcu(dev, p, head, index_hlist)
if (dev->type == type &&
if (dev->type == type)
if (dev->type == type) {
if (((dev->flags ^ if_flags) & mask) == 0) {
if (*name == '/' || isspace(*name))
static int __dev_alloc_name(struct net *net, const char *name, char *buf)
unsigned long *inuse;
if (p[1] != 'd' || strchr(p + 2, '%'))
if (i < 0 || i >= max_netdevices)
ret = __dev_alloc_name(net, name, buf);
static int dev_alloc_name_ns(struct net *net,
ret = __dev_alloc_name(net, name, buf);
static int dev_get_valid_name(struct net *net,
	return dev_alloc_name_ns(net, dev, name);
else if (dev->name != name)
err = dev_get_valid_name(net, dev, newname);
hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
ret = notifier_to_errno(ret);
pr_err("%s: name change rollback failed: %d\n",
no_module = request_module("netdev-%s", name);
if (!request_module("%s", name))
	pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
static int __dev_open(struct net_device *dev)
if (!netif_device_present(dev))
ret = notifier_to_errno(ret);
net_dmaengine_get();
ret = __dev_open(dev);
net_dmaengine_put();
static int __dev_close(struct net_device *dev)
retval = __dev_close_many(&single);
static int dev_close_many(struct list_head *head)
__dev_close_many(head);
list_splice(&tmp_list, head);
dev_close_many(&single);
if (is_vlan_dev(dev))
static int dev_boot_phase = 1;
err = notifier_to_errno(err);
#ifdef HAVE_JUMP_LABEL
static atomic_t netstamp_needed_deferred;
#ifdef HAVE_JUMP_LABEL
int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
static_key_slow_dec(&netstamp_needed);
static_key_slow_inc(&netstamp_needed);
#ifdef HAVE_JUMP_LABEL
static_key_slow_dec(&netstamp_needed);
static inline void net_timestamp_set(struct sk_buff *skb)
if (static_key_false(&netstamp_needed))
	__net_timestamp(skb);
#define net_timestamp_check(COND, SKB)			\
	if (static_key_false(&netstamp_needed)) {	\
		if ((COND) && !(SKB)->tstamp.tv64)	\
			__net_timestamp(SKB);		\
static int net_hwtstamp_validate(struct ifreq *ifr)
int tx_type_valid = 0;
int rx_filter_valid = 0;
switch (rx_filter) {
	rx_filter_valid = 1;
if (!tx_type_valid || !rx_filter_valid)
static inline bool is_skb_forwardable(struct net_device *dev,
if (skb->len <= len)
if (skb_is_gso(skb))
if (unlikely(!is_skb_forwardable(dev, skb))) {
static inline int deliver_skb(struct sk_buff *skb,
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
list_for_each_entry_rcu(ptype, &ptype_all, list) {
	if ((ptype->dev == dev || !ptype->dev) &&
	    (!skb_loop_sk(ptype, skb))) {
		deliver_skb(skb2, pt_prev, skb->dev);
net_timestamp_set(skb2);
skb_reset_mac_header(skb2);
if (skb_network_header(skb2) < skb2->data ||
	skb_reset_network_header(skb2);
pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
static void netif_setup_tc(struct net_device *dev, unsigned int txq)
pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
int q = netdev_get_prio_tc_map(dev, i);
pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
netdev_set_prio_tc_map(dev, i, 0);
if (dev->reg_state == NETREG_REGISTERED ||
    dev->reg_state == NETREG_UNREGISTERING) {
	netif_setup_tc(dev, txq);
if (txq < dev->real_num_tx_queues)
	qdisc_reset_all_tx_gt(dev, txq);
int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
if (rxq < 1 || rxq > dev->num_rx_queues)
if (dev->reg_state == NETREG_REGISTERED) {
dev->real_num_rx_queues = rxq;
static inline void __netif_reschedule(struct Qdisc *q)
unsigned long flags;
__netif_reschedule(q);
unsigned long flags;
    netif_running(dev)) {
	netif_tx_stop_all_queues(dev);
    netif_running(dev)) {
	netif_tx_wake_all_queues(dev);
static void skb_warn_bad_offload(const struct sk_buff *skb)
if (dev && dev->dev.parent)
WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
	"gso_type=%d ip_summed=%d\n",
     driver, dev ? &dev->features : &null_features,
     skb->sk ? &skb->sk->sk_route_caps : &null_features,
     skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
     skb_shinfo(skb)->gso_type, skb->ip_summed);
goto out_set_summed;
if (unlikely(skb_shinfo(skb)->gso_size)) {
	skb_warn_bad_offload(skb);
offset = skb_checksum_start_offset(skb);
if (skb_cloned(skb) &&
skb_reset_mac_header(skb);
__skb_pull(skb, skb->mac_len);
skb_warn_bad_offload(skb);
if (skb_header_cloned(skb) &&
	return ERR_PTR(err);
list_for_each_entry_rcu(ptype,
segs = ERR_PTR(err);
if (err || skb_gso_ok(skb, features))
__skb_push(skb, (skb->data - skb_network_header(skb)));
__skb_push(skb, skb->data - skb_mac_header(skb));
void netdev_rx_csum_fault(struct net_device *dev)
pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
#ifdef CONFIG_HIGHMEM
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
	if (PageHighMem(skb_frag_page(frag)))
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
static void dev_gso_skb_destructor(struct sk_buff *skb)
} while (skb->next);
return PTR_ERR(segs);
    !can_checksum_protocol(features, protocol)) {
} else if (illegal_highdma(skb->dev, skb)) {
if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
return harmonize_features(skb, protocol, features);
return harmonize_features(skb, protocol, features);
return harmonize_features(skb, protocol, features);
static inline int skb_needs_linearize(struct sk_buff *skb,
return skb_is_nonlinear(skb) &&
       ((skb_has_frag_list(skb) &&
	(skb_shinfo(skb)->nr_frags &&
unsigned int skb_len;
if (netif_needs_gso(skb, features)) {
	if (unlikely(dev_gso_segment(skb, features)))
if (skb_needs_linearize(skb, features) &&
    __skb_linearize(skb))
skb_set_transport_header(skb,
			 skb_checksum_start_offset(skb));
if (!list_empty(&ptype_all))
	dev_queue_xmit_nit(skb, dev);
trace_net_dev_xmit(skb, rc, dev, skb_len);
txq_trans_update(txq);
if (!list_empty(&ptype_all))
	dev_queue_xmit_nit(nskb, dev);
skb_len = nskb->len;
trace_net_dev_xmit(nskb, rc, dev, skb_len);
goto out_kfree_gso_skb;
txq_trans_update(txq);
} while (skb->next);
		  unsigned int num_tx_queues)
u16 qcount = num_tx_queues;
if (skb_rx_queue_recorded(skb)) {
	hash = skb_get_rx_queue(skb);
	while (unlikely(hash >= num_tx_queues))
		hash -= num_tx_queues;
u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
if (skb->sk && skb->sk->sk_hash)
	hash = skb->sk->sk_hash;
hash = jhash_1word(hash, hashrnd);
return (u16) (((u64) hash * qcount) >> 32) + qoffset;
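/*
 * Illustration (not part of dev.c): __skb_tx_hash above maps a 32-bit hash
 * onto [0, qcount) with a multiply-and-shift instead of a modulo. Standalone
 * userspace sketch of the same arithmetic; extract the guarded block and
 * build it separately.
 */
#if 0	/* userspace sketch, kept out of the kernel build */
#include <stdint.h>
#include <stdio.h>

/* (hash * qcount) lies in [0, qcount << 32); its top 32 bits pick the queue. */
static uint16_t map_hash_to_queue(uint32_t hash, uint16_t qcount, uint16_t qoffset)
{
	return (uint16_t)(((uint64_t)hash * qcount) >> 32) + qoffset;
}

int main(void)
{
	uint32_t hash = 0x9e3779b9u;	/* arbitrary example hash value */

	printf("queue %u of 8\n", map_hash_to_queue(hash, 8, 0));
	return 0;
}
#endif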
	dev->name, queue_index,
struct xps_dev_maps *dev_maps;
struct xps_map *map;
int queue_index = -1;
queue_index = map->queues[0];
if (skb->sk && skb->sk->sk_hash)
	hash = skb->sk->sk_hash;
hash = jhash_1word(hash, hashrnd);
queue_index = map->queues[((u64)hash * map->len) >> 32];
queue_index = dev_cap_txqueue(dev, queue_index);
queue_index = sk_tx_queue_get(sk);
queue_index = get_xps_queue(dev, skb);
if (queue_index < 0)
	queue_index = skb_tx_hash(dev, skb);
if (queue_index != old_index && sk) {
	if (dst && skb_dst(skb) == dst)
		sk_tx_queue_set(sk, queue_index);
skb_set_queue_mapping(skb, queue_index);
return netdev_get_tx_queue(dev, queue_index);
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
contended = qdisc_is_running(q);
spin_lock(root_lock);
    qdisc_run_begin(q)) {
qdisc_bstats_update(q, skb);
if (qdisc_run_begin(q)) {
spin_unlock(root_lock);
#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
if (prioidx < map->priomap_len)
	skb->priority = map->priomap[prioidx];
#define skb_update_prio(skb)
#define RECURSION_LIMIT 10
skb_reset_mac_header(skb);
__skb_pull(skb, skb_network_offset(skb));
#ifdef CONFIG_NET_CLS_ACT
trace_net_dev_queue(skb);
rc = __dev_xmit_skb(skb, q, dev, txq);
goto recursion_alert;
if (!netif_xmit_stopped(txq)) {
if (dev_xmit_complete(rc)) {
rcu_read_unlock_bh();
rcu_read_unlock_bh();
static struct rps_dev_flow *
if (next_cpu != RPS_NO_CPU) {
#ifdef CONFIG_RFS_ACCEL
struct netdev_rx_queue *rxqueue;
struct rps_dev_flow *old_rflow;
if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
if (rxq_index == skb_get_rx_queue(skb))
rxqueue = dev->_rx + rxq_index;
flow_id = skb->rxhash & flow_table->mask;
rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, rxq_index, flow_id);
rflow = &flow_table->flows[flow_id];
if (old_rflow->filter == rflow->filter)
	old_rflow->filter = RPS_NO_FILTER;
		       struct rps_dev_flow **rflowp)
struct netdev_rx_queue *rxqueue;
struct rps_map *map;
struct rps_dev_flow_table *flow_table;
struct rps_sock_flow_table *sock_flow_table;
if (skb_rx_queue_recorded(skb)) {
	if (unlikely(index >= dev->real_num_rx_queues)) {
		  "%s received packet on queue %u, but number "
		  "of RX queues is %u\n",
		  dev->name, index, dev->real_num_rx_queues);
	rxqueue = dev->_rx + index;
if (map->len == 1 &&
	tcpu = map->cpus[0];
skb_reset_network_header(skb);
if (!skb_get_rxhash(skb))
if (flow_table && sock_flow_table) {
	struct rps_dev_flow *rflow;
	rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
	next_cpu = sock_flow_table->ents[skb->rxhash & sock_flow_table->mask];
		      rflow->last_qtail)) >= 0)) {
		rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
	tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
#ifdef CONFIG_RFS_ACCEL
bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
			 u32 flow_id, u16 filter_id)
struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
struct rps_dev_flow_table *flow_table;
struct rps_dev_flow *rflow;
if (flow_table && flow_id <= flow_table->mask) {
	rflow = &flow_table->flows[flow_id];
	if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
		       rflow->last_qtail) < (int)(10 * flow_table->mask)))
static void rps_trigger_softirq(void *data)
____napi_schedule(sd, &sd->backlog);
sd->rps_ipi_next = mysd->rps_ipi_list;
mysd->rps_ipi_list = sd;
static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
			      unsigned int *qtail)
unsigned long flags;
input_queue_tail_incr_save(sd, qtail);
if (!rps_ipi_queued(sd))
	____napi_schedule(sd, &sd->backlog);
atomic_long_inc(&skb->dev->rx_dropped);
if (netpoll_rx(skb))
trace_netif_rx(skb);
if (static_key_false(&rps_needed)) {
	struct rps_dev_flow voidflow, *rflow = &voidflow;
	cpu = get_rps_cpu(skb->dev, skb, &rflow);
	ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
clist = clist->next;
trace_kfree_skb(skb, net_tx_action);
root_lock = qdisc_lock(q);
if (spin_trylock(root_lock)) {
	spin_unlock(root_lock);
__netif_reschedule(q);
#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
			     unsigned char *addr) __read_mostly;
#ifdef CONFIG_NET_CLS_ACT
spin_lock(qdisc_lock(q));
result = qdisc_enqueue_root(skb, q);
spin_unlock(qdisc_lock(q));
*ret = deliver_skb(skb, *pt_prev, orig_dev);
switch (ing_filter(skb, rxq)) {
		     void *rx_handler_data)
static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
static int __netif_receive_skb(struct sk_buff *skb)
bool deliver_exact = false;
unsigned long pflags = current->flags;
trace_netif_receive_skb(skb);
if (sk_memalloc_socks() && skb_pfmemalloc(skb))
if (netpoll_receive_skb(skb))
orig_dev = skb->dev;
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
skb_reset_mac_len(skb);
#ifdef CONFIG_NET_CLS_ACT
if (sk_memalloc_socks() && skb_pfmemalloc(skb))
list_for_each_entry_rcu(ptype, &ptype_all, list) {
	if (!ptype->dev || ptype->dev == skb->dev) {
		ret = deliver_skb(skb, pt_prev, orig_dev);
#ifdef CONFIG_NET_CLS_ACT
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
if (sk_memalloc_socks() && skb_pfmemalloc(skb)
    && !skb_pfmemalloc_protocol(skb))
ret = deliver_skb(skb, pt_prev, orig_dev);
ret = deliver_skb(skb, pt_prev, orig_dev);
deliver_exact = true;
null_or_dev = deliver_exact ? skb->dev : NULL;
list_for_each_entry_rcu(ptype,
	if (ptype->type == type &&
	    (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
	     ptype->dev == orig_dev)) {
		ret = deliver_skb(skb, pt_prev, orig_dev);
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
atomic_long_inc(&skb->dev->rx_dropped);
if (static_key_false(&rps_needed)) {
	struct rps_dev_flow voidflow, *rflow = &voidflow;
	cpu = get_rps_cpu(skb->dev, skb, &rflow);
	ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
return __netif_receive_skb(skb);
static void flush_backlog(void *arg)
if (skb->dev == dev) {
	input_queue_head_incr(sd);
if (skb->dev == dev) {
	input_queue_head_incr(sd);
static int napi_gro_complete(struct sk_buff *skb)
skb_shinfo(skb)->gso_size = 0;
list_for_each_entry_rcu(ptype, head, list) {
for (skb = prev; skb; skb = prev) {
	napi_gro_complete(skb);
if (skb_is_gso(skb) || skb_has_frag_list(skb))
list_for_each_entry_rcu(ptype, head, list) {
	skb_set_network_header(skb, skb_gro_offset(skb));
if (&ptype->list == head)
napi_gro_complete(nskb);
skb_shinfo(skb)->gso_size = skb_gro_len(skb);
if (skb_headlen(skb) < skb_gro_offset(skb)) {
	int grow = skb_gro_offset(skb) - skb_headlen(skb);
	skb_shinfo(skb)->frags[0].page_offset += grow;
	skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
	if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
		skb_frag_unref(skb, 0);
		memmove(skb_shinfo(skb)->frags,
			skb_shinfo(skb)->frags + 1,
			--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
unsigned int maclen = skb->dev->hard_header_len;
unsigned long diffs;
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
diffs |= compare_ether_header(skb_mac_header(p),
			      skb_gro_mac_header(skb));
diffs = memcmp(skb_mac_header(p),
	       skb_gro_mac_header(skb),
static void skb_gro_reset_offset(struct sk_buff *skb)
    !PageHighMem(skb_frag_page(frag0))) {
	NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
	NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
skb_gro_reset_offset(skb);
__skb_pull(skb, skb_headlen(skb));
napi_reuse_skb(napi, skb);
skb_reset_mac_header(skb);
skb_gro_reset_offset(skb);
off = skb_gro_offset(skb);
hlen = off + sizeof(*eth);
eth = skb_gro_header_fast(skb, off);
if (skb_gro_header_hard(skb, hlen)) {
	eth = skb_gro_header_slow(skb, hlen, off);
	napi_reuse_skb(napi, skb);
skb_gro_pull(skb, sizeof(*eth));
struct sk_buff *skb = napi_frags_skb(napi);
static void net_rps_action_and_irq_enable(struct softnet_data *sd)
sd->rps_ipi_list = NULL;
__smp_call_function_single(remsd->cpu,
static int process_backlog(struct napi_struct *napi, int quota)
if (sd->rps_ipi_list) {
	net_rps_action_and_irq_enable(sd);
while (work < quota) {
	__netif_receive_skb(skb);
	input_queue_head_incr(sd);
	if (++work >= quota) {
if (qlen < quota - work) {
	quota = work + qlen;
unsigned long flags;
unsigned long flags;
#ifdef CONFIG_NETPOLL
napi->poll_owner = -1;
napi_free_frags(napi);
for (skb = napi->gro_list; skb; skb = next) {
unsigned long time_limit = jiffies + 2;
have = netpoll_poll_lock(n);
work = n->poll(n, weight);
if (unlikely(napi_disable_pending(n))) {
netpoll_poll_unlock(have);
net_rps_action_and_irq_enable(sd);
#ifdef CONFIG_NET_DMA
gifconf_list[family] = gifconf;
static int dev_ifname(struct net *net, struct ifreq __user *arg)
static int dev_ifconf(struct net *net, char __user *arg)
for (i = 0; i < NPROTO; i++) {
	if (gifconf_list[i]) {
		done = gifconf_list[i](dev, NULL, 0);
		done = gifconf_list[i](dev, pos + total,
ifc.ifc_len = total;
#ifdef CONFIG_PROC_FS
#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
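/*
 * Illustration (not part of dev.c): the BUCKET_SPACE macros above pack a
 * name-hash bucket and a within-bucket offset into a single seq_file
 * position. Standalone userspace sketch; NETDEV_HASHBITS = 8 is an assumed
 * value used only for this example.
 */
#if 0	/* userspace sketch, kept out of the kernel build */
#include <stdio.h>

#define NETDEV_HASHBITS		8	/* assumption for the sketch */
#define BUCKET_SPACE		(32 - NETDEV_HASHBITS - 1)
#define get_bucket(x)		((x) >> BUCKET_SPACE)
#define get_offset(x)		((x) & ((1 << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o)	((b) << BUCKET_SPACE | (o))

int main(void)
{
	unsigned long long pos = set_bucket_offset(5, 3);	/* bucket 5, 3rd entry */

	printf("pos=%llu -> bucket=%llu offset=%llu\n",
	       pos, (unsigned long long)get_bucket(pos),
	       (unsigned long long)get_offset(pos));
	return 0;
}
#endif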
static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
struct net *net = seq_file_net(seq);
unsigned int count = 0, offset = get_offset(*pos);
hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
unsigned int bucket;
dev = dev_from_same_bucket(seq, pos);
bucket = get_bucket(*pos) + 1;
*pos = set_bucket_offset(bucket, 1);
void *dev_seq_start(struct seq_file *seq, loff_t *pos)
return dev_from_bucket(seq, pos);
void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
return dev_from_bucket(seq, pos);
void dev_seq_stop(struct seq_file *seq, void *v)
seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
	   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
static int dev_seq_show(struct seq_file *seq, void *v)
	 " face |bytes packets errs drop fifo frame "
	 "compressed multicast|bytes packets errs "
	 "drop fifo colls carrier compressed\n");
dev_seq_printf_stats(seq, v);
static struct softnet_data *softnet_get_online(loff_t *pos)
while (*pos < nr_cpu_ids)
static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
return softnet_get_online(pos);
static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
return softnet_get_online(pos);
static void softnet_seq_stop(struct seq_file *seq, void *v)
static int softnet_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
.start = dev_seq_start,
.next  = dev_seq_next,
.stop  = dev_seq_stop,
.show  = dev_seq_show,
.open  = dev_seq_open,
.start = softnet_seq_start,
.next  = softnet_seq_next,
.stop  = softnet_seq_stop,
.show  = softnet_seq_show,
static int softnet_seq_open(struct inode *inode, struct file *file)
return seq_open(file, &softnet_seq_ops);
.open = softnet_seq_open,
static void *ptype_get_idx(loff_t pos)
list_for_each_entry_rcu(pt, &ptype_all, list) {
list_for_each_entry_rcu(pt, &ptype_base[t], list) {
static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
	return ptype_get_idx(0);
nxt = pt->list.next;
if (nxt != &ptype_all)
nxt = ptype_base[0].next;
while (nxt == &ptype_base[hash]) {
	if (++hash >= PTYPE_HASH_SIZE)
static void ptype_seq_stop(struct seq_file *seq, void *v)
static int ptype_seq_show(struct seq_file *seq, void *v)
	seq_puts(seq, "Type Device Function\n");
else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
.start = ptype_seq_start,
.next  = ptype_seq_next,
.stop  = ptype_seq_stop,
.show  = ptype_seq_show,
static int ptype_seq_open(struct inode *inode, struct file *file)
.open = ptype_seq_open,
static int __net_init dev_proc_net_init(struct net *net)
static void __net_exit dev_proc_net_exit(struct net *net)
.init = dev_proc_net_init,
.exit = dev_proc_net_exit,
#define dev_proc_init() 0
static void dev_change_rx_flags(struct net_device *dev, int flags)
static int __dev_set_promiscuity(struct net_device *dev, int inc)
unsigned int old_flags = dev->flags;
pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
if (dev->flags != old_flags) {
	pr_info("device %s %s promiscuous mode\n",
		  "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
		  audit_get_sessionid(current));
dev_change_rx_flags(dev, IFF_PROMISC);
unsigned int old_flags = dev->flags;
err = __dev_set_promiscuity(dev, inc);
if (dev->flags != old_flags)
unsigned int old_flags = dev->flags;
pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
if (dev->flags ^ old_flags) {
if (!netif_device_present(dev))
	__dev_set_promiscuity(dev, 1);
	__dev_set_promiscuity(dev, -1);
netif_addr_lock_bh(dev);
netif_addr_unlock_bh(dev);
flags = (dev->flags & ~(IFF_PROMISC |
	(dev->gflags & (IFF_PROMISC |
if (netif_running(dev)) {
	if (netif_oper_up(dev))
	if (netif_carrier_ok(dev))
	if (netif_dormant(dev))
unsigned int old_flags = dev->flags;
if ((old_flags ^ flags) & IFF_UP) {
	ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
if ((flags ^ dev->gflags) & IFF_PROMISC) {
unsigned int changes = dev->flags ^ old_flags;
if (dev->flags & IFF_UP)
if (dev->flags & IFF_UP &&
unsigned int changes, old_flags = dev->flags;
changes = old_flags ^ dev->flags;
if (new_mtu == dev->mtu)
if (!netif_device_present(dev))
dev->group = new_group;
if (!netif_device_present(dev))
static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
ifr->ifr_metric = 0;
ifr->ifr_mtu = dev->mtu;
memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
ifr->ifr_hwaddr.sa_family = dev->type;
ifr->ifr_map.mem_start = dev->mem_start;
ifr->ifr_map.mem_end = dev->mem_end;
ifr->ifr_map.base_addr = dev->base_addr;
ifr->ifr_map.irq = dev->irq;
ifr->ifr_map.dma = dev->dma;
ifr->ifr_map.port = dev->if_port;
ifr->ifr_ifindex = dev->ifindex;
static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
if (ifr->ifr_hwaddr.sa_family != dev->type)
       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
if (!netif_device_present(dev))
if (!netif_device_present(dev))
if (!netif_device_present(dev))
if (ifr->ifr_qlen < 0)
ifr->ifr_newname[IFNAMSIZ-1] = '\0';
err = net_hwtstamp_validate(ifr);
if (netif_device_present(dev))
int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
ret = dev_ifconf(net, (char __user *) arg);
return dev_ifname(net, (struct ifreq __user *)arg);
colon = strchr(ifr.ifr_name, ':');
ret = dev_ifsioc_locked(net, &ifr, cmd);
		 sizeof(struct ifreq)))
		 sizeof(struct ifreq)))
ret = dev_ifsioc(net, &ifr, cmd);
		 sizeof(struct ifreq)))
ret = dev_ifsioc(net, &ifr, cmd);
ret = dev_ifsioc(net, &ifr, cmd);
		 sizeof(struct ifreq)))
static int dev_new_index(struct net *net)
return net->ifindex = ifindex;
static void net_set_todo(struct net_device *dev)
static void rollback_registered_many(struct list_head *head)
pr_debug("unregister_netdevice: device %s/%p never was registered\n",
dev_close_many(head);
unlist_netdevice(dev);
list_add(&dev->unreg_list, &single);
rollback_registered_many(&single);
netdev_warn(dev, "mixed HW and IP checksum settings.\n");
	"Dropping NETIF_F_SG since no checksum feature.\n");
features &= ~NETIF_F_SG;
netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
features &= ~NETIF_F_ALL_TSO;
if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
	netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
	features &= ~NETIF_F_GSO;
if (!((features & NETIF_F_GEN_CSUM) ||
	"Dropping NETIF_F_UFO since no checksum offload features.\n");
features &= ~NETIF_F_UFO;
if (!(features & NETIF_F_SG)) {
	"Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
features &= ~NETIF_F_UFO;
features = netdev_get_wanted_features(dev);
features = dev->netdev_ops->ndo_fix_features(dev, features);
features = netdev_fix_features(dev, features);
netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
err = dev->netdev_ops->ndo_set_features(dev, features);
	"set_features() failed (%d); wanted %pNF, left %pNF\n",
netif_dormant_on(dev);
netif_dormant_off(dev);
if (netif_carrier_ok(rootdev)) {
	if (!netif_carrier_ok(dev))
	if (netif_carrier_ok(dev))
static int netif_alloc_rx_queues(struct net_device *dev)
unsigned int i, count = dev->num_rx_queues;
struct netdev_rx_queue *rx;
rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
pr_err("netdev: Unable to allocate %u rx queues\n", count);
for (i = 0; i < count; i++)
static void netdev_init_one_queue(struct net_device *dev,
netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
static int netif_alloc_netdev_queues(struct net_device *dev)
pr_err("netdev: Unable to allocate %u tx queues\n", count);
netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
struct net *net = dev_net(dev);
netdev_set_addr_lockdep_class(dev);
ret = dev_get_valid_name(net, dev, dev->name);
dev->ifindex = dev_new_index(net);
ret = notifier_to_errno(ret);
list_netdevice(dev);
ret = notifier_to_errno(ret);
rollback_registered(dev);
static void netdev_wait_allrefs(struct net_device *dev)
unsigned long rebroadcast_time, warning_time;
rebroadcast_time = warning_time = jiffies;
while (refcnt != 0) {
	pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
list_replace_init(&net_todo_list, &list);
if (!list_empty(&list))
while (!list_empty(&list)) {
	pr_err("network todo '%s' but state %d\n",
	netdev_wait_allrefs(dev);
#if BITS_PER_LONG == 64
BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
memcpy(stats64, netdev_stats, sizeof(*stats64));
size_t i, n = sizeof(*stats64) / sizeof(u64);
const unsigned long *src = (const unsigned long *)netdev_stats;
BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
	     sizeof(*stats64) / sizeof(u64));
for (i = 0; i < n; i++)
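/*
 * Illustration (not part of dev.c): on 32-bit builds the loop above widens
 * each unsigned long counter of the legacy stats struct into the matching
 * u64 slot of the 64-bit stats struct, relying on both structs listing their
 * fields in the same order. Standalone userspace sketch of that per-field
 * copy, using small stand-in structs rather than the kernel types.
 */
#if 0	/* userspace sketch, kept out of the kernel build */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

struct stats_long { unsigned long rx_packets, tx_packets, rx_bytes, tx_bytes; };
struct stats_u64  { uint64_t rx_packets, tx_packets, rx_bytes, tx_bytes; };

static void widen_stats(struct stats_u64 *dst, const struct stats_long *src)
{
	const unsigned long *s = (const unsigned long *)src;
	uint64_t *d = (uint64_t *)dst;
	size_t i, n = sizeof(*dst) / sizeof(uint64_t);

	for (i = 0; i < n; i++)
		d[i] = s[i];	/* zero-extend each counter */
}

int main(void)
{
	struct stats_long s = { 1, 2, 300, 400 };
	struct stats_u64 d;

	widen_stats(&d, &s);
	printf("rx_bytes=%llu tx_bytes=%llu\n",
	       (unsigned long long)d.rx_bytes, (unsigned long long)d.tx_bytes);
	return 0;
}
#endif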
memset(storage, 0, sizeof(*storage));
#ifdef CONFIG_NET_CLS_ACT
netdev_init_one_queue(dev, queue, NULL);
static const struct ethtool_ops default_ethtool_ops;
		unsigned int txqs, unsigned int rxqs)
pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
alloc_size += sizeof_priv;
pr_err("alloc_netdev: Unable to allocate device\n");
dev->padded = (char *)dev - (char *)p;
if (netif_alloc_netdev_queues(dev))
dev->num_rx_queues = rxqs;
dev->real_num_rx_queues = rxqs;
if (netif_alloc_rx_queues(dev))
release_net(dev_net(dev));
if (dev->reg_state == NETREG_UNINITIALIZED) {
rollback_registered(dev);
if (!list_empty(head)) {
	rollback_registered_many(head);
unregister_netdevice(dev);
if (net_eq(dev_net(dev), net))
if (dev_get_valid_name(net, dev, pat) < 0)
unlist_netdevice(dev);
dev_net_set(dev, net);
dev->ifindex = dev_new_index(net);
list_netdevice(dev);
unsigned int cpu, oldcpu = (unsigned long)ocpu;
list_skb = &(*list_skb)->next;
input_queue_head_incr(oldsd);
input_queue_head_incr(oldsd);
if (mask & NETIF_F_GEN_CSUM)
if (all & NETIF_F_GEN_CSUM)
static struct hlist_head *netdev_create_hash(void)
static int __net_init netdev_init(struct net *net)
const char *empty = "";
parent = dev->dev.parent;
if (driver && driver->name)
	return driver->name;
static int __netdev_printk(const char *level, const struct net_device *dev,
if (dev && dev->dev.parent) {
	r = dev_printk_emit(level[1] - '0',
			    dev_name(dev->dev.parent),
			    netdev_name(dev), vaf);
	r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
	r = printk("%s(NULL net_device): %pV", level, vaf);
r = __netdev_printk(level, dev, &vaf);
#define define_netdev_printk_level(func, level)			\
int func(const struct net_device *dev, const char *fmt, ...)		\
	struct va_format vaf;						\
	va_start(args, fmt);						\
	r = __netdev_printk(level, dev, &vaf);				\
EXPORT_SYMBOL(func);
static void __net_exit netdev_exit(struct net *net)
.init = netdev_init,
.exit = netdev_exit,
static void __net_exit default_device_exit(struct net *net)
pr_emerg("%s: failed to move %s to init_net: %d\n",
	 __func__, dev->name, err);
.exit = default_device_exit,
.exit_batch = default_device_exit_batch,
static int __init net_dev_init(void)
INIT_LIST_HEAD(&ptype_all);
INIT_LIST_HEAD(&ptype_base[i]);
memset(sd, 0, sizeof(*sd));
sd->csd.func = rps_trigger_softirq;
sd->backlog.poll = process_backlog;
static int __init initialize_hashrnd(void)