Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
sch_teql.c
Go to the documentation of this file.
1 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2  *
3  * This program is free software; you can redistribute it and/or
4  * modify it under the terms of the GNU General Public License
5  * as published by the Free Software Foundation; either version
6  * 2 of the License, or (at your option) any later version.
7  *
8  * Authors: Alexey Kuznetsov, <[email protected]>
9  */
10 
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/string.h>
16 #include <linux/errno.h>
17 #include <linux/if_arp.h>
18 #include <linux/netdevice.h>
19 #include <linux/init.h>
20 #include <linux/skbuff.h>
21 #include <linux/moduleparam.h>
22 #include <net/dst.h>
23 #include <net/neighbour.h>
24 #include <net/pkt_sched.h>
25 
26 /*
27  How to setup it.
28  ----------------
29 
30  After loading this module you will find a new device teqlN
31  and new qdisc with the same name. To join a slave to the equalizer
32  you should just set this qdisc on a device f.e.
33 
34  # tc qdisc add dev eth0 root teql0
35  # tc qdisc add dev eth1 root teql0
36 
37  That's all. Full PnP 8)
38 
39  Applicability.
40  --------------
41 
42  1. Slave devices MUST be active devices, i.e., they must raise the tbusy
43  signal and generate EOI events. If you want to equalize virtual devices
44  like tunnels, use a normal eql device.
45  2. This device puts no limitations on physical slave characteristics
46  f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
47  Certainly, large difference in link speeds will make the resulting
48  eqalized link unusable, because of huge packet reordering.
49  I estimate an upper useful difference as ~10 times.
50  3. If the slave requires address resolution, only protocols using
51  neighbour cache (IPv4/IPv6) will work over the equalized link.
52  Other protocols are still allowed to use the slave device directly,
53  which will not break load balancing, though native slave
54  traffic will have the highest priority. */
55 
56 struct teql_master {
57  struct Qdisc_ops qops;
58  struct net_device *dev;
59  struct Qdisc *slaves;
61  unsigned long tx_bytes;
62  unsigned long tx_packets;
63  unsigned long tx_errors;
64  unsigned long tx_dropped;
65 };
66 
68  struct Qdisc *next;
69  struct teql_master *m;
70  struct sk_buff_head q;
71 };
72 
73 #define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
74 
75 #define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
76 
77 /* "teql*" qdisc routines */
78 
79 static int
80 teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
81 {
82  struct net_device *dev = qdisc_dev(sch);
83  struct teql_sched_data *q = qdisc_priv(sch);
84 
85  if (q->q.qlen < dev->tx_queue_len) {
86  __skb_queue_tail(&q->q, skb);
87  return NET_XMIT_SUCCESS;
88  }
89 
90  return qdisc_drop(skb, sch);
91 }
92 
93 static struct sk_buff *
94 teql_dequeue(struct Qdisc *sch)
95 {
96  struct teql_sched_data *dat = qdisc_priv(sch);
97  struct netdev_queue *dat_queue;
98  struct sk_buff *skb;
99 
100  skb = __skb_dequeue(&dat->q);
101  dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
102  if (skb == NULL) {
103  struct net_device *m = qdisc_dev(dat_queue->qdisc);
104  if (m) {
105  dat->m->slaves = sch;
106  netif_wake_queue(m);
107  }
108  } else {
109  qdisc_bstats_update(sch, skb);
110  }
111  sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
112  return skb;
113 }
114 
115 static struct sk_buff *
116 teql_peek(struct Qdisc *sch)
117 {
118  /* teql is meant to be used as root qdisc */
119  return NULL;
120 }
121 
122 static inline void
123 teql_neigh_release(struct neighbour *n)
124 {
125  if (n)
126  neigh_release(n);
127 }
128 
129 static void
130 teql_reset(struct Qdisc *sch)
131 {
132  struct teql_sched_data *dat = qdisc_priv(sch);
133 
134  skb_queue_purge(&dat->q);
135  sch->q.qlen = 0;
136 }
137 
138 static void
139 teql_destroy(struct Qdisc *sch)
140 {
141  struct Qdisc *q, *prev;
142  struct teql_sched_data *dat = qdisc_priv(sch);
143  struct teql_master *master = dat->m;
144 
145  prev = master->slaves;
146  if (prev) {
147  do {
148  q = NEXT_SLAVE(prev);
149  if (q == sch) {
150  NEXT_SLAVE(prev) = NEXT_SLAVE(q);
151  if (q == master->slaves) {
152  master->slaves = NEXT_SLAVE(q);
153  if (q == master->slaves) {
154  struct netdev_queue *txq;
155  spinlock_t *root_lock;
156 
157  txq = netdev_get_tx_queue(master->dev, 0);
158  master->slaves = NULL;
159 
160  root_lock = qdisc_root_sleeping_lock(txq->qdisc);
161  spin_lock_bh(root_lock);
162  qdisc_reset(txq->qdisc);
163  spin_unlock_bh(root_lock);
164  }
165  }
166  skb_queue_purge(&dat->q);
167  break;
168  }
169 
170  } while ((prev = q) != master->slaves);
171  }
172 }
173 
174 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
175 {
176  struct net_device *dev = qdisc_dev(sch);
177  struct teql_master *m = (struct teql_master *)sch->ops;
178  struct teql_sched_data *q = qdisc_priv(sch);
179 
180  if (dev->hard_header_len > m->dev->hard_header_len)
181  return -EINVAL;
182 
183  if (m->dev == dev)
184  return -ELOOP;
185 
186  q->m = m;
187 
188  skb_queue_head_init(&q->q);
189 
190  if (m->slaves) {
191  if (m->dev->flags & IFF_UP) {
192  if ((m->dev->flags & IFF_POINTOPOINT &&
193  !(dev->flags & IFF_POINTOPOINT)) ||
194  (m->dev->flags & IFF_BROADCAST &&
195  !(dev->flags & IFF_BROADCAST)) ||
196  (m->dev->flags & IFF_MULTICAST &&
197  !(dev->flags & IFF_MULTICAST)) ||
198  dev->mtu < m->dev->mtu)
199  return -EINVAL;
200  } else {
201  if (!(dev->flags&IFF_POINTOPOINT))
202  m->dev->flags &= ~IFF_POINTOPOINT;
203  if (!(dev->flags&IFF_BROADCAST))
204  m->dev->flags &= ~IFF_BROADCAST;
205  if (!(dev->flags&IFF_MULTICAST))
206  m->dev->flags &= ~IFF_MULTICAST;
207  if (dev->mtu < m->dev->mtu)
208  m->dev->mtu = dev->mtu;
209  }
210  q->next = NEXT_SLAVE(m->slaves);
211  NEXT_SLAVE(m->slaves) = sch;
212  } else {
213  q->next = sch;
214  m->slaves = sch;
215  m->dev->mtu = dev->mtu;
216  m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
217  }
218  return 0;
219 }
220 
221 
222 static int
223 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
224  struct net_device *dev, struct netdev_queue *txq,
225  struct dst_entry *dst)
226 {
227  struct neighbour *n;
228  int err = 0;
229 
230  n = dst_neigh_lookup_skb(dst, skb);
231  if (!n)
232  return -ENOENT;
233 
234  if (dst->dev != dev) {
235  struct neighbour *mn;
236 
237  mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
238  neigh_release(n);
239  if (IS_ERR(mn))
240  return PTR_ERR(mn);
241  n = mn;
242  }
243 
244  if (neigh_event_send(n, skb_res) == 0) {
245  int err;
246  char haddr[MAX_ADDR_LEN];
247 
248  neigh_ha_snapshot(haddr, n, dev);
249  err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
250  NULL, skb->len);
251 
252  if (err < 0)
253  err = -EINVAL;
254  } else {
255  err = (skb_res == NULL) ? -EAGAIN : 1;
256  }
257  neigh_release(n);
258  return err;
259 }
260 
261 static inline int teql_resolve(struct sk_buff *skb,
262  struct sk_buff *skb_res,
263  struct net_device *dev,
264  struct netdev_queue *txq)
265 {
266  struct dst_entry *dst = skb_dst(skb);
267  int res;
268 
269  if (txq->qdisc == &noop_qdisc)
270  return -ENODEV;
271 
272  if (!dev->header_ops || !dst)
273  return 0;
274 
275  rcu_read_lock();
276  res = __teql_resolve(skb, skb_res, dev, txq, dst);
277  rcu_read_unlock();
278 
279  return res;
280 }
281 
282 static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
283 {
284  struct teql_master *master = netdev_priv(dev);
285  struct Qdisc *start, *q;
286  int busy;
287  int nores;
288  int subq = skb_get_queue_mapping(skb);
289  struct sk_buff *skb_res = NULL;
290 
291  start = master->slaves;
292 
293 restart:
294  nores = 0;
295  busy = 0;
296 
297  q = start;
298  if (!q)
299  goto drop;
300 
301  do {
302  struct net_device *slave = qdisc_dev(q);
303  struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
304  const struct net_device_ops *slave_ops = slave->netdev_ops;
305 
306  if (slave_txq->qdisc_sleeping != q)
307  continue;
308  if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
309  !netif_running(slave)) {
310  busy = 1;
311  continue;
312  }
313 
314  switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
315  case 0:
316  if (__netif_tx_trylock(slave_txq)) {
317  unsigned int length = qdisc_pkt_len(skb);
318 
319  if (!netif_xmit_frozen_or_stopped(slave_txq) &&
320  slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
321  txq_trans_update(slave_txq);
322  __netif_tx_unlock(slave_txq);
323  master->slaves = NEXT_SLAVE(q);
324  netif_wake_queue(dev);
325  master->tx_packets++;
326  master->tx_bytes += length;
327  return NETDEV_TX_OK;
328  }
329  __netif_tx_unlock(slave_txq);
330  }
331  if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
332  busy = 1;
333  break;
334  case 1:
335  master->slaves = NEXT_SLAVE(q);
336  return NETDEV_TX_OK;
337  default:
338  nores = 1;
339  break;
340  }
341  __skb_pull(skb, skb_network_offset(skb));
342  } while ((q = NEXT_SLAVE(q)) != start);
343 
344  if (nores && skb_res == NULL) {
345  skb_res = skb;
346  goto restart;
347  }
348 
349  if (busy) {
350  netif_stop_queue(dev);
351  return NETDEV_TX_BUSY;
352  }
353  master->tx_errors++;
354 
355 drop:
356  master->tx_dropped++;
357  dev_kfree_skb(skb);
358  return NETDEV_TX_OK;
359 }
360 
361 static int teql_master_open(struct net_device *dev)
362 {
363  struct Qdisc *q;
364  struct teql_master *m = netdev_priv(dev);
365  int mtu = 0xFFFE;
366  unsigned int flags = IFF_NOARP | IFF_MULTICAST;
367 
368  if (m->slaves == NULL)
369  return -EUNATCH;
370 
371  flags = FMASK;
372 
373  q = m->slaves;
374  do {
375  struct net_device *slave = qdisc_dev(q);
376 
377  if (slave == NULL)
378  return -EUNATCH;
379 
380  if (slave->mtu < mtu)
381  mtu = slave->mtu;
382  if (slave->hard_header_len > LL_MAX_HEADER)
383  return -EINVAL;
384 
385  /* If all the slaves are BROADCAST, master is BROADCAST
386  If all the slaves are PtP, master is PtP
387  Otherwise, master is NBMA.
388  */
389  if (!(slave->flags&IFF_POINTOPOINT))
390  flags &= ~IFF_POINTOPOINT;
391  if (!(slave->flags&IFF_BROADCAST))
392  flags &= ~IFF_BROADCAST;
393  if (!(slave->flags&IFF_MULTICAST))
394  flags &= ~IFF_MULTICAST;
395  } while ((q = NEXT_SLAVE(q)) != m->slaves);
396 
397  m->dev->mtu = mtu;
398  m->dev->flags = (m->dev->flags&~FMASK) | flags;
399  netif_start_queue(m->dev);
400  return 0;
401 }
402 
403 static int teql_master_close(struct net_device *dev)
404 {
405  netif_stop_queue(dev);
406  return 0;
407 }
408 
409 static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
410  struct rtnl_link_stats64 *stats)
411 {
412  struct teql_master *m = netdev_priv(dev);
413 
414  stats->tx_packets = m->tx_packets;
415  stats->tx_bytes = m->tx_bytes;
416  stats->tx_errors = m->tx_errors;
417  stats->tx_dropped = m->tx_dropped;
418  return stats;
419 }
420 
421 static int teql_master_mtu(struct net_device *dev, int new_mtu)
422 {
423  struct teql_master *m = netdev_priv(dev);
424  struct Qdisc *q;
425 
426  if (new_mtu < 68)
427  return -EINVAL;
428 
429  q = m->slaves;
430  if (q) {
431  do {
432  if (new_mtu > qdisc_dev(q)->mtu)
433  return -EINVAL;
434  } while ((q = NEXT_SLAVE(q)) != m->slaves);
435  }
436 
437  dev->mtu = new_mtu;
438  return 0;
439 }
440 
441 static const struct net_device_ops teql_netdev_ops = {
442  .ndo_open = teql_master_open,
443  .ndo_stop = teql_master_close,
444  .ndo_start_xmit = teql_master_xmit,
445  .ndo_get_stats64 = teql_master_stats64,
446  .ndo_change_mtu = teql_master_mtu,
447 };
448 
449 static __init void teql_master_setup(struct net_device *dev)
450 {
451  struct teql_master *master = netdev_priv(dev);
452  struct Qdisc_ops *ops = &master->qops;
453 
454  master->dev = dev;
455  ops->priv_size = sizeof(struct teql_sched_data);
456 
457  ops->enqueue = teql_enqueue;
458  ops->dequeue = teql_dequeue;
459  ops->peek = teql_peek;
460  ops->init = teql_qdisc_init;
461  ops->reset = teql_reset;
462  ops->destroy = teql_destroy;
463  ops->owner = THIS_MODULE;
464 
465  dev->netdev_ops = &teql_netdev_ops;
466  dev->type = ARPHRD_VOID;
467  dev->mtu = 1500;
468  dev->tx_queue_len = 100;
469  dev->flags = IFF_NOARP;
472 }
473 
474 static LIST_HEAD(master_dev_list);
475 static int max_equalizers = 1;
476 module_param(max_equalizers, int, 0);
477 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
478 
479 static int __init teql_init(void)
480 {
481  int i;
482  int err = -ENODEV;
483 
484  for (i = 0; i < max_equalizers; i++) {
485  struct net_device *dev;
486  struct teql_master *master;
487 
488  dev = alloc_netdev(sizeof(struct teql_master),
489  "teql%d", teql_master_setup);
490  if (!dev) {
491  err = -ENOMEM;
492  break;
493  }
494 
495  if ((err = register_netdev(dev))) {
496  free_netdev(dev);
497  break;
498  }
499 
500  master = netdev_priv(dev);
501 
502  strlcpy(master->qops.id, dev->name, IFNAMSIZ);
503  err = register_qdisc(&master->qops);
504 
505  if (err) {
506  unregister_netdev(dev);
507  free_netdev(dev);
508  break;
509  }
510 
511  list_add_tail(&master->master_list, &master_dev_list);
512  }
513  return i ? 0 : err;
514 }
515 
516 static void __exit teql_exit(void)
517 {
518  struct teql_master *master, *nxt;
519 
520  list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
521 
522  list_del(&master->master_list);
523 
524  unregister_qdisc(&master->qops);
525  unregister_netdev(master->dev);
526  free_netdev(master->dev);
527  }
528 }
529 
530 module_init(teql_init);
531 module_exit(teql_exit);
532 
533 MODULE_LICENSE("GPL");