Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
team_mode_loadbalance.c
Go to the documentation of this file.
1 /*
2  * drivers/net/team/team_mode_loadbalance.c - Load-balancing mode for team
3  * Copyright (c) 2012 Jiri Pirko <[email protected]>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  */
10 
11 #include <linux/kernel.h>
12 #include <linux/types.h>
13 #include <linux/module.h>
14 #include <linux/init.h>
15 #include <linux/errno.h>
16 #include <linux/netdevice.h>
17 #include <linux/filter.h>
18 #include <linux/if_team.h>
19 
20 struct lb_priv;
21 
/*
 * Signature shared by every tx port selector: given the team, the mode's
 * private data, the packet and its one-byte hash, return the port to
 * transmit on (callers treat NULL as "no port").
 */
typedef struct team_port *lb_select_tx_port_func_t(struct team *team,
						    struct lb_priv *lb_priv,
						    struct sk_buff *skb,
						    unsigned char hash);
26 
/*
 * Number of tx hash buckets. The hash is carried in an unsigned char, so
 * there is one bucket per possible byte value.
 */
#define LB_TX_HASHTABLE_SIZE 256
28 
29 struct lb_stats {
31 };
32 
33 struct lb_pcpu_stats {
36 };
37 
38 struct lb_stats_info {
39  struct lb_stats stats;
42 };
43 
45  struct team_port __rcu *port;
47 };
48 
49 struct lb_priv_ex {
50  struct team *team;
53  struct {
54  unsigned int refresh_interval; /* in tenths of second */
57  } stats;
58 };
59 
60 struct lb_priv {
61  struct sk_filter __rcu *fp;
64  struct lb_priv_ex *ex; /* priv extension */
65 };
66 
67 static struct lb_priv *get_lb_priv(struct team *team)
68 {
69  return (struct lb_priv *) &team->mode_priv;
70 }
71 
72 struct lb_port_priv {
75 };
76 
77 static struct lb_port_priv *get_lb_port_priv(struct team_port *port)
78 {
79  return (struct lb_port_priv *) &port->mode_priv;
80 }
81 
/*
 * Port slot of the hash-to-port table for @hash. The macro parameter is
 * now the name the body actually uses, so the expansion no longer depends
 * on the caller having a local variable called lb_priv.
 */
#define LB_HTPM_PORT_BY_HASH(lb_priv, hash) \
	(lb_priv)->ex->tx_hash_to_port_mapping[hash].port
84 
/*
 * Option-instance slot of the hash-to-port table for @hash. The macro
 * parameter is now the name the body actually uses, so the expansion no
 * longer depends on the caller having a local variable called lb_priv.
 */
#define LB_HTPM_OPT_INST_INFO_BY_HASH(lb_priv, hash) \
	(lb_priv)->ex->tx_hash_to_port_mapping[hash].opt_inst_info
87 
88 static void lb_tx_hash_to_port_mapping_null_port(struct team *team,
89  struct team_port *port)
90 {
91  struct lb_priv *lb_priv = get_lb_priv(team);
92  bool changed = false;
93  int i;
94 
95  for (i = 0; i < LB_TX_HASHTABLE_SIZE; i++) {
96  struct lb_port_mapping *pm;
97 
98  pm = &lb_priv->ex->tx_hash_to_port_mapping[i];
99  if (rcu_access_pointer(pm->port) == port) {
100  RCU_INIT_POINTER(pm->port, NULL);
102  changed = true;
103  }
104  }
105  if (changed)
107 }
108 
109 /* Basic tx selection based solely by hash */
110 static struct team_port *lb_hash_select_tx_port(struct team *team,
111  struct lb_priv *lb_priv,
112  struct sk_buff *skb,
113  unsigned char hash)
114 {
115  int port_index;
116 
117  port_index = hash % team->en_port_count;
118  return team_get_port_by_index_rcu(team, port_index);
119 }
120 
/*
 * Tx selection through the hash-to-port table: return whatever port is
 * currently stored in the slot for @hash (may be NULL).
 */
static struct team_port *lb_htpm_select_tx_port(struct team *team,
						struct lb_priv *lb_priv,
						struct sk_buff *skb,
						unsigned char hash)
{
	struct team_port *mapped;

	mapped = rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash));
	return mapped;
}
129 
131  char *name;
133 };
134 
135 static const struct lb_select_tx_port lb_select_tx_port_list[] = {
136  {
137  .name = "hash",
138  .func = lb_hash_select_tx_port,
139  },
140  {
141  .name = "hash_to_port_mapping",
142  .func = lb_htpm_select_tx_port,
143  },
144 };
145 #define LB_SELECT_TX_PORT_LIST_COUNT ARRAY_SIZE(lb_select_tx_port_list)
146 
147 static char *lb_select_tx_port_get_name(lb_select_tx_port_func_t *func)
148 {
149  int i;
150 
151  for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) {
152  const struct lb_select_tx_port *item;
153 
154  item = &lb_select_tx_port_list[i];
155  if (item->func == func)
156  return item->name;
157  }
158  return NULL;
159 }
160 
161 static lb_select_tx_port_func_t *lb_select_tx_port_get_func(const char *name)
162 {
163  int i;
164 
165  for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) {
166  const struct lb_select_tx_port *item;
167 
168  item = &lb_select_tx_port_list[i];
169  if (!strcmp(item->name, name))
170  return item->func;
171  }
172  return NULL;
173 }
174 
175 static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv,
176  struct sk_buff *skb)
177 {
178  struct sk_filter *fp;
179  uint32_t lhash;
180  unsigned char *c;
181 
182  fp = rcu_dereference_bh(lb_priv->fp);
183  if (unlikely(!fp))
184  return 0;
185  lhash = SK_RUN_FILTER(fp, skb);
186  c = (char *) &lhash;
187  return c[0] ^ c[1] ^ c[2] ^ c[3];
188 }
189 
190 static void lb_update_tx_stats(unsigned int tx_bytes, struct lb_priv *lb_priv,
191  struct lb_port_priv *lb_port_priv,
192  unsigned char hash)
193 {
194  struct lb_pcpu_stats *pcpu_stats;
195  struct lb_stats *port_stats;
196  struct lb_stats *hash_stats;
197 
198  pcpu_stats = this_cpu_ptr(lb_priv->pcpu_stats);
199  port_stats = this_cpu_ptr(lb_port_priv->pcpu_stats);
200  hash_stats = &pcpu_stats->hash_stats[hash];
201  u64_stats_update_begin(&pcpu_stats->syncp);
202  port_stats->tx_bytes += tx_bytes;
203  hash_stats->tx_bytes += tx_bytes;
204  u64_stats_update_end(&pcpu_stats->syncp);
205 }
206 
207 static bool lb_transmit(struct team *team, struct sk_buff *skb)
208 {
209  struct lb_priv *lb_priv = get_lb_priv(team);
211  struct team_port *port;
212  unsigned char hash;
213  unsigned int tx_bytes = skb->len;
214 
215  hash = lb_get_skb_hash(lb_priv, skb);
216  select_tx_port_func = rcu_dereference_bh(lb_priv->select_tx_port_func);
217  port = select_tx_port_func(team, lb_priv, skb, hash);
218  if (unlikely(!port))
219  goto drop;
220  if (team_dev_queue_xmit(team, port, skb))
221  return false;
222  lb_update_tx_stats(tx_bytes, lb_priv, get_lb_port_priv(port), hash);
223  return true;
224 
225 drop:
226  dev_kfree_skb_any(skb);
227  return false;
228 }
229 
230 static int lb_bpf_func_get(struct team *team, struct team_gsetter_ctx *ctx)
231 {
232  struct lb_priv *lb_priv = get_lb_priv(team);
233 
234  if (!lb_priv->ex->orig_fprog) {
235  ctx->data.bin_val.len = 0;
236  ctx->data.bin_val.ptr = NULL;
237  return 0;
238  }
239  ctx->data.bin_val.len = lb_priv->ex->orig_fprog->len *
240  sizeof(struct sock_filter);
241  ctx->data.bin_val.ptr = lb_priv->ex->orig_fprog->filter;
242  return 0;
243 }
244 
245 static int __fprog_create(struct sock_fprog **pfprog, u32 data_len,
246  const void *data)
247 {
248  struct sock_fprog *fprog;
249  struct sock_filter *filter = (struct sock_filter *) data;
250 
251  if (data_len % sizeof(struct sock_filter))
252  return -EINVAL;
253  fprog = kmalloc(sizeof(struct sock_fprog), GFP_KERNEL);
254  if (!fprog)
255  return -ENOMEM;
256  fprog->filter = kmemdup(filter, data_len, GFP_KERNEL);
257  if (!fprog->filter) {
258  kfree(fprog);
259  return -ENOMEM;
260  }
261  fprog->len = data_len / sizeof(struct sock_filter);
262  *pfprog = fprog;
263  return 0;
264 }
265 
266 static void __fprog_destroy(struct sock_fprog *fprog)
267 {
268  kfree(fprog->filter);
269  kfree(fprog);
270 }
271 
272 static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx)
273 {
274  struct lb_priv *lb_priv = get_lb_priv(team);
275  struct sk_filter *fp = NULL;
276  struct sk_filter *orig_fp;
277  struct sock_fprog *fprog = NULL;
278  int err;
279 
280  if (ctx->data.bin_val.len) {
281  err = __fprog_create(&fprog, ctx->data.bin_val.len,
282  ctx->data.bin_val.ptr);
283  if (err)
284  return err;
285  err = sk_unattached_filter_create(&fp, fprog);
286  if (err) {
287  __fprog_destroy(fprog);
288  return err;
289  }
290  }
291 
292  if (lb_priv->ex->orig_fprog) {
293  /* Clear old filter data */
294  __fprog_destroy(lb_priv->ex->orig_fprog);
295  orig_fp = rcu_dereference_protected(lb_priv->fp,
296  lockdep_is_held(&team->lock));
298  }
299 
300  rcu_assign_pointer(lb_priv->fp, fp);
301  lb_priv->ex->orig_fprog = fprog;
302  return 0;
303 }
304 
305 static int lb_tx_method_get(struct team *team, struct team_gsetter_ctx *ctx)
306 {
307  struct lb_priv *lb_priv = get_lb_priv(team);
309  char *name;
310 
312  lockdep_is_held(&team->lock));
313  name = lb_select_tx_port_get_name(func);
314  BUG_ON(!name);
315  ctx->data.str_val = name;
316  return 0;
317 }
318 
319 static int lb_tx_method_set(struct team *team, struct team_gsetter_ctx *ctx)
320 {
321  struct lb_priv *lb_priv = get_lb_priv(team);
323 
324  func = lb_select_tx_port_get_func(ctx->data.str_val);
325  if (!func)
326  return -EINVAL;
327  rcu_assign_pointer(lb_priv->select_tx_port_func, func);
328  return 0;
329 }
330 
331 static int lb_tx_hash_to_port_mapping_init(struct team *team,
332  struct team_option_inst_info *info)
333 {
334  struct lb_priv *lb_priv = get_lb_priv(team);
335  unsigned char hash = info->array_index;
336 
337  LB_HTPM_OPT_INST_INFO_BY_HASH(lb_priv, hash) = info;
338  return 0;
339 }
340 
341 static int lb_tx_hash_to_port_mapping_get(struct team *team,
342  struct team_gsetter_ctx *ctx)
343 {
344  struct lb_priv *lb_priv = get_lb_priv(team);
345  struct team_port *port;
346  unsigned char hash = ctx->info->array_index;
347 
348  port = LB_HTPM_PORT_BY_HASH(lb_priv, hash);
349  ctx->data.u32_val = port ? port->dev->ifindex : 0;
350  return 0;
351 }
352 
353 static int lb_tx_hash_to_port_mapping_set(struct team *team,
354  struct team_gsetter_ctx *ctx)
355 {
356  struct lb_priv *lb_priv = get_lb_priv(team);
357  struct team_port *port;
358  unsigned char hash = ctx->info->array_index;
359 
360  list_for_each_entry(port, &team->port_list, list) {
361  if (ctx->data.u32_val == port->dev->ifindex &&
362  team_port_enabled(port)) {
364  port);
365  return 0;
366  }
367  }
368  return -ENODEV;
369 }
370 
371 static int lb_hash_stats_init(struct team *team,
372  struct team_option_inst_info *info)
373 {
374  struct lb_priv *lb_priv = get_lb_priv(team);
375  unsigned char hash = info->array_index;
376 
377  lb_priv->ex->stats.info[hash].opt_inst_info = info;
378  return 0;
379 }
380 
381 static int lb_hash_stats_get(struct team *team, struct team_gsetter_ctx *ctx)
382 {
383  struct lb_priv *lb_priv = get_lb_priv(team);
384  unsigned char hash = ctx->info->array_index;
385 
386  ctx->data.bin_val.ptr = &lb_priv->ex->stats.info[hash].stats;
387  ctx->data.bin_val.len = sizeof(struct lb_stats);
388  return 0;
389 }
390 
391 static int lb_port_stats_init(struct team *team,
392  struct team_option_inst_info *info)
393 {
394  struct team_port *port = info->port;
395  struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);
396 
397  lb_port_priv->stats_info.opt_inst_info = info;
398  return 0;
399 }
400 
401 static int lb_port_stats_get(struct team *team, struct team_gsetter_ctx *ctx)
402 {
403  struct team_port *port = ctx->info->port;
404  struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);
405 
406  ctx->data.bin_val.ptr = &lb_port_priv->stats_info.stats;
407  ctx->data.bin_val.len = sizeof(struct lb_stats);
408  return 0;
409 }
410 
411 static void __lb_stats_info_refresh_prepare(struct lb_stats_info *s_info)
412 {
413  memcpy(&s_info->last_stats, &s_info->stats, sizeof(struct lb_stats));
414  memset(&s_info->stats, 0, sizeof(struct lb_stats));
415 }
416 
417 static bool __lb_stats_info_refresh_check(struct lb_stats_info *s_info,
418  struct team *team)
419 {
420  if (memcmp(&s_info->last_stats, &s_info->stats,
421  sizeof(struct lb_stats))) {
423  return true;
424  }
425  return false;
426 }
427 
428 static void __lb_one_cpu_stats_add(struct lb_stats *acc_stats,
429  struct lb_stats *cpu_stats,
430  struct u64_stats_sync *syncp)
431 {
432  unsigned int start;
433  struct lb_stats tmp;
434 
435  do {
436  start = u64_stats_fetch_begin_bh(syncp);
437  tmp.tx_bytes = cpu_stats->tx_bytes;
438  } while (u64_stats_fetch_retry_bh(syncp, start));
439  acc_stats->tx_bytes += tmp.tx_bytes;
440 }
441 
442 static void lb_stats_refresh(struct work_struct *work)
443 {
444  struct team *team;
445  struct lb_priv *lb_priv;
446  struct lb_priv_ex *lb_priv_ex;
447  struct lb_pcpu_stats *pcpu_stats;
448  struct lb_stats *stats;
449  struct lb_stats_info *s_info;
450  struct team_port *port;
451  bool changed = false;
452  int i;
453  int j;
454 
455  lb_priv_ex = container_of(work, struct lb_priv_ex,
456  stats.refresh_dw.work);
457 
458  team = lb_priv_ex->team;
459  lb_priv = get_lb_priv(team);
460 
461  if (!mutex_trylock(&team->lock)) {
462  schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 0);
463  return;
464  }
465 
466  for (j = 0; j < LB_TX_HASHTABLE_SIZE; j++) {
467  s_info = &lb_priv->ex->stats.info[j];
468  __lb_stats_info_refresh_prepare(s_info);
470  pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i);
471  stats = &pcpu_stats->hash_stats[j];
472  __lb_one_cpu_stats_add(&s_info->stats, stats,
473  &pcpu_stats->syncp);
474  }
475  changed |= __lb_stats_info_refresh_check(s_info, team);
476  }
477 
478  list_for_each_entry(port, &team->port_list, list) {
479  struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);
480 
481  s_info = &lb_port_priv->stats_info;
482  __lb_stats_info_refresh_prepare(s_info);
484  pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i);
485  stats = per_cpu_ptr(lb_port_priv->pcpu_stats, i);
486  __lb_one_cpu_stats_add(&s_info->stats, stats,
487  &pcpu_stats->syncp);
488  }
489  changed |= __lb_stats_info_refresh_check(s_info, team);
490  }
491 
492  if (changed)
494 
495  schedule_delayed_work(&lb_priv_ex->stats.refresh_dw,
496  (lb_priv_ex->stats.refresh_interval * HZ) / 10);
497 
498  mutex_unlock(&team->lock);
499 }
500 
501 static int lb_stats_refresh_interval_get(struct team *team,
502  struct team_gsetter_ctx *ctx)
503 {
504  struct lb_priv *lb_priv = get_lb_priv(team);
505 
506  ctx->data.u32_val = lb_priv->ex->stats.refresh_interval;
507  return 0;
508 }
509 
510 static int lb_stats_refresh_interval_set(struct team *team,
511  struct team_gsetter_ctx *ctx)
512 {
513  struct lb_priv *lb_priv = get_lb_priv(team);
514  unsigned int interval;
515 
516  interval = ctx->data.u32_val;
517  if (lb_priv->ex->stats.refresh_interval == interval)
518  return 0;
519  lb_priv->ex->stats.refresh_interval = interval;
520  if (interval)
521  schedule_delayed_work(&lb_priv->ex->stats.refresh_dw, 0);
522  else
523  cancel_delayed_work(&lb_priv->ex->stats.refresh_dw);
524  return 0;
525 }
526 
527 static const struct team_option lb_options[] = {
528  {
529  .name = "bpf_hash_func",
530  .type = TEAM_OPTION_TYPE_BINARY,
531  .getter = lb_bpf_func_get,
532  .setter = lb_bpf_func_set,
533  },
534  {
535  .name = "lb_tx_method",
536  .type = TEAM_OPTION_TYPE_STRING,
537  .getter = lb_tx_method_get,
538  .setter = lb_tx_method_set,
539  },
540  {
541  .name = "lb_tx_hash_to_port_mapping",
542  .array_size = LB_TX_HASHTABLE_SIZE,
543  .type = TEAM_OPTION_TYPE_U32,
544  .init = lb_tx_hash_to_port_mapping_init,
545  .getter = lb_tx_hash_to_port_mapping_get,
546  .setter = lb_tx_hash_to_port_mapping_set,
547  },
548  {
549  .name = "lb_hash_stats",
550  .array_size = LB_TX_HASHTABLE_SIZE,
551  .type = TEAM_OPTION_TYPE_BINARY,
552  .init = lb_hash_stats_init,
553  .getter = lb_hash_stats_get,
554  },
555  {
556  .name = "lb_port_stats",
557  .per_port = true,
558  .type = TEAM_OPTION_TYPE_BINARY,
559  .init = lb_port_stats_init,
560  .getter = lb_port_stats_get,
561  },
562  {
563  .name = "lb_stats_refresh_interval",
564  .type = TEAM_OPTION_TYPE_U32,
565  .getter = lb_stats_refresh_interval_get,
566  .setter = lb_stats_refresh_interval_set,
567  },
568 };
569 
570 static int lb_init(struct team *team)
571 {
572  struct lb_priv *lb_priv = get_lb_priv(team);
574  int err;
575 
576  /* set default tx port selector */
577  func = lb_select_tx_port_get_func("hash");
578  BUG_ON(!func);
579  rcu_assign_pointer(lb_priv->select_tx_port_func, func);
580 
581  lb_priv->ex = kzalloc(sizeof(*lb_priv->ex), GFP_KERNEL);
582  if (!lb_priv->ex)
583  return -ENOMEM;
584  lb_priv->ex->team = team;
585 
586  lb_priv->pcpu_stats = alloc_percpu(struct lb_pcpu_stats);
587  if (!lb_priv->pcpu_stats) {
588  err = -ENOMEM;
589  goto err_alloc_pcpu_stats;
590  }
591 
592  INIT_DELAYED_WORK(&lb_priv->ex->stats.refresh_dw, lb_stats_refresh);
593 
594  err = team_options_register(team, lb_options, ARRAY_SIZE(lb_options));
595  if (err)
596  goto err_options_register;
597  return 0;
598 
599 err_options_register:
600  free_percpu(lb_priv->pcpu_stats);
601 err_alloc_pcpu_stats:
602  kfree(lb_priv->ex);
603  return err;
604 }
605 
606 static void lb_exit(struct team *team)
607 {
608  struct lb_priv *lb_priv = get_lb_priv(team);
609 
610  team_options_unregister(team, lb_options,
611  ARRAY_SIZE(lb_options));
612  cancel_delayed_work_sync(&lb_priv->ex->stats.refresh_dw);
613  free_percpu(lb_priv->pcpu_stats);
614  kfree(lb_priv->ex);
615 }
616 
617 static int lb_port_enter(struct team *team, struct team_port *port)
618 {
619  struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);
620 
621  lb_port_priv->pcpu_stats = alloc_percpu(struct lb_stats);
622  if (!lb_port_priv->pcpu_stats)
623  return -ENOMEM;
624  return 0;
625 }
626 
627 static void lb_port_leave(struct team *team, struct team_port *port)
628 {
629  struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);
630 
631  free_percpu(lb_port_priv->pcpu_stats);
632 }
633 
/*
 * Port-disabled hook: drop the port from every hash-to-port table entry
 * that refers to it.
 */
static void lb_port_disabled(struct team *team, struct team_port *port)
{
	lb_tx_hash_to_port_mapping_null_port(team, port);
}
638 
639 static const struct team_mode_ops lb_mode_ops = {
640  .init = lb_init,
641  .exit = lb_exit,
642  .port_enter = lb_port_enter,
643  .port_leave = lb_port_leave,
644  .port_disabled = lb_port_disabled,
645  .transmit = lb_transmit,
646 };
647 
648 static const struct team_mode lb_mode = {
649  .kind = "loadbalance",
650  .owner = THIS_MODULE,
651  .priv_size = sizeof(struct lb_priv),
652  .port_priv_size = sizeof(struct lb_port_priv),
653  .ops = &lb_mode_ops,
654 };
655 
656 static int __init lb_init_module(void)
657 {
658  return team_mode_register(&lb_mode);
659 }
660 
661 static void __exit lb_cleanup_module(void)
662 {
663  team_mode_unregister(&lb_mode);
664 }
665 
666 module_init(lb_init_module);
667 module_exit(lb_cleanup_module);
668 
669 MODULE_LICENSE("GPL v2");
670 MODULE_AUTHOR("Jiri Pirko <[email protected]>");
671 MODULE_DESCRIPTION("Load-balancing mode for team");
672 MODULE_ALIAS("team-mode-loadbalance");