Linux Kernel 3.7.1
multicast.c
1 /*
2  * Copyright (c) 2006 Intel Corporation. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses. You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  * Redistribution and use in source and binary forms, with or
11  * without modification, are permitted provided that the following
12  * conditions are met:
13  *
14  * - Redistributions of source code must retain the above
15  * copyright notice, this list of conditions and the following
16  * disclaimer.
17  *
18  * - Redistributions in binary form must reproduce the above
19  * copyright notice, this list of conditions and the following
20  * disclaimer in the documentation and/or other materials
21  * provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/completion.h>
34 #include <linux/dma-mapping.h>
35 #include <linux/err.h>
36 #include <linux/interrupt.h>
37 #include <linux/export.h>
38 #include <linux/slab.h>
39 #include <linux/bitops.h>
40 #include <linux/random.h>
41 
42 #include <rdma/ib_cache.h>
43 #include "sa.h"
44 
45 static void mcast_add_one(struct ib_device *device);
46 static void mcast_remove_one(struct ib_device *device);
47 
48 static struct ib_client mcast_client = {
49  .name = "ib_multicast",
50  .add = mcast_add_one,
51  .remove = mcast_remove_one
52 };
53 
54 static struct ib_sa_client sa_client;
55 static struct workqueue_struct *mcast_wq;
56 static union ib_gid mgid0;
57 
58 struct mcast_device;
59 
60 struct mcast_port {
61  struct mcast_device *dev;
62  spinlock_t lock;
63  struct rb_root table;
64  atomic_t refcount;
65  struct completion comp;
66  u8 port_num;
67 };
68 
69 struct mcast_device {
70  struct ib_device *device;
71  struct ib_event_handler event_handler;
72  int start_port;
73  int end_port;
74  struct mcast_port port[0];
75 };
76 
77 enum mcast_state {
78  MCAST_JOINING,
79  MCAST_MEMBER,
80  MCAST_ERROR
81 };
82 
83 enum mcast_group_state {
84  MCAST_IDLE,
85  MCAST_BUSY,
86  MCAST_GROUP_ERROR,
87  MCAST_PKEY_EVENT
88 };
89 
90 enum {
91  MCAST_INVALID_PKEY_INDEX = 0xFFFF
92 };
93 
94 struct mcast_member;
95 
96 struct mcast_group {
97  struct ib_sa_mcmember_rec rec;
98  struct rb_node node;
99  struct mcast_port *port;
100  spinlock_t lock;
101  struct work_struct work;
102  struct list_head pending_list;
103  struct list_head active_list;
104  struct mcast_member *last_join;
105  int members[3];
106  atomic_t refcount;
107  enum mcast_group_state state;
108  struct ib_sa_query *query;
109  int query_id;
110  u16 pkey_index;
111  u8 leave_state;
112  int retries;
113 };
114 
115 struct mcast_member {
116  struct ib_sa_multicast multicast;
117  struct ib_sa_client *client;
118  struct mcast_group *group;
119  struct list_head list;
120  enum mcast_state state;
121  atomic_t refcount;
122  struct completion comp;
123 };
124 
125 static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
126  void *context);
127 static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
128  void *context);
129 
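/*
 * Groups are kept in a per-port red-black tree keyed by MGID (raw byte
 * comparison). mcast_insert() can tolerate duplicate keys; this is used for
 * joins against the zero MGID, where the SA assigns the real MGID only after
 * the join completes (see join_handler()).
 */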
130 static struct mcast_group *mcast_find(struct mcast_port *port,
131  union ib_gid *mgid)
132 {
133  struct rb_node *node = port->table.rb_node;
134  struct mcast_group *group;
135  int ret;
136 
137  while (node) {
138  group = rb_entry(node, struct mcast_group, node);
139  ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
140  if (!ret)
141  return group;
142 
143  if (ret < 0)
144  node = node->rb_left;
145  else
146  node = node->rb_right;
147  }
148  return NULL;
149 }
150 
151 static struct mcast_group *mcast_insert(struct mcast_port *port,
152  struct mcast_group *group,
153  int allow_duplicates)
154 {
155  struct rb_node **link = &port->table.rb_node;
156  struct rb_node *parent = NULL;
157  struct mcast_group *cur_group;
158  int ret;
159 
160  while (*link) {
161  parent = *link;
162  cur_group = rb_entry(parent, struct mcast_group, node);
163 
164  ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
165  sizeof group->rec.mgid);
166  if (ret < 0)
167  link = &(*link)->rb_left;
168  else if (ret > 0)
169  link = &(*link)->rb_right;
170  else if (allow_duplicates)
171  link = &(*link)->rb_left;
172  else
173  return cur_group;
174  }
175  rb_link_node(&group->node, parent, link);
176  rb_insert_color(&group->node, &port->table);
177  return NULL;
178 }
179 
180 static void deref_port(struct mcast_port *port)
181 {
182  if (atomic_dec_and_test(&port->refcount))
183  complete(&port->comp);
184 }
185 
186 static void release_group(struct mcast_group *group)
187 {
188  struct mcast_port *port = group->port;
189  unsigned long flags;
190 
191  spin_lock_irqsave(&port->lock, flags);
192  if (atomic_dec_and_test(&group->refcount)) {
193  rb_erase(&group->node, &port->table);
194  spin_unlock_irqrestore(&port->lock, flags);
195  kfree(group);
196  deref_port(port);
197  } else
198  spin_unlock_irqrestore(&port->lock, flags);
199 }
200 
201 static void deref_member(struct mcast_member *member)
202 {
203  if (atomic_dec_and_test(&member->refcount))
204  complete(&member->comp);
205 }
206 
207 static void queue_join(struct mcast_member *member)
208 {
209  struct mcast_group *group = member->group;
210  unsigned long flags;
211 
212  spin_lock_irqsave(&group->lock, flags);
213  list_add_tail(&member->list, &group->pending_list);
214  if (group->state == MCAST_IDLE) {
215  group->state = MCAST_BUSY;
216  atomic_inc(&group->refcount);
217  queue_work(mcast_wq, &group->work);
218  }
219  spin_unlock_irqrestore(&group->lock, flags);
220 }
221 
222 /*
223  * A multicast group has three types of members: full member, non member, and
224  * send only member. We need to keep track of the number of members of each
225  * type based on their join state. Adjust the number of members that belong to
226  * the specified join states.
227  */
228 static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
229 {
230  int i;
231 
232  for (i = 0; i < 3; i++, join_state >>= 1)
233  if (join_state & 0x1)
234  group->members[i] += inc;
235 }
236 
237 /*
238  * If a multicast group has zero members left for a particular join state, but
239  * the group is still a member with the SA, we need to leave that join state.
240  * Determine which join states we still belong to, but that do not have any
241  * active members.
242  */
243 static u8 get_leave_state(struct mcast_group *group)
244 {
245  u8 leave_state = 0;
246  int i;
247 
248  for (i = 0; i < 3; i++)
249  if (!group->members[i])
250  leave_state |= (0x1 << i);
251 
252  return leave_state & group->rec.join_state;
253 }
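/*
 * Worked example of the bookkeeping above (illustration only, not part of
 * the driver). join_state is the IBA bitmask: bit 0 = full member,
 * bit 1 = non member, bit 2 = send-only non member.
 *
 *   member A joins with join_state 0x1:  members[] = { 1, 0, 0 }, rec 0x1
 *   member B joins with join_state 0x5:  members[] = { 2, 0, 1 }, rec 0x5
 *   member B leaves (ib_sa_free_multicast):  members[] = { 1, 0, 0 }
 *   get_leave_state() = (bit 1 | bit 2) & 0x5 = 0x4
 *
 * so only the now-unused send-only state is released with the SA, while the
 * full-member state stays active for member A.
 */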
254 
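/*
 * Compare one component of two records according to the selector (greater
 * than, less than, or equal) carried in the request. Returns 0 if the
 * component is not being queried or if src_value satisfies the selector
 * against dst_value, non-zero otherwise.
 */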
255 static int check_selector(ib_sa_comp_mask comp_mask,
256  ib_sa_comp_mask selector_mask,
257  ib_sa_comp_mask value_mask,
258  u8 selector, u8 src_value, u8 dst_value)
259 {
260  int err;
261 
262  if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
263  return 0;
264 
265  switch (selector) {
266  case IB_SA_GT:
267  err = (src_value <= dst_value);
268  break;
269  case IB_SA_LT:
270  err = (src_value >= dst_value);
271  break;
272  case IB_SA_EQ:
273  err = (src_value != dst_value);
274  break;
275  default:
276  err = 0;
277  break;
278  }
279 
280  return err;
281 }
282 
283 static int cmp_rec(struct ib_sa_mcmember_rec *src,
284  struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask)
285 {
286  /* MGID must already match */
287 
288  if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID &&
289  memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid))
290  return -EINVAL;
291  if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
292  return -EINVAL;
293  if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
294  return -EINVAL;
295  if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
296  IB_SA_MCMEMBER_REC_MTU, src->mtu_selector,
297  src->mtu, dst->mtu))
298  return -EINVAL;
299  if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
300  src->traffic_class != dst->traffic_class)
301  return -EINVAL;
302  if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
303  return -EINVAL;
304  if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
305  IB_SA_MCMEMBER_REC_RATE, src->rate_selector,
306  src->rate, dst->rate))
307  return -EINVAL;
308  if (check_selector(comp_mask,
309  IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
310  IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
311  src->packet_life_time_selector,
312  src->packet_life_time, dst->packet_life_time))
313  return -EINVAL;
314  if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl)
315  return -EINVAL;
316  if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
317  src->flow_label != dst->flow_label)
318  return -EINVAL;
319  if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
320  src->hop_limit != dst->hop_limit)
321  return -EINVAL;
322  if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope)
323  return -EINVAL;
324 
325  /* join_state checked separately, proxy_join ignored */
326 
327  return 0;
328 }
329 
330 static int send_join(struct mcast_group *group, struct mcast_member *member)
331 {
332  struct mcast_port *port = group->port;
333  int ret;
334 
335  group->last_join = member;
336  ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
337  port->port_num, IB_MGMT_METHOD_SET,
338  &member->multicast.rec,
339  member->multicast.comp_mask,
340  3000, GFP_KERNEL, join_handler, group,
341  &group->query);
342  if (ret >= 0) {
343  group->query_id = ret;
344  ret = 0;
345  }
346  return ret;
347 }
348 
349 static int send_leave(struct mcast_group *group, u8 leave_state)
350 {
351  struct mcast_port *port = group->port;
352  struct ib_sa_mcmember_rec rec;
353  int ret;
354 
355  rec = group->rec;
356  rec.join_state = leave_state;
357  group->leave_state = leave_state;
358 
359  ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
360  port->port_num, IB_SA_METHOD_DELETE, &rec,
361  IB_SA_MCMEMBER_REC_MGID |
362  IB_SA_MCMEMBER_REC_PORT_GID |
363  IB_SA_MCMEMBER_REC_JOIN_STATE,
364  3000, GFP_KERNEL, leave_handler,
365  group, &group->query);
366  if (ret >= 0) {
367  group->query_id = ret;
368  ret = 0;
369  }
370  return ret;
371 }
372 
373 static void join_group(struct mcast_group *group, struct mcast_member *member,
374  u8 join_state)
375 {
376  member->state = MCAST_MEMBER;
377  adjust_membership(group, join_state, 1);
378  group->rec.join_state |= join_state;
379  member->multicast.rec = group->rec;
380  member->multicast.rec.join_state = join_state;
381  list_move(&member->list, &group->active_list);
382 }
383 
384 static int fail_join(struct mcast_group *group, struct mcast_member *member,
385  int status)
386 {
387  spin_lock_irq(&group->lock);
388  list_del_init(&member->list);
389  spin_unlock_irq(&group->lock);
390  return member->multicast.callback(status, &member->multicast);
391 }
392 
393 static void process_group_error(struct mcast_group *group)
394 {
395  struct mcast_member *member;
396  int ret = 0;
397  u16 pkey_index;
398 
399  if (group->state == MCAST_PKEY_EVENT)
400  ret = ib_find_pkey(group->port->dev->device,
401  group->port->port_num,
402  be16_to_cpu(group->rec.pkey), &pkey_index);
403 
404  spin_lock_irq(&group->lock);
405  if (group->state == MCAST_PKEY_EVENT && !ret &&
406  group->pkey_index == pkey_index)
407  goto out;
408 
409  while (!list_empty(&group->active_list)) {
410  member = list_entry(group->active_list.next,
411  struct mcast_member, list);
412  atomic_inc(&member->refcount);
413  list_del_init(&member->list);
414  adjust_membership(group, member->multicast.rec.join_state, -1);
415  member->state = MCAST_ERROR;
416  spin_unlock_irq(&group->lock);
417 
418  ret = member->multicast.callback(-ENETRESET,
419  &member->multicast);
420  deref_member(member);
421  if (ret)
422  ib_sa_free_multicast(&member->multicast);
423  spin_lock_irq(&group->lock);
424  }
425 
426  group->rec.join_state = 0;
427 out:
428  group->state = MCAST_BUSY;
429  spin_unlock_irq(&group->lock);
430 }
431 
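/*
 * Per-group work handler: serializes all state changes for a group. It
 * drains the pending list, completing joins locally when the group already
 * holds the requested join states or sending an SA join otherwise, handles
 * group errors, and finally sends an SA leave for any join states that no
 * longer have members before dropping back to MCAST_IDLE.
 */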
432 static void mcast_work_handler(struct work_struct *work)
433 {
434  struct mcast_group *group;
435  struct mcast_member *member;
436  struct ib_sa_multicast *multicast;
437  int status, ret;
438  u8 join_state;
439 
440  group = container_of(work, typeof(*group), work);
441 retest:
442  spin_lock_irq(&group->lock);
443  while (!list_empty(&group->pending_list) ||
444  (group->state != MCAST_BUSY)) {
445 
446  if (group->state != MCAST_BUSY) {
447  spin_unlock_irq(&group->lock);
448  process_group_error(group);
449  goto retest;
450  }
451 
452  member = list_entry(group->pending_list.next,
453  struct mcast_member, list);
454  multicast = &member->multicast;
455  join_state = multicast->rec.join_state;
456  atomic_inc(&member->refcount);
457 
458  if (join_state == (group->rec.join_state & join_state)) {
459  status = cmp_rec(&group->rec, &multicast->rec,
460  multicast->comp_mask);
461  if (!status)
462  join_group(group, member, join_state);
463  else
464  list_del_init(&member->list);
465  spin_unlock_irq(&group->lock);
466  ret = multicast->callback(status, multicast);
467  } else {
468  spin_unlock_irq(&group->lock);
469  status = send_join(group, member);
470  if (!status) {
471  deref_member(member);
472  return;
473  }
474  ret = fail_join(group, member, status);
475  }
476 
477  deref_member(member);
478  if (ret)
479  ib_sa_free_multicast(&member->multicast);
480  spin_lock_irq(&group->lock);
481  }
482 
483  join_state = get_leave_state(group);
484  if (join_state) {
485  group->rec.join_state &= ~join_state;
486  spin_unlock_irq(&group->lock);
487  if (send_leave(group, join_state))
488  goto retest;
489  } else {
490  group->state = MCAST_IDLE;
491  spin_unlock_irq(&group->lock);
492  release_group(group);
493  }
494 }
495 
496 /*
497  * Fail a join request if it is still active - at the head of the pending queue.
498  */
499 static void process_join_error(struct mcast_group *group, int status)
500 {
501  struct mcast_member *member;
502  int ret;
503 
504  spin_lock_irq(&group->lock);
505  member = list_entry(group->pending_list.next,
506  struct mcast_member, list);
507  if (group->last_join == member) {
508  atomic_inc(&member->refcount);
509  list_del_init(&member->list);
510  spin_unlock_irq(&group->lock);
511  ret = member->multicast.callback(status, &member->multicast);
512  deref_member(member);
513  if (ret)
514  ib_sa_free_multicast(&member->multicast);
515  } else
516  spin_unlock_irq(&group->lock);
517 }
518 
519 static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
520  void *context)
521 {
522  struct mcast_group *group = context;
523  u16 pkey_index = MCAST_INVALID_PKEY_INDEX;
524 
525  if (status)
526  process_join_error(group, status);
527  else {
528  ib_find_pkey(group->port->dev->device, group->port->port_num,
529  be16_to_cpu(rec->pkey), &pkey_index);
530 
531  spin_lock_irq(&group->port->lock);
532  group->rec = *rec;
533  if (group->state == MCAST_BUSY &&
534  group->pkey_index == MCAST_INVALID_PKEY_INDEX)
535  group->pkey_index = pkey_index;
536  if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
537  rb_erase(&group->node, &group->port->table);
538  mcast_insert(group->port, group, 1);
539  }
540  spin_unlock_irq(&group->port->lock);
541  }
542  mcast_work_handler(&group->work);
543 }
544 
545 static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
546  void *context)
547 {
548  struct mcast_group *group = context;
549 
550  if (status && group->retries > 0 &&
551  !send_leave(group, group->leave_state))
552  group->retries--;
553  else
554  mcast_work_handler(&group->work);
555 }
556 
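/*
 * Look up the group for an MGID under the port lock, or allocate and insert
 * a new one. A reference is taken on the returned group; joins against the
 * zero MGID always allocate a fresh group, since the SA has not yet assigned
 * the MGID that will key the tree.
 */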
557 static struct mcast_group *acquire_group(struct mcast_port *port,
558  union ib_gid *mgid, gfp_t gfp_mask)
559 {
560  struct mcast_group *group, *cur_group;
561  unsigned long flags;
562  int is_mgid0;
563 
564  is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
565  if (!is_mgid0) {
566  spin_lock_irqsave(&port->lock, flags);
567  group = mcast_find(port, mgid);
568  if (group)
569  goto found;
570  spin_unlock_irqrestore(&port->lock, flags);
571  }
572 
573  group = kzalloc(sizeof *group, gfp_mask);
574  if (!group)
575  return NULL;
576 
577  group->retries = 3;
578  group->port = port;
579  group->rec.mgid = *mgid;
580  group->pkey_index = MCAST_INVALID_PKEY_INDEX;
581  INIT_LIST_HEAD(&group->pending_list);
582  INIT_LIST_HEAD(&group->active_list);
583  INIT_WORK(&group->work, mcast_work_handler);
584  spin_lock_init(&group->lock);
585 
586  spin_lock_irqsave(&port->lock, flags);
587  cur_group = mcast_insert(port, group, is_mgid0);
588  if (cur_group) {
589  kfree(group);
590  group = cur_group;
591  } else
592  atomic_inc(&port->refcount);
593 found:
594  atomic_inc(&group->refcount);
595  spin_unlock_irqrestore(&port->lock, flags);
596  return group;
597 }
598 
599 /*
600  * We serialize all join requests to a single group to make our lives much
601  * easier. Otherwise, two users could try to join the same group
602  * simultaneously, with different configurations, one could leave while the
603  * join is in progress, etc., which makes locking around error recovery
604  * difficult.
605  */
606 struct ib_sa_multicast *
607 ib_sa_join_multicast(struct ib_sa_client *client,
608  struct ib_device *device, u8 port_num,
609  struct ib_sa_mcmember_rec *rec,
610  ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
611  int (*callback)(int status,
612  struct ib_sa_multicast *multicast),
613  void *context)
614 {
615  struct mcast_device *dev;
616  struct mcast_member *member;
617  struct ib_sa_multicast *multicast;
618  int ret;
619 
620  dev = ib_get_client_data(device, &mcast_client);
621  if (!dev)
622  return ERR_PTR(-ENODEV);
623 
624  member = kmalloc(sizeof *member, gfp_mask);
625  if (!member)
626  return ERR_PTR(-ENOMEM);
627 
628  ib_sa_client_get(client);
629  member->client = client;
630  member->multicast.rec = *rec;
631  member->multicast.comp_mask = comp_mask;
632  member->multicast.callback = callback;
633  member->multicast.context = context;
634  init_completion(&member->comp);
635  atomic_set(&member->refcount, 1);
636  member->state = MCAST_JOINING;
637 
638  member->group = acquire_group(&dev->port[port_num - dev->start_port],
639  &rec->mgid, gfp_mask);
640  if (!member->group) {
641  ret = -ENOMEM;
642  goto err;
643  }
644 
645  /*
646  * The user will get the multicast structure in their callback. They
647  * could then free the multicast structure before we can return from
648  * this routine. So we save the pointer to return before queuing
649  * any callback.
650  */
651  multicast = &member->multicast;
652  queue_join(member);
653  return multicast;
654 
655 err:
656  ib_sa_client_put(client);
657  kfree(member);
658  return ERR_PTR(ret);
659 }
660 EXPORT_SYMBOL(ib_sa_join_multicast);
661 
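/*
 * Minimal sketch of a consumer of this interface (illustration only; the
 * names my_sa_client, my_mcast_handler, my_join and my_ctx are hypothetical,
 * not part of this file). A real user such as the RDMA CM registers its own
 * ib_sa_client, fills in the MCMemberRecord fields it cares about, and frees
 * the returned handle with ib_sa_free_multicast() when done.
 */
static struct ib_sa_client my_sa_client;	/* registered with ib_sa_register_client() */

static int my_mcast_handler(int status, struct ib_sa_multicast *multicast)
{
	/*
	 * Invoked from the join path above; status 0 means the join completed
	 * and multicast->rec holds the SA's view of the group.  One possible
	 * policy: return non-zero on error so the core frees the multicast
	 * tracking structure for us.
	 */
	return status;
}

static struct ib_sa_multicast *my_join(struct ib_device *device, u8 port_num,
				       struct ib_sa_mcmember_rec *rec, void *my_ctx)
{
	/* Caller fills rec->mgid, rec->port_gid, rec->pkey and rec->join_state. */
	ib_sa_comp_mask comp_mask = IB_SA_MCMEMBER_REC_MGID |
				    IB_SA_MCMEMBER_REC_PORT_GID |
				    IB_SA_MCMEMBER_REC_PKEY |
				    IB_SA_MCMEMBER_REC_JOIN_STATE;

	return ib_sa_join_multicast(&my_sa_client, device, port_num, rec,
				    comp_mask, GFP_KERNEL,
				    my_mcast_handler, my_ctx);
}
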
662 void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
663 {
664  struct mcast_member *member;
665  struct mcast_group *group;
666 
667  member = container_of(multicast, struct mcast_member, multicast);
668  group = member->group;
669 
670  spin_lock_irq(&group->lock);
671  if (member->state == MCAST_MEMBER)
672  adjust_membership(group, multicast->rec.join_state, -1);
673 
674  list_del_init(&member->list);
675 
676  if (group->state == MCAST_IDLE) {
677  group->state = MCAST_BUSY;
678  spin_unlock_irq(&group->lock);
679  /* Continue to hold reference on group until callback */
680  queue_work(mcast_wq, &group->work);
681  } else {
682  spin_unlock_irq(&group->lock);
683  release_group(group);
684  }
685 
686  deref_member(member);
687  wait_for_completion(&member->comp);
688  ib_sa_client_put(member->client);
689  kfree(member);
690 }
691 EXPORT_SYMBOL(ib_sa_free_multicast);
692 
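/*
 * Return a cached copy of the MCMemberRecord for an MGID the port has
 * already joined; -EADDRNOTAVAIL if no such group is being tracked.
 */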
693 int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
694  union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
695 {
696  struct mcast_device *dev;
697  struct mcast_port *port;
698  struct mcast_group *group;
699  unsigned long flags;
700  int ret = 0;
701 
702  dev = ib_get_client_data(device, &mcast_client);
703  if (!dev)
704  return -ENODEV;
705 
706  port = &dev->port[port_num - dev->start_port];
707  spin_lock_irqsave(&port->lock, flags);
708  group = mcast_find(port, mgid);
709  if (group)
710  *rec = group->rec;
711  else
712  ret = -EADDRNOTAVAIL;
713  spin_unlock_irqrestore(&port->lock, flags);
714 
715  return ret;
716 }
717 EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
718 
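/*
 * Initialize address handle attributes from a multicast member record so a
 * consumer can create an AH for sending to the group: DLID, SL and rate come
 * from the record, and a GRH is always set up with the MGID as the
 * destination GID.
 */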
719 int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
720  struct ib_sa_mcmember_rec *rec,
721  struct ib_ah_attr *ah_attr)
722 {
723  int ret;
724  u16 gid_index;
725  u8 p;
726 
727  ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
728  if (ret)
729  return ret;
730 
731  memset(ah_attr, 0, sizeof *ah_attr);
732  ah_attr->dlid = be16_to_cpu(rec->mlid);
733  ah_attr->sl = rec->sl;
734  ah_attr->port_num = port_num;
735  ah_attr->static_rate = rec->rate;
736 
737  ah_attr->ah_flags = IB_AH_GRH;
738  ah_attr->grh.dgid = rec->mgid;
739 
740  ah_attr->grh.sgid_index = (u8) gid_index;
741  ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
742  ah_attr->grh.hop_limit = rec->hop_limit;
743  ah_attr->grh.traffic_class = rec->traffic_class;
744 
745  return 0;
746 }
747 EXPORT_SYMBOL(ib_init_ah_from_mcmember);
748 
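/*
 * Propagate a port event to every group on the port: each group is flagged
 * with the given state and its work item is queued, so that affected members
 * are reported with -ENETRESET from process_group_error().
 */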
749 static void mcast_groups_event(struct mcast_port *port,
750  enum mcast_group_state state)
751 {
752  struct mcast_group *group;
753  struct rb_node *node;
754  unsigned long flags;
755 
756  spin_lock_irqsave(&port->lock, flags);
757  for (node = rb_first(&port->table); node; node = rb_next(node)) {
758  group = rb_entry(node, struct mcast_group, node);
759  spin_lock(&group->lock);
760  if (group->state == MCAST_IDLE) {
761  atomic_inc(&group->refcount);
762  queue_work(mcast_wq, &group->work);
763  }
764  if (group->state != MCAST_GROUP_ERROR)
765  group->state = state;
766  spin_unlock(&group->lock);
767  }
768  spin_unlock_irqrestore(&port->lock, flags);
769 }
770 
771 static void mcast_event_handler(struct ib_event_handler *handler,
772  struct ib_event *event)
773 {
774  struct mcast_device *dev;
775  int index;
776 
777  dev = container_of(handler, struct mcast_device, event_handler);
778  if (rdma_port_get_link_layer(dev->device, event->element.port_num) !=
779  IB_LINK_LAYER_INFINIBAND)
780  return;
781 
782  index = event->element.port_num - dev->start_port;
783 
784  switch (event->event) {
785  case IB_EVENT_PORT_ERR:
786  case IB_EVENT_LID_CHANGE:
787  case IB_EVENT_SM_CHANGE:
788  case IB_EVENT_CLIENT_REREGISTER:
789  mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR);
790  break;
791  case IB_EVENT_PKEY_CHANGE:
792  mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT);
793  break;
794  default:
795  break;
796  }
797 }
798 
799 static void mcast_add_one(struct ib_device *device)
800 {
801  struct mcast_device *dev;
802  struct mcast_port *port;
803  int i;
804  int count = 0;
805 
806  if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
807  return;
808 
809  dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
810  GFP_KERNEL);
811  if (!dev)
812  return;
813 
814  if (device->node_type == RDMA_NODE_IB_SWITCH)
815  dev->start_port = dev->end_port = 0;
816  else {
817  dev->start_port = 1;
818  dev->end_port = device->phys_port_cnt;
819  }
820 
821  for (i = 0; i <= dev->end_port - dev->start_port; i++) {
822  if (rdma_port_get_link_layer(device, dev->start_port + i) !=
823  IB_LINK_LAYER_INFINIBAND)
824  continue;
825  port = &dev->port[i];
826  port->dev = dev;
827  port->port_num = dev->start_port + i;
828  spin_lock_init(&port->lock);
829  port->table = RB_ROOT;
830  init_completion(&port->comp);
831  atomic_set(&port->refcount, 1);
832  ++count;
833  }
834 
835  if (!count) {
836  kfree(dev);
837  return;
838  }
839 
840  dev->device = device;
841  ib_set_client_data(device, &mcast_client, dev);
842 
843  INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
844  ib_register_event_handler(&dev->event_handler);
845 }
846 
847 static void mcast_remove_one(struct ib_device *device)
848 {
849  struct mcast_device *dev;
850  struct mcast_port *port;
851  int i;
852 
853  dev = ib_get_client_data(device, &mcast_client);
854  if (!dev)
855  return;
856 
856 
857  ib_unregister_event_handler(&dev->event_handler);
858  flush_workqueue(mcast_wq);
859 
860  for (i = 0; i <= dev->end_port - dev->start_port; i++) {
861  if (rdma_port_get_link_layer(device, dev->start_port + i) ==
862  IB_LINK_LAYER_INFINIBAND) {
863  port = &dev->port[i];
864  deref_port(port);
865  wait_for_completion(&port->comp);
866  }
867  }
868 
869  kfree(dev);
870 }
871 
872 int mcast_init(void)
873 {
874  int ret;
875 
876  mcast_wq = create_singlethread_workqueue("ib_mcast");
877  if (!mcast_wq)
878  return -ENOMEM;
879 
880  ib_sa_register_client(&sa_client);
881 
882  ret = ib_register_client(&mcast_client);
883  if (ret)
884  goto err;
885  return 0;
886 
887 err:
888  ib_sa_unregister_client(&sa_client);
889  destroy_workqueue(mcast_wq);
890  return ret;
891 }
892 
893 void mcast_cleanup(void)
894 {
895  ib_unregister_client(&mcast_client);
896  ib_sa_unregister_client(&sa_client);
897  destroy_workqueue(mcast_wq);
898 }