Linux Kernel 3.7.1
mad.c
1 /*
2  * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses. You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  * Redistribution and use in source and binary forms, with or
11  * without modification, are permitted provided that the following
12  * conditions are met:
13  *
14  * - Redistributions of source code must retain the above
15  * copyright notice, this list of conditions and the following
16  * disclaimer.
17  *
18  * - Redistributions in binary form must reproduce the above
19  * copyright notice, this list of conditions and the following
20  * disclaimer in the documentation and/or other materials
21  * provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <rdma/ib_mad.h>
34 #include <rdma/ib_smi.h>
35 #include <rdma/ib_sa.h>
36 #include <rdma/ib_cache.h>
37 
38 #include <linux/random.h>
39 #include <linux/mlx4/cmd.h>
40 #include <linux/gfp.h>
41 #include <rdma/ib_pma.h>
42 
43 #include "mlx4_ib.h"
44 
45 enum {
46  MLX4_IB_VENDOR_CLASS1 = 0x9,
47  MLX4_IB_VENDOR_CLASS2 = 0xa
48 };
49 
50 #define MLX4_TUN_SEND_WRID_SHIFT 34
51 #define MLX4_TUN_QPN_SHIFT 32
52 #define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT)
53 #define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT)
54 
55 #define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1)
56 #define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3)
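/* The 64-bit work request IDs built with the macros above pack three fields:
 * the low bits hold the tunnel ring buffer index, bits 32-33 hold the proxy
 * QP type (0 for the QP0/SMI proxy, 1 for the QP1/GSI proxy), and bit 34 is
 * set on receive WRs so completions can be told apart from sends. */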
57 
58  /* Port mgmt change event handling */
59 
60 #define GET_BLK_PTR_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.block_ptr)
61 #define GET_MASK_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.tbl_entries_mask)
62 #define NUM_IDX_IN_PKEY_TBL_BLK 32
63 #define GUID_TBL_ENTRY_SIZE 8 /* size in bytes */
64 #define GUID_TBL_BLK_NUM_ENTRIES 8
65 #define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES)
66 
67 struct mlx4_mad_rcv_buf {
68  struct ib_grh grh;
69  u8 payload[256];
70 } __packed;
71 
72 struct mlx4_mad_snd_buf {
73  u8 payload[256];
74 } __packed;
75 
76 struct mlx4_tunnel_mad {
77  struct ib_grh grh;
78  struct mlx4_ib_tunnel_header hdr;
79  struct ib_mad mad;
80 } __packed;
81 
82 struct mlx4_rcv_tunnel_mad {
83  struct mlx4_rcv_tunnel_hdr hdr;
84  struct ib_grh grh;
85  struct ib_mad mad;
86 } __packed;
87 
88 static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num);
89 static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num);
90 static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
91  int block, u32 change_bitmap);
92 
93 static __be64 mlx4_ib_gen_node_guid(void)
94 {
95 #define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40))
96  return cpu_to_be64(NODE_GUID_HI | random32());
97 }
98 
99 __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
100 {
101  return cpu_to_be64(atomic_inc_return(&ctx->tid)) |
102  cpu_to_be64(0xff00000000000000LL);
103 }
104 
105 int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
106  int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
107  void *in_mad, void *response_mad)
108 {
109  struct mlx4_cmd_mailbox *inmailbox, *outmailbox;
110  void *inbox;
111  int err;
112  u32 in_modifier = port;
113  u8 op_modifier = 0;
114 
115  inmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
116  if (IS_ERR(inmailbox))
117  return PTR_ERR(inmailbox);
118  inbox = inmailbox->buf;
119 
120  outmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
121  if (IS_ERR(outmailbox)) {
122  mlx4_free_cmd_mailbox(dev->dev, inmailbox);
123  return PTR_ERR(outmailbox);
124  }
125 
126  memcpy(inbox, in_mad, 256);
127 
128  /*
129  * Key check traps can't be generated unless we have in_wc to
130  * tell us where to send the trap.
131  */
132  if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc)
133  op_modifier |= 0x1;
134  if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc)
135  op_modifier |= 0x2;
136  if (mlx4_is_mfunc(dev->dev) &&
137  (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc))
138  op_modifier |= 0x8;
139 
140  if (in_wc) {
141  struct {
142  __be32 my_qpn;
143  u32 reserved1;
144  __be32 rqpn;
145  u8 sl;
146  u8 g_path;
147  u16 reserved2[2];
148  __be16 pkey;
149  u32 reserved3[11];
150  u8 grh[40];
151  } *ext_info;
152 
153  memset(inbox + 256, 0, 256);
154  ext_info = inbox + 256;
155 
156  ext_info->my_qpn = cpu_to_be32(in_wc->qp->qp_num);
157  ext_info->rqpn = cpu_to_be32(in_wc->src_qp);
158  ext_info->sl = in_wc->sl << 4;
159  ext_info->g_path = in_wc->dlid_path_bits |
160  (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
161  ext_info->pkey = cpu_to_be16(in_wc->pkey_index);
162 
163  if (in_grh)
164  memcpy(ext_info->grh, in_grh, 40);
165 
166  op_modifier |= 0x4;
167 
168  in_modifier |= in_wc->slid << 16;
169  }
170 
171  err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier,
172  mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier,
173  MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
174  (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED);
175 
176  if (!err)
177  memcpy(response_mad, outmailbox->buf, 256);
178 
179  mlx4_free_cmd_mailbox(dev->dev, inmailbox);
180  mlx4_free_cmd_mailbox(dev->dev, outmailbox);
181 
182  return err;
183 }
184 
185 static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
186 {
187  struct ib_ah *new_ah;
188  struct ib_ah_attr ah_attr;
189  unsigned long flags;
190 
191  if (!dev->send_agent[port_num - 1][0])
192  return;
193 
194  memset(&ah_attr, 0, sizeof ah_attr);
195  ah_attr.dlid = lid;
196  ah_attr.sl = sl;
197  ah_attr.port_num = port_num;
198 
199  new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
200  &ah_attr);
201  if (IS_ERR(new_ah))
202  return;
203 
204  spin_lock_irqsave(&dev->sm_lock, flags);
205  if (dev->sm_ah[port_num - 1])
206  ib_destroy_ah(dev->sm_ah[port_num - 1]);
207  dev->sm_ah[port_num - 1] = new_ah;
208  spin_unlock_irqrestore(&dev->sm_lock, flags);
209 }
210 
211 /*
212  * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can
213  * synthesize LID change, Client-Rereg, GID change, and P_Key change events.
214  */
215 static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
216  u16 prev_lid)
217 {
218  struct ib_port_info *pinfo;
219  u16 lid;
220  __be16 *base;
221  u32 bn, pkey_change_bitmap;
222  int i;
223 
224 
225  struct mlx4_ib_dev *dev = to_mdev(ibdev);
226  if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
227  mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
228  mad->mad_hdr.method == IB_MGMT_METHOD_SET)
229  switch (mad->mad_hdr.attr_id) {
230  case IB_SMP_ATTR_PORT_INFO:
231  pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data;
232  lid = be16_to_cpu(pinfo->lid);
233 
234  update_sm_ah(dev, port_num,
235  be16_to_cpu(pinfo->sm_lid),
236  pinfo->neighbormtu_mastersmsl & 0xf);
237 
238  if (pinfo->clientrereg_resv_subnetto & 0x80)
239  handle_client_rereg_event(dev, port_num);
240 
241  if (prev_lid != lid)
242  handle_lid_change_event(dev, port_num);
243  break;
244 
245  case IB_SMP_ATTR_PKEY_TABLE:
246  if (!mlx4_is_mfunc(dev->dev)) {
247  mlx4_ib_dispatch_event(dev, port_num,
248  IB_EVENT_PKEY_CHANGE);
249  break;
250  }
251 
252  /* at this point, we are running in the master.
253  * Slaves do not receive SMPs.
254  */
255  bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF;
256  base = (__be16 *) &(((struct ib_smp *)mad)->data[0]);
257  pkey_change_bitmap = 0;
258  for (i = 0; i < 32; i++) {
259  pr_debug("PKEY[%d] = x%x\n",
260  i + bn*32, be16_to_cpu(base[i]));
261  if (be16_to_cpu(base[i]) !=
262  dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32]) {
263  pkey_change_bitmap |= (1 << i);
264  dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32] =
265  be16_to_cpu(base[i]);
266  }
267  }
268  pr_debug("PKEY Change event: port=%d, "
269  "block=0x%x, change_bitmap=0x%x\n",
270  port_num, bn, pkey_change_bitmap);
271 
272  if (pkey_change_bitmap) {
273  mlx4_ib_dispatch_event(dev, port_num,
274  IB_EVENT_PKEY_CHANGE);
275  if (!dev->sriov.is_going_down)
276  __propagate_pkey_ev(dev, port_num, bn,
277  pkey_change_bitmap);
278  }
279  break;
280 
281  case IB_SMP_ATTR_GUID_INFO:
282  /* paravirtualized master's guid is guid 0 -- does not change */
283  if (!mlx4_is_master(dev->dev))
284  mlx4_ib_dispatch_event(dev, port_num,
285  IB_EVENT_GID_CHANGE);
286  /*if master, notify relevant slaves*/
287  if (mlx4_is_master(dev->dev) &&
288  !dev->sriov.is_going_down) {
289  bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod);
290  mlx4_ib_update_cache_on_guid_change(dev, bn, port_num,
291  (u8 *)(&((struct ib_smp *)mad)->data));
292  mlx4_ib_notify_slaves_on_guid_change(dev, bn, port_num,
293  (u8 *)(&((struct ib_smp *)mad)->data));
294  }
295  break;
296 
297  default:
298  break;
299  }
300 }
301 
302 static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
303  int block, u32 change_bitmap)
304 {
305  int i, ix, slave, err;
306  int have_event = 0;
307 
308  for (slave = 0; slave < dev->dev->caps.sqp_demux; slave++) {
309  if (slave == mlx4_master_func_num(dev->dev))
310  continue;
311  if (!mlx4_is_slave_active(dev->dev, slave))
312  continue;
313 
314  have_event = 0;
315  for (i = 0; i < 32; i++) {
316  if (!(change_bitmap & (1 << i)))
317  continue;
318  for (ix = 0;
319  ix < dev->dev->caps.pkey_table_len[port_num]; ix++) {
320  if (dev->pkeys.virt2phys_pkey[slave][port_num - 1]
321  [ix] == i + 32 * block) {
322  err = mlx4_gen_pkey_eqe(dev->dev, slave, port_num);
323  pr_debug("propagate_pkey_ev: slave %d,"
324  " port %d, ix %d (%d)\n",
325  slave, port_num, ix, err);
326  have_event = 1;
327  break;
328  }
329  }
330  if (have_event)
331  break;
332  }
333  }
334 }
335 
336 static void node_desc_override(struct ib_device *dev,
337  struct ib_mad *mad)
338 {
339  unsigned long flags;
340 
341  if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
342  mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
343  mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
344  mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
345  spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags);
346  memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
347  spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags);
348  }
349 }
350 
351 static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *mad)
352 {
353  int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
354  struct ib_mad_send_buf *send_buf;
355  struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
356  int ret;
357  unsigned long flags;
358 
359  if (agent) {
360  send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
361  IB_MGMT_MAD_DATA, GFP_ATOMIC);
362  if (IS_ERR(send_buf))
363  return;
364  /*
365  * We rely here on the fact that MLX QPs don't use the
366  * address handle after the send is posted (this is
367  * wrong following the IB spec strictly, but we know
368  * it's OK for our devices).
369  */
370  spin_lock_irqsave(&dev->sm_lock, flags);
371  memcpy(send_buf->mad, mad, sizeof *mad);
372  if ((send_buf->ah = dev->sm_ah[port_num - 1]))
373  ret = ib_post_send_mad(send_buf, NULL);
374  else
375  ret = -EINVAL;
376  spin_unlock_irqrestore(&dev->sm_lock, flags);
377 
378  if (ret)
379  ib_free_send_mad(send_buf);
380  }
381 }
382 
383 static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave,
384  struct ib_sa_mad *sa_mad)
385 {
386  int ret = 0;
387 
388  /* dispatch to different sa handlers */
389  switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
390  case IB_SA_ATTR_MC_MEMBER_REC:
391  ret = mlx4_ib_mcg_demux_handler(ibdev, port, slave, sa_mad);
392  break;
393  default:
394  break;
395  }
396  return ret;
397 }
398 
399 int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
400 {
401  struct mlx4_ib_dev *dev = to_mdev(ibdev);
402  int i;
403 
404  for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
405  if (dev->sriov.demux[port - 1].guid_cache[i] == guid)
406  return i;
407  }
408  return -1;
409 }
410 
411 
412 static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave,
413  u8 port, u16 pkey, u16 *ix)
414 {
415  int i, ret;
416  u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF;
417  u16 slot_pkey;
418 
419  if (slave == mlx4_master_func_num(dev->dev))
420  return ib_find_cached_pkey(&dev->ib_dev, port, pkey, ix);
421 
422  unassigned_pkey_ix = dev->dev->phys_caps.pkey_phys_table_len[port] - 1;
423 
424  for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
425  if (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == unassigned_pkey_ix)
426  continue;
427 
428  pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][i];
429 
430  ret = ib_get_cached_pkey(&dev->ib_dev, port, pkey_ix, &slot_pkey);
431  if (ret)
432  continue;
433  if ((slot_pkey & 0x7FFF) == (pkey & 0x7FFF)) {
434  if (slot_pkey & 0x8000) {
435  *ix = (u16) pkey_ix;
436  return 0;
437  } else {
438  /* take first partial pkey index found */
439  if (partial_ix == 0xFF)
440  partial_ix = pkey_ix;
441  }
442  }
443  }
444 
445  if (partial_ix < 0xFF) {
446  *ix = (u16) partial_ix;
447  return 0;
448  }
449 
450  return -EINVAL;
451 }
452 
453 int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
454  enum ib_qp_type dest_qpt, struct ib_wc *wc,
455  struct ib_grh *grh, struct ib_mad *mad)
456 {
457  struct ib_sge list;
458  struct ib_send_wr wr, *bad_wr;
459  struct mlx4_ib_demux_pv_ctx *tun_ctx;
460  struct mlx4_ib_demux_pv_qp *tun_qp;
461  struct mlx4_rcv_tunnel_mad *tun_mad;
462  struct ib_ah_attr attr;
463  struct ib_ah *ah;
464  struct ib_qp *src_qp = NULL;
465  unsigned tun_tx_ix = 0;
466  int dqpn;
467  int ret = 0;
468  u16 tun_pkey_ix;
469  u16 cached_pkey;
470 
471  if (dest_qpt > IB_QPT_GSI)
472  return -EINVAL;
473 
474  tun_ctx = dev->sriov.demux[port-1].tun[slave];
475 
476  /* check if proxy qp created */
477  if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
478  return -EAGAIN;
479 
480  /* QP0 forwarding only for Dom0 */
481  if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
482  return -EINVAL;
483 
484  if (!dest_qpt)
485  tun_qp = &tun_ctx->qp[0];
486  else
487  tun_qp = &tun_ctx->qp[1];
488 
489  /* compute P_Key index to put in tunnel header for slave */
490  if (dest_qpt) {
491  u16 pkey_ix;
492  ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey);
493  if (ret)
494  return -EINVAL;
495 
496  ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix);
497  if (ret)
498  return -EINVAL;
499  tun_pkey_ix = pkey_ix;
500  } else
501  tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
502 
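 /* The destination proxy QP number computed below follows the numbering
 * used in this file: each slave owns a block of eight special QPs starting
 * at base_proxy_sqpn + 8 * slave, with offsets 0-1 its QP0 proxies for
 * ports 1 and 2 and offsets 2-3 its QP1 proxies, which is what
 * port + (dest_qpt * 2) - 1 selects. */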
503  dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1;
504 
505  /* get tunnel tx data buf for slave */
506  src_qp = tun_qp->qp;
507 
508  /* create ah. Just need an empty one with the port num for the post send.
509  * The driver will set the force loopback bit in post_send */
510  memset(&attr, 0, sizeof attr);
511  attr.port_num = port;
512  ah = ib_create_ah(tun_ctx->pd, &attr);
513  if (IS_ERR(ah))
514  return -ENOMEM;
515 
516  /* allocate tunnel tx buf after pass failure returns */
517  spin_lock(&tun_qp->tx_lock);
518  if (tun_qp->tx_ix_head - tun_qp->tx_ix_tail >=
519  (MLX4_NUM_TUNNEL_BUFS - 1))
520  ret = -EAGAIN;
521  else
522  tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
523  spin_unlock(&tun_qp->tx_lock);
524  if (ret)
525  goto out;
526 
527  tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
528  if (tun_qp->tx_ring[tun_tx_ix].ah)
529  ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
530  tun_qp->tx_ring[tun_tx_ix].ah = ah;
531  ib_dma_sync_single_for_cpu(&dev->ib_dev,
532  tun_qp->tx_ring[tun_tx_ix].buf.map,
533  sizeof (struct mlx4_rcv_tunnel_mad),
534  DMA_TO_DEVICE);
535 
536  /* copy over to tunnel buffer */
537  if (grh)
538  memcpy(&tun_mad->grh, grh, sizeof *grh);
539  memcpy(&tun_mad->mad, mad, sizeof *mad);
540 
541  /* adjust tunnel data */
542  tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
543  tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
544  tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
545  tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
546  tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
547 
548  ib_dma_sync_single_for_device(&dev->ib_dev,
549  tun_qp->tx_ring[tun_tx_ix].buf.map,
550  sizeof (struct mlx4_rcv_tunnel_mad),
551  DMA_TO_DEVICE);
552 
553  list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map;
554  list.length = sizeof (struct mlx4_rcv_tunnel_mad);
555  list.lkey = tun_ctx->mr->lkey;
556 
557  wr.wr.ud.ah = ah;
558  wr.wr.ud.port_num = port;
559  wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
560  wr.wr.ud.remote_qpn = dqpn;
561  wr.next = NULL;
562  wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
563  wr.sg_list = &list;
564  wr.num_sge = 1;
565  wr.opcode = IB_WR_SEND;
566  wr.send_flags = IB_SEND_SIGNALED;
567 
568  ret = ib_post_send(src_qp, &wr, &bad_wr);
569 out:
570  if (ret)
571  ib_destroy_ah(ah);
572  return ret;
573 }
574 
575 static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
576  struct ib_wc *wc, struct ib_grh *grh,
577  struct ib_mad *mad)
578 {
579  struct mlx4_ib_dev *dev = to_mdev(ibdev);
580  int err;
581  int slave;
582  u8 *slave_id;
583 
584  /* Initially assume that this mad is for us */
585  slave = mlx4_master_func_num(dev->dev);
586 
587  /* See if the slave id is encoded in a response mad */
588  if (mad->mad_hdr.method & 0x80) {
589  slave_id = (u8 *) &mad->mad_hdr.tid;
590  slave = *slave_id;
591  if (slave != 255) /*255 indicates the dom0*/
592  *slave_id = 0; /* remap tid */
593  }
594 
595  /* If a grh is present, we demux according to it */
596  if (wc->wc_flags & IB_WC_GRH) {
597  slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id);
598  if (slave < 0) {
599  mlx4_ib_warn(ibdev, "failed matching grh\n");
600  return -ENOENT;
601  }
602  }
603  /* Class-specific handling */
604  switch (mad->mad_hdr.mgmt_class) {
605  case IB_MGMT_CLASS_SUBN_ADM:
606  if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
607  (struct ib_sa_mad *) mad))
608  return 0;
609  break;
610  case IB_MGMT_CLASS_CM:
611  if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad))
612  return 0;
613  break;
614  case IB_MGMT_CLASS_DEVICE_MGMT:
615  if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP)
616  return 0;
617  break;
618  default:
619  /* Drop unsupported classes for slaves in tunnel mode */
620  if (slave != mlx4_master_func_num(dev->dev)) {
621  pr_debug("dropping unsupported ingress mad from class:%d "
622  "for slave:%d\n", mad->mad_hdr.mgmt_class, slave);
623  return 0;
624  }
625  }
626  /* make sure that a slave id of 255 was not left unhandled */
627  if (slave >= dev->dev->caps.sqp_demux) {
628  mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
629  slave, dev->dev->caps.sqp_demux);
630  return -ENOENT;
631  }
632 
633  err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
634  if (err)
635  pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
636  slave, err);
637  return 0;
638 }
639 
640 static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
641  struct ib_wc *in_wc, struct ib_grh *in_grh,
642  struct ib_mad *in_mad, struct ib_mad *out_mad)
643 {
644  u16 slid, prev_lid = 0;
645  int err;
646  struct ib_port_attr pattr;
647 
648  if (in_wc && in_wc->qp->qp_num) {
649  pr_debug("received MAD: slid:%d sqpn:%d "
650  "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n",
651  in_wc->slid, in_wc->src_qp,
652  in_wc->dlid_path_bits,
653  in_wc->qp->qp_num,
654  in_wc->wc_flags,
655  in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
656  be16_to_cpu(in_mad->mad_hdr.attr_id));
657  if (in_wc->wc_flags & IB_WC_GRH) {
658  pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
659  be64_to_cpu(in_grh->sgid.global.subnet_prefix),
660  be64_to_cpu(in_grh->sgid.global.interface_id));
661  pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n",
662  be64_to_cpu(in_grh->dgid.global.subnet_prefix),
663  be64_to_cpu(in_grh->dgid.global.interface_id));
664  }
665  }
666 
667  slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
668 
669  if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
670  forward_trap(to_mdev(ibdev), port_num, in_mad);
671  return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
672  }
673 
674  if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
675  in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
676  if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
677  in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
678  in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
679  return IB_MAD_RESULT_SUCCESS;
680 
681  /*
682  * Don't process SMInfo queries -- the SMA can't handle them.
683  */
684  if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
685  return IB_MAD_RESULT_SUCCESS;
686  } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
687  in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 ||
688  in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2 ||
689  in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
690  if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
691  in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
692  return IB_MAD_RESULT_SUCCESS;
693  } else
694  return IB_MAD_RESULT_SUCCESS;
695 
696  if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
697  in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
698  in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
699  in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
700  !ib_query_port(ibdev, port_num, &pattr))
701  prev_lid = pattr.lid;
702 
703  err = mlx4_MAD_IFC(to_mdev(ibdev),
704  (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) |
705  (mad_flags & IB_MAD_IGNORE_BKEY ? MLX4_MAD_IFC_IGNORE_BKEY : 0) |
706  MLX4_MAD_IFC_NET_VIEW,
707  port_num, in_wc, in_grh, in_mad, out_mad);
708  if (err)
709  return IB_MAD_RESULT_FAILURE;
710 
711  if (!out_mad->mad_hdr.status) {
712  if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV))
713  smp_snoop(ibdev, port_num, in_mad, prev_lid);
714  /* slaves get node desc from FW */
715  if (!mlx4_is_slave(to_mdev(ibdev)->dev))
716  node_desc_override(ibdev, out_mad);
717  }
718 
719  /* set return bit in status of directed route responses */
720  if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
721  out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
722 
723  if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
724  /* no response for trap repress */
725  return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
726 
727  return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
728 }
729 
730 static void edit_counter(struct mlx4_counter *cnt,
731  struct ib_pma_portcounters *pma_cnt)
732 {
733  pma_cnt->port_xmit_data = cpu_to_be32((be64_to_cpu(cnt->tx_bytes)>>2));
734  pma_cnt->port_rcv_data = cpu_to_be32((be64_to_cpu(cnt->rx_bytes)>>2));
735  pma_cnt->port_xmit_packets = cpu_to_be32(be64_to_cpu(cnt->tx_frames));
736  pma_cnt->port_rcv_packets = cpu_to_be32(be64_to_cpu(cnt->rx_frames));
737 }
738 
739 static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
740  struct ib_wc *in_wc, struct ib_grh *in_grh,
741  struct ib_mad *in_mad, struct ib_mad *out_mad)
742 {
743  struct mlx4_cmd_mailbox *mailbox;
744  struct mlx4_ib_dev *dev = to_mdev(ibdev);
745  int err;
746  u32 inmod = dev->counters[port_num - 1] & 0xffff;
747  u8 mode;
748 
749  if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
750  return -EINVAL;
751 
752  mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
753  if (IS_ERR(mailbox))
754  return IB_MAD_RESULT_FAILURE;
755 
756  err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0,
757  MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
758  MLX4_CMD_WRAPPED);
759  if (err)
760  err = IB_MAD_RESULT_FAILURE;
761  else {
762  memset(out_mad->data, 0, sizeof out_mad->data);
763  mode = ((struct mlx4_counter *)mailbox->buf)->counter_mode;
764  switch (mode & 0xf) {
765  case 0:
766  edit_counter(mailbox->buf,
767  (void *)(out_mad->data + 40));
768  err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
769  break;
770  default:
771  err = IB_MAD_RESULT_FAILURE;
772  }
773  }
774 
775  mlx4_free_cmd_mailbox(dev->dev, mailbox);
776 
777  return err;
778 }
779 
780 int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
781  struct ib_wc *in_wc, struct ib_grh *in_grh,
782  struct ib_mad *in_mad, struct ib_mad *out_mad)
783 {
784  switch (rdma_port_get_link_layer(ibdev, port_num)) {
785  case IB_LINK_LAYER_INFINIBAND:
786  return ib_process_mad(ibdev, mad_flags, port_num, in_wc,
787  in_grh, in_mad, out_mad);
788  case IB_LINK_LAYER_ETHERNET:
789  return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
790  in_grh, in_mad, out_mad);
791  default:
792  return -EINVAL;
793  }
794 }
795 
796 static void send_handler(struct ib_mad_agent *agent,
797  struct ib_mad_send_wc *mad_send_wc)
798 {
799  if (mad_send_wc->send_buf->context[0])
800  ib_destroy_ah(mad_send_wc->send_buf->context[0]);
801  ib_free_send_mad(mad_send_wc->send_buf);
802 }
803 
803 
804 int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
805 {
806  struct ib_mad_agent *agent;
807  int p, q;
808  int ret;
809  enum rdma_link_layer ll;
810 
811  for (p = 0; p < dev->num_ports; ++p) {
812  ll = rdma_port_get_link_layer(&dev->ib_dev, p + 1);
813  for (q = 0; q <= 1; ++q) {
814  if (ll == IB_LINK_LAYER_INFINIBAND) {
815  agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
816  q ? IB_QPT_GSI : IB_QPT_SMI,
817  NULL, 0, send_handler,
818  NULL, NULL);
819  if (IS_ERR(agent)) {
820  ret = PTR_ERR(agent);
821  goto err;
822  }
823  dev->send_agent[p][q] = agent;
824  } else
825  dev->send_agent[p][q] = NULL;
826  }
827  }
828 
829  return 0;
830 
831 err:
832  for (p = 0; p < dev->num_ports; ++p)
833  for (q = 0; q <= 1; ++q)
834  if (dev->send_agent[p][q])
835  ib_unregister_mad_agent(dev->send_agent[p][q]);
836 
837  return ret;
838 }
839 
840 void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
841 {
842  struct ib_mad_agent *agent;
843  int p, q;
844 
845  for (p = 0; p < dev->num_ports; ++p) {
846  for (q = 0; q <= 1; ++q) {
847  agent = dev->send_agent[p][q];
848  if (agent) {
849  dev->send_agent[p][q] = NULL;
850  ib_unregister_mad_agent(agent);
851  }
852  }
853 
854  if (dev->sm_ah[p])
855  ib_destroy_ah(dev->sm_ah[p]);
856  }
857 }
858 
859 static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num)
860 {
861  mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE);
862 
863  if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
864  mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
865  MLX4_EQ_PORT_INFO_LID_CHANGE_MASK);
866 }
867 
868 static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num)
869 {
870  /* re-configure the alias-guid and mcg's */
871  if (mlx4_is_master(dev->dev)) {
872  mlx4_ib_invalidate_all_guid_record(dev, port_num);
873 
874  if (!dev->sriov.is_going_down) {
875  mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0);
876  mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
877  MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK);
878  }
879  }
880  mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER);
881 }
882 
883 static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
884  struct mlx4_eqe *eqe)
885 {
886  __propagate_pkey_ev(dev, port_num, GET_BLK_PTR_FROM_EQE(eqe),
887  GET_MASK_FROM_EQE(eqe));
888 }
889 
890 static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num,
891  u32 guid_tbl_blk_num, u32 change_bitmap)
892 {
893  struct ib_smp *in_mad = NULL;
894  struct ib_smp *out_mad = NULL;
895  u16 i;
896 
897  if (!mlx4_is_mfunc(dev->dev) || !mlx4_is_master(dev->dev))
898  return;
899 
900  in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
901  out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
902  if (!in_mad || !out_mad) {
903  mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n");
904  goto out;
905  }
906 
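 /* The EQE describes a block of 32 GUID table entries (one bit each in
 * change_bitmap), while a single GUIDInfo record holds
 * GUID_TBL_BLK_NUM_ENTRIES (8) GUIDs, so each EQE block expands to the
 * four GUIDInfo attribute modifiers queried in the loop below. */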
907  guid_tbl_blk_num *= 4;
908 
909  for (i = 0; i < 4; i++) {
910  if (change_bitmap && (!((change_bitmap >> (8 * i)) & 0xff)))
911  continue;
912  memset(in_mad, 0, sizeof *in_mad);
913  memset(out_mad, 0, sizeof *out_mad);
914 
915  in_mad->base_version = 1;
916  in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
917  in_mad->class_version = 1;
918  in_mad->method = IB_MGMT_METHOD_GET;
919  in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
920  in_mad->attr_mod = cpu_to_be32(guid_tbl_blk_num + i);
921 
922  if (mlx4_MAD_IFC(dev,
923  MLX4_MAD_IFC_IGNORE_KEYS | MLX4_MAD_IFC_NET_VIEW,
924  port_num, NULL, NULL, in_mad, out_mad)) {
925  mlx4_ib_warn(&dev->ib_dev, "Failed in get GUID INFO MAD_IFC\n");
926  goto out;
927  }
928 
929  mlx4_ib_update_cache_on_guid_change(dev, guid_tbl_blk_num + i,
930  port_num,
931  (u8 *)(&((struct ib_smp *)out_mad)->data));
932  mlx4_ib_notify_slaves_on_guid_change(dev, guid_tbl_blk_num + i,
933  port_num,
934  (u8 *)(&((struct ib_smp *)out_mad)->data));
935  }
936 
937 out:
938  kfree(in_mad);
939  kfree(out_mad);
940  return;
941 }
942 
943 void handle_port_mgmt_change_event(struct work_struct *work)
944 {
945  struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
946  struct mlx4_ib_dev *dev = ew->ib_dev;
947  struct mlx4_eqe *eqe = &(ew->ib_eqe);
948  u8 port = eqe->event.port_mgmt_change.port;
949  u32 changed_attr;
950  u32 tbl_block;
951  u32 change_bitmap;
952 
953  switch (eqe->subtype) {
954  case MLX4_DEV_PMC_SUBTYPE_PORT_INFO:
955  changed_attr = be32_to_cpu(eqe->event.port_mgmt_change.params.port_info.changed_attr);
956 
957  /* Update the SM ah - This should be done before handling
958  the other changed attributes so that MADs can be sent to the SM */
959  if (changed_attr & MSTR_SM_CHANGE_MASK) {
960  u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid);
961  u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf;
962  update_sm_ah(dev, port, lid, sl);
963  }
964 
965  /* Check if it is a lid change event */
966  if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK)
967  handle_lid_change_event(dev, port);
968 
969  /* Generate GUID changed event */
970  if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
971  mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
972  /*if master, notify all slaves*/
973  if (mlx4_is_master(dev->dev))
974  mlx4_gen_slaves_port_mgt_ev(dev->dev, port,
975  MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK);
976  }
977 
978  if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK)
979  handle_client_rereg_event(dev, port);
980  break;
981 
982  case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE:
983  mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE);
984  if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
985  propagate_pkey_ev(dev, port, eqe);
986  break;
987  case MLX4_DEV_PMC_SUBTYPE_GUID_INFO:
988  /* paravirtualized master's guid is guid 0 -- does not change */
989  if (!mlx4_is_master(dev->dev))
990  mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
991  /*if master, notify relevant slaves*/
992  else if (!dev->sriov.is_going_down) {
993  tbl_block = GET_BLK_PTR_FROM_EQE(eqe);
994  change_bitmap = GET_MASK_FROM_EQE(eqe);
995  handle_slaves_guid_change(dev, port, tbl_block, change_bitmap);
996  }
997  break;
998  default:
999  pr_warn("Unsupported subtype 0x%x for "
1000  "Port Management Change event\n", eqe->subtype);
1001  }
1002 
1003  kfree(ew);
1004 }
1005 
1006 void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
1007  enum ib_event_type type)
1008 {
1009  struct ib_event event;
1010 
1011  event.device = &dev->ib_dev;
1012  event.element.port_num = port_num;
1013  event.event = type;
1014 
1015  ib_dispatch_event(&event);
1016 }
1017 
1018 static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
1019 {
1020  unsigned long flags;
1021  struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
1022  struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1023  spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
1024  if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
1025  queue_work(ctx->wq, &ctx->work);
1026  spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
1027 }
1028 
1029 static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
1030  struct mlx4_ib_demux_pv_qp *tun_qp,
1031  int index)
1032 {
1033  struct ib_sge sg_list;
1034  struct ib_recv_wr recv_wr, *bad_recv_wr;
1035  int size;
1036 
1037  size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
1038  sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf);
1039 
1040  sg_list.addr = tun_qp->ring[index].map;
1041  sg_list.length = size;
1042  sg_list.lkey = ctx->mr->lkey;
1043 
1044  recv_wr.next = NULL;
1045  recv_wr.sg_list = &sg_list;
1046  recv_wr.num_sge = 1;
1047  recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV |
1048  MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt);
1049  ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map,
1050  size, DMA_FROM_DEVICE);
1051  return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr);
1052 }
1053 
1054 static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
1055  int slave, struct ib_sa_mad *sa_mad)
1056 {
1057  int ret = 0;
1058 
1059  /* dispatch to different sa handlers */
1060  switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
1061  case IB_SA_ATTR_MC_MEMBER_REC:
1062  ret = mlx4_ib_mcg_multiplex_handler(ibdev, port, slave, sa_mad);
1063  break;
1064  default:
1065  break;
1066  }
1067  return ret;
1068 }
1069 
1070 static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
1071 {
1072  int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
1073 
1074  return (qpn >= proxy_start && qpn <= proxy_start + 1);
1075 }
1076 
1077 
1078 int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
1079  enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
1080  u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
1081 {
1082  struct ib_sge list;
1083  struct ib_send_wr wr, *bad_wr;
1084  struct mlx4_ib_demux_pv_ctx *sqp_ctx;
1085  struct mlx4_ib_demux_pv_qp *sqp;
1086  struct mlx4_mad_snd_buf *sqp_mad;
1087  struct ib_ah *ah;
1088  struct ib_qp *send_qp = NULL;
1089  unsigned wire_tx_ix = 0;
1090  int ret = 0;
1091  u16 wire_pkey_ix;
1092  int src_qpnum;
1093  u8 sgid_index;
1094 
1095 
1096  sqp_ctx = dev->sriov.sqps[port-1];
1097 
1098  /* check if proxy qp created */
1099  if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
1100  return -EAGAIN;
1101 
1102  /* QP0 forwarding only for Dom0 */
1103  if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
1104  return -EINVAL;
1105 
1106  if (dest_qpt == IB_QPT_SMI) {
1107  src_qpnum = 0;
1108  sqp = &sqp_ctx->qp[0];
1109  wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
1110  } else {
1111  src_qpnum = 1;
1112  sqp = &sqp_ctx->qp[1];
1113  wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][pkey_index];
1114  }
1115 
1116  send_qp = sqp->qp;
1117 
1118  /* create ah */
1119  sgid_index = attr->grh.sgid_index;
1120  attr->grh.sgid_index = 0;
1121  ah = ib_create_ah(sqp_ctx->pd, attr);
1122  if (IS_ERR(ah))
1123  return -ENOMEM;
1124  attr->grh.sgid_index = sgid_index;
1125  to_mah(ah)->av.ib.gid_index = sgid_index;
1126  /* get rid of force-loopback bit */
1127  to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
1128  spin_lock(&sqp->tx_lock);
1129  if (sqp->tx_ix_head - sqp->tx_ix_tail >=
1130  (MLX4_NUM_TUNNEL_BUFS - 1))
1131  ret = -EAGAIN;
1132  else
1133  wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
1134  spin_unlock(&sqp->tx_lock);
1135  if (ret)
1136  goto out;
1137 
1138  sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
1139  if (sqp->tx_ring[wire_tx_ix].ah)
1140  ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
1141  sqp->tx_ring[wire_tx_ix].ah = ah;
1142  ib_dma_sync_single_for_cpu(&dev->ib_dev,
1143  sqp->tx_ring[wire_tx_ix].buf.map,
1144  sizeof (struct mlx4_mad_snd_buf),
1145  DMA_TO_DEVICE);
1146 
1147  memcpy(&sqp_mad->payload, mad, sizeof *mad);
1148 
1149  ib_dma_sync_single_for_device(&dev->ib_dev,
1150  sqp->tx_ring[wire_tx_ix].buf.map,
1151  sizeof (struct mlx4_mad_snd_buf),
1152  DMA_TO_DEVICE);
1153 
1154  list.addr = sqp->tx_ring[wire_tx_ix].buf.map;
1155  list.length = sizeof (struct mlx4_mad_snd_buf);
1156  list.lkey = sqp_ctx->mr->lkey;
1157 
1158  wr.wr.ud.ah = ah;
1159  wr.wr.ud.port_num = port;
1160  wr.wr.ud.pkey_index = wire_pkey_ix;
1161  wr.wr.ud.remote_qkey = qkey;
1162  wr.wr.ud.remote_qpn = remote_qpn;
1163  wr.next = NULL;
1164  wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
1165  wr.sg_list = &list;
1166  wr.num_sge = 1;
1167  wr.opcode = IB_WR_SEND;
1168  wr.send_flags = IB_SEND_SIGNALED;
1169 
1170  ret = ib_post_send(send_qp, &wr, &bad_wr);
1171 out:
1172  if (ret)
1173  ib_destroy_ah(ah);
1174  return ret;
1175 }
1176 
1177 static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
1178 {
1179  struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1180  struct mlx4_ib_demux_pv_qp *tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc->wr_id)];
1181  int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
1182  struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr;
1183  struct mlx4_ib_ah ah;
1184  struct ib_ah_attr ah_attr;
1185  u8 *slave_id;
1186  int slave;
1187 
1188  /* Get slave that sent this packet */
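 /* Within each slave's block of eight proxy QPs, bit 0 of the QP number
 * encodes the port (port - 1) and bit 1 distinguishes the QP1 proxies
 * from the QP0 proxies; QPs with bit 2 set are not valid sources here and
 * are rejected below along with packets from the wrong port or slave. */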
1189  if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
1190  wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX ||
1191  (wc->src_qp & 0x1) != ctx->port - 1 ||
1192  wc->src_qp & 0x4) {
1193  mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp);
1194  return;
1195  }
1196  slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8;
1197  if (slave != ctx->slave) {
1198  mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
1199  "belongs to another slave\n", wc->src_qp);
1200  return;
1201  }
1202  if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
1203  mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
1204  "non-master trying to send QP0 packets\n", wc->src_qp);
1205  return;
1206  }
1207 
1208  /* Map transaction ID */
1209  ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
1210  sizeof (struct mlx4_tunnel_mad),
1211  DMA_FROM_DEVICE);
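 /* For the request methods handled below, the slave's id is stamped into
 * the most significant byte of the MAD transaction ID; mlx4_ib_demux_mad()
 * reads that byte when the response comes back to pick the destination
 * slave and then zeroes it before tunneling the response inward. */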
1212  switch (tunnel->mad.mad_hdr.method) {
1213  case IB_MGMT_METHOD_SET:
1214  case IB_MGMT_METHOD_GET:
1215  case IB_MGMT_METHOD_REPORT:
1216  case IB_SA_METHOD_GET_TABLE:
1217  case IB_SA_METHOD_DELETE:
1218  case IB_SA_METHOD_GET_MULTI:
1219  case IB_SA_METHOD_GET_TRACE_TBL:
1220  slave_id = (u8 *) &tunnel->mad.mad_hdr.tid;
1221  if (*slave_id) {
1222  mlx4_ib_warn(ctx->ib_dev, "egress mad has non-null tid msb:%d "
1223  "class:%d slave:%d\n", *slave_id,
1224  tunnel->mad.mad_hdr.mgmt_class, slave);
1225  return;
1226  } else
1227  *slave_id = slave;
1228  default:
1229  /* nothing */;
1230  }
1231 
1232  /* Class-specific handling */
1233  switch (tunnel->mad.mad_hdr.mgmt_class) {
1234  case IB_MGMT_CLASS_SUBN_ADM:
1235  if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
1236  (struct ib_sa_mad *) &tunnel->mad))
1237  return;
1238  break;
1239  case IB_MGMT_CLASS_CM:
1240  if (mlx4_ib_multiplex_cm_handler(ctx->ib_dev, ctx->port, slave,
1241  (struct ib_mad *) &tunnel->mad))
1242  return;
1243  break;
1244  case IB_MGMT_CLASS_DEVICE_MGMT:
1245  if (tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_GET &&
1246  tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_SET)
1247  return;
1248  break;
1249  default:
1250  /* Drop unsupported classes for slaves in tunnel mode */
1251  if (slave != mlx4_master_func_num(dev->dev)) {
1252  mlx4_ib_warn(ctx->ib_dev, "dropping unsupported egress mad from class:%d "
1253  "for slave:%d\n", tunnel->mad.mad_hdr.mgmt_class, slave);
1254  return;
1255  }
1256  }
1257 
1258  /* We are using standard ib_core services to send the mad, so generate a
1259  * standard address handle by decoding the tunnelled mlx4_ah fields */
1260  memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
1261  ah.ibah.device = ctx->ib_dev;
1262  mlx4_ib_query_ah(&ah.ibah, &ah_attr);
1263  if ((ah_attr.ah_flags & IB_AH_GRH) &&
1264  (ah_attr.grh.sgid_index != slave)) {
1265  mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n",
1266  slave, ah_attr.grh.sgid_index);
1267  return;
1268  }
1269 
1270  mlx4_ib_send_to_wire(dev, slave, ctx->port,
1271  is_proxy_qp0(dev, wc->src_qp, slave) ?
1272  IB_QPT_SMI : IB_QPT_GSI,
1273  be16_to_cpu(tunnel->hdr.pkey_index),
1274  be32_to_cpu(tunnel->hdr.remote_qpn),
1275  be32_to_cpu(tunnel->hdr.qkey),
1276  &ah_attr, &tunnel->mad);
1277 }
1278 
1279 static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
1280  enum ib_qp_type qp_type, int is_tun)
1281 {
1282  int i;
1283  struct mlx4_ib_demux_pv_qp *tun_qp;
1284  int rx_buf_size, tx_buf_size;
1285 
1286  if (qp_type > IB_QPT_GSI)
1287  return -EINVAL;
1288 
1289  tun_qp = &ctx->qp[qp_type];
1290 
1291  tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS,
1292  GFP_KERNEL);
1293  if (!tun_qp->ring)
1294  return -ENOMEM;
1295 
1296  tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
1297  sizeof (struct mlx4_ib_tun_tx_buf),
1298  GFP_KERNEL);
1299  if (!tun_qp->tx_ring) {
1300  kfree(tun_qp->ring);
1301  tun_qp->ring = NULL;
1302  return -ENOMEM;
1303  }
1304 
1305  if (is_tun) {
1306  rx_buf_size = sizeof (struct mlx4_tunnel_mad);
1307  tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
1308  } else {
1309  rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
1310  tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
1311  }
1312 
1313  for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1314  tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
1315  if (!tun_qp->ring[i].addr)
1316  goto err;
1317  tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev,
1318  tun_qp->ring[i].addr,
1319  rx_buf_size,
1320  DMA_FROM_DEVICE);
1321  }
1322 
1323  for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1324  tun_qp->tx_ring[i].buf.addr =
1325  kmalloc(tx_buf_size, GFP_KERNEL);
1326  if (!tun_qp->tx_ring[i].buf.addr)
1327  goto tx_err;
1328  tun_qp->tx_ring[i].buf.map =
1329  ib_dma_map_single(ctx->ib_dev,
1330  tun_qp->tx_ring[i].buf.addr,
1331  tx_buf_size,
1332  DMA_TO_DEVICE);
1333  tun_qp->tx_ring[i].ah = NULL;
1334  }
1335  spin_lock_init(&tun_qp->tx_lock);
1336  tun_qp->tx_ix_head = 0;
1337  tun_qp->tx_ix_tail = 0;
1338  tun_qp->proxy_qpt = qp_type;
1339 
1340  return 0;
1341 
1342 tx_err:
1343  while (i > 0) {
1344  --i;
1345  ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
1346  tx_buf_size, DMA_TO_DEVICE);
1347  kfree(tun_qp->tx_ring[i].buf.addr);
1348  }
1349  kfree(tun_qp->tx_ring);
1350  tun_qp->tx_ring = NULL;
1351  i = MLX4_NUM_TUNNEL_BUFS;
1352 err:
1353  while (i > 0) {
1354  --i;
1355  ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
1356  rx_buf_size, DMA_FROM_DEVICE);
1357  kfree(tun_qp->ring[i].addr);
1358  }
1359  kfree(tun_qp->ring);
1360  tun_qp->ring = NULL;
1361  return -ENOMEM;
1362 }
1363 
1364 static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
1365  enum ib_qp_type qp_type, int is_tun)
1366 {
1367  int i;
1368  struct mlx4_ib_demux_pv_qp *tun_qp;
1369  int rx_buf_size, tx_buf_size;
1370 
1371  if (qp_type > IB_QPT_GSI)
1372  return;
1373 
1374  tun_qp = &ctx->qp[qp_type];
1375  if (is_tun) {
1376  rx_buf_size = sizeof (struct mlx4_tunnel_mad);
1377  tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
1378  } else {
1379  rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
1380  tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
1381  }
1382 
1383 
1384  for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1385  ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
1386  rx_buf_size, DMA_FROM_DEVICE);
1387  kfree(tun_qp->ring[i].addr);
1388  }
1389 
1390  for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1391  ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
1392  tx_buf_size, DMA_TO_DEVICE);
1393  kfree(tun_qp->tx_ring[i].buf.addr);
1394  if (tun_qp->tx_ring[i].ah)
1395  ib_destroy_ah(tun_qp->tx_ring[i].ah);
1396  }
1397  kfree(tun_qp->tx_ring);
1398  kfree(tun_qp->ring);
1399 }
1400 
1401 static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
1402 {
1403  struct mlx4_ib_demux_pv_ctx *ctx;
1404  struct mlx4_ib_demux_pv_qp *tun_qp;
1405  struct ib_wc wc;
1406  int ret;
1407  ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
1408  ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
1409 
1410  while (ib_poll_cq(ctx->cq, 1, &wc) == 1) {
1411  tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
1412  if (wc.status == IB_WC_SUCCESS) {
1413  switch (wc.opcode) {
1414  case IB_WC_RECV:
1415  mlx4_ib_multiplex_mad(ctx, &wc);
1416  ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp,
1417  wc.wr_id &
1418  (MLX4_NUM_TUNNEL_BUFS - 1));
1419  if (ret)
1420  pr_err("Failed reposting tunnel "
1421  "buf:%lld\n", wc.wr_id);
1422  break;
1423  case IB_WC_SEND:
1424  pr_debug("received tunnel send completion:"
1425  "wrid=0x%llx, status=0x%x\n",
1426  wc.wr_id, wc.status);
1427  ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
1428  (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1429  tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1430  = NULL;
1431  spin_lock(&tun_qp->tx_lock);
1432  tun_qp->tx_ix_tail++;
1433  spin_unlock(&tun_qp->tx_lock);
1434 
1435  break;
1436  default:
1437  break;
1438  }
1439  } else {
1440  pr_debug("mlx4_ib: completion error in tunnel: %d."
1441  " status = %d, wrid = 0x%llx\n",
1442  ctx->slave, wc.status, wc.wr_id);
1443  if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
1444  ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
1445  (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1446  tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1447  = NULL;
1448  spin_lock(&tun_qp->tx_lock);
1449  tun_qp->tx_ix_tail++;
1450  spin_unlock(&tun_qp->tx_lock);
1451  }
1452  }
1453  }
1454 }
1455 
1456 static void pv_qp_event_handler(struct ib_event *event, void *qp_context)
1457 {
1458  struct mlx4_ib_demux_pv_ctx *sqp = qp_context;
1459 
1460  /* It's worse than that! He's dead, Jim! */
1461  pr_err("Fatal error (%d) on a MAD QP on port %d\n",
1462  event->event, sqp->port);
1463 }
1464 
1465 static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
1466  enum ib_qp_type qp_type, int create_tun)
1467 {
1468  int i, ret;
1469  struct mlx4_ib_demux_pv_qp *tun_qp;
1470  struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
1471  struct ib_qp_attr attr;
1472  int qp_attr_mask_INIT;
1473 
1474  if (qp_type > IB_QPT_GSI)
1475  return -EINVAL;
1476 
1477  tun_qp = &ctx->qp[qp_type];
1478 
1479  memset(&qp_init_attr, 0, sizeof qp_init_attr);
1480  qp_init_attr.init_attr.send_cq = ctx->cq;
1481  qp_init_attr.init_attr.recv_cq = ctx->cq;
1482  qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
1483  qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
1484  qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
1485  qp_init_attr.init_attr.cap.max_send_sge = 1;
1486  qp_init_attr.init_attr.cap.max_recv_sge = 1;
1487  if (create_tun) {
1488  qp_init_attr.init_attr.qp_type = IB_QPT_UD;
1489  qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_TUNNEL_QP;
1490  qp_init_attr.port = ctx->port;
1491  qp_init_attr.slave = ctx->slave;
1492  qp_init_attr.proxy_qp_type = qp_type;
1493  qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX |
1494  IB_QP_QKEY | IB_QP_PORT;
1495  } else {
1496  qp_init_attr.init_attr.qp_type = qp_type;
1497  qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_SQP;
1498  qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY;
1499  }
1500  qp_init_attr.init_attr.port_num = ctx->port;
1501  qp_init_attr.init_attr.qp_context = ctx;
1502  qp_init_attr.init_attr.event_handler = pv_qp_event_handler;
1503  tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr);
1504  if (IS_ERR(tun_qp->qp)) {
1505  ret = PTR_ERR(tun_qp->qp);
1506  tun_qp->qp = NULL;
1507  pr_err("Couldn't create %s QP (%d)\n",
1508  create_tun ? "tunnel" : "special", ret);
1509  return ret;
1510  }
1511 
1512  memset(&attr, 0, sizeof attr);
1513  attr.qp_state = IB_QPS_INIT;
1514  attr.pkey_index =
1515  to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
1516  attr.qkey = IB_QP1_QKEY;
1517  attr.port_num = ctx->port;
1518  ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT);
1519  if (ret) {
1520  pr_err("Couldn't change %s qp state to INIT (%d)\n",
1521  create_tun ? "tunnel" : "special", ret);
1522  goto err_qp;
1523  }
1524  attr.qp_state = IB_QPS_RTR;
1525  ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE);
1526  if (ret) {
1527  pr_err("Couldn't change %s qp state to RTR (%d)\n",
1528  create_tun ? "tunnel" : "special", ret);
1529  goto err_qp;
1530  }
1531  attr.qp_state = IB_QPS_RTS;
1532  attr.sq_psn = 0;
1533  ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
1534  if (ret) {
1535  pr_err("Couldn't change %s qp state to RTS (%d)\n",
1536  create_tun ? "tunnel" : "special", ret);
1537  goto err_qp;
1538  }
1539 
1540  for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1541  ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
1542  if (ret) {
1543  pr_err(" mlx4_ib_post_pv_buf error"
1544  " (err = %d, i = %d)\n", ret, i);
1545  goto err_qp;
1546  }
1547  }
1548  return 0;
1549 
1550 err_qp:
1551  ib_destroy_qp(tun_qp->qp);
1552  tun_qp->qp = NULL;
1553  return ret;
1554 }
1555 
1556 /*
1557  * IB MAD completion callback for real SQPs
1558  */
1559 static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
1560 {
1561  struct mlx4_ib_demux_pv_ctx *ctx;
1562  struct mlx4_ib_demux_pv_qp *sqp;
1563  struct ib_wc wc;
1564  struct ib_grh *grh;
1565  struct ib_mad *mad;
1566 
1567  ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
1568  ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
1569 
1570  while (mlx4_ib_poll_cq(ctx->cq, 1, &wc) == 1) {
1571  sqp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
1572  if (wc.status == IB_WC_SUCCESS) {
1573  switch (wc.opcode) {
1574  case IB_WC_SEND:
1575  ib_destroy_ah(sqp->tx_ring[wc.wr_id &
1576  (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1577  sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1578  = NULL;
1579  spin_lock(&sqp->tx_lock);
1580  sqp->tx_ix_tail++;
1581  spin_unlock(&sqp->tx_lock);
1582  break;
1583  case IB_WC_RECV:
1584  mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
1585  (sqp->ring[wc.wr_id &
1586  (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
1587  grh = &(((struct mlx4_mad_rcv_buf *)
1588  (sqp->ring[wc.wr_id &
1589  (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
1590  mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
1591  if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
1592  (MLX4_NUM_TUNNEL_BUFS - 1)))
1593  pr_err("Failed reposting SQP "
1594  "buf:%lld\n", wc.wr_id);
1595  break;
1596  default:
1597  BUG_ON(1);
1598  break;
1599  }
1600  } else {
1601  pr_debug("mlx4_ib: completion error in tunnel: %d."
1602  " status = %d, wrid = 0x%llx\n",
1603  ctx->slave, wc.status, wc.wr_id);
1604  if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
1605  ib_destroy_ah(sqp->tx_ring[wc.wr_id &
1606  (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1607  sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1608  = NULL;
1609  spin_lock(&sqp->tx_lock);
1610  sqp->tx_ix_tail++;
1611  spin_unlock(&sqp->tx_lock);
1612  }
1613  }
1614  }
1615 }
1616 
1617 static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port,
1618  struct mlx4_ib_demux_pv_ctx **ret_ctx)
1619 {
1620  struct mlx4_ib_demux_pv_ctx *ctx;
1621 
1622  *ret_ctx = NULL;
1623  ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL);
1624  if (!ctx) {
1625  pr_err("failed allocating pv resource context "
1626  "for port %d, slave %d\n", port, slave);
1627  return -ENOMEM;
1628  }
1629 
1630  ctx->ib_dev = &dev->ib_dev;
1631  ctx->port = port;
1632  ctx->slave = slave;
1633  *ret_ctx = ctx;
1634  return 0;
1635 }
1636 
1637 static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port)
1638 {
1639  if (dev->sriov.demux[port - 1].tun[slave]) {
1640  kfree(dev->sriov.demux[port - 1].tun[slave]);
1641  dev->sriov.demux[port - 1].tun[slave] = NULL;
1642  }
1643 }
1644 
1645 static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
1646  int create_tun, struct mlx4_ib_demux_pv_ctx *ctx)
1647 {
1648  int ret, cq_size;
1649 
1650  if (ctx->state != DEMUX_PV_STATE_DOWN)
1651  return -EEXIST;
1652 
1653  ctx->state = DEMUX_PV_STATE_STARTING;
1654  /* have QP0 only on port owner, and only if link layer is IB */
1655  if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
1656  rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
1657  ctx->has_smi = 1;
1658 
1659  if (ctx->has_smi) {
1660  ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun);
1661  if (ret) {
1662  pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret);
1663  goto err_out;
1664  }
1665  }
1666 
1667  ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun);
1668  if (ret) {
1669  pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret);
1670  goto err_out_qp0;
1671  }
1672 
1673  cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
1674  if (ctx->has_smi)
1675  cq_size *= 2;
1676 
1677  ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
1678  NULL, ctx, cq_size, 0);
1679  if (IS_ERR(ctx->cq)) {
1680  ret = PTR_ERR(ctx->cq);
1681  pr_err("Couldn't create tunnel CQ (%d)\n", ret);
1682  goto err_buf;
1683  }
1684 
1685  ctx->pd = ib_alloc_pd(ctx->ib_dev);
1686  if (IS_ERR(ctx->pd)) {
1687  ret = PTR_ERR(ctx->pd);
1688  pr_err("Couldn't create tunnel PD (%d)\n", ret);
1689  goto err_cq;
1690  }
1691 
1692  ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE);
1693  if (IS_ERR(ctx->mr)) {
1694  ret = PTR_ERR(ctx->mr);
1695  pr_err("Couldn't get tunnel DMA MR (%d)\n", ret);
1696  goto err_pd;
1697  }
1698 
1699  if (ctx->has_smi) {
1700  ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun);
1701  if (ret) {
1702  pr_err("Couldn't create %s QP0 (%d)\n",
1703  create_tun ? "tunnel for" : "", ret);
1704  goto err_mr;
1705  }
1706  }
1707 
1708  ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun);
1709  if (ret) {
1710  pr_err("Couldn't create %s QP1 (%d)\n",
1711  create_tun ? "tunnel for" : "", ret);
1712  goto err_qp0;
1713  }
1714 
1715  if (create_tun)
1716  INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker);
1717  else
1718  INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
1719 
1720  ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
1721 
1722  ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
1723  if (ret) {
1724  pr_err("Couldn't arm tunnel cq (%d)\n", ret);
1725  goto err_wq;
1726  }
1727  ctx->state = DEMUX_PV_STATE_ACTIVE;
1728  return 0;
1729 
1730 err_wq:
1731  ctx->wq = NULL;
1732  ib_destroy_qp(ctx->qp[1].qp);
1733  ctx->qp[1].qp = NULL;
1734 
1735 
1736 err_qp0:
1737  if (ctx->has_smi)
1738  ib_destroy_qp(ctx->qp[0].qp);
1739  ctx->qp[0].qp = NULL;
1740 
1741 err_mr:
1742  ib_dereg_mr(ctx->mr);
1743  ctx->mr = NULL;
1744 
1745 err_pd:
1746  ib_dealloc_pd(ctx->pd);
1747  ctx->pd = NULL;
1748 
1749 err_cq:
1750  ib_destroy_cq(ctx->cq);
1751  ctx->cq = NULL;
1752 
1753 err_buf:
1754  mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun);
1755 
1756 err_out_qp0:
1757  if (ctx->has_smi)
1758  mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun);
1759 err_out:
1760  ctx->state = DEMUX_PV_STATE_DOWN;
1761  return ret;
1762 }
1763 
1764 static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port,
1765  struct mlx4_ib_demux_pv_ctx *ctx, int flush)
1766 {
1767  if (!ctx)
1768  return;
1769  if (ctx->state > DEMUX_PV_STATE_DOWN) {
1770  ctx->state = DEMUX_PV_STATE_DOWNING;
1771  if (flush)
1772  flush_workqueue(ctx->wq);
1773  if (ctx->has_smi) {
1774  ib_destroy_qp(ctx->qp[0].qp);
1775  ctx->qp[0].qp = NULL;
1776  mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1);
1777  }
1778  ib_destroy_qp(ctx->qp[1].qp);
1779  ctx->qp[1].qp = NULL;
1780  mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1);
1781  ib_dereg_mr(ctx->mr);
1782  ctx->mr = NULL;
1783  ib_dealloc_pd(ctx->pd);
1784  ctx->pd = NULL;
1785  ib_destroy_cq(ctx->cq);
1786  ctx->cq = NULL;
1787  ctx->state = DEMUX_PV_STATE_DOWN;
1788  }
1789 }
1790 
1791 static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave,
1792  int port, int do_init)
1793 {
1794  int ret = 0;
1795 
1796  if (!do_init) {
1797  clean_vf_mcast(&dev->sriov.demux[port - 1], slave);
1798  /* for master, destroy real sqp resources */
1799  if (slave == mlx4_master_func_num(dev->dev))
1800  destroy_pv_resources(dev, slave, port,
1801  dev->sriov.sqps[port - 1], 1);
1802  /* destroy the tunnel qp resources */
1803  destroy_pv_resources(dev, slave, port,
1804  dev->sriov.demux[port - 1].tun[slave], 1);
1805  return 0;
1806  }
1807 
1808  /* create the tunnel qp resources */
1809  ret = create_pv_resources(&dev->ib_dev, slave, port, 1,
1810  dev->sriov.demux[port - 1].tun[slave]);
1811 
1812  /* for master, create the real sqp resources */
1813  if (!ret && slave == mlx4_master_func_num(dev->dev))
1814  ret = create_pv_resources(&dev->ib_dev, slave, port, 0,
1815  dev->sriov.sqps[port - 1]);
1816  return ret;
1817 }
1818 
1819 void mlx4_ib_tunnels_update_work(struct work_struct *work)
1820 {
1821  struct mlx4_ib_demux_work *dmxw;
1822 
1823  dmxw = container_of(work, struct mlx4_ib_demux_work, work);
1824  mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port,
1825  dmxw->do_init);
1826  kfree(dmxw);
1827  return;
1828 }
1829 
1830 static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
1831  struct mlx4_ib_demux_ctx *ctx,
1832  int port)
1833 {
1834  char name[12];
1835  int ret = 0;
1836  int i;
1837 
1838  ctx->tun = kcalloc(dev->dev->caps.sqp_demux,
1839  sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL);
1840  if (!ctx->tun)
1841  return -ENOMEM;
1842 
1843  ctx->dev = dev;
1844  ctx->port = port;
1845  ctx->ib_dev = &dev->ib_dev;
1846 
1847  for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1848  ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
1849  if (ret) {
1850  ret = -ENOMEM;
1851  goto err_mcg;
1852  }
1853  }
1854 
1855  ret = mlx4_ib_mcg_port_init(ctx);
1856  if (ret) {
1857  pr_err("Failed initializing mcg para-virt (%d)\n", ret);
1858  goto err_mcg;
1859  }
1860 
1861  snprintf(name, sizeof name, "mlx4_ibt%d", port);
1862  ctx->wq = create_singlethread_workqueue(name);
1863  if (!ctx->wq) {
1864  pr_err("Failed to create tunnelling WQ for port %d\n", port);
1865  ret = -ENOMEM;
1866  goto err_wq;
1867  }
1868 
1869  snprintf(name, sizeof name, "mlx4_ibud%d", port);
1870  ctx->ud_wq = create_singlethread_workqueue(name);
1871  if (!ctx->ud_wq) {
1872  pr_err("Failed to create up/down WQ for port %d\n", port);
1873  ret = -ENOMEM;
1874  goto err_udwq;
1875  }
1876 
1877  return 0;
1878 
1879 err_udwq:
1880  destroy_workqueue(ctx->wq);
1881  ctx->wq = NULL;
1882 
1883 err_wq:
1884  mlx4_ib_mcg_port_cleanup(ctx, 1);
1885 err_mcg:
1886  for (i = 0; i < dev->dev->caps.sqp_demux; i++)
1887  free_pv_object(dev, i, port);
1888  kfree(ctx->tun);
1889  ctx->tun = NULL;
1890  return ret;
1891 }
1892 
1893 static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx)
1894 {
1895  if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) {
1896  sqp_ctx->state = DEMUX_PV_STATE_DOWNING;
1897  flush_workqueue(sqp_ctx->wq);
1898  if (sqp_ctx->has_smi) {
1899  ib_destroy_qp(sqp_ctx->qp[0].qp);
1900  sqp_ctx->qp[0].qp = NULL;
1901  mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0);
1902  }
1903  ib_destroy_qp(sqp_ctx->qp[1].qp);
1904  sqp_ctx->qp[1].qp = NULL;
1905  mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0);
1906  ib_dereg_mr(sqp_ctx->mr);
1907  sqp_ctx->mr = NULL;
1908  ib_dealloc_pd(sqp_ctx->pd);
1909  sqp_ctx->pd = NULL;
1910  ib_destroy_cq(sqp_ctx->cq);
1911  sqp_ctx->cq = NULL;
1912  sqp_ctx->state = DEMUX_PV_STATE_DOWN;
1913  }
1914 }
1915 
1916 static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
1917 {
1918  int i;
1919  if (ctx) {
1920  struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1921  mlx4_ib_mcg_port_cleanup(ctx, 1);
1922  for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1923  if (!ctx->tun[i])
1924  continue;
1925  if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN)
1926  ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
1927  }
1928  flush_workqueue(ctx->wq);
1929  for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1930  destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
1931  free_pv_object(dev, i, ctx->port);
1932  }
1933  kfree(ctx->tun);
1934  destroy_workqueue(ctx->ud_wq);
1935  destroy_workqueue(ctx->wq);
1936  }
1937 }
1938 
1939 static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init)
1940 {
1941  int i;
1942 
1943  if (!mlx4_is_master(dev->dev))
1944  return;
1945  /* initialize or tear down tunnel QPs for the master */
1946  for (i = 0; i < dev->dev->caps.num_ports; i++)
1947  mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init);
1948  return;
1949 }
1950 
1951 int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
1952 {
1953  int i = 0;
1954  int err;
1955 
1956  if (!mlx4_is_mfunc(dev->dev))
1957  return 0;
1958 
1959  dev->sriov.is_going_down = 0;
1960  spin_lock_init(&dev->sriov.going_down_lock);
1961  mlx4_ib_cm_paravirt_init(dev);
1962 
1963  mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n");
1964 
1965  if (mlx4_is_slave(dev->dev)) {
1966  mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n");
1967  return 0;
1968  }
1969 
1970  for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1971  if (i == mlx4_master_func_num(dev->dev))
1972  mlx4_put_slave_node_guid(dev->dev, i, dev->ib_dev.node_guid);
1973  else
1974  mlx4_put_slave_node_guid(dev->dev, i, mlx4_ib_gen_node_guid());
1975  }
1976 
1977  err = mlx4_ib_init_alias_guid_service(dev);
1978  if (err) {
1979  mlx4_ib_warn(&dev->ib_dev, "Failed init alias guid process.\n");
1980  goto paravirt_err;
1981  }
1982  err = mlx4_ib_device_register_sysfs(dev);
1983  if (err) {
1984  mlx4_ib_warn(&dev->ib_dev, "Failed to register sysfs\n");
1985  goto sysfs_err;
1986  }
1987 
1988  mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n",
1989  dev->dev->caps.sqp_demux);
1990  for (i = 0; i < dev->num_ports; i++) {
1991  union ib_gid gid;
1992  err = __mlx4_ib_query_gid(&dev->ib_dev, i + 1, 0, &gid, 1);
1993  if (err)
1994  goto demux_err;
1995  dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
1996  err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
1997  &dev->sriov.sqps[i]);
1998  if (err)
1999  goto demux_err;
2000  err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1);
2001  if (err)
2002  goto demux_err;
2003  }
2004  mlx4_ib_master_tunnels(dev, 1);
2005  return 0;
2006 
2007 demux_err:
2008  while (i > 0) {
2009  free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
2010  mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
2011  --i;
2012  }
2013  mlx4_ib_device_unregister_sysfs(dev);
2014 
2015 sysfs_err:
2016  mlx4_ib_destroy_alias_guid_service(dev);
2017 
2018 paravirt_err:
2019  mlx4_ib_cm_paravirt_clean(dev, -1);
2020 
2021  return err;
2022 }
2023 
2024 void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev)
2025 {
2026  int i;
2027  unsigned long flags;
2028 
2029  if (!mlx4_is_mfunc(dev->dev))
2030  return;
2031 
2032  spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
2033  dev->sriov.is_going_down = 1;
2034  spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
2035  if (mlx4_is_master(dev->dev)) {
2036  for (i = 0; i < dev->num_ports; i++) {
2037  flush_workqueue(dev->sriov.demux[i].ud_wq);
2038  mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]);
2039  kfree(dev->sriov.sqps[i]);
2040  dev->sriov.sqps[i] = NULL;
2041  mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
2042  }
2043 
2044  mlx4_ib_cm_paravirt_clean(dev, -1);
2045  mlx4_ib_destroy_alias_guid_service(dev);
2046  mlx4_ib_device_unregister_sysfs(dev);
2047  }
2048 }