Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
qib_ud.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
3  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses. You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  * Redistribution and use in source and binary forms, with or
12  * without modification, are permitted provided that the following
13  * conditions are met:
14  *
15  * - Redistributions of source code must retain the above
16  * copyright notice, this list of conditions and the following
17  * disclaimer.
18  *
19  * - Redistributions in binary form must reproduce the above
20  * copyright notice, this list of conditions and the following
21  * disclaimer in the documentation and/or other materials
22  * provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #include <rdma/ib_smi.h>
35 
36 #include "qib.h"
37 #include "qib_mad.h"
38 
/*
 * qib_ud_loopback - deliver a UD send work request to a QP on the same port
 * @sqp: the sending QP
 * @swqe: the send work request being delivered
 *
 * Called from qib_make_ud_req() with s_lock released.  Looks up the
 * destination QP from swqe->wr.wr.ud.remote_qpn, applies the P_Key and
 * Q_Key checks, copies the payload described by the send SGEs into the
 * destination QP's receive SGE state and enters a receive completion on
 * the destination QP's recv_cq.  The destination QP's r_lock is held
 * while the receive side is manipulated.
 *
 * NOTE(review): this text comes from a numbered HTML listing and several
 * listing lines are absent (67, 85, 109, 143, 198).  Each gap is marked
 * below; the block is not compilable until those lines are restored
 * from the original file.
 */
49 static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
50 {
51  struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
52  struct qib_pportdata *ppd;
53  struct qib_qp *qp;
54  struct ib_ah_attr *ah_attr;
55  unsigned long flags;
56  struct qib_sge_state ssge;
57  struct qib_sge *sge;
58  struct ib_wc wc;
59  u32 length;
60 
/* No destination QP for this QPN: count the drop; nothing to release. */
61  qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
62  if (!qp) {
63  ibp->n_pkt_drops++;
64  return;
65  }
66  if (qp->ibqp.qp_type != sqp->ibqp.qp_type ||
/* NOTE(review): listing line 67 is missing; the rest of this condition is not visible. */
68  ibp->n_pkt_drops++;
69  goto drop;
70  }
71 
72  ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
73  ppd = ppd_from_ibp(ibp);
74 
/* The P_Key comparison is only made for destination QPs numbered above 1. */
75  if (qp->ibqp.qp_num > 1) {
76  u16 pkey1;
77  u16 pkey2;
78  u16 lid;
79 
80  pkey1 = qib_get_pkey(ibp, sqp->s_pkey_index);
81  pkey2 = qib_get_pkey(ibp, qp->s_pkey_index);
82  if (unlikely(!qib_pkey_ok(pkey1, pkey2))) {
/* Source LID: port LID combined with the AH path bits that fit in the LMC. */
83  lid = ppd->lid | (ah_attr->src_path_bits &
84  ((1 << ppd->lmc) - 1));
/* NOTE(review): listing line 85 is missing; the call taking the arguments below is not visible. */
86  ah_attr->sl,
87  sqp->ibqp.qp_num, qp->ibqp.qp_num,
88  cpu_to_be16(lid),
89  cpu_to_be16(ah_attr->dlid));
90  goto drop;
91  }
92  }
93 
94  /*
95  * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
96  * Qkeys with the high order bit set mean use the
97  * qkey from the QP context instead of the WR (see 10.2.5).
98  */
99  if (qp->ibqp.qp_num) {
100  u32 qkey;
101 
102  qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ?
103  sqp->qkey : swqe->wr.wr.ud.remote_qkey;
104  if (unlikely(qkey != qp->qkey)) {
105  u16 lid;
106 
107  lid = ppd->lid | (ah_attr->src_path_bits &
108  ((1 << ppd->lmc) - 1));
/* NOTE(review): listing line 109 is missing; the call taking the arguments below is not visible. */
110  ah_attr->sl,
111  sqp->ibqp.qp_num, qp->ibqp.qp_num,
112  cpu_to_be16(lid),
113  cpu_to_be16(ah_attr->dlid));
114  goto drop;
115  }
116  }
117 
118  /*
119  * A GRH is expected to precede the data even if not
120  * present on the wire.
121  */
122  length = swqe->length;
123  memset(&wc, 0, sizeof wc);
124  wc.byte_len = length + sizeof(struct ib_grh);
125 
126  if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
127  wc.wc_flags = IB_WC_WITH_IMM;
128  wc.ex.imm_data = swqe->wr.ex.imm_data;
129  }
130 
/* Everything from here to bail_unlock runs under the destination QP's r_lock. */
131  spin_lock_irqsave(&qp->r_lock, flags);
132 
133  /*
134  * Get the next work request entry to find where to put the data.
135  */
136  if (qp->r_flags & QIB_R_REUSE_SGE)
137  qp->r_flags &= ~QIB_R_REUSE_SGE;
138  else {
139  int ret;
140 
141  ret = qib_get_rwqe(qp, 0);
142  if (ret < 0) {
/* NOTE(review): listing line 143 is missing; whatever is done on a negative return is not visible. */
144  goto bail_unlock;
145  }
146  if (!ret) {
147  if (qp->ibqp.qp_num == 0)
148  ibp->n_vl15_dropped++;
149  goto bail_unlock;
150  }
151  }
152  /* Silently drop packets which are too big. */
153  if (unlikely(wc.byte_len > qp->r_len)) {
/* Set QIB_R_REUSE_SGE so the next packet takes the reuse branch above instead of calling qib_get_rwqe(). */
154  qp->r_flags |= QIB_R_REUSE_SGE;
155  ibp->n_pkt_drops++;
156  goto bail_unlock;
157  }
158 
159  if (ah_attr->ah_flags & IB_AH_GRH) {
160  qib_copy_sge(&qp->r_sge, &ah_attr->grh,
161  sizeof(struct ib_grh), 1);
162  wc.wc_flags |= IB_WC_GRH;
163  } else
164  qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
/* Walk a local copy (ssge) of the send SGE state; the cursor updates below touch only that copy. */
165  ssge.sg_list = swqe->sg_list + 1;
166  ssge.sge = *swqe->sg_list;
167  ssge.num_sge = swqe->wr.num_sge;
168  sge = &ssge.sge;
169  while (length) {
170  u32 len = sge->length;
171 
/* Chunk size is bounded by the remaining payload, the current segment and the current SGE. */
172  if (len > length)
173  len = length;
174  if (len > sge->sge_length)
175  len = sge->sge_length;
176  BUG_ON(len == 0);
177  qib_copy_sge(&qp->r_sge, sge->vaddr, len, 1);
178  sge->vaddr += len;
179  sge->length -= len;
180  sge->sge_length -= len;
181  if (sge->sge_length == 0) {
/* Current SGE fully consumed: load the next one from the list, if any remain. */
182  if (--ssge.num_sge)
183  *sge = *ssge.sg_list++;
184  } else if (sge->length == 0 && sge->mr->lkey) {
/* Current segment consumed: step to the next segment in the MR's map, moving to the next map entry after QIB_SEGSZ segments. */
185  if (++sge->n >= QIB_SEGSZ) {
186  if (++sge->m >= sge->mr->mapsz)
187  break;
188  sge->n = 0;
189  }
190  sge->vaddr =
191  sge->mr->map[sge->m]->segs[sge->n].vaddr;
192  sge->length =
193  sge->mr->map[sge->m]->segs[sge->n].length;
194  }
195  length -= len;
196  }
197  qib_put_ss(&qp->r_sge);
/* NOTE(review): listing line 198 is missing; the condition guarding this goto is not visible (as shown, the code below it would be unreachable). */
199  goto bail_unlock;
/* Fill in the receive completion for the destination QP. */
200  wc.wr_id = qp->r_wr_id;
201  wc.status = IB_WC_SUCCESS;
202  wc.opcode = IB_WC_RECV;
203  wc.qp = &qp->ibqp;
204  wc.src_qp = sqp->ibqp.qp_num;
205  wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
206  swqe->wr.wr.ud.pkey_index : 0;
207  wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
208  wc.sl = ah_attr->sl;
209  wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
210  wc.port_num = qp->port_num;
211  /* Signal completion event if the solicited bit is set. */
212  qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
213  swqe->wr.send_flags & IB_SEND_SOLICITED);
214  ibp->n_loop_pkts++;
215 bail_unlock:
216  spin_unlock_irqrestore(&qp->r_lock, flags);
217 drop:
/* Drop a reference on the destination QP (presumably the one taken by qib_lookup_qpn() - confirm) and wake waiters when it hits zero. */
218  if (atomic_dec_and_test(&qp->refcount))
219  wake_up(&qp->wait);
220 }
221 
/*
 * qib_make_ud_req - process the next UD send work request on a QP
 * @qp: the QP to make progress on
 *
 * Takes qp->s_lock for the duration of the call (released temporarily
 * around the loopback delivery).  For a work request whose destination
 * base LID equals this port's LID the request is handed to
 * qib_ud_loopback(); otherwise the LRH, optional GRH, BTH and DETH are
 * built in qp->s_hdr and the send state (s_sge, s_cur_size, s_hdrwords,
 * s_wqe, ...) is set up for the packet.
 *
 * Returns 1 when control reaches the "done" label and 0 when it leaves
 * through "bail", which also clears QIB_S_BUSY in qp->s_flags.
 *
 * NOTE(review): this text comes from a numbered HTML listing and several
 * listing lines are absent (245-246, 258, 298, 366).  Each gap is marked
 * below; the block is not compilable until those lines are restored
 * from the original file.
 */
228 int qib_make_ud_req(struct qib_qp *qp)
229 {
230  struct qib_other_headers *ohdr;
231  struct ib_ah_attr *ah_attr;
232  struct qib_pportdata *ppd;
233  struct qib_ibport *ibp;
234  struct qib_swqe *wqe;
235  unsigned long flags;
236  u32 nwords;
237  u32 extra_bytes;
238  u32 bth0;
239  u16 lrh0;
240  u16 lid;
241  int ret = 0;
242  int next_cur;
243 
244  spin_lock_irqsave(&qp->s_lock, flags);
245 
/* NOTE(review): listing line 246 is missing; the statement that opens the block closed at listing line 260 is not visible. */
247  if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
248  goto bail;
249  /* We are in the error state, flush the work request. */
250  if (qp->s_last == qp->s_head)
251  goto bail;
252  /* If DMAs are in progress, we can't flush immediately. */
253  if (atomic_read(&qp->s_dma_busy)) {
254  qp->s_flags |= QIB_S_WAIT_DMA;
255  goto bail;
256  }
257  wqe = get_swqe_ptr(qp, qp->s_last);
/* NOTE(review): listing line 258 is missing; what is done with wqe before the jump to done is not visible. */
259  goto done;
260  }
261 
/* Nothing queued beyond the current position. */
262  if (qp->s_cur == qp->s_head)
263  goto bail;
264 
265  wqe = get_swqe_ptr(qp, qp->s_cur);
/* Index of the following entry, wrapping to 0 at s_size. */
266  next_cur = qp->s_cur + 1;
267  if (next_cur >= qp->s_size)
268  next_cur = 0;
269 
270  /* Construct the header. */
271  ibp = to_iport(qp->ibqp.device, qp->port_num);
272  ppd = ppd_from_ibp(ibp);
273  ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
/* Update the transmit counters: LIDs at or above QIB_MULTICAST_LID_BASE count as multicast, except the permissive LID. */
274  if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
275  if (ah_attr->dlid != QIB_PERMISSIVE_LID)
276  ibp->n_multicast_xmit++;
277  else
278  ibp->n_unicast_xmit++;
279  } else {
280  ibp->n_unicast_xmit++;
/* Clear the LMC path bits so the destination can be compared with this port's base LID. */
281  lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
282  if (unlikely(lid == ppd->lid)) {
283  /*
284  * If DMAs are in progress, we can't generate
285  * a completion for the loopback packet since
286  * it would be out of order.
287  * XXX Instead of waiting, we could queue a
288  * zero length descriptor so we get a callback.
289  */
290  if (atomic_read(&qp->s_dma_busy)) {
291  qp->s_flags |= QIB_S_WAIT_DMA;
292  goto bail;
293  }
/* Advance s_cur first, then drop s_lock while the loopback delivery runs. */
294  qp->s_cur = next_cur;
295  spin_unlock_irqrestore(&qp->s_lock, flags);
296  qib_ud_loopback(qp, wqe);
297  spin_lock_irqsave(&qp->s_lock, flags);
/* NOTE(review): listing line 298 is missing; what happens after re-taking s_lock and before the jump to done is not visible. */
299  goto done;
300  }
301  }
302 
303  qp->s_cur = next_cur;
/* Bytes needed to round the payload up to a multiple of 4, and the padded payload size in 32-bit words. */
304  extra_bytes = -wqe->length & 3;
305  nwords = (wqe->length + extra_bytes) >> 2;
306 
307  /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
308  qp->s_hdrwords = 7;
309  qp->s_cur_size = wqe->length;
310  qp->s_cur_sge = &qp->s_sge;
311  qp->s_srate = ah_attr->static_rate;
312  qp->s_wqe = wqe;
313  qp->s_sge.sge = wqe->sg_list[0];
314  qp->s_sge.sg_list = wqe->sg_list + 1;
315  qp->s_sge.num_sge = wqe->wr.num_sge;
316  qp->s_sge.total_len = wqe->length;
317 
318  if (ah_attr->ah_flags & IB_AH_GRH) {
319  /* Header size in 32-bit words. */
320  qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh,
321  &ah_attr->grh,
322  qp->s_hdrwords, nwords);
323  lrh0 = QIB_LRH_GRH;
324  ohdr = &qp->s_hdr->u.l.oth;
325  /*
326  * Don't worry about sending to locally attached multicast
327  * QPs. It is unspecified by the spec. what happens.
328  */
329  } else {
330  /* Header size in 32-bit words. */
331  lrh0 = QIB_LRH_BTH;
332  ohdr = &qp->s_hdr->u.oth;
333  }
/* An immediate-data send adds one header word and uses the WITH_IMMEDIATE opcode. */
334  if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
335  qp->s_hdrwords++;
336  ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
337  bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
338  } else
339  bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
/* LRH word 0: SL in bits 4-7, VL in bits 12-15. */
340  lrh0 |= ah_attr->sl << 4;
341  if (qp->ibqp.qp_type == IB_QPT_SMI)
342  lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
343  else
344  lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12;
345  qp->s_hdr->lrh[0] = cpu_to_be16(lrh0);
346  qp->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
347  qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
/* Source LID: port LID plus path bits; a port LID of 0 is sent as the permissive LID. */
348  lid = ppd->lid;
349  if (lid) {
350  lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
351  qp->s_hdr->lrh[3] = cpu_to_be16(lid);
352  } else
353  qp->s_hdr->lrh[3] = IB_LID_PERMISSIVE;
354  if (wqe->wr.send_flags & IB_SEND_SOLICITED)
355  bth0 |= IB_BTH_SOLICITED;
/* Pad count goes in BTH bits 20-21; the P_Key occupies the low bits. */
356  bth0 |= extra_bytes << 20;
357  bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY :
358  qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ?
359  wqe->wr.wr.ud.pkey_index : qp->s_pkey_index);
360  ohdr->bth[0] = cpu_to_be32(bth0);
361  /*
362  * Use the multicast QP if the destination LID is a multicast LID.
363  */
364  ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
365  ah_attr->dlid != QIB_PERMISSIVE_LID ?
/* NOTE(review): listing line 366 is missing; the value used when the condition is true is not visible. */
367  cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
/* Consume one PSN for this packet. */
368  ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
369  /*
370  * Qkeys with the high order bit set mean use the
371  * qkey from the QP context instead of the WR (see 10.2.5).
372  */
373  ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
374  qp->qkey : wqe->wr.wr.ud.remote_qkey);
375  ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
376 
377 done:
378  ret = 1;
379  goto unlock;
380 
381 bail:
/* Nothing was processed: clear the busy flag and return 0. */
382  qp->s_flags &= ~QIB_S_BUSY;
383 unlock:
384  spin_unlock_irqrestore(&qp->s_lock, flags);
385  return ret;
386 }
387 
388 static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey)
389 {
390  struct qib_pportdata *ppd = ppd_from_ibp(ibp);
391  struct qib_devdata *dd = ppd->dd;
392  unsigned ctxt = ppd->hw_pidx;
393  unsigned i;
394 
395  pkey &= 0x7fff; /* remove limited/full membership bit */
396 
397  for (i = 0; i < ARRAY_SIZE(dd->rcd[ctxt]->pkeys); ++i)
398  if ((dd->rcd[ctxt]->pkeys[i] & 0x7fff) == pkey)
399  return i;
400 
401  /*
402  * Should not get here, this means hardware failed to validate pkeys.
403  * Punt and return index 0.
404  */
405  return 0;
406 }
407 
/*
 * qib_ud_rcv - process an incoming UD packet for a QP
 * @ibp: the IB port the packet arrived on
 * @hdr: the packet header
 * @has_grh: non-zero when the header contains a GRH
 * @data: the packet payload
 * @tlen: the packet length; the header size, pad count and 4 further
 *        bytes are subtracted from it to obtain the payload length
 * @qp: the destination QP
 *
 * Validates the packet (length, permissive LID use, P_Key, Q_Key, MAD
 * size/VL rules, opcode), copies the GRH (or skips space for it) and
 * the payload into the QP's receive SGE state and enters a receive
 * completion on the QP's recv_cq.  Packets leaving through the "drop"
 * label increment ibp->n_pkt_drops; the early "return" paths do not.
 *
 * NOTE(review): this text comes from a numbered HTML listing and several
 * listing lines are absent (468, 478, 498, 510, 534, 556, 578).  Each
 * gap is marked below; the block is not compilable until those lines
 * are restored from the original file.
 */
421 void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
422  int has_grh, void *data, u32 tlen, struct qib_qp *qp)
423 {
424  struct qib_other_headers *ohdr;
425  int opcode;
426  u32 hdrsize;
427  u32 pad;
428  struct ib_wc wc;
429  u32 qkey;
430  u32 src_qp;
431  u16 dlid;
432 
433  /* Check for GRH */
434  if (!has_grh) {
435  ohdr = &hdr->u.oth;
436  hdrsize = 8 + 12 + 8; /* LRH + BTH + DETH */
437  } else {
438  ohdr = &hdr->u.l.oth;
439  hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
440  }
/* DETH word 0 carries the Q_Key, word 1 the source QPN. */
441  qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
442  src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & QIB_QPN_MASK;
443 
444  /*
445  * Get the number of bytes the message was padded by
446  * and drop incomplete packets.
447  */
448  pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/* The extra 4 bytes are presumably the trailing CRC - TODO confirm. */
449  if (unlikely(tlen < (hdrsize + pad + 4)))
450  goto drop;
451 
/* From here on tlen is the payload length only. */
452  tlen -= hdrsize + pad + 4;
453 
454  /*
455  * Check that the permissive LID is only used on QP0
456  * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
457  */
458  if (qp->ibqp.qp_num) {
459  if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
460  hdr->lrh[3] == IB_LID_PERMISSIVE))
461  goto drop;
462  if (qp->ibqp.qp_num > 1) {
463  u16 pkey1, pkey2;
464 
/* Assignment to a u16 keeps only the low 16 bits of BTH word 0. */
465  pkey1 = be32_to_cpu(ohdr->bth[0]);
466  pkey2 = qib_get_pkey(ibp, qp->s_pkey_index);
467  if (unlikely(!qib_pkey_ok(pkey1, pkey2))) {
/* NOTE(review): listing line 468 is missing; the call taking the arguments below is not visible. */
469  pkey1,
470  (be16_to_cpu(hdr->lrh[0]) >> 4) &
471  0xF,
472  src_qp, qp->ibqp.qp_num,
473  hdr->lrh[3], hdr->lrh[1]);
474  return;
475  }
476  }
477  if (unlikely(qkey != qp->qkey)) {
/* NOTE(review): listing line 478 is missing; the call taking the arguments below is not visible. */
479  (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
480  src_qp, qp->ibqp.qp_num,
481  hdr->lrh[3], hdr->lrh[1]);
482  return;
483  }
484  /* Drop invalid MAD packets (see 13.5.3.1). */
485  if (unlikely(qp->ibqp.qp_num == 1 &&
486  (tlen != 256 ||
487  (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))
488  goto drop;
489  } else {
490  struct ib_smp *smp;
491 
492  /* Drop invalid MAD packets (see 13.5.3.1). */
493  if (tlen != 256 || (be16_to_cpu(hdr->lrh[0]) >> 12) != 15)
494  goto drop;
495  smp = (struct ib_smp *) data;
496  if ((hdr->lrh[1] == IB_LID_PERMISSIVE ||
497  hdr->lrh[3] == IB_LID_PERMISSIVE) &&
/* NOTE(review): listing line 498 is missing; the final term of this condition is not visible. */
499  goto drop;
500  }
501 
502  /*
503  * The opcode is in the low byte when its in network order
504  * (top byte when in host order).
505  */
506  opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
/* Immediate-data sends are only accepted on QPs numbered above 1. */
507  if (qp->ibqp.qp_num > 1 &&
508  opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
509  wc.ex.imm_data = ohdr->u.ud.imm_data;
/* NOTE(review): listing line 510 is missing; as shown, wc.wc_flags is never initialised on this branch. */
511  tlen -= sizeof(u32);
512  } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
513  wc.ex.imm_data = 0;
514  wc.wc_flags = 0;
515  } else
516  goto drop;
517 
518  /*
519  * A GRH is expected to precede the data even if not
520  * present on the wire.
521  */
522  wc.byte_len = tlen + sizeof(struct ib_grh);
523 
524  /*
525  * Get the next work request entry to find where to put the data.
526  */
527  if (qp->r_flags & QIB_R_REUSE_SGE)
528  qp->r_flags &= ~QIB_R_REUSE_SGE;
529  else {
530  int ret;
531 
532  ret = qib_get_rwqe(qp, 0);
533  if (ret < 0) {
/* NOTE(review): listing line 534 is missing; whatever is done on a negative return is not visible. */
535  return;
536  }
537  if (!ret) {
538  if (qp->ibqp.qp_num == 0)
539  ibp->n_vl15_dropped++;
540  return;
541  }
542  }
543  /* Silently drop packets which are too big. */
544  if (unlikely(wc.byte_len > qp->r_len)) {
/* Set QIB_R_REUSE_SGE so the next packet takes the reuse branch above instead of calling qib_get_rwqe(). */
545  qp->r_flags |= QIB_R_REUSE_SGE;
546  goto drop;
547  }
548  if (has_grh) {
549  qib_copy_sge(&qp->r_sge, &hdr->u.l.grh,
550  sizeof(struct ib_grh), 1);
551  wc.wc_flags |= IB_WC_GRH;
552  } else
553  qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
/* The payload length is byte_len minus the GRH-sized prefix accounted for above. */
554  qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
555  qib_put_ss(&qp->r_sge);
/* NOTE(review): listing line 556 is missing; the condition guarding this return is not visible (as shown, the code below it would be unreachable). */
557  return;
/* Fill in the receive completion from the packet headers. */
558  wc.wr_id = qp->r_wr_id;
559  wc.status = IB_WC_SUCCESS;
560  wc.opcode = IB_WC_RECV;
561  wc.vendor_err = 0;
562  wc.qp = &qp->ibqp;
563  wc.src_qp = src_qp;
564  wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
565  qib_lookup_pkey(ibp, be32_to_cpu(ohdr->bth[0])) : 0;
566  wc.slid = be16_to_cpu(hdr->lrh[3]);
567  wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF;
568  dlid = be16_to_cpu(hdr->lrh[1]);
569  /*
570  * Save the LMC lower bits if the destination LID is a unicast LID.
571  */
572  wc.dlid_path_bits = dlid >= QIB_MULTICAST_LID_BASE ? 0 :
573  dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
574  wc.port_num = qp->port_num;
575  /* Signal completion event if the solicited bit is set. */
576  qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
577  (ohdr->bth[0] &
/* NOTE(review): listing line 578 is missing; the remainder of this argument and the end of the call are not visible. */
579  return;
580 
581 drop:
582  ibp->n_pkt_drops++;
583 }
583 }