Linux Kernel 3.7.1
qib_verbs.c
1 /*
2  * Copyright (c) 2012 Intel Corporation. All rights reserved.
3  * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
4  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses. You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  * Redistribution and use in source and binary forms, with or
13  * without modification, are permitted provided that the following
14  * conditions are met:
15  *
16  * - Redistributions of source code must retain the above
17  * copyright notice, this list of conditions and the following
18  * disclaimer.
19  *
20  * - Redistributions in binary form must reproduce the above
21  * copyright notice, this list of conditions and the following
22  * disclaimer in the documentation and/or other materials
23  * provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 
35 #include <rdma/ib_mad.h>
36 #include <rdma/ib_user_verbs.h>
37 #include <linux/io.h>
38 #include <linux/module.h>
39 #include <linux/utsname.h>
40 #include <linux/rculist.h>
41 #include <linux/mm.h>
42 #include <linux/random.h>
43 
44 #include "qib.h"
45 #include "qib_common.h"
46 
47 static unsigned int ib_qib_qp_table_size = 256;
48 module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
49 MODULE_PARM_DESC(qp_table_size, "QP table size");
50 
51 unsigned int ib_qib_lkey_table_size = 16;
52 module_param_named(lkey_table_size, ib_qib_lkey_table_size, uint,
53  S_IRUGO);
54 MODULE_PARM_DESC(lkey_table_size,
55  "LKEY table size in bits (2^n, 1 <= n <= 23)");
56 
57 static unsigned int ib_qib_max_pds = 0xFFFF;
58 module_param_named(max_pds, ib_qib_max_pds, uint, S_IRUGO);
59 MODULE_PARM_DESC(max_pds,
60  "Maximum number of protection domains to support");
61 
62 static unsigned int ib_qib_max_ahs = 0xFFFF;
63 module_param_named(max_ahs, ib_qib_max_ahs, uint, S_IRUGO);
64 MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
65 
66 unsigned int ib_qib_max_cqes = 0x2FFFF;
67 module_param_named(max_cqes, ib_qib_max_cqes, uint, S_IRUGO);
68 MODULE_PARM_DESC(max_cqes,
69  "Maximum number of completion queue entries to support");
70 
71 unsigned int ib_qib_max_cqs = 0x1FFFF;
72 module_param_named(max_cqs, ib_qib_max_cqs, uint, S_IRUGO);
73 MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
74 
75 unsigned int ib_qib_max_qp_wrs = 0x3FFF;
76 module_param_named(max_qp_wrs, ib_qib_max_qp_wrs, uint, S_IRUGO);
77 MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
78 
79 unsigned int ib_qib_max_qps = 16384;
80 module_param_named(max_qps, ib_qib_max_qps, uint, S_IRUGO);
81 MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
82 
83 unsigned int ib_qib_max_sges = 0x60;
84 module_param_named(max_sges, ib_qib_max_sges, uint, S_IRUGO);
85 MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
86 
87 unsigned int ib_qib_max_mcast_grps = 16384;
88 module_param_named(max_mcast_grps, ib_qib_max_mcast_grps, uint, S_IRUGO);
89 MODULE_PARM_DESC(max_mcast_grps,
90  "Maximum number of multicast groups to support");
91 
92 unsigned int ib_qib_max_mcast_qp_attached = 16;
93 module_param_named(max_mcast_qp_attached, ib_qib_max_mcast_qp_attached,
94  uint, S_IRUGO);
95 MODULE_PARM_DESC(max_mcast_qp_attached,
96  "Maximum number of attached QPs to support");
97 
98 unsigned int ib_qib_max_srqs = 1024;
99 module_param_named(max_srqs, ib_qib_max_srqs, uint, S_IRUGO);
100 MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
101 
102 unsigned int ib_qib_max_srq_sges = 128;
103 module_param_named(max_srq_sges, ib_qib_max_srq_sges, uint, S_IRUGO);
104 MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
105 
106 unsigned int ib_qib_max_srq_wrs = 0x1FFFF;
107 module_param_named(max_srq_wrs, ib_qib_max_srq_wrs, uint, S_IRUGO);
108 MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
109 
110 static unsigned int ib_qib_disable_sma;
111 module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
112 MODULE_PARM_DESC(disable_sma, "Disable the SMA");
113 
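/*
 * Usage sketch (assumes the driver is loaded as the ib_qib module; the
 * invocation below is illustrative and not taken from this file): the
 * parameters above are given at load time and, being S_IRUGO, can be
 * read back from sysfs, e.g.
 *
 *   modprobe ib_qib qp_table_size=512 lkey_table_size=17
 *   cat /sys/module/ib_qib/parameters/qp_table_size
 */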
114 /*
115  * Note that it is OK to post send work requests in the SQE and ERR
116  * states; qib_do_send() will process them and generate error
117  * completions as per IB 1.2 C10-96.
118  */
119 const int ib_qib_state_ops[IB_QPS_ERR + 1] = {
120  [IB_QPS_RESET] = 0,
121  [IB_QPS_INIT] = QIB_POST_RECV_OK,
122  [IB_QPS_RTR] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK,
123  [IB_QPS_RTS] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
124  QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK |
125  QIB_PROCESS_NEXT_SEND_OK,
126  [IB_QPS_SQD] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
127  QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK,
128  [IB_QPS_SQE] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
129  QIB_POST_SEND_OK | QIB_FLUSH_SEND,
130  [IB_QPS_ERR] = QIB_POST_RECV_OK | QIB_FLUSH_RECV |
131  QIB_POST_SEND_OK | QIB_FLUSH_SEND,
132 };
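/*
 * How the table is consulted (summarizing the code below): each entry is
 * a bitmask of QIB_*_OK flags indexed by the current QP state; for
 * example, qib_post_receive() tests
 * ib_qib_state_ops[qp->state] & QIB_POST_RECV_OK, and qib_post_one_send()
 * tests QIB_POST_SEND_OK the same way before queuing work.
 */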
133 
134 struct qib_ucontext {
135  struct ib_ucontext ibucontext;
136 };
137 
138 static inline struct qib_ucontext *to_iucontext(struct ib_ucontext
139  *ibucontext)
140 {
141  return container_of(ibucontext, struct qib_ucontext, ibucontext);
142 }
143 
144 /*
145  * Translate ib_wr_opcode into ib_wc_opcode.
146  */
147 const enum ib_wc_opcode ib_qib_wc_opcode[] = {
148  [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
149  [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
150  [IB_WR_SEND] = IB_WC_SEND,
151  [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
152  [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
153  [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
154  [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
155 };
156 
157 /*
158  * System image GUID.
159  */
160 __be64 ib_qib_sys_image_guid;
161 
168 void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
169 {
170  struct qib_sge *sge = &ss->sge;
171 
172  while (length) {
173  u32 len = sge->length;
174 
175  if (len > length)
176  len = length;
177  if (len > sge->sge_length)
178  len = sge->sge_length;
179  BUG_ON(len == 0);
180  memcpy(sge->vaddr, data, len);
181  sge->vaddr += len;
182  sge->length -= len;
183  sge->sge_length -= len;
184  if (sge->sge_length == 0) {
185  if (release)
186  qib_put_mr(sge->mr);
187  if (--ss->num_sge)
188  *sge = *ss->sg_list++;
189  } else if (sge->length == 0 && sge->mr->lkey) {
190  if (++sge->n >= QIB_SEGSZ) {
191  if (++sge->m >= sge->mr->mapsz)
192  break;
193  sge->n = 0;
194  }
195  sge->vaddr =
196  sge->mr->map[sge->m]->segs[sge->n].vaddr;
197  sge->length =
198  sge->mr->map[sge->m]->segs[sge->n].length;
199  }
200  data += len;
201  length -= len;
202  }
203 }
204 
210 void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
211 {
212  struct qib_sge *sge = &ss->sge;
213 
214  while (length) {
215  u32 len = sge->length;
216 
217  if (len > length)
218  len = length;
219  if (len > sge->sge_length)
220  len = sge->sge_length;
221  BUG_ON(len == 0);
222  sge->vaddr += len;
223  sge->length -= len;
224  sge->sge_length -= len;
225  if (sge->sge_length == 0) {
226  if (release)
227  qib_put_mr(sge->mr);
228  if (--ss->num_sge)
229  *sge = *ss->sg_list++;
230  } else if (sge->length == 0 && sge->mr->lkey) {
231  if (++sge->n >= QIB_SEGSZ) {
232  if (++sge->m >= sge->mr->mapsz)
233  break;
234  sge->n = 0;
235  }
236  sge->vaddr =
237  sge->mr->map[sge->m]->segs[sge->n].vaddr;
238  sge->length =
239  sge->mr->map[sge->m]->segs[sge->n].length;
240  }
241  length -= len;
242  }
243 }
244 
245 /*
246  * Count the number of DMA descriptors needed to send length bytes of data.
247  * Don't modify the qib_sge_state to get the count.
248  * Return zero if any of the segments is not aligned.
249  */
250 static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
251 {
252  struct qib_sge *sg_list = ss->sg_list;
253  struct qib_sge sge = ss->sge;
254  u8 num_sge = ss->num_sge;
255  u32 ndesc = 1; /* count the header */
256 
257  while (length) {
258  u32 len = sge.length;
259 
260  if (len > length)
261  len = length;
262  if (len > sge.sge_length)
263  len = sge.sge_length;
264  BUG_ON(len == 0);
265  if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
266  (len != length && (len & (sizeof(u32) - 1)))) {
267  ndesc = 0;
268  break;
269  }
270  ndesc++;
271  sge.vaddr += len;
272  sge.length -= len;
273  sge.sge_length -= len;
274  if (sge.sge_length == 0) {
275  if (--num_sge)
276  sge = *sg_list++;
277  } else if (sge.length == 0 && sge.mr->lkey) {
278  if (++sge.n >= QIB_SEGSZ) {
279  if (++sge.m >= sge.mr->mapsz)
280  break;
281  sge.n = 0;
282  }
283  sge.vaddr =
284  sge.mr->map[sge.m]->segs[sge.n].vaddr;
285  sge.length =
286  sge.mr->map[sge.m]->segs[sge.n].length;
287  }
288  length -= len;
289  }
290  return ndesc;
291 }
292 
293 /*
294  * Copy from the SGEs to the data buffer.
295  */
296 static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
297 {
298  struct qib_sge *sge = &ss->sge;
299 
300  while (length) {
301  u32 len = sge->length;
302 
303  if (len > length)
304  len = length;
305  if (len > sge->sge_length)
306  len = sge->sge_length;
307  BUG_ON(len == 0);
308  memcpy(data, sge->vaddr, len);
309  sge->vaddr += len;
310  sge->length -= len;
311  sge->sge_length -= len;
312  if (sge->sge_length == 0) {
313  if (--ss->num_sge)
314  *sge = *ss->sg_list++;
315  } else if (sge->length == 0 && sge->mr->lkey) {
316  if (++sge->n >= QIB_SEGSZ) {
317  if (++sge->m >= sge->mr->mapsz)
318  break;
319  sge->n = 0;
320  }
321  sge->vaddr =
322  sge->mr->map[sge->m]->segs[sge->n].vaddr;
323  sge->length =
324  sge->mr->map[sge->m]->segs[sge->n].length;
325  }
326  data += len;
327  length -= len;
328  }
329 }
330 
336 static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
337  int *scheduled)
338 {
339  struct qib_swqe *wqe;
340  u32 next;
341  int i;
342  int j;
343  int acc;
344  int ret;
345  unsigned long flags;
346  struct qib_lkey_table *rkt;
347  struct qib_pd *pd;
348 
349  spin_lock_irqsave(&qp->s_lock, flags);
350 
351  /* Check that state is OK to post send. */
352  if (!(ib_qib_state_ops[qp->state] & QIB_POST_SEND_OK))
353  goto bail_inval;
354 
355  /* IB spec says that num_sge == 0 is OK. */
356  if (wr->num_sge > qp->s_max_sge)
357  goto bail_inval;
358 
359  /*
360  * Don't allow RDMA reads or atomic operations on UC or
361  * undefined operations.
362  * Make sure buffer is large enough to hold the result for atomics.
363  */
364  if (wr->opcode == IB_WR_FAST_REG_MR) {
365  if (qib_fast_reg_mr(qp, wr))
366  goto bail_inval;
367  } else if (qp->ibqp.qp_type == IB_QPT_UC) {
368  if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
369  goto bail_inval;
370  } else if (qp->ibqp.qp_type != IB_QPT_RC) {
371  /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
372  if (wr->opcode != IB_WR_SEND &&
373  wr->opcode != IB_WR_SEND_WITH_IMM)
374  goto bail_inval;
375  /* Check UD destination address PD */
376  if (qp->ibqp.pd != wr->wr.ud.ah->pd)
377  goto bail_inval;
378  } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
379  goto bail_inval;
380  else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
381  (wr->num_sge == 0 ||
382  wr->sg_list[0].length < sizeof(u64) ||
383  wr->sg_list[0].addr & (sizeof(u64) - 1)))
384  goto bail_inval;
385  else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
386  goto bail_inval;
387 
388  next = qp->s_head + 1;
389  if (next >= qp->s_size)
390  next = 0;
391  if (next == qp->s_last) {
392  ret = -ENOMEM;
393  goto bail;
394  }
395 
396  rkt = &to_idev(qp->ibqp.device)->lk_table;
397  pd = to_ipd(qp->ibqp.pd);
398  wqe = get_swqe_ptr(qp, qp->s_head);
399  wqe->wr = *wr;
400  wqe->length = 0;
401  j = 0;
402  if (wr->num_sge) {
403  acc = wr->opcode >= IB_WR_RDMA_READ ?
404  IB_ACCESS_LOCAL_WRITE : 0;
405  for (i = 0; i < wr->num_sge; i++) {
406  u32 length = wr->sg_list[i].length;
407  int ok;
408 
409  if (length == 0)
410  continue;
411  ok = qib_lkey_ok(rkt, pd, &wqe->sg_list[j],
412  &wr->sg_list[i], acc);
413  if (!ok)
414  goto bail_inval_free;
415  wqe->length += length;
416  j++;
417  }
418  wqe->wr.num_sge = j;
419  }
420  if (qp->ibqp.qp_type == IB_QPT_UC ||
421  qp->ibqp.qp_type == IB_QPT_RC) {
422  if (wqe->length > 0x80000000U)
423  goto bail_inval_free;
424  } else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
425  qp->port_num - 1)->ibmtu)
426  goto bail_inval_free;
427  else
428  atomic_inc(&to_iah(wr->wr.ud.ah)->refcount);
429  wqe->ssn = qp->s_ssn++;
430  qp->s_head = next;
431 
432  ret = 0;
433  goto bail;
434 
435 bail_inval_free:
436  while (j) {
437  struct qib_sge *sge = &wqe->sg_list[--j];
438 
439  qib_put_mr(sge->mr);
440  }
441 bail_inval:
442  ret = -EINVAL;
443 bail:
444  if (!ret && !wr->next &&
445  !qib_sdma_empty(
446  dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
447  qib_schedule_send(qp);
448  *scheduled = 1;
449  }
450  spin_unlock_irqrestore(&qp->s_lock, flags);
451  return ret;
452 }
453 
462 static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
463  struct ib_send_wr **bad_wr)
464 {
465  struct qib_qp *qp = to_iqp(ibqp);
466  int err = 0;
467  int scheduled = 0;
468 
469  for (; wr; wr = wr->next) {
470  err = qib_post_one_send(qp, wr, &scheduled);
471  if (err) {
472  *bad_wr = wr;
473  goto bail;
474  }
475  }
476 
477  /* Try to do the send work in the caller's context. */
478  if (!scheduled)
479  qib_do_send(&qp->s_work);
480 
481 bail:
482  return err;
483 }
484 
493 static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
494  struct ib_recv_wr **bad_wr)
495 {
496  struct qib_qp *qp = to_iqp(ibqp);
497  struct qib_rwq *wq = qp->r_rq.wq;
498  unsigned long flags;
499  int ret;
500 
501  /* Check that state is OK to post receive. */
502  if (!(ib_qib_state_ops[qp->state] & QIB_POST_RECV_OK) || !wq) {
503  *bad_wr = wr;
504  ret = -EINVAL;
505  goto bail;
506  }
507 
508  for (; wr; wr = wr->next) {
509  struct qib_rwqe *wqe;
510  u32 next;
511  int i;
512 
513  if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
514  *bad_wr = wr;
515  ret = -EINVAL;
516  goto bail;
517  }
518 
519  spin_lock_irqsave(&qp->r_rq.lock, flags);
520  next = wq->head + 1;
521  if (next >= qp->r_rq.size)
522  next = 0;
523  if (next == wq->tail) {
524  spin_unlock_irqrestore(&qp->r_rq.lock, flags);
525  *bad_wr = wr;
526  ret = -ENOMEM;
527  goto bail;
528  }
529 
530  wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
531  wqe->wr_id = wr->wr_id;
532  wqe->num_sge = wr->num_sge;
533  for (i = 0; i < wr->num_sge; i++)
534  wqe->sg_list[i] = wr->sg_list[i];
535  /* Make sure queue entry is written before the head index. */
536  smp_wmb();
537  wq->head = next;
538  spin_unlock_irqrestore(&qp->r_rq.lock, flags);
539  }
540  ret = 0;
541 
542 bail:
543  return ret;
544 }
545 
559 static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
560  int has_grh, void *data, u32 tlen, struct qib_qp *qp)
561 {
562  struct qib_ibport *ibp = &rcd->ppd->ibport_data;
563 
564  spin_lock(&qp->r_lock);
565 
566  /* Check for valid receive state. */
567  if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
568  ibp->n_pkt_drops++;
569  goto unlock;
570  }
571 
572  switch (qp->ibqp.qp_type) {
573  case IB_QPT_SMI:
574  case IB_QPT_GSI:
575  if (ib_qib_disable_sma)
576  break;
577  /* FALLTHROUGH */
578  case IB_QPT_UD:
579  qib_ud_rcv(ibp, hdr, has_grh, data, tlen, qp);
580  break;
581 
582  case IB_QPT_RC:
583  qib_rc_rcv(rcd, hdr, has_grh, data, tlen, qp);
584  break;
585 
586  case IB_QPT_UC:
587  qib_uc_rcv(ibp, hdr, has_grh, data, tlen, qp);
588  break;
589 
590  default:
591  break;
592  }
593 
594 unlock:
595  spin_unlock(&qp->r_lock);
596 }
597 
608 void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
609 {
610  struct qib_pportdata *ppd = rcd->ppd;
611  struct qib_ibport *ibp = &ppd->ibport_data;
612  struct qib_ib_header *hdr = rhdr;
613  struct qib_other_headers *ohdr;
614  struct qib_qp *qp;
615  u32 qp_num;
616  int lnh;
617  u8 opcode;
618  u16 lid;
619 
620  /* 24 == LRH (8) + BTH (12) + ICRC (4) bytes */
621  if (unlikely(tlen < 24))
622  goto drop;
623 
624  /* Check for a valid destination LID (see ch. 7.11.1). */
625  lid = be16_to_cpu(hdr->lrh[1]);
626  if (lid < QIB_MULTICAST_LID_BASE) {
627  lid &= ~((1 << ppd->lmc) - 1);
628  if (unlikely(lid != ppd->lid))
629  goto drop;
630  }
631 
632  /* Check for GRH */
633  lnh = be16_to_cpu(hdr->lrh[0]) & 3;
634  if (lnh == QIB_LRH_BTH)
635  ohdr = &hdr->u.oth;
636  else if (lnh == QIB_LRH_GRH) {
637  u32 vtf;
638 
639  ohdr = &hdr->u.l.oth;
640  if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
641  goto drop;
642  vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
643  if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
644  goto drop;
645  } else
646  goto drop;
647 
648  opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
649  ibp->opstats[opcode & 0x7f].n_bytes += tlen;
650  ibp->opstats[opcode & 0x7f].n_packets++;
651 
652  /* Get the destination QP number. */
653  qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
654  if (qp_num == QIB_MULTICAST_QPN) {
655  struct qib_mcast *mcast;
656  struct qib_mcast_qp *p;
657 
658  if (lnh != QIB_LRH_GRH)
659  goto drop;
660  mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
661  if (mcast == NULL)
662  goto drop;
663  ibp->n_multicast_rcv++;
664  list_for_each_entry_rcu(p, &mcast->qp_list, list)
665  qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
666  /*
667  * Notify qib_multicast_detach() if it is waiting for us
668  * to finish.
669  */
670  if (atomic_dec_return(&mcast->refcount) <= 1)
671  wake_up(&mcast->wait);
672  } else {
673  if (rcd->lookaside_qp) {
674  if (rcd->lookaside_qpn != qp_num) {
675  if (atomic_dec_and_test(
676  &rcd->lookaside_qp->refcount))
677  wake_up(
678  &rcd->lookaside_qp->wait);
679  rcd->lookaside_qp = NULL;
680  }
681  }
682  if (!rcd->lookaside_qp) {
683  qp = qib_lookup_qpn(ibp, qp_num);
684  if (!qp)
685  goto drop;
686  rcd->lookaside_qp = qp;
687  rcd->lookaside_qpn = qp_num;
688  } else
689  qp = rcd->lookaside_qp;
690  ibp->n_unicast_rcv++;
691  qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
692  }
693  return;
694 
695 drop:
696  ibp->n_pkt_drops++;
697 }
698 
699 /*
700  * This is called from a timer to check for QPs
701  * which need kernel memory in order to send a packet.
702  */
703 static void mem_timer(unsigned long data)
704 {
705  struct qib_ibdev *dev = (struct qib_ibdev *) data;
706  struct list_head *list = &dev->memwait;
707  struct qib_qp *qp = NULL;
708  unsigned long flags;
709 
710  spin_lock_irqsave(&dev->pending_lock, flags);
711  if (!list_empty(list)) {
712  qp = list_entry(list->next, struct qib_qp, iowait);
713  list_del_init(&qp->iowait);
714  atomic_inc(&qp->refcount);
715  if (!list_empty(list))
716  mod_timer(&dev->mem_timer, jiffies + 1);
717  }
718  spin_unlock_irqrestore(&dev->pending_lock, flags);
719 
720  if (qp) {
721  spin_lock_irqsave(&qp->s_lock, flags);
722  if (qp->s_flags & QIB_S_WAIT_KMEM) {
723  qp->s_flags &= ~QIB_S_WAIT_KMEM;
724  qib_schedule_send(qp);
725  }
726  spin_unlock_irqrestore(&qp->s_lock, flags);
727  if (atomic_dec_and_test(&qp->refcount))
728  wake_up(&qp->wait);
729  }
730 }
731 
732 static void update_sge(struct qib_sge_state *ss, u32 length)
733 {
734  struct qib_sge *sge = &ss->sge;
735 
736  sge->vaddr += length;
737  sge->length -= length;
738  sge->sge_length -= length;
739  if (sge->sge_length == 0) {
740  if (--ss->num_sge)
741  *sge = *ss->sg_list++;
742  } else if (sge->length == 0 && sge->mr->lkey) {
743  if (++sge->n >= QIB_SEGSZ) {
744  if (++sge->m >= sge->mr->mapsz)
745  return;
746  sge->n = 0;
747  }
748  sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
749  sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
750  }
751 }
752 
753 #ifdef __LITTLE_ENDIAN
754 static inline u32 get_upper_bits(u32 data, u32 shift)
755 {
756  return data >> shift;
757 }
758 
759 static inline u32 set_upper_bits(u32 data, u32 shift)
760 {
761  return data << shift;
762 }
763 
764 static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
765 {
766  data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
767  data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
768  return data;
769 }
770 #else
771 static inline u32 get_upper_bits(u32 data, u32 shift)
772 {
773  return data << shift;
774 }
775 
776 static inline u32 set_upper_bits(u32 data, u32 shift)
777 {
778  return data >> shift;
779 }
780 
781 static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
782 {
783  data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
784  data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
785  return data;
786 }
787 #endif
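/*
 * Worked example for the helpers above (little-endian case, illustrative
 * values only): get_upper_bits(0xddccbbaa, 8) == 0x00ddccbb, dropping the
 * low byte so the remaining bytes can be packed into the next partially
 * filled word by set_upper_bits(); clear_upper_bytes(0xddccbbaa, 1, 0)
 * keeps only the lowest byte, 0x000000aa.
 */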
788 
789 static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
790  u32 length, unsigned flush_wc)
791 {
792  u32 extra = 0;
793  u32 data = 0;
794  u32 last;
795 
796  while (1) {
797  u32 len = ss->sge.length;
798  u32 off;
799 
800  if (len > length)
801  len = length;
802  if (len > ss->sge.sge_length)
803  len = ss->sge.sge_length;
804  BUG_ON(len == 0);
805  /* If the source address is not aligned, try to align it. */
806  off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
807  if (off) {
808  u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
809  ~(sizeof(u32) - 1));
810  u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
811  u32 y;
812 
813  y = sizeof(u32) - off;
814  if (len > y)
815  len = y;
816  if (len + extra >= sizeof(u32)) {
817  data |= set_upper_bits(v, extra *
818  BITS_PER_BYTE);
819  len = sizeof(u32) - extra;
820  if (len == length) {
821  last = data;
822  break;
823  }
824  __raw_writel(data, piobuf);
825  piobuf++;
826  extra = 0;
827  data = 0;
828  } else {
829  /* Clear unused upper bytes */
830  data |= clear_upper_bytes(v, len, extra);
831  if (len == length) {
832  last = data;
833  break;
834  }
835  extra += len;
836  }
837  } else if (extra) {
838  /* Source address is aligned. */
839  u32 *addr = (u32 *) ss->sge.vaddr;
840  int shift = extra * BITS_PER_BYTE;
841  int ushift = 32 - shift;
842  u32 l = len;
843 
844  while (l >= sizeof(u32)) {
845  u32 v = *addr;
846 
847  data |= set_upper_bits(v, shift);
848  __raw_writel(data, piobuf);
849  data = get_upper_bits(v, ushift);
850  piobuf++;
851  addr++;
852  l -= sizeof(u32);
853  }
854  /*
855  * We still have 'extra' number of bytes leftover.
856  */
857  if (l) {
858  u32 v = *addr;
859 
860  if (l + extra >= sizeof(u32)) {
861  data |= set_upper_bits(v, shift);
862  len -= l + extra - sizeof(u32);
863  if (len == length) {
864  last = data;
865  break;
866  }
867  __raw_writel(data, piobuf);
868  piobuf++;
869  extra = 0;
870  data = 0;
871  } else {
872  /* Clear unused upper bytes */
873  data |= clear_upper_bytes(v, l, extra);
874  if (len == length) {
875  last = data;
876  break;
877  }
878  extra += l;
879  }
880  } else if (len == length) {
881  last = data;
882  break;
883  }
884  } else if (len == length) {
885  u32 w;
886 
887  /*
888  * Need to round up for the last dword in the
889  * packet.
890  */
891  w = (len + 3) >> 2;
892  qib_pio_copy(piobuf, ss->sge.vaddr, w - 1);
893  piobuf += w - 1;
894  last = ((u32 *) ss->sge.vaddr)[w - 1];
895  break;
896  } else {
897  u32 w = len >> 2;
898 
899  qib_pio_copy(piobuf, ss->sge.vaddr, w);
900  piobuf += w;
901 
902  extra = len & (sizeof(u32) - 1);
903  if (extra) {
904  u32 v = ((u32 *) ss->sge.vaddr)[w];
905 
906  /* Clear unused upper bytes */
907  data = clear_upper_bytes(v, extra, 0);
908  }
909  }
910  update_sge(ss, len);
911  length -= len;
912  }
913  /* Update address before sending packet. */
914  update_sge(ss, length);
915  if (flush_wc) {
916  /* must flush early everything before trigger word */
917  qib_flush_wc();
918  __raw_writel(last, piobuf);
919  /* be sure trigger word is written */
920  qib_flush_wc();
921  } else
922  __raw_writel(last, piobuf);
923 }
924 
925 static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
926  struct qib_qp *qp)
927 {
928  struct qib_verbs_txreq *tx;
929  unsigned long flags;
930 
931  spin_lock_irqsave(&qp->s_lock, flags);
932  spin_lock(&dev->pending_lock);
933 
934  if (!list_empty(&dev->txreq_free)) {
935  struct list_head *l = dev->txreq_free.next;
936 
937  list_del(l);
938  spin_unlock(&dev->pending_lock);
939  spin_unlock_irqrestore(&qp->s_lock, flags);
940  tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
941  } else {
942  if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK &&
943  list_empty(&qp->iowait)) {
944  dev->n_txwait++;
945  qp->s_flags |= QIB_S_WAIT_TX;
946  list_add_tail(&qp->iowait, &dev->txwait);
947  }
948  qp->s_flags &= ~QIB_S_BUSY;
949  spin_unlock(&dev->pending_lock);
950  spin_unlock_irqrestore(&qp->s_lock, flags);
951  tx = ERR_PTR(-EBUSY);
952  }
953  return tx;
954 }
955 
956 static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
957  struct qib_qp *qp)
958 {
959  struct qib_verbs_txreq *tx;
960  unsigned long flags;
961 
962  spin_lock_irqsave(&dev->pending_lock, flags);
963  /* assume the list is non-empty */
964  if (likely(!list_empty(&dev->txreq_free))) {
965  struct list_head *l = dev->txreq_free.next;
966 
967  list_del(l);
968  spin_unlock_irqrestore(&dev->pending_lock, flags);
969  tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
970  } else {
971  /* call slow path to get the extra lock */
972  spin_unlock_irqrestore(&dev->pending_lock, flags);
973  tx = __get_txreq(dev, qp);
974  }
975  return tx;
976 }
977 
978 void qib_put_txreq(struct qib_verbs_txreq *tx)
979 {
980  struct qib_ibdev *dev;
981  struct qib_qp *qp;
982  unsigned long flags;
983 
984  qp = tx->qp;
985  dev = to_idev(qp->ibqp.device);
986 
987  if (atomic_dec_and_test(&qp->refcount))
988  wake_up(&qp->wait);
989  if (tx->mr) {
990  qib_put_mr(tx->mr);
991  tx->mr = NULL;
992  }
993  if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
994  tx->txreq.flags &= ~QIB_SDMA_TXREQ_F_FREEBUF;
995  dma_unmap_single(&dd_from_dev(dev)->pcidev->dev,
996  tx->txreq.addr, tx->hdr_dwords << 2,
997  DMA_TO_DEVICE);
998  kfree(tx->align_buf);
999  }
1000 
1001  spin_lock_irqsave(&dev->pending_lock, flags);
1002 
1003  /* Put struct back on free list */
1004  list_add(&tx->txreq.list, &dev->txreq_free);
1005 
1006  if (!list_empty(&dev->txwait)) {
1007  /* Wake up first QP wanting a free struct */
1008  qp = list_entry(dev->txwait.next, struct qib_qp, iowait);
1009  list_del_init(&qp->iowait);
1010  atomic_inc(&qp->refcount);
1011  spin_unlock_irqrestore(&dev->pending_lock, flags);
1012 
1013  spin_lock_irqsave(&qp->s_lock, flags);
1014  if (qp->s_flags & QIB_S_WAIT_TX) {
1015  qp->s_flags &= ~QIB_S_WAIT_TX;
1016  qib_schedule_send(qp);
1017  }
1018  spin_unlock_irqrestore(&qp->s_lock, flags);
1019 
1020  if (atomic_dec_and_test(&qp->refcount))
1021  wake_up(&qp->wait);
1022  } else
1023  spin_unlock_irqrestore(&dev->pending_lock, flags);
1024 }
1025 
1026 /*
1027  * This is called when there are send DMA descriptors that might be
1028  * available.
1029  *
1030  * This is called with ppd->sdma_lock held.
1031  */
1032 void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
1033 {
1034  struct qib_qp *qp, *nqp;
1035  struct qib_qp *qps[20];
1036  struct qib_ibdev *dev;
1037  unsigned i, n;
1038 
1039  n = 0;
1040  dev = &ppd->dd->verbs_dev;
1041  spin_lock(&dev->pending_lock);
1042 
1043  /* Search wait list for first QP wanting DMA descriptors. */
1044  list_for_each_entry_safe(qp, nqp, &dev->dmawait, iowait) {
1045  if (qp->port_num != ppd->port)
1046  continue;
1047  if (n == ARRAY_SIZE(qps))
1048  break;
1049  if (qp->s_tx->txreq.sg_count > avail)
1050  break;
1051  avail -= qp->s_tx->txreq.sg_count;
1052  list_del_init(&qp->iowait);
1053  atomic_inc(&qp->refcount);
1054  qps[n++] = qp;
1055  }
1056 
1057  spin_unlock(&dev->pending_lock);
1058 
1059  for (i = 0; i < n; i++) {
1060  qp = qps[i];
1061  spin_lock(&qp->s_lock);
1062  if (qp->s_flags & QIB_S_WAIT_DMA_DESC) {
1063  qp->s_flags &= ~QIB_S_WAIT_DMA_DESC;
1064  qib_schedule_send(qp);
1065  }
1066  spin_unlock(&qp->s_lock);
1067  if (atomic_dec_and_test(&qp->refcount))
1068  wake_up(&qp->wait);
1069  }
1070 }
1071 
1072 /*
1073  * This is called with ppd->sdma_lock held.
1074  */
1075 static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
1076 {
1077  struct qib_verbs_txreq *tx =
1078  container_of(cookie, struct qib_verbs_txreq, txreq);
1079  struct qib_qp *qp = tx->qp;
1080 
1081  spin_lock(&qp->s_lock);
1082  if (tx->wqe)
1083  qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
1084  else if (qp->ibqp.qp_type == IB_QPT_RC) {
1085  struct qib_ib_header *hdr;
1086 
1087  if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
1088  hdr = &tx->align_buf->hdr;
1089  else {
1090  struct qib_ibdev *dev = to_idev(qp->ibqp.device);
1091 
1092  hdr = &dev->pio_hdrs[tx->hdr_inx].hdr;
1093  }
1094  qib_rc_send_complete(qp, hdr);
1095  }
1096  if (atomic_dec_and_test(&qp->s_dma_busy)) {
1097  if (qp->state == IB_QPS_RESET)
1098  wake_up(&qp->wait_dma);
1099  else if (qp->s_flags & QIB_S_WAIT_DMA) {
1100  qp->s_flags &= ~QIB_S_WAIT_DMA;
1101  qib_schedule_send(qp);
1102  }
1103  }
1104  spin_unlock(&qp->s_lock);
1105 
1106  qib_put_txreq(tx);
1107 }
1108 
1109 static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp)
1110 {
1111  unsigned long flags;
1112  int ret = 0;
1113 
1114  spin_lock_irqsave(&qp->s_lock, flags);
1115  if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
1116  spin_lock(&dev->pending_lock);
1117  if (list_empty(&qp->iowait)) {
1118  if (list_empty(&dev->memwait))
1119  mod_timer(&dev->mem_timer, jiffies + 1);
1120  qp->s_flags |= QIB_S_WAIT_KMEM;
1121  list_add_tail(&qp->iowait, &dev->memwait);
1122  }
1123  spin_unlock(&dev->pending_lock);
1124  qp->s_flags &= ~QIB_S_BUSY;
1125  ret = -EBUSY;
1126  }
1127  spin_unlock_irqrestore(&qp->s_lock, flags);
1128 
1129  return ret;
1130 }
1131 
1132 static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
1133  u32 hdrwords, struct qib_sge_state *ss, u32 len,
1134  u32 plen, u32 dwords)
1135 {
1136  struct qib_ibdev *dev = to_idev(qp->ibqp.device);
1137  struct qib_devdata *dd = dd_from_dev(dev);
1138  struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
1139  struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1140  struct qib_verbs_txreq *tx;
1141  struct qib_pio_header *phdr;
1142  u32 control;
1143  u32 ndesc;
1144  int ret;
1145 
1146  tx = qp->s_tx;
1147  if (tx) {
1148  qp->s_tx = NULL;
1149  /* resend previously constructed packet */
1150  ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
1151  goto bail;
1152  }
1153 
1154  tx = get_txreq(dev, qp);
1155  if (IS_ERR(tx))
1156  goto bail_tx;
1157 
1158  control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
1159  be16_to_cpu(hdr->lrh[0]) >> 12);
1160  tx->qp = qp;
1161  atomic_inc(&qp->refcount);
1162  tx->wqe = qp->s_wqe;
1163  tx->mr = qp->s_rdma_mr;
1164  if (qp->s_rdma_mr)
1165  qp->s_rdma_mr = NULL;
1166  tx->txreq.callback = sdma_complete;
1167  if (dd->flags & QIB_HAS_SDMA_TIMEOUT)
1168  tx->txreq.flags = QIB_SDMA_TXREQ_F_HEADTOHOST;
1169  else
1170  tx->txreq.flags = QIB_SDMA_TXREQ_F_INTREQ;
1171  if (plen + 1 > dd->piosize2kmax_dwords)
1172  tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF;
1173 
1174  if (len) {
1175  /*
1176  * Don't try to DMA if it takes more descriptors than
1177  * the queue holds.
1178  */
1179  ndesc = qib_count_sge(ss, len);
1180  if (ndesc >= ppd->sdma_descq_cnt)
1181  ndesc = 0;
1182  } else
1183  ndesc = 1;
1184  if (ndesc) {
1185  phdr = &dev->pio_hdrs[tx->hdr_inx];
1186  phdr->pbc[0] = cpu_to_le32(plen);
1187  phdr->pbc[1] = cpu_to_le32(control);
1188  memcpy(&phdr->hdr, hdr, hdrwords << 2);
1189  tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEDESC;
1190  tx->txreq.sg_count = ndesc;
1191  tx->txreq.addr = dev->pio_hdrs_phys +
1192  tx->hdr_inx * sizeof(struct qib_pio_header);
1193  tx->hdr_dwords = hdrwords + 2; /* add PBC length */
1194  ret = qib_sdma_verbs_send(ppd, ss, dwords, tx);
1195  goto bail;
1196  }
1197 
1198  /* Allocate a buffer and copy the header and payload to it. */
1199  tx->hdr_dwords = plen + 1;
1200  phdr = kmalloc(tx->hdr_dwords << 2, GFP_ATOMIC);
1201  if (!phdr)
1202  goto err_tx;
1203  phdr->pbc[0] = cpu_to_le32(plen);
1204  phdr->pbc[1] = cpu_to_le32(control);
1205  memcpy(&phdr->hdr, hdr, hdrwords << 2);
1206  qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len);
1207 
1208  tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr,
1209  tx->hdr_dwords << 2, DMA_TO_DEVICE);
1210  if (dma_mapping_error(&dd->pcidev->dev, tx->txreq.addr))
1211  goto map_err;
1212  tx->align_buf = phdr;
1213  tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF;
1214  tx->txreq.sg_count = 1;
1215  ret = qib_sdma_verbs_send(ppd, NULL, 0, tx);
1216  goto unaligned;
1217 
1218 map_err:
1219  kfree(phdr);
1220 err_tx:
1221  qib_put_txreq(tx);
1222  ret = wait_kmem(dev, qp);
1223 unaligned:
1224  ibp->n_unaligned++;
1225 bail:
1226  return ret;
1227 bail_tx:
1228  ret = PTR_ERR(tx);
1229  goto bail;
1230 }
1231 
1232 /*
1233  * If we are now in the error state, return zero to flush the
1234  * send work request.
1235  */
1236 static int no_bufs_available(struct qib_qp *qp)
1237 {
1238  struct qib_ibdev *dev = to_idev(qp->ibqp.device);
1239  struct qib_devdata *dd;
1240  unsigned long flags;
1241  int ret = 0;
1242 
1243  /*
1244  * Note that as soon as want_buffer() is called and
1245  * possibly before it returns, qib_ib_piobufavail()
1246  * could be called. Therefore, put QP on the I/O wait list before
1247  * enabling the PIO avail interrupt.
1248  */
1249  spin_lock_irqsave(&qp->s_lock, flags);
1250  if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
1251  spin_lock(&dev->pending_lock);
1252  if (list_empty(&qp->iowait)) {
1253  dev->n_piowait++;
1254  qp->s_flags |= QIB_S_WAIT_PIO;
1255  list_add_tail(&qp->iowait, &dev->piowait);
1256  dd = dd_from_dev(dev);
1257  dd->f_wantpiobuf_intr(dd, 1);
1258  }
1259  spin_unlock(&dev->pending_lock);
1260  qp->s_flags &= ~QIB_S_BUSY;
1261  ret = -EBUSY;
1262  }
1263  spin_unlock_irqrestore(&qp->s_lock, flags);
1264  return ret;
1265 }
1266 
1267 static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
1268  u32 hdrwords, struct qib_sge_state *ss, u32 len,
1269  u32 plen, u32 dwords)
1270 {
1271  struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
1272  struct qib_pportdata *ppd = dd->pport + qp->port_num - 1;
1273  u32 *hdr = (u32 *) ibhdr;
1274  u32 __iomem *piobuf_orig;
1275  u32 __iomem *piobuf;
1276  u64 pbc;
1277  unsigned long flags;
1278  unsigned flush_wc;
1279  u32 control;
1280  u32 pbufn;
1281 
1282  control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
1283  be16_to_cpu(ibhdr->lrh[0]) >> 12);
1284  pbc = ((u64) control << 32) | plen;
1285  piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
1286  if (unlikely(piobuf == NULL))
1287  return no_bufs_available(qp);
1288 
1289  /*
1290  * Write the pbc.
1291  * We have to flush after the PBC for correctness on some cpus
1292  * or WC buffer can be written out of order.
1293  */
1294  writeq(pbc, piobuf);
1295  piobuf_orig = piobuf;
1296  piobuf += 2;
1297 
1298  flush_wc = dd->flags & QIB_PIO_FLUSH_WC;
1299  if (len == 0) {
1300  /*
1301  * If there is just the header portion, must flush before
1302  * writing last word of header for correctness, and after
1303  * the last header word (trigger word).
1304  */
1305  if (flush_wc) {
1306  qib_flush_wc();
1307  qib_pio_copy(piobuf, hdr, hdrwords - 1);
1308  qib_flush_wc();
1309  __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
1310  qib_flush_wc();
1311  } else
1312  qib_pio_copy(piobuf, hdr, hdrwords);
1313  goto done;
1314  }
1315 
1316  if (flush_wc)
1317  qib_flush_wc();
1318  qib_pio_copy(piobuf, hdr, hdrwords);
1319  piobuf += hdrwords;
1320 
1321  /* The common case is aligned and contained in one segment. */
1322  if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
1323  !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
1324  u32 *addr = (u32 *) ss->sge.vaddr;
1325 
1326  /* Update address before sending packet. */
1327  update_sge(ss, len);
1328  if (flush_wc) {
1329  qib_pio_copy(piobuf, addr, dwords - 1);
1330  /* must flush early everything before trigger word */
1331  qib_flush_wc();
1332  __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
1333  /* be sure trigger word is written */
1334  qib_flush_wc();
1335  } else
1336  qib_pio_copy(piobuf, addr, dwords);
1337  goto done;
1338  }
1339  copy_io(piobuf, ss, len, flush_wc);
1340 done:
1341  if (dd->flags & QIB_USE_SPCL_TRIG) {
1342  u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
1343  qib_flush_wc();
1344  __raw_writel(0xaebecede, piobuf_orig + spcl_off);
1345  }
1346  qib_sendbuf_done(dd, pbufn);
1347  if (qp->s_rdma_mr) {
1348  qib_put_mr(qp->s_rdma_mr);
1349  qp->s_rdma_mr = NULL;
1350  }
1351  if (qp->s_wqe) {
1352  spin_lock_irqsave(&qp->s_lock, flags);
1353  qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
1354  spin_unlock_irqrestore(&qp->s_lock, flags);
1355  } else if (qp->ibqp.qp_type == IB_QPT_RC) {
1356  spin_lock_irqsave(&qp->s_lock, flags);
1357  qib_rc_send_complete(qp, ibhdr);
1358  spin_unlock_irqrestore(&qp->s_lock, flags);
1359  }
1360  return 0;
1361 }
1362 
1374 int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
1375  u32 hdrwords, struct qib_sge_state *ss, u32 len)
1376 {
1377  struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
1378  u32 plen;
1379  int ret;
1380  u32 dwords = (len + 3) >> 2;
1381 
1382  /*
1383  * Calculate the send buffer trigger address.
1384  * The +1 counts for the pbc control dword following the pbc length.
1385  */
1386  plen = hdrwords + dwords + 1;
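 /*
  * Illustrative arithmetic (numbers chosen for the example only): a
  * 52-byte payload gives dwords = (52 + 3) >> 2 = 13, so with
  * hdrwords = 7 the result is plen = 7 + 13 + 1 = 21 dwords, the extra
  * dword being the PBC control word noted in the comment above.
  */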
1387 
1388  /*
1389  * VL15 packets (IB_QPT_SMI) will always use PIO, so we
1390  * can defer SDMA restart until link goes ACTIVE without
1391  * worrying about just how we got there.
1392  */
1393  if (qp->ibqp.qp_type == IB_QPT_SMI ||
1394  !(dd->flags & QIB_HAS_SEND_DMA))
1395  ret = qib_verbs_send_pio(qp, hdr, hdrwords, ss, len,
1396  plen, dwords);
1397  else
1398  ret = qib_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1399  plen, dwords);
1400 
1401  return ret;
1402 }
1403 
1404 int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
1405  u64 *rwords, u64 *spkts, u64 *rpkts,
1406  u64 *xmit_wait)
1407 {
1408  int ret;
1409  struct qib_devdata *dd = ppd->dd;
1410 
1411  if (!(dd->flags & QIB_PRESENT)) {
1412  /* no hardware, freeze, etc. */
1413  ret = -EINVAL;
1414  goto bail;
1415  }
1416  *swords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDSEND);
1417  *rwords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDRCV);
1418  *spkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTSEND);
1419  *rpkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTRCV);
1420  *xmit_wait = dd->f_portcntr(ppd, QIBPORTCNTR_SENDSTALL);
1421 
1422  ret = 0;
1423 
1424 bail:
1425  return ret;
1426 }
1427 
1435 int qib_get_counters(struct qib_pportdata *ppd,
1436  struct qib_verbs_counters *cntrs)
1437 {
1438  int ret;
1439 
1440  if (!(ppd->dd->flags & QIB_PRESENT)) {
1441  /* no hardware, freeze, etc. */
1442  ret = -EINVAL;
1443  goto bail;
1444  }
1445  cntrs->symbol_error_counter =
1446  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR);
1447  cntrs->link_error_recovery_counter =
1448  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKERRRECOV);
1449  /*
1450  * The link downed counter counts when the other side downs the
1451  * connection. We add in the number of times we downed the link
1452  * due to local link integrity errors to compensate.
1453  */
1454  cntrs->link_downed_counter =
1455  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKDOWN);
1456  cntrs->port_rcv_errors =
1457  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXDROPPKT) +
1458  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVOVFL) +
1459  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERR_RLEN) +
1460  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_INVALIDRLEN) +
1461  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLINK) +
1462  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRICRC) +
1463  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRVCRC) +
1464  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLPCRC) +
1465  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_BADFORMAT);
1466  cntrs->port_rcv_errors +=
1467  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXLOCALPHYERR);
1468  cntrs->port_rcv_errors +=
1469  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXVLERR);
1470  cntrs->port_rcv_remphys_errors =
1471  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVEBP);
1472  cntrs->port_xmit_discards =
1473  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_UNSUPVL);
1474  cntrs->port_xmit_data = ppd->dd->f_portcntr(ppd,
1475  QIBPORTCNTR_WORDSEND);
1476  cntrs->port_rcv_data = ppd->dd->f_portcntr(ppd,
1477  QIBPORTCNTR_WORDRCV);
1478  cntrs->port_xmit_packets = ppd->dd->f_portcntr(ppd,
1479  QIBPORTCNTR_PKTSEND);
1480  cntrs->port_rcv_packets = ppd->dd->f_portcntr(ppd,
1481  QIBPORTCNTR_PKTRCV);
1482  cntrs->local_link_integrity_errors =
1483  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_LLI);
1484  cntrs->excessive_buffer_overrun_errors =
1485  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_EXCESSBUFOVFL);
1486  cntrs->vl15_dropped =
1487  ppd->dd->f_portcntr(ppd, QIBPORTCNTR_VL15PKTDROP);
1488 
1489  ret = 0;
1490 
1491 bail:
1492  return ret;
1493 }
1494 
1503 void qib_ib_piobufavail(struct qib_devdata *dd)
1504 {
1505  struct qib_ibdev *dev = &dd->verbs_dev;
1506  struct list_head *list;
1507  struct qib_qp *qps[5];
1508  struct qib_qp *qp;
1509  unsigned long flags;
1510  unsigned i, n;
1511 
1512  list = &dev->piowait;
1513  n = 0;
1514 
1515  /*
1516  * Note: checking that the piowait list is empty and clearing
1517  * the buffer available interrupt needs to be atomic or we
1518  * could end up with QPs on the wait list with the interrupt
1519  * disabled.
1520  */
1521  spin_lock_irqsave(&dev->pending_lock, flags);
1522  while (!list_empty(list)) {
1523  if (n == ARRAY_SIZE(qps))
1524  goto full;
1525  qp = list_entry(list->next, struct qib_qp, iowait);
1526  list_del_init(&qp->iowait);
1527  atomic_inc(&qp->refcount);
1528  qps[n++] = qp;
1529  }
1530  dd->f_wantpiobuf_intr(dd, 0);
1531 full:
1532  spin_unlock_irqrestore(&dev->pending_lock, flags);
1533 
1534  for (i = 0; i < n; i++) {
1535  qp = qps[i];
1536 
1537  spin_lock_irqsave(&qp->s_lock, flags);
1538  if (qp->s_flags & QIB_S_WAIT_PIO) {
1539  qp->s_flags &= ~QIB_S_WAIT_PIO;
1540  qib_schedule_send(qp);
1541  }
1542  spin_unlock_irqrestore(&qp->s_lock, flags);
1543 
1544  /* Notify qib_destroy_qp() if it is waiting. */
1545  if (atomic_dec_and_test(&qp->refcount))
1546  wake_up(&qp->wait);
1547  }
1548 }
1549 
1550 static int qib_query_device(struct ib_device *ibdev,
1551  struct ib_device_attr *props)
1552 {
1553  struct qib_devdata *dd = dd_from_ibdev(ibdev);
1554  struct qib_ibdev *dev = to_idev(ibdev);
1555 
1556  memset(props, 0, sizeof(*props));
1557 
1558  props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
1559  IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
1560  IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
1561  IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
1562  props->page_size_cap = PAGE_SIZE;
1563  props->vendor_id =
1564  QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
1565  props->vendor_part_id = dd->deviceid;
1566  props->hw_ver = dd->minrev;
1567  props->sys_image_guid = ib_qib_sys_image_guid;
1568  props->max_mr_size = ~0ULL;
1569  props->max_qp = ib_qib_max_qps;
1570  props->max_qp_wr = ib_qib_max_qp_wrs;
1571  props->max_sge = ib_qib_max_sges;
1572  props->max_cq = ib_qib_max_cqs;
1573  props->max_ah = ib_qib_max_ahs;
1574  props->max_cqe = ib_qib_max_cqes;
1575  props->max_mr = dev->lk_table.max;
1576  props->max_fmr = dev->lk_table.max;
1577  props->max_map_per_fmr = 32767;
1578  props->max_pd = ib_qib_max_pds;
1579  props->max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
1580  props->max_qp_init_rd_atom = 255;
1581  /* props->max_res_rd_atom */
1582  props->max_srq = ib_qib_max_srqs;
1583  props->max_srq_wr = ib_qib_max_srq_wrs;
1584  props->max_srq_sge = ib_qib_max_srq_sges;
1585  /* props->local_ca_ack_delay */
1586  props->atomic_cap = IB_ATOMIC_GLOB;
1587  props->max_pkeys = qib_get_npkeys(dd);
1588  props->max_mcast_grp = ib_qib_max_mcast_grps;
1589  props->max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
1590  props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
1591  props->max_mcast_grp;
1592 
1593  return 0;
1594 }
1595 
1596 static int qib_query_port(struct ib_device *ibdev, u8 port,
1597  struct ib_port_attr *props)
1598 {
1599  struct qib_devdata *dd = dd_from_ibdev(ibdev);
1600  struct qib_ibport *ibp = to_iport(ibdev, port);
1601  struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1602  enum ib_mtu mtu;
1603  u16 lid = ppd->lid;
1604 
1605  memset(props, 0, sizeof(*props));
1606  props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
1607  props->lmc = ppd->lmc;
1608  props->sm_lid = ibp->sm_lid;
1609  props->sm_sl = ibp->sm_sl;
1610  props->state = dd->f_iblink_state(ppd->lastibcstat);
1611  props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat);
1612  props->port_cap_flags = ibp->port_cap_flags;
1613  props->gid_tbl_len = QIB_GUIDS_PER_PORT;
1614  props->max_msg_sz = 0x80000000;
1615  props->pkey_tbl_len = qib_get_npkeys(dd);
1616  props->bad_pkey_cntr = ibp->pkey_violations;
1617  props->qkey_viol_cntr = ibp->qkey_violations;
1618  props->active_width = ppd->link_width_active;
1619  /* See rate_show() */
1620  props->active_speed = ppd->link_speed_active;
1621  props->max_vl_num = qib_num_vls(ppd->vls_supported);
1622  props->init_type_reply = 0;
1623 
1624  props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
1625  switch (ppd->ibmtu) {
1626  case 4096:
1627  mtu = IB_MTU_4096;
1628  break;
1629  case 2048:
1630  mtu = IB_MTU_2048;
1631  break;
1632  case 1024:
1633  mtu = IB_MTU_1024;
1634  break;
1635  case 512:
1636  mtu = IB_MTU_512;
1637  break;
1638  case 256:
1639  mtu = IB_MTU_256;
1640  break;
1641  default:
1642  mtu = IB_MTU_2048;
1643  }
1644  props->active_mtu = mtu;
1645  props->subnet_timeout = ibp->subnet_timeout;
1646 
1647  return 0;
1648 }
1649 
1650 static int qib_modify_device(struct ib_device *device,
1651  int device_modify_mask,
1652  struct ib_device_modify *device_modify)
1653 {
1654  struct qib_devdata *dd = dd_from_ibdev(device);
1655  unsigned i;
1656  int ret;
1657 
1658  if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
1659  IB_DEVICE_MODIFY_NODE_DESC)) {
1660  ret = -EOPNOTSUPP;
1661  goto bail;
1662  }
1663 
1664  if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
1665  memcpy(device->node_desc, device_modify->node_desc, 64);
1666  for (i = 0; i < dd->num_pports; i++) {
1667  struct qib_ibport *ibp = &dd->pport[i].ibport_data;
1668 
1669  qib_node_desc_chg(ibp);
1670  }
1671  }
1672 
1673  if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
1674  ib_qib_sys_image_guid =
1675  cpu_to_be64(device_modify->sys_image_guid);
1676  for (i = 0; i < dd->num_pports; i++) {
1677  struct qib_ibport *ibp = &dd->pport[i].ibport_data;
1678 
1679  qib_sys_guid_chg(ibp);
1680  }
1681  }
1682 
1683  ret = 0;
1684 
1685 bail:
1686  return ret;
1687 }
1688 
1689 static int qib_modify_port(struct ib_device *ibdev, u8 port,
1690  int port_modify_mask, struct ib_port_modify *props)
1691 {
1692  struct qib_ibport *ibp = to_iport(ibdev, port);
1693  struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1694 
1695  ibp->port_cap_flags |= props->set_port_cap_mask;
1696  ibp->port_cap_flags &= ~props->clr_port_cap_mask;
1697  if (props->set_port_cap_mask || props->clr_port_cap_mask)
1698  qib_cap_mask_chg(ibp);
1699  if (port_modify_mask & IB_PORT_SHUTDOWN)
1700  qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
1701  if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
1702  ibp->qkey_violations = 0;
1703  return 0;
1704 }
1705 
1706 static int qib_query_gid(struct ib_device *ibdev, u8 port,
1707  int index, union ib_gid *gid)
1708 {
1709  struct qib_devdata *dd = dd_from_ibdev(ibdev);
1710  int ret = 0;
1711 
1712  if (!port || port > dd->num_pports)
1713  ret = -EINVAL;
1714  else {
1715  struct qib_ibport *ibp = to_iport(ibdev, port);
1716  struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1717 
1718  gid->global.subnet_prefix = ibp->gid_prefix;
1719  if (index == 0)
1720  gid->global.interface_id = ppd->guid;
1721  else if (index < QIB_GUIDS_PER_PORT)
1722  gid->global.interface_id = ibp->guids[index - 1];
1723  else
1724  ret = -EINVAL;
1725  }
1726 
1727  return ret;
1728 }
1729 
1730 static struct ib_pd *qib_alloc_pd(struct ib_device *ibdev,
1731  struct ib_ucontext *context,
1732  struct ib_udata *udata)
1733 {
1734  struct qib_ibdev *dev = to_idev(ibdev);
1735  struct qib_pd *pd;
1736  struct ib_pd *ret;
1737 
1738  /*
1739  * This is actually totally arbitrary. Some correctness tests
1740  * assume there's a maximum number of PDs that can be allocated.
1741  * We don't actually have this limit, but we fail the test if
1742  * we allow allocations of more than we report for this value.
1743  */
1744 
1745  pd = kmalloc(sizeof *pd, GFP_KERNEL);
1746  if (!pd) {
1747  ret = ERR_PTR(-ENOMEM);
1748  goto bail;
1749  }
1750 
1751  spin_lock(&dev->n_pds_lock);
1752  if (dev->n_pds_allocated == ib_qib_max_pds) {
1753  spin_unlock(&dev->n_pds_lock);
1754  kfree(pd);
1755  ret = ERR_PTR(-ENOMEM);
1756  goto bail;
1757  }
1758 
1759  dev->n_pds_allocated++;
1760  spin_unlock(&dev->n_pds_lock);
1761 
1762  /* ib_alloc_pd() will initialize pd->ibpd. */
1763  pd->user = udata != NULL;
1764 
1765  ret = &pd->ibpd;
1766 
1767 bail:
1768  return ret;
1769 }
1770 
1771 static int qib_dealloc_pd(struct ib_pd *ibpd)
1772 {
1773  struct qib_pd *pd = to_ipd(ibpd);
1774  struct qib_ibdev *dev = to_idev(ibpd->device);
1775 
1776  spin_lock(&dev->n_pds_lock);
1777  dev->n_pds_allocated--;
1778  spin_unlock(&dev->n_pds_lock);
1779 
1780  kfree(pd);
1781 
1782  return 0;
1783 }
1784 
1785 int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
1786 {
1787  /* A multicast address requires a GRH (see ch. 8.4.1). */
1788  if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
1789  ah_attr->dlid != QIB_PERMISSIVE_LID &&
1790  !(ah_attr->ah_flags & IB_AH_GRH))
1791  goto bail;
1792  if ((ah_attr->ah_flags & IB_AH_GRH) &&
1793  ah_attr->grh.sgid_index >= QIB_GUIDS_PER_PORT)
1794  goto bail;
1795  if (ah_attr->dlid == 0)
1796  goto bail;
1797  if (ah_attr->port_num < 1 ||
1798  ah_attr->port_num > ibdev->phys_port_cnt)
1799  goto bail;
1800  if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
1801  ib_rate_to_mult(ah_attr->static_rate) < 0)
1802  goto bail;
1803  if (ah_attr->sl > 15)
1804  goto bail;
1805  return 0;
1806 bail:
1807  return -EINVAL;
1808 }
1809 
1817 static struct ib_ah *qib_create_ah(struct ib_pd *pd,
1818  struct ib_ah_attr *ah_attr)
1819 {
1820  struct qib_ah *ah;
1821  struct ib_ah *ret;
1822  struct qib_ibdev *dev = to_idev(pd->device);
1823  unsigned long flags;
1824 
1825  if (qib_check_ah(pd->device, ah_attr)) {
1826  ret = ERR_PTR(-EINVAL);
1827  goto bail;
1828  }
1829 
1830  ah = kmalloc(sizeof *ah, GFP_ATOMIC);
1831  if (!ah) {
1832  ret = ERR_PTR(-ENOMEM);
1833  goto bail;
1834  }
1835 
1836  spin_lock_irqsave(&dev->n_ahs_lock, flags);
1837  if (dev->n_ahs_allocated == ib_qib_max_ahs) {
1838  spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1839  kfree(ah);
1840  ret = ERR_PTR(-ENOMEM);
1841  goto bail;
1842  }
1843 
1844  dev->n_ahs_allocated++;
1845  spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1846 
1847  /* ib_create_ah() will initialize ah->ibah. */
1848  ah->attr = *ah_attr;
1849  atomic_set(&ah->refcount, 0);
1850 
1851  ret = &ah->ibah;
1852 
1853 bail:
1854  return ret;
1855 }
1856 
1857 struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
1858 {
1859  struct ib_ah_attr attr;
1860  struct ib_ah *ah = ERR_PTR(-EINVAL);
1861  struct qib_qp *qp0;
1862 
1863  memset(&attr, 0, sizeof attr);
1864  attr.dlid = dlid;
1865  attr.port_num = ppd_from_ibp(ibp)->port;
1866  rcu_read_lock();
1867  qp0 = rcu_dereference(ibp->qp0);
1868  if (qp0)
1869  ah = ib_create_ah(qp0->ibqp.pd, &attr);
1870  rcu_read_unlock();
1871  return ah;
1872 }
1873 
1880 static int qib_destroy_ah(struct ib_ah *ibah)
1881 {
1882  struct qib_ibdev *dev = to_idev(ibah->device);
1883  struct qib_ah *ah = to_iah(ibah);
1884  unsigned long flags;
1885 
1886  if (atomic_read(&ah->refcount) != 0)
1887  return -EBUSY;
1888 
1889  spin_lock_irqsave(&dev->n_ahs_lock, flags);
1890  dev->n_ahs_allocated--;
1891  spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1892 
1893  kfree(ah);
1894 
1895  return 0;
1896 }
1897 
1898 static int qib_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
1899 {
1900  struct qib_ah *ah = to_iah(ibah);
1901 
1902  if (qib_check_ah(ibah->device, ah_attr))
1903  return -EINVAL;
1904 
1905  ah->attr = *ah_attr;
1906 
1907  return 0;
1908 }
1909 
1910 static int qib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
1911 {
1912  struct qib_ah *ah = to_iah(ibah);
1913 
1914  *ah_attr = ah->attr;
1915 
1916  return 0;
1917 }
1918 
1923 unsigned qib_get_npkeys(struct qib_devdata *dd)
1924 {
1925  return ARRAY_SIZE(dd->rcd[0]->pkeys);
1926 }
1927 
1928 /*
1929  * Return the indexed PKEY from the port PKEY table.
1930  * No need to validate rcd[ctxt]; the port is setup if we are here.
1931  */
1932 unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index)
1933 {
1934  struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1935  struct qib_devdata *dd = ppd->dd;
1936  unsigned ctxt = ppd->hw_pidx;
1937  unsigned ret;
1938 
1939  /* dd->rcd null if mini_init or some init failures */
1940  if (!dd->rcd || index >= ARRAY_SIZE(dd->rcd[ctxt]->pkeys))
1941  ret = 0;
1942  else
1943  ret = dd->rcd[ctxt]->pkeys[index];
1944 
1945  return ret;
1946 }
1947 
1948 static int qib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
1949  u16 *pkey)
1950 {
1951  struct qib_devdata *dd = dd_from_ibdev(ibdev);
1952  int ret;
1953 
1954  if (index >= qib_get_npkeys(dd)) {
1955  ret = -EINVAL;
1956  goto bail;
1957  }
1958 
1959  *pkey = qib_get_pkey(to_iport(ibdev, port), index);
1960  ret = 0;
1961 
1962 bail:
1963  return ret;
1964 }
1965 
1972 static struct ib_ucontext *qib_alloc_ucontext(struct ib_device *ibdev,
1973  struct ib_udata *udata)
1974 {
1975  struct qib_ucontext *context;
1976  struct ib_ucontext *ret;
1977 
1978  context = kmalloc(sizeof *context, GFP_KERNEL);
1979  if (!context) {
1980  ret = ERR_PTR(-ENOMEM);
1981  goto bail;
1982  }
1983 
1984  ret = &context->ibucontext;
1985 
1986 bail:
1987  return ret;
1988 }
1989 
1990 static int qib_dealloc_ucontext(struct ib_ucontext *context)
1991 {
1992  kfree(to_iucontext(context));
1993  return 0;
1994 }
1995 
1996 static void init_ibport(struct qib_pportdata *ppd)
1997 {
1998  struct qib_verbs_counters cntrs;
1999  struct qib_ibport *ibp = &ppd->ibport_data;
2000 
2001  spin_lock_init(&ibp->lock);
2002  /* Set the prefix to the default value (see ch. 4.1.1) */
2003  ibp->gid_prefix = IB_DEFAULT_GID_PREFIX;
2010  if (ppd->dd->flags & QIB_HAS_LINK_LATENCY)
2011  ibp->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
2017 
2018  /* Snapshot current HW counters to "clear" them. */
2019  qib_get_counters(ppd, &cntrs);
2020  ibp->z_symbol_error_counter = cntrs.symbol_error_counter;
2021  ibp->z_link_error_recovery_counter =
2022  cntrs.link_error_recovery_counter;
2023  ibp->z_link_downed_counter = cntrs.link_downed_counter;
2024  ibp->z_port_rcv_errors = cntrs.port_rcv_errors;
2025  ibp->z_port_rcv_remphys_errors = cntrs.port_rcv_remphys_errors;
2026  ibp->z_port_xmit_discards = cntrs.port_xmit_discards;
2027  ibp->z_port_xmit_data = cntrs.port_xmit_data;
2028  ibp->z_port_rcv_data = cntrs.port_rcv_data;
2029  ibp->z_port_xmit_packets = cntrs.port_xmit_packets;
2030  ibp->z_port_rcv_packets = cntrs.port_rcv_packets;
2031  ibp->z_local_link_integrity_errors =
2032  cntrs.local_link_integrity_errors;
2033  ibp->z_excessive_buffer_overrun_errors =
2034  cntrs.excessive_buffer_overrun_errors;
2035  ibp->z_vl15_dropped = cntrs.vl15_dropped;
2036  RCU_INIT_POINTER(ibp->qp0, NULL);
2037  RCU_INIT_POINTER(ibp->qp1, NULL);
2038 }
2039 
2045 int qib_register_ib_device(struct qib_devdata *dd)
2046 {
2047  struct qib_ibdev *dev = &dd->verbs_dev;
2048  struct ib_device *ibdev = &dev->ibdev;
2049  struct qib_pportdata *ppd = dd->pport;
2050  unsigned i, lk_tab_size;
2051  int ret;
2052 
2053  dev->qp_table_size = ib_qib_qp_table_size;
2054  get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd));
2055  dev->qp_table = kmalloc(dev->qp_table_size * sizeof *dev->qp_table,
2056  GFP_KERNEL);
2057  if (!dev->qp_table) {
2058  ret = -ENOMEM;
2059  goto err_qpt;
2060  }
2061  for (i = 0; i < dev->qp_table_size; i++)
2062  RCU_INIT_POINTER(dev->qp_table[i], NULL);
2063 
2064  for (i = 0; i < dd->num_pports; i++)
2065  init_ibport(ppd + i);
2066 
2067  /* Only need to initialize non-zero fields. */
2068  spin_lock_init(&dev->qpt_lock);
2069  spin_lock_init(&dev->n_pds_lock);
2070  spin_lock_init(&dev->n_ahs_lock);
2071  spin_lock_init(&dev->n_cqs_lock);
2072  spin_lock_init(&dev->n_qps_lock);
2073  spin_lock_init(&dev->n_srqs_lock);
2074  spin_lock_init(&dev->n_mcast_grps_lock);
2075  init_timer(&dev->mem_timer);
2076  dev->mem_timer.function = mem_timer;
2077  dev->mem_timer.data = (unsigned long) dev;
2078 
2079  qib_init_qpn_table(dd, &dev->qpn_table);
2080 
2081  /*
2082  * The top ib_qib_lkey_table_size bits are used to index the
2083  * table. The lower 8 bits can be owned by the user (copied from
2084  * the LKEY). The remaining bits act as a generation number or tag.
2085  */
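 /*
  * For instance (using the module default, not a value fixed by this
  * code): ib_qib_lkey_table_size == 16 gives lk_table.max = 1 << 16 =
  * 65536 slots in the allocation below, the top 16 bits of a 32-bit
  * LKEY select the slot, and the low 8 bits remain under user control.
  */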
2086  spin_lock_init(&dev->lk_table.lock);
2087  dev->lk_table.max = 1 << ib_qib_lkey_table_size;
2088  lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
2089  dev->lk_table.table = (struct qib_mregion __rcu **)
2090  __get_free_pages(GFP_KERNEL, get_order(lk_tab_size));
2091  if (dev->lk_table.table == NULL) {
2092  ret = -ENOMEM;
2093  goto err_lk;
2094  }
2095  RCU_INIT_POINTER(dev->dma_mr, NULL);
2096  for (i = 0; i < dev->lk_table.max; i++)
2097  RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
2098  INIT_LIST_HEAD(&dev->pending_mmaps);
2099  spin_lock_init(&dev->pending_lock);
2100  dev->mmap_offset = PAGE_SIZE;
2101  spin_lock_init(&dev->mmap_offset_lock);
2102  INIT_LIST_HEAD(&dev->piowait);
2103  INIT_LIST_HEAD(&dev->dmawait);
2104  INIT_LIST_HEAD(&dev->txwait);
2105  INIT_LIST_HEAD(&dev->memwait);
2106  INIT_LIST_HEAD(&dev->txreq_free);
2107 
2108  if (ppd->sdma_descq_cnt) {
2109  dev->pio_hdrs = dma_alloc_coherent(&dd->pcidev->dev,
2110  ppd->sdma_descq_cnt *
2111  sizeof(struct qib_pio_header),
2112  &dev->pio_hdrs_phys,
2113  GFP_KERNEL);
2114  if (!dev->pio_hdrs) {
2115  ret = -ENOMEM;
2116  goto err_hdrs;
2117  }
2118  }
2119 
2120  for (i = 0; i < ppd->sdma_descq_cnt; i++) {
2121  struct qib_verbs_txreq *tx;
2122 
2123  tx = kzalloc(sizeof *tx, GFP_KERNEL);
2124  if (!tx) {
2125  ret = -ENOMEM;
2126  goto err_tx;
2127  }
2128  tx->hdr_inx = i;
2129  list_add(&tx->txreq.list, &dev->txreq_free);
2130  }
2131 
2132  /*
2133  * The system image GUID is supposed to be the same for all
2134  * IB HCAs in a single system but since there can be other
2135  * device types in the system, we can't be sure this is unique.
2136  */
2137  if (!ib_qib_sys_image_guid)
2138  ib_qib_sys_image_guid = ppd->guid;
2139 
2140  strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX);
2141  ibdev->owner = THIS_MODULE;
2142  ibdev->node_guid = ppd->guid;
2143  ibdev->uverbs_abi_ver = QIB_UVERBS_ABI_VERSION;
2144  ibdev->uverbs_cmd_mask =
2145  (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
2146  (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
2147  (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
2148  (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
2149  (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
2150  (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
2151  (1ull << IB_USER_VERBS_CMD_MODIFY_AH) |
2152  (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
2153  (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
2154  (1ull << IB_USER_VERBS_CMD_REG_MR) |
2155  (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
2156  (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2157  (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
2158  (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
2159  (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
2160  (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
2161  (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
2162  (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
2163  (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
2164  (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
2165  (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
2166  (1ull << IB_USER_VERBS_CMD_POST_SEND) |
2167  (1ull << IB_USER_VERBS_CMD_POST_RECV) |
2168  (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
2169  (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
2170  (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
2171  (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
2172  (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
2173  (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
2174  (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
2175  ibdev->node_type = RDMA_NODE_IB_CA;
2176  ibdev->phys_port_cnt = dd->num_pports;
2177  ibdev->num_comp_vectors = 1;
2178  ibdev->dma_device = &dd->pcidev->dev;
2179  ibdev->query_device = qib_query_device;
2180  ibdev->modify_device = qib_modify_device;
2181  ibdev->query_port = qib_query_port;
2182  ibdev->modify_port = qib_modify_port;
2183  ibdev->query_pkey = qib_query_pkey;
2184  ibdev->query_gid = qib_query_gid;
2185  ibdev->alloc_ucontext = qib_alloc_ucontext;
2186  ibdev->dealloc_ucontext = qib_dealloc_ucontext;
2187  ibdev->alloc_pd = qib_alloc_pd;
2188  ibdev->dealloc_pd = qib_dealloc_pd;
2189  ibdev->create_ah = qib_create_ah;
2190  ibdev->destroy_ah = qib_destroy_ah;
2191  ibdev->modify_ah = qib_modify_ah;
2192  ibdev->query_ah = qib_query_ah;
2193  ibdev->create_srq = qib_create_srq;
2194  ibdev->modify_srq = qib_modify_srq;
2195  ibdev->query_srq = qib_query_srq;
2196  ibdev->destroy_srq = qib_destroy_srq;
2197  ibdev->create_qp = qib_create_qp;
2198  ibdev->modify_qp = qib_modify_qp;
2199  ibdev->query_qp = qib_query_qp;
2200  ibdev->destroy_qp = qib_destroy_qp;
2201  ibdev->post_send = qib_post_send;
2202  ibdev->post_recv = qib_post_receive;
2203  ibdev->post_srq_recv = qib_post_srq_recv;
2204  ibdev->create_cq = qib_create_cq;
2205  ibdev->destroy_cq = qib_destroy_cq;
2206  ibdev->resize_cq = qib_resize_cq;
2207  ibdev->poll_cq = qib_poll_cq;
2208  ibdev->req_notify_cq = qib_req_notify_cq;
2209  ibdev->get_dma_mr = qib_get_dma_mr;
2210  ibdev->reg_phys_mr = qib_reg_phys_mr;
2211  ibdev->reg_user_mr = qib_reg_user_mr;
2212  ibdev->dereg_mr = qib_dereg_mr;
2213  ibdev->alloc_fast_reg_mr = qib_alloc_fast_reg_mr;
2214  ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list;
2215  ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list;
2216  ibdev->alloc_fmr = qib_alloc_fmr;
2217  ibdev->map_phys_fmr = qib_map_phys_fmr;
2218  ibdev->unmap_fmr = qib_unmap_fmr;
2219  ibdev->dealloc_fmr = qib_dealloc_fmr;
2220  ibdev->attach_mcast = qib_multicast_attach;
2221  ibdev->detach_mcast = qib_multicast_detach;
2222  ibdev->process_mad = qib_process_mad;
2223  ibdev->mmap = qib_mmap;
2224  ibdev->dma_ops = &qib_dma_mapping_ops;
2225 
2226  snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
2227  "QLogic Infiniband HCA %s", init_utsname()->nodename);
2228 
2229  ret = ib_register_device(ibdev, qib_create_port_files);
2230  if (ret)
2231  goto err_reg;
2232 
2233  ret = qib_create_agents(dev);
2234  if (ret)
2235  goto err_agents;
2236 
2237  if (qib_verbs_register_sysfs(dd))
2238  goto err_class;
2239 
2240  goto bail;
2241 
2242 err_class:
2243  qib_free_agents(dev);
2244 err_agents:
2245  ib_unregister_device(ibdev);
2246 err_reg:
2247 err_tx:
2248  while (!list_empty(&dev->txreq_free)) {
2249  struct list_head *l = dev->txreq_free.next;
2250  struct qib_verbs_txreq *tx;
2251 
2252  list_del(l);
2253  tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
2254  kfree(tx);
2255  }
2256  if (ppd->sdma_descq_cnt)
2257  dma_free_coherent(&dd->pcidev->dev,
2258  ppd->sdma_descq_cnt *
2259  sizeof(struct qib_pio_header),
2260  dev->pio_hdrs, dev->pio_hdrs_phys);
2261 err_hdrs:
2262  free_pages((unsigned long) dev->lk_table.table, get_order(lk_tab_size));
2263 err_lk:
2264  kfree(dev->qp_table);
2265 err_qpt:
2266  qib_dev_err(dd, "cannot register verbs: %d!\n", -ret);
2267 bail:
2268  return ret;
2269 }
2270 
2271 void qib_unregister_ib_device(struct qib_devdata *dd)
2272 {
2273  struct qib_ibdev *dev = &dd->verbs_dev;
2274  struct ib_device *ibdev = &dev->ibdev;
2275  u32 qps_inuse;
2276  unsigned lk_tab_size;
2277 
2278  qib_verbs_unregister_sysfs(dd);
2279 
2280  qib_free_agents(dev);
2281 
2282  ib_unregister_device(ibdev);
2283 
2284  if (!list_empty(&dev->piowait))
2285  qib_dev_err(dd, "piowait list not empty!\n");
2286  if (!list_empty(&dev->dmawait))
2287  qib_dev_err(dd, "dmawait list not empty!\n");
2288  if (!list_empty(&dev->txwait))
2289  qib_dev_err(dd, "txwait list not empty!\n");
2290  if (!list_empty(&dev->memwait))
2291  qib_dev_err(dd, "memwait list not empty!\n");
2292  if (dev->dma_mr)
2293  qib_dev_err(dd, "DMA MR not NULL!\n");
2294 
2295  qps_inuse = qib_free_all_qps(dd);
2296  if (qps_inuse)
2297  qib_dev_err(dd, "QP memory leak! %u still in use\n",
2298  qps_inuse);
2299 
2300  del_timer_sync(&dev->mem_timer);
2301  qib_free_qpn_table(&dev->qpn_table);
2302  while (!list_empty(&dev->txreq_free)) {
2303  struct list_head *l = dev->txreq_free.next;
2304  struct qib_verbs_txreq *tx;
2305 
2306  list_del(l);
2307  tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
2308  kfree(tx);
2309  }
2310  if (dd->pport->sdma_descq_cnt)
2311  dma_free_coherent(&dd->pcidev->dev,
2312  dd->pport->sdma_descq_cnt *
2313  sizeof(struct qib_pio_header),
2314  dev->pio_hdrs, dev->pio_hdrs_phys);
2315  lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
2316  free_pages((unsigned long) dev->lk_table.table,
2317  get_order(lk_tab_size));
2318  kfree(dev->qp_table);
2319 }
2320 
2321 /*
2322  * This must be called with s_lock held.
2323  */
2324 void qib_schedule_send(struct qib_qp *qp)
2325 {
2326  if (qib_send_ok(qp)) {
2327  struct qib_ibport *ibp =
2328  to_iport(qp->ibqp.device, qp->port_num);
2329  struct qib_pportdata *ppd = ppd_from_ibp(ibp);
2330 
2331  queue_work(ppd->qib_wq, &qp->s_work);
2332  }
2333 }