Linux Kernel 3.7.1
ipath_verbs.c
1 /*
2  * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses. You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  * Redistribution and use in source and binary forms, with or
12  * without modification, are permitted provided that the following
13  * conditions are met:
14  *
15  * - Redistributions of source code must retain the above
16  * copyright notice, this list of conditions and the following
17  * disclaimer.
18  *
19  * - Redistributions in binary form must reproduce the above
20  * copyright notice, this list of conditions and the following
21  * disclaimer in the documentation and/or other materials
22  * provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #include <rdma/ib_mad.h>
35 #include <rdma/ib_user_verbs.h>
36 #include <linux/io.h>
37 #include <linux/slab.h>
38 #include <linux/module.h>
39 #include <linux/utsname.h>
40 #include <linux/rculist.h>
41 
42 #include "ipath_kernel.h"
43 #include "ipath_verbs.h"
44 #include "ipath_common.h"
45 
46 static unsigned int ib_ipath_qp_table_size = 251;
47 module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
48 MODULE_PARM_DESC(qp_table_size, "QP table size");
49 
50 unsigned int ib_ipath_lkey_table_size = 12;
51 module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
52  S_IRUGO);
53 MODULE_PARM_DESC(lkey_table_size,
54  "LKEY table size in bits (2^n, 1 <= n <= 23)");
55 
56 static unsigned int ib_ipath_max_pds = 0xFFFF;
57 module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
58 MODULE_PARM_DESC(max_pds,
59  "Maximum number of protection domains to support");
60 
61 static unsigned int ib_ipath_max_ahs = 0xFFFF;
62 module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
63 MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
64 
65 unsigned int ib_ipath_max_cqes = 0x2FFFF;
66 module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
67 MODULE_PARM_DESC(max_cqes,
68  "Maximum number of completion queue entries to support");
69 
70 unsigned int ib_ipath_max_cqs = 0x1FFFF;
71 module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
72 MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
73 
74 unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
75 module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
76  S_IWUSR | S_IRUGO);
77 MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
78 
79 unsigned int ib_ipath_max_qps = 16384;
80 module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
81 MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
82 
83 unsigned int ib_ipath_max_sges = 0x60;
84 module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
85 MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
86 
87 unsigned int ib_ipath_max_mcast_grps = 16384;
88 module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
89  S_IWUSR | S_IRUGO);
90 MODULE_PARM_DESC(max_mcast_grps,
91  "Maximum number of multicast groups to support");
92 
93 unsigned int ib_ipath_max_mcast_qp_attached = 16;
94 module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
95  uint, S_IWUSR | S_IRUGO);
96 MODULE_PARM_DESC(max_mcast_qp_attached,
97  "Maximum number of attached QPs to support");
98 
99 unsigned int ib_ipath_max_srqs = 1024;
100 module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
101 MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
102 
103 unsigned int ib_ipath_max_srq_sges = 128;
104 module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
105  uint, S_IWUSR | S_IRUGO);
106 MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
107 
108 unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
109 module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
110  uint, S_IWUSR | S_IRUGO);
111 MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
112 
113 static unsigned int ib_ipath_disable_sma;
114 module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
115 MODULE_PARM_DESC(disable_sma, "Disable the SMA");
116 
117 /*
118  * Note that it is OK to post send work requests in the SQE and ERR
119  * states; ipath_do_send() will process them and generate error
120  * completions as per IB 1.2 C10-96.
121  */
122 const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
123  [IB_QPS_RESET] = 0,
124  [IB_QPS_INIT] = IPATH_POST_RECV_OK,
125  [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
126  [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
127  IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
128  IPATH_PROCESS_NEXT_SEND_OK,
129  [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
130  IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
131  [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
132  IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
133  [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
134  IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
135 };
136 
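/*
 * Usage sketch (annotation, not in the original file): callers gate each
 * verb on this table, e.g.
 *	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))
 *		goto bail_inval;
 * as ipath_post_one_send() and ipath_post_receive() do below.
 */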
137 struct ipath_ucontext {
138  struct ib_ucontext ibucontext;
139 };
140 
141 static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
142  *ibucontext)
143 {
144  return container_of(ibucontext, struct ipath_ucontext, ibucontext);
145 }
146 
147 /*
148  * Translate ib_wr_opcode into ib_wc_opcode.
149  */
150 const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
151  [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
152  [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
153  [IB_WR_SEND] = IB_WC_SEND,
154  [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
155  [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
156  [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
157  [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
158 };
159 
160 /*
161  * System image GUID.
162  */
163 static __be64 sys_image_guid;
164 
165 /**
166  * ipath_copy_sge - copy data to SGE memory
167  * @ss: the SGE state
168  * @data: the data to copy
169  * @length: the length of the data
170  */
171 void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
172 {
173  struct ipath_sge *sge = &ss->sge;
174 
175  while (length) {
176  u32 len = sge->length;
177 
178  if (len > length)
179  len = length;
180  if (len > sge->sge_length)
181  len = sge->sge_length;
182  BUG_ON(len == 0);
183  memcpy(sge->vaddr, data, len);
184  sge->vaddr += len;
185  sge->length -= len;
186  sge->sge_length -= len;
187  if (sge->sge_length == 0) {
188  if (--ss->num_sge)
189  *sge = *ss->sg_list++;
190  } else if (sge->length == 0 && sge->mr != NULL) {
191  if (++sge->n >= IPATH_SEGSZ) {
192  if (++sge->m >= sge->mr->mapsz)
193  break;
194  sge->n = 0;
195  }
196  sge->vaddr =
197  sge->mr->map[sge->m]->segs[sge->n].vaddr;
198  sge->length =
199  sge->mr->map[sge->m]->segs[sge->n].length;
200  }
201  data += len;
202  length -= len;
203  }
204 }
205 
206 /**
207  * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
208  * @ss: the SGE state
209  * @length: the number of bytes to skip
210  */
211 void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
212 {
213  struct ipath_sge *sge = &ss->sge;
214 
215  while (length) {
216  u32 len = sge->length;
217 
218  if (len > length)
219  len = length;
220  if (len > sge->sge_length)
221  len = sge->sge_length;
222  BUG_ON(len == 0);
223  sge->vaddr += len;
224  sge->length -= len;
225  sge->sge_length -= len;
226  if (sge->sge_length == 0) {
227  if (--ss->num_sge)
228  *sge = *ss->sg_list++;
229  } else if (sge->length == 0 && sge->mr != NULL) {
230  if (++sge->n >= IPATH_SEGSZ) {
231  if (++sge->m >= sge->mr->mapsz)
232  break;
233  sge->n = 0;
234  }
235  sge->vaddr =
236  sge->mr->map[sge->m]->segs[sge->n].vaddr;
237  sge->length =
238  sge->mr->map[sge->m]->segs[sge->n].length;
239  }
240  length -= len;
241  }
242 }
243 
244 /*
245  * Count the number of DMA descriptors needed to send length bytes of data.
246  * Don't modify the ipath_sge_state to get the count.
247  * Return zero if any of the segments is not aligned.
248  */
249 static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
250 {
251  struct ipath_sge *sg_list = ss->sg_list;
252  struct ipath_sge sge = ss->sge;
253  u8 num_sge = ss->num_sge;
254  u32 ndesc = 1; /* count the header */
255 
256  while (length) {
257  u32 len = sge.length;
258 
259  if (len > length)
260  len = length;
261  if (len > sge.sge_length)
262  len = sge.sge_length;
263  BUG_ON(len == 0);
264  if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
265  (len != length && (len & (sizeof(u32) - 1)))) {
266  ndesc = 0;
267  break;
268  }
269  ndesc++;
270  sge.vaddr += len;
271  sge.length -= len;
272  sge.sge_length -= len;
273  if (sge.sge_length == 0) {
274  if (--num_sge)
275  sge = *sg_list++;
276  } else if (sge.length == 0 && sge.mr != NULL) {
277  if (++sge.n >= IPATH_SEGSZ) {
278  if (++sge.m >= sge.mr->mapsz)
279  break;
280  sge.n = 0;
281  }
282  sge.vaddr =
283  sge.mr->map[sge.m]->segs[sge.n].vaddr;
284  sge.length =
285  sge.mr->map[sge.m]->segs[sge.n].length;
286  }
287  length -= len;
288  }
289  return ndesc;
290 }
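/*
 * Annotation (not in the original file): a return of 0 from
 * ipath_count_sge() signals a dword-misaligned segment; the caller,
 * ipath_verbs_send_dma(), then falls back to copying the payload into a
 * kmalloc'ed bounce buffer instead of pointing the SDMA engine at the
 * SGEs directly.
 */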
291 
292 /*
293  * Copy from the SGEs to the data buffer.
294  */
295 static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
296  u32 length)
297 {
298  struct ipath_sge *sge = &ss->sge;
299 
300  while (length) {
301  u32 len = sge->length;
302 
303  if (len > length)
304  len = length;
305  if (len > sge->sge_length)
306  len = sge->sge_length;
307  BUG_ON(len == 0);
308  memcpy(data, sge->vaddr, len);
309  sge->vaddr += len;
310  sge->length -= len;
311  sge->sge_length -= len;
312  if (sge->sge_length == 0) {
313  if (--ss->num_sge)
314  *sge = *ss->sg_list++;
315  } else if (sge->length == 0 && sge->mr != NULL) {
316  if (++sge->n >= IPATH_SEGSZ) {
317  if (++sge->m >= sge->mr->mapsz)
318  break;
319  sge->n = 0;
320  }
321  sge->vaddr =
322  sge->mr->map[sge->m]->segs[sge->n].vaddr;
323  sge->length =
324  sge->mr->map[sge->m]->segs[sge->n].length;
325  }
326  data += len;
327  length -= len;
328  }
329 }
330 
331 /**
332  * ipath_post_one_send - post one RC, UC, or UD send work request
333  * @qp: the QP to post on
334  * @wr: the work request to send
335  */
336 static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
337 {
338  struct ipath_swqe *wqe;
339  u32 next;
340  int i;
341  int j;
342  int acc;
343  int ret;
344  unsigned long flags;
345  struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
346 
347  spin_lock_irqsave(&qp->s_lock, flags);
348 
349  if (qp->ibqp.qp_type != IB_QPT_SMI &&
350  !(dd->ipath_flags & IPATH_LINKACTIVE)) {
351  ret = -ENETDOWN;
352  goto bail;
353  }
354 
355  /* Check that state is OK to post send. */
356  if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))
357  goto bail_inval;
358 
359  /* IB spec says that num_sge == 0 is OK. */
360  if (wr->num_sge > qp->s_max_sge)
361  goto bail_inval;
362 
363  /*
364  * Don't allow RDMA reads or atomic operations on UC or
365  * undefined operations.
366  * Make sure buffer is large enough to hold the result for atomics.
367  */
368  if (qp->ibqp.qp_type == IB_QPT_UC) {
369  if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
370  goto bail_inval;
371  } else if (qp->ibqp.qp_type == IB_QPT_UD) {
372  /* Check UD opcode */
373  if (wr->opcode != IB_WR_SEND &&
374  wr->opcode != IB_WR_SEND_WITH_IMM)
375  goto bail_inval;
376  /* Check UD destination address PD */
377  if (qp->ibqp.pd != wr->wr.ud.ah->pd)
378  goto bail_inval;
379  } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
380  goto bail_inval;
381  else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
382  (wr->num_sge == 0 ||
383  wr->sg_list[0].length < sizeof(u64) ||
384  wr->sg_list[0].addr & (sizeof(u64) - 1)))
385  goto bail_inval;
386  else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
387  goto bail_inval;
388 
389  next = qp->s_head + 1;
390  if (next >= qp->s_size)
391  next = 0;
392  if (next == qp->s_last) {
393  ret = -ENOMEM;
394  goto bail;
395  }
396 
397  wqe = get_swqe_ptr(qp, qp->s_head);
398  wqe->wr = *wr;
399  wqe->length = 0;
400  if (wr->num_sge) {
401  acc = wr->opcode >= IB_WR_RDMA_READ ?
402  IB_ACCESS_LOCAL_WRITE : 0;
403  for (i = 0, j = 0; i < wr->num_sge; i++) {
404  u32 length = wr->sg_list[i].length;
405  int ok;
406 
407  if (length == 0)
408  continue;
409  ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
410  &wr->sg_list[i], acc);
411  if (!ok)
412  goto bail_inval;
413  wqe->length += length;
414  j++;
415  }
416  wqe->wr.num_sge = j;
417  }
418  if (qp->ibqp.qp_type == IB_QPT_UC ||
419  qp->ibqp.qp_type == IB_QPT_RC) {
420  if (wqe->length > 0x80000000U)
421  goto bail_inval;
422  } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
423  goto bail_inval;
424  wqe->ssn = qp->s_ssn++;
425  qp->s_head = next;
426 
427  ret = 0;
428  goto bail;
429 
430 bail_inval:
431  ret = -EINVAL;
432 bail:
433  spin_unlock_irqrestore(&qp->s_lock, flags);
434  return ret;
435 }
436 
437 /**
438  * ipath_post_send - post a send on a QP
439  * @ibqp: the QP to post the send on
440  * @wr: the list of work requests to post
441  * @bad_wr: the first bad WR is put here
442  *
443  * This may be called from interrupt context.
444  */
445 static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
446  struct ib_send_wr **bad_wr)
447 {
448  struct ipath_qp *qp = to_iqp(ibqp);
449  int err = 0;
450 
451  for (; wr; wr = wr->next) {
452  err = ipath_post_one_send(qp, wr);
453  if (err) {
454  *bad_wr = wr;
455  goto bail;
456  }
457  }
458 
459  /* Try to do the send work in the caller's context. */
460  ipath_do_send((unsigned long) qp);
461 
462 bail:
463  return err;
464 }
465 
466 /**
467  * ipath_post_receive - post a receive on a QP
468  * @ibqp: the QP to post the receive on
469  * @wr: the WR to post
470  * @bad_wr: the first bad WR is put here
471  *
472  * This may be called from interrupt context.
473  */
474 static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
475  struct ib_recv_wr **bad_wr)
476 {
477  struct ipath_qp *qp = to_iqp(ibqp);
478  struct ipath_rwq *wq = qp->r_rq.wq;
479  unsigned long flags;
480  int ret;
481 
482  /* Check that state is OK to post receive. */
483  if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
484  *bad_wr = wr;
485  ret = -EINVAL;
486  goto bail;
487  }
488 
489  for (; wr; wr = wr->next) {
490  struct ipath_rwqe *wqe;
491  u32 next;
492  int i;
493 
494  if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
495  *bad_wr = wr;
496  ret = -EINVAL;
497  goto bail;
498  }
499 
500  spin_lock_irqsave(&qp->r_rq.lock, flags);
501  next = wq->head + 1;
502  if (next >= qp->r_rq.size)
503  next = 0;
504  if (next == wq->tail) {
505  spin_unlock_irqrestore(&qp->r_rq.lock, flags);
506  *bad_wr = wr;
507  ret = -ENOMEM;
508  goto bail;
509  }
510 
511  wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
512  wqe->wr_id = wr->wr_id;
513  wqe->num_sge = wr->num_sge;
514  for (i = 0; i < wr->num_sge; i++)
515  wqe->sg_list[i] = wr->sg_list[i];
516  /* Make sure queue entry is written before the head index. */
517  smp_wmb();
518  wq->head = next;
519  spin_unlock_irqrestore(&qp->r_rq.lock, flags);
520  }
521  ret = 0;
522 
523 bail:
524  return ret;
525 }
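/*
 * Annotation (not in the original file): both the send and receive rings
 * treat "next == tail" (or "next == s_last") as full, so a queue of size N
 * holds at most N - 1 outstanding work requests; the producer publishes
 * wq->head only after smp_wmb() so the consumer never sees a partially
 * written entry.
 */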
526 
527 /**
528  * ipath_qp_rcv - processing an incoming packet on a QP
529  * @dev: the device the packet came on
530  * @hdr: the packet header
531  * @has_grh: true if the packet has a GRH
532  * @data: the packet data
533  * @tlen: the packet length
534  * @qp: the QP the packet came on
535  *
536  * This is called from ipath_ib_rcv() to process an incoming packet
537  * for the given QP.
538  * Called at interrupt level.
539  */
540 static void ipath_qp_rcv(struct ipath_ibdev *dev,
541  struct ipath_ib_header *hdr, int has_grh,
542  void *data, u32 tlen, struct ipath_qp *qp)
543 {
544  /* Check for valid receive state. */
545  if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
546  dev->n_pkt_drops++;
547  return;
548  }
549 
550  switch (qp->ibqp.qp_type) {
551  case IB_QPT_SMI:
552  case IB_QPT_GSI:
553  if (ib_ipath_disable_sma)
554  break;
555  /* FALLTHROUGH */
556  case IB_QPT_UD:
557  ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
558  break;
559 
560  case IB_QPT_RC:
561  ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
562  break;
563 
564  case IB_QPT_UC:
565  ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
566  break;
567 
568  default:
569  break;
570  }
571 }
572 
573 /**
574  * ipath_ib_rcv - process an incoming packet
575  * @dev: the device the packet came on
576  * @rhdr: the header of the packet
577  * @data: the packet data
578  * @tlen: the length of the packet
579  *
580  * This is called from ipath_kreceive() to process an incoming packet at
581  * interrupt level. Tlen is the length of the header + data + CRC in bytes.
582  */
583 void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
584  u32 tlen)
585 {
586  struct ipath_ib_header *hdr = rhdr;
587  struct ipath_other_headers *ohdr;
588  struct ipath_qp *qp;
589  u32 qp_num;
590  int lnh;
591  u8 opcode;
592  u16 lid;
593 
594  if (unlikely(dev == NULL))
595  goto bail;
596 
597  if (unlikely(tlen < 24)) { /* LRH+BTH+CRC */
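  /* i.e. 8-byte LRH + 12-byte BTH + 4-byte ICRC (annotation) */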
598  dev->rcv_errors++;
599  goto bail;
600  }
601 
602  /* Check for a valid destination LID (see ch. 7.11.1). */
603  lid = be16_to_cpu(hdr->lrh[1]);
604  if (lid < IPATH_MULTICAST_LID_BASE) {
605  lid &= ~((1 << dev->dd->ipath_lmc) - 1);
606  if (unlikely(lid != dev->dd->ipath_lid)) {
607  dev->rcv_errors++;
608  goto bail;
609  }
610  }
611 
612  /* Check for GRH */
613  lnh = be16_to_cpu(hdr->lrh[0]) & 3;
614  if (lnh == IPATH_LRH_BTH)
615  ohdr = &hdr->u.oth;
616  else if (lnh == IPATH_LRH_GRH)
617  ohdr = &hdr->u.l.oth;
618  else {
619  dev->rcv_errors++;
620  goto bail;
621  }
622 
623  opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
624  dev->opstats[opcode].n_bytes += tlen;
625  dev->opstats[opcode].n_packets++;
626 
627  /* Get the destination QP number. */
628  qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
629  if (qp_num == IPATH_MULTICAST_QPN) {
630  struct ipath_mcast *mcast;
631  struct ipath_mcast_qp *p;
632 
633  if (lnh != IPATH_LRH_GRH) {
634  dev->n_pkt_drops++;
635  goto bail;
636  }
637  mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
638  if (mcast == NULL) {
639  dev->n_pkt_drops++;
640  goto bail;
641  }
642  dev->n_multicast_rcv++;
643  list_for_each_entry_rcu(p, &mcast->qp_list, list)
644  ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
645  /*
646  * Notify ipath_multicast_detach() if it is waiting for us
647  * to finish.
648  */
649  if (atomic_dec_return(&mcast->refcount) <= 1)
650  wake_up(&mcast->wait);
651  } else {
652  qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
653  if (qp) {
654  dev->n_unicast_rcv++;
655  ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
656  tlen, qp);
657  /*
658  * Notify ipath_destroy_qp() if it is waiting
659  * for us to finish.
660  */
661  if (atomic_dec_and_test(&qp->refcount))
662  wake_up(&qp->wait);
663  } else
664  dev->n_pkt_drops++;
665  }
666 
667 bail:;
668 }
669 
670 /**
671  * ipath_ib_timer - verbs timer
672  * @dev: the device pointer
673  *
674  * This is called from ipath_do_rcv_timer() at interrupt level to check for
675  * QPs which need retransmits and to collect performance numbers.
676  */
677 static void ipath_ib_timer(struct ipath_ibdev *dev)
678 {
679  struct ipath_qp *resend = NULL;
680  struct ipath_qp *rnr = NULL;
681  struct list_head *last;
682  struct ipath_qp *qp;
683  unsigned long flags;
684 
685  if (dev == NULL)
686  return;
687 
688  spin_lock_irqsave(&dev->pending_lock, flags);
689  /* Start filling the next pending queue. */
690  if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
691  dev->pending_index = 0;
692  /* Save any requests still in the new queue, they have timed out. */
693  last = &dev->pending[dev->pending_index];
694  while (!list_empty(last)) {
695  qp = list_entry(last->next, struct ipath_qp, timerwait);
696  list_del_init(&qp->timerwait);
697  qp->timer_next = resend;
698  resend = qp;
699  atomic_inc(&qp->refcount);
700  }
701  last = &dev->rnrwait;
702  if (!list_empty(last)) {
703  qp = list_entry(last->next, struct ipath_qp, timerwait);
704  if (--qp->s_rnr_timeout == 0) {
705  do {
706  list_del_init(&qp->timerwait);
707  qp->timer_next = rnr;
708  rnr = qp;
709  atomic_inc(&qp->refcount);
710  if (list_empty(last))
711  break;
712  qp = list_entry(last->next, struct ipath_qp,
713  timerwait);
714  } while (qp->s_rnr_timeout == 0);
715  }
716  }
717  /*
718  * We should only be in the started state if pma_sample_start != 0
719  */
720  if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
721  --dev->pma_sample_start == 0) {
722  dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
723  ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
724  &dev->ipath_rword,
725  &dev->ipath_spkts,
726  &dev->ipath_rpkts,
727  &dev->ipath_xmit_wait);
728  }
729  if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
730  if (dev->pma_sample_interval == 0) {
731  u64 ta, tb, tc, td, te;
732 
733  dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
734  ipath_snapshot_counters(dev->dd, &ta, &tb,
735  &tc, &td, &te);
736 
737  dev->ipath_sword = ta - dev->ipath_sword;
738  dev->ipath_rword = tb - dev->ipath_rword;
739  dev->ipath_spkts = tc - dev->ipath_spkts;
740  dev->ipath_rpkts = td - dev->ipath_rpkts;
741  dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
742  }
743  else
744  dev->pma_sample_interval--;
745  }
746  spin_unlock_irqrestore(&dev->pending_lock, flags);
747 
748  /* XXX What if timer fires again while this is running? */
749  while (resend != NULL) {
750  qp = resend;
751  resend = qp->timer_next;
752 
753  spin_lock_irqsave(&qp->s_lock, flags);
754  if (qp->s_last != qp->s_tail &&
755  ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
756  dev->n_timeouts++;
757  ipath_restart_rc(qp, qp->s_last_psn + 1);
758  }
759  spin_unlock_irqrestore(&qp->s_lock, flags);
760 
761  /* Notify ipath_destroy_qp() if it is waiting. */
762  if (atomic_dec_and_test(&qp->refcount))
763  wake_up(&qp->wait);
764  }
765  while (rnr != NULL) {
766  qp = rnr;
767  rnr = qp->timer_next;
768 
769  spin_lock_irqsave(&qp->s_lock, flags);
770  if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
771  ipath_schedule_send(qp);
772  spin_unlock_irqrestore(&qp->s_lock, flags);
773 
774  /* Notify ipath_destroy_qp() if it is waiting. */
775  if (atomic_dec_and_test(&qp->refcount))
776  wake_up(&qp->wait);
777  }
778 }
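/*
 * Annotation (not in the original file): each timer tick rotates the set of
 * "pending" lists (QPs still on the list about to be refilled have timed out
 * and get an RC restart), decrements the RNR timeout at the head of the
 * rnrwait list, and advances the PMA sampling state machine, all under
 * dev->pending_lock; the actual per-QP work is done afterwards without the
 * lock held.
 */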
779 
780 static void update_sge(struct ipath_sge_state *ss, u32 length)
781 {
782  struct ipath_sge *sge = &ss->sge;
783 
784  sge->vaddr += length;
785  sge->length -= length;
786  sge->sge_length -= length;
787  if (sge->sge_length == 0) {
788  if (--ss->num_sge)
789  *sge = *ss->sg_list++;
790  } else if (sge->length == 0 && sge->mr != NULL) {
791  if (++sge->n >= IPATH_SEGSZ) {
792  if (++sge->m >= sge->mr->mapsz)
793  return;
794  sge->n = 0;
795  }
796  sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
797  sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
798  }
799 }
800 
801 #ifdef __LITTLE_ENDIAN
802 static inline u32 get_upper_bits(u32 data, u32 shift)
803 {
804  return data >> shift;
805 }
806 
807 static inline u32 set_upper_bits(u32 data, u32 shift)
808 {
809  return data << shift;
810 }
811 
812 static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
813 {
814  data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
815  data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
816  return data;
817 }
818 #else
819 static inline u32 get_upper_bits(u32 data, u32 shift)
820 {
821  return data << shift;
822 }
823 
824 static inline u32 set_upper_bits(u32 data, u32 shift)
825 {
826  return data >> shift;
827 }
828 
829 static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
830 {
831  data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
832  data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
833  return data;
834 }
835 #endif
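/*
 * Worked example (annotation, little-endian case): clear_upper_bytes(v, 1, 0)
 * keeps only the lowest byte of v (0xAABBCCDD -> 0x000000DD), and
 * set_upper_bits(v, 8) shifts a value up by one byte, so copy_io() below can
 * splice partial words from unaligned source buffers into whole 32-bit PIO
 * writes.
 */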
836 
837 static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
838  u32 length, unsigned flush_wc)
839 {
840  u32 extra = 0;
841  u32 data = 0;
842  u32 last;
843 
844  while (1) {
845  u32 len = ss->sge.length;
846  u32 off;
847 
848  if (len > length)
849  len = length;
850  if (len > ss->sge.sge_length)
851  len = ss->sge.sge_length;
852  BUG_ON(len == 0);
853  /* If the source address is not aligned, try to align it. */
854  off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
855  if (off) {
856  u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
857  ~(sizeof(u32) - 1));
858  u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
859  u32 y;
860 
861  y = sizeof(u32) - off;
862  if (len > y)
863  len = y;
864  if (len + extra >= sizeof(u32)) {
865  data |= set_upper_bits(v, extra *
866  BITS_PER_BYTE);
867  len = sizeof(u32) - extra;
868  if (len == length) {
869  last = data;
870  break;
871  }
872  __raw_writel(data, piobuf);
873  piobuf++;
874  extra = 0;
875  data = 0;
876  } else {
877  /* Clear unused upper bytes */
878  data |= clear_upper_bytes(v, len, extra);
879  if (len == length) {
880  last = data;
881  break;
882  }
883  extra += len;
884  }
885  } else if (extra) {
886  /* Source address is aligned. */
887  u32 *addr = (u32 *) ss->sge.vaddr;
888  int shift = extra * BITS_PER_BYTE;
889  int ushift = 32 - shift;
890  u32 l = len;
891 
892  while (l >= sizeof(u32)) {
893  u32 v = *addr;
894 
895  data |= set_upper_bits(v, shift);
896  __raw_writel(data, piobuf);
897  data = get_upper_bits(v, ushift);
898  piobuf++;
899  addr++;
900  l -= sizeof(u32);
901  }
902  /*
903  * We still have 'extra' number of bytes leftover.
904  */
905  if (l) {
906  u32 v = *addr;
907 
908  if (l + extra >= sizeof(u32)) {
909  data |= set_upper_bits(v, shift);
910  len -= l + extra - sizeof(u32);
911  if (len == length) {
912  last = data;
913  break;
914  }
915  __raw_writel(data, piobuf);
916  piobuf++;
917  extra = 0;
918  data = 0;
919  } else {
920  /* Clear unused upper bytes */
921  data |= clear_upper_bytes(v, l,
922  extra);
923  if (len == length) {
924  last = data;
925  break;
926  }
927  extra += l;
928  }
929  } else if (len == length) {
930  last = data;
931  break;
932  }
933  } else if (len == length) {
934  u32 w;
935 
936  /*
937  * Need to round up for the last dword in the
938  * packet.
939  */
940  w = (len + 3) >> 2;
941  __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
942  piobuf += w - 1;
943  last = ((u32 *) ss->sge.vaddr)[w - 1];
944  break;
945  } else {
946  u32 w = len >> 2;
947 
948  __iowrite32_copy(piobuf, ss->sge.vaddr, w);
949  piobuf += w;
950 
951  extra = len & (sizeof(u32) - 1);
952  if (extra) {
953  u32 v = ((u32 *) ss->sge.vaddr)[w];
954 
955  /* Clear unused upper bytes */
956  data = clear_upper_bytes(v, extra, 0);
957  }
958  }
959  update_sge(ss, len);
960  length -= len;
961  }
962  /* Update address before sending packet. */
963  update_sge(ss, length);
964  if (flush_wc) {
965  /* must flush early everything before trigger word */
966  ipath_flush_wc();
967  __raw_writel(last, piobuf);
968  /* be sure trigger word is written */
969  ipath_flush_wc();
970  } else
971  __raw_writel(last, piobuf);
972 }
973 
974 /*
975  * Convert IB rate to delay multiplier.
976  */
977 unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
978 {
979  switch (rate) {
980  case IB_RATE_2_5_GBPS: return 8;
981  case IB_RATE_5_GBPS: return 4;
982  case IB_RATE_10_GBPS: return 2;
983  case IB_RATE_20_GBPS: return 1;
984  default: return 0;
985  }
986 }
987 
988 /*
989  * Convert delay multiplier to IB rate
990  */
991 static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
992 {
993  switch (mult) {
994  case 8: return IB_RATE_2_5_GBPS;
995  case 4: return IB_RATE_5_GBPS;
996  case 2: return IB_RATE_10_GBPS;
997  case 1: return IB_RATE_20_GBPS;
998  default: return IB_RATE_PORT_CURRENT;
999  }
1000 }
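/*
 * Annotation (not in the original file): the two helpers above are inverses
 * over the four supported rates (2.5/5/10/20 Gbps map to multipliers
 * 8/4/2/1); anything else degrades to 0 respectively IB_RATE_PORT_CURRENT,
 * and a multiplier of 0 makes ipath_pkt_delay() below return no delay.
 */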
1001 
1002 static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
1003 {
1004  struct ipath_verbs_txreq *tx = NULL;
1005  unsigned long flags;
1006 
1007  spin_lock_irqsave(&dev->pending_lock, flags);
1008  if (!list_empty(&dev->txreq_free)) {
1009  struct list_head *l = dev->txreq_free.next;
1010 
1011  list_del(l);
1012  tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
1013  }
1014  spin_unlock_irqrestore(&dev->pending_lock, flags);
1015  return tx;
1016 }
1017 
1018 static inline void put_txreq(struct ipath_ibdev *dev,
1019  struct ipath_verbs_txreq *tx)
1020 {
1021  unsigned long flags;
1022 
1023  spin_lock_irqsave(&dev->pending_lock, flags);
1024  list_add(&tx->txreq.list, &dev->txreq_free);
1025  spin_unlock_irqrestore(&dev->pending_lock, flags);
1026 }
1027 
1028 static void sdma_complete(void *cookie, int status)
1029 {
1030  struct ipath_verbs_txreq *tx = cookie;
1031  struct ipath_qp *qp = tx->qp;
1032  struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1033  unsigned long flags;
1034  enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
1035  IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
1036 
1037  if (atomic_dec_and_test(&qp->s_dma_busy)) {
1038  spin_lock_irqsave(&qp->s_lock, flags);
1039  if (tx->wqe)
1040  ipath_send_complete(qp, tx->wqe, ibs);
1041  if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1042  qp->s_last != qp->s_head) ||
1043  (qp->s_flags & IPATH_S_WAIT_DMA))
1044  ipath_schedule_send(qp);
1045  spin_unlock_irqrestore(&qp->s_lock, flags);
1046  wake_up(&qp->wait_dma);
1047  } else if (tx->wqe) {
1048  spin_lock_irqsave(&qp->s_lock, flags);
1049  ipath_send_complete(qp, tx->wqe, ibs);
1050  spin_unlock_irqrestore(&qp->s_lock, flags);
1051  }
1052 
1053  if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
1054  kfree(tx->txreq.map_addr);
1055  put_txreq(dev, tx);
1056 
1057  if (atomic_dec_and_test(&qp->refcount))
1058  wake_up(&qp->wait);
1059 }
1060 
1061 static void decrement_dma_busy(struct ipath_qp *qp)
1062 {
1063  unsigned long flags;
1064 
1065  if (atomic_dec_and_test(&qp->s_dma_busy)) {
1066  spin_lock_irqsave(&qp->s_lock, flags);
1067  if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1068  qp->s_last != qp->s_head) ||
1069  (qp->s_flags & IPATH_S_WAIT_DMA))
1070  ipath_schedule_send(qp);
1071  spin_unlock_irqrestore(&qp->s_lock, flags);
1072  wake_up(&qp->wait_dma);
1073  }
1074 }
1075 
1076 /*
1077  * Compute the number of clock cycles of delay before sending the next packet.
1078  * The multipliers reflect the number of clocks for the fastest rate so
1079  * one tick at 4xDDR is 8 ticks at 1xSDR.
1080  * If the destination port will take longer to receive a packet than
1081  * the outgoing link can send it, we need to delay sending the next packet
1082  * by the difference in time it takes the receiver to receive and the sender
1083  * to send this packet.
1084  * Note that this delay is always correct for UC and RC but not always
1085  * optimal for UD. For UD, the destination HCA can be different for each
1086  * packet, in which case, we could send packets to a different destination
1087  * while "waiting" for the delay. The overhead for doing this without
1088  * HW support is more than just paying the cost of delaying some packets
1089  * unnecessarily.
1090  */
1091 static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
1092 {
1093  return (rcv_mult > snd_mult) ?
1094  (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
1095 }
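/*
 * Worked example (annotation): a plen of 522 dwords sent from a 4X DDR port
 * (snd_mult = 1) to a 1X SDR destination (rcv_mult = 8) yields
 * (522 * (8 - 1) + 1) >> 1 = 1827 delay ticks; if the receiver is at least
 * as fast as the sender the delay is 0.
 */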
1096 
1097 static int ipath_verbs_send_dma(struct ipath_qp *qp,
1098  struct ipath_ib_header *hdr, u32 hdrwords,
1099  struct ipath_sge_state *ss, u32 len,
1100  u32 plen, u32 dwords)
1101 {
1102  struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1103  struct ipath_devdata *dd = dev->dd;
1104  struct ipath_verbs_txreq *tx;
1105  u32 *piobuf;
1106  u32 control;
1107  u32 ndesc;
1108  int ret;
1109 
1110  tx = qp->s_tx;
1111  if (tx) {
1112  qp->s_tx = NULL;
1113  /* resend previously constructed packet */
1114  atomic_inc(&qp->s_dma_busy);
1115  ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
1116  if (ret) {
1117  qp->s_tx = tx;
1118  decrement_dma_busy(qp);
1119  }
1120  goto bail;
1121  }
1122 
1123  tx = get_txreq(dev);
1124  if (!tx) {
1125  ret = -EBUSY;
1126  goto bail;
1127  }
1128 
1129  /*
1130  * Get the saved delay count we computed for the previous packet
1131  * and save the delay count for this packet to be used next time
1132  * we get here.
1133  */
1134  control = qp->s_pkt_delay;
1135  qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
1136 
1137  tx->qp = qp;
1138  atomic_inc(&qp->refcount);
1139  tx->wqe = qp->s_wqe;
1140  tx->txreq.callback = sdma_complete;
1141  tx->txreq.callback_cookie = tx;
1142  tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
1143  IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
1144  if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
1145  tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
1146 
1147  /* VL15 packets bypass credit check */
1148  if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
1149  control |= 1ULL << 31;
1150  tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
1151  }
1152 
1153  if (len) {
1154  /*
1155  * Don't try to DMA if it takes more descriptors than
1156  * the queue holds.
1157  */
1158  ndesc = ipath_count_sge(ss, len);
1159  if (ndesc >= dd->ipath_sdma_descq_cnt)
1160  ndesc = 0;
1161  } else
1162  ndesc = 1;
1163  if (ndesc) {
1164  tx->hdr.pbc[0] = cpu_to_le32(plen);
1165  tx->hdr.pbc[1] = cpu_to_le32(control);
1166  memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
1167  tx->txreq.sg_count = ndesc;
1168  tx->map_len = (hdrwords + 2) << 2;
1169  tx->txreq.map_addr = &tx->hdr;
1170  atomic_inc(&qp->s_dma_busy);
1171  ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
1172  if (ret) {
1173  /* save ss and length in dwords */
1174  tx->ss = ss;
1175  tx->len = dwords;
1176  qp->s_tx = tx;
1177  decrement_dma_busy(qp);
1178  }
1179  goto bail;
1180  }
1181 
1182  /* Allocate a buffer and copy the header and payload to it. */
1183  tx->map_len = (plen + 1) << 2;
1184  piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
1185  if (unlikely(piobuf == NULL)) {
1186  ret = -EBUSY;
1187  goto err_tx;
1188  }
1189  tx->txreq.map_addr = piobuf;
1190  tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
1191  tx->txreq.sg_count = 1;
1192 
1193  *piobuf++ = (__force u32) cpu_to_le32(plen);
1194  *piobuf++ = (__force u32) cpu_to_le32(control);
1195  memcpy(piobuf, hdr, hdrwords << 2);
1196  ipath_copy_from_sge(piobuf + hdrwords, ss, len);
1197 
1198  atomic_inc(&qp->s_dma_busy);
1199  ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
1200  /*
1201  * If we couldn't queue the DMA request, save the info
1202  * and try again later rather than destroying the
1203  * buffer and undoing the side effects of the copy.
1204  */
1205  if (ret) {
1206  tx->ss = NULL;
1207  tx->len = 0;
1208  qp->s_tx = tx;
1209  decrement_dma_busy(qp);
1210  }
1211  dev->n_unaligned++;
1212  goto bail;
1213 
1214 err_tx:
1215  if (atomic_dec_and_test(&qp->refcount))
1216  wake_up(&qp->wait);
1217  put_txreq(dev, tx);
1218 bail:
1219  return ret;
1220 }
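/*
 * Annotation (not in the original file): ipath_verbs_send_dma() takes one of
 * three paths: (1) re-submit a previously saved tx when an earlier attempt
 * could not be queued, (2) point the SDMA engine directly at the SGE list
 * when every segment is dword aligned and fits in the descriptor queue, or
 * (3) copy header plus payload into a kmalloc'ed bounce buffer (counted in
 * dev->n_unaligned) and hand that single buffer to SDMA.
 */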
1221 
1222 static int ipath_verbs_send_pio(struct ipath_qp *qp,
1223  struct ipath_ib_header *ibhdr, u32 hdrwords,
1224  struct ipath_sge_state *ss, u32 len,
1225  u32 plen, u32 dwords)
1226 {
1227  struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
1228  u32 *hdr = (u32 *) ibhdr;
1229  u32 __iomem *piobuf;
1230  unsigned flush_wc;
1231  u32 control;
1232  int ret;
1233  unsigned long flags;
1234 
1235  piobuf = ipath_getpiobuf(dd, plen, NULL);
1236  if (unlikely(piobuf == NULL)) {
1237  ret = -EBUSY;
1238  goto bail;
1239  }
1240 
1241  /*
1242  * Get the saved delay count we computed for the previous packet
1243  * and save the delay count for this packet to be used next time
1244  * we get here.
1245  */
1246  control = qp->s_pkt_delay;
1247  qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
1248 
1249  /* VL15 packets bypass credit check */
1250  if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
1251  control |= 1ULL << 31;
1252 
1253  /*
1254  * Write the length to the control qword plus any needed flags.
1255  * We have to flush after the PBC for correctness on some cpus
1256  * or WC buffer can be written out of order.
1257  */
1258  writeq(((u64) control << 32) | plen, piobuf);
1259  piobuf += 2;
1260 
1261  flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
1262  if (len == 0) {
1263  /*
1264  * If there is just the header portion, must flush before
1265  * writing last word of header for correctness, and after
1266  * the last header word (trigger word).
1267  */
1268  if (flush_wc) {
1269  ipath_flush_wc();
1270  __iowrite32_copy(piobuf, hdr, hdrwords - 1);
1271  ipath_flush_wc();
1272  __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
1273  ipath_flush_wc();
1274  } else
1275  __iowrite32_copy(piobuf, hdr, hdrwords);
1276  goto done;
1277  }
1278 
1279  if (flush_wc)
1280  ipath_flush_wc();
1281  __iowrite32_copy(piobuf, hdr, hdrwords);
1282  piobuf += hdrwords;
1283 
1284  /* The common case is aligned and contained in one segment. */
1285  if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
1286  !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
1287  u32 *addr = (u32 *) ss->sge.vaddr;
1288 
1289  /* Update address before sending packet. */
1290  update_sge(ss, len);
1291  if (flush_wc) {
1292  __iowrite32_copy(piobuf, addr, dwords - 1);
1293  /* must flush early everything before trigger word */
1294  ipath_flush_wc();
1295  __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
1296  /* be sure trigger word is written */
1297  ipath_flush_wc();
1298  } else
1299  __iowrite32_copy(piobuf, addr, dwords);
1300  goto done;
1301  }
1302  copy_io(piobuf, ss, len, flush_wc);
1303 done:
1304  if (qp->s_wqe) {
1305  spin_lock_irqsave(&qp->s_lock, flags);
1306  ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
1307  spin_unlock_irqrestore(&qp->s_lock, flags);
1308  }
1309  ret = 0;
1310 bail:
1311  return ret;
1312 }
1313 
1314 /**
1315  * ipath_verbs_send - send a packet
1316  * @qp: the QP to send on
1317  * @hdr: the packet header
1318  * @hdrwords: the number of 32 bit words in the header
1319  * @ss: the SGE to send
1320  * @len: the length of the packet in bytes
1321  */
1322 int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
1323  u32 hdrwords, struct ipath_sge_state *ss, u32 len)
1324 {
1325  struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
1326  u32 plen;
1327  int ret;
1328  u32 dwords = (len + 3) >> 2;
1329 
1330  /*
1331  * Calculate the send buffer trigger address.
1332  * The +1 counts for the pbc control dword following the pbc length.
1333  */
1334  plen = hdrwords + dwords + 1;
1335 
1336  /*
1337  * VL15 packets (IB_QPT_SMI) will always use PIO, so we
1338  * can defer SDMA restart until link goes ACTIVE without
1339  * worrying about just how we got there.
1340  */
1341  if (qp->ibqp.qp_type == IB_QPT_SMI ||
1342  !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
1343  ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
1344  plen, dwords);
1345  else
1346  ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1347  plen, dwords);
1348 
1349  return ret;
1350 }
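/*
 * Worked example (annotation): for a 13-dword header and a 256-byte payload,
 * dwords = (256 + 3) >> 2 = 64 and plen = 13 + 64 + 1 = 78, the extra dword
 * being the PBC control word that follows the PBC length.
 */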
1351 
1352 int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
1353  u64 *rwords, u64 *spkts, u64 *rpkts,
1354  u64 *xmit_wait)
1355 {
1356  int ret;
1357 
1358  if (!(dd->ipath_flags & IPATH_INITTED)) {
1359  /* no hardware, freeze, etc. */
1360  ret = -EINVAL;
1361  goto bail;
1362  }
1363  *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
1364  *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
1365  *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
1366  *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
1367  *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
1368 
1369  ret = 0;
1370 
1371 bail:
1372  return ret;
1373 }
1374 
1375 /**
1376  * ipath_get_counters - get various chip counters
1377  * @dd: the infinipath device
1378  * @cntrs: counters are placed here
1379  *
1380  * Return the counters needed by recv_pma_get_portcounters().
1381  */
1382 int ipath_get_counters(struct ipath_devdata *dd,
1383  struct ipath_verbs_counters *cntrs)
1384 {
1385  struct ipath_cregs const *crp = dd->ipath_cregs;
1386  int ret;
1387 
1388  if (!(dd->ipath_flags & IPATH_INITTED)) {
1389  /* no hardware, freeze, etc. */
1390  ret = -EINVAL;
1391  goto bail;
1392  }
1393  cntrs->symbol_error_counter =
1394  ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
1395  cntrs->link_error_recovery_counter =
1396  ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
1397  /*
1398  * The link downed counter counts when the other side downs the
1399  * connection. We add in the number of times we downed the link
1400  * due to local link integrity errors to compensate.
1401  */
1402  cntrs->link_downed_counter =
1403  ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
1404  cntrs->port_rcv_errors =
1405  ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
1406  ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
1407  ipath_snap_cntr(dd, crp->cr_portovflcnt) +
1408  ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
1409  ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
1410  ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
1411  ipath_snap_cntr(dd, crp->cr_erricrccnt) +
1412  ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
1413  ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
1414  ipath_snap_cntr(dd, crp->cr_badformatcnt) +
1415  dd->ipath_rxfc_unsupvl_errs;
1416  if (crp->cr_rxotherlocalphyerrcnt)
1417  cntrs->port_rcv_errors +=
1418  ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
1419  if (crp->cr_rxvlerrcnt)
1420  cntrs->port_rcv_errors +=
1421  ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
1422  cntrs->port_rcv_remphys_errors =
1423  ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
1424  cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
1425  cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
1426  cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
1427  cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
1428  cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
1429  cntrs->local_link_integrity_errors =
1430  crp->cr_locallinkintegrityerrcnt ?
1431  ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
1432  ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
1433  dd->ipath_lli_errs : dd->ipath_lli_errors);
1434  cntrs->excessive_buffer_overrun_errors =
1435  crp->cr_excessbufferovflcnt ?
1436  ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
1437  dd->ipath_overrun_thresh_errs;
1438  cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
1439  ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
1440 
1441  ret = 0;
1442 
1443 bail:
1444  return ret;
1445 }
1446 
1447 /**
1448  * ipath_ib_piobufavail - callback when a PIO buffer is available
1449  * @dev: the device pointer
1450  *
1451  * This is called from ipath_intr() at interrupt level when a PIO buffer is
1452  * available after ipath_verbs_send() returned an error that no buffers were
1453  * available. Return 1 if we consumed all the PIO buffers and we still have
1454  * QPs waiting for buffers (for now, just restart the send tasklet and
1455  * return zero).
1456  */
1457 int ipath_ib_piobufavail(struct ipath_ibdev *dev)
1458 {
1459  struct list_head *list;
1460  struct ipath_qp *qplist;
1461  struct ipath_qp *qp;
1462  unsigned long flags;
1463 
1464  if (dev == NULL)
1465  goto bail;
1466 
1467  list = &dev->piowait;
1468  qplist = NULL;
1469 
1470  spin_lock_irqsave(&dev->pending_lock, flags);
1471  while (!list_empty(list)) {
1472  qp = list_entry(list->next, struct ipath_qp, piowait);
1473  list_del_init(&qp->piowait);
1474  qp->pio_next = qplist;
1475  qplist = qp;
1476  atomic_inc(&qp->refcount);
1477  }
1478  spin_unlock_irqrestore(&dev->pending_lock, flags);
1479 
1480  while (qplist != NULL) {
1481  qp = qplist;
1482  qplist = qp->pio_next;
1483 
1484  spin_lock_irqsave(&qp->s_lock, flags);
1485  if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
1486  ipath_schedule_send(qp);
1487  spin_unlock_irqrestore(&qp->s_lock, flags);
1488 
1489  /* Notify ipath_destroy_qp() if it is waiting. */
1490  if (atomic_dec_and_test(&qp->refcount))
1491  wake_up(&qp->wait);
1492  }
1493 
1494 bail:
1495  return 0;
1496 }
1497 
1498 static int ipath_query_device(struct ib_device *ibdev,
1499  struct ib_device_attr *props)
1500 {
1501  struct ipath_ibdev *dev = to_idev(ibdev);
1502 
1503  memset(props, 0, sizeof(*props));
1504 
1505  props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
1506  IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
1507  IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
1508  IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
1509  props->page_size_cap = PAGE_SIZE;
1510  props->vendor_id =
1511  IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
1512  props->vendor_part_id = dev->dd->ipath_deviceid;
1513  props->hw_ver = dev->dd->ipath_pcirev;
1514 
1515  props->sys_image_guid = dev->sys_image_guid;
1516 
1517  props->max_mr_size = ~0ull;
1518  props->max_qp = ib_ipath_max_qps;
1519  props->max_qp_wr = ib_ipath_max_qp_wrs;
1520  props->max_sge = ib_ipath_max_sges;
1521  props->max_cq = ib_ipath_max_cqs;
1522  props->max_ah = ib_ipath_max_ahs;
1523  props->max_cqe = ib_ipath_max_cqes;
1524  props->max_mr = dev->lk_table.max;
1525  props->max_fmr = dev->lk_table.max;
1526  props->max_map_per_fmr = 32767;
1527  props->max_pd = ib_ipath_max_pds;
1528  props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
1529  props->max_qp_init_rd_atom = 255;
1530  /* props->max_res_rd_atom */
1531  props->max_srq = ib_ipath_max_srqs;
1532  props->max_srq_wr = ib_ipath_max_srq_wrs;
1533  props->max_srq_sge = ib_ipath_max_srq_sges;
1534  /* props->local_ca_ack_delay */
1535  props->atomic_cap = IB_ATOMIC_GLOB;
1536  props->max_pkeys = ipath_get_npkeys(dev->dd);
1537  props->max_mcast_grp = ib_ipath_max_mcast_grps;
1538  props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
1539  props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
1540  props->max_mcast_grp;
1541 
1542  return 0;
1543 }
1544 
1545 const u8 ipath_cvt_physportstate[32] = {
1546  [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
1547  [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
1548  [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
1549  [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
1550  [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
1551  [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
1552  [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
1553  IB_PHYSPORTSTATE_CFG_TRAIN,
1554  [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
1555  IB_PHYSPORTSTATE_CFG_TRAIN,
1556  [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
1557  IB_PHYSPORTSTATE_CFG_TRAIN,
1558  [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
1559  [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
1560  IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1561  [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
1562  IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1563  [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
1564  IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1565  [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
1566  [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
1567  [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
1568  [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
1569  [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
1570  [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
1571  [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
1572  [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
1573 };
1574 
1575 static u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
1576 {
1577  return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
1578 }
1579 
1580 static int ipath_query_port(struct ib_device *ibdev,
1581  u8 port, struct ib_port_attr *props)
1582 {
1583  struct ipath_ibdev *dev = to_idev(ibdev);
1584  struct ipath_devdata *dd = dev->dd;
1585  enum ib_mtu mtu;
1586  u16 lid = dd->ipath_lid;
1587  u64 ibcstat;
1588 
1589  memset(props, 0, sizeof(*props));
1590  props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
1591  props->lmc = dd->ipath_lmc;
1592  props->sm_lid = dev->sm_lid;
1593  props->sm_sl = dev->sm_sl;
1594  ibcstat = dd->ipath_lastibcstat;
1595  /* map LinkState to IB portinfo values. */
1596  props->state = ipath_ib_linkstate(dd, ibcstat) + 1;
1597 
1598  /* See phys_state_show() */
1599  props->phys_state = /* MEA: assumes shift == 0 */
1600  ipath_cvt_physportstate[dd->ipath_lastibcstat &
1601  dd->ibcs_lts_mask];
1602  props->port_cap_flags = dev->port_cap_flags;
1603  props->gid_tbl_len = 1;
1604  props->max_msg_sz = 0x80000000;
1605  props->pkey_tbl_len = ipath_get_npkeys(dd);
1606  props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
1607  dev->z_pkey_violations;
1608  props->qkey_viol_cntr = dev->qkey_violations;
1609  props->active_width = IB_WIDTH_4X;
1610  /* See rate_show() */
1611  props->active_speed = dd->ipath_link_speed_active;
1612  props->max_vl_num = 1; /* VLCap = VL0 */
1613  props->init_type_reply = 0;
1614 
1615  props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
1616  switch (dd->ipath_ibmtu) {
1617  case 4096:
1618  mtu = IB_MTU_4096;
1619  break;
1620  case 2048:
1621  mtu = IB_MTU_2048;
1622  break;
1623  case 1024:
1624  mtu = IB_MTU_1024;
1625  break;
1626  case 512:
1627  mtu = IB_MTU_512;
1628  break;
1629  case 256:
1630  mtu = IB_MTU_256;
1631  break;
1632  default:
1633  mtu = IB_MTU_2048;
1634  }
1635  props->active_mtu = mtu;
1636  props->subnet_timeout = dev->subnet_timeout;
1637 
1638  return 0;
1639 }
1640 
1641 static int ipath_modify_device(struct ib_device *device,
1642  int device_modify_mask,
1643  struct ib_device_modify *device_modify)
1644 {
1645  int ret;
1646 
1647  if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
1648  IB_DEVICE_MODIFY_NODE_DESC)) {
1649  ret = -EOPNOTSUPP;
1650  goto bail;
1651  }
1652 
1653  if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
1654  memcpy(device->node_desc, device_modify->node_desc, 64);
1655 
1656  if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
1657  to_idev(device)->sys_image_guid =
1658  cpu_to_be64(device_modify->sys_image_guid);
1659 
1660  ret = 0;
1661 
1662 bail:
1663  return ret;
1664 }
1665 
1666 static int ipath_modify_port(struct ib_device *ibdev,
1667  u8 port, int port_modify_mask,
1668  struct ib_port_modify *props)
1669 {
1670  struct ipath_ibdev *dev = to_idev(ibdev);
1671 
1672  dev->port_cap_flags |= props->set_port_cap_mask;
1673  dev->port_cap_flags &= ~props->clr_port_cap_mask;
1674  if (port_modify_mask & IB_PORT_SHUTDOWN)
1675  ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
1676  if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
1677  dev->qkey_violations = 0;
1678  return 0;
1679 }
1680 
1681 static int ipath_query_gid(struct ib_device *ibdev, u8 port,
1682  int index, union ib_gid *gid)
1683 {
1684  struct ipath_ibdev *dev = to_idev(ibdev);
1685  int ret;
1686 
1687  if (index >= 1) {
1688  ret = -EINVAL;
1689  goto bail;
1690  }
1691  gid->global.subnet_prefix = dev->gid_prefix;
1692  gid->global.interface_id = dev->dd->ipath_guid;
1693 
1694  ret = 0;
1695 
1696 bail:
1697  return ret;
1698 }
1699 
1700 static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
1701  struct ib_ucontext *context,
1702  struct ib_udata *udata)
1703 {
1704  struct ipath_ibdev *dev = to_idev(ibdev);
1705  struct ipath_pd *pd;
1706  struct ib_pd *ret;
1707 
1708  /*
1709  * This is actually totally arbitrary. Some correctness tests
1710  * assume there's a maximum number of PDs that can be allocated.
1711  * We don't actually have this limit, but we fail the test if
1712  * we allow allocations of more than we report for this value.
1713  */
1714 
1715  pd = kmalloc(sizeof *pd, GFP_KERNEL);
1716  if (!pd) {
1717  ret = ERR_PTR(-ENOMEM);
1718  goto bail;
1719  }
1720 
1721  spin_lock(&dev->n_pds_lock);
1722  if (dev->n_pds_allocated == ib_ipath_max_pds) {
1723  spin_unlock(&dev->n_pds_lock);
1724  kfree(pd);
1725  ret = ERR_PTR(-ENOMEM);
1726  goto bail;
1727  }
1728 
1729  dev->n_pds_allocated++;
1730  spin_unlock(&dev->n_pds_lock);
1731 
1732  /* ib_alloc_pd() will initialize pd->ibpd. */
1733  pd->user = udata != NULL;
1734 
1735  ret = &pd->ibpd;
1736 
1737 bail:
1738  return ret;
1739 }
1740 
1741 static int ipath_dealloc_pd(struct ib_pd *ibpd)
1742 {
1743  struct ipath_pd *pd = to_ipd(ibpd);
1744  struct ipath_ibdev *dev = to_idev(ibpd->device);
1745 
1746  spin_lock(&dev->n_pds_lock);
1747  dev->n_pds_allocated--;
1748  spin_unlock(&dev->n_pds_lock);
1749 
1750  kfree(pd);
1751 
1752  return 0;
1753 }
1754 
1755 /**
1756  * ipath_create_ah - create an address handle
1757  * @pd: the protection domain
1758  * @ah_attr: the attributes of the AH
1759  *
1760  * This may be called from interrupt context.
1761  */
1762 static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
1763  struct ib_ah_attr *ah_attr)
1764 {
1765  struct ipath_ah *ah;
1766  struct ib_ah *ret;
1767  struct ipath_ibdev *dev = to_idev(pd->device);
1768  unsigned long flags;
1769 
1770  /* A multicast address requires a GRH (see ch. 8.4.1). */
1771  if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
1772  ah_attr->dlid != IPATH_PERMISSIVE_LID &&
1773  !(ah_attr->ah_flags & IB_AH_GRH)) {
1774  ret = ERR_PTR(-EINVAL);
1775  goto bail;
1776  }
1777 
1778  if (ah_attr->dlid == 0) {
1779  ret = ERR_PTR(-EINVAL);
1780  goto bail;
1781  }
1782 
1783  if (ah_attr->port_num < 1 ||
1784  ah_attr->port_num > pd->device->phys_port_cnt) {
1785  ret = ERR_PTR(-EINVAL);
1786  goto bail;
1787  }
1788 
1789  ah = kmalloc(sizeof *ah, GFP_ATOMIC);
1790  if (!ah) {
1791  ret = ERR_PTR(-ENOMEM);
1792  goto bail;
1793  }
1794 
1795  spin_lock_irqsave(&dev->n_ahs_lock, flags);
1796  if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
1797  spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1798  kfree(ah);
1799  ret = ERR_PTR(-ENOMEM);
1800  goto bail;
1801  }
1802 
1803  dev->n_ahs_allocated++;
1804  spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1805 
1806  /* ib_create_ah() will initialize ah->ibah. */
1807  ah->attr = *ah_attr;
1808  ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
1809 
1810  ret = &ah->ibah;
1811 
1812 bail:
1813  return ret;
1814 }
1815 
1816 /**
1817  * ipath_destroy_ah - destroy an address handle
1818  * @ibah: the AH to destroy
1819  *
1820  * This may be called from interrupt context.
1821  */
1822 static int ipath_destroy_ah(struct ib_ah *ibah)
1823 {
1824  struct ipath_ibdev *dev = to_idev(ibah->device);
1825  struct ipath_ah *ah = to_iah(ibah);
1826  unsigned long flags;
1827 
1828  spin_lock_irqsave(&dev->n_ahs_lock, flags);
1829  dev->n_ahs_allocated--;
1830  spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1831 
1832  kfree(ah);
1833 
1834  return 0;
1835 }
1836 
1837 static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
1838 {
1839  struct ipath_ah *ah = to_iah(ibah);
1840 
1841  *ah_attr = ah->attr;
1842  ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
1843 
1844  return 0;
1845 }
1846 
1847 /**
1848  * ipath_get_npkeys - return the size of the PKEY table for port 0
1849  * @dd: the infinipath device
1850  */
1851 unsigned ipath_get_npkeys(struct ipath_devdata *dd)
1852 {
1853  return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
1854 }
1855 
1856 /**
1857  * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table
1858  * @dd: the infinipath device
1859  * @index: the PKEY index
1860  */
1861 unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
1862 {
1863  unsigned ret;
1864 
1865  /* always a kernel port, no locking needed */
1866  if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
1867  ret = 0;
1868  else
1869  ret = dd->ipath_pd[0]->port_pkeys[index];
1870 
1871  return ret;
1872 }
1873 
1874 static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
1875  u16 *pkey)
1876 {
1877  struct ipath_ibdev *dev = to_idev(ibdev);
1878  int ret;
1879 
1880  if (index >= ipath_get_npkeys(dev->dd)) {
1881  ret = -EINVAL;
1882  goto bail;
1883  }
1884 
1885  *pkey = ipath_get_pkey(dev->dd, index);
1886  ret = 0;
1887 
1888 bail:
1889  return ret;
1890 }
1891 
1898 static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
1899  struct ib_udata *udata)
1900 {
1901  struct ipath_ucontext *context;
1902  struct ib_ucontext *ret;
1903 
1904  context = kmalloc(sizeof *context, GFP_KERNEL);
1905  if (!context) {
1906  ret = ERR_PTR(-ENOMEM);
1907  goto bail;
1908  }
1909 
1910  ret = &context->ibucontext;
1911 
1912 bail:
1913  return ret;
1914 }
1915 
1916 static int ipath_dealloc_ucontext(struct ib_ucontext *context)
1917 {
1918  kfree(to_iucontext(context));
1919  return 0;
1920 }
1921 
1922 static int ipath_verbs_register_sysfs(struct ib_device *dev);
1923 
1924 static void __verbs_timer(unsigned long arg)
1925 {
1926  struct ipath_devdata *dd = (struct ipath_devdata *) arg;
1927 
1928  /* Handle verbs layer timeouts. */
1929  ipath_ib_timer(dd->verbs_dev);
1930 
1931  mod_timer(&dd->verbs_timer, jiffies + 1);
1932 }
1933 
1934 static int enable_timer(struct ipath_devdata *dd)
1935 {
1936  /*
1937  * Early chips had a design flaw where the chip and kernel idea
1938  * of the tail register don't always agree, and therefore we won't
1939  * get an interrupt on the next packet received.
1940  * If the board supports per packet receive interrupts, use it.
1941  * Otherwise, the timer function periodically checks for packets
1942  * to cover this case.
1943  * Either way, the timer is needed for verbs layer related
1944  * processing.
1945  */
1946  if (dd->ipath_flags & IPATH_GPIO_INTR) {
1947  ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
1948  0x2074076542310ULL);
1949  /* Enable GPIO bit 2 interrupt */
1950  dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
1951  ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1952  dd->ipath_gpio_mask);
1953  }
1954 
1955  init_timer(&dd->verbs_timer);
1956  dd->verbs_timer.function = __verbs_timer;
1957  dd->verbs_timer.data = (unsigned long)dd;
1958  dd->verbs_timer.expires = jiffies + 1;
1959  add_timer(&dd->verbs_timer);
1960 
1961  return 0;
1962 }
1963 
1964 static int disable_timer(struct ipath_devdata *dd)
1965 {
1966  /* Disable GPIO bit 2 interrupt */
1967  if (dd->ipath_flags & IPATH_GPIO_INTR) {
1968  /* Disable GPIO bit 2 interrupt */
1969  dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
1970  ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1971  dd->ipath_gpio_mask);
1972  /*
1973  * We might want to undo changes to debugportselect,
1974  * but how?
1975  */
1976  }
1977 
1979 
1980  return 0;
1981 }
1982 
1983 /**
1984  * ipath_register_ib_device - register our device with the infiniband core
1985  * @dd: the device data structure
1986  * Return the allocated ipath_ibdev pointer or NULL on error.
1987  */
1988 int ipath_register_ib_device(struct ipath_devdata *dd)
1989 {
1990  struct ipath_verbs_counters cntrs;
1991  struct ipath_ibdev *idev;
1992  struct ib_device *dev;
1993  struct ipath_verbs_txreq *tx;
1994  unsigned i;
1995  int ret;
1996 
1997  idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
1998  if (idev == NULL) {
1999  ret = -ENOMEM;
2000  goto bail;
2001  }
2002 
2003  dev = &idev->ibdev;
2004 
2005  if (dd->ipath_sdma_descq_cnt) {
2006  tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx,
2007  GFP_KERNEL);
2008  if (tx == NULL) {
2009  ret = -ENOMEM;
2010  goto err_tx;
2011  }
2012  } else
2013  tx = NULL;
2014  idev->txreq_bufs = tx;
2015 
2016  /* Only need to initialize non-zero fields. */
2017  spin_lock_init(&idev->n_pds_lock);
2018  spin_lock_init(&idev->n_ahs_lock);
2019  spin_lock_init(&idev->n_cqs_lock);
2020  spin_lock_init(&idev->n_qps_lock);
2021  spin_lock_init(&idev->n_srqs_lock);
2022  spin_lock_init(&idev->n_mcast_grps_lock);
2023 
2024  spin_lock_init(&idev->qp_table.lock);
2025  spin_lock_init(&idev->lk_table.lock);
2026  idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
2027  /* Set the prefix to the default value (see ch. 4.1.1) */
2028  idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL);
2029 
2030  ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
2031  if (ret)
2032  goto err_qp;
2033 
2034  /*
2035  * The top ib_ipath_lkey_table_size bits are used to index the
2036  * table. The lower 8 bits can be owned by the user (copied from
2037  * the LKEY). The remaining bits act as a generation number or tag.
2038  */
2039  idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
2040  idev->lk_table.table = kzalloc(idev->lk_table.max *
2041  sizeof(*idev->lk_table.table),
2042  GFP_KERNEL);
2043  if (idev->lk_table.table == NULL) {
2044  ret = -ENOMEM;
2045  goto err_lk;
2046  }
2047  INIT_LIST_HEAD(&idev->pending_mmaps);
2048  spin_lock_init(&idev->pending_lock);
2049  idev->mmap_offset = PAGE_SIZE;
2050  spin_lock_init(&idev->mmap_offset_lock);
2051  INIT_LIST_HEAD(&idev->pending[0]);
2052  INIT_LIST_HEAD(&idev->pending[1]);
2053  INIT_LIST_HEAD(&idev->pending[2]);
2054  INIT_LIST_HEAD(&idev->piowait);
2055  INIT_LIST_HEAD(&idev->rnrwait);
2056  INIT_LIST_HEAD(&idev->txreq_free);
2057  idev->pending_index = 0;
2058  idev->port_cap_flags =
2059  IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
2060  if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
2061  idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
2062  idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
2063  idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
2064  idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
2065  idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
2066  idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
2067 
2068  /* Snapshot current HW counters to "clear" them. */
2069  ipath_get_counters(dd, &cntrs);
2070  idev->z_symbol_error_counter = cntrs.symbol_error_counter;
2071  idev->z_link_error_recovery_counter =
2072  cntrs.link_error_recovery_counter;
2073  idev->z_link_downed_counter = cntrs.link_downed_counter;
2074  idev->z_port_rcv_errors = cntrs.port_rcv_errors;
2075  idev->z_port_rcv_remphys_errors =
2076  cntrs.port_rcv_remphys_errors;
2077  idev->z_port_xmit_discards = cntrs.port_xmit_discards;
2078  idev->z_port_xmit_data = cntrs.port_xmit_data;
2079  idev->z_port_rcv_data = cntrs.port_rcv_data;
2080  idev->z_port_xmit_packets = cntrs.port_xmit_packets;
2081  idev->z_port_rcv_packets = cntrs.port_rcv_packets;
2082  idev->z_local_link_integrity_errors =
2083  cntrs.local_link_integrity_errors;
2084  idev->z_excessive_buffer_overrun_errors =
2085  cntrs.excessive_buffer_overrun_errors;
2086  idev->z_vl15_dropped = cntrs.vl15_dropped;
2087 
2088  for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
2089  list_add(&tx->txreq.list, &idev->txreq_free);
2090 
2091  /*
2092  * The system image GUID is supposed to be the same for all
2093  * IB HCAs in a single system but since there can be other
2094  * device types in the system, we can't be sure this is unique.
2095  */
2096  if (!sys_image_guid)
2097  sys_image_guid = dd->ipath_guid;
2098  idev->sys_image_guid = sys_image_guid;
2099  idev->ib_unit = dd->ipath_unit;
2100  idev->dd = dd;
2101 
2102  strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
2103  dev->owner = THIS_MODULE;
2104  dev->node_guid = dd->ipath_guid;
2105  dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
2106  dev->uverbs_cmd_mask =
2107  (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
2108  (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
2109  (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
2110  (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
2111  (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
2112  (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
2113  (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
2114  (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
2115  (1ull << IB_USER_VERBS_CMD_REG_MR) |
2116  (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
2117  (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2118  (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
2119  (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
2120  (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
2121  (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
2122  (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
2123  (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
2124  (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
2125  (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
2126  (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
2127  (1ull << IB_USER_VERBS_CMD_POST_SEND) |
2128  (1ull << IB_USER_VERBS_CMD_POST_RECV) |
2129  (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
2130  (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
2131  (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
2132  (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
2133  (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
2134  (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
2135  (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
2136  dev->node_type = RDMA_NODE_IB_CA;
2137  dev->phys_port_cnt = 1;
2138  dev->num_comp_vectors = 1;
2139  dev->dma_device = &dd->pcidev->dev;
2140  dev->query_device = ipath_query_device;
2141  dev->modify_device = ipath_modify_device;
2142  dev->query_port = ipath_query_port;
2143  dev->modify_port = ipath_modify_port;
2144  dev->query_pkey = ipath_query_pkey;
2145  dev->query_gid = ipath_query_gid;
2146  dev->alloc_ucontext = ipath_alloc_ucontext;
2147  dev->dealloc_ucontext = ipath_dealloc_ucontext;
2148  dev->alloc_pd = ipath_alloc_pd;
2149  dev->dealloc_pd = ipath_dealloc_pd;
2150  dev->create_ah = ipath_create_ah;
2151  dev->destroy_ah = ipath_destroy_ah;
2152  dev->query_ah = ipath_query_ah;
2153  dev->create_srq = ipath_create_srq;
2154  dev->modify_srq = ipath_modify_srq;
2155  dev->query_srq = ipath_query_srq;
2156  dev->destroy_srq = ipath_destroy_srq;
2157  dev->create_qp = ipath_create_qp;
2158  dev->modify_qp = ipath_modify_qp;
2159  dev->query_qp = ipath_query_qp;
2160  dev->destroy_qp = ipath_destroy_qp;
2161  dev->post_send = ipath_post_send;
2162  dev->post_recv = ipath_post_receive;
2163  dev->post_srq_recv = ipath_post_srq_receive;
2164  dev->create_cq = ipath_create_cq;
2165  dev->destroy_cq = ipath_destroy_cq;
2166  dev->resize_cq = ipath_resize_cq;
2167  dev->poll_cq = ipath_poll_cq;
2168  dev->req_notify_cq = ipath_req_notify_cq;
2169  dev->get_dma_mr = ipath_get_dma_mr;
2170  dev->reg_phys_mr = ipath_reg_phys_mr;
2171  dev->reg_user_mr = ipath_reg_user_mr;
2172  dev->dereg_mr = ipath_dereg_mr;
2173  dev->alloc_fmr = ipath_alloc_fmr;
2174  dev->map_phys_fmr = ipath_map_phys_fmr;
2175  dev->unmap_fmr = ipath_unmap_fmr;
2176  dev->dealloc_fmr = ipath_dealloc_fmr;
2177  dev->attach_mcast = ipath_multicast_attach;
2178  dev->detach_mcast = ipath_multicast_detach;
2179  dev->process_mad = ipath_process_mad;
2180  dev->mmap = ipath_mmap;
2181  dev->dma_ops = &ipath_dma_mapping_ops;
2182 
2183  snprintf(dev->node_desc, sizeof(dev->node_desc),
2184  IPATH_IDSTR " %s", init_utsname()->nodename);
2185 
2186  ret = ib_register_device(dev, NULL);
2187  if (ret)
2188  goto err_reg;
2189 
2190  if (ipath_verbs_register_sysfs(dev))
2191  goto err_class;
2192 
2193  enable_timer(dd);
2194 
2195  goto bail;
2196 
2197 err_class:
2198  ib_unregister_device(dev);
2199 err_reg:
2200  kfree(idev->lk_table.table);
2201 err_lk:
2202  kfree(idev->qp_table.table);
2203 err_qp:
2204  kfree(idev->txreq_bufs);
2205 err_tx:
2206  ib_dealloc_device(dev);
2207  ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
2208  idev = NULL;
2209 
2210 bail:
2211  dd->verbs_dev = idev;
2212  return ret;
2213 }
2214 
2215 void ipath_unregister_ib_device(struct ipath_ibdev *dev)
2216 {
2217  struct ib_device *ibdev = &dev->ibdev;
2218  u32 qps_inuse;
2219 
2220  ib_unregister_device(ibdev);
2221 
2222  disable_timer(dev->dd);
2223 
2224  if (!list_empty(&dev->pending[0]) ||
2225  !list_empty(&dev->pending[1]) ||
2226  !list_empty(&dev->pending[2]))
2227  ipath_dev_err(dev->dd, "pending list not empty!\n");
2228  if (!list_empty(&dev->piowait))
2229  ipath_dev_err(dev->dd, "piowait list not empty!\n");
2230  if (!list_empty(&dev->rnrwait))
2231  ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
2232  if (!ipath_mcast_tree_empty())
2233  ipath_dev_err(dev->dd, "multicast table memory leak!\n");
2234  /*
2235  * Note that ipath_unregister_ib_device() can be called before all
2236  * the QPs are destroyed!
2237  */
2238  qps_inuse = ipath_free_all_qps(&dev->qp_table);
2239  if (qps_inuse)
2240  ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
2241  qps_inuse);
2242  kfree(dev->qp_table.table);
2243  kfree(dev->lk_table.table);
2244  kfree(dev->txreq_bufs);
2245  ib_dealloc_device(ibdev);
2246 }
2247 
2248 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
2249  char *buf)
2250 {
2251  struct ipath_ibdev *dev =
2252  container_of(device, struct ipath_ibdev, ibdev.dev);
2253 
2254  return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
2255 }
2256 
2257 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
2258  char *buf)
2259 {
2260  struct ipath_ibdev *dev =
2261  container_of(device, struct ipath_ibdev, ibdev.dev);
2262  int ret;
2263 
2264  ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
2265  if (ret < 0)
2266  goto bail;
2267  strcat(buf, "\n");
2268  ret = strlen(buf);
2269 
2270 bail:
2271  return ret;
2272 }
2273 
2274 static ssize_t show_stats(struct device *device, struct device_attribute *attr,
2275  char *buf)
2276 {
2277  struct ipath_ibdev *dev =
2278  container_of(device, struct ipath_ibdev, ibdev.dev);
2279  int i;
2280  int len;
2281 
2282  len = sprintf(buf,
2283  "RC resends %d\n"
2284  "RC no QACK %d\n"
2285  "RC ACKs %d\n"
2286  "RC SEQ NAKs %d\n"
2287  "RC RDMA seq %d\n"
2288  "RC RNR NAKs %d\n"
2289  "RC OTH NAKs %d\n"
2290  "RC timeouts %d\n"
2291  "RC RDMA dup %d\n"
2292  "piobuf wait %d\n"
2293  "unaligned %d\n"
2294  "PKT drops %d\n"
2295  "WQE errs %d\n",
2296  dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
2297  dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
2298  dev->n_other_naks, dev->n_timeouts,
2299  dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
2300  dev->n_pkt_drops, dev->n_wqe_errs);
2301  for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
2302  const struct ipath_opcode_stats *si = &dev->opstats[i];
2303 
2304  if (!si->n_packets && !si->n_bytes)
2305  continue;
2306  len += sprintf(buf + len, "%02x %llu/%llu\n", i,
2307  (unsigned long long) si->n_packets,
2308  (unsigned long long) si->n_bytes);
2309  }
2310  return len;
2311 }
2312 
2313 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
2314 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
2315 static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
2316 static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
2317 
2318 static struct device_attribute *ipath_class_attributes[] = {
2319  &dev_attr_hw_rev,
2320  &dev_attr_hca_type,
2321  &dev_attr_board_id,
2322  &dev_attr_stats
2323 };
2324 
2325 static int ipath_verbs_register_sysfs(struct ib_device *dev)
2326 {
2327  int i;
2328  int ret;
2329 
2330  for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
2331  if (device_create_file(&dev->dev,
2332  ipath_class_attributes[i])) {
2333  ret = 1;
2334  goto bail;
2335  }
2336 
2337  ret = 0;
2338 
2339 bail:
2340  return ret;
2341 }