#include <linux/string.h>
#include <linux/slab.h>
#include <linux/sched.h>
static const u8 mthca_opcode[] = {
static void *get_recv_wqe(struct mthca_qp *qp, int n)
static void *get_send_wqe(struct mthca_qp *qp, int n)
                (n << qp->sq.wqe_shift);
                (n << qp->sq.wqe_shift)) >>
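/*
 * get_recv_wqe()/get_send_wqe() turn a WQE index into a buffer address by
 * shifting the index by the queue's wqe_shift (WQEs are power-of-two sized);
 * the second expression above appears to belong to the indirect-buffer path,
 * where the offset is further split into a page index and a page offset.
 */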
static void mthca_wq_reset(struct mthca_wq *wq)
                mthca_warn(dev, "Async event %d for bogus QP %08x\n",

        event.device     = &dev->ib_dev;
        event.element.qp = &qp->ibqp;
        if (qp->ibqp.event_handler)
                qp->ibqp.event_handler(&event, qp->ibqp.qp_context);
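/*
 * QP async-event handling: events for unknown QPNs only trigger the warning
 * above; otherwise an ib_event is filled in and passed to the consumer's
 * event_handler callback, if one is registered.
 */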
static int to_mthca_state(enum ib_qp_state ib_state)
                mthca_warn(dev, "INIT_IB failed, return code %d.\n", err);
        u32 hw_access_flags = 0;
static inline enum ib_qp_state to_ib_qp_state(int mthca_state)
        switch (mthca_state) {

static inline enum ib_mig_state to_ib_mig_state(int mthca_mig_state)
        switch (mthca_mig_state) {
static int to_ib_qp_access_flags(int mthca_flags)

        memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
        ib_ah_attr->grh.traffic_class =
        ib_ah_attr->grh.flow_label =
               path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
        if (IS_ERR(mailbox)) {
                err = PTR_ERR(mailbox);
                mthca_warn(dev, "QUERY_QP failed (%d)\n", err);
        qp_param = mailbox->buf;
        qp->state = to_ib_qp_state(mthca_state);
        qp_attr->cap.max_send_wr  = qp->sq.max;
        qp_attr->cap.max_recv_wr  = qp->rq.max;
        qp_attr->cap.max_send_sge = qp->sq.max_gs;
        qp_attr->cap.max_recv_sge = qp->rq.max_gs;
        qp_init_attr->cap = qp_attr->cap;
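/*
 * mthca_query_qp() reads the QP context via a QUERY_QP firmware command
 * (using the mailbox above) and then reports the software-side queue
 * capacities (max send/receive WRs and SGEs) back to the caller.
 */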
static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah,
        if (ah->grh.sgid_index >= dev->limits.gid_table_len) {
                mthca_dbg(dev, "sgid_index (%u) too large. max is %d\n",
                          ah->grh.sgid_index, dev->limits.gid_table_len - 1);
                        (ah->grh.traffic_class << 20) |
                        (ah->grh.flow_label));
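/*
 * mthca_path_set() validates the GRH sgid_index against the device's GID
 * table size and packs SL, traffic class and flow label into one big-endian
 * word of the hardware path structure (the surrounding cpu_to_be32() is not
 * shown in this excerpt).
 */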
static int __mthca_modify_qp(struct ib_qp *ibqp,
        if (IS_ERR(mailbox)) {
                err = PTR_ERR(mailbox);
        qp_param = mailbox->buf;
        qp_context = &qp_param->context;
        memset(qp_param, 0, sizeof *qp_param);
                mthca_dbg(dev, "path MTU (%u) is invalid\n",
        if (mthca_is_memfree(dev)) {

        if (qp->ibqp.uobject)

        if (attr_mask & IB_QP_PKEY_INDEX) {

                u8 sched_queue = ibqp->uobject ? 0x2 : 0x1;

                if (mthca_is_memfree(dev))

                mthca_dbg(dev, "Alternate P_Key index (%u) too large. max is %d\n",

                mthca_dbg(dev, "Alternate port number (%u) is invalid\n",
        if (mthca_is_memfree(dev)) {

        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {

        if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
                qp_context->params2 |= get_hw_access_flags(qp, attr, attr_mask);

        if (mthca_is_memfree(dev))

        if (attr_mask & IB_QP_QKEY) {

                                               to_msrq(ibqp->srq)->srqn);

                mthca_warn(dev, "modify QP %d->%d returned %d.\n",
                           cur_state, new_state, err);

        if (attr_mask & IB_QP_ACCESS_FLAGS)

        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)

        if (attr_mask & IB_QP_ALT_PATH)

        store_attrs(to_msqp(qp), attr, attr_mask);
        if (is_qp0(dev, qp)) {
                init_port(dev, qp->port);

                if (qp->ibqp.send_cq != qp->ibqp.recv_cq)

                mthca_wq_reset(&qp->sq);
                qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);

                mthca_wq_reset(&qp->rq);
                qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);

                if (mthca_is_memfree(dev)) {
        spin_lock_irq(&qp->sq.lock);
        spin_lock(&qp->rq.lock);
        cur_state = qp->state;
        spin_unlock(&qp->rq.lock);
        spin_unlock_irq(&qp->sq.lock);
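/*
 * mthca_modify_qp() takes the SQ lock (IRQ-safe) and then the RQ lock only
 * long enough to take a consistent snapshot of qp->state before validating
 * the requested transition; both locks are dropped again immediately.
 */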
                mthca_dbg(dev, "Bad QP transition (transport %d) "
                          "%d->%d with attr 0x%08x\n",

        if ((attr_mask & IB_QP_PKEY_INDEX) &&
                mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",

        if ((attr_mask & IB_QP_PORT) &&

        if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
                mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",

        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
                mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",

        if (cur_state == new_state && cur_state == IB_QPS_RESET) {

        err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp,
                               int desc_sz)
        if (mthca_is_memfree(dev))
        return max_data_size;

static inline int mthca_max_inline_data(struct mthca_pd *pd, int max_data_size)
static void mthca_adjust_qp_caps(struct mthca_dev *dev,
        int max_data_size = mthca_max_data_size(dev, qp,
                                                1 << qp->sq.wqe_shift));
                              (min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) -
static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
        if (size > dev->limits.max_desc_sz)

        for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;

                size += mthca_is_memfree(dev) ?
                        sizeof (struct mthca_tavor_ud_seg);

                size += sizeof (struct mthca_raddr_seg);

                size += sizeof (struct mthca_raddr_seg);

                size = max_t(int, size,
                             sizeof (struct mthca_atomic_seg) +
                             sizeof (struct mthca_raddr_seg) +
                             sizeof (struct mthca_data_seg));

                size = max_t(int, size, sizeof (struct mthca_bind_seg));

        size += sizeof (struct mthca_next_seg);

        if (size > dev->limits.max_desc_sz)

        for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
        qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
                                    1 << qp->sq.wqe_shift);
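/*
 * mthca_alloc_wqe_buf() sizes a descriptor for the worst case the transport
 * can generate (UD, remote-address, atomic and bind segments plus the next
 * segment), rounds both WQE sizes up to powers of two via wqe_shift, and
 * places the send queue right after the receive queue in one buffer,
 * aligned to the send WQE size.
 */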
        if (pd->ibpd.uobject)
                          (qp->sq.max << qp->sq.wqe_shift));
                              &qp->queue, &qp->is_direct, pd, 0, &qp->mr);

static void mthca_free_wqe_buf(struct mthca_dev *dev,
                               struct mthca_qp *qp)
                                       (qp->sq.max << qp->sq.wqe_shift)),
static int mthca_map_memfree(struct mthca_dev *dev,
        if (mthca_is_memfree(dev)) {

static void mthca_unmap_memfree(struct mthca_dev *dev,

static int mthca_alloc_memfree(struct mthca_dev *dev,
        if (mthca_is_memfree(dev)) {
                                                 qp->qpn, &qp->rq.db);
                if (qp->rq.db_index < 0)
                                                 qp->qpn, &qp->sq.db);
                if (qp->sq.db_index < 0) {
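/*
 * On mem-free (Arbel) HCAs the driver allocates doorbell records for the
 * receive and send queues here (the argument lists above belong to the
 * mthca_alloc_db() calls); the negative db_index checks are the
 * corresponding failure paths.
 */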
static void mthca_free_memfree(struct mthca_dev *dev,
        if (mthca_is_memfree(dev)) {

static int mthca_alloc_qp_common(struct mthca_dev *dev,
        mthca_wq_reset(&qp->sq);
        mthca_wq_reset(&qp->rq);

        ret = mthca_map_memfree(dev, qp);

        ret = mthca_alloc_wqe_buf(dev, pd, qp);
                mthca_unmap_memfree(dev, qp);

        mthca_adjust_qp_caps(dev, pd, qp);

        if (pd->ibpd.uobject)

        ret = mthca_alloc_memfree(dev, qp);
                mthca_free_wqe_buf(dev, qp);
                mthca_unmap_memfree(dev, qp);

        if (mthca_is_memfree(dev)) {
                for (i = 0; i < qp->rq.max; ++i) {
                        next = get_recv_wqe(qp, i);
                        for (scatter = (void *) (next + 1);
                             (void *) scatter < (void *) next + (1 << qp->rq.wqe_shift);

                for (i = 0; i < qp->sq.max; ++i) {
                        next = get_send_wqe(qp, i);

                for (i = 0; i < qp->rq.max; ++i) {
                        next = get_recv_wqe(qp, i);
                                              qp->rq.wqe_shift) | 1);

        qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
        qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
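/*
 * mthca_alloc_qp_common() pre-initializes every WQE: on mem-free HCAs the
 * first two loops walk the receive and send queues setting up each
 * descriptor's next/scatter fields, while on Tavor-style HCAs the last loop
 * appears to chain the receive WQEs into a ring by writing the following
 * descriptor's offset (shifted by rq.wqe_shift) into each one.
 */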
        int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz);

        if (mthca_is_memfree(dev)) {

        err = mthca_set_qp_size(dev, cap, pd, qp);

        err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,

        spin_lock_irq(&dev->qp_table.lock);
                        qp->qpn & (dev->limits.num_qps - 1), qp);
        spin_unlock_irq(&dev->qp_table.lock);
static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
        if (send_cq == recv_cq) {
                spin_lock_irq(&send_cq->lock);
        } else if (send_cq->cqn < recv_cq->cqn) {
                spin_lock_irq(&send_cq->lock);
                spin_lock_irq(&recv_cq->lock);
static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
        if (send_cq == recv_cq) {
                spin_unlock_irq(&send_cq->lock);
        } else if (send_cq->cqn < recv_cq->cqn) {
                spin_unlock(&recv_cq->lock);
                spin_unlock_irq(&send_cq->lock);
                spin_unlock(&send_cq->lock);
                spin_unlock_irq(&recv_cq->lock);
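/*
 * When the send and receive CQs differ, both helpers order the two CQ locks
 * by CQ number (lower cqn first) so every caller acquires them in the same
 * order, avoiding an AB-BA deadlock; unlocking mirrors that order.
 */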
        u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;

        sqp->qp.transport = MLX;
        err = mthca_set_qp_size(dev, cap, pd, &sqp->qp);
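/*
 * mthca_alloc_sqp(): special QPs (QP0/QP1) map to reserved hardware QP
 * numbers computed per port from qp_table.sqp_start, and use the MLX
 * transport, i.e. the driver builds the IB headers itself (see
 * build_mlx_header() below).
 */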
        spin_lock_irq(&dev->qp_table.lock);
        spin_unlock_irq(&dev->qp_table.lock);

        sqp->qp.transport = MLX;

        err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
                                    send_policy, &sqp->qp);
        mthca_lock_cqs(send_cq, recv_cq);
        mthca_unlock_cqs(send_cq, recv_cq);

        spin_lock_irq(&dev->qp_table.lock);
        spin_unlock_irq(&dev->qp_table.lock);

        send_cq = to_mcq(qp->ibqp.send_cq);
        recv_cq = to_mcq(qp->ibqp.recv_cq);

        mthca_lock_cqs(send_cq, recv_cq);
        mthca_unlock_cqs(send_cq, recv_cq);

        if (!qp->ibqp.uobject) {
                if (send_cq != recv_cq)

        mthca_free_memfree(dev, qp);
        mthca_free_wqe_buf(dev, qp);
        mthca_unmap_memfree(dev, qp);

        if (is_sqp(dev, qp)) {
                                  to_msqp(qp)->header_buf_size,
                                  to_msqp(qp)->header_buf,
                                  to_msqp(qp)->header_dma);
                                  (sqp->ud_header.lrh.service_level << 8));

                sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;

                sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;

        sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;

        if (!sqp->qp.ibqp.qp_num)
                           wr->wr.ud.pkey_index, &pkey);
                                               sqp->qkey : wr->wr.ud.remote_qkey);
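/*
 * These fragments are from build_mlx_header(), which fills in the LRH, BTH
 * and DETH fields of a UD packet for the MLX transport: QP0 traffic goes out
 * on VL15, the P_Key comes from the cached P_Key table, and the well-known
 * Q_Key bit in remote_qkey selects the QP's own qkey instead.
 */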
static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
        spin_lock(&cq->lock);
        spin_unlock(&cq->lock);

        return cur + nreq >= wq->max;
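/*
 * mthca_wq_overflow() re-checks the queue occupancy under the completion
 * queue's lock (completion processing is what advances wq->tail) before
 * deciding that posting nreq more work requests would exceed wq->max.
 */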
        struct mthca_qp *qp = to_mqp(ibqp);
        unsigned long flags;

        ind = qp->sq.next_ind;
        for (nreq = 0; wr; ++nreq, wr = wr->next) {
                if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
                        mthca_err(dev, "SQ %06x full (%u head, %u tail,"
                                        " %d max, %d nreq)\n", qp->qpn,
                                        qp->sq.head, qp->sq.tail,

                wqe = get_send_wqe(qp, ind);
                prev_wqe = qp->sq.last;
                        switch (wr->opcode) {
                                set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
                                              wr->wr.atomic.rkey);
                                set_atomic_seg(wqe, wr);
                                        sizeof (struct mthca_atomic_seg)) / 16;

                                set_raddr_seg(wqe, wr->wr.rdma.remote_addr,

                        switch (wr->opcode) {
                                set_raddr_seg(wqe, wr->wr.rdma.remote_addr,

                        set_tavor_ud_seg(wqe, wr);

                        err = build_mlx_header(dev, to_msqp(qp), ind, wr,

                if (wr->num_sge > qp->sq.max_gs) {

                for (i = 0; i < wr->num_sge; ++i) {
                        mthca_set_data_seg(wqe, wr->sg_list + i);

                        ((u32 *) wqe)[1] = 0;

                qp->wrid[ind + qp->rq.max] = wr->wr_id;

                if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
                                    mthca_opcode[wr->opcode]);
                        op0 = mthca_opcode[wr->opcode];

                mthca_write64(((qp->sq.next_ind << qp->sq.wqe_shift) +
                              (qp->qpn << 8) | size0,
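/*
 * At the end of mthca_tavor_post_send() the MMIO send doorbell is rung with
 * mthca_write64(): one word carries the byte offset of the first new WQE
 * together with its fence flag and opcode, the other carries the QPN and the
 * first WQE's size.
 */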
        qp->sq.next_ind = ind;
        qp->sq.head    += nreq;

        spin_unlock_irqrestore(&qp->sq.lock, flags);
        struct mthca_qp *qp = to_mqp(ibqp);
        unsigned long flags;

        ind = qp->rq.next_ind;
        for (nreq = 0; wr; wr = wr->next) {
                if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
                        mthca_err(dev, "RQ %06x full (%u head, %u tail,"
                                        " %d max, %d nreq)\n", qp->qpn,
                                        qp->rq.head, qp->rq.tail,

                wqe = get_recv_wqe(qp, ind);
                prev_wqe = qp->rq.last;
                for (i = 0; i < wr->num_sge; ++i) {
                        mthca_set_data_seg(wqe, wr->sg_list + i);

                        mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,

                        qp->rq.next_ind = ind;

                mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,

        qp->rq.next_ind = ind;
        qp->rq.head    += nreq;

        spin_unlock_irqrestore(&qp->rq.lock, flags);
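/*
 * mthca_tavor_post_receive() rings the receive doorbell twice: inside the
 * loop whenever the number of posted WQEs reaches the per-doorbell limit
 * (resetting rq.next_ind), and once more at the end for any remaining WQEs,
 * before updating rq.next_ind/rq.head and dropping the lock.
 */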
        struct mthca_qp *qp = to_mqp(ibqp);
        unsigned long flags;

        ind = qp->sq.head & (qp->sq.max - 1);
        for (nreq = 0; wr; ++nreq, wr = wr->next) {
                                ((qp->sq.head & 0xffff) << 8) | f0 | op0;

                        mthca_write64(dbhi, (qp->qpn << 8) | size0,

                if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
                        mthca_err(dev, "SQ %06x full (%u head, %u tail,"
                                        " %d max, %d nreq)\n", qp->qpn,
                                        qp->sq.head, qp->sq.tail,

                wqe = get_send_wqe(qp, ind);
                prev_wqe = qp->sq.last;
                        switch (wr->opcode) {
                                set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
                                              wr->wr.atomic.rkey);
                                set_atomic_seg(wqe, wr);
                                        sizeof (struct mthca_atomic_seg)) / 16;

                                set_raddr_seg(wqe, wr->wr.rdma.remote_addr,

                        switch (wr->opcode) {
                                set_raddr_seg(wqe, wr->wr.rdma.remote_addr,

                        set_arbel_ud_seg(wqe, wr);

                        err = build_mlx_header(dev, to_msqp(qp), ind, wr,

                if (wr->num_sge > qp->sq.max_gs) {

                for (i = 0; i < wr->num_sge; ++i) {
                        mthca_set_data_seg(wqe, wr->sg_list + i);

                        ((u32 *) wqe)[1] = 0;

                qp->wrid[ind + qp->rq.max] = wr->wr_id;

                if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
                                    mthca_opcode[wr->opcode]);
                        op0 = mthca_opcode[wr->opcode];
                dbhi = (nreq << 24) | ((qp->sq.head & 0xffff) << 8) | f0 | op0;

                qp->sq.head += nreq;

        spin_unlock_irqrestore(&qp->sq.lock, flags);
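/*
 * On mem-free (Arbel) HCAs the send path encodes the request count and the
 * current sq.head into dbhi, updates the doorbell record in memory
 * (qp->sq.db, not shown here) and only then writes the MMIO doorbell;
 * memory barriers between those steps (also not shown) keep the HCA from
 * seeing a stale doorbell record.
 */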
        struct mthca_qp *qp = to_mqp(ibqp);
        unsigned long flags;

        ind = qp->rq.head & (qp->rq.max - 1);
        for (nreq = 0; wr; ++nreq, wr = wr->next) {
                if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
                        mthca_err(dev, "RQ %06x full (%u head, %u tail,"
                                        " %d max, %d nreq)\n", qp->qpn,
                                        qp->rq.head, qp->rq.tail,

                wqe = get_recv_wqe(qp, ind);

                for (i = 0; i < wr->num_sge; ++i) {
                        mthca_set_data_seg(wqe, wr->sg_list + i);

                if (i < qp->rq.max_gs)
                        mthca_set_data_seg_inval(wqe);
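/*
 * In the Arbel receive path, if a work request uses fewer scatter entries
 * than rq.max_gs, the next entry is marked invalid with
 * mthca_set_data_seg_inval() so the HCA knows where the scatter list ends.
 */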
        qp->rq.head += nreq;

        spin_unlock_irqrestore(&qp->rq.lock, flags);
        if (qp->ibqp.srq && !is_send) {

                next = get_send_wqe(qp, index);

                next = get_recv_wqe(qp, index);
        for (i = 0; i < 2; ++i) {
                                   "%d, aborting.\n", err);

        for (i = 0; i < 2; ++i)

        for (i = 0; i < 2; ++i)