35 #include <linux/slab.h>
36 #include <linux/netdevice.h>
93 static const __be32 mlx4_ib_opcode[] = {
116 if (!mlx4_is_master(dev->dev))
119 return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn &&
120 qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn +
130 real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
131 qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
132 qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3);
136 if (mlx4_is_mfunc(dev->dev)) {
137 for (i = 0; i < dev->dev->caps.num_ports; i++) {
138 if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
139 qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
155 real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
156 qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
157 qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1);
161 if (mlx4_is_mfunc(dev->dev)) {
162 for (i = 0; i < dev->dev->caps.num_ports; i++) {
163 if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
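/* The QPN range checks above classify special QPs: real QP0/QP1 fall in
 * [base_sqpn, base_sqpn + 3] (QP0 alone in [base_sqpn, base_sqpn + 1]),
 * while under multi-function (SR-IOV) mode the per-port qp0_proxy/qp1_proxy
 * entries and the base_tunnel_sqpn range identify proxy and tunnel QPs. */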
174 return mlx4_buf_offset(&qp->buf, offset);
177 static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
179 return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
182 static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
184 return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
208 for (i = 0; i < s; i += 64) {
209 ind = (i >> qp->sq.wqe_shift) + n;
212 buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
213 wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
217 ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
219 for (i = 64; i < s; i += 64) {
226 static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
233 ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
239 memset(dgram, 0, sizeof *dgram);
249 ctrl->srcrb_flags = 0;
250 ctrl->fence_size = size / 16;
264 static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind)
266 unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1));
268 post_nop_wqe(qp, ind, s << qp->sq.wqe_shift);
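/* stamp_send_wqe() walks a send WQE in 64-byte chunks and rewrites the start
 * of each chunk so that a prefetched, no-longer-valid WQE is ignored by the
 * HCA; post_nop_wqe() and pad_wraparound() fill the slots left at the end of
 * the send queue with a NOP WQE when a request would otherwise wrap. */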
277 struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
280 to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
284 event.element.qp = ibqp;
311 pr_warn("Unexpected event type %d "
312 "on QP %06x\n", type, qp->qpn);
367 int is_user, int has_rq, struct mlx4_ib_qp *qp)
378 qp->rq.wqe_cnt = qp->rq.max_gs = 0;
397 min(dev->dev->caps.max_sq_sg,
398 dev->dev->caps.max_rq_sg));
427 send_wqe_overhead(type, qp->flags);
429 if (s > dev->dev->caps.max_sq_desc_sz)
484 if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes)
493 qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz,
495 send_wqe_overhead(type, qp->flags)) /
499 (qp->sq.wqe_cnt << qp->sq.wqe_shift);
500 if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
502 qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
504 qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
511 min(dev->dev->caps.max_sq_sg,
512 dev->dev->caps.max_rq_sg));
519 static int set_user_sq_size(struct mlx4_ib_dev *dev,
534 (qp->sq.wqe_cnt << qp->sq.wqe_shift);
548 for (i = 0; i < qp->rq.wqe_cnt; i++) {
578 for (i = 0; i < qp->rq.wqe_cnt; i++) {
607 if (mlx4_is_mfunc(dev->dev) &&
608 (!mlx4_is_master(dev->dev) ||
612 else if (mlx4_is_master(dev->dev))
619 init_attr->cap.max_recv_sge++;
626 !mlx4_is_master(dev->dev))
630 else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
636 qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave
669 err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
676 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
683 err = set_user_sq_size(dev, qp, &ucmd);
689 if (IS_ERR(qp->umem)) {
690 err = PTR_ERR(qp->umem);
703 if (qp_has_rq(init_attr)) {
718 err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
722 if (qp_has_rq(init_attr)) {
747 if (!qp->sq.wrid || !qp->rq.wrid) {
756 if (alloc_proxy_bufs(pd->device, qp)) {
777 qp->mqp.qpn |= (1 << 23);
786 qp->mqp.event = mlx4_ib_qp_event;
796 free_proxy_bufs(pd->device, qp);
799 if (qp_has_rq(init_attr))
816 if (!pd->uobject && qp_has_rq(init_attr))
842 if (send_cq == recv_cq) {
843 spin_lock_irq(&send_cq->lock);
845 } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
846 spin_lock_irq(&send_cq->lock);
849 spin_lock_irq(&recv_cq->lock);
857 if (send_cq == recv_cq) {
859 spin_unlock_irq(&send_cq->lock);
860 } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
861 spin_unlock(&recv_cq->lock);
862 spin_unlock_irq(&send_cq->lock);
864 spin_unlock(&send_cq->lock);
865 spin_unlock_irq(&recv_cq->lock);
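/* mlx4_ib_lock_cqs()/mlx4_ib_unlock_cqs() take a single lock when the send
 * and receive CQs are the same, and otherwise always lock the CQ with the
 * lower CQN first, so the lock order stays consistent and the two CQ locks
 * can never deadlock against each other. */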
869 static void del_gid_entries(struct mlx4_ib_qp *qp)
882 return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd);
884 return to_mpd(qp->ibqp.pd);
890 switch (qp->ibqp.qp_type) {
892 *send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq);
896 *send_cq = to_mcq(qp->ibqp.send_cq);
900 *send_cq = to_mcq(qp->ibqp.send_cq);
901 *recv_cq = to_mcq(qp->ibqp.recv_cq);
914 pr_warn("modify QP %06x to RESET failed.\n",
917 get_cqs(qp, &send_cq, &recv_cq);
919 mlx4_ib_lock_cqs(send_cq, recv_cq);
924 if (send_cq != recv_cq)
930 mlx4_ib_unlock_cqs(send_cq, recv_cq);
934 if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp))
949 free_proxy_bufs(&dev->ib_dev, qp);
961 if (!mlx4_is_mfunc(dev->dev) ||
962 (mlx4_is_master(dev->dev) &&
964 return dev->dev->phys_caps.base_sqpn +
970 return dev->dev->caps.qp0_proxy[attr->port_num - 1];
972 return dev->dev->caps.qp1_proxy[attr->port_num - 1];
1002 pd = to_mxrcd(init_attr->xrcd)->pd;
1003 xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
1004 init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq;
1020 err = create_qp_common(to_mdev(pd->device), pd, init_attr,
1023 return ERR_PTR(err);
1025 qp->ibqp.qp_num = qp->mqp.qpn;
1037 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
1038 get_sqp_num(to_mdev(pd->device), init_attr),
1041 return ERR_PTR(err);
1062 if (is_qp0(dev, mqp))
1066 destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
1068 if (is_sqp(dev, mqp))
1069 kfree(to_msqp(mqp));
1105 u32 hw_access_flags = 0;
1117 if (!dest_rd_atomic)
1168 if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
1169 pr_err("sgid_index (%u) too large. max is %d\n",
1170 ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
1179 (ah->grh.flow_label));
1185 ((port - 1) << 6) | ((ah->sl & 7) << 3);
1199 vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]);
1200 if (vlan_tag < 0x1000) {
1209 ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
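/* The sched_queue byte above encodes the port number ((port - 1) << 6)
 * together with the SL: 3 SL bits shifted left by 3 on an IB port, 4 SL bits
 * shifted left by 2 on an Ethernet (RoCE) port, where the VLAN id is taken
 * from the cached GID via rdma_get_vlan_id(). */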
1226 static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1227 const struct ib_qp_attr *attr, int attr_mask,
1239 context = kzalloc(sizeof *context, GFP_KERNEL);
1275 pr_err("path MTU (%u) is invalid\n",
1296 if (qp->ibqp.uobject)
1318 context->pri_path.counter_index = 0xff;
1321 if (attr_mask & IB_QP_PKEY_INDEX) {
1323 context->pri_path.disable_pkey_check = 0x40;
1330 attr_mask & IB_QP_PORT ?
1362 get_cqs(qp, &send_cq, &recv_cq);
1369 if (!qp->ibqp.uobject)
1392 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1399 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
1400 context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);
1415 if (attr_mask & IB_QP_QKEY) {
1420 if (mlx4_is_mfunc(dev->dev) &&
1424 pr_err("Cannot use reserved QKEY"
1425 " 0x%x (range 0xffff0000..0xffffffff"
1426 " is reserved)\n", attr->qkey);
1467 context->rlkey |= (1 << 4);
1479 for (i = 0; i < qp->sq.wqe_cnt; ++i) {
1480 ctrl = get_send_wqe(qp, i);
1485 stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
1490 to_mlx4_state(new_state), context, optpar,
1491 sqd_event, &qp->mqp);
1497 if (attr_mask & IB_QP_ACCESS_FLAGS)
1499 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1501 if (attr_mask & IB_QP_PORT) {
1503 update_mcg_macs(dev, qp);
1505 if (attr_mask & IB_QP_ALT_PATH)
1508 if (is_sqp(dev, qp))
1509 store_sqp_attrs(to_msqp(qp), attr, attr_mask);
1515 if (is_qp0(dev, qp)) {
1518 pr_warn("INIT_PORT failed for port %d\n",
1533 if (send_cq != recv_cq)
1551 int attr_mask, struct ib_udata *udata)
1564 pr_debug("qpn 0x%x: invalid attribute mask specified "
1565 "for transition %d to %d. qp_type %d,"
1566 " attr_mask 0x%x\n",
1567 ibqp->qp_num, cur_state, new_state,
1572 if ((attr_mask & IB_QP_PORT) &&
1574 pr_debug("qpn 0x%x: invalid port number (%d) specified "
1575 "for transition %d to %d. qp_type %d\n",
1586 if (attr_mask & IB_QP_PKEY_INDEX) {
1589 pr_debug("qpn 0x%x: invalid pkey index (%d) specified "
1590 "for transition %d to %d. qp_type %d\n",
1597 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
1599 pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. "
1600 "Transition %d to %d. qp_type %d\n",
1606 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
1608 pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. "
1609 "Transition %d to %d. qp_type %d\n",
1615 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
1620 err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
1627 static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
1629 void *wqe, unsigned *mlx_seg_len)
1649 send_size += wr->sg_list[i].length;
1671 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1689 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1702 if (header_size <= spc) {
1710 inl = (void *) (inl + 1) + spc;
1736 void *wqe, unsigned *mlx_seg_len)
1757 send_size += wr->sg_list[i].length;
1760 is_grh = mlx4_ib_ah_grh_present(ah);
1762 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
1766 sgid.global.subnet_prefix =
1767 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1769 sgid.global.interface_id =
1770 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1771 guid_cache[ah->av.ib.gid_index];
1775 ah->av.ib.gid_index, &sgid);
1780 vlan = rdma_get_vlan_id(&sgid);
1781 is_vlan = vlan < 0x1000;
1798 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
1802 sqp->ud_header.grh.source_gid.global.subnet_prefix =
1803 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1805 sqp->ud_header.grh.source_gid.global.interface_id =
1806 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1807 guid_cache[ah->av.ib.gid_index];
1811 ah->av.ib.gid_index,
1814 ah->av.ib.dgid, 16);
1823 (sqp->ud_header.lrh.service_level << 8));
1826 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1831 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1835 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
1851 ndev = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1];
1865 sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
1870 if (!sqp->qp.ibqp.qp_num)
1878 sqp->qkey : wr->wr.ud.remote_qkey);
1884 pr_err("built UD header of size %d:\n", header_size);
1885 for (i = 0; i < header_size / 4; ++i) {
1887 pr_err(" [%02x] ", i * 4);
1890 if ((i + 1) % 8 == 0)
1904 if (header_size <= spc) {
1912 inl = (void *) (inl + 1) + spc;
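/* The built UD header is copied into the WQE as an inline data segment; when
 * it does not fit in the space left before the next 64-byte boundary (spc),
 * the copy above splits it into two inline segments so that no inline segment
 * crosses a 64-byte chunk boundary. */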
1943 if (likely(cur + nreq < wq->max_post))
1947 spin_lock(&cq->lock);
1949 spin_unlock(&cq->lock);
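/* mlx4_wq_overflow(): the common case only compares head - tail + nreq
 * against max_post without locking; the CQ lock is taken only when the queue
 * looks full, to re-read the indices consistently with CQ polling, which
 * advances the tail under that lock. */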
1954 static __be32 convert_access(int acc)
1968 for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i)
2030 dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
2034 static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
2040 int port = *((u8 *) &av->ib.port_pd) & 0x3;
2044 sqp_av.g_slid = av->ib.g_slid & 0x7f;
2055 static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
2070 if (sizeof (hdr) <= spc) {
2080 inl = (void *) (inl + 1) + spc;
2081 memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc);
2091 static void set_mlx_icrc_seg(void *dseg)
2137 struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
2140 unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
2146 wr->num_sge > qp->sq.max_gs - (halign >> 4)))
2153 *lso_seg_len = halign;
2172 static void add_zero_len_inline(void *wqe)
2186 unsigned long flags;
2203 for (nreq = 0; wr; ++nreq, wr = wr->next) {
2207 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
2219 ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
2220 qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
2232 ctrl->imm = send_ieth(wr);
2234 wqe += sizeof *ctrl;
2235 size = sizeof *ctrl / 16;
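/* Every send WQE starts with a control segment; "size" accumulates the WQE
 * length in 16-byte units (hence the seglen / 16 additions below), which is
 * the unit the hardware expects in the control segment's fence_size field. */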
2240 switch (wr->opcode) {
2244 set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
2245 wr->wr.atomic.rkey);
2248 set_atomic_seg(wqe, wr);
2252 sizeof (struct mlx4_wqe_atomic_seg)) / 16;
2257 set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
2258 wr->wr.atomic.rkey);
2261 set_masked_atomic_seg(wqe, wr);
2265 sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16;
2272 set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
2281 set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
2289 set_fmr_seg(wqe, wr);
2301 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
2307 size += seglen / 16;
2312 set_datagram_seg(wqe, wr);
2319 set_datagram_seg(wqe, wr);
2324 err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
2329 lso_wqe = (__be32 *) wqe;
2331 size += seglen / 16;
2341 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
2347 size += seglen / 16;
2349 add_zero_len_inline(wqe);
2352 build_tunnel_header(wr, wqe, &seglen);
2354 size += seglen / 16;
2366 set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
2369 build_tunnel_header(wr, wqe, &seglen);
2371 size += seglen / 16;
2376 err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
2382 size += seglen / 16;
2397 dseg += wr->num_sge - 1;
2405 set_mlx_icrc_seg(dseg + 1);
2409 for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
2410 set_data_seg(dseg, wr->sg_list + i);
2418 *lso_wqe = lso_hdr_sz;
2430 if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
2452 stamp_send_wqe(qp, stamp, size * 16);
2453 ind = pad_wraparound(qp, ind);
2459 qp->sq.head += nreq;
2476 stamp_send_wqe(qp, stamp, size * 16);
2478 ind = pad_wraparound(qp, ind);
2482 spin_unlock_irqrestore(&qp->sq.lock, flags);
2492 unsigned long flags;
2499 max_gs = qp->rq.max_gs;
2502 ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
2504 for (nreq = 0; wr; ++nreq, wr = wr->next) {
2505 if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
2517 scat = get_recv_wqe(qp, ind);
2521 ib_dma_sync_single_for_device(ibqp->device,
2534 for (i = 0; i < wr->num_sge; ++i)
2535 __set_data_seg(scat + i, wr->sg_list + i);
2543 qp->rq.wrid[ind] = wr->wr_id;
2545 ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
2550 qp->rq.head += nreq;
2561 spin_unlock_irqrestore(&qp->rq.lock, flags);
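/* Receive posting stores one wr_id per ring slot, advances ind modulo
 * rq.wqe_cnt, and publishes the new rq.head only once, after the loop,
 * before the receive doorbell record is updated. */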
2568 switch (mlx4_state) {
2581 static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state)
2583 switch (mlx4_mig_state) {
2591 static int to_ib_qp_access_flags(int mlx4_flags)
2611 memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
2632 ib_ah_attr->grh.traffic_class =
2634 ib_ah_attr->grh.flow_label =
2637 path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
2665 qp->state = to_ib_qp_state(mlx4_state);
2706 qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
2707 qp_attr->cap.max_recv_sge = qp->rq.max_gs;
2710 qp_attr->cap.max_send_wr = qp->sq.wqe_cnt;
2711 qp_attr->cap.max_send_sge = qp->sq.max_gs;
2713 qp_attr->cap.max_send_wr = 0;
2714 qp_attr->cap.max_send_sge = 0;
2721 qp_attr->cap.max_inline_data = 0;
2723 qp_init_attr->cap = qp_attr->cap;