Linux Kernel  3.7.1
iser_verbs.c
1 /*
2  * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses. You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  * Redistribution and use in source and binary forms, with or
12  * without modification, are permitted provided that the following
13  * conditions are met:
14  *
15  * - Redistributions of source code must retain the above
16  * copyright notice, this list of conditions and the following
17  * disclaimer.
18  *
19  * - Redistributions in binary form must reproduce the above
20  * copyright notice, this list of conditions and the following
21  * disclaimer in the documentation and/or other materials
22  * provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 #include <linux/kernel.h>
34 #include <linux/module.h>
35 #include <linux/slab.h>
36 #include <linux/delay.h>
37 
38 #include "iscsi_iser.h"
39 
40 #define ISCSI_ISER_MAX_CONN 8
41 #define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
42 #define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
43 
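/* Note: the CQ lengths above are just the per-connection work-request depth
 * (ISER_QP_MAX_RECV_DTOS receives / ISER_QP_MAX_REQ_DTOS sends) multiplied by
 * ISCSI_ISER_MAX_CONN, so one CQ can absorb a full flush from every
 * connection that may share it. */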
44 static void iser_cq_tasklet_fn(unsigned long data);
45 static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
46 
47 static void iser_cq_event_callback(struct ib_event *cause, void *context)
48 {
49  iser_err("got cq event %d \n", cause->event);
50 }
51 
52 static void iser_qp_event_callback(struct ib_event *cause, void *context)
53 {
54  iser_err("got qp event %d\n",cause->event);
55 }
56 
57 static void iser_event_handler(struct ib_event_handler *handler,
58  struct ib_event *event)
59 {
60  iser_err("async event %d on device %s port %d\n", event->event,
61  event->device->name, event->element.port_num);
62 }
63 
71 static int iser_create_device_ib_res(struct iser_device *device)
72 {
73  int i, j;
74  struct iser_cq_desc *cq_desc;
75 
76  device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
77  iser_err("using %d CQs, device %s supports %d vectors\n", device->cqs_used,
78  device->ib_device->name, device->ib_device->num_comp_vectors);
79 
80  device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used,
81  GFP_KERNEL);
82  if (device->cq_desc == NULL)
83  goto cq_desc_err;
84  cq_desc = device->cq_desc;
85 
86  device->pd = ib_alloc_pd(device->ib_device);
87  if (IS_ERR(device->pd))
88  goto pd_err;
89 
90  for (i = 0; i < device->cqs_used; i++) {
91  cq_desc[i].device = device;
92  cq_desc[i].cq_index = i;
93 
94  device->rx_cq[i] = ib_create_cq(device->ib_device,
95  iser_cq_callback,
96  iser_cq_event_callback,
97  (void *)&cq_desc[i],
98  ISER_MAX_RX_CQ_LEN, i);
99  if (IS_ERR(device->rx_cq[i]))
100  goto cq_err;
101 
102  device->tx_cq[i] = ib_create_cq(device->ib_device,
103  NULL, iser_cq_event_callback,
104  (void *)&cq_desc[i],
105  ISER_MAX_TX_CQ_LEN, i);
106 
107  if (IS_ERR(device->tx_cq[i]))
108  goto cq_err;
109 
110  if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP))
111  goto cq_err;
112 
113  tasklet_init(&device->cq_tasklet[i],
114  iser_cq_tasklet_fn,
115  (unsigned long)&cq_desc[i]);
116  }
117 
118  device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
119  IB_ACCESS_REMOTE_WRITE |
120  IB_ACCESS_REMOTE_READ);
121  if (IS_ERR(device->mr))
122  goto dma_mr_err;
123 
124  INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
125  iser_event_handler);
126  if (ib_register_event_handler(&device->event_handler))
127  goto handler_err;
128 
129  return 0;
130 
131 handler_err:
132  ib_dereg_mr(device->mr);
133 dma_mr_err:
134  for (j = 0; j < device->cqs_used; j++)
135  tasklet_kill(&device->cq_tasklet[j]);
136 cq_err:
137  for (j = 0; j < i; j++) {
138  if (device->tx_cq[j])
139  ib_destroy_cq(device->tx_cq[j]);
140  if (device->rx_cq[j])
141  ib_destroy_cq(device->rx_cq[j]);
142  }
143  ib_dealloc_pd(device->pd);
144 pd_err:
145  kfree(device->cq_desc);
146 cq_desc_err:
147  iser_err("failed to allocate an IB resource\n");
148  return -1;
149 }
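/* Note: the loop above creates, per completion vector, one RX CQ armed for
 * interrupts (iser_cq_callback) and one TX CQ with a NULL completion handler
 * that is only ever polled, plus a tasklet which does the actual CQ polling
 * in softirq context (iser_cq_tasklet_fn below). */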
150 
155 static void iser_free_device_ib_res(struct iser_device *device)
156 {
157  int i;
158  BUG_ON(device->mr == NULL);
159 
160  for (i = 0; i < device->cqs_used; i++) {
161  tasklet_kill(&device->cq_tasklet[i]);
162  (void)ib_destroy_cq(device->tx_cq[i]);
163  (void)ib_destroy_cq(device->rx_cq[i]);
164  device->tx_cq[i] = NULL;
165  device->rx_cq[i] = NULL;
166  }
167 
168  (void)ib_unregister_event_handler(&device->event_handler);
169  (void)ib_dereg_mr(device->mr);
170  (void)ib_dealloc_pd(device->pd);
171 
172  kfree(device->cq_desc);
173 
174  device->mr = NULL;
175  device->pd = NULL;
176 }
177 
183 static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
184 {
185  struct iser_device *device;
186  struct ib_qp_init_attr init_attr;
187  int req_err, resp_err, ret = -ENOMEM;
188  struct ib_fmr_pool_param params;
189  int index, min_index = 0;
190 
191  BUG_ON(ib_conn->device == NULL);
192 
193  device = ib_conn->device;
194 
195  ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
196  ISER_RX_LOGIN_SIZE, GFP_KERNEL);
197  if (!ib_conn->login_buf)
198  goto out_err;
199 
200  ib_conn->login_req_buf = ib_conn->login_buf;
201  ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN;
202 
203  ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
204  (void *)ib_conn->login_req_buf,
205  ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
206 
207  ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
208  (void *)ib_conn->login_resp_buf,
209  ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
210 
211  req_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma);
212  resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma);
213 
214  if (req_err || resp_err) {
215  if (req_err)
216  ib_conn->login_req_dma = 0;
217  if (resp_err)
218  ib_conn->login_resp_dma = 0;
219  goto out_err;
220  }
221 
222  ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
223  (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
224  GFP_KERNEL);
225  if (!ib_conn->page_vec)
226  goto out_err;
227 
228  ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);
229 
230  params.page_shift = SHIFT_4K;
231  /* when the first/last SG element are not start/end *
232  * page aligned, the map would be of N+1 pages */
233  params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
234  /* make the pool size twice the max number of SCSI commands *
235  * the ML is expected to queue, watermark for unmap at 50% */
236  params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2;
237  params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX;
238  params.cache = 0;
239  params.flush_function = NULL;
240  params.access = (IB_ACCESS_LOCAL_WRITE |
241  IB_ACCESS_REMOTE_WRITE |
242  IB_ACCESS_REMOTE_READ);
243 
244  ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
245  if (IS_ERR(ib_conn->fmr_pool)) {
246  ret = PTR_ERR(ib_conn->fmr_pool);
247  ib_conn->fmr_pool = NULL;
248  goto out_err;
249  }
250 
251  memset(&init_attr, 0, sizeof init_attr);
252 
253  mutex_lock(&ig.connlist_mutex);
254  /* select the CQ with the minimal number of usages */
255  for (index = 0; index < device->cqs_used; index++)
256  if (device->cq_active_qps[index] <
257  device->cq_active_qps[min_index])
258  min_index = index;
259  device->cq_active_qps[min_index]++;
260  mutex_unlock(&ig.connlist_mutex);
261  iser_err("cq index %d used for ib_conn %p\n", min_index, ib_conn);
262 
263  init_attr.event_handler = iser_qp_event_callback;
264  init_attr.qp_context = (void *)ib_conn;
265  init_attr.send_cq = device->tx_cq[min_index];
266  init_attr.recv_cq = device->rx_cq[min_index];
267  init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
268  init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
269  init_attr.cap.max_send_sge = 2;
270  init_attr.cap.max_recv_sge = 1;
271  init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
272  init_attr.qp_type = IB_QPT_RC;
273 
274  ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
275  if (ret)
276  goto out_err;
277 
278  ib_conn->qp = ib_conn->cma_id->qp;
279  iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
280  ib_conn, ib_conn->cma_id,
281  ib_conn->fmr_pool, ib_conn->cma_id->qp);
282  return ret;
283 
284 out_err:
285  iser_err("unable to alloc mem or create resource, err %d\n", ret);
286  return ret;
287 }
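/* Note on the FMR pool sizing above: assuming the stock value of
 * ISCSI_DEF_XMIT_CMDS_MAX (128), pool_size is 256 and dirty_watermark is 128,
 * i.e. dirty (unmapped) FMRs get flushed once roughly half the pool is dirty,
 * matching the "watermark for unmap at 50%" comment. */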
288 
293 static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
294 {
295  int cq_index;
296  BUG_ON(ib_conn == NULL);
297 
298  iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n",
299  ib_conn, ib_conn->cma_id,
300  ib_conn->fmr_pool, ib_conn->qp);
301 
302  /* qp is created only once both addr & route are resolved */
303  if (ib_conn->fmr_pool != NULL)
304  ib_destroy_fmr_pool(ib_conn->fmr_pool);
305 
306  if (ib_conn->qp != NULL) {
307  cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
308  ib_conn->device->cq_active_qps[cq_index]--;
309 
310  rdma_destroy_qp(ib_conn->cma_id);
311  }
312  /* if called in cma handler context, the caller acts s.t. the cma destroys the id */
313  if (ib_conn->cma_id != NULL && can_destroy_id)
314  rdma_destroy_id(ib_conn->cma_id);
315 
316  ib_conn->fmr_pool = NULL;
317  ib_conn->qp = NULL;
318  ib_conn->cma_id = NULL;
319  kfree(ib_conn->page_vec);
320 
321  if (ib_conn->login_buf) {
322  if (ib_conn->login_req_dma)
323  ib_dma_unmap_single(ib_conn->device->ib_device,
324  ib_conn->login_req_dma,
325  ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
326  if (ib_conn->login_resp_dma)
327  ib_dma_unmap_single(ib_conn->device->ib_device,
328  ib_conn->login_resp_dma,
329  ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
330  kfree(ib_conn->login_buf);
331  }
332 
333  return 0;
334 }
335 
340 static
341 struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
342 {
343  struct iser_device *device;
344 
345  mutex_lock(&ig.device_list_mutex);
346 
347  list_for_each_entry(device, &ig.device_list, ig_list)
348  /* find if there's a match using the node GUID */
349  if (device->ib_device->node_guid == cma_id->device->node_guid)
350  goto inc_refcnt;
351 
352  device = kzalloc(sizeof *device, GFP_KERNEL);
353  if (device == NULL)
354  goto out;
355 
356  /* assign the IB device to the iser device */
357  device->ib_device = cma_id->device;
358  /* init the device and link it into ig device list */
359  if (iser_create_device_ib_res(device)) {
360  kfree(device);
361  device = NULL;
362  goto out;
363  }
364  list_add(&device->ig_list, &ig.device_list);
365 
366 inc_refcnt:
367  device->refcount++;
368 out:
369  mutex_unlock(&ig.device_list_mutex);
370  return device;
371 }
372 
373 /* if there's no demand for this device, release it */
374 static void iser_device_try_release(struct iser_device *device)
375 {
376  mutex_lock(&ig.device_list_mutex);
377  device->refcount--;
378  iser_err("device %p refcount %d\n",device,device->refcount);
379  if (!device->refcount) {
380  iser_free_device_ib_res(device);
381  list_del(&device->ig_list);
382  kfree(device);
383  }
384  mutex_unlock(&ig.device_list_mutex);
385 }
386 
387 static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
388  enum iser_ib_conn_state comp,
389  enum iser_ib_conn_state exch)
390 {
391  int ret;
392 
393  spin_lock_bh(&ib_conn->lock);
394  if ((ret = (ib_conn->state == comp)))
395  ib_conn->state = exch;
396  spin_unlock_bh(&ib_conn->lock);
397  return ret;
398 }
399 
403 static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id)
404 {
405  struct iser_device *device = ib_conn->device;
406 
407  BUG_ON(ib_conn->state != ISER_CONN_DOWN);
408 
409  mutex_lock(&ig.connlist_mutex);
410  list_del(&ib_conn->conn_list);
411  mutex_unlock(&ig.connlist_mutex);
412  iser_free_rx_descriptors(ib_conn);
413  iser_free_ib_conn_res(ib_conn, can_destroy_id);
414  ib_conn->device = NULL;
415  /* on EVENT_ADDR_ERROR there's no device yet for this conn */
416  if (device != NULL)
417  iser_device_try_release(device);
418  iscsi_destroy_endpoint(ib_conn->ep);
419 }
420 
421 void iser_conn_get(struct iser_conn *ib_conn)
422 {
423  atomic_inc(&ib_conn->refcount);
424 }
425 
426 int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id)
427 {
428  if (atomic_dec_and_test(&ib_conn->refcount)) {
429  iser_conn_release(ib_conn, can_destroy_id);
430  return 1;
431  }
432  return 0;
433 }
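/* Refcount sketch: iser_conn_init() starts the refcount at 1 for the
 * allocation, iser_connect() takes a second reference on behalf of the
 * cma_id, and iser_conn_terminate(), iser_connect_error(),
 * iser_disconnected_handler() and the iser_connect() error paths drop them
 * via iser_conn_put(); the final put ends up in iser_conn_release(). */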
434 
438 void iser_conn_terminate(struct iser_conn *ib_conn)
439 {
440  int err = 0;
441 
442  /* change the ib conn state only if the conn is UP, however always call
443  * rdma_disconnect since this is the only way to cause the CMA to change
444  * the QP state to ERROR
445  */
446 
447  iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING);
448  err = rdma_disconnect(ib_conn->cma_id);
449  if (err)
450  iser_err("Failed to disconnect, conn: 0x%p err %d\n",
451  ib_conn,err);
452 
453  wait_event_interruptible(ib_conn->wait,
454  ib_conn->state == ISER_CONN_DOWN);
455 
456  iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
457 }
458 
459 static int iser_connect_error(struct rdma_cm_id *cma_id)
460 {
461  struct iser_conn *ib_conn;
462  ib_conn = (struct iser_conn *)cma_id->context;
463 
464  ib_conn->state = ISER_CONN_DOWN;
465  wake_up_interruptible(&ib_conn->wait);
466  return iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
467 }
468 
469 static int iser_addr_handler(struct rdma_cm_id *cma_id)
470 {
471  struct iser_device *device;
472  struct iser_conn *ib_conn;
473  int ret;
474 
475  device = iser_device_find_by_ib_device(cma_id);
476  if (!device) {
477  iser_err("device lookup/creation failed\n");
478  return iser_connect_error(cma_id);
479  }
480 
481  ib_conn = (struct iser_conn *)cma_id->context;
482  ib_conn->device = device;
483 
484  ret = rdma_resolve_route(cma_id, 1000);
485  if (ret) {
486  iser_err("resolve route failed: %d\n", ret);
487  return iser_connect_error(cma_id);
488  }
489 
490  return 0;
491 }
492 
493 static int iser_route_handler(struct rdma_cm_id *cma_id)
494 {
495  struct rdma_conn_param conn_param;
496  int ret;
497 
498  ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
499  if (ret)
500  goto failure;
501 
502  memset(&conn_param, 0, sizeof conn_param);
503  conn_param.responder_resources = 4;
504  conn_param.initiator_depth = 1;
505  conn_param.retry_count = 7;
506  conn_param.rnr_retry_count = 6;
507 
508  ret = rdma_connect(cma_id, &conn_param);
509  if (ret) {
510  iser_err("failure connecting: %d\n", ret);
511  goto failure;
512  }
513 
514  return 0;
515 failure:
516  return iser_connect_error(cma_id);
517 }
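/* Note: retry_count of 7 is the largest value the IB CM allows, while
 * rnr_retry_count is kept at 6 so that RNR retries stay finite (a value of 7
 * would mean "retry forever"). */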
518 
519 static void iser_connected_handler(struct rdma_cm_id *cma_id)
520 {
521  struct iser_conn *ib_conn;
522 
523  ib_conn = (struct iser_conn *)cma_id->context;
524  ib_conn->state = ISER_CONN_UP;
525  wake_up_interruptible(&ib_conn->wait);
526 }
527 
528 static int iser_disconnected_handler(struct rdma_cm_id *cma_id)
529 {
530  struct iser_conn *ib_conn;
531  int ret;
532 
533  ib_conn = (struct iser_conn *)cma_id->context;
534 
535  /* getting here when the state is UP means that the conn is being *
536  * terminated asynchronously from the iSCSI layer's perspective. */
537  if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
538  ISER_CONN_TERMINATING))
539  iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
540  ISCSI_ERR_CONN_FAILED);
541 
542  /* Complete the termination process if no posts are pending */
543  if (ib_conn->post_recv_buf_count == 0 &&
544  (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
545  ib_conn->state = ISER_CONN_DOWN;
546  wake_up_interruptible(&ib_conn->wait);
547  }
548 
549  ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
550  return ret;
551 }
552 
553 static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
554 {
555  int ret = 0;
556 
557  iser_err("event %d status %d conn %p id %p\n",
558  event->event, event->status, cma_id->context, cma_id);
559 
560  switch (event->event) {
561  case RDMA_CM_EVENT_ADDR_RESOLVED:
562  ret = iser_addr_handler(cma_id);
563  break;
564  case RDMA_CM_EVENT_ROUTE_RESOLVED:
565  ret = iser_route_handler(cma_id);
566  break;
567  case RDMA_CM_EVENT_ESTABLISHED:
568  iser_connected_handler(cma_id);
569  break;
570  case RDMA_CM_EVENT_ADDR_ERROR:
571  case RDMA_CM_EVENT_ROUTE_ERROR:
572  case RDMA_CM_EVENT_CONNECT_ERROR:
573  case RDMA_CM_EVENT_UNREACHABLE:
574  case RDMA_CM_EVENT_REJECTED:
575  ret = iser_connect_error(cma_id);
576  break;
577  case RDMA_CM_EVENT_DISCONNECTED:
578  case RDMA_CM_EVENT_DEVICE_REMOVAL:
579  case RDMA_CM_EVENT_ADDR_CHANGE:
580  ret = iser_disconnected_handler(cma_id);
581  break;
582  default:
583  iser_err("Unexpected RDMA CM event (%d)\n", event->event);
584  break;
585  }
586  return ret;
587 }
588 
589 void iser_conn_init(struct iser_conn *ib_conn)
590 {
591  ib_conn->state = ISER_CONN_INIT;
592  init_waitqueue_head(&ib_conn->wait);
593  ib_conn->post_recv_buf_count = 0;
594  atomic_set(&ib_conn->post_send_buf_count, 0);
595  atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */
596  INIT_LIST_HEAD(&ib_conn->conn_list);
597  spin_lock_init(&ib_conn->lock);
598 }
599 
604 int iser_connect(struct iser_conn *ib_conn,
605  struct sockaddr_in *src_addr,
606  struct sockaddr_in *dst_addr,
607  int non_blocking)
608 {
609  struct sockaddr *src, *dst;
610  int err = 0;
611 
612  sprintf(ib_conn->name, "%pI4:%d",
613  &dst_addr->sin_addr.s_addr, dst_addr->sin_port);
614 
615  /* the device is known only --after-- address resolution */
616  ib_conn->device = NULL;
617 
618  iser_err("connecting to: %pI4, port 0x%x\n",
619  &dst_addr->sin_addr, dst_addr->sin_port);
620 
621  ib_conn->state = ISER_CONN_PENDING;
622 
623  iser_conn_get(ib_conn); /* ref ib conn's cma id */
624  ib_conn->cma_id = rdma_create_id(iser_cma_handler,
625  (void *)ib_conn,
626  RDMA_PS_TCP, IB_QPT_RC);
627  if (IS_ERR(ib_conn->cma_id)) {
628  err = PTR_ERR(ib_conn->cma_id);
629  iser_err("rdma_create_id failed: %d\n", err);
630  goto id_failure;
631  }
632 
633  src = (struct sockaddr *)src_addr;
634  dst = (struct sockaddr *)dst_addr;
635  err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000);
636  if (err) {
637  iser_err("rdma_resolve_addr failed: %d\n", err);
638  goto addr_failure;
639  }
640 
641  if (!non_blocking) {
642  wait_event_interruptible(ib_conn->wait,
643  (ib_conn->state != ISER_CONN_PENDING));
644 
645  if (ib_conn->state != ISER_CONN_UP) {
646  err = -EIO;
647  goto connect_failure;
648  }
649  }
650 
651  mutex_lock(&ig.connlist_mutex);
652  list_add(&ib_conn->conn_list, &ig.connlist);
653  mutex_unlock(&ig.connlist_mutex);
654  return 0;
655 
656 id_failure:
657  ib_conn->cma_id = NULL;
658 addr_failure:
659  ib_conn->state = ISER_CONN_DOWN;
660  iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */
661 connect_failure:
662  iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
663  return err;
664 }
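/* Usage sketch (hypothetical caller, not part of this file): the iSCSI
 * transport's ep_connect path would do roughly
 *
 *	struct sockaddr_in dst = { .sin_family = AF_INET };
 *	dst.sin_addr.s_addr = target_ip;   // assumed already resolved
 *	dst.sin_port = target_port;        // network byte order
 *	err = iser_connect(ib_conn, NULL, &dst, 1);   // non-blocking connect
 *
 * and then wait for ib_conn->state to move past ISER_CONN_PENDING. */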
665 
671 int iser_reg_page_vec(struct iser_conn *ib_conn,
672  struct iser_page_vec *page_vec,
673  struct iser_mem_reg *mem_reg)
674 {
675  struct ib_pool_fmr *mem;
676  u64 io_addr;
677  u64 *page_list;
678  int status;
679 
680  page_list = page_vec->pages;
681  io_addr = page_list[0];
682 
683  mem = ib_fmr_pool_map_phys(ib_conn->fmr_pool,
684  page_list,
685  page_vec->length,
686  io_addr);
687 
688  if (IS_ERR(mem)) {
689  status = (int)PTR_ERR(mem);
690  iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
691  return status;
692  }
693 
694  mem_reg->lkey = mem->fmr->lkey;
695  mem_reg->rkey = mem->fmr->rkey;
696  mem_reg->len = page_vec->length * SIZE_4K;
697  mem_reg->va = io_addr;
698  mem_reg->is_fmr = 1;
699  mem_reg->mem_h = (void *)mem;
700 
701  mem_reg->va += page_vec->offset;
702  mem_reg->len = page_vec->data_size;
703 
704  iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
705  "entry[0]: (0x%08lx,%ld)] -> "
706  "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
707  page_vec, page_vec->length,
708  (unsigned long)page_vec->pages[0],
709  (unsigned long)page_vec->data_size,
710  (unsigned int)mem_reg->lkey, mem_reg->mem_h,
711  (unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
712  return 0;
713 }
714 
718 void iser_unreg_mem(struct iser_mem_reg *reg)
719 {
720  int ret;
721 
722  iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
723 
724  ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
725  if (ret)
726  iser_err("ib_fmr_pool_unmap failed %d\n", ret);
727 
728  reg->mem_h = NULL;
729 }
730 
731 int iser_post_recvl(struct iser_conn *ib_conn)
732 {
733  struct ib_recv_wr rx_wr, *rx_wr_failed;
734  struct ib_sge sge;
735  int ib_ret;
736 
737  sge.addr = ib_conn->login_resp_dma;
738  sge.length = ISER_RX_LOGIN_SIZE;
739  sge.lkey = ib_conn->device->mr->lkey;
740 
741  rx_wr.wr_id = (unsigned long)ib_conn->login_resp_buf;
742  rx_wr.sg_list = &sge;
743  rx_wr.num_sge = 1;
744  rx_wr.next = NULL;
745 
746  ib_conn->post_recv_buf_count++;
747  ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
748  if (ib_ret) {
749  iser_err("ib_post_recv failed ret=%d\n", ib_ret);
750  ib_conn->post_recv_buf_count--;
751  }
752  return ib_ret;
753 }
754 
755 int iser_post_recvm(struct iser_conn *ib_conn, int count)
756 {
757  struct ib_recv_wr *rx_wr, *rx_wr_failed;
758  int i, ib_ret;
759  unsigned int my_rx_head = ib_conn->rx_desc_head;
760  struct iser_rx_desc *rx_desc;
761 
762  for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
763  rx_desc = &ib_conn->rx_descs[my_rx_head];
764  rx_wr->wr_id = (unsigned long)rx_desc;
765  rx_wr->sg_list = &rx_desc->rx_sg;
766  rx_wr->num_sge = 1;
767  rx_wr->next = rx_wr + 1;
768  my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1);
769  }
770 
771  rx_wr--;
772  rx_wr->next = NULL; /* mark end of work requests list */
773 
774  ib_conn->post_recv_buf_count += count;
775  ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
776  if (ib_ret) {
777  iser_err("ib_post_recv failed ret=%d\n", ib_ret);
778  ib_conn->post_recv_buf_count -= count;
779  } else
780  ib_conn->rx_desc_head = my_rx_head;
781  return ib_ret;
782 }
783 
784 
790 int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc)
791 {
792  int ib_ret;
793  struct ib_send_wr send_wr, *send_wr_failed;
794 
795  ib_dma_sync_single_for_device(ib_conn->device->ib_device,
796  tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
797 
798  send_wr.next = NULL;
799  send_wr.wr_id = (unsigned long)tx_desc;
800  send_wr.sg_list = tx_desc->tx_sg;
801  send_wr.num_sge = tx_desc->num_sge;
802  send_wr.opcode = IB_WR_SEND;
803  send_wr.send_flags = IB_SEND_SIGNALED;
804 
805  atomic_inc(&ib_conn->post_send_buf_count);
806 
807  ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
808  if (ib_ret) {
809  iser_err("ib_post_send failed, ret:%d\n", ib_ret);
810  atomic_dec(&ib_conn->post_send_buf_count);
811  }
812  return ib_ret;
813 }
814 
815 static void iser_handle_comp_error(struct iser_tx_desc *desc,
816  struct iser_conn *ib_conn)
817 {
818  if (desc && desc->type == ISCSI_TX_DATAOUT)
819  kmem_cache_free(ig.desc_cache, desc);
820 
821  if (ib_conn->post_recv_buf_count == 0 &&
822  atomic_read(&ib_conn->post_send_buf_count) == 0) {
823  /* getting here when the state is UP means that the conn is *
824  * being terminated asynchronously from the iSCSI layer's *
825  * perspective. */
826  if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
827  ISER_CONN_TERMINATING))
828  iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
829  ISCSI_ERR_CONN_FAILED);
830 
831  /* no more non-completed posts to the QP, complete the
832  * termination process without worrying about the disconnect event */
833  ib_conn->state = ISER_CONN_DOWN;
834  wake_up_interruptible(&ib_conn->wait);
835  }
836 }
837 
838 static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
839 {
840  struct ib_cq *cq = device->tx_cq[cq_index];
841  struct ib_wc wc;
842  struct iser_tx_desc *tx_desc;
843  struct iser_conn *ib_conn;
844  int completed_tx = 0;
845 
846  while (ib_poll_cq(cq, 1, &wc) == 1) {
847  tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id;
848  ib_conn = wc.qp->qp_context;
849  if (wc.status == IB_WC_SUCCESS) {
850  if (wc.opcode == IB_WC_SEND)
851  iser_snd_completion(tx_desc, ib_conn);
852  else
853  iser_err("expected opcode %d got %d\n",
854  IB_WC_SEND, wc.opcode);
855  } else {
856  iser_err("tx id %llx status %d vend_err %x\n",
857  wc.wr_id, wc.status, wc.vendor_err);
858  atomic_dec(&ib_conn->post_send_buf_count);
859  iser_handle_comp_error(tx_desc, ib_conn);
860  }
861  completed_tx++;
862  }
863  return completed_tx;
864 }
865 
866 
867 static void iser_cq_tasklet_fn(unsigned long data)
868 {
869  struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data;
870  struct iser_device *device = cq_desc->device;
871  int cq_index = cq_desc->cq_index;
872  struct ib_cq *cq = device->rx_cq[cq_index];
873  struct ib_wc wc;
874  struct iser_rx_desc *desc;
875  unsigned long xfer_len;
876  struct iser_conn *ib_conn;
877  int completed_tx, completed_rx;
878  completed_tx = completed_rx = 0;
879 
880  while (ib_poll_cq(cq, 1, &wc) == 1) {
881  desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id;
882  BUG_ON(desc == NULL);
883  ib_conn = wc.qp->qp_context;
884  if (wc.status == IB_WC_SUCCESS) {
885  if (wc.opcode == IB_WC_RECV) {
886  xfer_len = (unsigned long)wc.byte_len;
887  iser_rcv_completion(desc, xfer_len, ib_conn);
888  } else
889  iser_err("expected opcode %d got %d\n",
890  IB_WC_RECV, wc.opcode);
891  } else {
892  if (wc.status != IB_WC_WR_FLUSH_ERR)
893  iser_err("rx id %llx status %d vend_err %x\n",
894  wc.wr_id, wc.status, wc.vendor_err);
895  ib_conn->post_recv_buf_count--;
896  iser_handle_comp_error(NULL, ib_conn);
897  }
898  completed_rx++;
899  if (!(completed_rx & 63))
900  completed_tx += iser_drain_tx_cq(device, cq_index);
901  }
902  /* #warning "it is assumed here that arming CQ only once its empty" *
903  * " would not cause interrupts to be missed" */
904  ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
905 
906  completed_tx += iser_drain_tx_cq(device, cq_index);
907  iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
908 }
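/* Note: only the RX CQ is ever re-armed (above and at device setup); TX
 * completions are reaped by iser_drain_tx_cq(), both every 64 RX completions
 * inside the polling loop and once more before returning, so send completions
 * piggyback on receive interrupts instead of generating their own. */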
909 
910 static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
911 {
912  struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context;
913  struct iser_device *device = cq_desc->device;
914  int cq_index = cq_desc->cq_index;
915 
916  tasklet_schedule(&device->cq_tasklet[cq_index]);
917 }