Linux Kernel 3.7.1
ib.h
#ifndef _RDS_IB_H
#define _RDS_IB_H

#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "rds.h"
#include "rdma_transport.h"

#define RDS_FMR_SIZE 256
#define RDS_FMR_POOL_SIZE 8192

#define RDS_IB_MAX_SGE 8
#define RDS_IB_RECV_SGE 2

#define RDS_IB_DEFAULT_RECV_WR 1024
#define RDS_IB_DEFAULT_SEND_WR 256

#define RDS_IB_DEFAULT_RETRY_COUNT 2

#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */

#define RDS_IB_RECYCLE_BATCH_COUNT 32

extern struct rw_semaphore rds_ib_devices_lock;
extern struct list_head rds_ib_devices;

/*
 * IB posts RDS_FRAG_SIZE fragments of pages to the receive queues to
 * try to minimize the amount of memory tied up in both the device and
 * socket receive queues.
 */
struct rds_page_frag {
        struct list_head f_item;
        struct list_head f_cache_entry;
        struct scatterlist f_sg;
};

struct rds_ib_incoming {
        struct list_head ii_frags;
        struct list_head ii_cache_entry;
        struct rds_incoming ii_inc;
};
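
/*
 * Illustrative sketch (not part of the original header): attaching a
 * freshly allocated page to a fragment's scatterlist before the fragment
 * is posted to the receive queue, per the comment above. The helper name
 * rds_ib_example_frag_init is hypothetical; RDS_FRAG_SIZE comes from rds.h.
 */
static inline int rds_ib_example_frag_init(struct rds_page_frag *frag, gfp_t gfp)
{
        struct page *page = alloc_page(gfp);

        if (!page)
                return -ENOMEM;
        sg_init_table(&frag->f_sg, 1);  /* single-entry scatterlist */
        sg_set_page(&frag->f_sg, page, RDS_FRAG_SIZE, 0);
        return 0;
}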

struct rds_ib_cache_head {
        struct list_head *first;
        unsigned long count;
};

struct rds_ib_refill_cache {
        struct rds_ib_cache_head __percpu *percpu;
        struct list_head *xfer;
        struct list_head *ready;
};

struct rds_ib_connect_private {
        /* Add new fields at the end, and don't permute existing fields. */
        __be32 dp_saddr;
        __be32 dp_daddr;
        u8 dp_protocol_major;
        u8 dp_protocol_minor;
        __be16 dp_protocol_minor_mask; /* bitmask */
        __be32 dp_reserved1;
        __be64 dp_ack_seq;
        __be32 dp_credit; /* non-zero enables flow ctl */
};

struct rds_ib_send_work {
        void *s_op;
        struct ib_send_wr s_wr;
        struct ib_sge s_sge[RDS_IB_MAX_SGE];
        unsigned long s_queued;
};

struct rds_ib_recv_work {
        struct rds_ib_incoming *r_ibinc;
        struct rds_page_frag *r_frag;
        struct ib_recv_wr r_wr;
        struct ib_sge r_sge[2];
};

struct rds_ib_work_ring {
        u32 w_nr;
        u32 w_alloc_ptr;
        u32 w_alloc_ctr;
        u32 w_free_ptr;
        atomic_t w_free_ctr;
};

struct rds_ib_device;

struct rds_ib_connection {

        struct list_head ib_node;
        struct rds_ib_device *rds_ibdev;
        struct rds_connection *conn;

        /* alphabet soup, IBTA style */
        struct rdma_cm_id *i_cm_id;
        struct ib_pd *i_pd;
        struct ib_mr *i_mr;
        struct ib_cq *i_send_cq;
        struct ib_cq *i_recv_cq;

        /* tx */
        struct rds_ib_work_ring i_send_ring;
        struct rm_data_op *i_data_op;
        struct rds_header *i_send_hdrs;
        u64 i_send_hdrs_dma;
        struct rds_ib_send_work *i_sends;
        atomic_t i_signaled_sends;

        /* rx */
        struct tasklet_struct i_recv_tasklet;
        struct mutex i_recv_mutex;
        struct rds_ib_work_ring i_recv_ring;
        struct rds_ib_incoming *i_ibinc;
        u32 i_recv_data_rem;
        struct rds_header *i_recv_hdrs;
        u64 i_recv_hdrs_dma;
        struct rds_ib_recv_work *i_recvs;
        u64 i_ack_recv; /* last ACK received */
        struct rds_ib_refill_cache i_cache_incs;
        struct rds_ib_refill_cache i_cache_frags;

        /* sending acks */
        unsigned long i_ack_flags;
#ifdef KERNEL_HAS_ATOMIC64
        atomic64_t i_ack_next; /* next ACK to send */
#else
        spinlock_t i_ack_lock; /* protect i_ack_next */
        u64 i_ack_next; /* next ACK to send */
#endif
        struct rds_header *i_ack;
        struct ib_send_wr i_ack_wr;
        struct ib_sge i_ack_sge;
        u64 i_ack_dma;
        unsigned long i_ack_queued;

        /* Flow control related information
         *
         * Our algorithm uses a pair of variables that we need to access
         * atomically - one for the send credits, and one for the posted
         * recv credits we need to transfer to the remote.
         * Rather than protect them with a slow spinlock, we put both into
         * a single atomic_t and update it using cmpxchg.
         */
        atomic_t i_credits;

        /* Protocol version specific information */
        unsigned int i_flowctl:1; /* enable/disable flow ctl */

        /* Batched completions */
        unsigned int i_unsignaled_wrs;
};

/* This assumes that atomic_t is at least 32 bits */
#define IB_GET_SEND_CREDITS(v) ((v) & 0xffff)
#define IB_GET_POST_CREDITS(v) ((v) >> 16)
#define IB_SET_SEND_CREDITS(v) ((v) & 0xffff)
#define IB_SET_POST_CREDITS(v) ((v) << 16)
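
/*
 * Illustrative sketch (not part of the original header): consuming send
 * credits from the packed atomic_t with the macros above, in the spirit
 * of the cmpxchg scheme described in struct rds_ib_connection. The helper
 * name rds_ib_example_take_send_credits is hypothetical.
 */
static inline u32 rds_ib_example_take_send_credits(struct rds_ib_connection *ic,
                                                   u32 wanted)
{
        u32 oldval, newval;

        do {
                oldval = (u32) atomic_read(&ic->i_credits);
                if (IB_GET_SEND_CREDITS(oldval) < wanted)
                        return 0;       /* not enough send credits posted */
                /* send credits live in the low 16 bits, so plain
                 * subtraction only touches that half */
                newval = oldval - wanted;
        } while ((u32) atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval);

        return wanted;
}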

struct rds_ib_ipaddr {
        struct list_head list;
        __be32 ipaddr;
};

struct rds_ib_device {
        struct list_head list;
        struct list_head ipaddr_list;
        struct list_head conn_list;
        struct ib_device *dev;
        struct ib_pd *pd;
        struct ib_mr *mr;
        struct rds_ib_mr_pool *mr_pool;
        unsigned int fmr_max_remaps;
        unsigned int max_fmrs;
        int max_sge;
        unsigned int max_wrs;
        unsigned int max_initiator_depth;
        unsigned int max_responder_resources;
        spinlock_t spinlock; /* protect the above */
        atomic_t refcount;
        struct work_struct free_work;
};

#define ibdev_to_node(ibdev) dev_to_node(ibdev->dma_device)
#define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev)

/* bits for i_ack_flags */
#define IB_ACK_IN_FLIGHT 0
#define IB_ACK_REQUESTED 1

/* Magic WR_ID for ACKs */
#define RDS_IB_ACK_WR_ID (~(u64) 0)
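
/*
 * Illustrative sketch (not part of the original header): recording the next
 * ACK to send and flagging that one is wanted, under both i_ack_next
 * configurations shown in struct rds_ib_connection. The helper name
 * rds_ib_example_set_ack is hypothetical.
 */
static inline void rds_ib_example_set_ack(struct rds_ib_connection *ic, u64 seq)
{
#ifdef KERNEL_HAS_ATOMIC64
        atomic64_set(&ic->i_ack_next, seq);     /* lock-free 64-bit store */
#else
        unsigned long flags;

        spin_lock_irqsave(&ic->i_ack_lock, flags);
        ic->i_ack_next = seq;                   /* i_ack_lock protects this u64 */
        spin_unlock_irqrestore(&ic->i_ack_lock, flags);
#endif
        set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
}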

struct rds_ib_statistics {
        /* uint64_t event counters for the IB transport; the individual
         * members are elided in this listing */
};

extern struct workqueue_struct *rds_ib_wq;

/*
 * Fake ib_dma_sync_sg_for_{cpu,device} as long as ib_verbs.h
 * doesn't define it.
 */
static inline void rds_ib_dma_sync_sg_for_cpu(struct ib_device *dev,
                struct scatterlist *sg, unsigned int sg_dma_len, int direction)
{
        unsigned int i;

        for (i = 0; i < sg_dma_len; ++i) {
                ib_dma_sync_single_for_cpu(dev,
                                ib_sg_dma_address(dev, &sg[i]),
                                ib_sg_dma_len(dev, &sg[i]),
                                direction);
        }
}
#define ib_dma_sync_sg_for_cpu rds_ib_dma_sync_sg_for_cpu

static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
                struct scatterlist *sg, unsigned int sg_dma_len, int direction)
{
        unsigned int i;

        for (i = 0; i < sg_dma_len; ++i) {
                ib_dma_sync_single_for_device(dev,
                                ib_sg_dma_address(dev, &sg[i]),
                                ib_sg_dma_len(dev, &sg[i]),
                                direction);
        }
}
#define ib_dma_sync_sg_for_device rds_ib_dma_sync_sg_for_device
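
/*
 * Illustrative usage (not part of the original header): syncing a received
 * fragment's single-entry scatterlist for CPU access before copying data
 * out; assumes the caller has already DMA-mapped the fragment. The helper
 * name rds_ib_example_sync_frag is hypothetical.
 */
static inline void rds_ib_example_sync_frag(struct ib_device *dev,
                                            struct rds_page_frag *frag)
{
        ib_dma_sync_sg_for_cpu(dev, &frag->f_sg, 1, DMA_FROM_DEVICE);
}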

/* ib.c */
extern struct rds_transport rds_ib_transport;
void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
extern struct ib_client rds_ib_client;

extern unsigned int fmr_message_size;
extern unsigned int rds_ib_retry_count;

extern struct list_head ib_nodev_conns;

/* ib_cm.c */
int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp);
void rds_ib_conn_free(void *arg);
int rds_ib_conn_connect(struct rds_connection *conn);
void rds_ib_conn_shutdown(struct rds_connection *conn);
void rds_ib_state_change(struct sock *sk);
int rds_ib_listen_init(void);
void rds_ib_listen_stop(void);
void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                             struct rdma_cm_event *event);
int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id);
void rds_ib_cm_connect_complete(struct rds_connection *conn,
                                struct rdma_cm_event *event);

#define rds_ib_conn_error(conn, fmt...) \
        __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt)
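
/*
 * Illustrative usage (not part of the original header): the macro above
 * prepends the KERN_WARNING "RDS/IB: " prefix and forwards the
 * printk-style arguments to __rds_ib_conn_error, e.g.:
 *
 *      rds_ib_conn_error(conn, "send completion had status %u\n", status);
 */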

/* ib_rdma.c */
int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
void rds_ib_destroy_nodev_conns(void);
void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
                    struct rds_sock *rs, u32 *key_ret);
void rds_ib_sync_mr(void *trans_private, int dir);
void rds_ib_free_mr(void *trans_private, int invalidate);
void rds_ib_flush_mrs(void);

/* ib_recv.c */
int rds_ib_recv_init(void);
void rds_ib_recv_exit(void);
int rds_ib_recv(struct rds_connection *conn);
void rds_ib_recv_refill(struct rds_connection *conn, int prefill);
void rds_ib_inc_free(struct rds_incoming *inc);
int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
                            size_t size);
void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context);
void rds_ib_recv_tasklet_fn(unsigned long data);
void rds_ib_recv_init_ack(struct rds_ib_connection *ic);
void rds_ib_attempt_ack(struct rds_ib_connection *ic);

/* ib_ring.c */
u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest);

/* ib_send.c */
void rds_ib_xmit_complete(struct rds_connection *conn);
int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
                unsigned int hdr_off, unsigned int sg, unsigned int off);
void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context);
int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits);
void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted);
int rds_ib_send_grab_credits(struct rds_connection *conn, u32 wanted,
                             u32 *adv_credits, int need_posted, int max_posted);
int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op);

/* ib_stats.c */
DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats);
#define rds_ib_stats_inc(member) rds_stats_inc_which(rds_ib_stats, member)
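
/*
 * Illustrative usage (not part of the original header): bumping one of the
 * per-CPU IB counters from a hot path; s_ib_ack_sent is an assumed example
 * member of struct rds_ib_statistics, whose fields are elided above.
 *
 *      rds_ib_stats_inc(s_ib_ack_sent);
 */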
unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
                                    unsigned int avail);

/* ib_sysctl.c */
int rds_ib_sysctl_init(void);
void rds_ib_sysctl_exit(void);
extern unsigned long rds_ib_sysctl_max_send_wr;
extern unsigned long rds_ib_sysctl_max_recv_wr;
extern unsigned long rds_ib_sysctl_max_unsig_wrs;
extern unsigned long rds_ib_sysctl_max_unsig_bytes;
extern unsigned long rds_ib_sysctl_max_recv_allocation;
extern unsigned int rds_ib_sysctl_flow_control;

#endif /* _RDS_IB_H */