Linux Kernel 3.7.1
include/linux/sunrpc/svc_rdma.h
/*
 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Tom Tucker <[email protected]>
 */
#ifndef SVC_RDMA_H
#define SVC_RDMA_H
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#define SVCRDMA_DEBUG
/* RPC/RDMA parameters and stats */
extern unsigned int svcrdma_ord;
extern unsigned int svcrdma_max_requests;
extern unsigned int svcrdma_max_req_size;

extern atomic_t rdma_stat_recv;
extern atomic_t rdma_stat_read;
extern atomic_t rdma_stat_write;
extern atomic_t rdma_stat_sq_starve;
extern atomic_t rdma_stat_rq_starve;
extern atomic_t rdma_stat_rq_poll;
extern atomic_t rdma_stat_rq_prod;
extern atomic_t rdma_stat_sq_poll;
extern atomic_t rdma_stat_sq_prod;

#define RPCRDMA_VERSION 1
/*
 * Contexts are built when an RDMA request is created and are a
 * record of the resources that can be recovered when the request
 * completes.
 */
struct svc_rdma_op_ctxt {
	struct svc_rdma_op_ctxt *read_hdr;
	struct svc_rdma_fastreg_mr *frmr;
	int hdr_count;
	struct xdr_buf arg;
	struct list_head dto_q;
	enum ib_wr_opcode wr_op;
	enum ib_wc_status wc_status;
	u32 byte_len;
	struct svcxprt_rdma *xprt;
	unsigned long flags;
	enum dma_data_direction direction;
	int count;
	struct ib_sge sge[RPCSVC_MAXPAGES];
	struct page *pages[RPCSVC_MAXPAGES];
};
/*
 * NFS requests are mapped on the client side by the chunk lists in
 * the RPCRDMA header. During the fetching of the RPC from the client
 * and the writing of the reply to the client, the memory in the
 * client and the memory in the server must be mapped as contiguous
 * vaddr/len for access by the hardware. These data structures keep
 * these mappings.
 *
 * For an RDMA_WRITE, the 'sge' maps the RPC REPLY. For RDMA_READ, the
 * 'sge' in the svc_rdma_req_map maps the server side RPC reply and the
 * 'ch' field maps the read-list of the RPCRDMA header to the 'sge'
 * mapping of the reply.
 */
struct svc_rdma_chunk_sge {
	int start;		/* sge no for this chunk */
	int count;		/* sge count for this chunk */
};
struct svc_rdma_fastreg_mr {
	struct ib_mr *mr;
	void *kva;
	struct ib_fast_reg_page_list *page_list;
	int page_list_len;
	unsigned long access_flags;
	unsigned long map_len;
	enum dma_data_direction direction;
	struct list_head frmr_list;
};
struct svc_rdma_req_map {
	struct svc_rdma_fastreg_mr *frmr;
	unsigned long count;
	union {
		struct kvec sge[RPCSVC_MAXPAGES];
		struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
	};
};
#define RDMACTXT_F_FAST_UNREG	1
#define RDMACTXT_F_LAST_CTXT	2

#define SVCRDMA_DEVCAP_FAST_REG		1	/* fast mr registration */
#define SVCRDMA_DEVCAP_READ_W_INV	2	/* read w/ invalidate */
struct svcxprt_rdma {
	struct svc_xprt      sc_xprt;		/* SVC transport structure */
	struct rdma_cm_id    *sc_cm_id;		/* RDMA connection id */
	struct list_head     sc_accept_q;	/* Conn. waiting accept */
	int                  sc_ord;		/* RDMA read limit */
	int                  sc_max_sge;

	int                  sc_sq_depth;	/* Depth of SQ */
	atomic_t             sc_sq_count;	/* Number of SQ WR on queue */

	int                  sc_max_requests;	/* Depth of RQ */
	int                  sc_max_req_size;	/* Size of each RQ WR buf */

	struct ib_pd         *sc_pd;

	atomic_t             sc_dma_used;
	atomic_t             sc_ctxt_used;
	struct list_head     sc_rq_dto_q;
	spinlock_t           sc_rq_dto_lock;
	struct ib_qp         *sc_qp;
	struct ib_cq         *sc_rq_cq;
	struct ib_cq         *sc_sq_cq;
	struct ib_mr         *sc_phys_mr;	/* MR for server memory */
	u32                  sc_dev_caps;	/* distilled device caps */
	u32                  sc_dma_lkey;	/* local dma key */
	unsigned int         sc_frmr_pg_list_len;
	struct list_head     sc_frmr_q;
	spinlock_t           sc_frmr_q_lock;

	spinlock_t           sc_lock;		/* transport lock */

	wait_queue_head_t    sc_send_wait;	/* SQ exhaustion waitlist */
	unsigned long        sc_flags;
	struct list_head     sc_dto_q;		/* DTO tasklet I/O pending Q */
	struct list_head     sc_read_complete_q;
	struct work_struct   sc_work;
};
/* sc_flags */
#define RDMAXPRT_RQ_PENDING	1
#define RDMAXPRT_SQ_PENDING	2
#define RDMAXPRT_CONN_PENDING	3
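The two flag sets above are used differently: the SVCRDMA_DEVCAP_* values are OR'd into the sc_dev_caps bitmask, while the RDMAXPRT_* values are bit numbers manipulated atomically in sc_flags. A hedged sketch of both idioms (the function name example_flag_usage is invented for illustration; set_bit() and test_and_clear_bit() are the standard kernel atomic bitops):

/* Illustrative sketch, not part of the original header. */
static void example_flag_usage(struct svcxprt_rdma *xprt)
{
	/* sc_dev_caps is a plain bitmask of SVCRDMA_DEVCAP_* values */
	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) {
		/* device supports fast memory registration */
	}

	/* sc_flags holds RDMAXPRT_* bit numbers, touched atomically */
	set_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags);
	if (test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) {
		/* receive-queue work is pending; drain sc_rq_dto_q */
	}
}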
#define RPCRDMA_LISTEN_BACKLOG	10
/* The default ORD value is based on two outstanding full-size writes with a
 * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */
#define RPCRDMA_ORD		(64/4)
#define RPCRDMA_SQ_DEPTH_MULT	8
#define RPCRDMA_MAX_THREADS	16
#define RPCRDMA_MAX_REQUESTS	16
#define RPCRDMA_MAX_REQ_SIZE	4096
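To make these constants concrete, a hedged worked example of how the transport's queue sizing follows from the defaults (a sketch of the arithmetic, not a quote of svc_rdma_transport.c):

/* Illustrative arithmetic, not part of the original header:
 *   sc_max_requests = svcrdma_max_requests = RPCRDMA_MAX_REQUESTS = 16
 *   sc_sq_depth     = sc_max_requests * RPCRDMA_SQ_DEPTH_MULT     = 128
 * and RPCRDMA_ORD = 64/4 = 16, matching the comment's
 * 32k * 2 ops / 4k = 16 outstanding RDMA_READs. */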
/* svc_rdma_marshal.c */
extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *,
				      int *, int *);
extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *);
extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
				     struct rpcrdma_msg *,
				     enum rpcrdma_errcode, u32 *);
extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
					    __be32, __be64, u32);
extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *,
					     struct rpcrdma_msg *,
					     struct rpcrdma_msg *,
					     enum rpcrdma_proc);
extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *);
/* svc_rdma_recvfrom.c */
extern int svc_rdma_recvfrom(struct svc_rqst *);

/* svc_rdma_sendto.c */
extern int svc_rdma_sendto(struct svc_rqst *);
/* svc_rdma_transport.c */
extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
				enum rpcrdma_errcode);
struct page *svc_rdma_get_page(void);
extern int svc_rdma_post_recv(struct svcxprt_rdma *);
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *);
extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
			      struct svc_rdma_fastreg_mr *);
extern void svc_sq_reap(struct svcxprt_rdma *);
extern void svc_rq_reap(struct svcxprt_rdma *);
extern struct svc_xprt_class svc_rdma_class;
extern void svc_rdma_prep_reply_hdr(struct svc_rqst *);
/* svc_rdma.c */
extern int svc_rdma_init(void);
extern void svc_rdma_cleanup(void);
/*
 * Returns the address of the first read chunk or NULL if no read chunk
 * is present.
 */
static inline struct rpcrdma_read_chunk *
svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp)
{
	struct rpcrdma_read_chunk *ch =
		(struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];

	if (ch->rc_discrim == 0)
		return NULL;

	return ch;
}
/*
 * Returns the address of the first write array element or NULL if no
 * write array is present.
 */
static inline struct rpcrdma_write_array *
svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp)
{
	if (rmsgp->rm_body.rm_chunks[0] != 0
	    || rmsgp->rm_body.rm_chunks[1] == 0)
		return NULL;

	return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1];
}
261
/*
262
* Returns the address of the first reply array element or <nul> if no
263
* reply array is present
264
*/
265
static
inline
struct
rpcrdma_write_array
*
266
svc_rdma_get_reply_array(
struct
rpcrdma_msg
*rmsgp)
267
{
268
struct
rpcrdma_read_chunk
*rch;
269
struct
rpcrdma_write_array
*wr_ary;
270
struct
rpcrdma_write_array
*rp_ary;
271
272
/* XXX: Need to fix when reply list may occur with read-list and/or
273
* write list */
274
if
(rmsgp->
rm_body
.
rm_chunks
[0] != 0 ||
275
rmsgp->
rm_body
.
rm_chunks
[1] != 0)
276
return
NULL
;
277
278
rch = svc_rdma_get_read_chunk(rmsgp);
279
if
(rch) {
280
while
(rch->
rc_discrim
)
281
rch++;
282
283
/* The reply list follows an empty write array located
284
* at 'rc_position' here. The reply array is at rc_target.
285
*/
286
rp_ary = (
struct
rpcrdma_write_array
*)&rch->
rc_target
;
287
288
goto
found_it;
289
}
290
291
wr_ary = svc_rdma_get_write_array(rmsgp);
292
if
(wr_ary) {
293
rp_ary = (
struct
rpcrdma_write_array
*)
294
&wr_ary->
295
wc_array
[
ntohl
(wr_ary->
wc_nchunks
)].wc_target.rs_length;
296
297
goto
found_it;
298
}
299
300
/* No read list, no write list */
301
rp_ary = (
struct
rpcrdma_write_array
*)
302
&rmsgp->
rm_body
.
rm_chunks
[2];
303
304
found_it:
305
if
(rp_ary->
wc_discrim
== 0)
306
return
NULL
;
307
308
return
rp_ary;
309
}
310
#endif
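Taken together, the three inline helpers let a caller classify an incoming RPC/RDMA header by its chunk lists. A hedged usage sketch (example_classify_msg and its counter are invented for illustration):

/* Illustrative sketch, not part of the original header. */
static int example_classify_msg(struct rpcrdma_msg *rmsgp)
{
	struct rpcrdma_read_chunk *ch;
	int nreads = 0;

	/* Read-list entries are consecutive until the XDR discriminator
	 * word goes to zero -- the same loop shape used by
	 * svc_rdma_get_reply_array() above. */
	for (ch = svc_rdma_get_read_chunk(rmsgp); ch && ch->rc_discrim; ch++)
		nreads++;	/* each entry names an rc_target to RDMA_READ */

	if (svc_rdma_get_write_array(rmsgp)) {
		/* reply data will be pushed chunk by chunk with RDMA_WRITE */
	}
	if (svc_rdma_get_reply_array(rmsgp)) {
		/* the whole reply is written to the client's reply array */
	}
	return nreads;
}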