Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
svc_rdma_marshal.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses. You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the BSD-type
8  * license below:
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  *
14  * Redistributions of source code must retain the above copyright
15  * notice, this list of conditions and the following disclaimer.
16  *
17  * Redistributions in binary form must reproduce the above
18  * copyright notice, this list of conditions and the following
19  * disclaimer in the documentation and/or other materials provided
20  * with the distribution.
21  *
22  * Neither the name of the Network Appliance, Inc. nor the names of
23  * its contributors may be used to endorse or promote products
24  * derived from this software without specific prior written
25  * permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38  *
39  * Author: Tom Tucker <[email protected]>
40  */
41 
42 #include <linux/sunrpc/xdr.h>
43 #include <linux/sunrpc/debug.h>
44 #include <asm/unaligned.h>
45 #include <linux/sunrpc/rpc_rdma.h>
46 #include <linux/sunrpc/svc_rdma.h>
47 
48 #define RPCDBG_FACILITY RPCDBG_SVCXPRT
49 
50 /*
51  * Decodes a read chunk list. The expected format is as follows:
52  * discrim : xdr_one
53  * position : u32 offset into XDR stream
54  * handle : u32 RKEY
55  * . . .
56  * end-of-list: xdr_zero
57  */
58 static u32 *decode_read_list(u32 *va, u32 *vaend)
59 {
60  struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
61 
62  while (ch->rc_discrim != xdr_zero) {
63  if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
64  (unsigned long)vaend) {
65  dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
66  return NULL;
67  }
68  ch++;
69  }
70  return (u32 *)&ch->rc_position;
71 }
72 
73 /*
74  * Determine number of chunks and total bytes in chunk list. The chunk
75  * list has already been verified to fit within the RPCRDMA header.
76  */
78  int *ch_count, int *byte_count)
79 {
80  /* compute the number of bytes represented by read chunks */
81  *byte_count = 0;
82  *ch_count = 0;
83  for (; ch->rc_discrim != 0; ch++) {
84  *byte_count = *byte_count + ntohl(ch->rc_target.rs_length);
85  *ch_count = *ch_count + 1;
86  }
87 }
88 
89 /*
90  * Decodes a write chunk list. The expected format is as follows:
91  * discrim : xdr_one
92  * nchunks : <count>
93  * handle : u32 RKEY ---+
94  * length : u32 <len of segment> |
95  * offset : remote va + <count>
96  * . . . |
97  * ---+
98  */
99 static u32 *decode_write_list(u32 *va, u32 *vaend)
100 {
101  int nchunks;
102 
103  struct rpcrdma_write_array *ary =
104  (struct rpcrdma_write_array *)va;
105 
106  /* Check for not write-array */
107  if (ary->wc_discrim == xdr_zero)
108  return (u32 *)&ary->wc_nchunks;
109 
110  if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
111  (unsigned long)vaend) {
112  dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
113  return NULL;
114  }
115  nchunks = ntohl(ary->wc_nchunks);
116  if (((unsigned long)&ary->wc_array[0] +
117  (sizeof(struct rpcrdma_write_chunk) * nchunks)) >
118  (unsigned long)vaend) {
119  dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
120  ary, nchunks, vaend);
121  return NULL;
122  }
123  /*
124  * rs_length is the 2nd 4B field in wc_target and taking its
125  * address skips the list terminator
126  */
127  return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length;
128 }
129 
130 static u32 *decode_reply_array(u32 *va, u32 *vaend)
131 {
132  int nchunks;
133  struct rpcrdma_write_array *ary =
134  (struct rpcrdma_write_array *)va;
135 
136  /* Check for no reply-array */
137  if (ary->wc_discrim == xdr_zero)
138  return (u32 *)&ary->wc_nchunks;
139 
140  if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
141  (unsigned long)vaend) {
142  dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
143  return NULL;
144  }
145  nchunks = ntohl(ary->wc_nchunks);
146  if (((unsigned long)&ary->wc_array[0] +
147  (sizeof(struct rpcrdma_write_chunk) * nchunks)) >
148  (unsigned long)vaend) {
149  dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
150  ary, nchunks, vaend);
151  return NULL;
152  }
153  return (u32 *)&ary->wc_array[nchunks];
154 }
155 
156 int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
157  struct svc_rqst *rqstp)
158 {
159  struct rpcrdma_msg *rmsgp = NULL;
160  u32 *va;
161  u32 *vaend;
162  u32 hdr_len;
163 
164  rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
165 
166  /* Verify that there's enough bytes for header + something */
167  if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) {
168  dprintk("svcrdma: header too short = %d\n",
169  rqstp->rq_arg.len);
170  return -EINVAL;
171  }
172 
173  /* Decode the header */
174  rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
175  rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
176  rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
177  rmsgp->rm_type = ntohl(rmsgp->rm_type);
178 
179  if (rmsgp->rm_vers != RPCRDMA_VERSION)
180  return -ENOSYS;
181 
182  /* Pull in the extra for the padded case and bump our pointer */
183  if (rmsgp->rm_type == RDMA_MSGP) {
184  int hdrlen;
185  rmsgp->rm_body.rm_padded.rm_align =
186  ntohl(rmsgp->rm_body.rm_padded.rm_align);
187  rmsgp->rm_body.rm_padded.rm_thresh =
188  ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
189 
190  va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
191  rqstp->rq_arg.head[0].iov_base = va;
192  hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
193  rqstp->rq_arg.head[0].iov_len -= hdrlen;
194  if (hdrlen > rqstp->rq_arg.len)
195  return -EINVAL;
196  return hdrlen;
197  }
198 
199  /* The chunk list may contain either a read chunk list or a write
200  * chunk list and a reply chunk list.
201  */
202  va = &rmsgp->rm_body.rm_chunks[0];
203  vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
204  va = decode_read_list(va, vaend);
205  if (!va)
206  return -EINVAL;
207  va = decode_write_list(va, vaend);
208  if (!va)
209  return -EINVAL;
210  va = decode_reply_array(va, vaend);
211  if (!va)
212  return -EINVAL;
213 
214  rqstp->rq_arg.head[0].iov_base = va;
215  hdr_len = (unsigned long)va - (unsigned long)rmsgp;
216  rqstp->rq_arg.head[0].iov_len -= hdr_len;
217 
218  *rdma_req = rmsgp;
219  return hdr_len;
220 }
221 
223 {
224  struct rpcrdma_msg *rmsgp = NULL;
225  struct rpcrdma_read_chunk *ch;
226  struct rpcrdma_write_array *ary;
227  u32 *va;
228  u32 hdrlen;
229 
230  dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
231  rqstp);
232  rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
233 
234  /* Pull in the extra for the padded case and bump our pointer */
235  if (rmsgp->rm_type == RDMA_MSGP) {
236  va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
237  rqstp->rq_arg.head[0].iov_base = va;
238  hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
239  rqstp->rq_arg.head[0].iov_len -= hdrlen;
240  return hdrlen;
241  }
242 
243  /*
244  * Skip all chunks to find RPC msg. These were previously processed
245  */
246  va = &rmsgp->rm_body.rm_chunks[0];
247 
248  /* Skip read-list */
249  for (ch = (struct rpcrdma_read_chunk *)va;
250  ch->rc_discrim != xdr_zero; ch++);
251  va = (u32 *)&ch->rc_position;
252 
253  /* Skip write-list */
254  ary = (struct rpcrdma_write_array *)va;
255  if (ary->wc_discrim == xdr_zero)
256  va = (u32 *)&ary->wc_nchunks;
257  else
258  /*
259  * rs_length is the 2nd 4B field in wc_target and taking its
260  * address skips the list terminator
261  */
262  va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
263 
264  /* Skip reply-array */
265  ary = (struct rpcrdma_write_array *)va;
266  if (ary->wc_discrim == xdr_zero)
267  va = (u32 *)&ary->wc_nchunks;
268  else
269  va = (u32 *)&ary->wc_array[ary->wc_nchunks];
270 
271  rqstp->rq_arg.head[0].iov_base = va;
272  hdrlen = (unsigned long)va - (unsigned long)rmsgp;
273  rqstp->rq_arg.head[0].iov_len -= hdrlen;
274 
275  return hdrlen;
276 }
277 
279  struct rpcrdma_msg *rmsgp,
280  enum rpcrdma_errcode err, u32 *va)
281 {
282  u32 *startp = va;
283 
284  *va++ = htonl(rmsgp->rm_xid);
285  *va++ = htonl(rmsgp->rm_vers);
286  *va++ = htonl(xprt->sc_max_requests);
287  *va++ = htonl(RDMA_ERROR);
288  *va++ = htonl(err);
289  if (err == ERR_VERS) {
290  *va++ = htonl(RPCRDMA_VERSION);
291  *va++ = htonl(RPCRDMA_VERSION);
292  }
293 
294  return (int)((unsigned long)va - (unsigned long)startp);
295 }
296 
298 {
299  struct rpcrdma_write_array *wr_ary;
300 
301  /* There is no read-list in a reply */
302 
303  /* skip write list */
304  wr_ary = (struct rpcrdma_write_array *)
305  &rmsgp->rm_body.rm_chunks[1];
306  if (wr_ary->wc_discrim)
307  wr_ary = (struct rpcrdma_write_array *)
308  &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
309  wc_target.rs_length;
310  else
311  wr_ary = (struct rpcrdma_write_array *)
312  &wr_ary->wc_nchunks;
313 
314  /* skip reply array */
315  if (wr_ary->wc_discrim)
316  wr_ary = (struct rpcrdma_write_array *)
317  &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
318  else
319  wr_ary = (struct rpcrdma_write_array *)
320  &wr_ary->wc_nchunks;
321 
322  return (unsigned long) wr_ary - (unsigned long) rmsgp;
323 }
324 
325 void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
326 {
327  struct rpcrdma_write_array *ary;
328 
329  /* no read-list */
330  rmsgp->rm_body.rm_chunks[0] = xdr_zero;
331 
332  /* write-array discrim */
333  ary = (struct rpcrdma_write_array *)
334  &rmsgp->rm_body.rm_chunks[1];
335  ary->wc_discrim = xdr_one;
336  ary->wc_nchunks = htonl(chunks);
337 
338  /* write-list terminator */
339  ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
340 
341  /* reply-array discriminator */
342  ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
343 }
344 
346  int chunks)
347 {
348  ary->wc_discrim = xdr_one;
349  ary->wc_nchunks = htonl(chunks);
350 }
351 
353  int chunk_no,
354  __be32 rs_handle,
355  __be64 rs_offset,
356  u32 write_len)
357 {
358  struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
359  seg->rs_handle = rs_handle;
360  seg->rs_offset = rs_offset;
361  seg->rs_length = htonl(write_len);
362 }
363 
365  struct rpcrdma_msg *rdma_argp,
366  struct rpcrdma_msg *rdma_resp,
367  enum rpcrdma_proc rdma_type)
368 {
369  rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
370  rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
371  rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
372  rdma_resp->rm_type = htonl(rdma_type);
373 
374  /* Encode <nul> chunks lists */
375  rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
376  rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
377  rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
378 }