GNU Linux-libre 4.9.309-gnu1
[releases.git] / net / sunrpc / xprtrdma / svc_rdma_marshal.c
1 /*
2  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the BSD-type
8  * license below:
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  *
14  *      Redistributions of source code must retain the above copyright
15  *      notice, this list of conditions and the following disclaimer.
16  *
17  *      Redistributions in binary form must reproduce the above
18  *      copyright notice, this list of conditions and the following
19  *      disclaimer in the documentation and/or other materials provided
20  *      with the distribution.
21  *
22  *      Neither the name of the Network Appliance, Inc. nor the names of
23  *      its contributors may be used to endorse or promote products
24  *      derived from this software without specific prior written
25  *      permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38  *
39  * Author: Tom Tucker <tom@opengridcomputing.com>
40  */
41
42 #include <linux/sunrpc/xdr.h>
43 #include <linux/sunrpc/debug.h>
44 #include <asm/unaligned.h>
45 #include <linux/sunrpc/rpc_rdma.h>
46 #include <linux/sunrpc/svc_rdma.h>
47
48 #define RPCDBG_FACILITY RPCDBG_SVCXPRT
49
50 /*
51  * Decodes a read chunk list. The expected format is as follows:
52  *    descrim  : xdr_one
53  *    position : __be32 offset into XDR stream
54  *    handle   : __be32 RKEY
55  *    . . .
56  *  end-of-list: xdr_zero
57  */
58 static __be32 *decode_read_list(__be32 *va, __be32 *vaend)
59 {
60         struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
61
62         while (ch->rc_discrim != xdr_zero) {
63                 if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
64                     (unsigned long)vaend) {
65                         dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
66                         return NULL;
67                 }
68                 ch++;
69         }
70         return &ch->rc_position;
71 }
72
73 /*
74  * Decodes a write chunk list. The expected format is as follows:
75  *    descrim  : xdr_one
76  *    nchunks  : <count>
77  *       handle   : __be32 RKEY           ---+
78  *       length   : __be32 <len of segment>  |
79  *       offset   : remove va                + <count>
80  *       . . .                               |
81  *                                        ---+
82  */
83 static __be32 *decode_write_list(__be32 *va, __be32 *vaend)
84 {
85         unsigned long start, end;
86         int nchunks;
87
88         struct rpcrdma_write_array *ary =
89                 (struct rpcrdma_write_array *)va;
90
91         /* Check for not write-array */
92         if (ary->wc_discrim == xdr_zero)
93                 return &ary->wc_nchunks;
94
95         if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
96             (unsigned long)vaend) {
97                 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
98                 return NULL;
99         }
100         nchunks = be32_to_cpu(ary->wc_nchunks);
101
102         start = (unsigned long)&ary->wc_array[0];
103         end = (unsigned long)vaend;
104         if (nchunks < 0 ||
105             nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
106             (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
107                 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
108                         ary, nchunks, vaend);
109                 return NULL;
110         }
111         /*
112          * rs_length is the 2nd 4B field in wc_target and taking its
113          * address skips the list terminator
114          */
115         return &ary->wc_array[nchunks].wc_target.rs_length;
116 }
117
118 static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
119 {
120         unsigned long start, end;
121         int nchunks;
122         struct rpcrdma_write_array *ary =
123                 (struct rpcrdma_write_array *)va;
124
125         /* Check for no reply-array */
126         if (ary->wc_discrim == xdr_zero)
127                 return &ary->wc_nchunks;
128
129         if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
130             (unsigned long)vaend) {
131                 dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
132                 return NULL;
133         }
134         nchunks = be32_to_cpu(ary->wc_nchunks);
135
136         start = (unsigned long)&ary->wc_array[0];
137         end = (unsigned long)vaend;
138         if (nchunks < 0 ||
139             nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
140             (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
141                 dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
142                         ary, nchunks, vaend);
143                 return NULL;
144         }
145         return (__be32 *)&ary->wc_array[nchunks];
146 }
147
148 /**
149  * svc_rdma_xdr_decode_req - Parse incoming RPC-over-RDMA header
150  * @rq_arg: Receive buffer
151  *
152  * On entry, xdr->head[0].iov_base points to first byte in the
153  * RPC-over-RDMA header.
154  *
155  * On successful exit, head[0] points to first byte past the
156  * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
157  * The length of the RPC-over-RDMA header is returned.
158  */
159 int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
160 {
161         struct rpcrdma_msg *rmsgp;
162         __be32 *va, *vaend;
163         unsigned int len;
164         u32 hdr_len;
165
166         /* Verify that there's enough bytes for header + something */
167         if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) {
168                 dprintk("svcrdma: header too short = %d\n",
169                         rq_arg->len);
170                 return -EINVAL;
171         }
172
173         rmsgp = (struct rpcrdma_msg *)rq_arg->head[0].iov_base;
174         if (rmsgp->rm_vers != rpcrdma_version) {
175                 dprintk("%s: bad version %u\n", __func__,
176                         be32_to_cpu(rmsgp->rm_vers));
177                 return -EPROTONOSUPPORT;
178         }
179
180         switch (be32_to_cpu(rmsgp->rm_type)) {
181         case RDMA_MSG:
182         case RDMA_NOMSG:
183                 break;
184
185         case RDMA_DONE:
186                 /* Just drop it */
187                 dprintk("svcrdma: dropping RDMA_DONE message\n");
188                 return 0;
189
190         case RDMA_ERROR:
191                 /* Possible if this is a backchannel reply.
192                  * XXX: We should cancel this XID, though.
193                  */
194                 dprintk("svcrdma: dropping RDMA_ERROR message\n");
195                 return 0;
196
197         case RDMA_MSGP:
198                 /* Pull in the extra for the padded case, bump our pointer */
199                 rmsgp->rm_body.rm_padded.rm_align =
200                         be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
201                 rmsgp->rm_body.rm_padded.rm_thresh =
202                         be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);
203
204                 va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
205                 rq_arg->head[0].iov_base = va;
206                 len = (u32)((unsigned long)va - (unsigned long)rmsgp);
207                 rq_arg->head[0].iov_len -= len;
208                 if (len > rq_arg->len)
209                         return -EINVAL;
210                 return len;
211         default:
212                 dprintk("svcrdma: bad rdma procedure (%u)\n",
213                         be32_to_cpu(rmsgp->rm_type));
214                 return -EINVAL;
215         }
216
217         /* The chunk list may contain either a read chunk list or a write
218          * chunk list and a reply chunk list.
219          */
220         va = &rmsgp->rm_body.rm_chunks[0];
221         vaend = (__be32 *)((unsigned long)rmsgp + rq_arg->len);
222         va = decode_read_list(va, vaend);
223         if (!va) {
224                 dprintk("svcrdma: failed to decode read list\n");
225                 return -EINVAL;
226         }
227         va = decode_write_list(va, vaend);
228         if (!va) {
229                 dprintk("svcrdma: failed to decode write list\n");
230                 return -EINVAL;
231         }
232         va = decode_reply_array(va, vaend);
233         if (!va) {
234                 dprintk("svcrdma: failed to decode reply chunk\n");
235                 return -EINVAL;
236         }
237
238         rq_arg->head[0].iov_base = va;
239         hdr_len = (unsigned long)va - (unsigned long)rmsgp;
240         rq_arg->head[0].iov_len -= hdr_len;
241         return hdr_len;
242 }
243
244 int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
245                               struct rpcrdma_msg *rmsgp,
246                               enum rpcrdma_errcode err, __be32 *va)
247 {
248         __be32 *startp = va;
249
250         *va++ = rmsgp->rm_xid;
251         *va++ = rmsgp->rm_vers;
252         *va++ = cpu_to_be32(xprt->sc_max_requests);
253         *va++ = rdma_error;
254         *va++ = cpu_to_be32(err);
255         if (err == ERR_VERS) {
256                 *va++ = rpcrdma_version;
257                 *va++ = rpcrdma_version;
258         }
259
260         return (int)((unsigned long)va - (unsigned long)startp);
261 }
262
263 int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
264 {
265         struct rpcrdma_write_array *wr_ary;
266
267         /* There is no read-list in a reply */
268
269         /* skip write list */
270         wr_ary = (struct rpcrdma_write_array *)
271                 &rmsgp->rm_body.rm_chunks[1];
272         if (wr_ary->wc_discrim)
273                 wr_ary = (struct rpcrdma_write_array *)
274                         &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)].
275                         wc_target.rs_length;
276         else
277                 wr_ary = (struct rpcrdma_write_array *)
278                         &wr_ary->wc_nchunks;
279
280         /* skip reply array */
281         if (wr_ary->wc_discrim)
282                 wr_ary = (struct rpcrdma_write_array *)
283                         &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)];
284         else
285                 wr_ary = (struct rpcrdma_write_array *)
286                         &wr_ary->wc_nchunks;
287
288         return (unsigned long) wr_ary - (unsigned long) rmsgp;
289 }
290
291 void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
292 {
293         struct rpcrdma_write_array *ary;
294
295         /* no read-list */
296         rmsgp->rm_body.rm_chunks[0] = xdr_zero;
297
298         /* write-array discrim */
299         ary = (struct rpcrdma_write_array *)
300                 &rmsgp->rm_body.rm_chunks[1];
301         ary->wc_discrim = xdr_one;
302         ary->wc_nchunks = cpu_to_be32(chunks);
303
304         /* write-list terminator */
305         ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
306
307         /* reply-array discriminator */
308         ary->wc_array[chunks].wc_target.rs_length = xdr_zero;
309 }
310
311 void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
312                                  int chunks)
313 {
314         ary->wc_discrim = xdr_one;
315         ary->wc_nchunks = cpu_to_be32(chunks);
316 }
317
318 void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
319                                      int chunk_no,
320                                      __be32 rs_handle,
321                                      __be64 rs_offset,
322                                      u32 write_len)
323 {
324         struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
325         seg->rs_handle = rs_handle;
326         seg->rs_offset = rs_offset;
327         seg->rs_length = cpu_to_be32(write_len);
328 }
329
330 void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
331                                   struct rpcrdma_msg *rdma_argp,
332                                   struct rpcrdma_msg *rdma_resp,
333                                   enum rpcrdma_proc rdma_type)
334 {
335         rdma_resp->rm_xid = rdma_argp->rm_xid;
336         rdma_resp->rm_vers = rdma_argp->rm_vers;
337         rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests);
338         rdma_resp->rm_type = cpu_to_be32(rdma_type);
339
340         /* Encode <nul> chunks lists */
341         rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
342         rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
343         rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
344 }