1 /*
2 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
3 */
4
5 /*
6 * This file contains code imported from the OFED rds source file message.c
7 * Oracle elects to have and use the contents of message.c under and governed
8 * by the OpenIB.org BSD license (see below for full license text). However,
9 * the following notice accompanied the original version of this file:
10 */
11
12 /*
13 * Copyright (c) 2006 Oracle. All rights reserved.
14 *
15 * This software is available to you under a choice of one of two
16 * licenses. You may choose to be licensed under the terms of the GNU
17 * General Public License (GPL) Version 2, available from the file
18 * COPYING in the main directory of this source tree, or the
19 * OpenIB.org BSD license below:
20 *
21 * Redistribution and use in source and binary forms, with or
22 * without modification, are permitted provided that the following
23 * conditions are met:
24 *
25 * - Redistributions of source code must retain the above
26 * copyright notice, this list of conditions and the following
27 * disclaimer.
28 *
29 * - Redistributions in binary form must reproduce the above
30 * copyright notice, this list of conditions and the following
31 * disclaimer in the documentation and/or other materials
32 * provided with the distribution.
33 *
34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
38 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
39 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
40 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
41 * SOFTWARE.
42 *
43 */
44 #include <sys/rds.h>
45
46 #include <sys/ib/clients/rdsv3/rdsv3.h>
47 #include <sys/ib/clients/rdsv3/rdma.h>
48 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
49
50 #ifndef __lock_lint
51 static unsigned int rdsv3_exthdr_size[__RDSV3_EXTHDR_MAX] = {
52 [RDSV3_EXTHDR_NONE] = 0,
53 [RDSV3_EXTHDR_VERSION] = sizeof (struct rdsv3_ext_header_version),
54 [RDSV3_EXTHDR_RDMA] = sizeof (struct rdsv3_ext_header_rdma),
55 [RDSV3_EXTHDR_RDMA_DEST] = sizeof (struct rdsv3_ext_header_rdma_dest),
56 };
57 #else
58 static unsigned int rdsv3_exthdr_size[__RDSV3_EXTHDR_MAX] = {
59 0,
60 sizeof (struct rdsv3_ext_header_version),
61 sizeof (struct rdsv3_ext_header_rdma),
62 sizeof (struct rdsv3_ext_header_rdma_dest),
63 };
64 #endif
65
66 void
rdsv3_message_addref(struct rdsv3_message * rm)67 rdsv3_message_addref(struct rdsv3_message *rm)
68 {
69 RDSV3_DPRINTF5("rdsv3_message_addref", "addref rm %p ref %d",
70 rm, atomic_get(&rm->m_refcount));
71 atomic_inc_32(&rm->m_refcount);
72 }
73
74 /*
75 * This relies on dma_map_sg() not touching sg[].page during merging.
76 */
77 static void
rdsv3_message_purge(struct rdsv3_message * rm)78 rdsv3_message_purge(struct rdsv3_message *rm)
79 {
80 unsigned long i;
81
82 RDSV3_DPRINTF4("rdsv3_message_purge", "Enter(rm: %p)", rm);
83
84 if (test_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags))
85 return;
86
87 for (i = 0; i < rm->m_nents; i++) {
88 RDSV3_DPRINTF5("rdsv3_message_purge", "putting data page %p\n",
89 (void *)rdsv3_sg_page(&rm->m_sg[i]));
90 /* XXX will have to put_page for page refs */
91 kmem_free(rdsv3_sg_page(&rm->m_sg[i]),
92 rdsv3_sg_len(&rm->m_sg[i]));
93 }
94
95 if (rm->m_rdma_op)
96 rdsv3_rdma_free_op(rm->m_rdma_op);
97 if (rm->m_rdma_mr) {
98 struct rdsv3_mr *mr = rm->m_rdma_mr;
99 if (mr->r_refcount == 0) {
100 RDSV3_DPRINTF4("rdsv3_message_purge ASSERT 0",
101 "rm %p mr %p", rm, mr);
102 return;
103 }
104 if (mr->r_refcount == 0xdeadbeef) {
105 RDSV3_DPRINTF4("rdsv3_message_purge ASSERT deadbeef",
106 "rm %p mr %p", rm, mr);
107 return;
108 }
109 if (atomic_dec_and_test(&mr->r_refcount)) {
110 rm->m_rdma_mr = NULL;
111 __rdsv3_put_mr_final(mr);
112 }
113 }
114
115 RDSV3_DPRINTF4("rdsv3_message_purge", "Return(rm: %p)", rm);
116
117 }
118
119 void
rdsv3_message_put(struct rdsv3_message * rm)120 rdsv3_message_put(struct rdsv3_message *rm)
121 {
122 RDSV3_DPRINTF5("rdsv3_message_put",
123 "put rm %p ref %d\n", rm, atomic_get(&rm->m_refcount));
124
125 if (atomic_dec_and_test(&rm->m_refcount)) {
126 ASSERT(!list_link_active(&rm->m_sock_item));
127 ASSERT(!list_link_active(&rm->m_conn_item));
128 rdsv3_message_purge(rm);
129
130 kmem_free(rm, sizeof (struct rdsv3_message) +
131 (rm->m_nents * sizeof (struct rdsv3_scatterlist)));
132 }
133 }
134
135 void
rdsv3_message_inc_free(struct rdsv3_incoming * inc)136 rdsv3_message_inc_free(struct rdsv3_incoming *inc)
137 {
138 struct rdsv3_message *rm =
139 container_of(inc, struct rdsv3_message, m_inc);
140 rdsv3_message_put(rm);
141 }
142
143 void
rdsv3_message_populate_header(struct rdsv3_header * hdr,uint16_be_t sport,uint16_be_t dport,uint64_t seq)144 rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport,
145 uint16_be_t dport, uint64_t seq)
146 {
147 hdr->h_flags = 0;
148 hdr->h_sport = sport;
149 hdr->h_dport = dport;
150 hdr->h_sequence = htonll(seq);
151 hdr->h_exthdr[0] = RDSV3_EXTHDR_NONE;
152 }
153
154 int
rdsv3_message_add_extension(struct rdsv3_header * hdr,unsigned int type,const void * data,unsigned int len)155 rdsv3_message_add_extension(struct rdsv3_header *hdr,
156 unsigned int type, const void *data, unsigned int len)
157 {
158 unsigned int ext_len = sizeof (uint8_t) + len;
159 unsigned char *dst;
160
161 RDSV3_DPRINTF4("rdsv3_message_add_extension", "Enter");
162
163 /* For now, refuse to add more than one extension header */
164 if (hdr->h_exthdr[0] != RDSV3_EXTHDR_NONE)
165 return (0);
166
167 if (type >= __RDSV3_EXTHDR_MAX ||
168 len != rdsv3_exthdr_size[type])
169 return (0);
170
171 if (ext_len >= RDSV3_HEADER_EXT_SPACE)
172 return (0);
173 dst = hdr->h_exthdr;
174
175 *dst++ = type;
176 (void) memcpy(dst, data, len);
177
178 dst[len] = RDSV3_EXTHDR_NONE;
179
180 RDSV3_DPRINTF4("rdsv3_message_add_extension", "Return");
181 return (1);
182 }
183
184 /*
185 * If a message has extension headers, retrieve them here.
186 * Call like this:
187 *
188 * unsigned int pos = 0;
189 *
190 * while (1) {
191 * buflen = sizeof(buffer);
192 * type = rdsv3_message_next_extension(hdr, &pos, buffer, &buflen);
193 * if (type == RDSV3_EXTHDR_NONE)
194 * break;
195 * ...
196 * }
197 */
198 int
rdsv3_message_next_extension(struct rdsv3_header * hdr,unsigned int * pos,void * buf,unsigned int * buflen)199 rdsv3_message_next_extension(struct rdsv3_header *hdr,
200 unsigned int *pos, void *buf, unsigned int *buflen)
201 {
202 unsigned int offset, ext_type, ext_len;
203 uint8_t *src = hdr->h_exthdr;
204
205 RDSV3_DPRINTF4("rdsv3_message_next_extension", "Enter");
206
207 offset = *pos;
208 if (offset >= RDSV3_HEADER_EXT_SPACE)
209 goto none;
210
211 /*
212 * Get the extension type and length. For now, the
213 * length is implied by the extension type.
214 */
215 ext_type = src[offset++];
216
217 if (ext_type == RDSV3_EXTHDR_NONE || ext_type >= __RDSV3_EXTHDR_MAX)
218 goto none;
219 ext_len = rdsv3_exthdr_size[ext_type];
220 if (offset + ext_len > RDSV3_HEADER_EXT_SPACE)
221 goto none;
222
223 *pos = offset + ext_len;
224 if (ext_len < *buflen)
225 *buflen = ext_len;
226 (void) memcpy(buf, src + offset, *buflen);
227 return (ext_type);
228
229 none:
230 *pos = RDSV3_HEADER_EXT_SPACE;
231 *buflen = 0;
232 return (RDSV3_EXTHDR_NONE);
233 }
234
235 int
rdsv3_message_add_version_extension(struct rdsv3_header * hdr,unsigned int version)236 rdsv3_message_add_version_extension(struct rdsv3_header *hdr,
237 unsigned int version)
238 {
239 struct rdsv3_ext_header_version ext_hdr;
240
241 ext_hdr.h_version = htonl(version);
242 return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_VERSION,
243 &ext_hdr, sizeof (ext_hdr)));
244 }
245
246 int
rdsv3_message_get_version_extension(struct rdsv3_header * hdr,unsigned int * version)247 rdsv3_message_get_version_extension(struct rdsv3_header *hdr,
248 unsigned int *version)
249 {
250 struct rdsv3_ext_header_version ext_hdr;
251 unsigned int pos = 0, len = sizeof (ext_hdr);
252
253 RDSV3_DPRINTF4("rdsv3_message_get_version_extension", "Enter");
254
255 /*
256 * We assume the version extension is the only one present
257 */
258 if (rdsv3_message_next_extension(hdr, &pos, &ext_hdr, &len) !=
259 RDSV3_EXTHDR_VERSION)
260 return (0);
261 *version = ntohl(ext_hdr.h_version);
262 return (1);
263 }
264
265 int
rdsv3_message_add_rdma_dest_extension(struct rdsv3_header * hdr,uint32_t r_key,uint32_t offset)266 rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr, uint32_t r_key,
267 uint32_t offset)
268 {
269 struct rdsv3_ext_header_rdma_dest ext_hdr;
270
271 ext_hdr.h_rdma_rkey = htonl(r_key);
272 ext_hdr.h_rdma_offset = htonl(offset);
273 return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_RDMA_DEST,
274 &ext_hdr, sizeof (ext_hdr)));
275 }
276
277 struct rdsv3_message *
rdsv3_message_alloc(unsigned int nents,int gfp)278 rdsv3_message_alloc(unsigned int nents, int gfp)
279 {
280 struct rdsv3_message *rm;
281
282 RDSV3_DPRINTF4("rdsv3_message_alloc", "Enter(nents: %d)", nents);
283
284 rm = kmem_zalloc(sizeof (struct rdsv3_message) +
285 (nents * sizeof (struct rdsv3_scatterlist)), gfp);
286 if (!rm)
287 goto out;
288
289 rm->m_refcount = 1;
290 list_link_init(&rm->m_sock_item);
291 list_link_init(&rm->m_conn_item);
292 mutex_init(&rm->m_rs_lock, NULL, MUTEX_DRIVER, NULL);
293 rdsv3_init_waitqueue(&rm->m_flush_wait);
294
295 RDSV3_DPRINTF4("rdsv3_message_alloc", "Return(rm: %p)", rm);
296 out:
297 return (rm);
298 }
299
300 struct rdsv3_message *
rdsv3_message_map_pages(unsigned long * page_addrs,unsigned int total_len)301 rdsv3_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
302 {
303 struct rdsv3_message *rm;
304 unsigned int i;
305
306 RDSV3_DPRINTF4("rdsv3_message_map_pages", "Enter(len: %d)", total_len);
307
308 #ifndef __lock_lint
309 rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP);
310 #else
311 rm = NULL;
312 #endif
313 if (rm == NULL)
314 return (ERR_PTR(-ENOMEM));
315
316 set_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags);
317 rm->m_inc.i_hdr.h_len = htonl(total_len);
318 #ifndef __lock_lint
319 rm->m_nents = ceil(total_len, PAGE_SIZE);
320 #else
321 rm->m_nents = 0;
322 #endif
323
324 for (i = 0; i < rm->m_nents; ++i) {
325 rdsv3_sg_set_page(&rm->m_sg[i],
326 page_addrs[i],
327 PAGE_SIZE, 0);
328 }
329
330 return (rm);
331 }
332
333 struct rdsv3_message *
rdsv3_message_copy_from_user(struct uio * uiop,size_t total_len)334 rdsv3_message_copy_from_user(struct uio *uiop,
335 size_t total_len)
336 {
337 struct rdsv3_message *rm;
338 struct rdsv3_scatterlist *sg;
339 int ret;
340
341 RDSV3_DPRINTF4("rdsv3_message_copy_from_user", "Enter: %d", total_len);
342
343 #ifndef __lock_lint
344 rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP);
345 #else
346 rm = NULL;
347 #endif
348 if (rm == NULL) {
349 ret = -ENOMEM;
350 goto out;
351 }
352
353 rm->m_inc.i_hdr.h_len = htonl(total_len);
354
355 /*
356 * now allocate and copy in the data payload.
357 */
358 sg = rm->m_sg;
359
360 while (total_len) {
361 if (rdsv3_sg_page(sg) == NULL) {
362 ret = rdsv3_page_remainder_alloc(sg, total_len, 0);
363 if (ret)
364 goto out;
365 rm->m_nents++;
366 }
367
368 ret = uiomove(rdsv3_sg_page(sg), rdsv3_sg_len(sg), UIO_WRITE,
369 uiop);
370 if (ret) {
371 RDSV3_DPRINTF2("rdsv3_message_copy_from_user",
372 "uiomove failed");
373 ret = -ret;
374 goto out;
375 }
376
377 total_len -= rdsv3_sg_len(sg);
378 sg++;
379 }
380 ret = 0;
381 out:
382 if (ret) {
383 if (rm)
384 rdsv3_message_put(rm);
385 rm = ERR_PTR(ret);
386 }
387 return (rm);
388 }
389
390 int
rdsv3_message_inc_copy_to_user(struct rdsv3_incoming * inc,uio_t * uiop,size_t size)391 rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc,
392 uio_t *uiop, size_t size)
393 {
394 struct rdsv3_message *rm;
395 struct rdsv3_scatterlist *sg;
396 unsigned long to_copy;
397 unsigned long vec_off;
398 int copied;
399 int ret;
400 uint32_t len;
401
402 rm = container_of(inc, struct rdsv3_message, m_inc);
403 len = ntohl(rm->m_inc.i_hdr.h_len);
404
405 RDSV3_DPRINTF4("rdsv3_message_inc_copy_to_user",
406 "Enter(rm: %p, len: %d)", rm, len);
407
408 sg = rm->m_sg;
409 vec_off = 0;
410 copied = 0;
411
412 while (copied < size && copied < len) {
413
414 to_copy = min(len - copied, sg->length - vec_off);
415 to_copy = min(size - copied, to_copy);
416
417 RDSV3_DPRINTF5("rdsv3_message_inc_copy_to_user",
418 "copying %lu bytes to user iov %p from sg [%p, %u] + %lu\n",
419 to_copy, uiop,
420 rdsv3_sg_page(sg), sg->length, vec_off);
421
422 ret = uiomove(rdsv3_sg_page(sg), to_copy, UIO_READ, uiop);
423 if (ret)
424 break;
425
426 vec_off += to_copy;
427 copied += to_copy;
428
429 if (vec_off == sg->length) {
430 vec_off = 0;
431 sg++;
432 }
433 }
434
435 return (copied);
436 }
437
438 /*
439 * If the message is still on the send queue, wait until the transport
440 * is done with it. This is particularly important for RDMA operations.
441 */
442 /* ARGSUSED */
443 void
rdsv3_message_wait(struct rdsv3_message * rm)444 rdsv3_message_wait(struct rdsv3_message *rm)
445 {
446 rdsv3_wait_event(&rm->m_flush_wait,
447 !test_bit(RDSV3_MSG_MAPPED, &rm->m_flags));
448 }
449
450 void
rdsv3_message_unmapped(struct rdsv3_message * rm)451 rdsv3_message_unmapped(struct rdsv3_message *rm)
452 {
453 clear_bit(RDSV3_MSG_MAPPED, &rm->m_flags);
454 rdsv3_wake_up_all(&rm->m_flush_wait);
455 }
456