xref: /illumos-gate/usr/src/uts/common/io/ib/clients/rdsv3/message.c (revision 7d0b359ca572cd04474eb1f2ceec5a8ff39e36c9)
1 /*
2  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
3  */
4 
5 /*
6  * This file contains code imported from the OFED rds source file message.c
7  * Oracle elects to have and use the contents of message.c under and governed
8  * by the OpenIB.org BSD license (see below for full license text). However,
9  * the following notice accompanied the original version of this file:
10  */
11 
12 /*
13  * Copyright (c) 2006 Oracle.  All rights reserved.
14  *
15  * This software is available to you under a choice of one of two
16  * licenses.  You may choose to be licensed under the terms of the GNU
17  * General Public License (GPL) Version 2, available from the file
18  * COPYING in the main directory of this source tree, or the
19  * OpenIB.org BSD license below:
20  *
21  *     Redistribution and use in source and binary forms, with or
22  *     without modification, are permitted provided that the following
23  *     conditions are met:
24  *
25  *      - Redistributions of source code must retain the above
26  *        copyright notice, this list of conditions and the following
27  *        disclaimer.
28  *
29  *      - Redistributions in binary form must reproduce the above
30  *        copyright notice, this list of conditions and the following
31  *        disclaimer in the documentation and/or other materials
32  *        provided with the distribution.
33  *
34  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
38  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
39  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
40  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
41  * SOFTWARE.
42  *
43  */
44 #include <sys/rds.h>
45 
46 #include <sys/ib/clients/rdsv3/rdsv3.h>
47 #include <sys/ib/clients/rdsv3/rdma.h>
48 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
49 
50 #ifndef __lock_lint
51 static unsigned int	rdsv3_exthdr_size[__RDSV3_EXTHDR_MAX] = {
52 [RDSV3_EXTHDR_NONE]	= 0,
53 [RDSV3_EXTHDR_VERSION]	= sizeof (struct rdsv3_ext_header_version),
54 [RDSV3_EXTHDR_RDMA]	= sizeof (struct rdsv3_ext_header_rdma),
55 [RDSV3_EXTHDR_RDMA_DEST]	= sizeof (struct rdsv3_ext_header_rdma_dest),
56 };
57 #else
58 static unsigned int	rdsv3_exthdr_size[__RDSV3_EXTHDR_MAX] = {
59 			0,
60 			sizeof (struct rdsv3_ext_header_version),
61 			sizeof (struct rdsv3_ext_header_rdma),
62 			sizeof (struct rdsv3_ext_header_rdma_dest),
63 };
64 #endif
65 
66 void
67 rdsv3_message_addref(struct rdsv3_message *rm)
68 {
69 	RDSV3_DPRINTF5("rdsv3_message_addref", "addref rm %p ref %d",
70 	    rm, atomic_get(&rm->m_refcount));
71 	atomic_inc_32(&rm->m_refcount);
72 }
73 
74 /*
75  * This relies on dma_map_sg() not touching sg[].page during merging.
76  */
77 static void
78 rdsv3_message_purge(struct rdsv3_message *rm)
79 {
80 	unsigned long i;
81 
82 	RDSV3_DPRINTF4("rdsv3_message_purge", "Enter(rm: %p)", rm);
83 
84 	if (test_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags))
85 		return;
86 
87 	for (i = 0; i < rm->m_nents; i++) {
88 		RDSV3_DPRINTF5("rdsv3_message_purge", "putting data page %p\n",
89 		    (void *)rdsv3_sg_page(&rm->m_sg[i]));
90 		/* XXX will have to put_page for page refs */
91 		kmem_free(rdsv3_sg_page(&rm->m_sg[i]),
92 		    rdsv3_sg_len(&rm->m_sg[i]));
93 	}
94 
95 	if (rm->m_rdma_op)
96 		rdsv3_rdma_free_op(rm->m_rdma_op);
97 	if (rm->m_rdma_mr) {
98 		struct rdsv3_mr *mr = rm->m_rdma_mr;
99 		if (mr->r_refcount == 0) {
100 			RDSV3_DPRINTF4("rdsv3_message_purge ASSERT 0",
101 			    "rm %p mr %p", rm, mr);
102 			return;
103 		}
104 		if (mr->r_refcount == 0xdeadbeef) {
105 			RDSV3_DPRINTF4("rdsv3_message_purge ASSERT deadbeef",
106 			    "rm %p mr %p", rm, mr);
107 			return;
108 		}
109 		if (atomic_dec_and_test(&mr->r_refcount)) {
110 			rm->m_rdma_mr = NULL;
111 			__rdsv3_put_mr_final(mr);
112 		}
113 	}
114 
115 	RDSV3_DPRINTF4("rdsv3_message_purge", "Return(rm: %p)", rm);
116 
117 }
118 
119 void
120 rdsv3_message_put(struct rdsv3_message *rm)
121 {
122 	RDSV3_DPRINTF5("rdsv3_message_put",
123 	    "put rm %p ref %d\n", rm, atomic_get(&rm->m_refcount));
124 
125 	if (atomic_dec_and_test(&rm->m_refcount)) {
126 		ASSERT(!list_link_active(&rm->m_sock_item));
127 		ASSERT(!list_link_active(&rm->m_conn_item));
128 		rdsv3_message_purge(rm);
129 
130 		kmem_free(rm, sizeof (struct rdsv3_message) +
131 		    (rm->m_nents * sizeof (struct rdsv3_scatterlist)));
132 	}
133 }
134 
135 void
136 rdsv3_message_inc_free(struct rdsv3_incoming *inc)
137 {
138 	struct rdsv3_message *rm =
139 	    container_of(inc, struct rdsv3_message, m_inc);
140 	rdsv3_message_put(rm);
141 }
142 
143 void
144 rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport,
145     uint16_be_t dport, uint64_t seq)
146 {
147 	hdr->h_flags = 0;
148 	hdr->h_sport = sport;
149 	hdr->h_dport = dport;
150 	hdr->h_sequence = htonll(seq);
151 	hdr->h_exthdr[0] = RDSV3_EXTHDR_NONE;
152 }
153 
154 int
155 rdsv3_message_add_extension(struct rdsv3_header *hdr,
156     unsigned int type, const void *data, unsigned int len)
157 {
158 	unsigned int ext_len = sizeof (uint8_t) + len;
159 	unsigned char *dst;
160 
161 	RDSV3_DPRINTF4("rdsv3_message_add_extension", "Enter");
162 
163 	/* For now, refuse to add more than one extension header */
164 	if (hdr->h_exthdr[0] != RDSV3_EXTHDR_NONE)
165 		return (0);
166 
167 	if (type >= __RDSV3_EXTHDR_MAX ||
168 	    len != rdsv3_exthdr_size[type])
169 		return (0);
170 
171 	if (ext_len >= RDSV3_HEADER_EXT_SPACE)
172 		return (0);
173 	dst = hdr->h_exthdr;
174 
175 	*dst++ = type;
176 	(void) memcpy(dst, data, len);
177 
178 	dst[len] = RDSV3_EXTHDR_NONE;
179 
180 	RDSV3_DPRINTF4("rdsv3_message_add_extension", "Return");
181 	return (1);
182 }
183 
184 /*
185  * If a message has extension headers, retrieve them here.
186  * Call like this:
187  *
188  * unsigned int pos = 0;
189  *
190  * while (1) {
191  *	buflen = sizeof(buffer);
192  *	type = rdsv3_message_next_extension(hdr, &pos, buffer, &buflen);
193  *	if (type == RDSV3_EXTHDR_NONE)
194  *		break;
195  *	...
196  * }
197  */
198 int
199 rdsv3_message_next_extension(struct rdsv3_header *hdr,
200     unsigned int *pos, void *buf, unsigned int *buflen)
201 {
202 	unsigned int offset, ext_type, ext_len;
203 	uint8_t *src = hdr->h_exthdr;
204 
205 	RDSV3_DPRINTF4("rdsv3_message_next_extension", "Enter");
206 
207 	offset = *pos;
208 	if (offset >= RDSV3_HEADER_EXT_SPACE)
209 		goto none;
210 
211 	/*
212 	 * Get the extension type and length. For now, the
213 	 * length is implied by the extension type.
214 	 */
215 	ext_type = src[offset++];
216 
217 	if (ext_type == RDSV3_EXTHDR_NONE || ext_type >= __RDSV3_EXTHDR_MAX)
218 		goto none;
219 	ext_len = rdsv3_exthdr_size[ext_type];
220 	if (offset + ext_len > RDSV3_HEADER_EXT_SPACE)
221 		goto none;
222 
223 	*pos = offset + ext_len;
224 	if (ext_len < *buflen)
225 		*buflen = ext_len;
226 	(void) memcpy(buf, src + offset, *buflen);
227 	return (ext_type);
228 
229 none:
230 	*pos = RDSV3_HEADER_EXT_SPACE;
231 	*buflen = 0;
232 	return (RDSV3_EXTHDR_NONE);
233 }
234 
235 int
236 rdsv3_message_add_version_extension(struct rdsv3_header *hdr,
237     unsigned int version)
238 {
239 	struct rdsv3_ext_header_version ext_hdr;
240 
241 	ext_hdr.h_version = htonl(version);
242 	return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_VERSION,
243 	    &ext_hdr, sizeof (ext_hdr)));
244 }
245 
246 int
247 rdsv3_message_get_version_extension(struct rdsv3_header *hdr,
248     unsigned int *version)
249 {
250 	struct rdsv3_ext_header_version ext_hdr;
251 	unsigned int pos = 0, len = sizeof (ext_hdr);
252 
253 	RDSV3_DPRINTF4("rdsv3_message_get_version_extension", "Enter");
254 
255 	/*
256 	 * We assume the version extension is the only one present
257 	 */
258 	if (rdsv3_message_next_extension(hdr, &pos, &ext_hdr, &len) !=
259 	    RDSV3_EXTHDR_VERSION)
260 		return (0);
261 	*version = ntohl(ext_hdr.h_version);
262 	return (1);
263 }
264 
265 int
266 rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr, uint32_t r_key,
267     uint32_t offset)
268 {
269 	struct rdsv3_ext_header_rdma_dest ext_hdr;
270 
271 	ext_hdr.h_rdma_rkey = htonl(r_key);
272 	ext_hdr.h_rdma_offset = htonl(offset);
273 	return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_RDMA_DEST,
274 	    &ext_hdr, sizeof (ext_hdr)));
275 }
276 
277 struct rdsv3_message *
278 rdsv3_message_alloc(unsigned int nents, int gfp)
279 {
280 	struct rdsv3_message *rm;
281 
282 	RDSV3_DPRINTF4("rdsv3_message_alloc", "Enter(nents: %d)", nents);
283 
284 	rm = kmem_zalloc(sizeof (struct rdsv3_message) +
285 	    (nents * sizeof (struct rdsv3_scatterlist)), gfp);
286 	if (!rm)
287 		goto out;
288 
289 	rm->m_refcount = 1;
290 	list_link_init(&rm->m_sock_item);
291 	list_link_init(&rm->m_conn_item);
292 	mutex_init(&rm->m_rs_lock, NULL, MUTEX_DRIVER, NULL);
293 	rdsv3_init_waitqueue(&rm->m_flush_wait);
294 
295 	RDSV3_DPRINTF4("rdsv3_message_alloc", "Return(rm: %p)", rm);
296 out:
297 	return (rm);
298 }
299 
300 struct rdsv3_message *
301 rdsv3_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
302 {
303 	struct rdsv3_message *rm;
304 	unsigned int i;
305 
306 	RDSV3_DPRINTF4("rdsv3_message_map_pages", "Enter(len: %d)", total_len);
307 
308 #ifndef __lock_lint
309 	rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP);
310 #else
311 	rm = NULL;
312 #endif
313 	if (rm == NULL)
314 		return (ERR_PTR(-ENOMEM));
315 
316 	set_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags);
317 	rm->m_inc.i_hdr.h_len = htonl(total_len);
318 #ifndef __lock_lint
319 	rm->m_nents = ceil(total_len, PAGE_SIZE);
320 #else
321 	rm->m_nents = 0;
322 #endif
323 
324 	for (i = 0; i < rm->m_nents; ++i) {
325 		rdsv3_sg_set_page(&rm->m_sg[i],
326 		    page_addrs[i],
327 		    PAGE_SIZE, 0);
328 	}
329 
330 	return (rm);
331 }
332 
333 struct rdsv3_message *
334 rdsv3_message_copy_from_user(struct uio *uiop,
335     size_t total_len)
336 {
337 	struct rdsv3_message *rm;
338 	struct rdsv3_scatterlist *sg;
339 	int ret;
340 
341 	RDSV3_DPRINTF4("rdsv3_message_copy_from_user", "Enter: %d", total_len);
342 
343 #ifndef __lock_lint
344 	rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP);
345 #else
346 	rm = NULL;
347 #endif
348 	if (rm == NULL) {
349 		ret = -ENOMEM;
350 		goto out;
351 	}
352 
353 	rm->m_inc.i_hdr.h_len = htonl(total_len);
354 
355 	/*
356 	 * now allocate and copy in the data payload.
357 	 */
358 	sg = rm->m_sg;
359 
360 	while (total_len) {
361 		if (rdsv3_sg_page(sg) == NULL) {
362 			ret = rdsv3_page_remainder_alloc(sg, total_len, 0);
363 			if (ret)
364 				goto out;
365 			rm->m_nents++;
366 		}
367 
368 		ret = uiomove(rdsv3_sg_page(sg), rdsv3_sg_len(sg), UIO_WRITE,
369 		    uiop);
370 		if (ret) {
371 			RDSV3_DPRINTF2("rdsv3_message_copy_from_user",
372 			    "uiomove failed");
373 			ret = -ret;
374 			goto out;
375 		}
376 
377 		total_len -= rdsv3_sg_len(sg);
378 		sg++;
379 	}
380 	ret = 0;
381 out:
382 	if (ret) {
383 		if (rm)
384 			rdsv3_message_put(rm);
385 		rm = ERR_PTR(ret);
386 	}
387 	return (rm);
388 }
389 
390 int
391 rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc,
392     uio_t *uiop, size_t size)
393 {
394 	struct rdsv3_message *rm;
395 	struct rdsv3_scatterlist *sg;
396 	unsigned long to_copy;
397 	unsigned long vec_off;
398 	int copied;
399 	int ret;
400 	uint32_t len;
401 
402 	rm = container_of(inc, struct rdsv3_message, m_inc);
403 	len = ntohl(rm->m_inc.i_hdr.h_len);
404 
405 	RDSV3_DPRINTF4("rdsv3_message_inc_copy_to_user",
406 	    "Enter(rm: %p, len: %d)", rm, len);
407 
408 	sg = rm->m_sg;
409 	vec_off = 0;
410 	copied = 0;
411 
412 	while (copied < size && copied < len) {
413 
414 		to_copy = min(len - copied, sg->length - vec_off);
415 		to_copy = min(size - copied, to_copy);
416 
417 		RDSV3_DPRINTF5("rdsv3_message_inc_copy_to_user",
418 		    "copying %lu bytes to user iov %p from sg [%p, %u] + %lu\n",
419 		    to_copy, uiop,
420 		    rdsv3_sg_page(sg), sg->length, vec_off);
421 
422 		ret = uiomove(rdsv3_sg_page(sg), to_copy, UIO_READ, uiop);
423 		if (ret)
424 			break;
425 
426 		vec_off += to_copy;
427 		copied += to_copy;
428 
429 		if (vec_off == sg->length) {
430 			vec_off = 0;
431 			sg++;
432 		}
433 	}
434 
435 	return (copied);
436 }
437 
438 /*
439  * If the message is still on the send queue, wait until the transport
440  * is done with it. This is particularly important for RDMA operations.
441  */
442 /* ARGSUSED */
443 void
444 rdsv3_message_wait(struct rdsv3_message *rm)
445 {
446 	rdsv3_wait_event(&rm->m_flush_wait,
447 	    !test_bit(RDSV3_MSG_MAPPED, &rm->m_flags));
448 }
449 
450 void
451 rdsv3_message_unmapped(struct rdsv3_message *rm)
452 {
453 	clear_bit(RDSV3_MSG_MAPPED, &rm->m_flags);
454 	rdsv3_wake_up_all(&rm->m_flush_wait);
455 }
456