xref: /illumos-gate/usr/src/uts/common/io/ib/clients/rdsv3/message.c (revision 72b703890acc1682901e7ab4df40758e3c4399d8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * Copyright (c) 2006 Oracle.  All rights reserved.
27  *
28  * This software is available to you under a choice of one of two
29  * licenses.  You may choose to be licensed under the terms of the GNU
30  * General Public License (GPL) Version 2, available from the file
31  * COPYING in the main directory of this source tree, or the
32  * OpenIB.org BSD license below:
33  *
34  *     Redistribution and use in source and binary forms, with or
35  *     without modification, are permitted provided that the following
36  *     conditions are met:
37  *
38  *      - Redistributions of source code must retain the above
39  *        copyright notice, this list of conditions and the following
40  *        disclaimer.
41  *
42  *      - Redistributions in binary form must reproduce the above
43  *        copyright notice, this list of conditions and the following
44  *        disclaimer in the documentation and/or other materials
45  *        provided with the distribution.
46  *
47  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
48  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
49  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
50  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
51  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
52  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
53  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
54  * SOFTWARE.
55  *
56  */
57 #include <sys/rds.h>
58 
59 #include <sys/ib/clients/rdsv3/rdsv3.h>
60 #include <sys/ib/clients/rdsv3/rdma.h>
61 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
62 
63 #ifndef __lock_lint
64 static unsigned int	rdsv3_exthdr_size[__RDSV3_EXTHDR_MAX] = {
65 [RDSV3_EXTHDR_NONE]	= 0,
66 [RDSV3_EXTHDR_VERSION]	= sizeof (struct rdsv3_ext_header_version),
67 [RDSV3_EXTHDR_RDMA]	= sizeof (struct rdsv3_ext_header_rdma),
68 [RDSV3_EXTHDR_RDMA_DEST]	= sizeof (struct rdsv3_ext_header_rdma_dest),
69 };
70 #else
71 static unsigned int	rdsv3_exthdr_size[__RDSV3_EXTHDR_MAX] = {
72 			0,
73 			sizeof (struct rdsv3_ext_header_version),
74 			sizeof (struct rdsv3_ext_header_rdma),
75 			sizeof (struct rdsv3_ext_header_rdma_dest),
76 };
77 #endif
78 
79 void
80 rdsv3_message_addref(struct rdsv3_message *rm)
81 {
82 	RDSV3_DPRINTF5("rdsv3_message_addref", "addref rm %p ref %d",
83 	    rm, atomic_get(&rm->m_refcount));
84 	atomic_add_32(&rm->m_refcount, 1);
85 }
86 
87 /*
88  * This relies on dma_map_sg() not touching sg[].page during merging.
89  */
90 static void
91 rdsv3_message_purge(struct rdsv3_message *rm)
92 {
93 	unsigned long i;
94 
95 	RDSV3_DPRINTF4("rdsv3_message_purge", "Enter(rm: %p)", rm);
96 
97 	if (test_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags))
98 		return;
99 
100 	for (i = 0; i < rm->m_nents; i++) {
101 		RDSV3_DPRINTF5("rdsv3_message_purge", "putting data page %p\n",
102 		    (void *)rdsv3_sg_page(&rm->m_sg[i]));
103 		/* XXX will have to put_page for page refs */
104 		kmem_free(rdsv3_sg_page(&rm->m_sg[i]),
105 		    rdsv3_sg_len(&rm->m_sg[i]));
106 	}
107 
108 	if (rm->m_rdma_op)
109 		rdsv3_rdma_free_op(rm->m_rdma_op);
110 	if (rm->m_rdma_mr) {
111 		struct rdsv3_mr *mr = rm->m_rdma_mr;
112 		if (mr->r_refcount == 0) {
113 			RDSV3_DPRINTF4("rdsv3_message_purge ASSERT 0",
114 			    "rm %p mr %p", rm, mr);
115 			return;
116 		}
117 		if (mr->r_refcount == 0xdeadbeef) {
118 			RDSV3_DPRINTF4("rdsv3_message_purge ASSERT deadbeef",
119 			    "rm %p mr %p", rm, mr);
120 			return;
121 		}
122 		if (atomic_dec_and_test(&mr->r_refcount)) {
123 			rm->m_rdma_mr = NULL;
124 			__rdsv3_put_mr_final(mr);
125 		}
126 	}
127 
128 	RDSV3_DPRINTF4("rdsv3_message_purge", "Return(rm: %p)", rm);
129 
130 }
131 
132 void
133 rdsv3_message_put(struct rdsv3_message *rm)
134 {
135 	RDSV3_DPRINTF5("rdsv3_message_put",
136 	    "put rm %p ref %d\n", rm, atomic_get(&rm->m_refcount));
137 
138 	if (atomic_dec_and_test(&rm->m_refcount)) {
139 		ASSERT(!list_link_active(&rm->m_sock_item));
140 		ASSERT(!list_link_active(&rm->m_conn_item));
141 		rdsv3_message_purge(rm);
142 
143 		kmem_free(rm, sizeof (struct rdsv3_message) +
144 		    (rm->m_nents * sizeof (struct rdsv3_scatterlist)));
145 	}
146 }
147 
148 void
149 rdsv3_message_inc_free(struct rdsv3_incoming *inc)
150 {
151 	struct rdsv3_message *rm =
152 	    container_of(inc, struct rdsv3_message, m_inc);
153 	rdsv3_message_put(rm);
154 }
155 
156 void
157 rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport,
158     uint16_be_t dport, uint64_t seq)
159 {
160 	hdr->h_flags = 0;
161 	hdr->h_sport = sport;
162 	hdr->h_dport = dport;
163 	hdr->h_sequence = htonll(seq);
164 	hdr->h_exthdr[0] = RDSV3_EXTHDR_NONE;
165 }
166 
167 int
168 rdsv3_message_add_extension(struct rdsv3_header *hdr,
169     unsigned int type, const void *data, unsigned int len)
170 {
171 	unsigned int ext_len = sizeof (uint8_t) + len;
172 	unsigned char *dst;
173 
174 	RDSV3_DPRINTF4("rdsv3_message_add_extension", "Enter");
175 
176 	/* For now, refuse to add more than one extension header */
177 	if (hdr->h_exthdr[0] != RDSV3_EXTHDR_NONE)
178 		return (0);
179 
180 	if (type >= __RDSV3_EXTHDR_MAX ||
181 	    len != rdsv3_exthdr_size[type])
182 		return (0);
183 
184 	if (ext_len >= RDSV3_HEADER_EXT_SPACE)
185 		return (0);
186 	dst = hdr->h_exthdr;
187 
188 	*dst++ = type;
189 	(void) memcpy(dst, data, len);
190 
191 	dst[len] = RDSV3_EXTHDR_NONE;
192 
193 	RDSV3_DPRINTF4("rdsv3_message_add_extension", "Return");
194 	return (1);
195 }
196 
197 /*
198  * If a message has extension headers, retrieve them here.
199  * Call like this:
200  *
201  * unsigned int pos = 0;
202  *
203  * while (1) {
204  *	buflen = sizeof(buffer);
205  *	type = rdsv3_message_next_extension(hdr, &pos, buffer, &buflen);
206  *	if (type == RDSV3_EXTHDR_NONE)
207  *		break;
208  *	...
209  * }
210  */
211 int
212 rdsv3_message_next_extension(struct rdsv3_header *hdr,
213     unsigned int *pos, void *buf, unsigned int *buflen)
214 {
215 	unsigned int offset, ext_type, ext_len;
216 	uint8_t *src = hdr->h_exthdr;
217 
218 	RDSV3_DPRINTF4("rdsv3_message_next_extension", "Enter");
219 
220 	offset = *pos;
221 	if (offset >= RDSV3_HEADER_EXT_SPACE)
222 		goto none;
223 
224 	/*
225 	 * Get the extension type and length. For now, the
226 	 * length is implied by the extension type.
227 	 */
228 	ext_type = src[offset++];
229 
230 	if (ext_type == RDSV3_EXTHDR_NONE || ext_type >= __RDSV3_EXTHDR_MAX)
231 		goto none;
232 	ext_len = rdsv3_exthdr_size[ext_type];
233 	if (offset + ext_len > RDSV3_HEADER_EXT_SPACE)
234 		goto none;
235 
236 	*pos = offset + ext_len;
237 	if (ext_len < *buflen)
238 		*buflen = ext_len;
239 	(void) memcpy(buf, src + offset, *buflen);
240 	return (ext_type);
241 
242 none:
243 	*pos = RDSV3_HEADER_EXT_SPACE;
244 	*buflen = 0;
245 	return (RDSV3_EXTHDR_NONE);
246 }
247 
248 int
249 rdsv3_message_add_version_extension(struct rdsv3_header *hdr,
250     unsigned int version)
251 {
252 	struct rdsv3_ext_header_version ext_hdr;
253 
254 	ext_hdr.h_version = htonl(version);
255 	return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_VERSION,
256 	    &ext_hdr, sizeof (ext_hdr)));
257 }
258 
259 int
260 rdsv3_message_get_version_extension(struct rdsv3_header *hdr,
261     unsigned int *version)
262 {
263 	struct rdsv3_ext_header_version ext_hdr;
264 	unsigned int pos = 0, len = sizeof (ext_hdr);
265 
266 	RDSV3_DPRINTF4("rdsv3_message_get_version_extension", "Enter");
267 
268 	/*
269 	 * We assume the version extension is the only one present
270 	 */
271 	if (rdsv3_message_next_extension(hdr, &pos, &ext_hdr, &len) !=
272 	    RDSV3_EXTHDR_VERSION)
273 		return (0);
274 	*version = ntohl(ext_hdr.h_version);
275 	return (1);
276 }
277 
278 int
279 rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr, uint32_t r_key,
280     uint32_t offset)
281 {
282 	struct rdsv3_ext_header_rdma_dest ext_hdr;
283 
284 	ext_hdr.h_rdma_rkey = htonl(r_key);
285 	ext_hdr.h_rdma_offset = htonl(offset);
286 	return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_RDMA_DEST,
287 	    &ext_hdr, sizeof (ext_hdr)));
288 }
289 
290 struct rdsv3_message *
291 rdsv3_message_alloc(unsigned int nents, int gfp)
292 {
293 	struct rdsv3_message *rm;
294 
295 	RDSV3_DPRINTF4("rdsv3_message_alloc", "Enter(nents: %d)", nents);
296 
297 	rm = kmem_zalloc(sizeof (struct rdsv3_message) +
298 	    (nents * sizeof (struct rdsv3_scatterlist)), gfp);
299 	if (!rm)
300 		goto out;
301 
302 	rm->m_refcount = 1;
303 	list_link_init(&rm->m_sock_item);
304 	list_link_init(&rm->m_conn_item);
305 	mutex_init(&rm->m_rs_lock, NULL, MUTEX_DRIVER, NULL);
306 	rdsv3_init_waitqueue(&rm->m_flush_wait);
307 
308 	RDSV3_DPRINTF4("rdsv3_message_alloc", "Return(rm: %p)", rm);
309 out:
310 	return (rm);
311 }
312 
313 struct rdsv3_message *
314 rdsv3_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
315 {
316 	struct rdsv3_message *rm;
317 	unsigned int i;
318 
319 	RDSV3_DPRINTF4("rdsv3_message_map_pages", "Enter(len: %d)", total_len);
320 
321 #ifndef __lock_lint
322 	rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP);
323 #else
324 	rm = NULL;
325 #endif
326 	if (rm == NULL)
327 		return (ERR_PTR(-ENOMEM));
328 
329 	set_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags);
330 	rm->m_inc.i_hdr.h_len = htonl(total_len);
331 #ifndef __lock_lint
332 	rm->m_nents = ceil(total_len, PAGE_SIZE);
333 #else
334 	rm->m_nents = 0;
335 #endif
336 
337 	for (i = 0; i < rm->m_nents; ++i) {
338 		rdsv3_sg_set_page(&rm->m_sg[i],
339 		    page_addrs[i],
340 		    PAGE_SIZE, 0);
341 	}
342 
343 	return (rm);
344 }
345 
346 struct rdsv3_message *
347 rdsv3_message_copy_from_user(struct uio *uiop,
348     size_t total_len)
349 {
350 	struct rdsv3_message *rm;
351 	struct rdsv3_scatterlist *sg;
352 	int ret;
353 
354 	RDSV3_DPRINTF4("rdsv3_message_copy_from_user", "Enter: %d", total_len);
355 
356 #ifndef __lock_lint
357 	rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP);
358 #else
359 	rm = NULL;
360 #endif
361 	if (rm == NULL) {
362 		ret = -ENOMEM;
363 		goto out;
364 	}
365 
366 	rm->m_inc.i_hdr.h_len = htonl(total_len);
367 
368 	/*
369 	 * now allocate and copy in the data payload.
370 	 */
371 	sg = rm->m_sg;
372 
373 	while (total_len) {
374 		if (rdsv3_sg_page(sg) == NULL) {
375 			ret = rdsv3_page_remainder_alloc(sg, total_len, 0);
376 			if (ret)
377 				goto out;
378 			rm->m_nents++;
379 		}
380 
381 		ret = uiomove(rdsv3_sg_page(sg), rdsv3_sg_len(sg), UIO_WRITE,
382 		    uiop);
383 		if (ret) {
384 			RDSV3_DPRINTF2("rdsv3_message_copy_from_user",
385 			    "uiomove failed");
386 			ret = -ret;
387 			goto out;
388 		}
389 
390 		total_len -= rdsv3_sg_len(sg);
391 		sg++;
392 	}
393 	ret = 0;
394 out:
395 	if (ret) {
396 		if (rm)
397 			rdsv3_message_put(rm);
398 		rm = ERR_PTR(ret);
399 	}
400 	return (rm);
401 }
402 
403 int
404 rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc,
405     uio_t *uiop, size_t size)
406 {
407 	struct rdsv3_message *rm;
408 	struct rdsv3_scatterlist *sg;
409 	unsigned long to_copy;
410 	unsigned long vec_off;
411 	int copied;
412 	int ret;
413 	uint32_t len;
414 
415 	rm = container_of(inc, struct rdsv3_message, m_inc);
416 	len = ntohl(rm->m_inc.i_hdr.h_len);
417 
418 	RDSV3_DPRINTF4("rdsv3_message_inc_copy_to_user",
419 	    "Enter(rm: %p, len: %d)", rm, len);
420 
421 	sg = rm->m_sg;
422 	vec_off = 0;
423 	copied = 0;
424 
425 	while (copied < size && copied < len) {
426 
427 		to_copy = min(len - copied, sg->length - vec_off);
428 		to_copy = min(size - copied, to_copy);
429 
430 		RDSV3_DPRINTF5("rdsv3_message_inc_copy_to_user",
431 		    "copying %lu bytes to user iov %p from sg [%p, %u] + %lu\n",
432 		    to_copy, uiop,
433 		    rdsv3_sg_page(sg), sg->length, vec_off);
434 
435 		ret = uiomove(rdsv3_sg_page(sg), to_copy, UIO_READ, uiop);
436 		if (ret)
437 			break;
438 
439 		vec_off += to_copy;
440 		copied += to_copy;
441 
442 		if (vec_off == sg->length) {
443 			vec_off = 0;
444 			sg++;
445 		}
446 	}
447 
448 	return (copied);
449 }
450 
451 /*
452  * If the message is still on the send queue, wait until the transport
453  * is done with it. This is particularly important for RDMA operations.
454  */
455 /* ARGSUSED */
456 void
457 rdsv3_message_wait(struct rdsv3_message *rm)
458 {
459 	rdsv3_wait_event(&rm->m_flush_wait,
460 	    !test_bit(RDSV3_MSG_MAPPED, &rm->m_flags));
461 }
462 
463 void
464 rdsv3_message_unmapped(struct rdsv3_message *rm)
465 {
466 	clear_bit(RDSV3_MSG_MAPPED, &rm->m_flags);
467 	rdsv3_wake_up_all(&rm->m_flush_wait);
468 }
469