1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Copyright (c) 2006 Oracle. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 #include <sys/rds.h> 58 59 #include <sys/ib/clients/rdsv3/rdsv3.h> 60 #include <sys/ib/clients/rdsv3/rdma.h> 61 #include <sys/ib/clients/rdsv3/rdsv3_debug.h> 62 63 static rdsv3_wait_queue_t rdsv3_message_flush_waitq; 64 65 #ifndef __lock_lint 66 static unsigned int rdsv3_exthdr_size[__RDSV3_EXTHDR_MAX] = { 67 [RDSV3_EXTHDR_NONE] = 0, 68 [RDSV3_EXTHDR_VERSION] = sizeof (struct rdsv3_ext_header_version), 69 [RDSV3_EXTHDR_RDMA] = sizeof (struct rdsv3_ext_header_rdma), 70 [RDSV3_EXTHDR_RDMA_DEST] = sizeof (struct rdsv3_ext_header_rdma_dest), 71 }; 72 #else 73 static unsigned int rdsv3_exthdr_size[__RDSV3_EXTHDR_MAX] = { 74 0, 75 sizeof (struct rdsv3_ext_header_version), 76 sizeof (struct rdsv3_ext_header_rdma), 77 sizeof (struct rdsv3_ext_header_rdma_dest), 78 }; 79 #endif 80 81 void 82 rdsv3_message_addref(struct rdsv3_message *rm) 83 { 84 RDSV3_DPRINTF5("rdsv3_message_addref", "addref rm %p ref %d", 85 rm, atomic_get(&rm->m_refcount)); 86 atomic_add_32(&rm->m_refcount, 1); 87 } 88 89 /* 90 * This relies on dma_map_sg() not touching sg[].page during merging. 91 */ 92 static void 93 rdsv3_message_purge(struct rdsv3_message *rm) 94 { 95 unsigned long i; 96 97 RDSV3_DPRINTF4("rdsv3_message_purge", "Enter(rm: %p)", rm); 98 99 if (test_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags)) 100 return; 101 102 for (i = 0; i < rm->m_nents; i++) { 103 RDSV3_DPRINTF5("rdsv3_message_purge", "putting data page %p\n", 104 (void *)rdsv3_sg_page(&rm->m_sg[i])); 105 /* XXX will have to put_page for page refs */ 106 kmem_free(rdsv3_sg_page(&rm->m_sg[i]), 107 rdsv3_sg_len(&rm->m_sg[i])); 108 } 109 110 if (rm->m_rdma_op) 111 rdsv3_rdma_free_op(rm->m_rdma_op); 112 if (rm->m_rdma_mr) { 113 struct rdsv3_mr *mr = rm->m_rdma_mr; 114 if (mr->r_refcount == 0) { 115 RDSV3_DPRINTF4("rdsv3_message_purge ASSERT 0", 116 "rm %p mr %p", rm, mr); 117 return; 118 } 119 if (mr->r_refcount == 0xdeadbeef) { 120 RDSV3_DPRINTF4("rdsv3_message_purge ASSERT deadbeef", 121 "rm %p mr %p", rm, mr); 122 return; 123 } 124 if (atomic_dec_and_test(&mr->r_refcount)) { 125 rm->m_rdma_mr = NULL; 126 __rdsv3_put_mr_final(mr); 127 } 128 } 129 130 RDSV3_DPRINTF4("rdsv3_message_purge", "Return(rm: %p)", rm); 131 132 } 133 134 void 135 rdsv3_message_inc_purge(struct rdsv3_incoming *inc) 136 { 137 struct rdsv3_message *rm = 138 container_of(inc, struct rdsv3_message, m_inc); 139 rdsv3_message_purge(rm); 140 } 141 142 void 143 rdsv3_message_put(struct rdsv3_message *rm) 144 { 145 RDSV3_DPRINTF5("rdsv3_message_put", 146 "put rm %p ref %d\n", rm, atomic_get(&rm->m_refcount)); 147 148 if (atomic_dec_and_test(&rm->m_refcount)) { 149 ASSERT(!list_link_active(&rm->m_sock_item)); 150 ASSERT(!list_link_active(&rm->m_conn_item)); 151 rdsv3_message_purge(rm); 152 153 kmem_free(rm, sizeof (struct rdsv3_message) + 154 (rm->m_nents * sizeof (struct rdsv3_scatterlist))); 155 } 156 } 157 158 void 159 rdsv3_message_inc_free(struct rdsv3_incoming *inc) 160 { 161 struct rdsv3_message *rm = 162 container_of(inc, struct rdsv3_message, m_inc); 163 rdsv3_message_put(rm); 164 } 165 166 void 167 rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport, 168 uint16_be_t dport, uint64_t seq) 169 { 170 hdr->h_flags = 0; 171 hdr->h_sport = sport; 172 hdr->h_dport = dport; 173 hdr->h_sequence = htonll(seq); 174 hdr->h_exthdr[0] = RDSV3_EXTHDR_NONE; 175 } 176 177 int 178 rdsv3_message_add_extension(struct rdsv3_header *hdr, 179 unsigned int type, const void *data, unsigned int len) 180 { 181 unsigned int ext_len = sizeof (uint8_t) + len; 182 unsigned char *dst; 183 184 RDSV3_DPRINTF4("rdsv3_message_add_extension", "Enter"); 185 186 /* For now, refuse to add more than one extension header */ 187 if (hdr->h_exthdr[0] != RDSV3_EXTHDR_NONE) 188 return (0); 189 190 if (type >= __RDSV3_EXTHDR_MAX || 191 len != rdsv3_exthdr_size[type]) 192 return (0); 193 194 if (ext_len >= RDSV3_HEADER_EXT_SPACE) 195 return (0); 196 dst = hdr->h_exthdr; 197 198 *dst++ = type; 199 (void) memcpy(dst, data, len); 200 201 dst[len] = RDSV3_EXTHDR_NONE; 202 203 RDSV3_DPRINTF4("rdsv3_message_add_extension", "Return"); 204 return (1); 205 } 206 207 /* 208 * If a message has extension headers, retrieve them here. 209 * Call like this: 210 * 211 * unsigned int pos = 0; 212 * 213 * while (1) { 214 * buflen = sizeof(buffer); 215 * type = rdsv3_message_next_extension(hdr, &pos, buffer, &buflen); 216 * if (type == RDSV3_EXTHDR_NONE) 217 * break; 218 * ... 219 * } 220 */ 221 int 222 rdsv3_message_next_extension(struct rdsv3_header *hdr, 223 unsigned int *pos, void *buf, unsigned int *buflen) 224 { 225 unsigned int offset, ext_type, ext_len; 226 uint8_t *src = hdr->h_exthdr; 227 228 RDSV3_DPRINTF4("rdsv3_message_next_extension", "Enter"); 229 230 offset = *pos; 231 if (offset >= RDSV3_HEADER_EXT_SPACE) 232 goto none; 233 234 /* 235 * Get the extension type and length. For now, the 236 * length is implied by the extension type. 237 */ 238 ext_type = src[offset++]; 239 240 if (ext_type == RDSV3_EXTHDR_NONE || ext_type >= __RDSV3_EXTHDR_MAX) 241 goto none; 242 ext_len = rdsv3_exthdr_size[ext_type]; 243 if (offset + ext_len > RDSV3_HEADER_EXT_SPACE) 244 goto none; 245 246 *pos = offset + ext_len; 247 if (ext_len < *buflen) 248 *buflen = ext_len; 249 (void) memcpy(buf, src + offset, *buflen); 250 return (ext_type); 251 252 none: 253 *pos = RDSV3_HEADER_EXT_SPACE; 254 *buflen = 0; 255 return (RDSV3_EXTHDR_NONE); 256 } 257 258 int 259 rdsv3_message_add_version_extension(struct rdsv3_header *hdr, 260 unsigned int version) 261 { 262 struct rdsv3_ext_header_version ext_hdr; 263 264 ext_hdr.h_version = htonl(version); 265 return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_VERSION, 266 &ext_hdr, sizeof (ext_hdr))); 267 } 268 269 int 270 rdsv3_message_get_version_extension(struct rdsv3_header *hdr, 271 unsigned int *version) 272 { 273 struct rdsv3_ext_header_version ext_hdr; 274 unsigned int pos = 0, len = sizeof (ext_hdr); 275 276 RDSV3_DPRINTF4("rdsv3_message_get_version_extension", "Enter"); 277 278 /* 279 * We assume the version extension is the only one present 280 */ 281 if (rdsv3_message_next_extension(hdr, &pos, &ext_hdr, &len) != 282 RDSV3_EXTHDR_VERSION) 283 return (0); 284 *version = ntohl(ext_hdr.h_version); 285 return (1); 286 } 287 288 int 289 rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr, uint32_t r_key, 290 uint32_t offset) 291 { 292 struct rdsv3_ext_header_rdma_dest ext_hdr; 293 294 ext_hdr.h_rdma_rkey = htonl(r_key); 295 ext_hdr.h_rdma_offset = htonl(offset); 296 return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_RDMA_DEST, 297 &ext_hdr, sizeof (ext_hdr))); 298 } 299 300 struct rdsv3_message * 301 rdsv3_message_alloc(unsigned int nents, int gfp) 302 { 303 struct rdsv3_message *rm; 304 305 RDSV3_DPRINTF4("rdsv3_message_alloc", "Enter(nents: %d)", nents); 306 307 rm = kmem_zalloc(sizeof (struct rdsv3_message) + 308 (nents * sizeof (struct rdsv3_scatterlist)), gfp); 309 if (!rm) 310 goto out; 311 312 rm->m_refcount = 1; 313 list_link_init(&rm->m_sock_item); 314 list_link_init(&rm->m_conn_item); 315 mutex_init(&rm->m_rs_lock, NULL, MUTEX_DRIVER, NULL); 316 317 RDSV3_DPRINTF4("rdsv3_message_alloc", "Return(rm: %p)", rm); 318 out: 319 return (rm); 320 } 321 322 struct rdsv3_message * 323 rdsv3_message_map_pages(unsigned long *page_addrs, unsigned int total_len) 324 { 325 struct rdsv3_message *rm; 326 unsigned int i; 327 328 RDSV3_DPRINTF4("rdsv3_message_map_pages", "Enter(len: %d)", total_len); 329 330 #ifndef __lock_lint 331 rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP); 332 #else 333 rm = NULL; 334 #endif 335 if (rm == NULL) 336 return (ERR_PTR(-ENOMEM)); 337 338 set_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags); 339 rm->m_inc.i_hdr.h_len = htonl(total_len); 340 #ifndef __lock_lint 341 rm->m_nents = ceil(total_len, PAGE_SIZE); 342 #else 343 rm->m_nents = 0; 344 #endif 345 346 for (i = 0; i < rm->m_nents; ++i) { 347 rdsv3_sg_set_page(&rm->m_sg[i], 348 page_addrs[i], 349 PAGE_SIZE, 0); 350 } 351 352 return (rm); 353 } 354 355 struct rdsv3_message * 356 rdsv3_message_copy_from_user(struct uio *uiop, 357 size_t total_len) 358 { 359 struct rdsv3_message *rm; 360 struct rdsv3_scatterlist *sg; 361 int ret; 362 363 RDSV3_DPRINTF4("rdsv3_message_copy_from_user", "Enter: %d", total_len); 364 365 #ifndef __lock_lint 366 rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP); 367 #else 368 rm = NULL; 369 #endif 370 if (rm == NULL) { 371 ret = -ENOMEM; 372 goto out; 373 } 374 375 rm->m_inc.i_hdr.h_len = htonl(total_len); 376 377 /* 378 * now allocate and copy in the data payload. 379 */ 380 sg = rm->m_sg; 381 382 while (total_len) { 383 if (rdsv3_sg_page(sg) == NULL) { 384 ret = rdsv3_page_remainder_alloc(sg, total_len, 0); 385 if (ret) 386 goto out; 387 rm->m_nents++; 388 } 389 390 ret = uiomove(rdsv3_sg_page(sg), rdsv3_sg_len(sg), UIO_WRITE, 391 uiop); 392 if (ret) 393 goto out; 394 395 total_len -= rdsv3_sg_len(sg); 396 sg++; 397 } 398 399 ret = 0; 400 out: 401 if (ret) { 402 if (rm) 403 rdsv3_message_put(rm); 404 rm = ERR_PTR(ret); 405 } 406 return (rm); 407 } 408 409 int 410 rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc, 411 uio_t *uiop, size_t size) 412 { 413 struct rdsv3_message *rm; 414 struct rdsv3_scatterlist *sg; 415 unsigned long to_copy; 416 unsigned long vec_off; 417 int copied; 418 int ret; 419 uint32_t len; 420 421 rm = container_of(inc, struct rdsv3_message, m_inc); 422 len = ntohl(rm->m_inc.i_hdr.h_len); 423 424 RDSV3_DPRINTF4("rdsv3_message_inc_copy_to_user", 425 "Enter(rm: %p, len: %d)", rm, len); 426 427 sg = rm->m_sg; 428 vec_off = 0; 429 copied = 0; 430 431 while (copied < size && copied < len) { 432 433 to_copy = min(len - copied, sg->length - vec_off); 434 to_copy = min(size - copied, to_copy); 435 436 RDSV3_DPRINTF5("rdsv3_message_inc_copy_to_user", 437 "copying %lu bytes to user iov %p from sg [%p, %u] + %lu\n", 438 to_copy, uiop, 439 rdsv3_sg_page(sg), sg->length, vec_off); 440 441 ret = uiomove(rdsv3_sg_page(sg), to_copy, UIO_READ, uiop); 442 if (ret) 443 break; 444 445 vec_off += to_copy; 446 copied += to_copy; 447 448 if (vec_off == sg->length) { 449 vec_off = 0; 450 sg++; 451 } 452 } 453 454 return (copied); 455 } 456 457 /* 458 * If the message is still on the send queue, wait until the transport 459 * is done with it. This is particularly important for RDMA operations. 460 */ 461 void 462 rdsv3_message_wait(struct rdsv3_message *rm) 463 { 464 rdsv3_wait_event(&rdsv3_message_flush_waitq, 465 !test_bit(RDSV3_MSG_MAPPED, &rm->m_flags)); 466 } 467 468 void 469 rdsv3_message_unmapped(struct rdsv3_message *rm) 470 { 471 clear_bit(RDSV3_MSG_MAPPED, &rm->m_flags); 472 rdsv3_wake_up_all(&rdsv3_message_flush_waitq); 473 } 474