1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Copyright (c) 2006 Oracle. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 #include <sys/rds.h> 58 59 #include <sys/ib/clients/rdsv3/rdsv3.h> 60 #include <sys/ib/clients/rdsv3/rdma.h> 61 #include <sys/ib/clients/rdsv3/rdsv3_debug.h> 62 63 #ifndef __lock_lint 64 static unsigned int rdsv3_exthdr_size[__RDSV3_EXTHDR_MAX] = { 65 [RDSV3_EXTHDR_NONE] = 0, 66 [RDSV3_EXTHDR_VERSION] = sizeof (struct rdsv3_ext_header_version), 67 [RDSV3_EXTHDR_RDMA] = sizeof (struct rdsv3_ext_header_rdma), 68 [RDSV3_EXTHDR_RDMA_DEST] = sizeof (struct rdsv3_ext_header_rdma_dest), 69 }; 70 #else 71 static unsigned int rdsv3_exthdr_size[__RDSV3_EXTHDR_MAX] = { 72 0, 73 sizeof (struct rdsv3_ext_header_version), 74 sizeof (struct rdsv3_ext_header_rdma), 75 sizeof (struct rdsv3_ext_header_rdma_dest), 76 }; 77 #endif 78 79 void 80 rdsv3_message_addref(struct rdsv3_message *rm) 81 { 82 RDSV3_DPRINTF5("rdsv3_message_addref", "addref rm %p ref %d", 83 rm, atomic_get(&rm->m_refcount)); 84 atomic_add_32(&rm->m_refcount, 1); 85 } 86 87 /* 88 * This relies on dma_map_sg() not touching sg[].page during merging. 89 */ 90 static void 91 rdsv3_message_purge(struct rdsv3_message *rm) 92 { 93 unsigned long i; 94 95 RDSV3_DPRINTF4("rdsv3_message_purge", "Enter(rm: %p)", rm); 96 97 if (test_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags)) 98 return; 99 100 for (i = 0; i < rm->m_nents; i++) { 101 RDSV3_DPRINTF5("rdsv3_message_purge", "putting data page %p\n", 102 (void *)rdsv3_sg_page(&rm->m_sg[i])); 103 /* XXX will have to put_page for page refs */ 104 kmem_free(rdsv3_sg_page(&rm->m_sg[i]), 105 rdsv3_sg_len(&rm->m_sg[i])); 106 } 107 108 if (rm->m_rdma_op) 109 rdsv3_rdma_free_op(rm->m_rdma_op); 110 if (rm->m_rdma_mr) { 111 struct rdsv3_mr *mr = rm->m_rdma_mr; 112 if (mr->r_refcount == 0) { 113 RDSV3_DPRINTF4("rdsv3_message_purge ASSERT 0", 114 "rm %p mr %p", rm, mr); 115 return; 116 } 117 if (mr->r_refcount == 0xdeadbeef) { 118 RDSV3_DPRINTF4("rdsv3_message_purge ASSERT deadbeef", 119 "rm %p mr %p", rm, mr); 120 return; 121 } 122 if (atomic_dec_and_test(&mr->r_refcount)) { 123 rm->m_rdma_mr = NULL; 124 __rdsv3_put_mr_final(mr); 125 } 126 } 127 128 RDSV3_DPRINTF4("rdsv3_message_purge", "Return(rm: %p)", rm); 129 130 } 131 132 void 133 rdsv3_message_put(struct rdsv3_message *rm) 134 { 135 RDSV3_DPRINTF5("rdsv3_message_put", 136 "put rm %p ref %d\n", rm, atomic_get(&rm->m_refcount)); 137 138 if (atomic_dec_and_test(&rm->m_refcount)) { 139 ASSERT(!list_link_active(&rm->m_sock_item)); 140 ASSERT(!list_link_active(&rm->m_conn_item)); 141 rdsv3_message_purge(rm); 142 143 kmem_free(rm, sizeof (struct rdsv3_message) + 144 (rm->m_nents * sizeof (struct rdsv3_scatterlist))); 145 } 146 } 147 148 void 149 rdsv3_message_inc_free(struct rdsv3_incoming *inc) 150 { 151 struct rdsv3_message *rm = 152 container_of(inc, struct rdsv3_message, m_inc); 153 rdsv3_message_put(rm); 154 } 155 156 void 157 rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport, 158 uint16_be_t dport, uint64_t seq) 159 { 160 hdr->h_flags = 0; 161 hdr->h_sport = sport; 162 hdr->h_dport = dport; 163 hdr->h_sequence = htonll(seq); 164 hdr->h_exthdr[0] = RDSV3_EXTHDR_NONE; 165 } 166 167 int 168 rdsv3_message_add_extension(struct rdsv3_header *hdr, 169 unsigned int type, const void *data, unsigned int len) 170 { 171 unsigned int ext_len = sizeof (uint8_t) + len; 172 unsigned char *dst; 173 174 RDSV3_DPRINTF4("rdsv3_message_add_extension", "Enter"); 175 176 /* For now, refuse to add more than one extension header */ 177 if (hdr->h_exthdr[0] != RDSV3_EXTHDR_NONE) 178 return (0); 179 180 if (type >= __RDSV3_EXTHDR_MAX || 181 len != rdsv3_exthdr_size[type]) 182 return (0); 183 184 if (ext_len >= RDSV3_HEADER_EXT_SPACE) 185 return (0); 186 dst = hdr->h_exthdr; 187 188 *dst++ = type; 189 (void) memcpy(dst, data, len); 190 191 dst[len] = RDSV3_EXTHDR_NONE; 192 193 RDSV3_DPRINTF4("rdsv3_message_add_extension", "Return"); 194 return (1); 195 } 196 197 /* 198 * If a message has extension headers, retrieve them here. 199 * Call like this: 200 * 201 * unsigned int pos = 0; 202 * 203 * while (1) { 204 * buflen = sizeof(buffer); 205 * type = rdsv3_message_next_extension(hdr, &pos, buffer, &buflen); 206 * if (type == RDSV3_EXTHDR_NONE) 207 * break; 208 * ... 209 * } 210 */ 211 int 212 rdsv3_message_next_extension(struct rdsv3_header *hdr, 213 unsigned int *pos, void *buf, unsigned int *buflen) 214 { 215 unsigned int offset, ext_type, ext_len; 216 uint8_t *src = hdr->h_exthdr; 217 218 RDSV3_DPRINTF4("rdsv3_message_next_extension", "Enter"); 219 220 offset = *pos; 221 if (offset >= RDSV3_HEADER_EXT_SPACE) 222 goto none; 223 224 /* 225 * Get the extension type and length. For now, the 226 * length is implied by the extension type. 227 */ 228 ext_type = src[offset++]; 229 230 if (ext_type == RDSV3_EXTHDR_NONE || ext_type >= __RDSV3_EXTHDR_MAX) 231 goto none; 232 ext_len = rdsv3_exthdr_size[ext_type]; 233 if (offset + ext_len > RDSV3_HEADER_EXT_SPACE) 234 goto none; 235 236 *pos = offset + ext_len; 237 if (ext_len < *buflen) 238 *buflen = ext_len; 239 (void) memcpy(buf, src + offset, *buflen); 240 return (ext_type); 241 242 none: 243 *pos = RDSV3_HEADER_EXT_SPACE; 244 *buflen = 0; 245 return (RDSV3_EXTHDR_NONE); 246 } 247 248 int 249 rdsv3_message_add_version_extension(struct rdsv3_header *hdr, 250 unsigned int version) 251 { 252 struct rdsv3_ext_header_version ext_hdr; 253 254 ext_hdr.h_version = htonl(version); 255 return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_VERSION, 256 &ext_hdr, sizeof (ext_hdr))); 257 } 258 259 int 260 rdsv3_message_get_version_extension(struct rdsv3_header *hdr, 261 unsigned int *version) 262 { 263 struct rdsv3_ext_header_version ext_hdr; 264 unsigned int pos = 0, len = sizeof (ext_hdr); 265 266 RDSV3_DPRINTF4("rdsv3_message_get_version_extension", "Enter"); 267 268 /* 269 * We assume the version extension is the only one present 270 */ 271 if (rdsv3_message_next_extension(hdr, &pos, &ext_hdr, &len) != 272 RDSV3_EXTHDR_VERSION) 273 return (0); 274 *version = ntohl(ext_hdr.h_version); 275 return (1); 276 } 277 278 int 279 rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr, uint32_t r_key, 280 uint32_t offset) 281 { 282 struct rdsv3_ext_header_rdma_dest ext_hdr; 283 284 ext_hdr.h_rdma_rkey = htonl(r_key); 285 ext_hdr.h_rdma_offset = htonl(offset); 286 return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_RDMA_DEST, 287 &ext_hdr, sizeof (ext_hdr))); 288 } 289 290 struct rdsv3_message * 291 rdsv3_message_alloc(unsigned int nents, int gfp) 292 { 293 struct rdsv3_message *rm; 294 295 RDSV3_DPRINTF4("rdsv3_message_alloc", "Enter(nents: %d)", nents); 296 297 rm = kmem_zalloc(sizeof (struct rdsv3_message) + 298 (nents * sizeof (struct rdsv3_scatterlist)), gfp); 299 if (!rm) 300 goto out; 301 302 rm->m_refcount = 1; 303 list_link_init(&rm->m_sock_item); 304 list_link_init(&rm->m_conn_item); 305 mutex_init(&rm->m_rs_lock, NULL, MUTEX_DRIVER, NULL); 306 rdsv3_init_waitqueue(&rm->m_flush_wait); 307 308 RDSV3_DPRINTF4("rdsv3_message_alloc", "Return(rm: %p)", rm); 309 out: 310 return (rm); 311 } 312 313 struct rdsv3_message * 314 rdsv3_message_map_pages(unsigned long *page_addrs, unsigned int total_len) 315 { 316 struct rdsv3_message *rm; 317 unsigned int i; 318 319 RDSV3_DPRINTF4("rdsv3_message_map_pages", "Enter(len: %d)", total_len); 320 321 #ifndef __lock_lint 322 rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP); 323 #else 324 rm = NULL; 325 #endif 326 if (rm == NULL) 327 return (ERR_PTR(-ENOMEM)); 328 329 set_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags); 330 rm->m_inc.i_hdr.h_len = htonl(total_len); 331 #ifndef __lock_lint 332 rm->m_nents = ceil(total_len, PAGE_SIZE); 333 #else 334 rm->m_nents = 0; 335 #endif 336 337 for (i = 0; i < rm->m_nents; ++i) { 338 rdsv3_sg_set_page(&rm->m_sg[i], 339 page_addrs[i], 340 PAGE_SIZE, 0); 341 } 342 343 return (rm); 344 } 345 346 struct rdsv3_message * 347 rdsv3_message_copy_from_user(struct uio *uiop, 348 size_t total_len) 349 { 350 struct rdsv3_message *rm; 351 struct rdsv3_scatterlist *sg; 352 int ret; 353 354 RDSV3_DPRINTF4("rdsv3_message_copy_from_user", "Enter: %d", total_len); 355 356 #ifndef __lock_lint 357 rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP); 358 #else 359 rm = NULL; 360 #endif 361 if (rm == NULL) { 362 ret = -ENOMEM; 363 goto out; 364 } 365 366 rm->m_inc.i_hdr.h_len = htonl(total_len); 367 368 /* 369 * now allocate and copy in the data payload. 370 */ 371 sg = rm->m_sg; 372 373 while (total_len) { 374 if (rdsv3_sg_page(sg) == NULL) { 375 ret = rdsv3_page_remainder_alloc(sg, total_len, 0); 376 if (ret) 377 goto out; 378 rm->m_nents++; 379 } 380 381 ret = uiomove(rdsv3_sg_page(sg), rdsv3_sg_len(sg), UIO_WRITE, 382 uiop); 383 if (ret) { 384 RDSV3_DPRINTF2("rdsv3_message_copy_from_user", 385 "uiomove failed"); 386 ret = -ret; 387 goto out; 388 } 389 390 total_len -= rdsv3_sg_len(sg); 391 sg++; 392 } 393 ret = 0; 394 out: 395 if (ret) { 396 if (rm) 397 rdsv3_message_put(rm); 398 rm = ERR_PTR(ret); 399 } 400 return (rm); 401 } 402 403 int 404 rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc, 405 uio_t *uiop, size_t size) 406 { 407 struct rdsv3_message *rm; 408 struct rdsv3_scatterlist *sg; 409 unsigned long to_copy; 410 unsigned long vec_off; 411 int copied; 412 int ret; 413 uint32_t len; 414 415 rm = container_of(inc, struct rdsv3_message, m_inc); 416 len = ntohl(rm->m_inc.i_hdr.h_len); 417 418 RDSV3_DPRINTF4("rdsv3_message_inc_copy_to_user", 419 "Enter(rm: %p, len: %d)", rm, len); 420 421 sg = rm->m_sg; 422 vec_off = 0; 423 copied = 0; 424 425 while (copied < size && copied < len) { 426 427 to_copy = min(len - copied, sg->length - vec_off); 428 to_copy = min(size - copied, to_copy); 429 430 RDSV3_DPRINTF5("rdsv3_message_inc_copy_to_user", 431 "copying %lu bytes to user iov %p from sg [%p, %u] + %lu\n", 432 to_copy, uiop, 433 rdsv3_sg_page(sg), sg->length, vec_off); 434 435 ret = uiomove(rdsv3_sg_page(sg), to_copy, UIO_READ, uiop); 436 if (ret) 437 break; 438 439 vec_off += to_copy; 440 copied += to_copy; 441 442 if (vec_off == sg->length) { 443 vec_off = 0; 444 sg++; 445 } 446 } 447 448 return (copied); 449 } 450 451 /* 452 * If the message is still on the send queue, wait until the transport 453 * is done with it. This is particularly important for RDMA operations. 454 */ 455 /* ARGSUSED */ 456 void 457 rdsv3_message_wait(struct rdsv3_message *rm) 458 { 459 rdsv3_wait_event(&rm->m_flush_wait, 460 !test_bit(RDSV3_MSG_MAPPED, &rm->m_flags)); 461 } 462 463 void 464 rdsv3_message_unmapped(struct rdsv3_message *rm) 465 { 466 clear_bit(RDSV3_MSG_MAPPED, &rm->m_flags); 467 rdsv3_wake_up_all(&rm->m_flush_wait); 468 } 469