/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sunddi.h>
#include <sys/sysmacros.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/ib/ibtl/ibtl_types.h>

#include <sys/ib/clients/iser/iser.h>

/*
 * iser_resource.c
 *	Routines for allocating resources for iSER
 */

static iser_mr_t *iser_vmem_chunk_alloc(iser_hca_t *hca, ib_memlen_t chunksize,
    ibt_mr_flags_t mr_flags);

static void iser_vmem_chunk_free(iser_hca_t *hca, iser_mr_t *iser_mr);

static iser_mr_t *iser_reg_mem(iser_hca_t *hca, ib_vaddr_t vaddr,
    ib_memlen_t len, ibt_mr_flags_t mr_flags);

static void iser_dereg_mem(iser_hca_t *hca, iser_mr_t *mr);

static int iser_vmem_mr_compare(const void *void_mr1, const void *void_mr2);

/*
 * iser_init_hca_caches()
 * Invoked per HCA instance initialization, to establish HCA-wide
 * message and buffer kmem caches. Note we'll uniquify cache names
 * with the lower 32 bits of the HCA GUID.
 */
void
iser_init_hca_caches(iser_hca_t *hca)
{
	char	name[ISER_CACHE_NAMELEN];

	(void) snprintf(name, ISER_CACHE_NAMELEN, "iser_msg_pool_%08x",
	    (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
	hca->hca_msg_pool = iser_vmem_create(name, hca, ISER_MSG_MR_CHUNKSIZE,
	    ISER_MSG_POOL_MAX, ISER_MSG_MR_FLAGS);
	(void) snprintf(name, ISER_CACHE_NAMELEN, "iser_msg_cache_%08x",
	    (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
	hca->iser_msg_cache = kmem_cache_create(name, sizeof (iser_msg_t),
	    0, &iser_msg_cache_constructor, &iser_msg_cache_destructor,
	    NULL, hca, NULL, KM_SLEEP);

	(void) snprintf(name, ISER_CACHE_NAMELEN, "iser_buf_pool_%08x",
	    (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
	hca->hca_buf_pool = iser_vmem_create(name, hca, ISER_BUF_MR_CHUNKSIZE,
	    ISER_BUF_POOL_MAX, ISER_BUF_MR_FLAGS);
	(void) snprintf(name, ISER_CACHE_NAMELEN, "iser_buf_cache_%08x",
	    (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
	hca->iser_buf_cache = kmem_cache_create(name, sizeof (iser_buf_t),
	    0, &iser_buf_cache_constructor, &iser_buf_cache_destructor,
	    NULL, hca, NULL, KM_SLEEP);
}
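
/*
 * Usage note (illustrative sketch): the message/buffer pools and kmem
 * caches created above are per-HCA resources. A caller bringing an HCA
 * online is expected to pair iser_init_hca_caches() with
 * iser_fini_hca_caches() at teardown; only those two functions come from
 * this file, the surrounding attach/detach context is assumed.
 *
 *	iser_hca_t *hca = ...;		// HCA state, already initialized
 *
 *	iser_init_hca_caches(hca);	// create msg/buf pools and caches
 *	// ... HCA services iSER connections ...
 *	iser_fini_hca_caches(hca);	// destroy caches, then their pools
 */
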
/*
 * iser_fini_hca_caches()
 * Invoked per HCA instance teardown, this routine cleans up the
 * message and buffer handle caches.
 */
void
iser_fini_hca_caches(iser_hca_t *hca)
{
	kmem_cache_destroy(hca->iser_buf_cache);
	iser_vmem_destroy(hca->hca_buf_pool);
	kmem_cache_destroy(hca->iser_msg_cache);
	iser_vmem_destroy(hca->hca_msg_pool);
}

/*
 * Allocate and initialize an iSER WR handle
 */
iser_wr_t *
iser_wr_get(void)
{
	iser_wr_t	*iser_wr;

	iser_wr = kmem_cache_alloc(iser_state->iser_wr_cache, KM_NOSLEEP);
	if (iser_wr != NULL) {
		iser_wr->iw_type = ISER_WR_UNDEFINED;
		iser_wr->iw_msg = NULL;
		iser_wr->iw_buf = NULL;
		iser_wr->iw_pdu = NULL;
	}

	return (iser_wr);
}

/*
 * Free an iSER WR handle back to the global cache
 */
void
iser_wr_free(iser_wr_t *iser_wr)
{
	kmem_cache_free(iser_state->iser_wr_cache, iser_wr);
}

/*
 * iser_msg_cache_constructor()
 * Allocate and register memory for an iSER Control-type PDU message.
 * The cached objects will retain this memory registration in the HCA,
 * and thus provide a cache of pre-allocated and registered messages
 * for use in iSER.
 */
/* ARGSUSED */
int
iser_msg_cache_constructor(void *msg_void, void *arg, int flags)
{
	void		*memp = NULL;
	int		status;
	iser_msg_t	*msg = (iser_msg_t *)msg_void;
	iser_hca_t	*hca = (iser_hca_t *)arg;
	iser_mr_t	mr;

	memp = iser_vmem_alloc(hca->hca_msg_pool, ISER_MAX_CTRLPDU_LEN);
	if (memp == NULL) {
		ISER_LOG(CE_NOTE, "iser_msg_cache_constructor: "
		    "failed to allocate backing memory");
		return (DDI_FAILURE);
	}

	/* Fill in iser_mr for the memory we just allocated */
	status = iser_vmem_mr(hca->hca_msg_pool, memp,
	    ISER_MAX_CTRLPDU_LEN, &mr);
	if (status != IDM_STATUS_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_msg_cache_constructor: "
		    "couldn't find mr for %p", memp);
		iser_vmem_free(hca->hca_msg_pool, memp, ISER_MAX_CTRLPDU_LEN);
		return (DDI_FAILURE);
	}

	msg->msg_ds.ds_va = (ib_vaddr_t)(uintptr_t)memp;
	msg->msg_ds.ds_key = mr.is_mrlkey;

	/* Set a backpointer to this cache to save a lookup on free */
	msg->cache = hca->iser_msg_cache;

	return (DDI_SUCCESS);
}

/*
 * Deregister and free registered memory from an iser_msg_t handle.
 */
void
iser_msg_cache_destructor(void *mr, void *arg)
{
	iser_msg_t	*msg = (iser_msg_t *)mr;
	iser_hca_t	*hca = (iser_hca_t *)arg;
	uint8_t		*memp;

	memp = (uint8_t *)(uintptr_t)(ib_vaddr_t)msg->msg_ds.ds_va;
	iser_vmem_free(hca->hca_msg_pool, memp, ISER_MAX_CTRLPDU_LEN);
}
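
/*
 * Usage note (illustrative sketch): a WR handle is typically taken from
 * the global cache just before posting work to the channel and returned
 * in the completion path. Only iser_wr_get()/iser_wr_free() and the iw_*
 * fields come from this file; the iw_type value and the send context are
 * assumptions for illustration.
 *
 *	iser_wr_t *wr;
 *
 *	wr = iser_wr_get();
 *	if (wr == NULL)
 *		return (IDM_STATUS_FAIL);	// cache alloc is KM_NOSLEEP
 *	wr->iw_type = ISER_WR_SEND;		// hypothetical type value
 *	wr->iw_msg = msg;			// message being posted
 *	// ... post the send; in the completion handler: ...
 *	iser_wr_free(wr);
 */
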
/*
 * Pull a msg handle off the HCA's msg cache. If no object is available
 * on the cache, a new message buffer will be allocated and registered
 * with the HCA. Once freed, this message will not be unregistered, thus
 * building up a cache of pre-allocated and registered message buffers
 * over time.
 */
iser_msg_t *
iser_msg_get(iser_hca_t *hca, int num, int *ret)
{
	iser_msg_t	*tmp, *msg = NULL;
	int		i;

	ASSERT(hca != NULL);

	/*
	 * Pull 'num' message handles off the cache, linking
	 * them if more than one has been requested.
	 */
	for (i = 0; i < num; i++) {
		tmp = kmem_cache_alloc(hca->iser_msg_cache, KM_NOSLEEP);
		if (tmp == NULL) {
			ISER_LOG(CE_NOTE, "iser_msg_get: alloc failed, "
			    "requested (%d) allocated (%d)", num, i);
			break;
		}
		tmp->msg_ds.ds_len = ISER_MAX_CTRLPDU_LEN;
		tmp->nextp = msg;
		msg = tmp;
	}

	if (ret != NULL) {
		*ret = i;
	}

	return (msg);
}

/*
 * Free this msg back to its cache, leaving the memory contained by
 * it registered for later re-use.
 */
void
iser_msg_free(iser_msg_t *msg)
{
	kmem_cache_free(msg->cache, msg);
}

/*
 * iser_buf_cache_constructor()
 * Allocate and register memory for an iSER RDMA operation. The cached
 * objects will retain this memory registration in the HCA, and thus
 * provide a cache of pre-allocated and registered buffers for use in
 * iSER.
 */
/* ARGSUSED */
int
iser_buf_cache_constructor(void *mr, void *arg, int flags)
{
	uint8_t		*memp;
	idm_status_t	status;
	iser_buf_t	*iser_buf = (iser_buf_t *)mr;
	iser_hca_t	*hca = (iser_hca_t *)arg;

	/* Allocate an iser_mr handle for this buffer */
	iser_buf->iser_mr = kmem_zalloc(sizeof (iser_mr_t), KM_NOSLEEP);
	if (iser_buf->iser_mr == NULL) {
		ISER_LOG(CE_NOTE, "iser_buf_cache_constructor: "
		    "failed to allocate memory for iser_mr handle");
		return (DDI_FAILURE);
	}

	memp = iser_vmem_alloc(hca->hca_buf_pool, ISER_DEFAULT_BUFLEN);
	if (memp == NULL) {
		kmem_free(iser_buf->iser_mr, sizeof (iser_mr_t));
		return (DDI_FAILURE);
	}

	/* Fill in iser_mr for the memory we just allocated */
	status = iser_vmem_mr(hca->hca_buf_pool, memp, ISER_DEFAULT_BUFLEN,
	    iser_buf->iser_mr);
	if (status != IDM_STATUS_SUCCESS) {
		/* Release the backing memory and the iser_mr handle */
		iser_vmem_free(hca->hca_buf_pool, memp, ISER_DEFAULT_BUFLEN);
		kmem_free(iser_buf->iser_mr, sizeof (iser_mr_t));
		return (DDI_FAILURE);
	}

	/* Set buf pointer and len for later manipulation (if necessary) */
	iser_buf->buf = (uint64_t *)(uintptr_t)memp;
	iser_buf->buflen = ISER_DEFAULT_BUFLEN;

	/* Populate the SGE Vaddr and L_key for the xfer operation later */
	iser_buf->buf_ds.ds_va = iser_buf->iser_mr->is_mrva;
	iser_buf->buf_ds.ds_key = iser_buf->iser_mr->is_mrlkey;

	/* Set a backpointer to this cache to save a lookup on free */
	iser_buf->cache = hca->iser_buf_cache;

	gethrestime(&iser_buf->buf_constructed);

	return (DDI_SUCCESS);
}
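
/*
 * Usage note (illustrative sketch): iser_msg_get() hands back up to 'num'
 * pre-registered message handles chained through msg->nextp and reports
 * the count actually allocated through 'ret'; each handle is returned
 * individually with iser_msg_free(). Names other than the iser_msg_*
 * calls and fields are assumed for illustration.
 *
 *	iser_msg_t *msg, *next;
 *	int got;
 *
 *	msg = iser_msg_get(hca, nrecv, &got);
 *	if (got < nrecv) {
 *		// Fewer handles than requested; release what we did get
 *		while (msg != NULL) {
 *			next = msg->nextp;
 *			iser_msg_free(msg);
 *			msg = next;
 *		}
 *	}
 */
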
/*
 * Deregister and free registered memory from an iser_buf_t handle.
 */
void
iser_buf_cache_destructor(void *mr, void *arg)
{
	iser_buf_t	*iser_buf = (iser_buf_t *)mr;
	iser_hca_t	*hca = (iser_hca_t *)arg;

	gethrestime(&iser_buf->buf_destructed);

	iser_vmem_free(hca->hca_buf_pool, iser_buf->buf, iser_buf->buflen);

	kmem_free(iser_buf->iser_mr, sizeof (iser_mr_t));
}

/*
 * Registration for initiator buffers
 */
int
iser_reg_rdma_mem(iser_hca_t *hca, idm_buf_t *idb)
{
	iser_mr_t	*iser_mr = NULL;

	ASSERT(idb != NULL);
	ASSERT(idb->idb_buflen > 0);

	iser_mr = iser_reg_mem(hca, (ib_vaddr_t)(uintptr_t)idb->idb_buf,
	    idb->idb_buflen, ISER_BUF_MR_FLAGS | IBT_MR_NOSLEEP);
	if (iser_mr == NULL) {
		ISER_LOG(CE_NOTE, "iser_reg_rdma_mem: failed to register "
		    "memory for idm_buf_t");
		return (DDI_FAILURE);
	}

	idb->idb_reg_private = (void *)iser_mr;

	return (DDI_SUCCESS);
}

void
iser_dereg_rdma_mem(iser_hca_t *hca, idm_buf_t *idb)
{
	iser_mr_t	*mr;

	ASSERT(idb != NULL);
	mr = (iser_mr_t *)idb->idb_reg_private;

	iser_dereg_mem(hca, mr);
}

iser_vmem_mr_pool_t *
iser_vmem_create(const char *name, iser_hca_t *hca, ib_memlen_t chunksize,
    uint64_t max_total_size, ibt_mr_flags_t arena_mr_flags)
{
	iser_mr_t		*first_chunk;
	iser_vmem_mr_pool_t	*result;

	ASSERT(chunksize <= max_total_size);
	result = kmem_zalloc(sizeof (*result), KM_SLEEP);
	result->ivmp_hca = hca;
	result->ivmp_mr_flags = arena_mr_flags;
	result->ivmp_chunksize = chunksize;
	result->ivmp_max_total_size = max_total_size;
	mutex_init(&result->ivmp_mutex, NULL, MUTEX_DRIVER, NULL);
	avl_create(&result->ivmp_mr_list, iser_vmem_mr_compare,
	    sizeof (iser_mr_t), offsetof(iser_mr_t, is_avl_ln));

	first_chunk = iser_vmem_chunk_alloc(hca, chunksize,
	    arena_mr_flags | IBT_MR_SLEEP);

	avl_add(&result->ivmp_mr_list, first_chunk);
	result->ivmp_total_size += chunksize;

	result->ivmp_vmem = vmem_create(name,
	    (void *)(uintptr_t)first_chunk->is_mrva,
	    (size_t)first_chunk->is_mrlen, ISER_MR_QUANTSIZE,
	    NULL, NULL, NULL, 0, VM_SLEEP);

	return (result);
}

void
iser_vmem_destroy(iser_vmem_mr_pool_t *vmr_pool)
{
	iser_mr_t	*chunk, *next_chunk;

	mutex_enter(&vmr_pool->ivmp_mutex);
	vmem_destroy(vmr_pool->ivmp_vmem);

	for (chunk = avl_first(&vmr_pool->ivmp_mr_list); chunk != NULL;
	    chunk = next_chunk) {
		next_chunk = AVL_NEXT(&vmr_pool->ivmp_mr_list, chunk);
		avl_remove(&vmr_pool->ivmp_mr_list, chunk);
		iser_vmem_chunk_free(vmr_pool->ivmp_hca, chunk);
	}
	mutex_exit(&vmr_pool->ivmp_mutex);

	avl_destroy(&vmr_pool->ivmp_mr_list);
	mutex_destroy(&vmr_pool->ivmp_mutex);

	kmem_free(vmr_pool, sizeof (*vmr_pool));
}
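
/*
 * Usage note (illustrative sketch): a registered-memory pool ties a vmem
 * arena to one or more HCA memory registrations, which is how the kmem
 * cache constructors above obtain pre-registered backing memory. The pool
 * name below is a placeholder; the iser_vmem_* calls and ISER_* constants
 * come from this file.
 *
 *	iser_vmem_mr_pool_t *pool;
 *	iser_mr_t mr;
 *	void *va;
 *
 *	pool = iser_vmem_create("example_pool", hca, ISER_MSG_MR_CHUNKSIZE,
 *	    ISER_MSG_POOL_MAX, ISER_MSG_MR_FLAGS);
 *	va = iser_vmem_alloc(pool, ISER_MAX_CTRLPDU_LEN);
 *	if (va != NULL) {
 *		if (iser_vmem_mr(pool, va, ISER_MAX_CTRLPDU_LEN, &mr) ==
 *		    IDM_STATUS_SUCCESS) {
 *			// mr.is_mrlkey / mr.is_mrrkey cover this range
 *		}
 *		iser_vmem_free(pool, va, ISER_MAX_CTRLPDU_LEN);
 *	}
 *	iser_vmem_destroy(pool);
 */
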
void *
iser_vmem_alloc(iser_vmem_mr_pool_t *vmr_pool, size_t size)
{
	void		*result;
	iser_mr_t	*next_chunk;
	ib_memlen_t	chunk_len;

	result = vmem_alloc(vmr_pool->ivmp_vmem, size,
	    VM_NOSLEEP | VM_FIRSTFIT);
	if (result == NULL) {
		/*
		 * The arena is out of space. Try to grow the pool by
		 * allocating and registering another chunk, then retry.
		 */
		mutex_enter(&vmr_pool->ivmp_mutex);
		chunk_len = vmr_pool->ivmp_chunksize;
		if ((vmr_pool->ivmp_total_size + chunk_len) >
		    vmr_pool->ivmp_max_total_size) {
			/*
			 * Don't go over the pool size limit. We can allocate
			 * partial chunks, so it's not always the case that
			 * current_size + chunk_size == max_total_size.
			 */
			if (vmr_pool->ivmp_total_size >=
			    vmr_pool->ivmp_max_total_size) {
				mutex_exit(&vmr_pool->ivmp_mutex);
				return (NULL);
			} else {
				chunk_len = vmr_pool->ivmp_max_total_size -
				    vmr_pool->ivmp_total_size;
			}
		}
		next_chunk = iser_vmem_chunk_alloc(vmr_pool->ivmp_hca,
		    chunk_len, vmr_pool->ivmp_mr_flags | IBT_MR_NOSLEEP);
		if (next_chunk != NULL) {
			if (vmem_add(vmr_pool->ivmp_vmem,
			    (void *)(uintptr_t)next_chunk->is_mrva,
			    next_chunk->is_mrlen, VM_NOSLEEP) == NULL) {
				/* Free the chunk we just allocated */
				iser_vmem_chunk_free(vmr_pool->ivmp_hca,
				    next_chunk);
			} else {
				vmr_pool->ivmp_total_size +=
				    next_chunk->is_mrlen;
				avl_add(&vmr_pool->ivmp_mr_list, next_chunk);
			}

			/* Retry now that the arena may have grown */
			result = vmem_alloc(vmr_pool->ivmp_vmem, size,
			    VM_NOSLEEP | VM_FIRSTFIT);
		}

		mutex_exit(&vmr_pool->ivmp_mutex);
	}

	return (result);
}

void
iser_vmem_free(iser_vmem_mr_pool_t *vmr_pool, void *vaddr, size_t size)
{
	vmem_free(vmr_pool->ivmp_vmem, vaddr, size);
}

idm_status_t
iser_vmem_mr(iser_vmem_mr_pool_t *vmr_pool, void *vaddr, size_t size,
    iser_mr_t *mr)
{
	avl_index_t	where;
	ib_vaddr_t	mrva = (ib_vaddr_t)(uintptr_t)vaddr;
	iser_mr_t	search_chunk;
	iser_mr_t	*nearest_chunk;
	ib_vaddr_t	chunk_end;

	mutex_enter(&vmr_pool->ivmp_mutex);
	search_chunk.is_mrva = mrva;
	nearest_chunk = avl_find(&vmr_pool->ivmp_mr_list, &search_chunk,
	    &where);
	if (nearest_chunk == NULL) {
		/* No exact match; use the chunk starting just below mrva */
		nearest_chunk = avl_nearest(&vmr_pool->ivmp_mr_list, where,
		    AVL_BEFORE);
		if (nearest_chunk == NULL) {
			mutex_exit(&vmr_pool->ivmp_mutex);
			return (IDM_STATUS_FAIL);
		}
	}

	/* See if this chunk contains the specified address range */
	ASSERT(nearest_chunk->is_mrva <= mrva);
	chunk_end = nearest_chunk->is_mrva + nearest_chunk->is_mrlen;
	if (chunk_end >= mrva + size) {
		/* Yes, this chunk contains the address range */
		mr->is_mrhdl = nearest_chunk->is_mrhdl;
		mr->is_mrva = mrva;
		mr->is_mrlen = size;
		mr->is_mrlkey = nearest_chunk->is_mrlkey;
		mr->is_mrrkey = nearest_chunk->is_mrrkey;
		mutex_exit(&vmr_pool->ivmp_mutex);
		return (IDM_STATUS_SUCCESS);
	}
	mutex_exit(&vmr_pool->ivmp_mutex);

	return (IDM_STATUS_FAIL);
}

/*
 * iser_vmem_chunk_alloc()
 *	Allocate and register a chunk of pool backing memory, halving the
 *	requested size on allocation failure down to ISER_MIN_CHUNKSIZE.
 */
static iser_mr_t *
iser_vmem_chunk_alloc(iser_hca_t *hca, ib_memlen_t chunksize,
    ibt_mr_flags_t mr_flags)
{
	void		*chunk = NULL;
	iser_mr_t	*result = NULL;
	int		km_flags = 0;

	if (mr_flags & IBT_MR_NOSLEEP)
		km_flags |= KM_NOSLEEP;

	while ((chunk == NULL) && (chunksize >= ISER_MIN_CHUNKSIZE)) {
		chunk = kmem_alloc(chunksize, km_flags);
		if (chunk == NULL) {
			ISER_LOG(CE_NOTE, "iser_vmem_chunk_alloc: "
			    "chunk alloc of %d failed, trying %d",
			    (int)chunksize, (int)(chunksize / 2));
			chunksize /= 2;
		} else {
			ISER_LOG(CE_NOTE, "iser_vmem_chunk_alloc: "
			    "New chunk %p size %d", chunk, (int)chunksize);
		}
	}

	if (chunk != NULL) {
		result = iser_reg_mem(hca, (ib_vaddr_t)(uintptr_t)chunk,
		    chunksize, mr_flags);
		if (result == NULL) {
			ISER_LOG(CE_NOTE, "iser_vmem_chunk_alloc: "
			    "Chunk registration failed");
			kmem_free(chunk, chunksize);
		}
	}

	return (result);
}

/*
 * iser_vmem_chunk_free()
 *	Deregister and free a chunk of pool backing memory.
 */
static void
iser_vmem_chunk_free(iser_hca_t *hca, iser_mr_t *iser_mr)
{
	void		*chunk = (void *)(uintptr_t)iser_mr->is_mrva;
	ib_memlen_t	chunksize = iser_mr->is_mrlen;

	iser_dereg_mem(hca, iser_mr);

	kmem_free(chunk, chunksize);
}

/*
 * iser_reg_mem()
 *	Allocate an iser_mr_t handle and register the given address range
 *	with the HCA.
 */
static iser_mr_t *
iser_reg_mem(iser_hca_t *hca, ib_vaddr_t vaddr, ib_memlen_t len,
    ibt_mr_flags_t mr_flags)
{
	iser_mr_t	*result = NULL;
	ibt_mr_attr_t	mr_attr;
	ibt_mr_desc_t	mr_desc;
	ibt_status_t	status;
	int		km_flags = 0;

	if (mr_flags & IBT_MR_NOSLEEP)
		km_flags |= KM_NOSLEEP;

	result = (iser_mr_t *)kmem_zalloc(sizeof (iser_mr_t), km_flags);
	if (result == NULL) {
		ISER_LOG(CE_NOTE, "iser_reg_mem: failed to allocate "
		    "memory for iser_mr handle");
		return (NULL);
	}

	bzero(&mr_attr, sizeof (ibt_mr_attr_t));
	bzero(&mr_desc, sizeof (ibt_mr_desc_t));

	mr_attr.mr_vaddr = vaddr;
	mr_attr.mr_len = len;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = mr_flags;

	status = ibt_register_mr(hca->hca_hdl, hca->hca_pdhdl, &mr_attr,
	    &result->is_mrhdl, &mr_desc);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_reg_mem: ibt_register_mr "
		    "failure (%d)", status);
		kmem_free(result, sizeof (iser_mr_t));
		return (NULL);
	}

	result->is_mrva = mr_attr.mr_vaddr;
	result->is_mrlen = mr_attr.mr_len;
	result->is_mrlkey = mr_desc.md_lkey;
	result->is_mrrkey = mr_desc.md_rkey;

	return (result);
}

/*
 * iser_dereg_mem()
 *	Deregister the memory region and free the iser_mr_t handle.
 */
static void
iser_dereg_mem(iser_hca_t *hca, iser_mr_t *mr)
{
	(void) ibt_deregister_mr(hca->hca_hdl, mr->is_mrhdl);
	kmem_free(mr, sizeof (iser_mr_t));
}

static int
iser_vmem_mr_compare(const void *void_mr1, const void *void_mr2)
{
	iser_mr_t	*mr1 = (iser_mr_t *)void_mr1;
	iser_mr_t	*mr2 = (iser_mr_t *)void_mr2;

	/* Sort memory chunks by their virtual address */
	if (mr1->is_mrva < mr2->is_mrva)
		return (-1);
	else if (mr1->is_mrva > mr2->is_mrva)
		return (1);

	return (0);
}
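
/*
 * Usage note (illustrative sketch): iser_reg_mem()/iser_dereg_mem() back
 * the idm_buf_t registration entry points defined earlier in this file.
 * A data transfer path would register the buffer once and keep the
 * resulting iser_mr_t in idb_reg_private, as iser_reg_rdma_mem() does;
 * the surrounding transfer context here is assumed.
 *
 *	if (iser_reg_rdma_mem(hca, idb) != DDI_SUCCESS)
 *		return (IDM_STATUS_FAIL);
 *	// ... RDMA to/from idb->idb_buf using the lkey/rkey kept in
 *	// ((iser_mr_t *)idb->idb_reg_private) ...
 *	iser_dereg_rdma_mem(hca, idb);
 */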