1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * I/O Controller functions for the Solaris COMSTAR SCSI RDMA Protocol 29 * Target (SRPT) port provider. 30 */ 31 32 #include <sys/types.h> 33 #include <sys/ddi.h> 34 #include <sys/types.h> 35 #include <sys/sunddi.h> 36 #include <sys/atomic.h> 37 #include <sys/sysmacros.h> 38 #include <sys/ib/ibtl/ibti.h> 39 40 #include "srp.h" 41 #include "srpt_impl.h" 42 #include "srpt_ioc.h" 43 #include "srpt_stp.h" 44 #include "srpt_ch.h" 45 46 /* 47 * srpt_ioc_srq_size - Tunable parameter that specifies the number 48 * of receive WQ entries that can be posted to the IOC shared 49 * receive queue. 
 */
uint32_t	srpt_ioc_srq_size = SRPT_DEFAULT_IOC_SRQ_SIZE;

/* Defined in the driver's main module; depth of the IOC send queue. */
extern uint16_t srpt_send_msg_depth;

/*
 * IOC profile capabilities mask must be big-endian, so the bitfield
 * layout is selected from the platform's bit-field ordering.  Bit
 * meanings (see srpt_ioc_init_profile): s=send, r=RDMA read, w=RDMA
 * write, a=atomic; t=to IOC, f=from IOC.
 */
typedef struct srpt_ioc_opcap_bits_s {
#if defined(_BIT_FIELDS_LTOH)
	uint8_t		af:1,
			at:1,
			wf:1,
			wt:1,
			rf:1,
			rt:1,
			sf:1,
			st:1;
#elif defined(_BIT_FIELDS_HTOL)
	uint8_t		st:1,
			sf:1,
			rt:1,
			rf:1,
			wt:1,
			wf:1,
			at:1,
			af:1;
#else
#error One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
#endif
} srpt_ioc_opcap_bits_t;

/* Overlay so the bit-level capability flags can be read out as one octet. */
typedef union {
	srpt_ioc_opcap_bits_t	bits;
	uint8_t			mask;
} srpt_ioc_opcap_mask_t;

/*
 * vmem arena variables - values derived from iSER
 */
#define	SRPT_MR_QUANTSIZE	0x400		/* 1K */
#define	SRPT_MIN_CHUNKSIZE	0x100000	/* 1MB */

/* use less memory on 32-bit kernels as it's much more constrained */
#ifdef _LP64
#define	SRPT_BUF_MR_CHUNKSIZE	0x1000000	/* 16MB */
#define	SRPT_BUF_POOL_MAX	0x40000000	/* 1GB */
#else
#define	SRPT_BUF_MR_CHUNKSIZE	0x400000	/* 4MB */
#define	SRPT_BUF_POOL_MAX	0x4000000	/* 64MB */
#endif

/* Access rights requested when registering data-buffer memory regions. */
static ibt_mr_flags_t	srpt_dbuf_mr_flags =
    IBT_MR_ENABLE_LOCAL_WRITE | IBT_MR_ENABLE_REMOTE_WRITE |
    IBT_MR_ENABLE_REMOTE_READ;

void srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
	ibt_async_code_t code, ibt_async_event_t *event);

/* IBTF client registration descriptor passed to ibt_attach(). */
static struct ibt_clnt_modinfo_s srpt_ibt_modinfo = {
	IBTI_V_CURR,
	IBT_STORAGE_DEV,
	srpt_ioc_ib_async_hdlr,
	NULL,
	"srpt"
};

/* Forward declarations for the file-local IOC and vmem arena helpers. */
static srpt_ioc_t *srpt_ioc_init(ib_guid_t guid);
static void srpt_ioc_fini(srpt_ioc_t *ioc);

static srpt_vmem_pool_t *srpt_vmem_create(const char *name, srpt_ioc_t *ioc,
    ib_memlen_t chunksize, uint64_t maxsize, ibt_mr_flags_t flags);
static void *srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size);
static int srpt_vmem_mr_compare(const void *a, const void *b);
static srpt_mr_t *srpt_vmem_chunk_alloc(srpt_vmem_pool_t *ioc,
    ib_memlen_t chunksize);
static void srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool);
static void srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size);
static srpt_mr_t *srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr,
    ib_memlen_t len);
static void srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr);
static void srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr);
static int srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
    srpt_mr_t *mr);

/*
 * srpt_ioc_attach() - I/O Controller attach
 *
 * Attach to IBTF and initialize I/O controllers. The srpt_ctxt->sc_rwlock
 * should be held outside of this call.
 *
 * Returns DDI_SUCCESS if IBTF attach succeeded and at least zero or more
 * HCAs were processed; DDI_FAILURE if IBTF attach fails or no HCA exists.
 */
int
srpt_ioc_attach()
{
	int		status;
	int		hca_cnt;
	int		hca_ndx;
	ib_guid_t	*guid;
	srpt_ioc_t	*ioc;

	ASSERT(srpt_ctxt != NULL);

	/*
	 * Attach to IBTF and initialize a list of IB devices.  Each
	 * HCA will be represented by an I/O Controller.
	 */
	status = ibt_attach(&srpt_ibt_modinfo, srpt_ctxt->sc_dip,
	    srpt_ctxt, &srpt_ctxt->sc_ibt_hdl);
	if (status != DDI_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_attach, ibt_attach failed (0x%x)",
		    status);
		return (DDI_FAILURE);
	}

	hca_cnt = ibt_get_hca_list(&guid);
	if (hca_cnt < 1) {
		/* No HCAs present; undo the IBTF attach and bail. */
		SRPT_DPRINTF_L2("ioc_attach, no HCA found");
		ibt_detach(srpt_ctxt->sc_ibt_hdl);
		srpt_ctxt->sc_ibt_hdl = NULL;
		return (DDI_FAILURE);
	}

	list_create(&srpt_ctxt->sc_ioc_list, sizeof (srpt_ioc_t),
	    offsetof(srpt_ioc_t, ioc_node));

	/*
	 * One I/O Controller per HCA GUID; an individual init failure
	 * skips that HCA but does not abort the attach.
	 */
	for (hca_ndx = 0; hca_ndx < hca_cnt; hca_ndx++) {
		SRPT_DPRINTF_L2("ioc_attach, adding I/O"
		    " Controller (%016llx)", (u_longlong_t)guid[hca_ndx]);

		ioc = srpt_ioc_init(guid[hca_ndx]);
		if (ioc == NULL) {
			SRPT_DPRINTF_L1("ioc_attach, ioc_init GUID(%016llx)"
			    " failed", (u_longlong_t)guid[hca_ndx]);
			continue;
		}
		list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc);
		SRPT_DPRINTF_L2("ioc_attach, I/O Controller ibt HCA hdl (%p)",
		    (void *)ioc->ioc_ibt_hdl);
		srpt_ctxt->sc_num_iocs++;
	}

	ibt_free_hca_list(guid, hca_cnt);
	SRPT_DPRINTF_L3("ioc_attach, added %d I/O Controller(s)",
	    srpt_ctxt->sc_num_iocs);
	return (DDI_SUCCESS);
}

/*
 * srpt_ioc_detach() - I/O Controller detach
 *
 * Tears down every I/O Controller on the context list, then detaches
 * from IBTF.  srpt_ctxt->sc_rwlock should be held outside of this call.
 */
void
srpt_ioc_detach()
{
	srpt_ioc_t	*ioc;

	ASSERT(srpt_ctxt != NULL);

	while ((ioc = list_head(&srpt_ctxt->sc_ioc_list)) != NULL) {
		list_remove(&srpt_ctxt->sc_ioc_list, ioc);
		SRPT_DPRINTF_L2("ioc_detach, removing I/O Controller(%p)"
		    " (%016llx), ibt_hdl(%p)",
		    (void *)ioc,
		    ioc ? (u_longlong_t)ioc->ioc_guid : 0x0ll,
		    (void *)ioc->ioc_ibt_hdl);
		srpt_ioc_fini(ioc);
	}

	list_destroy(&srpt_ctxt->sc_ioc_list);

	ibt_detach(srpt_ctxt->sc_ibt_hdl);
	srpt_ctxt->sc_ibt_hdl = NULL;
}

/*
 * srpt_ioc_init() - I/O Controller initialization
 *
 * Opens the HCA identified by GUID and builds the per-IOC resources:
 * protection domain, shared receive queue, receive IU pool (registered
 * as a single memory region), data-buffer vmem arena and STMF dbuf
 * store.  Returns the new IOC, or NULL on any failure (all partially
 * created resources are unwound).
 *
 * Requires srpt_ctxt->rw_lock be held outside of call.
 */
static srpt_ioc_t *
srpt_ioc_init(ib_guid_t guid)
{
	ibt_status_t		status;
	srpt_ioc_t		*ioc;
	ibt_hca_attr_t		hca_attr;
	uint_t			iu_ndx;
	uint_t			err_ndx;
	ibt_mr_attr_t		mr_attr;
	ibt_mr_desc_t		mr_desc;
	srpt_iu_t		*iu;
	ibt_srq_sizes_t		srq_attr;
	char			namebuf[32];
	size_t			iu_offset;

	status = ibt_query_hca_byguid(guid, &hca_attr);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, HCA query error (%d)",
		    status);
		return (NULL);
	}

	/* Refuse to create a duplicate IOC for a GUID we already track. */
	ioc = srpt_ioc_get_locked(guid);
	if (ioc != NULL) {
		SRPT_DPRINTF_L1("ioc_init, HCA already exists");
		return (NULL);
	}

	ioc = kmem_zalloc(sizeof (srpt_ioc_t), KM_SLEEP);

	rw_init(&ioc->ioc_rwlock, NULL, RW_DRIVER, NULL);
	rw_enter(&ioc->ioc_rwlock, RW_WRITER);

	bcopy(&hca_attr, &ioc->ioc_attr, sizeof (ibt_hca_attr_t));

	SRPT_DPRINTF_L2("ioc_init, HCA max mr=%d, mrlen=%lld",
	    hca_attr.hca_max_memr, (u_longlong_t)hca_attr.hca_max_memr_len);
	ioc->ioc_guid = guid;

	status = ibt_open_hca(srpt_ctxt->sc_ibt_hdl, guid, &ioc->ioc_ibt_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT open failed (%d)", status);
		goto hca_open_err;
	}

	status = ibt_alloc_pd(ioc->ioc_ibt_hdl, IBT_PD_NO_FLAGS,
	    &ioc->ioc_pd_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT create PD failed (%d)", status);
		goto pd_alloc_err;
	}

	/*
	 * We require hardware support for SRQs.  We use a common SRQ to
	 * reduce channel memory consumption.
282 */ 283 if ((ioc->ioc_attr.hca_flags & IBT_HCA_SRQ) == 0) { 284 SRPT_DPRINTF_L0("ioc_init, no SRQ capability, not supported"); 285 goto srq_alloc_err; 286 } 287 288 SRPT_DPRINTF_L3("ioc_init, Using shared receive queues, max srq work" 289 " queue size(%d), def size = %d", ioc->ioc_attr.hca_max_srqs_sz, 290 srpt_ioc_srq_size); 291 srq_attr.srq_wr_sz = min(srpt_ioc_srq_size, 292 ioc->ioc_attr.hca_max_srqs_sz); 293 srq_attr.srq_sgl_sz = 1; 294 295 status = ibt_alloc_srq(ioc->ioc_ibt_hdl, IBT_SRQ_NO_FLAGS, 296 ioc->ioc_pd_hdl, &srq_attr, &ioc->ioc_srq_hdl, 297 &ioc->ioc_srq_attr); 298 if (status != IBT_SUCCESS) { 299 SRPT_DPRINTF_L1("ioc_init, IBT create SRQ failed(%d)", status); 300 goto srq_alloc_err; 301 } 302 303 SRPT_DPRINTF_L2("ioc_init, SRQ WR size(%d), SG size(%d)", 304 ioc->ioc_srq_attr.srq_wr_sz, ioc->ioc_srq_attr.srq_sgl_sz); 305 306 ibt_set_srq_private(ioc->ioc_srq_hdl, ioc); 307 308 /* 309 * Allocate a pool of SRP IU message buffers and post them to 310 * the I/O Controller SRQ. We let the SRQ manage the free IU 311 * messages. 
312 */ 313 ioc->ioc_num_iu_entries = 314 min(srq_attr.srq_wr_sz, srpt_ioc_srq_size) - 1; 315 316 ioc->ioc_iu_pool = kmem_zalloc(sizeof (srpt_iu_t) * 317 ioc->ioc_num_iu_entries, KM_SLEEP); 318 319 ioc->ioc_iu_bufs = kmem_alloc(SRPT_DEFAULT_SEND_MSG_SIZE * 320 ioc->ioc_num_iu_entries, KM_SLEEP); 321 322 if ((ioc->ioc_iu_pool == NULL) || (ioc->ioc_iu_bufs == NULL)) { 323 SRPT_DPRINTF_L1("ioc_init, failed to allocate SRQ IUs"); 324 goto srq_iu_alloc_err; 325 } 326 327 mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)ioc->ioc_iu_bufs; 328 mr_attr.mr_len = SRPT_DEFAULT_SEND_MSG_SIZE * ioc->ioc_num_iu_entries; 329 mr_attr.mr_as = NULL; 330 mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE; 331 332 status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl, 333 &mr_attr, &ioc->ioc_iu_mr_hdl, &mr_desc); 334 if (status != IBT_SUCCESS) { 335 SRPT_DPRINTF_L1("ioc_init, IU buffer pool MR err(%d)", 336 status); 337 goto srq_iu_alloc_err; 338 } 339 340 for (iu_ndx = 0, iu = ioc->ioc_iu_pool; iu_ndx < 341 ioc->ioc_num_iu_entries; iu_ndx++, iu++) { 342 343 iu_offset = (iu_ndx * SRPT_DEFAULT_SEND_MSG_SIZE); 344 iu->iu_buf = (void *)((uintptr_t)ioc->ioc_iu_bufs + iu_offset); 345 346 mutex_init(&iu->iu_lock, NULL, MUTEX_DRIVER, NULL); 347 348 iu->iu_sge.ds_va = mr_desc.md_vaddr + iu_offset; 349 iu->iu_sge.ds_key = mr_desc.md_lkey; 350 iu->iu_sge.ds_len = SRPT_DEFAULT_SEND_MSG_SIZE; 351 iu->iu_ioc = ioc; 352 iu->iu_pool_ndx = iu_ndx; 353 354 status = srpt_ioc_post_recv_iu(ioc, &ioc->ioc_iu_pool[iu_ndx]); 355 if (status != IBT_SUCCESS) { 356 SRPT_DPRINTF_L1("ioc_init, SRQ IU post err(%d)", 357 status); 358 goto srq_iu_post_err; 359 } 360 } 361 362 /* 363 * Initialize the dbuf vmem arena 364 */ 365 (void) snprintf(namebuf, sizeof (namebuf), 366 "srpt_buf_pool_%16llX", (u_longlong_t)guid); 367 ioc->ioc_dbuf_pool = srpt_vmem_create(namebuf, ioc, 368 SRPT_BUF_MR_CHUNKSIZE, SRPT_BUF_POOL_MAX, srpt_dbuf_mr_flags); 369 370 if (ioc->ioc_dbuf_pool == NULL) { 371 goto stmf_db_alloc_err; 372 } 373 374 /* 375 
* Allocate the I/O Controller STMF data buffer allocator. The 376 * data store will span all targets associated with this IOC. 377 */ 378 ioc->ioc_stmf_ds = stmf_alloc(STMF_STRUCT_DBUF_STORE, 0, 0); 379 if (ioc->ioc_stmf_ds == NULL) { 380 SRPT_DPRINTF_L1("ioc_attach, STMF DBUF alloc failure for IOC"); 381 goto stmf_db_alloc_err; 382 } 383 ioc->ioc_stmf_ds->ds_alloc_data_buf = &srpt_ioc_ds_alloc_dbuf; 384 ioc->ioc_stmf_ds->ds_free_data_buf = &srpt_ioc_ds_free_dbuf; 385 ioc->ioc_stmf_ds->ds_port_private = ioc; 386 387 rw_exit(&ioc->ioc_rwlock); 388 return (ioc); 389 390 stmf_db_alloc_err: 391 if (ioc->ioc_dbuf_pool != NULL) { 392 srpt_vmem_destroy(ioc->ioc_dbuf_pool); 393 } 394 395 srq_iu_post_err: 396 if (ioc->ioc_iu_mr_hdl != NULL) { 397 status = ibt_deregister_mr(ioc->ioc_ibt_hdl, 398 ioc->ioc_iu_mr_hdl); 399 if (status != IBT_SUCCESS) { 400 SRPT_DPRINTF_L1("ioc_init, error deregistering" 401 " memory region (%d)", status); 402 } 403 } 404 for (err_ndx = 0, iu = ioc->ioc_iu_pool; err_ndx < iu_ndx; 405 err_ndx++, iu++) { 406 mutex_destroy(&iu->iu_lock); 407 } 408 409 srq_iu_alloc_err: 410 if (ioc->ioc_iu_bufs != NULL) { 411 kmem_free(ioc->ioc_iu_bufs, SRPT_DEFAULT_SEND_MSG_SIZE * 412 ioc->ioc_num_iu_entries); 413 } 414 if (ioc->ioc_iu_pool != NULL) { 415 kmem_free(ioc->ioc_iu_pool, 416 sizeof (srpt_iu_t) * ioc->ioc_num_iu_entries); 417 } 418 if (ioc->ioc_srq_hdl != NULL) { 419 status = ibt_free_srq(ioc->ioc_srq_hdl); 420 if (status != IBT_SUCCESS) { 421 SRPT_DPRINTF_L1("ioc_init, error freeing SRQ (%d)", 422 status); 423 } 424 425 } 426 427 srq_alloc_err: 428 status = ibt_free_pd(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl); 429 if (status != IBT_SUCCESS) { 430 SRPT_DPRINTF_L1("ioc_init, free PD error (%d)", status); 431 } 432 433 pd_alloc_err: 434 status = ibt_close_hca(ioc->ioc_ibt_hdl); 435 if (status != IBT_SUCCESS) { 436 SRPT_DPRINTF_L1("ioc_init, close ioc error (%d)", status); 437 } 438 439 hca_open_err: 440 rw_exit(&ioc->ioc_rwlock); 441 rw_destroy(&ioc->ioc_rwlock); 
	kmem_free(ioc, sizeof (*ioc));
	return (NULL);
}

/*
 * srpt_ioc_fini() - I/O Controller Cleanup
 *
 * Releases every resource created by srpt_ioc_init() (dbuf store, SRQ,
 * IU memory region, IU pool, dbuf arena, PD) and closes the HCA, then
 * frees the IOC structure itself.
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
static void
srpt_ioc_fini(srpt_ioc_t *ioc)
{
	int		status;
	int		ndx;

	/*
	 * Note driver flows will have already taken all SRP
	 * services running on the I/O Controller off-line.
	 */
	rw_enter(&ioc->ioc_rwlock, RW_WRITER);
	if (ioc->ioc_ibt_hdl != NULL) {
		if (ioc->ioc_stmf_ds != NULL) {
			stmf_free(ioc->ioc_stmf_ds);
		}

		if (ioc->ioc_srq_hdl != NULL) {
			SRPT_DPRINTF_L4("ioc_fini, freeing SRQ");
			status = ibt_free_srq(ioc->ioc_srq_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, free SRQ"
				    " error (%d)", status);
			}
		}

		if (ioc->ioc_iu_mr_hdl != NULL) {
			status = ibt_deregister_mr(
			    ioc->ioc_ibt_hdl, ioc->ioc_iu_mr_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, error deregistering"
				    " memory region (%d)", status);
			}
		}

		if (ioc->ioc_iu_bufs != NULL) {
			kmem_free(ioc->ioc_iu_bufs, SRPT_DEFAULT_SEND_MSG_SIZE *
			    ioc->ioc_num_iu_entries);
		}

		if (ioc->ioc_iu_pool != NULL) {
			SRPT_DPRINTF_L4("ioc_fini, freeing IU entries");
			for (ndx = 0; ndx < ioc->ioc_num_iu_entries; ndx++) {
				mutex_destroy(&ioc->ioc_iu_pool[ndx].iu_lock);
			}

			SRPT_DPRINTF_L4("ioc_fini, free IU pool struct");
			kmem_free(ioc->ioc_iu_pool,
			    sizeof (srpt_iu_t) * (ioc->ioc_num_iu_entries));
			ioc->ioc_iu_pool = NULL;
			ioc->ioc_num_iu_entries = 0;
		}

		if (ioc->ioc_dbuf_pool != NULL) {
			srpt_vmem_destroy(ioc->ioc_dbuf_pool);
		}

		if (ioc->ioc_pd_hdl != NULL) {
			status = ibt_free_pd(ioc->ioc_ibt_hdl,
			    ioc->ioc_pd_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, free PD"
				    " error (%d)", status);
			}
		}

		status = ibt_close_hca(ioc->ioc_ibt_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1(
			    "ioc_fini, close ioc error (%d)", status);
		}
	}
	rw_exit(&ioc->ioc_rwlock);
	rw_destroy(&ioc->ioc_rwlock);
	kmem_free(ioc, sizeof (srpt_ioc_t));
}

/*
 * srpt_ioc_port_active() - I/O Controller port active
 *
 * Async-event handler body for IBT_EVENT_PORT_UP: (re)binds the SRP
 * service on the port that just came up, if the target service exists.
 */
static void
srpt_ioc_port_active(ibt_async_event_t *event)
{
	ibt_status_t		status;
	srpt_ioc_t		*ioc;

	ASSERT(event != NULL);

	SRPT_DPRINTF_L3("ioc_port_active event handler, invoked");

	/*
	 * Find the HCA in question and if the HCA has completed
	 * initialization, and the SRP Target service for the
	 * I/O Controller exists, then bind this port.
	 */
	ioc = srpt_ioc_get(event->ev_hca_guid);

	if (ioc == NULL) {
		SRPT_DPRINTF_L2("ioc_port_active, I/O Controller not"
		    " active");
		return;
	}

	if (ioc->ioc_tgt_port == NULL) {
		SRPT_DPRINTF_L2("ioc_port_active, no I/O Controller target"
		    " undefined");
		return;
	}


	/*
	 * We take the target lock here to serialize this operation
	 * with any STMF initiated target state transitions.  If
	 * SRP is off-line then the service handle is NULL.
	 */
	mutex_enter(&ioc->ioc_tgt_port->tp_lock);

	if (ioc->ioc_tgt_port->tp_ibt_svc_hdl != NULL) {
		status = srpt_ioc_svc_bind(ioc->ioc_tgt_port, event->ev_port);
		/* A not-yet-active port is expected here, not an error. */
		if (status != IBT_SUCCESS &&
		    status != IBT_HCA_PORT_NOT_ACTIVE) {
			SRPT_DPRINTF_L1("ioc_port_active, bind failed (%d)",
			    status);
		}
	}
	mutex_exit(&ioc->ioc_tgt_port->tp_lock);
}

/*
 * srpt_ioc_port_down()
 *
 * Async-event handler body for IBT_ERROR_PORT_DOWN: disconnects every
 * channel whose session is logged in through the port that went down.
 */
static void
srpt_ioc_port_down(ibt_async_event_t *event)
{
	srpt_ioc_t		*ioc;
	srpt_target_port_t	*tgt;
	srpt_channel_t		*ch;
	srpt_channel_t		*next_ch;

	SRPT_DPRINTF_L3("ioc_port_down event handler, invoked");

	/*
	 * Find the HCA in question and if the HCA has completed
	 * initialization, and the SRP Target service for the
	 * I/O Controller exists, then logout initiators
	 * through this port.
	 */
	ioc = srpt_ioc_get(event->ev_hca_guid);

	if (ioc == NULL) {
		SRPT_DPRINTF_L2("ioc_port_down, I/O Controller not"
		    " active");
		return;
	}

	/*
	 * We only have one target now, but we could go through all
	 * SCSI target ports if more are added.
	 */
	tgt = ioc->ioc_tgt_port;
	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_port_down, no I/O Controller target"
		    " undefined");
		return;
	}
	mutex_enter(&tgt->tp_lock);

	/*
	 * For all channels logged in through this port, initiate a
	 * disconnect.  next_ch is captured before the disconnect so
	 * list traversal is safe if the channel is removed.
	 */
	mutex_enter(&tgt->tp_ch_list_lock);
	ch = list_head(&tgt->tp_ch_list);
	while (ch != NULL) {
		next_ch = list_next(&tgt->tp_ch_list, ch);
		if (ch->ch_session && (ch->ch_session->ss_hw_port ==
		    event->ev_port)) {
			srpt_ch_disconnect(ch);
		}
		ch = next_ch;
	}
	mutex_exit(&tgt->tp_ch_list_lock);

	mutex_exit(&tgt->tp_lock);
}

/*
 * srpt_ioc_ib_async_hdlr - I/O Controller IB asynchronous events
 *
 * IBTF client async callback registered in srpt_ibt_modinfo.  Dispatches
 * port up/down events, dynamic HCA attach/detach, and empty-channel
 * notifications.
 */
/* ARGSUSED */
void
srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
	ibt_async_code_t code, ibt_async_event_t *event)
{
	srpt_ioc_t		*ioc;
	srpt_channel_t		*ch;

	switch (code) {
	case IBT_EVENT_PORT_UP:
		srpt_ioc_port_active(event);
		break;

	case IBT_ERROR_PORT_DOWN:
		srpt_ioc_port_down(event);
		break;

	case IBT_HCA_ATTACH_EVENT:
		/* A new HCA appeared at runtime; build an IOC for it. */
		rw_enter(&srpt_ctxt->sc_rwlock, RW_WRITER);
		ioc = srpt_ioc_init(event->ev_hca_guid);

		if (ioc == NULL) {
			rw_exit(&srpt_ctxt->sc_rwlock);
			SRPT_DPRINTF_L1("ib_async_hdlr, HCA_ATTACH"
			    " event failed to initialize HCA (0x%016llx)",
			    (u_longlong_t)event->ev_hca_guid);
			return;
		}
		SRPT_DPRINTF_L2("HCA_ATTACH_EVENT: I/O Controller"
		    " ibt hdl (%p)",
		    (void *)ioc->ioc_ibt_hdl);

		rw_enter(&ioc->ioc_rwlock, RW_WRITER);
		ioc->ioc_tgt_port = srpt_stp_alloc_port(ioc, ioc->ioc_guid);
		if (ioc->ioc_tgt_port == NULL) {
			SRPT_DPRINTF_L1("ioc_ib_async_hdlr, alloc SCSI "
			    "target port error for HCA (0x%016llx)",
			    (u_longlong_t)event->ev_hca_guid);
			rw_exit(&ioc->ioc_rwlock);
			srpt_ioc_fini(ioc);
			rw_exit(&srpt_ctxt->sc_rwlock);
			return;
		}

		/*
		 * New HCA added with default SCSI Target Port, SRP service
		 * will be started when SCSI Target Port is brought
		 * on-line by STMF.
687 */ 688 srpt_ctxt->sc_num_iocs++; 689 list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc); 690 691 rw_exit(&ioc->ioc_rwlock); 692 rw_exit(&srpt_ctxt->sc_rwlock); 693 break; 694 695 case IBT_HCA_DETACH_EVENT: 696 SRPT_DPRINTF_L1( 697 "ioc_iob_async_hdlr, HCA_DETACH_EVENT received."); 698 break; 699 700 case IBT_EVENT_EMPTY_CHAN: 701 /* Channel in ERROR state is now empty */ 702 ch = (srpt_channel_t *)ibt_get_chan_private(event->ev_chan_hdl); 703 SRPT_DPRINTF_L3( 704 "ioc_iob_async_hdlr, received empty channel error on %p", 705 (void *)ch); 706 break; 707 708 default: 709 SRPT_DPRINTF_L2("ioc_ib_async_hdlr, event not " 710 "handled (%d)", code); 711 break; 712 } 713 } 714 715 /* 716 * srpt_ioc_svc_bind() 717 */ 718 ibt_status_t 719 srpt_ioc_svc_bind(srpt_target_port_t *tgt, uint_t portnum) 720 { 721 ibt_status_t status; 722 srpt_hw_port_t *port; 723 ibt_hca_portinfo_t *portinfo; 724 uint_t qportinfo_sz; 725 uint_t qportnum; 726 ib_gid_t new_gid; 727 srpt_ioc_t *ioc; 728 729 ASSERT(tgt != NULL); 730 ASSERT(tgt->tp_ioc != NULL); 731 ioc = tgt->tp_ioc; 732 733 if (tgt->tp_ibt_svc_hdl == NULL) { 734 SRPT_DPRINTF_L2("ioc_svc_bind, NULL SCSI target port" 735 " service"); 736 return (IBT_INVALID_PARAM); 737 } 738 739 if (portnum == 0 || portnum > tgt->tp_nports) { 740 SRPT_DPRINTF_L2("ioc_svc_bind, bad port (%d)", portnum); 741 return (IBT_INVALID_PARAM); 742 } 743 status = ibt_query_hca_ports(ioc->ioc_ibt_hdl, portnum, 744 &portinfo, &qportnum, &qportinfo_sz); 745 if (status != IBT_SUCCESS) { 746 SRPT_DPRINTF_L1("ioc_svc_bind, query port error (%d)", 747 portnum); 748 return (IBT_INVALID_PARAM); 749 } 750 751 ASSERT(portinfo != NULL); 752 753 /* 754 * If port is not active do nothing, caller should attempt to bind 755 * after the port goes active. 
756 */ 757 if (portinfo->p_linkstate != IBT_PORT_ACTIVE) { 758 SRPT_DPRINTF_L2("ioc_svc_bind, port %d not in active state", 759 portnum); 760 ibt_free_portinfo(portinfo, qportinfo_sz); 761 return (IBT_HCA_PORT_NOT_ACTIVE); 762 } 763 764 port = &tgt->tp_hw_port[portnum-1]; 765 new_gid = portinfo->p_sgid_tbl[0]; 766 ibt_free_portinfo(portinfo, qportinfo_sz); 767 768 /* 769 * If previously bound and the port GID has changed, 770 * rebind to the new GID. 771 */ 772 if (port->hwp_bind_hdl != NULL) { 773 if (new_gid.gid_guid != port->hwp_gid.gid_guid || 774 new_gid.gid_prefix != port->hwp_gid.gid_prefix) { 775 SRPT_DPRINTF_L2("ioc_svc_bind, unregister current" 776 " bind"); 777 ibt_unbind_service(tgt->tp_ibt_svc_hdl, 778 port->hwp_bind_hdl); 779 port->hwp_bind_hdl = NULL; 780 } 781 } 782 SRPT_DPRINTF_L2("ioc_svc_bind, bind service, %016llx:%016llx", 783 (u_longlong_t)new_gid.gid_prefix, 784 (u_longlong_t)new_gid.gid_guid); 785 786 /* 787 * Pass SCSI Target Port as CM private data, the target will always 788 * exist while this service is bound. 
789 */ 790 status = ibt_bind_service(tgt->tp_ibt_svc_hdl, new_gid, NULL, tgt, 791 &port->hwp_bind_hdl); 792 if (status != IBT_SUCCESS && status != IBT_CM_SERVICE_EXISTS) { 793 SRPT_DPRINTF_L1("ioc_svc_bind, bind error (%d)", status); 794 return (status); 795 } 796 797 return (IBT_SUCCESS); 798 } 799 800 /* 801 * srpt_ioc_svc_unbind() 802 */ 803 void 804 srpt_ioc_svc_unbind(srpt_target_port_t *tgt, uint_t portnum) 805 { 806 srpt_hw_port_t *port; 807 808 if (tgt == NULL) { 809 SRPT_DPRINTF_L2("ioc_svc_unbind, SCSI target does not exist"); 810 return; 811 } 812 813 if (portnum == 0 || portnum > tgt->tp_nports) { 814 SRPT_DPRINTF_L2("ioc_svc_unbind, bad port (%d)", portnum); 815 return; 816 } 817 port = &tgt->tp_hw_port[portnum-1]; 818 819 if (tgt->tp_ibt_svc_hdl != NULL && port->hwp_bind_hdl != NULL) { 820 SRPT_DPRINTF_L2("ioc_svc_unbind, unregister current bind"); 821 ibt_unbind_service(tgt->tp_ibt_svc_hdl, port->hwp_bind_hdl); 822 } 823 port->hwp_bind_hdl = NULL; 824 } 825 826 /* 827 * srpt_ioc_svc_unbind_all() 828 */ 829 void 830 srpt_ioc_svc_unbind_all(srpt_target_port_t *tgt) 831 { 832 uint_t portnum; 833 834 if (tgt == NULL) { 835 SRPT_DPRINTF_L2("ioc_svc_unbind_all, NULL SCSI target port" 836 " specified"); 837 return; 838 } 839 for (portnum = 1; portnum <= tgt->tp_nports; portnum++) { 840 srpt_ioc_svc_unbind(tgt, portnum); 841 } 842 } 843 844 /* 845 * srpt_ioc_get_locked() 846 * 847 * Requires srpt_ctxt->rw_lock be held outside of call. 
848 */ 849 srpt_ioc_t * 850 srpt_ioc_get_locked(ib_guid_t guid) 851 { 852 srpt_ioc_t *ioc; 853 854 ioc = list_head(&srpt_ctxt->sc_ioc_list); 855 while (ioc != NULL) { 856 if (ioc->ioc_guid == guid) { 857 break; 858 } 859 ioc = list_next(&srpt_ctxt->sc_ioc_list, ioc); 860 } 861 return (ioc); 862 } 863 864 /* 865 * srpt_ioc_get() 866 */ 867 srpt_ioc_t * 868 srpt_ioc_get(ib_guid_t guid) 869 { 870 srpt_ioc_t *ioc; 871 872 rw_enter(&srpt_ctxt->sc_rwlock, RW_READER); 873 ioc = srpt_ioc_get_locked(guid); 874 rw_exit(&srpt_ctxt->sc_rwlock); 875 return (ioc); 876 } 877 878 /* 879 * srpt_ioc_post_recv_iu() 880 */ 881 ibt_status_t 882 srpt_ioc_post_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu) 883 { 884 ibt_status_t status; 885 ibt_recv_wr_t wr; 886 uint_t posted; 887 888 ASSERT(ioc != NULL); 889 ASSERT(iu != NULL); 890 891 wr.wr_id = (ibt_wrid_t)(uintptr_t)iu; 892 wr.wr_nds = 1; 893 wr.wr_sgl = &iu->iu_sge; 894 posted = 0; 895 896 status = ibt_post_srq(ioc->ioc_srq_hdl, &wr, 1, &posted); 897 if (status != IBT_SUCCESS) { 898 SRPT_DPRINTF_L2("ioc_post_recv_iu, post error (%d)", 899 status); 900 } 901 return (status); 902 } 903 904 /* 905 * srpt_ioc_repost_recv_iu() 906 */ 907 void 908 srpt_ioc_repost_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu) 909 { 910 srpt_channel_t *ch; 911 ibt_status_t status; 912 913 ASSERT(iu != NULL); 914 ASSERT(mutex_owned(&iu->iu_lock)); 915 916 /* 917 * Some additional sanity checks while in debug state, all STMF 918 * related task activities should be complete prior to returning 919 * this IU to the available pool. 
920 */ 921 ASSERT(iu->iu_stmf_task == NULL); 922 ASSERT(iu->iu_sq_posted_cnt == 0); 923 924 ch = iu->iu_ch; 925 iu->iu_ch = NULL; 926 iu->iu_num_rdescs = 0; 927 iu->iu_rdescs = NULL; 928 iu->iu_tot_xfer_len = 0; 929 iu->iu_tag = 0; 930 iu->iu_flags = 0; 931 iu->iu_sq_posted_cnt = 0; 932 933 status = srpt_ioc_post_recv_iu(ioc, iu); 934 935 if (status != IBT_SUCCESS) { 936 /* 937 * Very bad, we should initiate a shutdown of the I/O 938 * Controller here, off-lining any targets associated 939 * with this I/O Controller (and therefore disconnecting 940 * any logins that remain). 941 * 942 * In practice this should never happen so we put 943 * the code near the bottom of the implementation list. 944 */ 945 SRPT_DPRINTF_L0("ioc_repost_recv_iu, error RX IU (%d)", 946 status); 947 ASSERT(0); 948 } else if (ch != NULL) { 949 atomic_inc_32(&ch->ch_req_lim_delta); 950 } 951 } 952 953 /* 954 * srpt_ioc_init_profile() 955 * 956 * SRP I/O Controller serialization lock must be held when this 957 * routine is invoked. 958 */ 959 void 960 srpt_ioc_init_profile(srpt_ioc_t *ioc) 961 { 962 srpt_ioc_opcap_mask_t capmask = {0}; 963 964 ASSERT(ioc != NULL); 965 966 ioc->ioc_profile.ioc_guid = h2b64(ioc->ioc_guid); 967 (void) memcpy(ioc->ioc_profile.ioc_id_string, 968 "Solaris SRP Target 0.9a", 23); 969 970 /* 971 * Note vendor ID and subsystem ID are 24 bit values. Low order 972 * 8 bits in vendor ID field is slot and is initialized to zero. 973 * Low order 8 bits of subsystem ID is a reserved field and 974 * initialized to zero. 
	 */
	ioc->ioc_profile.ioc_vendorid =
	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
	ioc->ioc_profile.ioc_deviceid =
	    h2b32((uint32_t)ioc->ioc_attr.hca_device_id);
	ioc->ioc_profile.ioc_device_ver =
	    h2b16((uint16_t)ioc->ioc_attr.hca_version_id);
	ioc->ioc_profile.ioc_subsys_vendorid =
	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
	ioc->ioc_profile.ioc_subsys_id = h2b32(0);
	ioc->ioc_profile.ioc_io_class = h2b16(SRP_REV_16A_IO_CLASS);
	ioc->ioc_profile.ioc_io_subclass = h2b16(SRP_IO_SUBCLASS);
	ioc->ioc_profile.ioc_protocol = h2b16(SRP_PROTOCOL);
	ioc->ioc_profile.ioc_protocol_ver = h2b16(SRP_PROTOCOL_VERSION);
	ioc->ioc_profile.ioc_send_msg_qdepth = h2b16(srpt_send_msg_depth);
	/* No byte swap here — presumably a single-octet field; confirm. */
	ioc->ioc_profile.ioc_rdma_read_qdepth =
	    ioc->ioc_attr.hca_max_rdma_out_chan;
	ioc->ioc_profile.ioc_send_msg_sz = h2b32(SRPT_DEFAULT_SEND_MSG_SIZE);
	ioc->ioc_profile.ioc_rdma_xfer_sz = h2b32(SRPT_DEFAULT_MAX_RDMA_SIZE);

	capmask.bits.st = 1;	/* Messages can be sent to IOC */
	capmask.bits.sf = 1;	/* Messages can be sent from IOC */
	capmask.bits.rf = 1;	/* RDMA Reads can be sent from IOC */
	capmask.bits.wf = 1;	/* RDMA Writes can be sent from IOC */
	ioc->ioc_profile.ioc_ctrl_opcap_mask = capmask.mask;

	/*
	 * We currently only have one target, but if we had a list we would
	 * go through that list and only count those that are ONLINE when
	 * setting the services count and entries.
	 */
	if (ioc->ioc_tgt_port->tp_srp_enabled) {
		ioc->ioc_profile.ioc_service_entries = 1;
		ioc->ioc_svc.srv_id = h2b64(ioc->ioc_guid);
		(void) snprintf((char *)ioc->ioc_svc.srv_name,
		    IB_DM_MAX_SVC_NAME_LEN, "SRP.T10:%016llx",
		    (u_longlong_t)ioc->ioc_guid);
	} else {
		ioc->ioc_profile.ioc_service_entries = 0;
		ioc->ioc_svc.srv_id = 0;
	}
}

/*
 * srpt_ioc_ds_alloc_dbuf()
 *
 * STMF dbuf-store allocation callback.  Carves a buffer of 'size' bytes
 * from the IOC's registered vmem arena, looks up the covering memory
 * region, and wraps both in an STMF data buffer with a single SGE.
 * Returns NULL if the arena or STMF allocation fails.
 */
/* ARGSUSED */
stmf_data_buf_t *
srpt_ioc_ds_alloc_dbuf(struct scsi_task *task, uint32_t size,
	uint32_t *pminsize, uint32_t flags)
{
	srpt_iu_t	*iu;
	srpt_ioc_t	*ioc;
	srpt_ds_dbuf_t	*dbuf;
	stmf_data_buf_t	*stmf_dbuf;
	void		*buf;
	srpt_mr_t	mr;

	ASSERT(task != NULL);
	iu = task->task_port_private;
	ioc = iu->iu_ioc;

	SRPT_DPRINTF_L4("ioc_ds_alloc_dbuf, invoked ioc(%p)"
	    " size(%d), flags(%x)",
	    (void *)ioc, size, flags);

	buf = srpt_vmem_alloc(ioc->ioc_dbuf_pool, size);
	if (buf == NULL) {
		return (NULL);
	}

	/* Resolve the MR (handle/keys) covering the allocated range. */
	if (srpt_vmem_mr(ioc->ioc_dbuf_pool, buf, size, &mr) != 0) {
		goto stmf_alloc_err;
	}

	stmf_dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, sizeof (srpt_ds_dbuf_t),
	    0);
	if (stmf_dbuf == NULL) {
		SRPT_DPRINTF_L2("ioc_ds_alloc_dbuf, stmf_alloc failed");
		goto stmf_alloc_err;
	}

	/* Fill the port-private descriptor with the RDMA view of buf. */
	dbuf = stmf_dbuf->db_port_private;
	dbuf->db_stmf_buf = stmf_dbuf;
	dbuf->db_mr_hdl = mr.mr_hdl;
	dbuf->db_ioc = ioc;
	dbuf->db_sge.ds_va = mr.mr_va;
	dbuf->db_sge.ds_key = mr.mr_lkey;
	dbuf->db_sge.ds_len = size;

	stmf_dbuf->db_buf_size = size;
	stmf_dbuf->db_data_size = size;
	stmf_dbuf->db_relative_offset = 0;
	stmf_dbuf->db_flags = 0;
	stmf_dbuf->db_xfer_status = 0;
	stmf_dbuf->db_sglist_length = 1;
	stmf_dbuf->db_sglist[0].seg_addr = buf;
	stmf_dbuf->db_sglist[0].seg_length = size;

	return (stmf_dbuf);

	/*
	 * NOTE(review): no goto targeting buf_mr_err is visible in this
	 * function — the label appears unused; verify before removing.
	 */
buf_mr_err:
	stmf_free(stmf_dbuf);

stmf_alloc_err:
	srpt_vmem_free(ioc->ioc_dbuf_pool, buf, size);

	return (NULL);
}

/*
 * srpt_ioc_ds_free_dbuf() - STMF dbuf-store free callback.
 *
 * Returns the buffer's backing memory to the IOC vmem arena and frees
 * the STMF data buffer structure.
 */
void
srpt_ioc_ds_free_dbuf(struct stmf_dbuf_store *ds,
	stmf_data_buf_t *dbuf)
{
	srpt_ioc_t	*ioc;

	SRPT_DPRINTF_L4("ioc_ds_free_dbuf, invoked buf (%p)",
	    (void *)dbuf);
	ioc = ds->ds_port_private;

	srpt_vmem_free(ioc->ioc_dbuf_pool, dbuf->db_sglist[0].seg_addr,
	    dbuf->db_buf_size);
	stmf_free(dbuf);
}

/* Memory arena routines */

/*
 * srpt_vmem_create()
 *
 * Creates a growable pool of IB-registered memory: an AVL list of
 * registered chunks (srpt_mr_t) plus a vmem arena spanning them.  The
 * pool starts with one chunk of 'chunksize' and may grow up to
 * 'maxsize' (see srpt_vmem_alloc).
 */
static srpt_vmem_pool_t *
srpt_vmem_create(const char *name, srpt_ioc_t *ioc, ib_memlen_t chunksize,
    uint64_t maxsize, ibt_mr_flags_t flags)
{
	srpt_mr_t		*chunk;
	srpt_vmem_pool_t	*result;

	ASSERT(chunksize <= maxsize);

	result = kmem_zalloc(sizeof (srpt_vmem_pool_t), KM_SLEEP);

	result->svp_ioc = ioc;
	result->svp_chunksize = chunksize;
	result->svp_max_size = maxsize;
	result->svp_flags = flags;

	rw_init(&result->svp_lock, NULL, RW_DRIVER, NULL);
	avl_create(&result->svp_mr_list, srpt_vmem_mr_compare,
	    sizeof (srpt_mr_t), offsetof(srpt_mr_t, mr_avl));

	/*
	 * NOTE(review): chunk is dereferenced below without a NULL
	 * check; confirm srpt_vmem_chunk_alloc() cannot return NULL
	 * here (its full body is outside this view).
	 */
	chunk = srpt_vmem_chunk_alloc(result, chunksize);

	avl_add(&result->svp_mr_list, chunk);
	result->svp_total_size = chunksize;

	result->svp_vmem = vmem_create(name,
	    (void*)(uintptr_t)chunk->mr_va,
	    (size_t)chunk->mr_len, SRPT_MR_QUANTSIZE,
	    NULL, NULL, NULL, 0, VM_SLEEP);

	return (result);
}

/*
 * srpt_vmem_destroy()
 *
 * Destroys the arena, then unregisters and frees every chunk on the
 * pool's MR list, and finally the pool itself.
 */
static void
srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool)
{
	srpt_mr_t		*chunk;
	srpt_mr_t		*next;

	rw_enter(&vm_pool->svp_lock, RW_WRITER);
	vmem_destroy(vm_pool->svp_vmem);

	chunk = avl_first(&vm_pool->svp_mr_list);

	while (chunk != NULL) {
		next = AVL_NEXT(&vm_pool->svp_mr_list, chunk);
		avl_remove(&vm_pool->svp_mr_list, chunk);
		srpt_vmem_chunk_free(vm_pool, chunk);
		chunk = next;
	}

	avl_destroy(&vm_pool->svp_mr_list);

	rw_exit(&vm_pool->svp_lock);
	rw_destroy(&vm_pool->svp_lock);

	kmem_free(vm_pool, sizeof (srpt_vmem_pool_t));
}

/*
 * srpt_vmem_alloc()
 *
 * Allocates 'size' bytes from the pool.  If the arena is exhausted and
 * the pool is below svp_max_size, registers an additional chunk, adds
 * it to the arena, and retries once.  Returns NULL when the pool cap
 * is reached or growth fails.
 */
static void *
srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size)
{
	void		*result;
	srpt_mr_t	*next;
	ib_memlen_t	chunklen;

	ASSERT(vm_pool != NULL);

	result = vmem_alloc(vm_pool->svp_vmem, size,
	    VM_NOSLEEP | VM_FIRSTFIT);

	if (result != NULL) {
		/* memory successfully allocated */
		return (result);
	}

	/* need more vmem */
	rw_enter(&vm_pool->svp_lock, RW_WRITER);
	chunklen = vm_pool->svp_chunksize;

	if (vm_pool->svp_total_size >= vm_pool->svp_max_size) {
		/* no more room to alloc */
		rw_exit(&vm_pool->svp_lock);
		return (NULL);
	}

	/* Trim the growth so the pool never exceeds its configured cap. */
	if ((vm_pool->svp_total_size + chunklen) > vm_pool->svp_max_size) {
		chunklen = vm_pool->svp_max_size - vm_pool->svp_total_size;
	}

	next = srpt_vmem_chunk_alloc(vm_pool, chunklen);
	if (next != NULL) {
		/*
		 * Note that the size of the chunk we got
		 * may not be the size we requested.  Use the
		 * length returned in the chunk itself.
1198 */ 1199 if (vmem_add(vm_pool->svp_vmem, (void*)(uintptr_t)next->mr_va, 1200 next->mr_len, VM_NOSLEEP) == NULL) { 1201 srpt_vmem_chunk_free(vm_pool, next); 1202 SRPT_DPRINTF_L2("vmem_add failed"); 1203 } else { 1204 vm_pool->svp_total_size += next->mr_len; 1205 avl_add(&vm_pool->svp_mr_list, next); 1206 } 1207 } 1208 1209 rw_exit(&vm_pool->svp_lock); 1210 1211 result = vmem_alloc(vm_pool->svp_vmem, size, VM_NOSLEEP | VM_FIRSTFIT); 1212 1213 return (result); 1214 } 1215 1216 static void 1217 srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size) 1218 { 1219 vmem_free(vm_pool->svp_vmem, vaddr, size); 1220 } 1221 1222 static int 1223 srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size, 1224 srpt_mr_t *mr) 1225 { 1226 avl_index_t where; 1227 ib_vaddr_t mrva = (ib_vaddr_t)(uintptr_t)vaddr; 1228 srpt_mr_t chunk; 1229 srpt_mr_t *nearest; 1230 ib_vaddr_t chunk_end; 1231 int status = DDI_FAILURE; 1232 1233 rw_enter(&vm_pool->svp_lock, RW_READER); 1234 1235 chunk.mr_va = mrva; 1236 nearest = avl_find(&vm_pool->svp_mr_list, &chunk, &where); 1237 1238 if (nearest == NULL) { 1239 nearest = avl_nearest(&vm_pool->svp_mr_list, where, 1240 AVL_BEFORE); 1241 } 1242 1243 if (nearest != NULL) { 1244 /* Verify this chunk contains the specified address range */ 1245 ASSERT(nearest->mr_va <= mrva); 1246 1247 chunk_end = nearest->mr_va + nearest->mr_len; 1248 if (chunk_end >= mrva + size) { 1249 mr->mr_hdl = nearest->mr_hdl; 1250 mr->mr_va = mrva; 1251 mr->mr_len = size; 1252 mr->mr_lkey = nearest->mr_lkey; 1253 mr->mr_rkey = nearest->mr_rkey; 1254 status = DDI_SUCCESS; 1255 } 1256 } 1257 1258 rw_exit(&vm_pool->svp_lock); 1259 return (status); 1260 } 1261 1262 static srpt_mr_t * 1263 srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool, ib_memlen_t chunksize) 1264 { 1265 void *chunk = NULL; 1266 srpt_mr_t *result = NULL; 1267 1268 while ((chunk == NULL) && (chunksize >= SRPT_MIN_CHUNKSIZE)) { 1269 chunk = kmem_alloc(chunksize, KM_NOSLEEP); 1270 if (chunk == NULL) 
{ 1271 SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: " 1272 "failed to alloc chunk of %d, trying %d", 1273 (int)chunksize, (int)chunksize/2); 1274 chunksize /= 2; 1275 } 1276 } 1277 1278 if (chunk != NULL) { 1279 result = srpt_reg_mem(vm_pool, (ib_vaddr_t)(uintptr_t)chunk, 1280 chunksize); 1281 if (result == NULL) { 1282 SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: " 1283 "chunk registration failed"); 1284 kmem_free(chunk, chunksize); 1285 } 1286 } 1287 1288 return (result); 1289 } 1290 1291 static void 1292 srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr) 1293 { 1294 void *chunk = (void *)(uintptr_t)mr->mr_va; 1295 ib_memlen_t chunksize = mr->mr_len; 1296 1297 srpt_dereg_mem(vm_pool->svp_ioc, mr); 1298 kmem_free(chunk, chunksize); 1299 } 1300 1301 static srpt_mr_t * 1302 srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr, ib_memlen_t len) 1303 { 1304 srpt_mr_t *result = NULL; 1305 ibt_mr_attr_t mr_attr; 1306 ibt_mr_desc_t mr_desc; 1307 ibt_status_t status; 1308 srpt_ioc_t *ioc = vm_pool->svp_ioc; 1309 1310 result = kmem_zalloc(sizeof (srpt_mr_t), KM_NOSLEEP); 1311 if (result == NULL) { 1312 SRPT_DPRINTF_L2("srpt_reg_mem: failed to allocate"); 1313 return (NULL); 1314 } 1315 1316 bzero(&mr_attr, sizeof (ibt_mr_attr_t)); 1317 bzero(&mr_desc, sizeof (ibt_mr_desc_t)); 1318 1319 mr_attr.mr_vaddr = vaddr; 1320 mr_attr.mr_len = len; 1321 mr_attr.mr_as = NULL; 1322 mr_attr.mr_flags = vm_pool->svp_flags; 1323 1324 status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl, 1325 &mr_attr, &result->mr_hdl, &mr_desc); 1326 if (status != IBT_SUCCESS) { 1327 SRPT_DPRINTF_L2("srpt_reg_mem: ibt_register_mr " 1328 "failed %d", status); 1329 kmem_free(result, sizeof (srpt_mr_t)); 1330 return (NULL); 1331 } 1332 1333 result->mr_va = mr_attr.mr_vaddr; 1334 result->mr_len = mr_attr.mr_len; 1335 result->mr_lkey = mr_desc.md_lkey; 1336 result->mr_rkey = mr_desc.md_rkey; 1337 1338 return (result); 1339 } 1340 1341 static void 1342 srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr) 
1343 { 1344 ibt_status_t status; 1345 1346 status = ibt_deregister_mr(ioc->ioc_ibt_hdl, mr->mr_hdl); 1347 if (status != IBT_SUCCESS) { 1348 SRPT_DPRINTF_L1("ioc_fini, error deregistering MR (%d)", 1349 status); 1350 } 1351 kmem_free(mr, sizeof (srpt_mr_t)); 1352 } 1353 1354 static int 1355 srpt_vmem_mr_compare(const void *a, const void *b) 1356 { 1357 srpt_mr_t *mr1 = (srpt_mr_t *)a; 1358 srpt_mr_t *mr2 = (srpt_mr_t *)b; 1359 1360 /* sort and match by virtual address */ 1361 if (mr1->mr_va < mr2->mr_va) { 1362 return (-1); 1363 } else if (mr1->mr_va > mr2->mr_va) { 1364 return (1); 1365 } 1366 1367 return (0); 1368 } 1369