/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * I/O Controller functions for the Solaris COMSTAR SCSI RDMA Protocol
 * Target (SRPT) port provider.
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>
#include <sys/sysmacros.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/sdt.h>

#include "srp.h"
#include "srpt_impl.h"
#include "srpt_ioc.h"
#include "srpt_stp.h"
#include "srpt_ch.h"

/*
 * srpt_ioc_srq_size - Tunable parameter that specifies the number
 * of receive WQ entries that can be posted to the IOC shared
 * receive queue.
 */
uint32_t	srpt_ioc_srq_size = SRPT_DEFAULT_IOC_SRQ_SIZE;
extern uint16_t srpt_send_msg_depth;

/* IOC profile capabilities mask must be big-endian */
typedef struct srpt_ioc_opcap_bits_s {
#if	defined(_BIT_FIELDS_LTOH)
	uint8_t		af:1,
			at:1,
			wf:1,
			wt:1,
			rf:1,
			rt:1,
			sf:1,
			st:1;
#elif	defined(_BIT_FIELDS_HTOL)
	uint8_t		st:1,
			sf:1,
			rt:1,
			rf:1,
			wt:1,
			wf:1,
			at:1,
			af:1;
#else
#error	One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
#endif
} srpt_ioc_opcap_bits_t;

typedef union {
	srpt_ioc_opcap_bits_t	bits;
	uint8_t			mask;
} srpt_ioc_opcap_mask_t;
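
/*
 * Usage sketch (mirroring srpt_ioc_init_profile() below): set the
 * individual capability bits and publish the resulting byte; the
 * conditional bitfield layout above ensures the byte matches the
 * big-endian format the IOC profile expects on either host order:
 *
 *	srpt_ioc_opcap_mask_t capmask = {0};
 *
 *	capmask.bits.st = 1;	(messages can be sent to IOC)
 *	capmask.bits.sf = 1;	(messages can be sent from IOC)
 *	profile->ioc_ctrl_opcap_mask = capmask.mask;
 */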

/*
 * vmem arena variables - values derived from iSER
 */
#define	SRPT_MR_QUANTSIZE	0x400			/* 1K */
#define	SRPT_MIN_CHUNKSIZE	0x100000		/* 1MB */

/* use less memory on 32-bit kernels as it's much more constrained */
#ifdef _LP64
#define	SRPT_BUF_MR_CHUNKSIZE	0x1000000		/* 16MB */
#define	SRPT_BUF_POOL_MAX	0x40000000		/* 1GB */
#else
#define	SRPT_BUF_MR_CHUNKSIZE	0x400000		/* 4MB */
#define	SRPT_BUF_POOL_MAX	0x4000000		/* 64MB */
#endif

static ibt_mr_flags_t	srpt_dbuf_mr_flags =
    IBT_MR_ENABLE_LOCAL_WRITE | IBT_MR_ENABLE_REMOTE_WRITE |
    IBT_MR_ENABLE_REMOTE_READ;

void srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
	ibt_async_code_t code, ibt_async_event_t *event);

static struct ibt_clnt_modinfo_s srpt_ibt_modinfo = {
	IBTI_V_CURR,
	IBT_STORAGE_DEV,
	srpt_ioc_ib_async_hdlr,
	NULL,
	"srpt"
};

static srpt_ioc_t *srpt_ioc_init(ib_guid_t guid);
static void srpt_ioc_fini(srpt_ioc_t *ioc);

static srpt_vmem_pool_t *srpt_vmem_create(const char *name, srpt_ioc_t *ioc,
    ib_memlen_t chunksize, uint64_t maxsize, ibt_mr_flags_t flags);
static void *srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size);
static int srpt_vmem_mr_compare(const void *a, const void *b);
static srpt_mr_t *srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool,
    ib_memlen_t chunksize);
static void srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool);
static void srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size);
static srpt_mr_t *srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr,
    ib_memlen_t len);
static void srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr);
static void srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr);
static int srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
    srpt_mr_t *mr);

/*
 * srpt_ioc_attach() - I/O Controller attach
 *
 * Attach to IBTF and initialize I/O controllers. The srpt_ctxt->sc_rwlock
 * should be held outside of this call.
 */
int
srpt_ioc_attach()
{
	int		status;
	int		hca_cnt;
	int		hca_ndx;
	ib_guid_t	*guid;
	srpt_ioc_t	*ioc;

	ASSERT(srpt_ctxt != NULL);

	/*
	 * Attach to IBTF and initialize a list of IB devices.  Each
	 * HCA will be represented by an I/O Controller.
	 */
	status = ibt_attach(&srpt_ibt_modinfo, srpt_ctxt->sc_dip,
	    srpt_ctxt, &srpt_ctxt->sc_ibt_hdl);
	if (status != DDI_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_attach, ibt_attach failed (0x%x)",
		    status);
		return (DDI_FAILURE);
	}

	hca_cnt = ibt_get_hca_list(&guid);
	if (hca_cnt < 1) {
		/*
		 * Not a fatal error; the service will be up and
		 * waiting for ATTACH events.
		 */
		SRPT_DPRINTF_L2("ioc_attach, no HCA found");
		return (DDI_SUCCESS);
	}

	for (hca_ndx = 0; hca_ndx < hca_cnt; hca_ndx++) {
		SRPT_DPRINTF_L2("ioc_attach, adding I/O"
		    " Controller (%016llx)", (u_longlong_t)guid[hca_ndx]);

		ioc = srpt_ioc_init(guid[hca_ndx]);
		if (ioc == NULL) {
			SRPT_DPRINTF_L1("ioc_attach, ioc_init GUID(%016llx)"
			    " failed", (u_longlong_t)guid[hca_ndx]);
			continue;
		}
		list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc);
		SRPT_DPRINTF_L2("ioc_attach, I/O Controller ibt HCA hdl (%p)",
		    (void *)ioc->ioc_ibt_hdl);
		srpt_ctxt->sc_num_iocs++;
	}

	ibt_free_hca_list(guid, hca_cnt);
	SRPT_DPRINTF_L3("ioc_attach, added %d I/O Controller(s)",
	    srpt_ctxt->sc_num_iocs);
	return (DDI_SUCCESS);
}

/*
 * srpt_ioc_detach() - I/O Controller detach
 *
 * srpt_ctxt->sc_rwlock should be held outside of this call.
 */
void
srpt_ioc_detach()
{
	srpt_ioc_t	*ioc;

	ASSERT(srpt_ctxt != NULL);

	while ((ioc = list_head(&srpt_ctxt->sc_ioc_list)) != NULL) {
		list_remove(&srpt_ctxt->sc_ioc_list, ioc);
		SRPT_DPRINTF_L2("ioc_detach, removing I/O Controller(%p)"
		    " (%016llx), ibt_hdl(%p)",
		    (void *)ioc,
		    (u_longlong_t)ioc->ioc_guid,
		    (void *)ioc->ioc_ibt_hdl);
		srpt_ioc_fini(ioc);
	}

	(void) ibt_detach(srpt_ctxt->sc_ibt_hdl);
	srpt_ctxt->sc_ibt_hdl = NULL;
}

/*
 * srpt_ioc_init() - I/O Controller initialization
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
static srpt_ioc_t *
srpt_ioc_init(ib_guid_t guid)
{
	ibt_status_t		status;
	srpt_ioc_t		*ioc;
	ibt_hca_attr_t		hca_attr;
	uint_t			iu_ndx;
	uint_t			err_ndx;
	ibt_mr_attr_t		mr_attr;
	ibt_mr_desc_t		mr_desc;
	srpt_iu_t		*iu;
	ibt_srq_sizes_t		srq_attr;
	char			namebuf[32];
	size_t			iu_offset;

	status = ibt_query_hca_byguid(guid, &hca_attr);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, HCA query error (%d)",
		    status);
		return (NULL);
	}

	ioc = srpt_ioc_get_locked(guid);
	if (ioc != NULL) {
		SRPT_DPRINTF_L1("ioc_init, HCA already exists");
		return (NULL);
	}

	ioc = kmem_zalloc(sizeof (srpt_ioc_t), KM_SLEEP);

	rw_init(&ioc->ioc_rwlock, NULL, RW_DRIVER, NULL);
	rw_enter(&ioc->ioc_rwlock, RW_WRITER);

	bcopy(&hca_attr, &ioc->ioc_attr, sizeof (ibt_hca_attr_t));

	SRPT_DPRINTF_L2("ioc_init, HCA max mr=%d, mrlen=%lld",
	    hca_attr.hca_max_memr, (u_longlong_t)hca_attr.hca_max_memr_len);
	ioc->ioc_guid = guid;

	status = ibt_open_hca(srpt_ctxt->sc_ibt_hdl, guid, &ioc->ioc_ibt_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT open failed (%d)", status);
		goto hca_open_err;
	}

	status = ibt_alloc_pd(ioc->ioc_ibt_hdl, IBT_PD_NO_FLAGS,
	    &ioc->ioc_pd_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT create PD failed (%d)", status);
		goto pd_alloc_err;
	}

	/*
	 * We require hardware support for SRQs.  We use a common SRQ to
	 * reduce channel memory consumption.
	 */
	if ((ioc->ioc_attr.hca_flags & IBT_HCA_SRQ) == 0) {
		SRPT_DPRINTF_L0("ioc_init, no SRQ capability, not supported");
		goto srq_alloc_err;
	}

	SRPT_DPRINTF_L3("ioc_init, Using shared receive queues, max srq work"
	    " queue size(%d), def size = %d", ioc->ioc_attr.hca_max_srqs_sz,
	    srpt_ioc_srq_size);
	srq_attr.srq_wr_sz = min(srpt_ioc_srq_size,
	    ioc->ioc_attr.hca_max_srqs_sz);
	srq_attr.srq_sgl_sz = 1;

	status = ibt_alloc_srq(ioc->ioc_ibt_hdl, IBT_SRQ_NO_FLAGS,
	    ioc->ioc_pd_hdl, &srq_attr, &ioc->ioc_srq_hdl,
	    &ioc->ioc_srq_attr);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT create SRQ failed(%d)", status);
		goto srq_alloc_err;
	}

	SRPT_DPRINTF_L2("ioc_init, SRQ WR size(%d), SG size(%d)",
	    ioc->ioc_srq_attr.srq_wr_sz, ioc->ioc_srq_attr.srq_sgl_sz);

	ibt_set_srq_private(ioc->ioc_srq_hdl, ioc);
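	/*
	 * The IOC pointer is stashed as the SRQ private data so that
	 * code holding only the SRQ handle can recover the owning IOC
	 * (presumably via the matching ibt_get_srq_private() accessor).
	 */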

	/*
	 * Allocate a pool of SRP IU message buffers and post them to
	 * the I/O Controller SRQ.  We let the SRQ manage the free IU
	 * messages.
	 */
	ioc->ioc_num_iu_entries =
	    min(srq_attr.srq_wr_sz, srpt_ioc_srq_size) - 1;

	ioc->ioc_iu_pool = kmem_zalloc(sizeof (srpt_iu_t) *
	    ioc->ioc_num_iu_entries, KM_SLEEP);

	ioc->ioc_iu_bufs = kmem_alloc(SRPT_DEFAULT_SEND_MSG_SIZE *
	    ioc->ioc_num_iu_entries, KM_SLEEP);

	if ((ioc->ioc_iu_pool == NULL) || (ioc->ioc_iu_bufs == NULL)) {
		SRPT_DPRINTF_L1("ioc_init, failed to allocate SRQ IUs");
		goto srq_iu_alloc_err;
	}

	mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)ioc->ioc_iu_bufs;
	mr_attr.mr_len = SRPT_DEFAULT_SEND_MSG_SIZE * ioc->ioc_num_iu_entries;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;

	status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
	    &mr_attr, &ioc->ioc_iu_mr_hdl, &mr_desc);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IU buffer pool MR err(%d)",
		    status);
		goto srq_iu_alloc_err;
	}

	for (iu_ndx = 0, iu = ioc->ioc_iu_pool; iu_ndx <
	    ioc->ioc_num_iu_entries; iu_ndx++, iu++) {

		iu_offset = (iu_ndx * SRPT_DEFAULT_SEND_MSG_SIZE);
		iu->iu_buf = (void *)((uintptr_t)ioc->ioc_iu_bufs + iu_offset);

		mutex_init(&iu->iu_lock, NULL, MUTEX_DRIVER, NULL);

		iu->iu_sge.ds_va = mr_desc.md_vaddr + iu_offset;
		iu->iu_sge.ds_key = mr_desc.md_lkey;
		iu->iu_sge.ds_len = SRPT_DEFAULT_SEND_MSG_SIZE;
		iu->iu_ioc = ioc;
		iu->iu_pool_ndx = iu_ndx;

		status = srpt_ioc_post_recv_iu(ioc, &ioc->ioc_iu_pool[iu_ndx]);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, SRQ IU post err(%d)",
			    status);
			goto srq_iu_post_err;
		}
	}

	/*
	 * Initialize the dbuf vmem arena
	 */
	(void) snprintf(namebuf, sizeof (namebuf),
	    "srpt_buf_pool_%16llX", (u_longlong_t)guid);
	ioc->ioc_dbuf_pool = srpt_vmem_create(namebuf, ioc,
	    SRPT_BUF_MR_CHUNKSIZE, SRPT_BUF_POOL_MAX, srpt_dbuf_mr_flags);

	if (ioc->ioc_dbuf_pool == NULL) {
		goto stmf_db_alloc_err;
	}

	/*
	 * Allocate the I/O Controller STMF data buffer allocator.  The
	 * data store will span all targets associated with this IOC.
	 */
	ioc->ioc_stmf_ds = stmf_alloc(STMF_STRUCT_DBUF_STORE, 0, 0);
	if (ioc->ioc_stmf_ds == NULL) {
		SRPT_DPRINTF_L1("ioc_init, STMF DBUF alloc failure for IOC");
		goto stmf_db_alloc_err;
	}
	ioc->ioc_stmf_ds->ds_alloc_data_buf = &srpt_ioc_ds_alloc_dbuf;
	ioc->ioc_stmf_ds->ds_free_data_buf = &srpt_ioc_ds_free_dbuf;
	ioc->ioc_stmf_ds->ds_port_private = ioc;

	rw_exit(&ioc->ioc_rwlock);
	return (ioc);

stmf_db_alloc_err:
	if (ioc->ioc_dbuf_pool != NULL) {
		srpt_vmem_destroy(ioc->ioc_dbuf_pool);
	}

srq_iu_post_err:
	if (ioc->ioc_iu_mr_hdl != NULL) {
		status = ibt_deregister_mr(ioc->ioc_ibt_hdl,
		    ioc->ioc_iu_mr_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, error deregistering"
			    " memory region (%d)", status);
		}
	}
	for (err_ndx = 0, iu = ioc->ioc_iu_pool; err_ndx < iu_ndx;
	    err_ndx++, iu++) {
		mutex_destroy(&iu->iu_lock);
	}

srq_iu_alloc_err:
	if (ioc->ioc_iu_bufs != NULL) {
		kmem_free(ioc->ioc_iu_bufs, SRPT_DEFAULT_SEND_MSG_SIZE *
		    ioc->ioc_num_iu_entries);
	}
	if (ioc->ioc_iu_pool != NULL) {
		kmem_free(ioc->ioc_iu_pool,
		    sizeof (srpt_iu_t) * ioc->ioc_num_iu_entries);
	}
	if (ioc->ioc_srq_hdl != NULL) {
		status = ibt_free_srq(ioc->ioc_srq_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, error freeing SRQ (%d)",
			    status);
		}
	}

srq_alloc_err:
	status = ibt_free_pd(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, free PD error (%d)", status);
	}

pd_alloc_err:
	status = ibt_close_hca(ioc->ioc_ibt_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, close ioc error (%d)", status);
	}

hca_open_err:
	rw_exit(&ioc->ioc_rwlock);
	rw_destroy(&ioc->ioc_rwlock);
	kmem_free(ioc, sizeof (*ioc));
	return (NULL);
}
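
/*
 * A note on the unwind path above: srpt_ioc_init() follows the usual
 * cascading goto convention, so a failure at any step jumps past the
 * cleanup of resources that were never acquired and then falls through
 * the remaining labels, releasing everything in the reverse order of
 * acquisition.
 */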

/*
 * srpt_ioc_fini() - I/O Controller Cleanup
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
static void
srpt_ioc_fini(srpt_ioc_t *ioc)
{
	int		status;
	int		ndx;

	/*
	 * Note driver flows will have already taken all SRP
	 * services running on the I/O Controller off-line.
	 */
	rw_enter(&ioc->ioc_rwlock, RW_WRITER);
	if (ioc->ioc_ibt_hdl != NULL) {
		if (ioc->ioc_stmf_ds != NULL) {
			stmf_free(ioc->ioc_stmf_ds);
		}

		if (ioc->ioc_srq_hdl != NULL) {
			SRPT_DPRINTF_L4("ioc_fini, freeing SRQ");
			status = ibt_free_srq(ioc->ioc_srq_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, free SRQ"
				    " error (%d)", status);
			}
		}

		if (ioc->ioc_iu_mr_hdl != NULL) {
			status = ibt_deregister_mr(
			    ioc->ioc_ibt_hdl, ioc->ioc_iu_mr_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, error deregistering"
				    " memory region (%d)", status);
			}
		}

		if (ioc->ioc_iu_bufs != NULL) {
			kmem_free(ioc->ioc_iu_bufs, SRPT_DEFAULT_SEND_MSG_SIZE *
			    ioc->ioc_num_iu_entries);
		}

		if (ioc->ioc_iu_pool != NULL) {
			SRPT_DPRINTF_L4("ioc_fini, freeing IU entries");
			for (ndx = 0; ndx < ioc->ioc_num_iu_entries; ndx++) {
				mutex_destroy(&ioc->ioc_iu_pool[ndx].iu_lock);
			}

			SRPT_DPRINTF_L4("ioc_fini, free IU pool struct");
			kmem_free(ioc->ioc_iu_pool,
			    sizeof (srpt_iu_t) * (ioc->ioc_num_iu_entries));
			ioc->ioc_iu_pool = NULL;
			ioc->ioc_num_iu_entries = 0;
		}

		if (ioc->ioc_dbuf_pool != NULL) {
			srpt_vmem_destroy(ioc->ioc_dbuf_pool);
		}

		if (ioc->ioc_pd_hdl != NULL) {
			status = ibt_free_pd(ioc->ioc_ibt_hdl,
			    ioc->ioc_pd_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, free PD"
				    " error (%d)", status);
			}
		}

		status = ibt_close_hca(ioc->ioc_ibt_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1(
			    "ioc_fini, close ioc error (%d)", status);
		}
	}
	rw_exit(&ioc->ioc_rwlock);
	rw_destroy(&ioc->ioc_rwlock);
	kmem_free(ioc, sizeof (srpt_ioc_t));
}

/*
 * srpt_ioc_port_active() - I/O Controller port active
 */
static void
srpt_ioc_port_active(ibt_async_event_t *event)
{
	ibt_status_t		status;
	srpt_ioc_t		*ioc;
	srpt_target_port_t	*tgt = NULL;
	boolean_t		online_target = B_FALSE;
	stmf_change_status_t	cstatus;

	ASSERT(event != NULL);

	SRPT_DPRINTF_L3("ioc_port_active event handler, invoked");

	/*
	 * Find the HCA in question and if the HCA has completed
	 * initialization, and the SRP Target service for the
	 * I/O Controller exists, then bind this port.
	 */
	ioc = srpt_ioc_get(event->ev_hca_guid);

	if (ioc == NULL) {
		SRPT_DPRINTF_L2("ioc_port_active, I/O Controller not"
		    " active");
		return;
	}

	tgt = ioc->ioc_tgt_port;
	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_port_active, I/O Controller target"
		    " undefined");
		return;
	}

	/*
	 * We take the target lock here to serialize this operation
	 * with any STMF initiated target state transitions.  If
	 * SRP is off-line then the service handle is NULL.
	 */
	mutex_enter(&tgt->tp_lock);

	if (tgt->tp_ibt_svc_hdl != NULL) {
		status = srpt_ioc_svc_bind(tgt, event->ev_port);
		if ((status != IBT_SUCCESS) &&
		    (status != IBT_HCA_PORT_NOT_ACTIVE)) {
			SRPT_DPRINTF_L1("ioc_port_active, bind failed (%d)",
			    status);
		}
	} else {
		/* if we were offline because of no ports, try onlining now */
		if ((tgt->tp_num_active_ports == 0) &&
		    (tgt->tp_requested_state != tgt->tp_state) &&
		    (tgt->tp_requested_state == SRPT_TGT_STATE_ONLINE)) {
			online_target = B_TRUE;
			cstatus.st_completion_status = STMF_SUCCESS;
			cstatus.st_additional_info = "port active";
		}
	}

	mutex_exit(&tgt->tp_lock);

	if (online_target) {
		stmf_status_t	ret;

		ret = stmf_ctl(STMF_CMD_LPORT_ONLINE, tgt->tp_lport, &cstatus);

		if (ret == STMF_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_port_active, port %d active, "
			    "target %016llx online requested", event->ev_port,
			    (u_longlong_t)ioc->ioc_guid);
		} else if (ret != STMF_ALREADY) {
			SRPT_DPRINTF_L1("ioc_port_active, port %d active, "
			    "target %016llx failed online request: %d",
			    event->ev_port, (u_longlong_t)ioc->ioc_guid,
			    (int)ret);
		}
	}
}

/*
 * srpt_ioc_port_down()
 */
static void
srpt_ioc_port_down(ibt_async_event_t *event)
{
	srpt_ioc_t		*ioc;
	srpt_target_port_t	*tgt;
	srpt_channel_t		*ch;
	srpt_channel_t		*next_ch;
	boolean_t		offline_target = B_FALSE;
	stmf_change_status_t	cstatus;

	SRPT_DPRINTF_L3("ioc_port_down event handler, invoked");

	/*
	 * Find the HCA in question and if the HCA has completed
	 * initialization, and the SRP Target service for the
	 * I/O Controller exists, then logout initiators
	 * through this port.
	 */
	ioc = srpt_ioc_get(event->ev_hca_guid);

	if (ioc == NULL) {
		SRPT_DPRINTF_L2("ioc_port_down, I/O Controller not"
		    " active");
		return;
	}

	/*
	 * We only have one target now, but we could go through all
	 * SCSI target ports if more are added.
	 */
	tgt = ioc->ioc_tgt_port;
	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_port_down, I/O Controller target"
		    " undefined");
		return;
	}
	mutex_enter(&tgt->tp_lock);

	/*
	 * For all channels logged in through this port, initiate a
	 * disconnect.
	 */
	mutex_enter(&tgt->tp_ch_list_lock);
	ch = list_head(&tgt->tp_ch_list);
	while (ch != NULL) {
		next_ch = list_next(&tgt->tp_ch_list, ch);
		if (ch->ch_session && (ch->ch_session->ss_hw_port ==
		    event->ev_port)) {
			srpt_ch_disconnect(ch);
		}
		ch = next_ch;
	}
	mutex_exit(&tgt->tp_ch_list_lock);

	tgt->tp_num_active_ports--;

	/* if we have no active ports, take the target offline */
	if ((tgt->tp_num_active_ports == 0) &&
	    (tgt->tp_state == SRPT_TGT_STATE_ONLINE)) {
		cstatus.st_completion_status = STMF_SUCCESS;
		cstatus.st_additional_info = "no ports active";
		offline_target = B_TRUE;
	}

	mutex_exit(&tgt->tp_lock);

	if (offline_target) {
		stmf_status_t	ret;

		ret = stmf_ctl(STMF_CMD_LPORT_OFFLINE, tgt->tp_lport, &cstatus);

		if (ret == STMF_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_port_down, port %d down, target "
			    "%016llx offline requested", event->ev_port,
			    (u_longlong_t)ioc->ioc_guid);
		} else if (ret != STMF_ALREADY) {
			SRPT_DPRINTF_L1("ioc_port_down, port %d down, target "
			    "%016llx failed offline request: %d",
			    event->ev_port,
			    (u_longlong_t)ioc->ioc_guid, (int)ret);
		}
	}
}

/*
 * srpt_ioc_ib_async_hdlr - I/O Controller IB asynchronous events
 */
/* ARGSUSED */
void
srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
	ibt_async_code_t code, ibt_async_event_t *event)
{
	srpt_ioc_t		*ioc;
	srpt_channel_t		*ch;

	switch (code) {
	case IBT_EVENT_PORT_UP:
		srpt_ioc_port_active(event);
		break;

	case IBT_ERROR_PORT_DOWN:
		srpt_ioc_port_down(event);
		break;

	case IBT_HCA_ATTACH_EVENT:
		rw_enter(&srpt_ctxt->sc_rwlock, RW_WRITER);
		ioc = srpt_ioc_init(event->ev_hca_guid);

		if (ioc == NULL) {
			rw_exit(&srpt_ctxt->sc_rwlock);
			SRPT_DPRINTF_L1("ib_async_hdlr, HCA_ATTACH"
			    " event failed to initialize HCA (0x%016llx)",
			    (u_longlong_t)event->ev_hca_guid);
			return;
		}
		SRPT_DPRINTF_L2("HCA_ATTACH_EVENT: I/O Controller"
		    " ibt hdl (%p)",
		    (void *)ioc->ioc_ibt_hdl);

		rw_enter(&ioc->ioc_rwlock, RW_WRITER);
		ioc->ioc_tgt_port = srpt_stp_alloc_port(ioc, ioc->ioc_guid);
		if (ioc->ioc_tgt_port == NULL) {
			SRPT_DPRINTF_L1("ioc_ib_async_hdlr, alloc SCSI "
			    "target port error for HCA (0x%016llx)",
			    (u_longlong_t)event->ev_hca_guid);
			rw_exit(&ioc->ioc_rwlock);
			srpt_ioc_fini(ioc);
			rw_exit(&srpt_ctxt->sc_rwlock);
			return;
		}

		/*
		 * New HCA added with default SCSI Target Port, SRP service
		 * will be started when SCSI Target Port is brought
		 * on-line by STMF.
		 */
		srpt_ctxt->sc_num_iocs++;
		list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc);

		rw_exit(&ioc->ioc_rwlock);
		rw_exit(&srpt_ctxt->sc_rwlock);
		break;

	case IBT_HCA_DETACH_EVENT:
		SRPT_DPRINTF_L1(
		    "ioc_ib_async_hdlr, HCA_DETACH_EVENT received.");
		break;

	case IBT_EVENT_EMPTY_CHAN:
		/* Channel in ERROR state is now empty */
		ch = (srpt_channel_t *)ibt_get_chan_private(event->ev_chan_hdl);
		SRPT_DPRINTF_L3(
		    "ioc_ib_async_hdlr, received empty channel error on %p",
		    (void *)ch);
		break;

	default:
		SRPT_DPRINTF_L2("ioc_ib_async_hdlr, event not "
		    "handled (%d)", code);
		break;
	}
}

/*
 * srpt_ioc_svc_bind()
 */
ibt_status_t
srpt_ioc_svc_bind(srpt_target_port_t *tgt, uint_t portnum)
{
	ibt_status_t		status;
	srpt_hw_port_t		*port;
	ibt_hca_portinfo_t	*portinfo;
	uint_t			qportinfo_sz;
	uint_t			qportnum;
	ib_gid_t		new_gid;
	srpt_ioc_t		*ioc;
	srpt_session_t		sess;

	ASSERT(tgt != NULL);
	ASSERT(tgt->tp_ioc != NULL);
	ioc = tgt->tp_ioc;

	if (tgt->tp_ibt_svc_hdl == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_bind, NULL SCSI target port"
		    " service");
		return (IBT_INVALID_PARAM);
	}

	if (portnum == 0 || portnum > tgt->tp_nports) {
		SRPT_DPRINTF_L2("ioc_svc_bind, bad port (%d)", portnum);
		return (IBT_INVALID_PARAM);
	}
	status = ibt_query_hca_ports(ioc->ioc_ibt_hdl, portnum,
	    &portinfo, &qportnum, &qportinfo_sz);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_svc_bind, query port %d error (%d)",
		    portnum, status);
		return (IBT_INVALID_PARAM);
	}

	ASSERT(portinfo != NULL);

	/*
	 * If port is not active do nothing, caller should attempt to bind
	 * after the port goes active.
	 */
	if (portinfo->p_linkstate != IBT_PORT_ACTIVE) {
		SRPT_DPRINTF_L2("ioc_svc_bind, port %d not in active state",
		    portnum);
		ibt_free_portinfo(portinfo, qportinfo_sz);
		return (IBT_HCA_PORT_NOT_ACTIVE);
	}

	port = &tgt->tp_hw_port[portnum-1];
	new_gid = portinfo->p_sgid_tbl[0];
	ibt_free_portinfo(portinfo, qportinfo_sz);

	/*
	 * If previously bound and the port GID has changed,
	 * rebind to the new GID.
	 */
	if (port->hwp_bind_hdl != NULL) {
		if (new_gid.gid_guid != port->hwp_gid.gid_guid ||
		    new_gid.gid_prefix != port->hwp_gid.gid_prefix) {
			SRPT_DPRINTF_L2("ioc_svc_bind, unregister current"
			    " bind");
			(void) ibt_unbind_service(tgt->tp_ibt_svc_hdl,
			    port->hwp_bind_hdl);
			port->hwp_bind_hdl = NULL;
		}
	}
	SRPT_DPRINTF_L2("ioc_svc_bind, bind service, %016llx:%016llx",
	    (u_longlong_t)new_gid.gid_prefix,
	    (u_longlong_t)new_gid.gid_guid);

	/*
	 * Pass SCSI Target Port as CM private data, the target will always
	 * exist while this service is bound.
	 */
	status = ibt_bind_service(tgt->tp_ibt_svc_hdl, new_gid, NULL, tgt,
	    &port->hwp_bind_hdl);
	if (status != IBT_SUCCESS && status != IBT_CM_SERVICE_EXISTS) {
		SRPT_DPRINTF_L1("ioc_svc_bind, bind error (%d)", status);
		return (status);
	}
	tgt->tp_num_active_ports++;
	port->hwp_gid.gid_prefix = new_gid.gid_prefix;
	port->hwp_gid.gid_guid = new_gid.gid_guid;

	/* Set up a transient structure for the dtrace probe. */
	bzero(&sess, sizeof (srpt_session_t));
	ALIAS_STR(sess.ss_t_gid, new_gid.gid_prefix, new_gid.gid_guid);
	EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);

	DTRACE_SRP_1(service__up, srpt_session_t, &sess);

	return (IBT_SUCCESS);
}

/*
 * srpt_ioc_svc_unbind()
 */
void
srpt_ioc_svc_unbind(srpt_target_port_t *tgt, uint_t portnum)
{
	srpt_hw_port_t	*port;
	srpt_session_t	sess;
	ibt_status_t	ret;

	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_unbind, SCSI target does not exist");
		return;
	}

	if (portnum == 0 || portnum > tgt->tp_nports) {
		SRPT_DPRINTF_L2("ioc_svc_unbind, bad port (%d)", portnum);
		return;
	}
	port = &tgt->tp_hw_port[portnum-1];

	/* Set up a transient structure for the dtrace probe. */
	bzero(&sess, sizeof (srpt_session_t));
	ALIAS_STR(sess.ss_t_gid, port->hwp_gid.gid_prefix,
	    port->hwp_gid.gid_guid);
	EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);

	DTRACE_SRP_1(service__down, srpt_session_t, &sess);

	if (tgt->tp_ibt_svc_hdl != NULL && port->hwp_bind_hdl != NULL) {
		SRPT_DPRINTF_L2("ioc_svc_unbind, unregister current bind");
		ret = ibt_unbind_service(tgt->tp_ibt_svc_hdl,
		    port->hwp_bind_hdl);
		if (ret != IBT_SUCCESS) {
			SRPT_DPRINTF_L1(
			    "ioc_svc_unbind, unregister port %d failed: %d",
			    portnum, ret);
		} else {
			port->hwp_bind_hdl = NULL;
			port->hwp_gid.gid_prefix = 0;
			port->hwp_gid.gid_guid = 0;
		}
	}
}

/*
 * srpt_ioc_svc_unbind_all()
 */
void
srpt_ioc_svc_unbind_all(srpt_target_port_t *tgt)
{
	uint_t		portnum;

	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_unbind_all, NULL SCSI target port"
		    " specified");
		return;
	}
	for (portnum = 1; portnum <= tgt->tp_nports; portnum++) {
		srpt_ioc_svc_unbind(tgt, portnum);
	}
}

/*
 * srpt_ioc_get_locked()
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
srpt_ioc_t *
srpt_ioc_get_locked(ib_guid_t guid)
{
	srpt_ioc_t	*ioc;

	ioc = list_head(&srpt_ctxt->sc_ioc_list);
	while (ioc != NULL) {
		if (ioc->ioc_guid == guid) {
			break;
		}
		ioc = list_next(&srpt_ctxt->sc_ioc_list, ioc);
	}
	return (ioc);
}

/*
 * srpt_ioc_get()
 */
srpt_ioc_t *
srpt_ioc_get(ib_guid_t guid)
{
	srpt_ioc_t	*ioc;

	rw_enter(&srpt_ctxt->sc_rwlock, RW_READER);
	ioc = srpt_ioc_get_locked(guid);
	rw_exit(&srpt_ctxt->sc_rwlock);
	return (ioc);
}

/*
 * srpt_ioc_post_recv_iu()
 */
ibt_status_t
srpt_ioc_post_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
{
	ibt_status_t		status;
	ibt_recv_wr_t		wr;
	uint_t			posted;

	ASSERT(ioc != NULL);
	ASSERT(iu != NULL);

	wr.wr_id = (ibt_wrid_t)(uintptr_t)iu;
	wr.wr_nds = 1;
	wr.wr_sgl = &iu->iu_sge;
	posted = 0;

	status = ibt_post_srq(ioc->ioc_srq_hdl, &wr, 1, &posted);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ioc_post_recv_iu, post error (%d)",
		    status);
	}
	return (status);
}

/*
 * srpt_ioc_repost_recv_iu()
 */
void
srpt_ioc_repost_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
{
	srpt_channel_t		*ch;
	ibt_status_t		status;

	ASSERT(iu != NULL);
	ASSERT(mutex_owned(&iu->iu_lock));

	/*
	 * Some additional sanity checks while in debug state; all STMF
	 * related task activities should be complete prior to returning
	 * this IU to the available pool.
	 */
	ASSERT(iu->iu_stmf_task == NULL);
	ASSERT(iu->iu_sq_posted_cnt == 0);

	ch = iu->iu_ch;
	iu->iu_ch = NULL;
	iu->iu_num_rdescs = 0;
	iu->iu_rdescs = NULL;
	iu->iu_tot_xfer_len = 0;
	iu->iu_tag = 0;
	iu->iu_flags = 0;
	iu->iu_sq_posted_cnt = 0;

	status = srpt_ioc_post_recv_iu(ioc, iu);

	if (status != IBT_SUCCESS) {
		/*
		 * Very bad; we should initiate a shutdown of the I/O
		 * Controller here, off-lining any targets associated
		 * with this I/O Controller (and therefore disconnecting
		 * any logins that remain).
		 *
		 * In practice this should never happen, so we put
		 * the code near the bottom of the implementation list.
		 */
		SRPT_DPRINTF_L0("ioc_repost_recv_iu, error RX IU (%d)",
		    status);
		ASSERT(0);
	} else if (ch != NULL) {
		atomic_inc_32(&ch->ch_req_lim_delta);
	}
}
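
/*
 * Worked example of the 24-bit field packing performed in
 * srpt_ioc_init_profile() below (values are illustrative only): an HCA
 * vendor ID of 0x123456 shifted left 8 bits leaves the low-order slot
 * byte zero, giving 0x12345600; h2b32() stores that big-endian, so the
 * profile field reads 12 34 56 00 on the wire.
 */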

/*
 * srpt_ioc_init_profile()
 *
 * SRP I/O Controller serialization lock must be held when this
 * routine is invoked.
 */
void
srpt_ioc_init_profile(srpt_ioc_t *ioc)
{
	srpt_ioc_opcap_mask_t		capmask = {0};

	ASSERT(ioc != NULL);

	ioc->ioc_profile.ioc_guid = h2b64(ioc->ioc_guid);
	(void) memcpy(ioc->ioc_profile.ioc_id_string,
	    "Solaris SRP Target 0.9a", 23);

	/*
	 * Note vendor ID and subsystem ID are 24 bit values.  Low order
	 * 8 bits in vendor ID field is slot and is initialized to zero.
	 * Low order 8 bits of subsystem ID is a reserved field and
	 * initialized to zero.
	 */
	ioc->ioc_profile.ioc_vendorid =
	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
	ioc->ioc_profile.ioc_deviceid =
	    h2b32((uint32_t)ioc->ioc_attr.hca_device_id);
	ioc->ioc_profile.ioc_device_ver =
	    h2b16((uint16_t)ioc->ioc_attr.hca_version_id);
	ioc->ioc_profile.ioc_subsys_vendorid =
	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
	ioc->ioc_profile.ioc_subsys_id = h2b32(0);
	ioc->ioc_profile.ioc_io_class = h2b16(SRP_REV_16A_IO_CLASS);
	ioc->ioc_profile.ioc_io_subclass = h2b16(SRP_IO_SUBCLASS);
	ioc->ioc_profile.ioc_protocol = h2b16(SRP_PROTOCOL);
	ioc->ioc_profile.ioc_protocol_ver = h2b16(SRP_PROTOCOL_VERSION);
	ioc->ioc_profile.ioc_send_msg_qdepth = h2b16(srpt_send_msg_depth);
	ioc->ioc_profile.ioc_rdma_read_qdepth =
	    ioc->ioc_attr.hca_max_rdma_out_chan;
	ioc->ioc_profile.ioc_send_msg_sz = h2b32(SRPT_DEFAULT_SEND_MSG_SIZE);
	ioc->ioc_profile.ioc_rdma_xfer_sz = h2b32(SRPT_DEFAULT_MAX_RDMA_SIZE);

	capmask.bits.st = 1;	/* Messages can be sent to IOC */
	capmask.bits.sf = 1;	/* Messages can be sent from IOC */
	capmask.bits.rf = 1;	/* RDMA Reads can be sent from IOC */
	capmask.bits.wf = 1;	/* RDMA Writes can be sent from IOC */
	ioc->ioc_profile.ioc_ctrl_opcap_mask = capmask.mask;

	/*
	 * We currently only have one target, but if we had a list we would
	 * go through that list and only count those that are ONLINE when
	 * setting the services count and entries.
	 */
	if (ioc->ioc_tgt_port->tp_srp_enabled) {
		ioc->ioc_profile.ioc_service_entries = 1;
		ioc->ioc_svc.srv_id = h2b64(ioc->ioc_guid);
		(void) snprintf((char *)ioc->ioc_svc.srv_name,
		    IB_DM_MAX_SVC_NAME_LEN, "SRP.T10:%016llx",
		    (u_longlong_t)ioc->ioc_guid);
	} else {
		ioc->ioc_profile.ioc_service_entries = 0;
		ioc->ioc_svc.srv_id = 0;
	}
}

/*
 * srpt_ioc_ds_alloc_dbuf()
 */
/* ARGSUSED */
stmf_data_buf_t *
srpt_ioc_ds_alloc_dbuf(struct scsi_task *task, uint32_t size,
	uint32_t *pminsize, uint32_t flags)
{
	srpt_iu_t		*iu;
	srpt_ioc_t		*ioc;
	srpt_ds_dbuf_t		*dbuf;
	stmf_data_buf_t		*stmf_dbuf;
	void			*buf;
	srpt_mr_t		mr;

	ASSERT(task != NULL);
	iu = task->task_port_private;
	ioc = iu->iu_ioc;

	SRPT_DPRINTF_L4("ioc_ds_alloc_dbuf, invoked ioc(%p)"
	    " size(%d), flags(%x)",
	    (void *)ioc, size, flags);

	buf = srpt_vmem_alloc(ioc->ioc_dbuf_pool, size);
	if (buf == NULL) {
		return (NULL);
	}

	if (srpt_vmem_mr(ioc->ioc_dbuf_pool, buf, size, &mr) != 0) {
		goto stmf_alloc_err;
	}

	stmf_dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, sizeof (srpt_ds_dbuf_t),
	    0);
	if (stmf_dbuf == NULL) {
		SRPT_DPRINTF_L2("ioc_ds_alloc_dbuf, stmf_alloc failed");
		goto stmf_alloc_err;
	}

	dbuf = stmf_dbuf->db_port_private;
	dbuf->db_stmf_buf = stmf_dbuf;
	dbuf->db_mr_hdl = mr.mr_hdl;
	dbuf->db_ioc = ioc;
	dbuf->db_sge.ds_va = mr.mr_va;
	dbuf->db_sge.ds_key = mr.mr_lkey;
	dbuf->db_sge.ds_len = size;

	stmf_dbuf->db_buf_size = size;
	stmf_dbuf->db_data_size = size;
	stmf_dbuf->db_relative_offset = 0;
	stmf_dbuf->db_flags = 0;
	stmf_dbuf->db_xfer_status = 0;
	stmf_dbuf->db_sglist_length = 1;
	stmf_dbuf->db_sglist[0].seg_addr = buf;
	stmf_dbuf->db_sglist[0].seg_length = size;

	return (stmf_dbuf);

stmf_alloc_err:
	srpt_vmem_free(ioc->ioc_dbuf_pool, buf, size);

	return (NULL);
}

void
srpt_ioc_ds_free_dbuf(struct stmf_dbuf_store *ds,
	stmf_data_buf_t *dbuf)
{
	srpt_ioc_t	*ioc;

	SRPT_DPRINTF_L4("ioc_ds_free_dbuf, invoked buf (%p)",
	    (void *)dbuf);
	ioc = ds->ds_port_private;

	srpt_vmem_free(ioc->ioc_dbuf_pool, dbuf->db_sglist[0].seg_addr,
	    dbuf->db_buf_size);
	stmf_free(dbuf);
}

/* Memory arena routines */
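
/*
 * The dbuf arena couples a vmem arena with IBT memory registration:
 * each backing chunk is kmem allocated, registered as a single memory
 * region, and tracked in an AVL tree keyed by virtual address so that
 * any buffer carved from the arena can be mapped back to its covering
 * MR.  A minimal sketch of the intended calling pattern (mirroring
 * srpt_ioc_ds_alloc_dbuf() above; "pool" is a pool created by
 * srpt_vmem_create()):
 *
 *	srpt_mr_t mr;
 *	void *buf = srpt_vmem_alloc(pool, size);
 *
 *	if (buf != NULL &&
 *	    srpt_vmem_mr(pool, buf, size, &mr) == DDI_SUCCESS) {
 *		... program mr.mr_va and mr.mr_lkey into an SGE ...
 *	}
 *	...
 *	srpt_vmem_free(pool, buf, size);
 */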

static srpt_vmem_pool_t *
srpt_vmem_create(const char *name, srpt_ioc_t *ioc, ib_memlen_t chunksize,
    uint64_t maxsize, ibt_mr_flags_t flags)
{
	srpt_mr_t		*chunk;
	srpt_vmem_pool_t	*result;

	ASSERT(chunksize <= maxsize);

	result = kmem_zalloc(sizeof (srpt_vmem_pool_t), KM_SLEEP);

	result->svp_ioc = ioc;
	result->svp_chunksize = chunksize;
	result->svp_max_size = maxsize;
	result->svp_flags = flags;

	rw_init(&result->svp_lock, NULL, RW_DRIVER, NULL);
	avl_create(&result->svp_mr_list, srpt_vmem_mr_compare,
	    sizeof (srpt_mr_t), offsetof(srpt_mr_t, mr_avl));

	chunk = srpt_vmem_chunk_alloc(result, chunksize);

	avl_add(&result->svp_mr_list, chunk);
	result->svp_total_size = chunksize;

	result->svp_vmem = vmem_create(name,
	    (void *)(uintptr_t)chunk->mr_va,
	    (size_t)chunk->mr_len, SRPT_MR_QUANTSIZE,
	    NULL, NULL, NULL, 0, VM_SLEEP);

	return (result);
}

static void
srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool)
{
	srpt_mr_t		*chunk;
	srpt_mr_t		*next;

	rw_enter(&vm_pool->svp_lock, RW_WRITER);
	vmem_destroy(vm_pool->svp_vmem);

	chunk = avl_first(&vm_pool->svp_mr_list);

	while (chunk != NULL) {
		next = AVL_NEXT(&vm_pool->svp_mr_list, chunk);
		avl_remove(&vm_pool->svp_mr_list, chunk);
		srpt_vmem_chunk_free(vm_pool, chunk);
		chunk = next;
	}

	avl_destroy(&vm_pool->svp_mr_list);

	rw_exit(&vm_pool->svp_lock);
	rw_destroy(&vm_pool->svp_lock);

	kmem_free(vm_pool, sizeof (srpt_vmem_pool_t));
}

static void *
srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size)
{
	void		*result;
	srpt_mr_t	*next;
	ib_memlen_t	chunklen;

	ASSERT(vm_pool != NULL);

	result = vmem_alloc(vm_pool->svp_vmem, size,
	    VM_NOSLEEP | VM_FIRSTFIT);

	if (result != NULL) {
		/* memory successfully allocated */
		return (result);
	}

	/* need more vmem */
	rw_enter(&vm_pool->svp_lock, RW_WRITER);
	chunklen = vm_pool->svp_chunksize;

	if (vm_pool->svp_total_size >= vm_pool->svp_max_size) {
		/* no more room to alloc */
		rw_exit(&vm_pool->svp_lock);
		return (NULL);
	}

	if ((vm_pool->svp_total_size + chunklen) > vm_pool->svp_max_size) {
		chunklen = vm_pool->svp_max_size - vm_pool->svp_total_size;
	}

	next = srpt_vmem_chunk_alloc(vm_pool, chunklen);
	if (next != NULL) {
		/*
		 * Note that the size of the chunk we got
		 * may not be the size we requested.  Use the
		 * length returned in the chunk itself.
		 */
		if (vmem_add(vm_pool->svp_vmem, (void *)(uintptr_t)next->mr_va,
		    next->mr_len, VM_NOSLEEP) == NULL) {
			srpt_vmem_chunk_free(vm_pool, next);
			SRPT_DPRINTF_L2("vmem_add failed");
		} else {
			vm_pool->svp_total_size += next->mr_len;
			avl_add(&vm_pool->svp_mr_list, next);
		}
	}

	rw_exit(&vm_pool->svp_lock);

	result = vmem_alloc(vm_pool->svp_vmem, size, VM_NOSLEEP | VM_FIRSTFIT);

	return (result);
}

static void
srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size)
{
	vmem_free(vm_pool->svp_vmem, vaddr, size);
}

static int
srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
    srpt_mr_t *mr)
{
	avl_index_t		where;
	ib_vaddr_t		mrva = (ib_vaddr_t)(uintptr_t)vaddr;
	srpt_mr_t		chunk;
	srpt_mr_t		*nearest;
	ib_vaddr_t		chunk_end;
	int			status = DDI_FAILURE;

	rw_enter(&vm_pool->svp_lock, RW_READER);

	chunk.mr_va = mrva;
	nearest = avl_find(&vm_pool->svp_mr_list, &chunk, &where);

	if (nearest == NULL) {
		nearest = avl_nearest(&vm_pool->svp_mr_list, where,
		    AVL_BEFORE);
	}

	if (nearest != NULL) {
		/* Verify this chunk contains the specified address range */
		ASSERT(nearest->mr_va <= mrva);

		chunk_end = nearest->mr_va + nearest->mr_len;
		if (chunk_end >= mrva + size) {
			mr->mr_hdl = nearest->mr_hdl;
			mr->mr_va = mrva;
			mr->mr_len = size;
			mr->mr_lkey = nearest->mr_lkey;
			mr->mr_rkey = nearest->mr_rkey;
			status = DDI_SUCCESS;
		}
	}

	rw_exit(&vm_pool->svp_lock);
	return (status);
}
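
/*
 * Grow the pool by one registered chunk.  Under memory pressure
 * srpt_vmem_chunk_alloc() halves the requested chunk size until the
 * kmem allocation succeeds or the size would drop below
 * SRPT_MIN_CHUNKSIZE.
 */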
static srpt_mr_t *
srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool, ib_memlen_t chunksize)
{
	void		*chunk = NULL;
	srpt_mr_t	*result = NULL;

	while ((chunk == NULL) && (chunksize >= SRPT_MIN_CHUNKSIZE)) {
		chunk = kmem_alloc(chunksize, KM_NOSLEEP);
		if (chunk == NULL) {
			SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
			    "failed to alloc chunk of %d, trying %d",
			    (int)chunksize, (int)chunksize / 2);
			chunksize /= 2;
		}
	}

	if (chunk != NULL) {
		result = srpt_reg_mem(vm_pool, (ib_vaddr_t)(uintptr_t)chunk,
		    chunksize);
		if (result == NULL) {
			SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
			    "chunk registration failed");
			kmem_free(chunk, chunksize);
		}
	}

	return (result);
}

static void
srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr)
{
	void		*chunk = (void *)(uintptr_t)mr->mr_va;
	ib_memlen_t	chunksize = mr->mr_len;

	srpt_dereg_mem(vm_pool->svp_ioc, mr);
	kmem_free(chunk, chunksize);
}

static srpt_mr_t *
srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr, ib_memlen_t len)
{
	srpt_mr_t	*result = NULL;
	ibt_mr_attr_t	mr_attr;
	ibt_mr_desc_t	mr_desc;
	ibt_status_t	status;
	srpt_ioc_t	*ioc = vm_pool->svp_ioc;

	result = kmem_zalloc(sizeof (srpt_mr_t), KM_NOSLEEP);
	if (result == NULL) {
		SRPT_DPRINTF_L2("srpt_reg_mem: failed to allocate");
		return (NULL);
	}

	bzero(&mr_attr, sizeof (ibt_mr_attr_t));
	bzero(&mr_desc, sizeof (ibt_mr_desc_t));

	mr_attr.mr_vaddr = vaddr;
	mr_attr.mr_len = len;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = vm_pool->svp_flags;

	status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
	    &mr_attr, &result->mr_hdl, &mr_desc);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("srpt_reg_mem: ibt_register_mr "
		    "failed %d", status);
		kmem_free(result, sizeof (srpt_mr_t));
		return (NULL);
	}

	result->mr_va = mr_attr.mr_vaddr;
	result->mr_len = mr_attr.mr_len;
	result->mr_lkey = mr_desc.md_lkey;
	result->mr_rkey = mr_desc.md_rkey;

	return (result);
}

static void
srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr)
{
	ibt_status_t	status;

	status = ibt_deregister_mr(ioc->ioc_ibt_hdl, mr->mr_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("srpt_dereg_mem, error deregistering MR (%d)",
		    status);
	}
	kmem_free(mr, sizeof (srpt_mr_t));
}

static int
srpt_vmem_mr_compare(const void *a, const void *b)
{
	srpt_mr_t	*mr1 = (srpt_mr_t *)a;
	srpt_mr_t	*mr2 = (srpt_mr_t *)b;

	/* sort and match by virtual address */
	if (mr1->mr_va < mr2->mr_va) {
		return (-1);
	} else if (mr1->mr_va > mr2->mr_va) {
		return (1);
	}

	return (0);
}