1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * tavor_misc.c 29 * Tavor Miscellaneous routines - Address Handle, Multicast, Protection 30 * Domain, and port-related operations 31 * 32 * Implements all the routines necessary for allocating, freeing, querying 33 * and modifying Address Handles and Protection Domains. Also implements 34 * all the routines necessary for adding and removing Queue Pairs to/from 35 * Multicast Groups. Lastly, it implements the routines necessary for 36 * port-related query and modify operations. 
37 */ 38 39 #include <sys/types.h> 40 #include <sys/conf.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/modctl.h> 44 #include <sys/bitmap.h> 45 #include <sys/sysmacros.h> 46 47 #include <sys/ib/adapters/tavor/tavor.h> 48 49 /* used for helping uniquify fmr pool taskq name */ 50 static uint_t tavor_debug_fmrpool_cnt = 0x00000000; 51 52 static void tavor_udav_sync(tavor_ahhdl_t ah, tavor_hw_udav_t *udav, 53 uint_t flag); 54 static int tavor_mcg_qplist_add(tavor_state_t *state, tavor_mcghdl_t mcg, 55 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp, uint_t *qp_found); 56 static int tavor_mcg_qplist_remove(tavor_mcghdl_t mcg, 57 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp); 58 static void tavor_qp_mcg_refcnt_inc(tavor_qphdl_t qp); 59 static void tavor_qp_mcg_refcnt_dec(tavor_qphdl_t qp); 60 static uint_t tavor_mcg_walk_mgid_hash(tavor_state_t *state, 61 uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx); 62 static void tavor_mcg_setup_new_hdr(tavor_mcghdl_t mcg, 63 tavor_hw_mcg_t *mcg_hdr, ib_gid_t mgid, tavor_rsrc_t *mcg_rsrc); 64 static int tavor_mcg_hash_list_remove(tavor_state_t *state, uint_t curr_indx, 65 uint_t prev_indx, tavor_hw_mcg_t *mcg_entry); 66 static int tavor_mcg_entry_invalidate(tavor_state_t *state, 67 tavor_hw_mcg_t *mcg_entry, uint_t indx); 68 static int tavor_mgid_is_valid(ib_gid_t gid); 69 static int tavor_mlid_is_valid(ib_lid_t lid); 70 static void tavor_fmr_processing(void *fmr_args); 71 static int tavor_fmr_cleanup(tavor_state_t *state, tavor_fmrhdl_t pool); 72 static void tavor_fmr_cache_init(tavor_fmrhdl_t fmr); 73 static void tavor_fmr_cache_fini(tavor_fmrhdl_t fmr); 74 static int tavor_fmr_avl_compare(const void *q, const void *e); 75 76 77 /* 78 * tavor_ah_alloc() 79 * Context: Can be called only from user or kernel context. 
 */
int
tavor_ah_alloc(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_adds_vect_t *attr_p, tavor_ahhdl_t *ahhdl, uint_t sleepflag)
{
	tavor_rsrc_t		*udav, *rsrc;
	tavor_hw_udav_t		udav_entry;
	tavor_ahhdl_t		ah;
	ibt_mr_attr_t		mr_attr;
	tavor_mr_options_t	op;
	tavor_mrhdl_t		mr;
	uint64_t		data;
	uint32_t		size;
	int			status, i, flag;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_ah_alloc);

	/*
	 * Allocates an Address Handle (AH): a hardware UDAV entry is
	 * allocated and filled in, the UDAV memory is registered with the
	 * TPT tables, and a software AH struct is set up to track it all.
	 * On success "*ahhdl" points at the new handle.  On failure, all
	 * partially-acquired resources are released via the goto-cleanup
	 * chain at the bottom ("status"/"errormsg" are set by the
	 * TAVOR_TNF_FAIL macro before each goto).
	 *
	 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to
	 * indicate that we wish to allocate an "invalid" (i.e. empty)
	 * address handle XXX
	 */

	/* Validate that specified port number is legal */
	if (!tavor_portnum_is_valid(state, attr_p->av_port_num)) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_HCA_PORT_INVALID, "invalid port num");
		goto ahalloc_fail;
	}

	/*
	 * Allocate a UDAV entry.  This will be filled in with all the
	 * necessary parameters to define the Address Handle.  Unlike the
	 * other hardware resources no ownership transfer takes place as
	 * these UDAV entries are always owned by hardware.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_UDAV, 1, sleepflag, &udav);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed UDAV");
		goto ahalloc_fail;
	}

	/*
	 * Allocate the software structure for tracking the address handle
	 * (i.e. the Tavor Address Handle struct).  If we fail here, we must
	 * undo the previous resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_AHHDL, 1, sleepflag, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed AH handler");
		goto ahalloc_fail1;
	}
	ah = (tavor_ahhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Fill in the UDAV entry.  Note: We are only filling in a temporary
	 * copy here, which we will later copy into the actual entry in
	 * Tavor DDR memory.  This starts be zeroing out the temporary copy
	 * and then calling tavor_set_addr_path() to fill in the common
	 * portions that can be pulled from the "ibt_adds_vect_t" passed in
	 */
	bzero(&udav_entry, sizeof (tavor_hw_udav_t));
	status = tavor_set_addr_path(state, attr_p,
	    (tavor_hw_addr_path_t *)&udav_entry, TAVOR_ADDRPATH_UDAV, NULL);
	if (status != DDI_SUCCESS) {
		/*
		 * Note: the cleanup here is done inline (rather than via
		 * the "ahalloc_fail2" label) because the PD refcnt and both
		 * resources must be released, exactly as fail2/fail1 would
		 * do, before jumping to the common failure exit.
		 */
		tavor_pd_refcnt_dec(pd);
		tavor_rsrc_free(state, &rsrc);
		tavor_rsrc_free(state, &udav);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(status, "failed in tavor_set_addr_path");
		goto ahalloc_fail;
	}
	udav_entry.pd = pd->pd_pdnum;
	/*
	 * NOTE(review): msg_sz is derived from the configured maximum MTU;
	 * the "- 1" encoding is assumed to match the hardware's expected
	 * message-size field format — confirm against the Tavor PRM.
	 */
	udav_entry.msg_sz = state->ts_cfg_profile->cp_max_mtu - 1;

	/*
	 * Register the memory for the UDAV.  The memory for the UDAV must
	 * be registered in the Tavor TPT tables.  This gives us the LKey
	 * that we will need when we later post a UD work request that
	 * uses this address handle.
	 * We might be able to pre-register all the memory for the UDAV XXX
	 */
	flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)udav->tr_addr;
	mr_attr.mr_len = udav->tr_len;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = flag;
	op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
	op.mro_bind_dmahdl = NULL;
	op.mro_bind_override_addr = 0;
	status = tavor_mr_register(state, pd, &mr_attr, &mr, &op);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
		goto ahalloc_fail2;
	}

	/*
	 * Fill in the UDAV entry.  Here we copy all the information from
	 * the temporary UDAV into the DDR memory for the real UDAV entry.
	 * Note that we copy everything but the first 64-bit word.  This
	 * is where the PD number for the address handle resides.
	 * By filling everything except the PD and then writing the PD in
	 * a separate step below, we can ensure that the UDAV is not
	 * accessed while there are partially written values in it (something
	 * which really should not happen anyway).  This is guaranteed
	 * because we take measures to ensure that the PD number is zero for
	 * all unused UDAV (and because PD#0 is reserved for Tavor).
	 */
	size = sizeof (tavor_hw_udav_t) >> 3;
	for (i = 1; i < size; i++) {
		data = ((uint64_t *)&udav_entry)[i];
		ddi_put64(udav->tr_acchdl, ((uint64_t *)udav->tr_addr + i),
		    data);
	}
	/* Word 0 (containing the PD number) is written last - see above */
	data = ((uint64_t *)&udav_entry)[0];
	ddi_put64(udav->tr_acchdl, (uint64_t *)udav->tr_addr, data);

	/*
	 * Fill in the rest of the Tavor Address Handle struct.  Having
	 * successfully copied the UDAV into the hardware, we update the
	 * following fields for use in further operations on the AH.
	 *
	 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field
	 * here because we may need to return it later to the IBTF (as a
	 * result of a subsequent query operation).  Unlike the other UDAV
	 * parameters, the value of "av_dgid.gid_guid" is not always preserved
	 * by being written to hardware.  The reason for this is described in
	 * tavor_set_addr_path().
	 */
	ah->ah_udavrsrcp = udav;
	ah->ah_rsrcp = rsrc;
	ah->ah_pdhdl = pd;
	ah->ah_mrhdl = mr;
	ah->ah_save_guid = attr_p->av_dgid.gid_guid;
	ah->ah_save_srate = attr_p->av_srate;
	*ahhdl = ah;

	/* Determine if later ddi_dma_sync will be necessary */
	ah->ah_sync = TAVOR_UDAV_IS_SYNC_REQ(state);

	/* Sync the UDAV for use by the hardware */
	tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);

	TAVOR_TNF_EXIT(tavor_ah_alloc);
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 * (fall-through labels release resources in reverse order of acquisition)
 */
ahalloc_fail2:
	tavor_pd_refcnt_dec(pd);
	tavor_rsrc_free(state, &rsrc);
ahalloc_fail1:
	tavor_rsrc_free(state, &udav);
ahalloc_fail:
	TNF_PROBE_1(tavor_ah_alloc_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_ah_alloc);
	return (status);
}


/*
 * tavor_ah_free()
 *    Context: Can be called only from user or kernel context.
 */
/* ARGSUSED */
int
tavor_ah_free(tavor_state_t *state, tavor_ahhdl_t *ahhdl, uint_t sleepflag)
{
	tavor_rsrc_t		*udav, *rsrc;
	tavor_pdhdl_t		pd;
	tavor_mrhdl_t		mr;
	tavor_ahhdl_t		ah;
	int			status;

	TAVOR_TNF_ENTER(tavor_ah_free);

	/*
	 * Frees an Address Handle previously allocated by tavor_ah_alloc():
	 * deregisters the UDAV memory, invalidates the hardware UDAV entry
	 * (by zeroing its PD word), and releases all associated resources.
	 * On success "*ahhdl" is set to NULL.
	 *
	 * Pull all the necessary information from the Tavor Address Handle
	 * struct.  This is necessary here because the resource for the
	 * AH is going to be freed up as part of this operation.
	 */
	ah = *ahhdl;
	mutex_enter(&ah->ah_lock);
	udav = ah->ah_udavrsrcp;
	rsrc = ah->ah_rsrcp;
	pd = ah->ah_pdhdl;
	mr = ah->ah_mrhdl;
	mutex_exit(&ah->ah_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))

	/*
	 * Deregister the memory for the UDAV.  If this fails for any reason,
	 * then it is an indication that something (either in HW or SW) has
	 * gone seriously wrong.  So we print a warning message and return
	 * failure.
	 */
	status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
	    sleepflag);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_ah_free_dereg_mr_fail, TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_ah_free);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Write zero to the first 64-bit word in the UDAV entry.  As
	 * described above (in tavor_ah_alloc), the PD number is stored in
	 * the first 64-bits of each UDAV and setting this to zero is
	 * guaranteed to invalidate the entry.
	 */
	ddi_put64(udav->tr_acchdl, (uint64_t *)udav->tr_addr, 0);

	/* Sync the UDAV for use by the hardware */
	tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);

	/* Decrement the reference count on the protection domain (PD) */
	tavor_pd_refcnt_dec(pd);

	/* Free the Tavor Address Handle structure */
	tavor_rsrc_free(state, &rsrc);

	/* Free up the UDAV entry resource */
	tavor_rsrc_free(state, &udav);

	/* Set the ahhdl pointer to NULL and return success */
	*ahhdl = NULL;

	TAVOR_TNF_EXIT(tavor_ah_free);
	return (DDI_SUCCESS);
}


/*
 * tavor_ah_query()
 *    Context: Can be called from interrupt or base context.
322 */ 323 /* ARGSUSED */ 324 int 325 tavor_ah_query(tavor_state_t *state, tavor_ahhdl_t ah, tavor_pdhdl_t *pd, 326 ibt_adds_vect_t *attr_p) 327 { 328 tavor_hw_udav_t udav_entry; 329 tavor_rsrc_t *udav; 330 uint64_t data; 331 uint32_t size; 332 int i; 333 334 TAVOR_TNF_ENTER(tavor_ah_query); 335 336 mutex_enter(&ah->ah_lock); 337 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p)) 338 339 /* 340 * Pull all the necessary information from the Tavor Address Handle 341 * structure 342 */ 343 udav = ah->ah_udavrsrcp; 344 *pd = ah->ah_pdhdl; 345 346 /* 347 * Copy the UDAV entry into the temporary copy. Here we copy all 348 * the information from the UDAV entry in DDR memory into the 349 * temporary UDAV. Note: We don't need to sync the UDAV for 350 * reading by software because Tavor HW never modifies the entry. 351 */ 352 size = sizeof (tavor_hw_udav_t) >> 3; 353 for (i = 0; i < size; i++) { 354 data = ddi_get64(udav->tr_acchdl, 355 ((uint64_t *)udav->tr_addr + i)); 356 ((uint64_t *)&udav_entry)[i] = data; 357 } 358 359 /* 360 * Fill in "ibt_adds_vect_t". We call tavor_get_addr_path() to fill 361 * the common portions that can be pulled from the UDAV we pass in. 362 * 363 * NOTE: We will also fill the "av_dgid.gid_guid" field from the 364 * "ah_save_guid" field we have previously saved away. The reason 365 * for this is described in tavor_ah_alloc() and tavor_ah_modify(). 366 */ 367 tavor_get_addr_path(state, (tavor_hw_addr_path_t *)&udav_entry, 368 attr_p, TAVOR_ADDRPATH_UDAV, NULL); 369 370 attr_p->av_dgid.gid_guid = ah->ah_save_guid; 371 attr_p->av_srate = ah->ah_save_srate; 372 373 mutex_exit(&ah->ah_lock); 374 TAVOR_TNF_EXIT(tavor_ah_query); 375 return (DDI_SUCCESS); 376 } 377 378 379 /* 380 * tavor_ah_modify() 381 * Context: Can be called from interrupt or base context. 
 */
/* ARGSUSED */
int
tavor_ah_modify(tavor_state_t *state, tavor_ahhdl_t ah,
    ibt_adds_vect_t *attr_p)
{
	tavor_hw_udav_t		udav_entry;
	tavor_rsrc_t		*udav;
	uint64_t		data_new, data_old;
	uint32_t		udav_pd, size, portnum_new;
	int			i, status;

	TAVOR_TNF_ENTER(tavor_ah_modify);

	/*
	 * Modifies an existing Address Handle in place.  The update follows
	 * a strict invalidate/rewrite/revalidate sequence so that hardware
	 * never observes a partially-written UDAV:
	 *   1) save and zero the PD word (invalidates the entry), sync;
	 *   2) rewrite the body with the new (masked) values, sync;
	 *   3) restore the PD word (with the new port number), sync.
	 */

	/* Validate that specified port number is legal */
	if (!tavor_portnum_is_valid(state, attr_p->av_port_num)) {
		TNF_PROBE_1(tavor_ah_modify_inv_portnum,
		    TAVOR_TNF_ERROR, "", tnf_uint, port, attr_p->av_port_num);
		TAVOR_TNF_EXIT(tavor_ah_modify);
		return (IBT_HCA_PORT_INVALID);
	}

	mutex_enter(&ah->ah_lock);

	/*
	 * Pull all the necessary information from the Tavor Address Handle
	 * structure
	 */
	udav = ah->ah_udavrsrcp;

	/*
	 * Fill in the UDAV entry.  Note: we are only filling in a temporary
	 * copy here, which we will later copy into the actual entry in
	 * Tavor DDR memory.  This starts be zeroing out the temporary copy
	 * and then calling tavor_set_addr_path() to fill in the common
	 * portions that can be pulled from the "ibt_adds_vect_t" passed in
	 *
	 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid"
	 * field here (just as we did during tavor_ah_alloc()) because we
	 * may need to return it later to the IBTF (as a result of a
	 * subsequent query operation).  As explained in tavor_ah_alloc(),
	 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid"
	 * is not always preserved by being written to hardware.  The reason
	 * for this is described in tavor_set_addr_path().
	 */
	bzero(&udav_entry, sizeof (tavor_hw_udav_t));
	status = tavor_set_addr_path(state, attr_p,
	    (tavor_hw_addr_path_t *)&udav_entry, TAVOR_ADDRPATH_UDAV, NULL);
	if (status != DDI_SUCCESS) {
		mutex_exit(&ah->ah_lock);
		TNF_PROBE_0(tavor_ah_modify_setaddrpath_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_ah_modify);
		return (status);
	}
	ah->ah_save_guid = attr_p->av_dgid.gid_guid;
	ah->ah_save_srate = attr_p->av_srate;

	/*
	 * Save away the current PD number for this UDAV.  Then temporarily
	 * invalidate the entry (by setting the PD to zero).  Note:  Since
	 * the first 32 bits of the UDAV actually contain the current port
	 * number _and_ current PD number, we need to mask off some bits.
	 */
	udav_pd = ddi_get32(udav->tr_acchdl, (uint32_t *)udav->tr_addr);
	udav_pd = udav_pd & 0xFFFFFF;
	ddi_put32(udav->tr_acchdl, (uint32_t *)udav->tr_addr, 0);

	/* Sync the UDAV for use by the hardware */
	tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);

	/*
	 * Copy UDAV structure to the entry
	 *    Note:  We copy in 64-bit chunks.  For the first two of these
	 *    chunks it is necessary to read the current contents of the
	 *    UDAV, mask off the modifiable portions (maintaining any
	 *    of the "reserved" portions), and then mask on the new data.
	 */
	size = sizeof (tavor_hw_udav_t) >> 3;
	for (i = 0; i < size; i++) {
		data_new = ((uint64_t *)&udav_entry)[i];
		data_old = ddi_get64(udav->tr_acchdl,
		    ((uint64_t *)udav->tr_addr + i));

		/*
		 * Apply mask to change only the relevant values.  Note: We
		 * extract the new portnum from the address handle here
		 * because the "PD" and "portnum" fields are in the same
		 * 32-bit word in the UDAV.  We will use the (new) port
		 * number extracted here when we write the valid PD number
		 * in the last step below.  (portnum occupies the top byte
		 * of the first 64-bit word, hence the shift by 56; this
		 * branch always executes on iteration 0, so "portnum_new"
		 * is guaranteed to be set before its use below.)
		 */
		if (i == 0) {
			data_old = data_old & TAVOR_UDAV_MODIFY_MASK0;
			portnum_new = data_new >> 56;
		} else if (i == 1) {
			data_old = data_old & TAVOR_UDAV_MODIFY_MASK1;
		} else {
			data_old = 0;
		}

		/* Write the updated values to the UDAV (in DDR) */
		data_new = data_old | data_new;
		ddi_put64(udav->tr_acchdl, ((uint64_t *)udav->tr_addr + i),
		    data_new);
	}

	/*
	 * Sync the body of the UDAV for use by the hardware.  After we
	 * have updated the PD number (to make the UDAV valid), we sync
	 * again to push the entire entry out for hardware access.
	 */
	tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);

	/*
	 * Put the valid PD number back into UDAV entry.  Note: Because port
	 * number and PD number are in the same word, we must mask the
	 * new port number with the old PD number before writing it back
	 * to the UDAV entry
	 */
	udav_pd = ((portnum_new << 24) | udav_pd);
	ddi_put32(udav->tr_acchdl, (uint32_t *)udav->tr_addr, udav_pd);

	/* Sync the rest of the UDAV for use by the hardware */
	tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);

	mutex_exit(&ah->ah_lock);
	TAVOR_TNF_EXIT(tavor_ah_modify);
	return (DDI_SUCCESS);
}


/*
 * tavor_udav_sync()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static void
tavor_udav_sync(tavor_ahhdl_t ah, tavor_hw_udav_t *udav, uint_t flag)
{
	ddi_dma_handle_t	dmahdl;
	off_t			offset;
	int			status;

	TAVOR_TNF_ENTER(tavor_udav_sync);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))

	/*
	 * DMA-syncs the UDAV entry behind an Address Handle so that the
	 * hardware (or software, depending on "flag") sees a consistent
	 * view.  This is a no-op on platforms where the state flags
	 * indicate no sync is required (checked via "ah_sync" below).
	 */

	/* Determine if AH needs to be synced or not */
	if (ah->ah_sync == 0) {
		TAVOR_TNF_EXIT(tavor_udav_sync);
		return;
	}

	/* Get the DMA handle from AH handle */
	dmahdl = ah->ah_mrhdl->mr_bindinfo.bi_dmahdl;

	/* Calculate offset into address handle */
	offset = (off_t)0;
	status = ddi_dma_sync(dmahdl, offset, sizeof (tavor_hw_udav_t), flag);
	if (status != DDI_SUCCESS) {
		/* Sync failure is only traced; there is no recovery here */
		TNF_PROBE_0(tavor_udav_sync_getnextentry_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_udav_sync);
		return;
	}

	TAVOR_TNF_EXIT(tavor_udav_sync);
}


/*
 * tavor_mcg_attach()
 *    Context: Can be called only from user or kernel context.
 */
int
tavor_mcg_attach(tavor_state_t *state, tavor_qphdl_t qp, ib_gid_t gid,
    ib_lid_t lid)
{
	tavor_rsrc_t		*rsrc;
	tavor_hw_mcg_t		*mcg_entry;
	tavor_hw_mcg_qp_list_t	*mcg_entry_qplist;
	tavor_mcghdl_t		mcg, newmcg;
	uint64_t		mgid_hash;
	uint32_t		end_indx;
	int			status;
	uint_t			qp_found;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mcg_attach);

	/*
	 * Attaches a UD QP to the multicast group identified by "gid".
	 * After validation, the MGID hash chain is walked and one of three
	 * cases applies: (1) the chain head is empty - fill it in;
	 * (2) an entry with a matching MGID exists - add the QP to it;
	 * (3) the end of the chain is reached - allocate a new MCG entry
	 * and link it to the end of the chain.  In every case the "shadow"
	 * software MCG list is only updated after all firmware accesses
	 * have succeeded (and is rolled back via bzero on failure).
	 *
	 * It is only allowed to attach MCG to UD queue pairs.  Verify
	 * that the intended QP is of the appropriate transport type
	 */
	if (qp->qp_serv_type != TAVOR_QP_UD) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_QP_SRV_TYPE_INVALID, "invalid service type");
		goto mcgattach_fail;
	}

	/*
	 * Check for invalid Multicast DLID.  Specifically, all Multicast
	 * LIDs should be within a well defined range.  If the specified LID
	 * is outside of that range, then return an error.
	 */
	if (tavor_mlid_is_valid(lid) == 0) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MC_MLID_INVALID, "invalid MLID");
		goto mcgattach_fail;
	}
	/*
	 * Check for invalid Multicast GID.  All Multicast GIDs should have
	 * a well-defined pattern of bits and flags that are allowable.  If
	 * the specified GID does not meet the criteria, then return an error.
	 */
	if (tavor_mgid_is_valid(gid) == 0) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MC_MGID_INVALID, "invalid MGID");
		goto mcgattach_fail;
	}

	/*
	 * Compute the MGID hash value.  Since the MCG table is arranged as
	 * a number of separate hash chains, this operation converts the
	 * specified MGID into the starting index of an entry in the hash
	 * table (i.e. the index for the start of the appropriate hash chain).
	 * Subsequent operations below will walk the chain searching for the
	 * right place to add this new QP.
	 */
	status = tavor_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
	    &mgid_hash, TAVOR_SLEEPFLAG_FOR_CONTEXT());
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: MGID_HASH command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mcg_attach_mgid_hash_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
		TAVOR_TNF_EXIT(tavor_mcg_attach);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Grab the multicast group mutex.  Then grab the pre-allocated
	 * temporary buffer used for holding and/or modifying MCG entries.
	 * Zero out the temporary MCG entry before we begin.
	 */
	mutex_enter(&state->ts_mcglock);
	mcg_entry = state->ts_mcgtmp;
	mcg_entry_qplist = TAVOR_MCG_GET_QPLIST_PTR(mcg_entry);
	bzero(mcg_entry, TAVOR_MCGMEM_SZ(state));

	/*
	 * Walk through the array of MCG entries starting at "mgid_hash".
	 * Try to find the appropriate place for this new QP to be added.
	 * This could happen when the first entry of the chain has MGID == 0
	 * (which means that the hash chain is empty), or because we find
	 * an entry with the same MGID (in which case we'll add the QP to
	 * that MCG), or because we come to the end of the chain (in which
	 * case this is the first QP being added to the multicast group that
	 * corresponds to the MGID.  The tavor_mcg_walk_mgid_hash() routine
	 * walks the list and returns an index into the MCG table.  The entry
	 * at this index is then checked to determine which case we have
	 * fallen into (see below).  Note:  We are using the "shadow" MCG
	 * list (of tavor_mcg_t structs) for this lookup because the real
	 * MCG entries are in hardware (and the lookup process would be much
	 * more time consuming).
	 */
	end_indx = tavor_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL);
	mcg = &state->ts_mcghdl[end_indx];

	/*
	 * Case 1: If MGID == 0, then the hash chain is empty.  Just fill in
	 * the current entry.  Note:  No need to allocate an MCG table entry
	 * as all the hash chain "heads" are already preallocated.
	 */
	if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) {

		/* Fill in the current entry in the "shadow" MCG list */
		tavor_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL);

		/*
		 * Try to add the new QP number to the list.  This (and the
		 * above) routine fills in a temporary MCG.  The "mcg_entry"
		 * and "mcg_entry_qplist" pointers simply point to different
		 * offsets within the same temporary copy of the MCG (for
		 * convenience).  Note:  If this fails, we need to invalidate
		 * the entries we've already put into the "shadow" list entry
		 * above.
		 */
		status = tavor_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
		    &qp_found);
		if (status != DDI_SUCCESS) {
			bzero(mcg, sizeof (struct tavor_sw_mcg_list_s));
			mutex_exit(&state->ts_mcglock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(status, "failed qplist add");
			goto mcgattach_fail;
		}

		/*
		 * Once the temporary MCG has been filled in, write the entry
		 * into the appropriate location in the Tavor MCG entry table.
		 * If it's successful, then drop the lock and return success.
		 * Note: In general, this operation shouldn't fail.  If it
		 * does, then it is an indication that something (probably in
		 * HW, but maybe in SW) has gone seriously wrong.  We still
		 * want to zero out the entries that we've filled in above
		 * (in the tavor_mcg_setup_new_hdr() routine).
		 */
		status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
		    TAVOR_CMD_NOSLEEP_SPIN);
		if (status != TAVOR_CMD_SUCCESS) {
			bzero(mcg, sizeof (struct tavor_sw_mcg_list_s));
			mutex_exit(&state->ts_mcglock);
			TAVOR_WARNING(state, "failed to write MCG entry");
			cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
			    "%08x\n", status);
			TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
			    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
			    tnf_uint, indx, end_indx);
			TAVOR_TNF_EXIT(tavor_mcg_attach);
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Now that we know all the Tavor firmware accesses have been
		 * successful, we update the "shadow" MCG entry by incrementing
		 * the "number of attached QPs" count.
		 *
		 * We increment only if the QP is not already part of the
		 * MCG by checking the 'qp_found' flag returned from the
		 * qplist_add above.
		 */
		if (!qp_found) {
			mcg->mcg_num_qps++;

			/*
			 * Increment the refcnt for this QP.  Because the QP
			 * was added to this MCG, the refcnt must be
			 * incremented.
			 */
			tavor_qp_mcg_refcnt_inc(qp);
		}

		/*
		 * We drop the lock and return success.
		 */
		mutex_exit(&state->ts_mcglock);
		TAVOR_TNF_EXIT(tavor_mcg_attach);
		return (DDI_SUCCESS);
	}

	/*
	 * Case 2: If the specified MGID matches the MGID in the current
	 * entry, then we need to try to add the QP to the current MCG entry.
	 * In this case, it means that we need to read the existing MCG entry
	 * (into the temporary MCG), add the new QP number to the temporary
	 * entry (using the same method we used above), and write the entry
	 * back to the hardware (same as above).
	 */
	if ((mcg->mcg_mgid_h == gid.gid_prefix) &&
	    (mcg->mcg_mgid_l == gid.gid_guid)) {

		/*
		 * Read the current MCG entry into the temporary MCG.  Note:
		 * In general, this operation shouldn't fail.  If it does,
		 * then it is an indication that something (probably in HW,
		 * but maybe in SW) has gone seriously wrong.
		 */
		status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx,
		    TAVOR_CMD_NOSLEEP_SPIN);
		if (status != TAVOR_CMD_SUCCESS) {
			mutex_exit(&state->ts_mcglock);
			TAVOR_WARNING(state, "failed to read MCG entry");
			cmn_err(CE_CONT, "Tavor: READ_MGM command failed: "
			    "%08x\n", status);
			TNF_PROBE_2(tavor_mcg_attach_read_mgm_cmd_fail,
			    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
			    tnf_uint, indx, end_indx);
			TAVOR_TNF_EXIT(tavor_mcg_attach);
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Try to add the new QP number to the list.  This routine
		 * fills in the necessary pieces of the temporary MCG.  The
		 * "mcg_entry_qplist" pointer is used to point to the portion
		 * of the temporary MCG that holds the QP numbers.
		 *
		 * Note: tavor_mcg_qplist_add() returns SUCCESS if it
		 * already found the QP in the list.  In this case, the QP is
		 * not added on to the list again.  Check the flag 'qp_found'
		 * if this value is needed to be known.
		 */
		status = tavor_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
		    &qp_found);
		if (status != DDI_SUCCESS) {
			mutex_exit(&state->ts_mcglock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(status, "failed qplist add");
			goto mcgattach_fail;
		}

		/*
		 * Once the temporary MCG has been updated, write the entry
		 * into the appropriate location in the Tavor MCG entry table.
		 * If it's successful, then drop the lock and return success.
		 * Note: In general, this operation shouldn't fail.  If it
		 * does, then it is an indication that something (probably in
		 * HW, but maybe in SW) has gone seriously wrong.
		 */
		status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
		    TAVOR_CMD_NOSLEEP_SPIN);
		if (status != TAVOR_CMD_SUCCESS) {
			mutex_exit(&state->ts_mcglock);
			TAVOR_WARNING(state, "failed to write MCG entry");
			cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
			    "%08x\n", status);
			TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
			    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
			    tnf_uint, indx, end_indx);
			TAVOR_TNF_EXIT(tavor_mcg_attach);
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Now that we know all the Tavor firmware accesses have been
		 * successful, we update the current "shadow" MCG entry by
		 * incrementing the "number of attached QPs" count.
		 *
		 * We increment only if the QP is not already part of the
		 * MCG by checking the 'qp_found' flag returned from the
		 * qplist_add above.
		 */
		if (!qp_found) {
			mcg->mcg_num_qps++;

			/*
			 * Increment the refcnt for this QP.  Because the QP
			 * was added to this MCG, the refcnt must be
			 * incremented.
			 */
			tavor_qp_mcg_refcnt_inc(qp);
		}

		/*
		 * We drop the lock and return success.
		 */
		mutex_exit(&state->ts_mcglock);
		TAVOR_TNF_EXIT(tavor_mcg_attach);
		return (DDI_SUCCESS);
	}

	/*
	 * Case 3: If we've reached here, then we're at the end of the hash
	 * chain.  We need to allocate a new MCG entry, fill it in, write it
	 * to Tavor, and update the previous entry to link the new one to the
	 * end of the chain.
	 */

	/*
	 * Allocate an MCG table entry.  This will be filled in with all
	 * the necessary parameters to define the multicast group.  Then it
	 * will be written to the hardware in the next-to-last step below.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MCG, 1, TAVOR_NOSLEEP, &rsrc);
	if (status != DDI_SUCCESS) {
		mutex_exit(&state->ts_mcglock);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MCG");
		goto mcgattach_fail;
	}

	/*
	 * Fill in the new entry in the "shadow" MCG list.  Note:  Just as
	 * it does above, tavor_mcg_setup_new_hdr() also fills in a portion
	 * of the temporary MCG entry (the rest of which will be filled in by
	 * tavor_mcg_qplist_add() below)
	 */
	newmcg = &state->ts_mcghdl[rsrc->tr_indx];
	tavor_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc);

	/*
	 * Try to add the new QP number to the list.  This routine fills in
	 * the final necessary pieces of the temporary MCG.  The
	 * "mcg_entry_qplist" pointer is used to point to the portion of the
	 * temporary MCG that holds the QP numbers.  If we fail here, we
	 * must undo the previous resource allocation.
	 *
	 * Note: tavor_mcg_qplist_add() can we return SUCCESS if it already
	 * found the QP in the list.  In this case, the QP is not added on to
	 * the list again.  Check the flag 'qp_found' if this value is needed
	 * to be known.
	 */
	status = tavor_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp,
	    &qp_found);
	if (status != DDI_SUCCESS) {
		bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
		tavor_rsrc_free(state, &rsrc);
		mutex_exit(&state->ts_mcglock);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(status, "failed qplist add");
		goto mcgattach_fail;
	}

	/*
	 * Once the temporary MCG has been updated, write the entry into the
	 * appropriate location in the Tavor MCG entry table.  If this is
	 * successful, then we need to chain the previous entry to this one.
	 * Note: In general, this operation shouldn't fail.  If it does, then
	 * it is an indication that something (probably in HW, but maybe in
	 * SW) has gone seriously wrong.
	 */
	status = tavor_write_mgm_cmd_post(state, mcg_entry, rsrc->tr_indx,
	    TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
		tavor_rsrc_free(state, &rsrc);
		mutex_exit(&state->ts_mcglock);
		TAVOR_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
		    status);
		TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
		    tnf_uint, indx, rsrc->tr_indx);
		TAVOR_TNF_EXIT(tavor_mcg_attach);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Now read the current MCG entry (the one previously at the end of
	 * hash chain) into the temporary MCG.  We are going to update its
	 * "next_gid_indx" now and write the entry back to the MCG table.
	 * Note:  In general, this operation shouldn't fail.  If it does, then
	 * it is an indication that something (probably in HW, but maybe in SW)
	 * has gone seriously wrong.  We will free up the MCG entry resource,
	 * but we will not undo the previously written MCG entry in the HW.
	 * This is OK, though, because the MCG entry is not currently attached
	 * to any hash chain.
	 */
	status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx,
	    TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
		tavor_rsrc_free(state, &rsrc);
		mutex_exit(&state->ts_mcglock);
		TAVOR_WARNING(state, "failed to read MCG entry");
		cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n",
		    status);
		TNF_PROBE_2(tavor_mcg_attach_read_mgm_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
		    tnf_uint, indx, end_indx);
		TAVOR_TNF_EXIT(tavor_mcg_attach);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Finally, we update the "next_gid_indx" field in the temporary MCG
	 * and attempt to write the entry back into the Tavor MCG table.  If
	 * this succeeds, then we update the "shadow" list to reflect the
	 * change, drop the lock, and return success.  Note:  In general, this
	 * operation shouldn't fail.  If it does, then it is an indication
	 * that something (probably in HW, but maybe in SW) has gone seriously
	 * wrong.  Just as we do above, we will free up the MCG entry resource,
	 * but we will not try to undo the previously written MCG entry.  This
	 * is OK, though, because (since we failed here to update the end of
	 * the chain) that other entry is not currently attached to any chain.
	 */
	mcg_entry->next_gid_indx = rsrc->tr_indx;
	status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
	    TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
		tavor_rsrc_free(state, &rsrc);
		mutex_exit(&state->ts_mcglock);
		TAVOR_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
		    status);
		TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
		    tnf_uint, indx, end_indx);
		TAVOR_TNF_EXIT(tavor_mcg_attach);
		return (ibc_get_ci_failure(0));
	}
	mcg = &state->ts_mcghdl[end_indx];
	mcg->mcg_next_indx = rsrc->tr_indx;

	/*
	 * Now that we know all the Tavor firmware accesses have been
	 * successful, we update the new "shadow" MCG entry by incrementing
	 * the "number of attached QPs" count.  Then we drop the lock and
	 * return success.
	 */
	newmcg->mcg_num_qps++;

	/*
	 * Increment the refcnt for this QP.  Because the QP
	 * was added to this MCG, the refcnt must be
	 * incremented.
	 */
	tavor_qp_mcg_refcnt_inc(qp);

	mutex_exit(&state->ts_mcglock);
	TAVOR_TNF_EXIT(tavor_mcg_attach);
	return (DDI_SUCCESS);

mcgattach_fail:
	TNF_PROBE_1(tavor_mcg_attach_fail, TAVOR_TNF_ERROR, "", tnf_string,
	    msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mcg_attach);
	return (status);
}


/*
 * tavor_mcg_detach()
 *    Context: Can be called only from user or kernel context.
 */
int
tavor_mcg_detach(tavor_state_t *state, tavor_qphdl_t qp, ib_gid_t gid,
    ib_lid_t lid)
{
	tavor_hw_mcg_t		*mcg_entry;
	tavor_hw_mcg_qp_list_t	*mcg_entry_qplist;
	tavor_mcghdl_t		mcg;
	uint64_t		mgid_hash;
	uint32_t		end_indx, prev_indx;
	int			status;

	TAVOR_TNF_ENTER(tavor_mcg_detach);

	/*
	 * Check for invalid Multicast DLID.
Specifically, all Multicast 1016 * LIDs should be within a well defined range. If the specified LID 1017 * is outside of that range, then return an error. 1018 */ 1019 if (tavor_mlid_is_valid(lid) == 0) { 1020 TNF_PROBE_0(tavor_mcg_detach_invmlid_fail, TAVOR_TNF_ERROR, ""); 1021 TAVOR_TNF_EXIT(tavor_mcg_detach); 1022 return (IBT_MC_MLID_INVALID); 1023 } 1024 1025 /* 1026 * Compute the MGID hash value. As described above, the MCG table is 1027 * arranged as a number of separate hash chains. This operation 1028 * converts the specified MGID into the starting index of an entry in 1029 * the hash table (i.e. the index for the start of the appropriate 1030 * hash chain). Subsequent operations below will walk the chain 1031 * searching for a matching entry from which to attempt to remove 1032 * the specified QP. 1033 */ 1034 status = tavor_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid, 1035 &mgid_hash, TAVOR_SLEEPFLAG_FOR_CONTEXT()); 1036 if (status != TAVOR_CMD_SUCCESS) { 1037 cmn_err(CE_CONT, "Tavor: MGID_HASH command failed: %08x\n", 1038 status); 1039 TNF_PROBE_1(tavor_mcg_detach_mgid_hash_cmd_fail, 1040 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status); 1041 TAVOR_TNF_EXIT(tavor_mcg_attach); 1042 return (ibc_get_ci_failure(0)); 1043 } 1044 1045 /* 1046 * Grab the multicast group mutex. Then grab the pre-allocated 1047 * temporary buffer used for holding and/or modifying MCG entries. 1048 */ 1049 mutex_enter(&state->ts_mcglock); 1050 mcg_entry = state->ts_mcgtmp; 1051 mcg_entry_qplist = TAVOR_MCG_GET_QPLIST_PTR(mcg_entry); 1052 1053 /* 1054 * Walk through the array of MCG entries starting at "mgid_hash". 1055 * Try to find an MCG entry with a matching MGID. The 1056 * tavor_mcg_walk_mgid_hash() routine walks the list and returns an 1057 * index into the MCG table. The entry at this index is checked to 1058 * determine whether it is a match or not. If it is a match, then 1059 * we continue on to attempt to remove the QP from the MCG. 
If it 1060 * is not a match (or not a valid MCG entry), then we return an error. 1061 */ 1062 end_indx = tavor_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx); 1063 mcg = &state->ts_mcghdl[end_indx]; 1064 1065 /* 1066 * If MGID == 0 (the hash chain is empty) or if the specified MGID 1067 * does not match the MGID in the current entry, then return 1068 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not 1069 * valid). 1070 */ 1071 if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) || 1072 ((mcg->mcg_mgid_h != gid.gid_prefix) || 1073 (mcg->mcg_mgid_l != gid.gid_guid))) { 1074 mutex_exit(&state->ts_mcglock); 1075 TNF_PROBE_0(tavor_mcg_detach_invmgid_fail, TAVOR_TNF_ERROR, ""); 1076 TAVOR_TNF_EXIT(tavor_mcg_detach); 1077 return (IBT_MC_MGID_INVALID); 1078 } 1079 1080 /* 1081 * Read the current MCG entry into the temporary MCG. Note: In 1082 * general, this operation shouldn't fail. If it does, then it is 1083 * an indication that something (probably in HW, but maybe in SW) 1084 * has gone seriously wrong. 1085 */ 1086 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx, 1087 TAVOR_CMD_NOSLEEP_SPIN); 1088 if (status != TAVOR_CMD_SUCCESS) { 1089 mutex_exit(&state->ts_mcglock); 1090 TAVOR_WARNING(state, "failed to read MCG entry"); 1091 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n", 1092 status); 1093 TNF_PROBE_2(tavor_mcg_detach_read_mgm_cmd_fail, 1094 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1095 tnf_uint, indx, end_indx); 1096 TAVOR_TNF_EXIT(tavor_mcg_attach); 1097 return (ibc_get_ci_failure(0)); 1098 } 1099 1100 /* 1101 * Search the QP number list for a match. If a match is found, then 1102 * remove the entry from the QP list. Otherwise, if no match is found, 1103 * return an error. 
1104 */ 1105 status = tavor_mcg_qplist_remove(mcg, mcg_entry_qplist, qp); 1106 if (status != DDI_SUCCESS) { 1107 mutex_exit(&state->ts_mcglock); 1108 TAVOR_TNF_EXIT(tavor_mcg_detach); 1109 return (status); 1110 } 1111 1112 /* 1113 * Decrement the MCG count for this QP. When the 'qp_mcg' 1114 * field becomes 0, then this QP is no longer a member of any 1115 * MCG. 1116 */ 1117 tavor_qp_mcg_refcnt_dec(qp); 1118 1119 /* 1120 * If the current MCG's QP number list is about to be made empty 1121 * ("mcg_num_qps" == 1), then remove the entry itself from the hash 1122 * chain. Otherwise, just write the updated MCG entry back to the 1123 * hardware. In either case, once we successfully update the hardware 1124 * chain, then we decrement the "shadow" list entry's "mcg_num_qps" 1125 * count (or zero out the entire "shadow" list entry) before returning 1126 * success. Note: Zeroing out the "shadow" list entry is done 1127 * inside of tavor_mcg_hash_list_remove(). 1128 */ 1129 if (mcg->mcg_num_qps == 1) { 1130 1131 /* Remove an MCG entry from the hash chain */ 1132 status = tavor_mcg_hash_list_remove(state, end_indx, prev_indx, 1133 mcg_entry); 1134 if (status != DDI_SUCCESS) { 1135 mutex_exit(&state->ts_mcglock); 1136 TAVOR_TNF_EXIT(tavor_mcg_detach); 1137 return (status); 1138 } 1139 1140 } else { 1141 /* 1142 * Write the updated MCG entry back to the Tavor MCG table. 1143 * If this succeeds, then we update the "shadow" list to 1144 * reflect the change (i.e. decrement the "mcg_num_qps"), 1145 * drop the lock, and return success. Note: In general, 1146 * this operation shouldn't fail. If it does, then it is an 1147 * indication that something (probably in HW, but maybe in SW) 1148 * has gone seriously wrong. 
1149 */ 1150 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx, 1151 TAVOR_CMD_NOSLEEP_SPIN); 1152 if (status != TAVOR_CMD_SUCCESS) { 1153 mutex_exit(&state->ts_mcglock); 1154 TAVOR_WARNING(state, "failed to write MCG entry"); 1155 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: " 1156 "%08x\n", status); 1157 TNF_PROBE_2(tavor_mcg_detach_write_mgm_cmd_fail, 1158 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1159 tnf_uint, indx, end_indx); 1160 TAVOR_TNF_EXIT(tavor_mcg_detach); 1161 return (ibc_get_ci_failure(0)); 1162 } 1163 mcg->mcg_num_qps--; 1164 } 1165 1166 mutex_exit(&state->ts_mcglock); 1167 TAVOR_TNF_EXIT(tavor_mcg_detach); 1168 return (DDI_SUCCESS); 1169 } 1170 1171 /* 1172 * tavor_qp_mcg_refcnt_inc() 1173 * Context: Can be called from interrupt or base context. 1174 */ 1175 static void 1176 tavor_qp_mcg_refcnt_inc(tavor_qphdl_t qp) 1177 { 1178 /* Increment the QP's MCG reference count */ 1179 mutex_enter(&qp->qp_lock); 1180 qp->qp_mcg_refcnt++; 1181 TNF_PROBE_1_DEBUG(tavor_qp_mcg_refcnt_inc, TAVOR_TNF_TRACE, "", 1182 tnf_uint, refcnt, qp->qp_mcg_refcnt); 1183 mutex_exit(&qp->qp_lock); 1184 } 1185 1186 1187 /* 1188 * tavor_qp_mcg_refcnt_dec() 1189 * Context: Can be called from interrupt or base context. 1190 */ 1191 static void 1192 tavor_qp_mcg_refcnt_dec(tavor_qphdl_t qp) 1193 { 1194 /* Decrement the QP's MCG reference count */ 1195 mutex_enter(&qp->qp_lock); 1196 qp->qp_mcg_refcnt--; 1197 TNF_PROBE_1_DEBUG(tavor_qp_mcg_refcnt_dec, TAVOR_TNF_TRACE, "", 1198 tnf_uint, refcnt, qp->qp_mcg_refcnt); 1199 mutex_exit(&qp->qp_lock); 1200 } 1201 1202 1203 /* 1204 * tavor_mcg_qplist_add() 1205 * Context: Can be called from interrupt or base context. 
 */
static int
tavor_mcg_qplist_add(tavor_state_t *state, tavor_mcghdl_t mcg,
    tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp,
    uint_t *qp_found)
{
	uint_t		qplist_indx;

	TAVOR_TNF_ENTER(tavor_mcg_qplist_add);

	ASSERT(MUTEX_HELD(&state->ts_mcglock));

	qplist_indx = mcg->mcg_num_qps;

	/*
	 * Determine if we have exceeded the maximum number of QP per
	 * multicast group.  If we have, then return an error
	 */
	if (qplist_indx >= state->ts_cfg_profile->cp_num_qp_per_mcg) {
		TNF_PROBE_0(tavor_mcg_qplist_add_too_many_qps,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mcg_qplist_add);
		return (IBT_HCA_MCG_QP_EXCEEDED);
	}

	/*
	 * Determine if the QP is already attached to this MCG table.  If it
	 * is, then we break out and treat this operation as a NO-OP
	 */
	for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
	    qplist_indx++) {
		if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
			break;
		}
	}

	/*
	 * If the QP was already on the list, set 'qp_found' to TRUE.  We still
	 * return SUCCESS in this case, but the qplist will not have been
	 * updated because the QP was already on the list.
	 */
	if (qplist_indx < mcg->mcg_num_qps) {
		*qp_found = 1;
	} else {
		/*
		 * Otherwise, append the new QP number to the end of the
		 * current QP list.  Note: We will increment the "mcg_num_qps"
		 * field on the "shadow" MCG list entry later (after we know
		 * that all necessary Tavor firmware accesses have been
		 * successful).
		 *
		 * Set 'qp_found' to 0 so we know the QP was added on to the
		 * list for sure.
		 */
		mcg_qplist[qplist_indx].q = TAVOR_MCG_QPN_VALID;
		mcg_qplist[qplist_indx].qpn = qp->qp_qpnum;
		*qp_found = 0;
	}

	TAVOR_TNF_EXIT(tavor_mcg_qplist_add);
	return (DDI_SUCCESS);
}



/*
 * tavor_mcg_qplist_remove()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mcg_qplist_remove(tavor_mcghdl_t mcg, tavor_hw_mcg_qp_list_t *mcg_qplist,
    tavor_qphdl_t qp)
{
	uint_t		i, qplist_indx;

	TAVOR_TNF_ENTER(tavor_mcg_qplist_remove);

	/*
	 * Search the MCG QP list for an entry with a matching QPN.  When
	 * it's found, we swap the last entry into the matched slot, then
	 * mark the (now duplicate) last entry invalid and zero its QPN,
	 * and return success.  Note: the caller is responsible for
	 * decrementing the "shadow" entry's "mcg_num_qps" count after the
	 * hardware update succeeds.  If no match is found, it's an error.
	 */
	qplist_indx = mcg->mcg_num_qps;
	for (i = 0; i < qplist_indx; i++) {
		if (mcg_qplist[i].qpn == qp->qp_qpnum) {
			mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
			mcg_qplist[qplist_indx - 1].q = TAVOR_MCG_QPN_INVALID;
			mcg_qplist[qplist_indx - 1].qpn = 0;

			TAVOR_TNF_EXIT(tavor_mcg_qplist_remove);
			return (DDI_SUCCESS);
		}
	}

	TNF_PROBE_0(tavor_mcg_qplist_remove_invqphdl_fail, TAVOR_TNF_ERROR, "");
	TAVOR_TNF_EXIT(tavor_mcg_qplist_remove);
	return (IBT_QP_HDL_INVALID);
}


/*
 * tavor_mcg_walk_mgid_hash()
 *    Context: Can be called from interrupt or base context.
1311 */ 1312 static uint_t 1313 tavor_mcg_walk_mgid_hash(tavor_state_t *state, uint64_t start_indx, 1314 ib_gid_t mgid, uint_t *p_indx) 1315 { 1316 tavor_mcghdl_t curr_mcghdl; 1317 uint_t curr_indx, prev_indx; 1318 1319 TAVOR_TNF_ENTER(tavor_mcg_walk_mgid_hash); 1320 1321 ASSERT(MUTEX_HELD(&state->ts_mcglock)); 1322 1323 /* Start at the head of the hash chain */ 1324 curr_indx = start_indx; 1325 prev_indx = curr_indx; 1326 curr_mcghdl = &state->ts_mcghdl[curr_indx]; 1327 1328 /* If the first entry in the chain has MGID == 0, then stop */ 1329 if ((curr_mcghdl->mcg_mgid_h == 0) && 1330 (curr_mcghdl->mcg_mgid_l == 0)) { 1331 goto end_mgid_hash_walk; 1332 } 1333 1334 /* If the first entry in the chain matches the MGID, then stop */ 1335 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) && 1336 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) { 1337 goto end_mgid_hash_walk; 1338 } 1339 1340 /* Otherwise, walk the hash chain looking for a match */ 1341 while (curr_mcghdl->mcg_next_indx != 0) { 1342 prev_indx = curr_indx; 1343 curr_indx = curr_mcghdl->mcg_next_indx; 1344 curr_mcghdl = &state->ts_mcghdl[curr_indx]; 1345 1346 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) && 1347 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) { 1348 break; 1349 } 1350 } 1351 1352 end_mgid_hash_walk: 1353 /* 1354 * If necessary, return the index of the previous entry too. This 1355 * is primarily used for detaching a QP from a multicast group. It 1356 * may be necessary, in that case, to delete an MCG entry from the 1357 * hash chain and having the index of the previous entry is helpful. 1358 */ 1359 if (p_indx != NULL) { 1360 *p_indx = prev_indx; 1361 } 1362 TAVOR_TNF_EXIT(tavor_mcg_walk_mgid_hash); 1363 return (curr_indx); 1364 } 1365 1366 1367 /* 1368 * tavor_mcg_setup_new_hdr() 1369 * Context: Can be called from interrupt or base context. 
 */
static void
tavor_mcg_setup_new_hdr(tavor_mcghdl_t mcg, tavor_hw_mcg_t *mcg_hdr,
    ib_gid_t mgid, tavor_rsrc_t *mcg_rsrc)
{
	TAVOR_TNF_ENTER(tavor_mcg_setup_new_hdr);

	/*
	 * Fill in the fields of the "shadow" entry used by software
	 * to track the MCG hardware entry.  The MCG resource handle is
	 * saved here so it can be freed when the entry is later removed.
	 */
	mcg->mcg_mgid_h = mgid.gid_prefix;
	mcg->mcg_mgid_l = mgid.gid_guid;
	mcg->mcg_rsrcp = mcg_rsrc;
	mcg->mcg_next_indx = 0;
	mcg->mcg_num_qps = 0;

	/*
	 * Fill the header fields of the MCG entry (in the temporary copy).
	 * The rest of the entry (the QP list) is filled in by the caller.
	 */
	mcg_hdr->mgid_h = mgid.gid_prefix;
	mcg_hdr->mgid_l = mgid.gid_guid;
	mcg_hdr->next_gid_indx = 0;

	TAVOR_TNF_EXIT(tavor_mcg_setup_new_hdr);
}


/*
 * tavor_mcg_hash_list_remove()
 *    Unlink the MCG entry at "curr_indx" from its hash chain, using
 *    "prev_indx" (the preceding entry) and the temporary buffer
 *    "mcg_entry" for the firmware READ_MGM/WRITE_MGM accesses.
 *    Context: Can be called only from user or kernel context.
 */
static int
tavor_mcg_hash_list_remove(tavor_state_t *state, uint_t curr_indx,
    uint_t prev_indx, tavor_hw_mcg_t *mcg_entry)
{
	tavor_mcghdl_t		curr_mcg, prev_mcg, next_mcg;
	uint_t			next_indx;
	int			status;

	/* Get the pointer to "shadow" list for current entry */
	curr_mcg = &state->ts_mcghdl[curr_indx];

	/*
	 * If this is the first entry on a hash chain, then attempt to replace
	 * the entry with the next entry on the chain.  If there are no
	 * subsequent entries on the chain, then this is the only entry and
	 * should be invalidated.
	 */
	if (curr_indx == prev_indx) {

		/*
		 * If this is the only entry on the chain, then invalidate it.
		 * Note: Invalidating an MCG entry means writing all zeros
		 * to the entry.  This is only necessary for those MCG
		 * entries that are the "head" entries of the individual hash
		 * chains.  Regardless of whether this operation returns
		 * success or failure, return that result to the caller.
		 */
		next_indx = curr_mcg->mcg_next_indx;
		if (next_indx == 0) {
			status = tavor_mcg_entry_invalidate(state, mcg_entry,
			    curr_indx);
			bzero(curr_mcg, sizeof (struct tavor_sw_mcg_list_s));
			TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
			return (status);
		}

		/*
		 * Otherwise, this is just the first entry on the chain, so
		 * grab the next one
		 */
		next_mcg = &state->ts_mcghdl[next_indx];

		/*
		 * Read the next MCG entry into the temporary MCG.  Note:
		 * In general, this operation shouldn't fail.  If it does,
		 * then it is an indication that something (probably in HW,
		 * but maybe in SW) has gone seriously wrong.
		 */
		status = tavor_read_mgm_cmd_post(state, mcg_entry, next_indx,
		    TAVOR_CMD_NOSLEEP_SPIN);
		if (status != TAVOR_CMD_SUCCESS) {
			TAVOR_WARNING(state, "failed to read MCG entry");
			cmn_err(CE_CONT, "Tavor: READ_MGM command failed: "
			    "%08x\n", status);
			TNF_PROBE_2(tavor_mcg_hash_list_rem_read_mgm_cmd_fail,
			    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
			    tnf_uint, indx, next_indx);
			TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Copy/Write the temporary MCG back to the hardware MCG list
		 * using the current index.  This essentially removes the
		 * current MCG entry from the list by writing over it with
		 * the next one.  If this is successful, then we can do the
		 * same operation for the "shadow" list.  And we can also
		 * free up the Tavor MCG entry resource that was associated
		 * with the (old) next entry.  Note: In general, this
		 * operation shouldn't fail.  If it does, then it is an
		 * indication that something (probably in HW, but maybe in SW)
		 * has gone seriously wrong.
		 */
		status = tavor_write_mgm_cmd_post(state, mcg_entry, curr_indx,
		    TAVOR_CMD_NOSLEEP_SPIN);
		if (status != TAVOR_CMD_SUCCESS) {
			TAVOR_WARNING(state, "failed to write MCG entry");
			cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
			    "%08x\n", status);
			TNF_PROBE_2(tavor_mcg_hash_list_rem_write_mgm_cmd_fail,
			    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
			    tnf_uint, indx, curr_indx);
			TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Copy all the software tracking information from the next
		 * entry on the "shadow" MCG list into the current entry on
		 * the list.  Then invalidate (zero out) the other "shadow"
		 * list entry.
		 */
		bcopy(next_mcg, curr_mcg, sizeof (struct tavor_sw_mcg_list_s));
		bzero(next_mcg, sizeof (struct tavor_sw_mcg_list_s));

		/*
		 * Free up the Tavor MCG entry resource used by the "next"
		 * MCG entry (now copied into "curr_mcg" by the bcopy above).
		 * That resource is no longer needed by any MCG entry which
		 * is first on a hash chain (like the "next" entry has just
		 * become).
		 */
		tavor_rsrc_free(state, &curr_mcg->mcg_rsrcp);

		TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
		return (DDI_SUCCESS);
	}

	/*
	 * Else if this is the last entry on the hash chain (or a middle
	 * entry), then we update the previous entry's "next_gid_index" field
	 * to make it point instead to the next entry on the chain.  By
	 * skipping over the removed entry in this way, we can then free up
	 * any resources associated with the current entry.  Note: We don't
	 * need to invalidate the "skipped over" hardware entry because it
	 * will no longer be connected to any hash chains, and if/when it is
	 * finally re-used, it will be written with entirely new values.
	 */

	/*
	 * Read the next MCG entry into the temporary MCG.  Note: In general,
	 * this operation shouldn't fail.  If it does, then it is an
	 * indication that something (probably in HW, but maybe in SW) has
	 * gone seriously wrong.
	 */
	status = tavor_read_mgm_cmd_post(state, mcg_entry, prev_indx,
	    TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		TAVOR_WARNING(state, "failed to read MCG entry");
		cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n",
		    status);
		TNF_PROBE_2(tavor_mcg_hash_list_rem_read_mgm_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
		    tnf_uint, indx, prev_indx);
		TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Finally, we update the "next_gid_indx" field in the temporary MCG
	 * and attempt to write the entry back into the Tavor MCG table.  If
	 * this succeeds, then we update the "shadow" list to reflect the
	 * change, free up the Tavor MCG entry resource that was associated
	 * with the current entry, and return success.  Note: In general,
	 * this operation shouldn't fail.  If it does, then it is an indication
	 * that something (probably in HW, but maybe in SW) has gone seriously
	 * wrong.
	 */
	mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
	status = tavor_write_mgm_cmd_post(state, mcg_entry, prev_indx,
	    TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		TAVOR_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
		    status);
		TNF_PROBE_2(tavor_mcg_hash_list_rem_write_mgm_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
		    tnf_uint, indx, prev_indx);
		TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Get the pointer to the "shadow" MCG list entry for the previous
	 * MCG.  Update its "mcg_next_indx" to point to the next entry
	 * (the one after the current entry).  Note: This next index may be
	 * zero, indicating the end of the list.
	 */
	prev_mcg = &state->ts_mcghdl[prev_indx];
	prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;

	/*
	 * Free up the Tavor MCG entry resource used by the current entry.
	 * This resource is no longer needed because the chain now skips over
	 * the current entry.  Then invalidate (zero out) the current "shadow"
	 * list entry.
	 */
	tavor_rsrc_free(state, &curr_mcg->mcg_rsrcp);
	bzero(curr_mcg, sizeof (struct tavor_sw_mcg_list_s));

	TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
	return (DDI_SUCCESS);
}


/*
 * tavor_mcg_entry_invalidate()
 *    Context: Can be called only from user or kernel context.
 */
static int
tavor_mcg_entry_invalidate(tavor_state_t *state, tavor_hw_mcg_t *mcg_entry,
    uint_t indx)
{
	int		status;

	TAVOR_TNF_ENTER(tavor_mcg_entry_invalidate);

	/*
	 * Invalidate the hardware MCG entry by zeroing out this temporary
	 * MCG and writing it to the hardware.  Note: In general, this
	 * operation shouldn't fail.  If it does, then it is an indication
	 * that something (probably in HW, but maybe in SW) has gone seriously
	 * wrong.
1604 */ 1605 bzero(mcg_entry, TAVOR_MCGMEM_SZ(state)); 1606 status = tavor_write_mgm_cmd_post(state, mcg_entry, indx, 1607 TAVOR_CMD_NOSLEEP_SPIN); 1608 if (status != TAVOR_CMD_SUCCESS) { 1609 TAVOR_WARNING(state, "failed to write MCG entry"); 1610 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n", 1611 status); 1612 TNF_PROBE_2(tavor_mcg_entry_invalidate_write_mgm_cmd_fail, 1613 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1614 tnf_uint, indx, indx); 1615 TAVOR_TNF_EXIT(tavor_mcg_entry_invalidate); 1616 return (ibc_get_ci_failure(0)); 1617 } 1618 1619 TAVOR_TNF_EXIT(tavor_mcg_entry_invalidate); 1620 return (DDI_SUCCESS); 1621 } 1622 1623 1624 /* 1625 * tavor_mgid_is_valid() 1626 * Context: Can be called from interrupt or base context. 1627 */ 1628 static int 1629 tavor_mgid_is_valid(ib_gid_t gid) 1630 { 1631 uint_t topbits, flags, scope; 1632 1633 TAVOR_TNF_ENTER(tavor_mgid_is_valid); 1634 1635 /* 1636 * According to IBA 1.1 specification (section 4.1.1) a valid 1637 * "multicast GID" must have its top eight bits set to all ones 1638 */ 1639 topbits = (gid.gid_prefix >> TAVOR_MCG_TOPBITS_SHIFT) & 1640 TAVOR_MCG_TOPBITS_MASK; 1641 if (topbits != TAVOR_MCG_TOPBITS) { 1642 TNF_PROBE_0(tavor_mgid_is_valid_invbits_fail, TAVOR_TNF_ERROR, 1643 ""); 1644 TAVOR_TNF_EXIT(tavor_mgid_is_valid); 1645 return (0); 1646 } 1647 1648 /* 1649 * The next 4 bits are the "flag" bits. These are valid only 1650 * if they are "0" (which correspond to permanently assigned/ 1651 * "well-known" multicast GIDs) or "1" (for so-called "transient" 1652 * multicast GIDs). All other values are reserved. 
1653 */ 1654 flags = (gid.gid_prefix >> TAVOR_MCG_FLAGS_SHIFT) & 1655 TAVOR_MCG_FLAGS_MASK; 1656 if (!((flags == TAVOR_MCG_FLAGS_PERM) || 1657 (flags == TAVOR_MCG_FLAGS_NONPERM))) { 1658 TNF_PROBE_1(tavor_mgid_is_valid_invflags_fail, TAVOR_TNF_ERROR, 1659 "", tnf_uint, flags, flags); 1660 TAVOR_TNF_EXIT(tavor_mgid_is_valid); 1661 return (0); 1662 } 1663 1664 /* 1665 * The next 4 bits are the "scope" bits. These are valid only 1666 * if they are "2" (Link-local), "5" (Site-local), "8" 1667 * (Organization-local) or "E" (Global). All other values 1668 * are reserved (or currently unassigned). 1669 */ 1670 scope = (gid.gid_prefix >> TAVOR_MCG_SCOPE_SHIFT) & 1671 TAVOR_MCG_SCOPE_MASK; 1672 if (!((scope == TAVOR_MCG_SCOPE_LINKLOC) || 1673 (scope == TAVOR_MCG_SCOPE_SITELOC) || 1674 (scope == TAVOR_MCG_SCOPE_ORGLOC) || 1675 (scope == TAVOR_MCG_SCOPE_GLOBAL))) { 1676 TNF_PROBE_1(tavor_mgid_is_valid_invscope_fail, TAVOR_TNF_ERROR, 1677 "", tnf_uint, scope, scope); 1678 TAVOR_TNF_EXIT(tavor_mgid_is_valid); 1679 return (0); 1680 } 1681 1682 /* 1683 * If it passes all of the above checks, then we will consider it 1684 * a valid multicast GID. 1685 */ 1686 TAVOR_TNF_EXIT(tavor_mgid_is_valid); 1687 return (1); 1688 } 1689 1690 1691 /* 1692 * tavor_mlid_is_valid() 1693 * Context: Can be called from interrupt or base context. 1694 */ 1695 static int 1696 tavor_mlid_is_valid(ib_lid_t lid) 1697 { 1698 TAVOR_TNF_ENTER(tavor_mlid_is_valid); 1699 1700 /* 1701 * According to IBA 1.1 specification (section 4.1.1) a valid 1702 * "multicast DLID" must be between 0xC000 and 0xFFFE. 1703 */ 1704 if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) { 1705 TNF_PROBE_1(tavor_mlid_is_valid_invdlid_fail, TAVOR_TNF_ERROR, 1706 "", tnf_uint, mlid, lid); 1707 TAVOR_TNF_EXIT(tavor_mlid_is_valid); 1708 return (0); 1709 } 1710 1711 TAVOR_TNF_EXIT(tavor_mlid_is_valid); 1712 return (1); 1713 } 1714 1715 1716 /* 1717 * tavor_pd_alloc() 1718 * Context: Can be called only from user or kernel context. 
1719 */ 1720 int 1721 tavor_pd_alloc(tavor_state_t *state, tavor_pdhdl_t *pdhdl, uint_t sleepflag) 1722 { 1723 tavor_rsrc_t *rsrc; 1724 tavor_pdhdl_t pd; 1725 int status; 1726 1727 TAVOR_TNF_ENTER(tavor_pd_alloc); 1728 1729 /* 1730 * Allocate the software structure for tracking the protection domain 1731 * (i.e. the Tavor Protection Domain handle). By default each PD 1732 * structure will have a unique PD number assigned to it. All that 1733 * is necessary is for software to initialize the PD reference count 1734 * (to zero) and return success. 1735 */ 1736 status = tavor_rsrc_alloc(state, TAVOR_PDHDL, 1, sleepflag, &rsrc); 1737 if (status != DDI_SUCCESS) { 1738 TNF_PROBE_0(tavor_pd_alloc_rsrcalloc_fail, TAVOR_TNF_ERROR, ""); 1739 TAVOR_TNF_EXIT(tavor_pd_alloc); 1740 return (IBT_INSUFF_RESOURCE); 1741 } 1742 pd = (tavor_pdhdl_t)rsrc->tr_addr; 1743 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd)) 1744 1745 pd->pd_refcnt = 0; 1746 *pdhdl = pd; 1747 1748 TAVOR_TNF_EXIT(tavor_pd_alloc); 1749 return (DDI_SUCCESS); 1750 } 1751 1752 1753 /* 1754 * tavor_pd_free() 1755 * Context: Can be called only from user or kernel context. 1756 */ 1757 int 1758 tavor_pd_free(tavor_state_t *state, tavor_pdhdl_t *pdhdl) 1759 { 1760 tavor_rsrc_t *rsrc; 1761 tavor_pdhdl_t pd; 1762 1763 TAVOR_TNF_ENTER(tavor_pd_free); 1764 1765 /* 1766 * Pull all the necessary information from the Tavor Protection Domain 1767 * handle. This is necessary here because the resource for the 1768 * PD is going to be freed up as part of this operation. 1769 */ 1770 pd = *pdhdl; 1771 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd)) 1772 rsrc = pd->pd_rsrcp; 1773 1774 /* 1775 * Check the PD reference count. If the reference count is non-zero, 1776 * then it means that this protection domain is still referenced by 1777 * some memory region, queue pair, address handle, or other IB object 1778 * If it is non-zero, then return an error. Otherwise, free the 1779 * Tavor resource and return success. 
1780 */ 1781 if (pd->pd_refcnt != 0) { 1782 TNF_PROBE_1(tavor_pd_free_refcnt_fail, TAVOR_TNF_ERROR, "", 1783 tnf_int, refcnt, pd->pd_refcnt); 1784 TAVOR_TNF_EXIT(tavor_pd_free); 1785 return (IBT_PD_IN_USE); 1786 } 1787 1788 /* Free the Tavor Protection Domain handle */ 1789 tavor_rsrc_free(state, &rsrc); 1790 1791 /* Set the pdhdl pointer to NULL and return success */ 1792 *pdhdl = (tavor_pdhdl_t)NULL; 1793 1794 TAVOR_TNF_EXIT(tavor_pd_free); 1795 return (DDI_SUCCESS); 1796 } 1797 1798 1799 /* 1800 * tavor_pd_refcnt_inc() 1801 * Context: Can be called from interrupt or base context. 1802 */ 1803 void 1804 tavor_pd_refcnt_inc(tavor_pdhdl_t pd) 1805 { 1806 /* Increment the protection domain's reference count */ 1807 mutex_enter(&pd->pd_lock); 1808 TNF_PROBE_1_DEBUG(tavor_pd_refcnt_inc, TAVOR_TNF_TRACE, "", 1809 tnf_uint, refcnt, pd->pd_refcnt); 1810 pd->pd_refcnt++; 1811 mutex_exit(&pd->pd_lock); 1812 1813 } 1814 1815 1816 /* 1817 * tavor_pd_refcnt_dec() 1818 * Context: Can be called from interrupt or base context. 1819 */ 1820 void 1821 tavor_pd_refcnt_dec(tavor_pdhdl_t pd) 1822 { 1823 /* Decrement the protection domain's reference count */ 1824 mutex_enter(&pd->pd_lock); 1825 pd->pd_refcnt--; 1826 TNF_PROBE_1_DEBUG(tavor_pd_refcnt_dec, TAVOR_TNF_TRACE, "", 1827 tnf_uint, refcnt, pd->pd_refcnt); 1828 mutex_exit(&pd->pd_lock); 1829 1830 } 1831 1832 1833 /* 1834 * tavor_port_query() 1835 * Context: Can be called only from user or kernel context. 
 */
int
tavor_port_query(tavor_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
{
	sm_portinfo_t		portinfo;
	sm_guidinfo_t		guidinfo;
	sm_pkey_table_t		pkeytable;
	ib_gid_t		*sgid;
	uint_t			sgid_max, pkey_max, tbl_size;
	int			i, j, indx, status;

	TAVOR_TNF_ENTER(tavor_port_query);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi))

	/* Validate that specified port number is legal */
	if (!tavor_portnum_is_valid(state, port)) {
		TNF_PROBE_1(tavor_port_query_inv_portnum_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, port, port);
		TAVOR_TNF_EXIT(tavor_port_query);
		return (IBT_HCA_PORT_INVALID);
	}

	/*
	 * We use the Tavor MAD_IFC command to post a GetPortInfo MAD
	 * to the firmware (for the specified port number).  This returns
	 * a full PortInfo MAD (in "portinfo") which we subsequently
	 * parse to fill in the "ibt_hca_portinfo_t" structure returned
	 * to the IBTF.
	 */
	status = tavor_getportinfo_cmd_post(state, port,
	    TAVOR_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: GetPortInfo (port %02d) command "
		    "failed: %08x\n", port, status);
		TNF_PROBE_1(tavor_port_query_getportinfo_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
		TAVOR_TNF_EXIT(tavor_port_query);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Parse the PortInfo MAD and fill in the IBTF structure.  Most
	 * fields are copied through verbatim from the MAD.
	 */
	pi->p_base_lid = portinfo.LID;
	pi->p_qkey_violations = portinfo.Q_KeyViolations;
	pi->p_pkey_violations = portinfo.P_KeyViolations;
	pi->p_sm_sl = portinfo.MasterSMSL;
	pi->p_sm_lid = portinfo.MasterSMLID;
	pi->p_linkstate = portinfo.PortState;
	pi->p_port_num = portinfo.LocalPortNum;
	pi->p_phys_state = portinfo.PortPhysicalState;
	pi->p_width_supported = portinfo.LinkWidthSupported;
	pi->p_width_enabled = portinfo.LinkWidthEnabled;
	pi->p_width_active = portinfo.LinkWidthActive;
	pi->p_speed_supported = portinfo.LinkSpeedSupported;
	pi->p_speed_enabled = portinfo.LinkSpeedEnabled;
	pi->p_speed_active = portinfo.LinkSpeedActive;
	/* NOTE(review): p_mtu is reported as MTUCap, not NeighborMTU */
	pi->p_mtu = portinfo.MTUCap;
	pi->p_lmc = portinfo.LMC;
	pi->p_max_vl = portinfo.VLCap;
	pi->p_subnet_timeout = portinfo.SubnetTimeOut;
	pi->p_msg_sz = ((uint32_t)1 << TAVOR_QP_LOG_MAX_MSGSZ);
	/* Table sizes come from the configuration profile, not the MAD */
	tbl_size = state->ts_cfg_profile->cp_log_max_gidtbl;
	pi->p_sgid_tbl_sz = (1 << tbl_size);
	tbl_size = state->ts_cfg_profile->cp_log_max_pkeytbl;
	pi->p_pkey_tbl_sz = (1 << tbl_size);

	/*
	 * Convert InfiniBand-defined port capability flags to the format
	 * specified by the IBTF
	 */
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM)
		pi->p_capabilities |= IBT_PORT_CAP_SM;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED)
		pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_DM;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_VENDOR;

	/*
	 * Fill in the SGID table.  Since the only access to the Tavor
	 * GID tables is through the firmware's MAD_IFC interface, we
	 * post as many GetGUIDInfo MADs as necessary to read in the entire
	 * contents of the SGID table (for the specified port).  Note: The
	 * GetGUIDInfo command only gets eight GUIDs per operation.  These
	 * GUIDs are then appended to the GID prefix for the port (from the
	 * GetPortInfo above) to form the entire SGID table.
	 */
	for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) {
		/* "i >> 3" is the 8-entry GUID block index for this MAD */
		status = tavor_getguidinfo_cmd_post(state, port, i >> 3,
		    TAVOR_SLEEPFLAG_FOR_CONTEXT(), &guidinfo);
		if (status != TAVOR_CMD_SUCCESS) {
			cmn_err(CE_CONT, "Tavor: GetGUIDInfo (port %02d) "
			    "command failed: %08x\n", port, status);
			TNF_PROBE_1(tavor_port_query_getguidinfo_cmd_fail,
			    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
			TAVOR_TNF_EXIT(tavor_port_query);
			return (ibc_get_ci_failure(0));
		}

		/* Figure out how many of the entries are valid */
		sgid_max = min((pi->p_sgid_tbl_sz - i), 8);
		for (j = 0; j < sgid_max; j++) {
			indx = (i + j);
			sgid = &pi->p_sgid_tbl[indx];
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid))
			sgid->gid_prefix = portinfo.GidPrefix;
			sgid->gid_guid = guidinfo.GUIDBlocks[j];
		}
	}

	/*
	 * Fill in the PKey table.  Just as for the GID tables above, the
	 * only access to the Tavor PKey tables is through the firmware's
	 * MAD_IFC interface.  We post as many GetPKeyTable MADs as necessary
	 * to read in the entire contents of the PKey table (for the specified
	 * port).  Note: The GetPKeyTable command only gets 32 PKeys per
	 * operation.
	 */
	for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) {
		status = tavor_getpkeytable_cmd_post(state, port, i,
		    TAVOR_SLEEPFLAG_FOR_CONTEXT(), &pkeytable);
		if (status != TAVOR_CMD_SUCCESS) {
			cmn_err(CE_CONT, "Tavor: GetPKeyTable (port %02d) "
			    "command failed: %08x\n", port, status);
			TNF_PROBE_1(tavor_port_query_getpkeytable_cmd_fail,
			    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
			TAVOR_TNF_EXIT(tavor_port_query);
			return (ibc_get_ci_failure(0));
		}

		/* Figure out how many of the entries are valid */
		pkey_max = min((pi->p_pkey_tbl_sz - i), 32);
		for (j = 0; j < pkey_max; j++) {
			indx = (i + j);
			pi->p_pkey_tbl[indx] = pkeytable.P_KeyTableBlocks[j];
		}
	}

	TAVOR_TNF_EXIT(tavor_port_query);
	return (DDI_SUCCESS);
}


/*
 * tavor_port_modify()
 *    Context: Can be called only from user or kernel context.
 */
/* ARGSUSED */
int
tavor_port_modify(tavor_state_t *state, uint8_t port,
    ibt_port_modify_flags_t flags, uint8_t init_type)
{
	sm_portinfo_t		portinfo;
	uint32_t		capmask, reset_qkey;
	int			status;

	TAVOR_TNF_ENTER(tavor_port_modify);

	/*
	 * Return an error if either of the unsupported flags are set
	 */
	if ((flags & IBT_PORT_SHUTDOWN) ||
	    (flags & IBT_PORT_SET_INIT_TYPE)) {
		TNF_PROBE_1(tavor_port_modify_inv_flags_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, flags, flags);
		TAVOR_TNF_EXIT(tavor_port_modify);
		return (IBT_NOT_SUPPORTED);
	}

	/*
	 * Determine whether we are trying to reset the QKey counter
	 */
	reset_qkey = (flags & IBT_PORT_RESET_QKEY) ?
	    1 : 0;

	/* Validate that specified port number is legal */
	if (!tavor_portnum_is_valid(state, port)) {
		TNF_PROBE_1(tavor_port_modify_inv_portnum_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, port, port);
		TAVOR_TNF_EXIT(tavor_port_modify);
		return (IBT_HCA_PORT_INVALID);
	}

	/*
	 * Use the Tavor MAD_IFC command to post a GetPortInfo MAD to the
	 * firmware (for the specified port number).  This returns a full
	 * PortInfo MAD (in "portinfo") from which we pull the current
	 * capability mask.  We then modify the capability mask as directed
	 * by the "pmod_flags" field, and write the updated capability mask
	 * using the Tavor SET_IB command (below).
	 */
	status = tavor_getportinfo_cmd_post(state, port,
	    TAVOR_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
	if (status != TAVOR_CMD_SUCCESS) {
		TNF_PROBE_1(tavor_port_modify_getportinfo_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
		TAVOR_TNF_EXIT(tavor_port_modify);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Convert InfiniBand-defined port capability flags to the format
	 * specified by the IBTF.  Specifically, we modify the capability
	 * mask based on the specified values.  For each capability, the
	 * RESET flag takes precedence over the SET flag.
	 */
	capmask = portinfo.CapabilityMask;

	if (flags & IBT_PORT_RESET_SM)
		capmask &= ~SM_CAP_MASK_IS_SM;
	else if (flags & IBT_PORT_SET_SM)
		capmask |= SM_CAP_MASK_IS_SM;

	if (flags & IBT_PORT_RESET_SNMP)
		capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD;
	else if (flags & IBT_PORT_SET_SNMP)
		capmask |= SM_CAP_MASK_IS_SNMP_SUPPD;

	if (flags & IBT_PORT_RESET_DEVMGT)
		capmask &= ~SM_CAP_MASK_IS_DM_SUPPD;
	else if (flags & IBT_PORT_SET_DEVMGT)
		capmask |= SM_CAP_MASK_IS_DM_SUPPD;

	if (flags & IBT_PORT_RESET_VENDOR)
		capmask &= ~SM_CAP_MASK_IS_VM_SUPPD;
	else if (flags & IBT_PORT_SET_VENDOR)
		capmask |= SM_CAP_MASK_IS_VM_SUPPD;

	/*
	 * Use the Tavor SET_IB command to update the capability mask and
	 * (possibly) reset the QKey violation counter for the specified port.
	 * Note: In general, this operation shouldn't fail.  If it does, then
	 * it is an indication that something (probably in HW, but maybe in
	 * SW) has gone seriously wrong.
	 */
	status = tavor_set_ib_cmd_post(state, capmask, port, reset_qkey,
	    TAVOR_SLEEPFLAG_FOR_CONTEXT());
	if (status != TAVOR_CMD_SUCCESS) {
		TAVOR_WARNING(state, "failed to modify port capabilities");
		cmn_err(CE_CONT, "Tavor: SET_IB (port %02d) command failed: "
		    "%08x\n", port, status);
		TNF_PROBE_1(tavor_port_modify_set_ib_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
		TAVOR_TNF_EXIT(tavor_port_modify);
		return (ibc_get_ci_failure(0));
	}

	TAVOR_TNF_EXIT(tavor_port_modify);
	return (DDI_SUCCESS);
}


/*
 * tavor_set_addr_path()
 *    Context: Can be called from interrupt or base context.
 *
 * Note: This routine is used for two purposes.  It is used to fill in the
 * Tavor UDAV fields, and it is used to fill in the address path information
 * for QPs.
 * Because the two Tavor structures are similar, common fields can
 * be filled in here.  Because they are slightly different, however, we pass
 * an additional flag to indicate which type is being filled.
 */
int
tavor_set_addr_path(tavor_state_t *state, ibt_adds_vect_t *av,
    tavor_hw_addr_path_t *path, uint_t type, tavor_qphdl_t qp)
{
	uint_t		gidtbl_sz;

	TAVOR_TNF_ENTER(tavor_set_addr_path);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))

	path->ml_path = av->av_src_path;
	path->rlid = av->av_dlid;
	path->sl = av->av_srvl;

	/* Port number only valid (in "av_port_num") if this is a UDAV */
	if (type == TAVOR_ADDRPATH_UDAV) {
		path->portnum = av->av_port_num;
	}

	/*
	 * Validate (and fill in) static rate.
	 *
	 * The stat_rate_sup is used to decide how to set the rate and
	 * if it is zero, the driver uses the old interface.
	 */
	if (state->ts_devlim.stat_rate_sup) {
		if (av->av_srate == IBT_SRATE_20) {
			path->max_stat_rate = 0; /* 4x@DDR injection rate */
		} else if (av->av_srate == IBT_SRATE_5) {
			path->max_stat_rate = 3; /* 1x@DDR injection rate */
		} else if (av->av_srate == IBT_SRATE_10) {
			path->max_stat_rate = 2; /* 4x@SDR injection rate */
		} else if (av->av_srate == IBT_SRATE_2) {
			path->max_stat_rate = 1; /* 1x@SDR injection rate */
		} else if (av->av_srate == IBT_SRATE_NOT_SPECIFIED) {
			path->max_stat_rate = 0; /* Max */
		} else {
			TNF_PROBE_1(tavor_set_addr_path_inv_srate_fail,
			    TAVOR_TNF_ERROR, "", tnf_uint, srate, av->av_srate);
			TAVOR_TNF_EXIT(tavor_set_addr_path);
			return (IBT_STATIC_RATE_INVALID);
		}
	} else {
		/* Old interface: only SDR rates are expressible */
		if (av->av_srate == IBT_SRATE_10) {
			path->max_stat_rate = 0; /* 4x@SDR injection rate */
		} else if (av->av_srate == IBT_SRATE_2) {
			path->max_stat_rate = 1; /* 1x@SDR injection rate */
		} else if (av->av_srate == IBT_SRATE_NOT_SPECIFIED) {
			path->max_stat_rate = 0; /* Max */
		} else {
			TNF_PROBE_1(tavor_set_addr_path_inv_srate_fail,
			    TAVOR_TNF_ERROR, "", tnf_uint, srate, av->av_srate);
			TAVOR_TNF_EXIT(tavor_set_addr_path);
			return (IBT_STATIC_RATE_INVALID);
		}
	}

	/*
	 * If this is a QP operation save a soft copy (so that later query
	 * operations can return the original srate value).
	 */
	if (qp) {
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(qp->qp_save_srate))
		qp->qp_save_srate = av->av_srate;
	}

	/*
	 * If "grh" flag is set, then check for valid SGID index too.
	 * NOTE(review): valid indices are 0..gidtbl_sz-1, so ">" lets
	 * av_sgid_ix == gidtbl_sz through -- looks like an off-by-one
	 * (">=" expected); confirm before changing behavior.
	 */
	gidtbl_sz = (1 << state->ts_devlim.log_max_gid);
	if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) {
		TNF_PROBE_1(tavor_set_addr_path_inv_sgid_ix_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, sgid_ix, av->av_sgid_ix);
		TAVOR_TNF_EXIT(tavor_set_addr_path);
		return (IBT_SGID_INVALID);
	}

	/*
	 * Fill in all "global" values regardless of the value in the GRH
	 * flag.  Because "grh" is not set unless "av_send_grh" is set, the
	 * hardware will ignore the other "global" values as necessary.  Note:
	 * SW does this here to enable later query operations to return
	 * exactly the same params that were passed when the addr path was
	 * last written.
	 */
	path->grh = av->av_send_grh;
	if (type == TAVOR_ADDRPATH_QP) {
		path->mgid_index = av->av_sgid_ix;
	} else {
		/*
		 * For Tavor UDAV, the "mgid_index" field is the index into
		 * a combined table (not a per-port table).  So some extra
		 * calculations are necessary.
		 */
		path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) +
		    av->av_sgid_ix;
	}
	path->flow_label = av->av_flow;
	path->tclass = av->av_tclass;
	path->hop_limit = av->av_hop;
	path->rgid_h = av->av_dgid.gid_prefix;

	/*
	 * According to Tavor PRM, the (31:0) part of rgid_l must be set to
	 * "0x2" if the 'grh' or 'g' bit is cleared.
	 * It also says that we
	 * only need to do it for UDAV's.  So we enforce that here.
	 *
	 * NOTE: The entire 64 bits worth of GUID info is actually being
	 * preserved (for UDAVs) by the callers of this function
	 * (tavor_ah_alloc() and tavor_ah_modify()) and as long as the
	 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are
	 * "don't care".
	 */
	if ((path->grh) || (type == TAVOR_ADDRPATH_QP)) {
		path->rgid_l = av->av_dgid.gid_guid;
	} else {
		path->rgid_l = 0x2;
	}

	TAVOR_TNF_EXIT(tavor_set_addr_path);
	return (DDI_SUCCESS);
}


/*
 * tavor_get_addr_path()
 *    Context: Can be called from interrupt or base context.
 *
 * Note: Just like tavor_set_addr_path() above, this routine is used for two
 * purposes.  It is used to read in the Tavor UDAV fields, and it is used to
 * read in the address path information for QPs.  Because the two Tavor
 * structures are similar, common fields can be read in here.  But because
 * they are slightly different, we pass an additional flag to indicate which
 * type is being read.
 */
void
tavor_get_addr_path(tavor_state_t *state, tavor_hw_addr_path_t *path,
    ibt_adds_vect_t *av, uint_t type, tavor_qphdl_t qp)
{
	uint_t		gidtbl_sz;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))

	av->av_src_path = path->ml_path;
	av->av_port_num = path->portnum;
	av->av_dlid = path->rlid;
	av->av_srvl = path->sl;

	/*
	 * Set "av_ipd" value from max_stat_rate.
	 */
	if (qp) {
		/*
		 * If a QP operation use the soft copy saved by
		 * tavor_set_addr_path() above.
		 */
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(qp->qp_save_srate))
		av->av_srate = qp->qp_save_srate;
	} else {
		/*
		 * The stat_rate_sup is used to decide how the srate value is
		 * set and if it is zero, the driver uses the old interface.
		 * These mappings are the inverse of the ones in
		 * tavor_set_addr_path().
		 */
		if (state->ts_devlim.stat_rate_sup) {
			if (path->max_stat_rate == 0) {
				av->av_srate = IBT_SRATE_20; /* 4x@DDR rate */
			} else if (path->max_stat_rate == 1) {
				av->av_srate = IBT_SRATE_2; /* 1x@SDR rate */
			} else if (path->max_stat_rate == 2) {
				av->av_srate = IBT_SRATE_10; /* 4x@SDR rate */
			} else if (path->max_stat_rate == 3) {
				av->av_srate = IBT_SRATE_5; /* 1x@DDR rate */
			}
		} else {
			if (path->max_stat_rate == 0) {
				av->av_srate = IBT_SRATE_10; /* 4x@SDR rate */
			} else if (path->max_stat_rate == 1) {
				av->av_srate = IBT_SRATE_2; /* 1x@SDR rate */
			}
		}
	}

	/*
	 * Extract all "global" values regardless of the value in the GRH
	 * flag.  Because "av_send_grh" is set only if "grh" is set, software
	 * knows to ignore the other "global" values as necessary.  Note: SW
	 * does it this way to enable these query operations to return exactly
	 * the same params that were passed when the addr path was last written.
	 */
	av->av_send_grh = path->grh;
	if (type == TAVOR_ADDRPATH_QP) {
		av->av_sgid_ix = path->mgid_index;
	} else {
		/*
		 * For Tavor UDAV, the "mgid_index" field is the index into
		 * a combined table (not a per-port table).  So some extra
		 * calculations are necessary.
		 */
		gidtbl_sz = (1 << state->ts_devlim.log_max_gid);
		av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) *
		    gidtbl_sz);
	}
	av->av_flow = path->flow_label;
	av->av_tclass = path->tclass;
	av->av_hop = path->hop_limit;
	av->av_dgid.gid_prefix = path->rgid_h;
	av->av_dgid.gid_guid = path->rgid_l;
}


/*
 * tavor_portnum_is_valid()
 *    Context: Can be called from interrupt or base context.
2313 */ 2314 int 2315 tavor_portnum_is_valid(tavor_state_t *state, uint_t portnum) 2316 { 2317 uint_t max_port; 2318 2319 max_port = state->ts_cfg_profile->cp_num_ports; 2320 if ((portnum <= max_port) && (portnum != 0)) { 2321 return (1); 2322 } else { 2323 return (0); 2324 } 2325 } 2326 2327 2328 /* 2329 * tavor_pkeyindex_is_valid() 2330 * Context: Can be called from interrupt or base context. 2331 */ 2332 int 2333 tavor_pkeyindex_is_valid(tavor_state_t *state, uint_t pkeyindx) 2334 { 2335 uint_t max_pkeyindx; 2336 2337 max_pkeyindx = 1 << state->ts_cfg_profile->cp_log_max_pkeytbl; 2338 if (pkeyindx < max_pkeyindx) { 2339 return (1); 2340 } else { 2341 return (0); 2342 } 2343 } 2344 2345 2346 /* 2347 * tavor_queue_alloc() 2348 * Context: Can be called from interrupt or base context. 2349 */ 2350 int 2351 tavor_queue_alloc(tavor_state_t *state, tavor_qalloc_info_t *qa_info, 2352 uint_t sleepflag) 2353 { 2354 ddi_dma_attr_t dma_attr; 2355 int (*callback)(caddr_t); 2356 uint64_t realsize, alloc_mask; 2357 uint_t dma_xfer_mode, type; 2358 int flag, status; 2359 2360 TAVOR_TNF_ENTER(tavor_queue_alloc); 2361 2362 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info)) 2363 2364 /* Set the callback flag appropriately */ 2365 callback = (sleepflag == TAVOR_SLEEP) ? DDI_DMA_SLEEP : 2366 DDI_DMA_DONTWAIT; 2367 2368 /* 2369 * Initialize many of the default DMA attributes. Then set additional 2370 * alignment restrictions as necessary for the queue memory. 
	 * Also respect the configured value for IOMMU bypass
	 */
	tavor_dma_attr_init(&dma_attr);
	dma_attr.dma_attr_align = qa_info->qa_bind_align;
	type = state->ts_cfg_profile->cp_iommu_bypass;
	if (type == TAVOR_BINDMEM_BYPASS) {
		dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
	}

	/* Allocate a DMA handle */
	status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr, callback, NULL,
	    &qa_info->qa_dmahdl);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_queue_alloc_dmahdl_fail, TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_queue_alloc);
		return (DDI_FAILURE);
	}

	/*
	 * Determine the amount of memory to allocate, depending on the values
	 * in "qa_bind_align" and "qa_alloc_align".  The problem we are trying
	 * to solve here is that allocating a DMA handle with IOMMU bypass
	 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments
	 * that are less than the page size.  Since we may need stricter
	 * alignments on the memory allocated by ddi_dma_mem_alloc() (e.g. in
	 * Tavor QP work queue memory allocation), we use the following method
	 * to calculate how much additional memory to request, and we enforce
	 * our own alignment on the allocated result.
	 */
	alloc_mask = qa_info->qa_alloc_align - 1;
	if (qa_info->qa_bind_align == qa_info->qa_alloc_align) {
		realsize = qa_info->qa_size;
	} else {
		/* Over-allocate so we can round the pointer up below */
		realsize = qa_info->qa_size + alloc_mask;
	}

	/*
	 * If we are to allocate the queue from system memory, then use
	 * ddi_dma_mem_alloc() to find the space.  Otherwise, if we are to
	 * allocate the queue from locally-attached DDR memory, then use the
	 * vmem allocator to find the space.  In either case, return a pointer
	 * to the memory range allocated (including any necessary alignment
	 * adjustments), the "real" memory pointer, the "real" size, and a
	 * ddi_acc_handle_t to use when reading from/writing to the memory.
	 */
	if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_NORMAL) {

		/*
		 * Determine whether to map STREAMING or CONSISTENT.  This is
		 * based on the value set in the configuration profile at
		 * attach time.
		 */
		dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent;

		/* Allocate system memory for the queue */
		status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize,
		    &state->ts_reg_accattr, dma_xfer_mode, callback, NULL,
		    (caddr_t *)&qa_info->qa_buf_real,
		    (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl);
		if (status != DDI_SUCCESS) {
			ddi_dma_free_handle(&qa_info->qa_dmahdl);
			TNF_PROBE_0(tavor_queue_alloc_dma_memalloc_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_queue_alloc);
			return (DDI_FAILURE);
		}

		/*
		 * Save temporary copy of the real pointer.  (This may be
		 * modified in the last step below).
		 */
		qa_info->qa_buf_aligned = qa_info->qa_buf_real;

	} else if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_USERLAND) {

		/* Allocate userland mappable memory for the queue */
		flag = (sleepflag == TAVOR_SLEEP) ? DDI_UMEM_SLEEP :
		    DDI_UMEM_NOSLEEP;
		qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag,
		    &qa_info->qa_umemcookie);
		if (qa_info->qa_buf_real == NULL) {
			ddi_dma_free_handle(&qa_info->qa_dmahdl);
			TNF_PROBE_0(tavor_queue_alloc_umem_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_queue_alloc);
			return (DDI_FAILURE);
		}

		/*
		 * Save temporary copy of the real pointer.  (This may be
		 * modified in the last step below).
		 *
		 * NOTE(review): unlike the other two paths, this path does
		 * not set qa_buf_realsz or qa_acchdl -- tavor_queue_free()
		 * uses only qa_umemcookie for USERLAND queues, but confirm
		 * no other consumer reads those fields for this location.
		 */
		qa_info->qa_buf_aligned = qa_info->qa_buf_real;

	} else { /* TAVOR_QUEUE_LOCATION_INDDR */

		/* Allocate DDR memory for the queue */
		flag = (sleepflag == TAVOR_SLEEP) ? VM_SLEEP : VM_NOSLEEP;
		qa_info->qa_buf_real = (uint32_t *)vmem_xalloc(
		    state->ts_ddrvmem, realsize, qa_info->qa_bind_align, 0, 0,
		    NULL, NULL, flag);
		if (qa_info->qa_buf_real == NULL) {
			ddi_dma_free_handle(&qa_info->qa_dmahdl);
			TNF_PROBE_0(tavor_queue_alloc_vmxa_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_queue_alloc);
			return (DDI_FAILURE);
		}

		/*
		 * Since "qa_buf_real" will be a PCI address (the offset into
		 * the DDR memory), we first need to do some calculations to
		 * convert it to its kernel mapped address.  (Note: This may
		 * be modified again below, when any additional "alloc"
		 * alignment constraint is applied).
		 */
		qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
		    state->ts_reg_ddr_baseaddr) + ((uintptr_t)
		    qa_info->qa_buf_real - state->ts_ddr.ddr_baseaddr));
		qa_info->qa_buf_realsz = realsize;
		qa_info->qa_acchdl = state->ts_reg_ddrhdl;
	}

	/*
	 * The last step is to ensure that the final address ("qa_buf_aligned")
	 * has the appropriate "alloc" alignment restriction applied to it
	 * (if necessary).
	 */
	if (qa_info->qa_bind_align != qa_info->qa_alloc_align) {
		qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
		    qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask);
	}

	TAVOR_TNF_EXIT(tavor_queue_alloc);
	return (DDI_SUCCESS);
}


/*
 * tavor_queue_free()
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_queue_free(tavor_state_t *state, tavor_qalloc_info_t *qa_info)
{
	TAVOR_TNF_ENTER(tavor_queue_free);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))

	/*
	 * Depending on how (i.e. from where) we allocated the memory for
	 * this queue, we choose the appropriate method for releasing the
	 * resources.  These three cases mirror the three allocation paths
	 * in tavor_queue_alloc() above.
	 */
	if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_NORMAL) {

		ddi_dma_mem_free(&qa_info->qa_acchdl);

	} else if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_USERLAND) {

		ddi_umem_free(qa_info->qa_umemcookie);

	} else { /* TAVOR_QUEUE_LOCATION_INDDR */

		vmem_xfree(state->ts_ddrvmem, qa_info->qa_buf_real,
		    qa_info->qa_buf_realsz);
	}

	/* Always free the dma handle */
	ddi_dma_free_handle(&qa_info->qa_dmahdl);

	TAVOR_TNF_EXIT(tavor_queue_free);
}


/*
 * tavor_dma_attr_init()
 *    Fill in the driver's default (fully permissive) DMA attributes.
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_dma_attr_init(ddi_dma_attr_t *dma_attr)
{
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_attr))

	dma_attr->dma_attr_version = DMA_ATTR_V0;
	dma_attr->dma_attr_addr_lo = 0;
	dma_attr->dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFFull;
	dma_attr->dma_attr_count_max = 0xFFFFFFFFFFFFFFFFull;
	dma_attr->dma_attr_align = 1;
	dma_attr->dma_attr_burstsizes = 0x3FF;
	dma_attr->dma_attr_minxfer = 1;
	dma_attr->dma_attr_maxxfer = 0xFFFFFFFFFFFFFFFFull;
	dma_attr->dma_attr_seg = 0xFFFFFFFFFFFFFFFFull;
	dma_attr->dma_attr_sgllen = 0x7FFFFFFF;
	dma_attr->dma_attr_granular = 1;
	dma_attr->dma_attr_flags = 0;
}

/*
 * tavor_create_fmr_pool()
 *    Create a pool of FMRs.
 *    Context: Can be called from kernel context only.
2573 */ 2574 int 2575 tavor_create_fmr_pool(tavor_state_t *state, tavor_pdhdl_t pd, 2576 ibt_fmr_pool_attr_t *fmr_attr, tavor_fmrhdl_t *fmrpoolp) 2577 { 2578 tavor_fmrhdl_t fmrpool; 2579 tavor_fmr_list_t *fmr, *fmr_next; 2580 tavor_mrhdl_t mr; 2581 char taskqname[48]; 2582 char *errormsg; 2583 int status; 2584 int sleep; 2585 int i; 2586 2587 TAVOR_TNF_ENTER(tavor_create_fmr_pool); 2588 2589 sleep = (fmr_attr->fmr_flags & IBT_MR_SLEEP) ? TAVOR_SLEEP : 2590 TAVOR_NOSLEEP; 2591 if ((sleep == TAVOR_SLEEP) && 2592 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 2593 TNF_PROBE_0(tavor_create_fmr_pool_invalid_flags, 2594 TAVOR_TNF_ERROR, ""); 2595 TAVOR_TNF_EXIT(tavor_create_fmr_pool); 2596 return (IBT_INVALID_PARAM); 2597 } 2598 2599 fmrpool = (tavor_fmrhdl_t)kmem_zalloc(sizeof (*fmrpool), sleep); 2600 if (fmrpool == NULL) { 2601 /* Set "status" and "errormsg" and goto failure */ 2602 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed FMR Pool handle"); 2603 goto fail; 2604 } 2605 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmrpool)) 2606 2607 mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER, 2608 DDI_INTR_PRI(state->ts_intrmsi_pri)); 2609 2610 fmrpool->fmr_state = state; 2611 fmrpool->fmr_flush_function = fmr_attr->fmr_func_hdlr; 2612 fmrpool->fmr_flush_arg = fmr_attr->fmr_func_arg; 2613 fmrpool->fmr_pool_size = 0; 2614 fmrpool->fmr_cache = 0; 2615 fmrpool->fmr_max_pages = fmr_attr->fmr_max_pages_per_fmr; 2616 fmrpool->fmr_page_sz = fmr_attr->fmr_page_sz; 2617 fmrpool->fmr_dirty_watermark = fmr_attr->fmr_dirty_watermark; 2618 fmrpool->fmr_dirty_len = 0; 2619 fmrpool->fmr_flags = fmr_attr->fmr_flags; 2620 2621 /* Create taskq to handle cleanup and flush processing */ 2622 (void) snprintf(taskqname, 50, "fmrpool/%d/%d @ 0x%" PRIx64, 2623 fmr_attr->fmr_pool_size, tavor_debug_fmrpool_cnt, 2624 (uint64_t)(uintptr_t)fmrpool); 2625 fmrpool->fmr_taskq = ddi_taskq_create(state->ts_dip, taskqname, 2626 TAVOR_TASKQ_NTHREADS, TASKQ_DEFAULTPRI, 0); 2627 if (fmrpool->fmr_taskq == NULL) { 2628 
TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed task queue"); 2629 goto fail1; 2630 } 2631 2632 fmrpool->fmr_free_list = NULL; 2633 fmrpool->fmr_dirty_list = NULL; 2634 2635 if (fmr_attr->fmr_cache) { 2636 tavor_fmr_cache_init(fmrpool); 2637 } 2638 2639 for (i = 0; i < fmr_attr->fmr_pool_size; i++) { 2640 status = tavor_mr_alloc_fmr(state, pd, fmrpool, &mr); 2641 if (status != DDI_SUCCESS) { 2642 TAVOR_TNF_FAIL(status, "failed fmr alloc"); 2643 goto fail2; 2644 } 2645 2646 fmr = (tavor_fmr_list_t *)kmem_zalloc( 2647 sizeof (tavor_fmr_list_t), sleep); 2648 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2649 2650 fmr->fmr = mr; 2651 fmr->fmr_refcnt = 0; 2652 fmr->fmr_remaps = 0; 2653 fmr->fmr_pool = fmrpool; 2654 fmr->fmr_in_cache = 0; 2655 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 2656 mr->mr_fmr = fmr; 2657 2658 fmr->fmr_next = fmrpool->fmr_free_list; 2659 fmrpool->fmr_free_list = fmr; 2660 fmrpool->fmr_pool_size++; 2661 } 2662 2663 /* Set to return pool */ 2664 *fmrpoolp = fmrpool; 2665 2666 TAVOR_TNF_EXIT(tavor_create_fmr_pool); 2667 return (IBT_SUCCESS); 2668 fail2: 2669 tavor_fmr_cache_fini(fmrpool); 2670 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) { 2671 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2672 fmr_next = fmr->fmr_next; 2673 (void) tavor_mr_dealloc_fmr(state, &fmr->fmr); 2674 kmem_free(fmr, sizeof (tavor_fmr_list_t)); 2675 } 2676 ddi_taskq_destroy(fmrpool->fmr_taskq); 2677 fail1: 2678 kmem_free(fmrpool, sizeof (*fmrpool)); 2679 fail: 2680 TNF_PROBE_1(tavor_create_fmr_pool_fail, TAVOR_TNF_ERROR, "", 2681 tnf_string, msg, errormsg); 2682 TAVOR_TNF_EXIT(tavor_create_fmr_pool); 2683 if (status == DDI_FAILURE) { 2684 return (ibc_get_ci_failure(0)); 2685 } else { 2686 return (status); 2687 } 2688 } 2689 2690 /* 2691 * tavor_destroy_fmr_pool() 2692 * Destroy an FMR pool and free all associated resources. 2693 * Context: Can be called from kernel context only. 
 */
int
tavor_destroy_fmr_pool(tavor_state_t *state, tavor_fmrhdl_t fmrpool)
{
	tavor_fmr_list_t	*fmr, *fmr_next;
	char			*errormsg;
	int			status;

	TAVOR_TNF_ENTER(tavor_destroy_fmr_pool);

	/* First flush/invalidate anything still on the dirty list */
	mutex_enter(&fmrpool->fmr_lock);
	status = tavor_fmr_cleanup(state, fmrpool);
	if (status != DDI_SUCCESS) {
		mutex_exit(&fmrpool->fmr_lock);
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed fmr cleanup");
		goto fail;
	}

	if (fmrpool->fmr_cache) {
		tavor_fmr_cache_fini(fmrpool);
	}

	/*
	 * After cleanup all entries are on the free list; release each
	 * entry's underlying MR and then the entry itself.
	 */
	for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
		fmr_next = fmr->fmr_next;

		(void) tavor_mr_dealloc_fmr(state, &fmr->fmr);
		kmem_free(fmr, sizeof (tavor_fmr_list_t));
	}
	mutex_exit(&fmrpool->fmr_lock);

	/* ddi_taskq_destroy() waits for any outstanding flush tasks */
	ddi_taskq_destroy(fmrpool->fmr_taskq);
	mutex_destroy(&fmrpool->fmr_lock);

	kmem_free(fmrpool, sizeof (*fmrpool));

	TAVOR_TNF_EXIT(tavor_destroy_fmr_pool);
	return (DDI_SUCCESS);
fail:
	TNF_PROBE_1(tavor_destroy_fmr_pool_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_destroy_fmr_pool);
	return (status);
}

/*
 * tavor_flush_fmr_pool()
 *    Ensure that all unmapped FMRs are fully invalidated.
 *    Context: Can be called from kernel context only.
 */
int
tavor_flush_fmr_pool(tavor_state_t *state, tavor_fmrhdl_t fmrpool)
{
	char		*errormsg;
	int		status;

	TAVOR_TNF_ENTER(tavor_flush_fmr_pool);

	/*
	 * Force the unmapping of all entries on the dirty list, regardless of
	 * whether the watermark has been hit yet.
	 */
	/* grab the pool lock */
	mutex_enter(&fmrpool->fmr_lock);
	status = tavor_fmr_cleanup(state, fmrpool);
	if (status != DDI_SUCCESS) {
		mutex_exit(&fmrpool->fmr_lock);
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed fmr cleanup");
		goto fail;
	}
	/* release the pool lock */
	mutex_exit(&fmrpool->fmr_lock);

	TAVOR_TNF_EXIT(tavor_flush_fmr_pool);
	return (DDI_SUCCESS);
fail:
	TNF_PROBE_1(tavor_flush_fmr_pool_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_flush_fmr_pool);
	return (status);
}

/*
 * tavor_register_physical_fmr()
 *    Map memory into FMR
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_register_physical_fmr(tavor_state_t *state, tavor_fmrhdl_t fmrpool,
    ibt_pmr_attr_t *mem_pattr, tavor_mrhdl_t *mr,
    ibt_pmr_desc_t *mem_desc_p)
{
	tavor_fmr_list_t	*fmr;
	tavor_fmr_list_t	query;
	avl_index_t		where;
	int			status;

	TAVOR_TNF_ENTER(tavor_register_physical_fmr);

	/* Check length */
	mutex_enter(&fmrpool->fmr_lock);
	if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf >
	    fmrpool->fmr_max_pages)) {
		mutex_exit(&fmrpool->fmr_lock);
		TNF_PROBE_0(tavor_register_physical_fmr_length_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_register_physical_fmr);
		return (IBT_MR_LEN_INVALID);
	}

	/* Lock ordering: fmr_lock is always acquired before fmr_cachelock */
	mutex_enter(&fmrpool->fmr_cachelock);
	/* lookup in fmr cache */
	/* if exists, grab it, and return it */
	if (fmrpool->fmr_cache) {
		query.fmr_desc.pmd_iova = mem_pattr->pmr_iova;
		query.fmr_desc.pmd_phys_buf_list_sz = mem_pattr->pmr_len;
		fmr = (tavor_fmr_list_t *)avl_find(&fmrpool->fmr_cache_avl,
		    &query, &where);

		/*
		 * If valid FMR was found in cache, return that fmr info
		 */
		if (fmr != NULL) {
			fmr->fmr_refcnt++;
			/* Store pmr desc for use in cache */
			(void)
memcpy(mem_desc_p, &fmr->fmr_desc, 2819 sizeof (ibt_pmr_desc_t)); 2820 *mr = (tavor_mrhdl_t)fmr->fmr; 2821 mutex_exit(&fmrpool->fmr_cachelock); 2822 mutex_exit(&fmrpool->fmr_lock); 2823 TAVOR_TNF_EXIT(tavor_register_physical_fmr); 2824 return (DDI_SUCCESS); 2825 } 2826 } 2827 2828 /* FMR does not exist in cache, proceed with registration */ 2829 2830 /* grab next free entry */ 2831 fmr = fmrpool->fmr_free_list; 2832 if (fmr == NULL) { 2833 mutex_exit(&fmrpool->fmr_cachelock); 2834 mutex_exit(&fmrpool->fmr_lock); 2835 TNF_PROBE_0(tavor_register_physical_fmr_none_free, 2836 TAVOR_TNF_ERROR, ""); 2837 TAVOR_TNF_EXIT(tavor_register_physical_fmr); 2838 return (IBT_INSUFF_RESOURCE); 2839 } 2840 2841 fmrpool->fmr_free_list = fmrpool->fmr_free_list->fmr_next; 2842 fmr->fmr_next = NULL; 2843 2844 status = tavor_mr_register_physical_fmr(state, mem_pattr, fmr->fmr, 2845 mem_desc_p); 2846 if (status != DDI_SUCCESS) { 2847 mutex_exit(&fmrpool->fmr_cachelock); 2848 mutex_exit(&fmrpool->fmr_lock); 2849 TNF_PROBE_0(tavor_register_physical_fmr_reg_fail, 2850 TAVOR_TNF_ERROR, ""); 2851 TAVOR_TNF_EXIT(tavor_register_physical_fmr); 2852 return (status); 2853 } 2854 2855 fmr->fmr_refcnt = 1; 2856 fmr->fmr_remaps++; 2857 2858 /* Store pmr desc for use in cache */ 2859 (void) memcpy(&fmr->fmr_desc, mem_desc_p, sizeof (ibt_pmr_desc_t)); 2860 *mr = (tavor_mrhdl_t)fmr->fmr; 2861 2862 /* Store in cache */ 2863 if (fmrpool->fmr_cache) { 2864 if (!fmr->fmr_in_cache) { 2865 avl_insert(&fmrpool->fmr_cache_avl, fmr, where); 2866 fmr->fmr_in_cache = 1; 2867 } 2868 } 2869 2870 mutex_exit(&fmrpool->fmr_cachelock); 2871 mutex_exit(&fmrpool->fmr_lock); 2872 TAVOR_TNF_EXIT(tavor_register_physical_fmr); 2873 return (DDI_SUCCESS); 2874 } 2875 2876 /* 2877 * tavor_deregister_fmr() 2878 * Unmap FMR 2879 * Context: Can be called from kernel context only. 
2880 */ 2881 int 2882 tavor_deregister_fmr(tavor_state_t *state, tavor_mrhdl_t mr) 2883 { 2884 tavor_fmr_list_t *fmr; 2885 tavor_fmrhdl_t fmrpool; 2886 int status; 2887 2888 fmr = mr->mr_fmr; 2889 fmrpool = fmr->fmr_pool; 2890 2891 /* Grab pool lock */ 2892 mutex_enter(&fmrpool->fmr_lock); 2893 fmr->fmr_refcnt--; 2894 2895 if (fmr->fmr_refcnt == 0) { 2896 /* 2897 * First, do some bit of invalidation, reducing our exposure to 2898 * having this region still registered in hardware. 2899 */ 2900 (void) tavor_mr_invalidate_fmr(state, mr); 2901 2902 /* 2903 * If we've exhausted our remaps then add the FMR to the dirty 2904 * list, not allowing it to be re-used until we have done a 2905 * flush. Otherwise, simply add it back to the free list for 2906 * re-mapping. 2907 */ 2908 if (fmr->fmr_remaps < 2909 state->ts_cfg_profile->cp_fmr_max_remaps) { 2910 /* add to free list */ 2911 fmr->fmr_next = fmrpool->fmr_free_list; 2912 fmrpool->fmr_free_list = fmr; 2913 } else { 2914 /* add to dirty list */ 2915 fmr->fmr_next = fmrpool->fmr_dirty_list; 2916 fmrpool->fmr_dirty_list = fmr; 2917 fmrpool->fmr_dirty_len++; 2918 2919 status = ddi_taskq_dispatch(fmrpool->fmr_taskq, 2920 tavor_fmr_processing, fmrpool, DDI_NOSLEEP); 2921 if (status == DDI_FAILURE) { 2922 mutex_exit(&fmrpool->fmr_lock); 2923 TNF_PROBE_0(tavor_agent_request_cb_taskq_fail, 2924 TAVOR_TNF_ERROR, ""); 2925 return (IBT_INSUFF_RESOURCE); 2926 } 2927 } 2928 } 2929 /* Release pool lock */ 2930 mutex_exit(&fmrpool->fmr_lock); 2931 2932 return (DDI_SUCCESS); 2933 } 2934 2935 2936 /* 2937 * tavor_fmr_processing() 2938 * If required, perform cleanup. 2939 * Context: Called from taskq context only. 
2940 */ 2941 static void 2942 tavor_fmr_processing(void *fmr_args) 2943 { 2944 tavor_fmrhdl_t fmrpool; 2945 char *errormsg; 2946 int status; 2947 2948 TAVOR_TNF_ENTER(tavor_fmr_processing); 2949 2950 ASSERT(fmr_args != NULL); 2951 2952 fmrpool = (tavor_fmrhdl_t)fmr_args; 2953 2954 /* grab pool lock */ 2955 mutex_enter(&fmrpool->fmr_lock); 2956 if (fmrpool->fmr_dirty_len >= fmrpool->fmr_dirty_watermark) { 2957 status = tavor_fmr_cleanup(fmrpool->fmr_state, fmrpool); 2958 if (status != DDI_SUCCESS) { 2959 mutex_exit(&fmrpool->fmr_lock); 2960 TAVOR_TNF_FAIL(ibc_get_ci_failure(0), 2961 "failed fmr cleanup"); 2962 goto fail; 2963 } 2964 2965 if (fmrpool->fmr_flush_function != NULL) { 2966 (void) fmrpool->fmr_flush_function( 2967 (ibc_fmr_pool_hdl_t)fmrpool, 2968 fmrpool->fmr_flush_arg); 2969 } 2970 } 2971 2972 /* let pool lock go */ 2973 mutex_exit(&fmrpool->fmr_lock); 2974 2975 TAVOR_TNF_EXIT(tavor_fmr_processing); 2976 return; 2977 fail: 2978 TNF_PROBE_1(tavor_fmr_processing, TAVOR_TNF_ERROR, "", 2979 tnf_string, msg, errormsg); 2980 TAVOR_TNF_EXIT(tavor_fmr_processing); 2981 } 2982 2983 /* 2984 * tavor_fmr_cleanup() 2985 * Perform cleaning processing, walking the list and performing the MTT sync 2986 * operation if required. 2987 * Context: can be called from taskq or base context. 2988 */ 2989 static int 2990 tavor_fmr_cleanup(tavor_state_t *state, tavor_fmrhdl_t fmrpool) 2991 { 2992 tavor_fmr_list_t *fmr; 2993 tavor_fmr_list_t *fmr_next; 2994 int sync_needed; 2995 int status; 2996 2997 TAVOR_TNF_ENTER(tavor_fmr_cleanup); 2998 2999 ASSERT(MUTEX_HELD(&fmrpool->fmr_lock)); 3000 3001 sync_needed = 0; 3002 for (fmr = fmrpool->fmr_dirty_list; fmr; fmr = fmr_next) { 3003 fmr_next = fmr->fmr_next; 3004 fmr->fmr_remaps = 0; 3005 3006 (void) tavor_mr_deregister_fmr(state, fmr->fmr); 3007 3008 /* 3009 * Update lists. 
3010 * - add fmr back to free list 3011 * - remove fmr from dirty list 3012 */ 3013 fmr->fmr_next = fmrpool->fmr_free_list; 3014 fmrpool->fmr_free_list = fmr; 3015 3016 3017 /* 3018 * Because we have updated the dirty list, and deregistered the 3019 * FMR entry, we do need to sync the TPT, so we set the 3020 * 'sync_needed' flag here so we sync once we finish dirty_list 3021 * processing. 3022 */ 3023 sync_needed = 1; 3024 } 3025 3026 fmrpool->fmr_dirty_list = NULL; 3027 fmrpool->fmr_dirty_len = 0; 3028 3029 if (sync_needed) { 3030 status = tavor_sync_tpt_cmd_post(state, TAVOR_CMD_NOSLEEP_SPIN); 3031 if (status != TAVOR_CMD_SUCCESS) { 3032 TNF_PROBE_0(tavor_fmr_cleanup, TAVOR_TNF_ERROR, ""); 3033 TAVOR_TNF_EXIT(tavor_fmr_cleanup); 3034 return (status); 3035 } 3036 } 3037 3038 TAVOR_TNF_EXIT(tavor_fmr_cleanup); 3039 return (DDI_SUCCESS); 3040 } 3041 3042 /* 3043 * tavor_fmr_avl_compare() 3044 * Context: Can be called from user or kernel context. 3045 */ 3046 static int 3047 tavor_fmr_avl_compare(const void *q, const void *e) 3048 { 3049 tavor_fmr_list_t *entry, *query; 3050 3051 TAVOR_TNF_ENTER(tavor_qpn_avl_compare); 3052 3053 entry = (tavor_fmr_list_t *)e; 3054 query = (tavor_fmr_list_t *)q; 3055 3056 if (query->fmr_desc.pmd_iova < entry->fmr_desc.pmd_iova) { 3057 TAVOR_TNF_EXIT(tavor_qpn_avl_compare); 3058 return (-1); 3059 } else if (query->fmr_desc.pmd_iova > entry->fmr_desc.pmd_iova) { 3060 TAVOR_TNF_EXIT(tavor_qpn_avl_compare); 3061 return (+1); 3062 } else { 3063 TAVOR_TNF_EXIT(tavor_qpn_avl_compare); 3064 return (0); 3065 } 3066 } 3067 3068 3069 /* 3070 * tavor_fmr_cache_init() 3071 * Context: Can be called from user or kernel context. 
3072 */ 3073 static void 3074 tavor_fmr_cache_init(tavor_fmrhdl_t fmr) 3075 { 3076 TAVOR_TNF_ENTER(tavor_fmr_cache_init); 3077 3078 /* Initialize the lock used for FMR cache AVL tree access */ 3079 mutex_init(&fmr->fmr_cachelock, NULL, MUTEX_DRIVER, 3080 DDI_INTR_PRI(fmr->fmr_state->ts_intrmsi_pri)); 3081 3082 /* Initialize the AVL tree for the FMR cache */ 3083 avl_create(&fmr->fmr_cache_avl, tavor_fmr_avl_compare, 3084 sizeof (tavor_fmr_list_t), 3085 offsetof(tavor_fmr_list_t, fmr_avlnode)); 3086 3087 fmr->fmr_cache = 1; 3088 3089 TAVOR_TNF_EXIT(tavor_fmr_cache_init); 3090 } 3091 3092 3093 /* 3094 * tavor_fmr_cache_fini() 3095 * Context: Can be called from user or kernel context. 3096 */ 3097 static void 3098 tavor_fmr_cache_fini(tavor_fmrhdl_t fmr) 3099 { 3100 void *cookie; 3101 3102 TAVOR_TNF_ENTER(tavor_fmr_cache_fini); 3103 3104 /* 3105 * Empty all entries (if necessary) and destroy the AVL tree. 3106 * The FMRs themselves are freed as part of destroy_pool() 3107 */ 3108 cookie = NULL; 3109 while (((void *)(tavor_fmr_list_t *)avl_destroy_nodes( 3110 &fmr->fmr_cache_avl, &cookie)) != NULL) { 3111 /* loop through */ 3112 } 3113 avl_destroy(&fmr->fmr_cache_avl); 3114 3115 /* Destroy the lock used for FMR cache */ 3116 mutex_destroy(&fmr->fmr_cachelock); 3117 3118 TAVOR_TNF_EXIT(tavor_fmr_cache_fini); 3119 } 3120 3121 /* 3122 * tavor_get_dma_cookies() 3123 * Return DMA cookies in the pre-allocated paddr_list_p based on the length 3124 * needed. 3125 * Context: Can be called from interrupt or base context. 
3126 */ 3127 int 3128 tavor_get_dma_cookies(tavor_state_t *state, ibt_phys_buf_t *paddr_list_p, 3129 ibt_va_attr_t *va_attrs, uint_t list_len, uint_t *cookiecnt, 3130 ibc_ma_hdl_t *ibc_ma_hdl_p) 3131 { 3132 ddi_dma_handle_t dma_hdl; 3133 ddi_dma_attr_t dma_attr; 3134 ddi_dma_cookie_t dmacookie; 3135 uint_t dma_xfer_mode; 3136 int (*callback)(caddr_t); 3137 int status; 3138 int i; 3139 3140 TAVOR_TNF_ENTER(tavor_get_dma_cookies); 3141 3142 /* Set the callback flag appropriately */ 3143 callback = (va_attrs->va_flags & IBT_VA_NOSLEEP) ? DDI_DMA_DONTWAIT : 3144 DDI_DMA_SLEEP; 3145 if ((callback == DDI_DMA_SLEEP) && 3146 (TAVOR_SLEEP != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 3147 TNF_PROBE_0(tavor_ci_map_mem_area_invalid_flags, 3148 TAVOR_TNF_ERROR, ""); 3149 TAVOR_TNF_EXIT(tavor_ci_map_mem_area); 3150 return (IBT_INVALID_PARAM); 3151 } 3152 3153 /* 3154 * Initialize many of the default DMA attributes and allocate the DMA 3155 * handle. Then, if we're bypassing the IOMMU, set the 3156 * DDI_DMA_FORCE_PHYSICAL flag. 3157 */ 3158 tavor_dma_attr_init(&dma_attr); 3159 3160 #ifdef __x86 3161 /* 3162 * On x86 we can specify a maximum segment length for our returned 3163 * cookies. 3164 */ 3165 if (va_attrs->va_flags & IBT_VA_FMR) { 3166 dma_attr.dma_attr_seg = PAGESIZE - 1; 3167 } 3168 #endif 3169 3170 /* Determine whether to map STREAMING or CONSISTENT */ 3171 dma_xfer_mode = (va_attrs->va_flags & IBT_VA_NONCOHERENT) ? 3172 DDI_DMA_STREAMING : DDI_DMA_CONSISTENT; 3173 3174 #ifdef __sparc 3175 /* 3176 * First, disable streaming and switch to consistent if 3177 * configured to do so and IOMMU BYPASS is enabled. 3178 */ 3179 if (state->ts_cfg_profile->cp_disable_streaming_on_bypass && 3180 dma_xfer_mode == DDI_DMA_STREAMING && 3181 state->ts_cfg_profile->cp_iommu_bypass == TAVOR_BINDMEM_BYPASS) { 3182 dma_xfer_mode = DDI_DMA_CONSISTENT; 3183 } 3184 3185 /* 3186 * Then, if streaming is still specified, then "bypass" is not 3187 * allowed. 
3188 */ 3189 if ((dma_xfer_mode == DDI_DMA_CONSISTENT) && 3190 (state->ts_cfg_profile->cp_iommu_bypass == TAVOR_BINDMEM_BYPASS)) { 3191 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 3192 } 3193 #endif 3194 3195 status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr, 3196 callback, NULL, &dma_hdl); 3197 if (status != DDI_SUCCESS) { 3198 TNF_PROBE_1(tavor_ci_map_mem_area_alloc_handle_fail, 3199 TAVOR_TNF_ERROR, "", tnf_uint, status, status); 3200 TAVOR_TNF_EXIT(tavor_ci_map_mem_area); 3201 3202 switch (status) { 3203 case DDI_DMA_NORESOURCES: 3204 return (IBT_INSUFF_RESOURCE); 3205 case DDI_DMA_BADATTR: 3206 default: 3207 return (ibc_get_ci_failure(0)); 3208 } 3209 } 3210 3211 /* 3212 * Now bind the handle with the correct DMA attributes. 3213 */ 3214 if (va_attrs->va_flags & IBT_VA_BUF) { 3215 status = ddi_dma_buf_bind_handle(dma_hdl, va_attrs->va_buf, 3216 DDI_DMA_RDWR | dma_xfer_mode, DDI_DMA_DONTWAIT, 3217 NULL, &dmacookie, cookiecnt); 3218 } else { 3219 status = ddi_dma_addr_bind_handle(dma_hdl, NULL, 3220 (caddr_t)(uintptr_t)va_attrs->va_vaddr, va_attrs->va_len, 3221 DDI_DMA_RDWR | dma_xfer_mode, DDI_DMA_DONTWAIT, 3222 NULL, &dmacookie, cookiecnt); 3223 } 3224 if (status != DDI_SUCCESS) { 3225 ddi_dma_free_handle(&dma_hdl); 3226 TNF_PROBE_0(tavor_ci_map_mem_area_bind_handle_fail, 3227 TAVOR_TNF_ERROR, ""); 3228 TAVOR_TNF_EXIT(tavor_ci_map_mem_area); 3229 3230 switch (status) { 3231 case DDI_DMA_NORESOURCES: 3232 return (IBT_INSUFF_RESOURCE); 3233 case DDI_DMA_TOOBIG: 3234 return (IBT_INVALID_PARAM); 3235 case DDI_DMA_PARTIAL_MAP: 3236 case DDI_DMA_INUSE: 3237 case DDI_DMA_NOMAPPING: 3238 default: 3239 return (ibc_get_ci_failure(0)); 3240 } 3241 } 3242 3243 /* 3244 * Verify our physical buffer list (PBL) is large enough to handle the 3245 * number of cookies that were returned. 
3246 */ 3247 if (*cookiecnt > list_len) { 3248 (void) ddi_dma_unbind_handle(dma_hdl); 3249 ddi_dma_free_handle(&dma_hdl); 3250 TNF_PROBE_0(tavor_ci_map_mem_area_toomany_cookie_fail, 3251 TAVOR_TNF_ERROR, ""); 3252 TAVOR_TNF_EXIT(tavor_ci_map_mem_area); 3253 return (IBT_PBL_TOO_SMALL); 3254 } 3255 3256 /* 3257 * We store the cookies returned by the DDI into our own PBL. This 3258 * sets the cookies up for later processing (for example, if we want to 3259 * split up the cookies into smaller chunks). We use the laddr and 3260 * size fields in each cookie to create each individual entry (PBE). 3261 */ 3262 3263 /* 3264 * Store first cookie info first 3265 */ 3266 paddr_list_p[0].p_laddr = dmacookie.dmac_laddress; 3267 paddr_list_p[0].p_size = dmacookie.dmac_size; 3268 3269 /* 3270 * Loop through each cookie, storing each cookie into our physical 3271 * buffer list. 3272 */ 3273 for (i = 1; i < *cookiecnt; i++) { 3274 ddi_dma_nextcookie(dma_hdl, &dmacookie); 3275 3276 paddr_list_p[i].p_laddr = dmacookie.dmac_laddress; 3277 paddr_list_p[i].p_size = dmacookie.dmac_size; 3278 } 3279 3280 /* return handle */ 3281 *ibc_ma_hdl_p = (ibc_ma_hdl_t)dma_hdl; 3282 TAVOR_TNF_EXIT(tavor_get_dma_cookies); 3283 return (DDI_SUCCESS); 3284 } 3285 3286 /* 3287 * tavor_split_dma_cookies() 3288 * Split up cookies passed in from paddr_list_p, returning the new list in the 3289 * same buffers, based on the pagesize to split the cookies into. 3290 * Context: Can be called from interrupt or base context. 
3291 */ 3292 /* ARGSUSED */ 3293 int 3294 tavor_split_dma_cookies(tavor_state_t *state, ibt_phys_buf_t *paddr_list, 3295 ib_memlen_t *paddr_offset, uint_t list_len, uint_t *cookiecnt, 3296 uint_t pagesize) 3297 { 3298 uint64_t pageoffset; 3299 uint64_t pagemask; 3300 uint_t pageshift; 3301 uint_t current_cookiecnt; 3302 uint_t cookies_needed; 3303 uint64_t last_size, extra_cookie; 3304 int i_increment; 3305 int i, k; 3306 int status; 3307 3308 TAVOR_TNF_ENTER(tavor_split_dma_cookies); 3309 3310 /* Setup pagesize calculations */ 3311 pageoffset = pagesize - 1; 3312 pagemask = (~pageoffset); 3313 pageshift = highbit(pagesize) - 1; 3314 3315 /* 3316 * Setup first cookie offset based on pagesize requested. 3317 */ 3318 *paddr_offset = paddr_list[0].p_laddr & pageoffset; 3319 paddr_list[0].p_laddr &= pagemask; 3320 3321 /* Save away the current number of cookies that are passed in */ 3322 current_cookiecnt = *cookiecnt; 3323 3324 /* Perform splitting up of current cookies into pagesize blocks */ 3325 for (i = 0; i < current_cookiecnt; i += i_increment) { 3326 /* 3327 * If the cookie is smaller than pagesize, or already is 3328 * pagesize, then we are already within our limits, so we skip 3329 * it. 3330 */ 3331 if (paddr_list[i].p_size <= pagesize) { 3332 i_increment = 1; 3333 continue; 3334 } 3335 3336 /* 3337 * If this is our first cookie, then we have to deal with the 3338 * offset that may be present in the first address. So add 3339 * that to our size, to calculate potential change to the last 3340 * cookie's size. 3341 * 3342 * Also, calculate the number of cookies that we'll need to 3343 * split up this block into. 
3344 */ 3345 if (i == 0) { 3346 last_size = (paddr_list[i].p_size + *paddr_offset) & 3347 pageoffset; 3348 cookies_needed = (paddr_list[i].p_size + 3349 *paddr_offset) >> pageshift; 3350 } else { 3351 last_size = 0; 3352 cookies_needed = paddr_list[i].p_size >> pageshift; 3353 } 3354 3355 /* 3356 * If our size is not a multiple of pagesize, we need one more 3357 * cookie. 3358 */ 3359 if (last_size) { 3360 extra_cookie = 1; 3361 } else { 3362 extra_cookie = 0; 3363 } 3364 3365 /* 3366 * Split cookie into pagesize chunks, shifting list of cookies 3367 * down, using more cookie slots in the PBL if necessary. 3368 */ 3369 status = tavor_dma_cookie_shift(paddr_list, i, list_len, 3370 current_cookiecnt - i, cookies_needed + extra_cookie); 3371 if (status != 0) { 3372 TNF_PROBE_0(tavor_split_cookies_toomany_fail, 3373 TAVOR_TNF_ERROR, ""); 3374 TAVOR_TNF_EXIT(tavor_dma_split_cookies); 3375 return (status); 3376 } 3377 3378 /* 3379 * If the very first cookie, we must take possible offset into 3380 * account. 3381 */ 3382 if (i == 0) { 3383 paddr_list[i].p_size = pagesize - *paddr_offset; 3384 } else { 3385 paddr_list[i].p_size = pagesize; 3386 } 3387 3388 /* 3389 * We have shifted the existing cookies down the PBL, now fill 3390 * in the blank entries by splitting up our current block. 3391 */ 3392 for (k = 1; k < cookies_needed; k++) { 3393 paddr_list[i + k].p_laddr = 3394 paddr_list[i + k - 1].p_laddr + pagesize; 3395 paddr_list[i + k].p_size = pagesize; 3396 } 3397 3398 /* If we have one extra cookie (of less than pagesize...) 
*/ 3399 if (extra_cookie) { 3400 paddr_list[i + k].p_laddr = 3401 paddr_list[i + k - 1].p_laddr + pagesize; 3402 paddr_list[i + k].p_size = last_size; 3403 } 3404 3405 /* Increment cookiecnt appropriately based on cookies used */ 3406 i_increment = cookies_needed + extra_cookie; 3407 current_cookiecnt += i_increment - 1; 3408 } 3409 3410 /* Update to new cookie count */ 3411 *cookiecnt = current_cookiecnt; 3412 TAVOR_TNF_EXIT(tavor_dma_split_cookies); 3413 return (DDI_SUCCESS); 3414 } 3415 3416 /* 3417 * tavor_dma_cookie_shift() 3418 * Context: Can be called from interrupt or base context. 3419 */ 3420 int 3421 tavor_dma_cookie_shift(ibt_phys_buf_t *paddr_list, int start, int end, 3422 int cookiecnt, int num_shift) 3423 { 3424 int shift_start; 3425 int i; 3426 3427 TAVOR_TNF_ENTER(tavor_dma_cookie_shift); 3428 3429 /* Calculating starting point in the PBL list */ 3430 shift_start = start + cookiecnt - 1; 3431 3432 /* Check if we're at the end of our PBL list */ 3433 if ((shift_start + num_shift - 1) >= end) { 3434 TNF_PROBE_0(tavor_dma_cookie_shift_toomany_fail, 3435 TAVOR_TNF_ERROR, ""); 3436 TAVOR_TNF_EXIT(tavor_dma_cookie_shift); 3437 return (IBT_PBL_TOO_SMALL); 3438 } 3439 3440 for (i = shift_start; i > start; i--) { 3441 paddr_list[i + num_shift - 1] = paddr_list[i]; 3442 } 3443 3444 TAVOR_TNF_EXIT(tavor_dma_cookie_shift); 3445 return (DDI_SUCCESS); 3446 } 3447 3448 3449 /* 3450 * tavor_free_dma_cookies() 3451 * Context: Can be called from interrupt or base context. 3452 */ 3453 int 3454 tavor_free_dma_cookies(ibc_ma_hdl_t ma_hdl) 3455 { 3456 ddi_dma_handle_t dma_hdl; 3457 int status; 3458 3459 dma_hdl = (ddi_dma_handle_t)ma_hdl; 3460 3461 status = ddi_dma_unbind_handle(dma_hdl); 3462 if (status != DDI_SUCCESS) { 3463 TNF_PROBE_0(tavor_ci_free_dma_unbind_fail, 3464 TAVOR_TNF_ERROR, ""); 3465 TAVOR_TNF_EXIT(tavor_ci_unmap_mem_area); 3466 return (ibc_get_ci_failure(0)); 3467 } 3468 3469 ddi_dma_free_handle(&dma_hdl); 3470 3471 return (DDI_SUCCESS); 3472 } 3473