1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * tavor_misc.c 29 * Tavor Miscellaneous routines - Address Handle, Multicast, Protection 30 * Domain, and port-related operations 31 * 32 * Implements all the routines necessary for allocating, freeing, querying 33 * and modifying Address Handles and Protection Domains. Also implements 34 * all the routines necessary for adding and removing Queue Pairs to/from 35 * Multicast Groups. Lastly, it implements the routines necessary for 36 * port-related query and modify operations. 37 */ 38 39 #include <sys/types.h> 40 #include <sys/conf.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/modctl.h> 44 #include <sys/bitmap.h> 45 #include <sys/sysmacros.h> 46 47 #include <sys/ib/adapters/tavor/tavor.h> 48 49 static void tavor_udav_sync(tavor_ahhdl_t ah, tavor_hw_udav_t *udav, 50 uint_t flag); 51 static int tavor_mcg_qplist_add(tavor_state_t *state, tavor_mcghdl_t mcg, 52 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp, uint_t *qp_found); 53 static int tavor_mcg_qplist_remove(tavor_mcghdl_t mcg, 54 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp); 55 static void tavor_qp_mcg_refcnt_inc(tavor_qphdl_t qp); 56 static void tavor_qp_mcg_refcnt_dec(tavor_qphdl_t qp); 57 static uint_t tavor_mcg_walk_mgid_hash(tavor_state_t *state, 58 uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx); 59 static void tavor_mcg_setup_new_hdr(tavor_mcghdl_t mcg, 60 tavor_hw_mcg_t *mcg_hdr, ib_gid_t mgid, tavor_rsrc_t *mcg_rsrc); 61 static int tavor_mcg_hash_list_remove(tavor_state_t *state, uint_t curr_indx, 62 uint_t prev_indx, tavor_hw_mcg_t *mcg_entry); 63 static int tavor_mcg_entry_invalidate(tavor_state_t *state, 64 tavor_hw_mcg_t *mcg_entry, uint_t indx); 65 static int tavor_mgid_is_valid(ib_gid_t gid); 66 static int tavor_mlid_is_valid(ib_lid_t lid); 67 68 69 /* 70 * tavor_ah_alloc() 71 * Context: Can be called only from user or kernel context. 72 */ 73 int 74 tavor_ah_alloc(tavor_state_t *state, tavor_pdhdl_t pd, 75 ibt_adds_vect_t *attr_p, tavor_ahhdl_t *ahhdl, uint_t sleepflag) 76 { 77 tavor_rsrc_t *udav, *rsrc; 78 tavor_hw_udav_t udav_entry; 79 tavor_ahhdl_t ah; 80 ibt_mr_attr_t mr_attr; 81 tavor_mr_options_t op; 82 tavor_mrhdl_t mr; 83 uint64_t data; 84 uint32_t size; 85 int status, i, flag; 86 char *errormsg; 87 88 TAVOR_TNF_ENTER(tavor_ah_alloc); 89 90 /* 91 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to 92 * indicate that we wish to allocate an "invalid" (i.e. empty) 93 * address handle XXX 94 */ 95 96 /* Validate that specified port number is legal */ 97 if (!tavor_portnum_is_valid(state, attr_p->av_port_num)) { 98 /* Set "status" and "errormsg" and goto failure */ 99 TAVOR_TNF_FAIL(IBT_HCA_PORT_INVALID, "invalid port num"); 100 goto ahalloc_fail; 101 } 102 103 /* 104 * Allocate a UDAV entry. This will be filled in with all the 105 * necessary parameters to define the Address Handle. Unlike the 106 * other hardware resources no ownership transfer takes place as 107 * these UDAV entries are always owned by hardware. 108 */ 109 status = tavor_rsrc_alloc(state, TAVOR_UDAV, 1, sleepflag, &udav); 110 if (status != DDI_SUCCESS) { 111 /* Set "status" and "errormsg" and goto failure */ 112 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed UDAV"); 113 goto ahalloc_fail; 114 } 115 116 /* 117 * Allocate the software structure for tracking the address handle 118 * (i.e. the Tavor Address Handle struct). If we fail here, we must 119 * undo the previous resource allocation. 120 */ 121 status = tavor_rsrc_alloc(state, TAVOR_AHHDL, 1, sleepflag, &rsrc); 122 if (status != DDI_SUCCESS) { 123 /* Set "status" and "errormsg" and goto failure */ 124 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed AH handler"); 125 goto ahalloc_fail1; 126 } 127 ah = (tavor_ahhdl_t)rsrc->tr_addr; 128 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah)) 129 130 /* Increment the reference count on the protection domain (PD) */ 131 tavor_pd_refcnt_inc(pd); 132 133 /* 134 * Fill in the UDAV entry. Note: We are only filling in a temporary 135 * copy here, which we will later copy into the actual entry in 136 * Tavor DDR memory. This starts be zeroing out the temporary copy 137 * and then calling tavor_set_addr_path() to fill in the common 138 * portions that can be pulled from the "ibt_adds_vect_t" passed in 139 */ 140 bzero(&udav_entry, sizeof (tavor_hw_udav_t)); 141 status = tavor_set_addr_path(state, attr_p, 142 (tavor_hw_addr_path_t *)&udav_entry, TAVOR_ADDRPATH_UDAV, NULL); 143 if (status != DDI_SUCCESS) { 144 tavor_pd_refcnt_dec(pd); 145 tavor_rsrc_free(state, &rsrc); 146 tavor_rsrc_free(state, &udav); 147 /* Set "status" and "errormsg" and goto failure */ 148 TAVOR_TNF_FAIL(status, "failed in tavor_set_addr_path"); 149 goto ahalloc_fail; 150 } 151 udav_entry.pd = pd->pd_pdnum; 152 udav_entry.msg_sz = state->ts_cfg_profile->cp_max_mtu - 1; 153 154 /* 155 * Register the memory for the UDAV. The memory for the UDAV must 156 * be registered in the Tavor TPT tables. This gives us the LKey 157 * that we will need when we later post a UD work request that 158 * uses this address handle. 159 * We might be able to pre-register all the memory for the UDAV XXX 160 */ 161 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP; 162 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)udav->tr_addr; 163 mr_attr.mr_len = udav->tr_len; 164 mr_attr.mr_as = NULL; 165 mr_attr.mr_flags = flag; 166 op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass; 167 op.mro_bind_dmahdl = NULL; 168 op.mro_bind_override_addr = 0; 169 status = tavor_mr_register(state, pd, &mr_attr, &mr, &op); 170 if (status != DDI_SUCCESS) { 171 /* Set "status" and "errormsg" and goto failure */ 172 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr"); 173 goto ahalloc_fail2; 174 } 175 176 /* 177 * Fill in the UDAV entry. Here we copy all the information from 178 * the temporary UDAV into the DDR memory for the real UDAV entry. 179 * Note that we copy everything but the first 64-bit word. This 180 * is where the PD number for the address handle resides. 181 * By filling everything except the PD and then writing the PD in 182 * a separate step below, we can ensure that the UDAV is not 183 * accessed while there are partially written values in it (something 184 * which really should not happen anyway). This is guaranteed 185 * because we take measures to ensure that the PD number is zero for 186 * all unused UDAV (and because PD#0 is reserved for Tavor). 187 */ 188 size = sizeof (tavor_hw_udav_t) >> 3; 189 for (i = 1; i < size; i++) { 190 data = ((uint64_t *)&udav_entry)[i]; 191 ddi_put64(udav->tr_acchdl, ((uint64_t *)udav->tr_addr + i), 192 data); 193 } 194 data = ((uint64_t *)&udav_entry)[0]; 195 ddi_put64(udav->tr_acchdl, (uint64_t *)udav->tr_addr, data); 196 197 /* 198 * Fill in the rest of the Tavor Address Handle struct. Having 199 * successfully copied the UDAV into the hardware, we update the 200 * following fields for use in further operations on the AH. 201 * 202 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field 203 * here because we may need to return it later to the IBTF (as a 204 * result of a subsequent query operation). Unlike the other UDAV 205 * parameters, the value of "av_dgid.gid_guid" is not always preserved 206 * by being written to hardware. The reason for this is described in 207 * tavor_set_addr_path(). 208 */ 209 ah->ah_udavrsrcp = udav; 210 ah->ah_rsrcp = rsrc; 211 ah->ah_pdhdl = pd; 212 ah->ah_mrhdl = mr; 213 ah->ah_save_guid = attr_p->av_dgid.gid_guid; 214 ah->ah_save_srate = attr_p->av_srate; 215 *ahhdl = ah; 216 217 /* Determine if later ddi_dma_sync will be necessary */ 218 ah->ah_sync = TAVOR_UDAV_IS_SYNC_REQ(state); 219 220 /* Sync the UDAV for use by the hardware */ 221 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV); 222 223 TAVOR_TNF_EXIT(tavor_ah_alloc); 224 return (DDI_SUCCESS); 225 226 ahalloc_fail2: 227 tavor_pd_refcnt_dec(pd); 228 tavor_rsrc_free(state, &rsrc); 229 ahalloc_fail1: 230 tavor_rsrc_free(state, &udav); 231 ahalloc_fail: 232 TNF_PROBE_1(tavor_ah_alloc_fail, TAVOR_TNF_ERROR, "", 233 tnf_string, msg, errormsg); 234 TAVOR_TNF_EXIT(tavor_ah_alloc); 235 return (status); 236 } 237 238 239 /* 240 * tavor_ah_free() 241 * Context: Can be called only from user or kernel context. 242 */ 243 /* ARGSUSED */ 244 int 245 tavor_ah_free(tavor_state_t *state, tavor_ahhdl_t *ahhdl, uint_t sleepflag) 246 { 247 tavor_rsrc_t *udav, *rsrc; 248 tavor_pdhdl_t pd; 249 tavor_mrhdl_t mr; 250 tavor_ahhdl_t ah; 251 int status; 252 253 TAVOR_TNF_ENTER(tavor_ah_free); 254 255 /* 256 * Pull all the necessary information from the Tavor Address Handle 257 * struct. This is necessary here because the resource for the 258 * AH is going to be freed up as part of this operation. 259 */ 260 ah = *ahhdl; 261 mutex_enter(&ah->ah_lock); 262 udav = ah->ah_udavrsrcp; 263 rsrc = ah->ah_rsrcp; 264 pd = ah->ah_pdhdl; 265 mr = ah->ah_mrhdl; 266 mutex_exit(&ah->ah_lock); 267 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah)) 268 269 /* 270 * Deregister the memory for the UDAV. If this fails for any reason, 271 * then it is an indication that something (either in HW or SW) has 272 * gone seriously wrong. So we print a warning message and return 273 * failure. 274 */ 275 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL, 276 sleepflag); 277 if (status != DDI_SUCCESS) { 278 TNF_PROBE_0(tavor_ah_free_dereg_mr_fail, TAVOR_TNF_ERROR, ""); 279 TAVOR_TNF_EXIT(tavor_ah_free); 280 return (ibc_get_ci_failure(0)); 281 } 282 283 /* 284 * Write zero to the first 64-bit word in the UDAV entry. As 285 * described above (in tavor_ah_alloc), the PD number is stored in 286 * the first 64-bits of each UDAV and setting this to zero is 287 * guaranteed to invalidate the entry. 288 */ 289 ddi_put64(udav->tr_acchdl, (uint64_t *)udav->tr_addr, 0); 290 291 /* Sync the UDAV for use by the hardware */ 292 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV); 293 294 /* Decrement the reference count on the protection domain (PD) */ 295 tavor_pd_refcnt_dec(pd); 296 297 /* Free the Tavor Address Handle structure */ 298 tavor_rsrc_free(state, &rsrc); 299 300 /* Free up the UDAV entry resource */ 301 tavor_rsrc_free(state, &udav); 302 303 /* Set the ahhdl pointer to NULL and return success */ 304 *ahhdl = NULL; 305 306 TAVOR_TNF_EXIT(tavor_ah_free); 307 return (DDI_SUCCESS); 308 } 309 310 311 /* 312 * tavor_ah_query() 313 * Context: Can be called from interrupt or base context. 314 */ 315 /* ARGSUSED */ 316 int 317 tavor_ah_query(tavor_state_t *state, tavor_ahhdl_t ah, tavor_pdhdl_t *pd, 318 ibt_adds_vect_t *attr_p) 319 { 320 tavor_hw_udav_t udav_entry; 321 tavor_rsrc_t *udav; 322 uint64_t data; 323 uint32_t size; 324 int i; 325 326 TAVOR_TNF_ENTER(tavor_ah_query); 327 328 mutex_enter(&ah->ah_lock); 329 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p)) 330 331 /* 332 * Pull all the necessary information from the Tavor Address Handle 333 * structure 334 */ 335 udav = ah->ah_udavrsrcp; 336 *pd = ah->ah_pdhdl; 337 338 /* 339 * Copy the UDAV entry into the temporary copy. Here we copy all 340 * the information from the UDAV entry in DDR memory into the 341 * temporary UDAV. Note: We don't need to sync the UDAV for 342 * reading by software because Tavor HW never modifies the entry. 343 */ 344 size = sizeof (tavor_hw_udav_t) >> 3; 345 for (i = 0; i < size; i++) { 346 data = ddi_get64(udav->tr_acchdl, 347 ((uint64_t *)udav->tr_addr + i)); 348 ((uint64_t *)&udav_entry)[i] = data; 349 } 350 351 /* 352 * Fill in "ibt_adds_vect_t". We call tavor_get_addr_path() to fill 353 * the common portions that can be pulled from the UDAV we pass in. 354 * 355 * NOTE: We will also fill the "av_dgid.gid_guid" field from the 356 * "ah_save_guid" field we have previously saved away. The reason 357 * for this is described in tavor_ah_alloc() and tavor_ah_modify(). 358 */ 359 tavor_get_addr_path(state, (tavor_hw_addr_path_t *)&udav_entry, 360 attr_p, TAVOR_ADDRPATH_UDAV, NULL); 361 362 attr_p->av_dgid.gid_guid = ah->ah_save_guid; 363 attr_p->av_srate = ah->ah_save_srate; 364 365 mutex_exit(&ah->ah_lock); 366 TAVOR_TNF_EXIT(tavor_ah_query); 367 return (DDI_SUCCESS); 368 } 369 370 371 /* 372 * tavor_ah_modify() 373 * Context: Can be called from interrupt or base context. 374 */ 375 /* ARGSUSED */ 376 int 377 tavor_ah_modify(tavor_state_t *state, tavor_ahhdl_t ah, 378 ibt_adds_vect_t *attr_p) 379 { 380 tavor_hw_udav_t udav_entry; 381 tavor_rsrc_t *udav; 382 uint64_t data_new, data_old; 383 uint32_t udav_pd, size, portnum_new; 384 int i, status; 385 386 TAVOR_TNF_ENTER(tavor_ah_modify); 387 388 /* Validate that specified port number is legal */ 389 if (!tavor_portnum_is_valid(state, attr_p->av_port_num)) { 390 TNF_PROBE_1(tavor_ah_modify_inv_portnum, 391 TAVOR_TNF_ERROR, "", tnf_uint, port, attr_p->av_port_num); 392 TAVOR_TNF_EXIT(tavor_ah_modify); 393 return (IBT_HCA_PORT_INVALID); 394 } 395 396 mutex_enter(&ah->ah_lock); 397 398 /* 399 * Pull all the necessary information from the Tavor Address Handle 400 * structure 401 */ 402 udav = ah->ah_udavrsrcp; 403 404 /* 405 * Fill in the UDAV entry. Note: we are only filling in a temporary 406 * copy here, which we will later copy into the actual entry in 407 * Tavor DDR memory. This starts be zeroing out the temporary copy 408 * and then calling tavor_set_addr_path() to fill in the common 409 * portions that can be pulled from the "ibt_adds_vect_t" passed in 410 * 411 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid" 412 * field here (just as we did during tavor_ah_alloc()) because we 413 * may need to return it later to the IBTF (as a result of a 414 * subsequent query operation). As explained in tavor_ah_alloc(), 415 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid" 416 * is not always preserved by being written to hardware. The reason 417 * for this is described in tavor_set_addr_path(). 418 */ 419 bzero(&udav_entry, sizeof (tavor_hw_udav_t)); 420 status = tavor_set_addr_path(state, attr_p, 421 (tavor_hw_addr_path_t *)&udav_entry, TAVOR_ADDRPATH_UDAV, NULL); 422 if (status != DDI_SUCCESS) { 423 mutex_exit(&ah->ah_lock); 424 TNF_PROBE_0(tavor_ah_modify_setaddrpath_fail, 425 TAVOR_TNF_ERROR, ""); 426 TAVOR_TNF_EXIT(tavor_ah_modify); 427 return (status); 428 } 429 ah->ah_save_guid = attr_p->av_dgid.gid_guid; 430 ah->ah_save_srate = attr_p->av_srate; 431 432 /* 433 * Save away the current PD number for this UDAV. Then temporarily 434 * invalidate the entry (by setting the PD to zero). Note: Since 435 * the first 32 bits of the UDAV actually contain the current port 436 * number _and_ current PD number, we need to mask off some bits. 437 */ 438 udav_pd = ddi_get32(udav->tr_acchdl, (uint32_t *)udav->tr_addr); 439 udav_pd = udav_pd & 0xFFFFFF; 440 ddi_put32(udav->tr_acchdl, (uint32_t *)udav->tr_addr, 0); 441 442 /* Sync the UDAV for use by the hardware */ 443 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV); 444 445 /* 446 * Copy UDAV structure to the entry 447 * Note: We copy in 64-bit chunks. For the first two of these 448 * chunks it is necessary to read the current contents of the 449 * UDAV, mask off the modifiable portions (maintaining any 450 * of the "reserved" portions), and then mask on the new data. 451 */ 452 size = sizeof (tavor_hw_udav_t) >> 3; 453 for (i = 0; i < size; i++) { 454 data_new = ((uint64_t *)&udav_entry)[i]; 455 data_old = ddi_get64(udav->tr_acchdl, 456 ((uint64_t *)udav->tr_addr + i)); 457 458 /* 459 * Apply mask to change only the relevant values. Note: We 460 * extract the new portnum from the address handle here 461 * because the "PD" and "portnum" fields are in the same 462 * 32-bit word in the UDAV. We will use the (new) port 463 * number extracted here when we write the valid PD number 464 * in the last step below. 465 */ 466 if (i == 0) { 467 data_old = data_old & TAVOR_UDAV_MODIFY_MASK0; 468 portnum_new = data_new >> 56; 469 } else if (i == 1) { 470 data_old = data_old & TAVOR_UDAV_MODIFY_MASK1; 471 } else { 472 data_old = 0; 473 } 474 475 /* Write the updated values to the UDAV (in DDR) */ 476 data_new = data_old | data_new; 477 ddi_put64(udav->tr_acchdl, ((uint64_t *)udav->tr_addr + i), 478 data_new); 479 } 480 481 /* 482 * Sync the body of the UDAV for use by the hardware. After we 483 * have updated the PD number (to make the UDAV valid), we sync 484 * again to push the entire entry out for hardware access. 485 */ 486 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV); 487 488 /* 489 * Put the valid PD number back into UDAV entry. Note: Because port 490 * number and PD number are in the same word, we must mask the 491 * new port number with the old PD number before writing it back 492 * to the UDAV entry 493 */ 494 udav_pd = ((portnum_new << 24) | udav_pd); 495 ddi_put32(udav->tr_acchdl, (uint32_t *)udav->tr_addr, udav_pd); 496 497 /* Sync the rest of the UDAV for use by the hardware */ 498 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV); 499 500 mutex_exit(&ah->ah_lock); 501 TAVOR_TNF_EXIT(tavor_ah_modify); 502 return (DDI_SUCCESS); 503 } 504 505 506 /* 507 * tavor_udav_sync() 508 * Context: Can be called from interrupt or base context. 509 */ 510 /* ARGSUSED */ 511 static void 512 tavor_udav_sync(tavor_ahhdl_t ah, tavor_hw_udav_t *udav, uint_t flag) 513 { 514 ddi_dma_handle_t dmahdl; 515 off_t offset; 516 int status; 517 518 TAVOR_TNF_ENTER(tavor_udav_sync); 519 520 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah)) 521 522 /* Determine if AH needs to be synced or not */ 523 if (ah->ah_sync == 0) { 524 TAVOR_TNF_EXIT(tavor_udav_sync); 525 return; 526 } 527 528 /* Get the DMA handle from AH handle */ 529 dmahdl = ah->ah_mrhdl->mr_bindinfo.bi_dmahdl; 530 531 /* Calculate offset into address handle */ 532 offset = (off_t)0; 533 status = ddi_dma_sync(dmahdl, offset, sizeof (tavor_hw_udav_t), flag); 534 if (status != DDI_SUCCESS) { 535 TNF_PROBE_0(tavor_udav_sync_getnextentry_fail, 536 TAVOR_TNF_ERROR, ""); 537 TAVOR_TNF_EXIT(tavor_udav_sync); 538 return; 539 } 540 541 TAVOR_TNF_EXIT(tavor_udav_sync); 542 } 543 544 545 /* 546 * tavor_mcg_attach() 547 * Context: Can be called only from user or kernel context. 548 */ 549 int 550 tavor_mcg_attach(tavor_state_t *state, tavor_qphdl_t qp, ib_gid_t gid, 551 ib_lid_t lid) 552 { 553 tavor_rsrc_t *rsrc; 554 tavor_hw_mcg_t *mcg_entry; 555 tavor_hw_mcg_qp_list_t *mcg_entry_qplist; 556 tavor_mcghdl_t mcg, newmcg; 557 uint64_t mgid_hash; 558 uint32_t end_indx; 559 int status; 560 uint_t qp_found; 561 char *errormsg; 562 563 TAVOR_TNF_ENTER(tavor_mcg_attach); 564 565 /* 566 * It is only allowed to attach MCG to UD queue pairs. Verify 567 * that the intended QP is of the appropriate transport type 568 */ 569 if (qp->qp_serv_type != TAVOR_QP_UD) { 570 /* Set "status" and "errormsg" and goto failure */ 571 TAVOR_TNF_FAIL(IBT_QP_SRV_TYPE_INVALID, "invalid service type"); 572 goto mcgattach_fail; 573 } 574 575 /* 576 * Check for invalid Multicast DLID. Specifically, all Multicast 577 * LIDs should be within a well defined range. If the specified LID 578 * is outside of that range, then return an error. 579 */ 580 if (tavor_mlid_is_valid(lid) == 0) { 581 /* Set "status" and "errormsg" and goto failure */ 582 TAVOR_TNF_FAIL(IBT_MC_MLID_INVALID, "invalid MLID"); 583 goto mcgattach_fail; 584 } 585 /* 586 * Check for invalid Multicast GID. All Multicast GIDs should have 587 * a well-defined pattern of bits and flags that are allowable. If 588 * the specified GID does not meet the criteria, then return an error. 589 */ 590 if (tavor_mgid_is_valid(gid) == 0) { 591 /* Set "status" and "errormsg" and goto failure */ 592 TAVOR_TNF_FAIL(IBT_MC_MGID_INVALID, "invalid MGID"); 593 goto mcgattach_fail; 594 } 595 596 /* 597 * Compute the MGID hash value. Since the MCG table is arranged as 598 * a number of separate hash chains, this operation converts the 599 * specified MGID into the starting index of an entry in the hash 600 * table (i.e. the index for the start of the appropriate hash chain). 601 * Subsequent operations below will walk the chain searching for the 602 * right place to add this new QP. 603 */ 604 status = tavor_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid, 605 &mgid_hash, TAVOR_SLEEPFLAG_FOR_CONTEXT()); 606 if (status != TAVOR_CMD_SUCCESS) { 607 cmn_err(CE_CONT, "Tavor: MGID_HASH command failed: %08x\n", 608 status); 609 TNF_PROBE_1(tavor_mcg_attach_mgid_hash_cmd_fail, 610 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status); 611 TAVOR_TNF_EXIT(tavor_mcg_attach); 612 return (ibc_get_ci_failure(0)); 613 } 614 615 /* 616 * Grab the multicast group mutex. Then grab the pre-allocated 617 * temporary buffer used for holding and/or modifying MCG entries. 618 * Zero out the temporary MCG entry before we begin. 619 */ 620 mutex_enter(&state->ts_mcglock); 621 mcg_entry = state->ts_mcgtmp; 622 mcg_entry_qplist = TAVOR_MCG_GET_QPLIST_PTR(mcg_entry); 623 bzero(mcg_entry, TAVOR_MCGMEM_SZ(state)); 624 625 /* 626 * Walk through the array of MCG entries starting at "mgid_hash". 627 * Try to find the appropriate place for this new QP to be added. 628 * This could happen when the first entry of the chain has MGID == 0 629 * (which means that the hash chain is empty), or because we find 630 * an entry with the same MGID (in which case we'll add the QP to 631 * that MCG), or because we come to the end of the chain (in which 632 * case this is the first QP being added to the multicast group that 633 * corresponds to the MGID. The tavor_mcg_walk_mgid_hash() routine 634 * walks the list and returns an index into the MCG table. The entry 635 * at this index is then checked to determine which case we have 636 * fallen into (see below). Note: We are using the "shadow" MCG 637 * list (of tavor_mcg_t structs) for this lookup because the real 638 * MCG entries are in hardware (and the lookup process would be much 639 * more time consuming). 640 */ 641 end_indx = tavor_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL); 642 mcg = &state->ts_mcghdl[end_indx]; 643 644 /* 645 * If MGID == 0, then the hash chain is empty. Just fill in the 646 * current entry. Note: No need to allocate an MCG table entry 647 * as all the hash chain "heads" are already preallocated. 648 */ 649 if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) { 650 651 /* Fill in the current entry in the "shadow" MCG list */ 652 tavor_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL); 653 654 /* 655 * Try to add the new QP number to the list. This (and the 656 * above) routine fills in a temporary MCG. The "mcg_entry" 657 * and "mcg_entry_qplist" pointers simply point to different 658 * offsets within the same temporary copy of the MCG (for 659 * convenience). Note: If this fails, we need to invalidate 660 * the entries we've already put into the "shadow" list entry 661 * above. 662 */ 663 status = tavor_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp, 664 &qp_found); 665 if (status != DDI_SUCCESS) { 666 bzero(mcg, sizeof (struct tavor_sw_mcg_list_s)); 667 mutex_exit(&state->ts_mcglock); 668 /* Set "status" and "errormsg" and goto failure */ 669 TAVOR_TNF_FAIL(status, "failed qplist add"); 670 goto mcgattach_fail; 671 } 672 673 /* 674 * Once the temporary MCG has been filled in, write the entry 675 * into the appropriate location in the Tavor MCG entry table. 676 * If it's successful, then drop the lock and return success. 677 * Note: In general, this operation shouldn't fail. If it 678 * does, then it is an indication that something (probably in 679 * HW, but maybe in SW) has gone seriously wrong. We still 680 * want to zero out the entries that we've filled in above 681 * (in the tavor_mcg_setup_new_hdr() routine). 682 */ 683 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx, 684 TAVOR_CMD_NOSLEEP_SPIN); 685 if (status != TAVOR_CMD_SUCCESS) { 686 bzero(mcg, sizeof (struct tavor_sw_mcg_list_s)); 687 mutex_exit(&state->ts_mcglock); 688 TAVOR_WARNING(state, "failed to write MCG entry"); 689 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: " 690 "%08x\n", status); 691 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail, 692 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 693 tnf_uint, indx, end_indx); 694 TAVOR_TNF_EXIT(tavor_mcg_attach); 695 return (ibc_get_ci_failure(0)); 696 } 697 698 /* 699 * Now that we know all the Tavor firmware accesses have been 700 * successful, we update the "shadow" MCG entry by incrementing 701 * the "number of attached QPs" count. 702 * 703 * We increment only if the QP is not already part of the 704 * MCG by checking the 'qp_found' flag returned from the 705 * qplist_add above. 706 */ 707 if (!qp_found) { 708 mcg->mcg_num_qps++; 709 710 /* 711 * Increment the refcnt for this QP. Because the QP 712 * was added to this MCG, the refcnt must be 713 * incremented. 714 */ 715 tavor_qp_mcg_refcnt_inc(qp); 716 } 717 718 /* 719 * We drop the lock and return success. 720 */ 721 mutex_exit(&state->ts_mcglock); 722 TAVOR_TNF_EXIT(tavor_mcg_attach); 723 return (DDI_SUCCESS); 724 } 725 726 /* 727 * If the specified MGID matches the MGID in the current entry, then 728 * we need to try to add the QP to the current MCG entry. In this 729 * case, it means that we need to read the existing MCG entry (into 730 * the temporary MCG), add the new QP number to the temporary entry 731 * (using the same method we used above), and write the entry back 732 * to the hardware (same as above). 733 */ 734 if ((mcg->mcg_mgid_h == gid.gid_prefix) && 735 (mcg->mcg_mgid_l == gid.gid_guid)) { 736 737 /* 738 * Read the current MCG entry into the temporary MCG. Note: 739 * In general, this operation shouldn't fail. If it does, 740 * then it is an indication that something (probably in HW, 741 * but maybe in SW) has gone seriously wrong. 742 */ 743 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx, 744 TAVOR_CMD_NOSLEEP_SPIN); 745 if (status != TAVOR_CMD_SUCCESS) { 746 mutex_exit(&state->ts_mcglock); 747 TAVOR_WARNING(state, "failed to read MCG entry"); 748 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: " 749 "%08x\n", status); 750 TNF_PROBE_2(tavor_mcg_attach_read_mgm_cmd_fail, 751 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 752 tnf_uint, indx, end_indx); 753 TAVOR_TNF_EXIT(tavor_mcg_attach); 754 return (ibc_get_ci_failure(0)); 755 } 756 757 /* 758 * Try to add the new QP number to the list. This routine 759 * fills in the necessary pieces of the temporary MCG. The 760 * "mcg_entry_qplist" pointer is used to point to the portion 761 * of the temporary MCG that holds the QP numbers. 762 * 763 * Note: tavor_mcg_qplist_add() returns SUCCESS if it 764 * already found the QP in the list. In this case, the QP is 765 * not added on to the list again. Check the flag 'qp_found' 766 * if this value is needed to be known. 767 * 768 */ 769 status = tavor_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp, 770 &qp_found); 771 if (status != DDI_SUCCESS) { 772 mutex_exit(&state->ts_mcglock); 773 /* Set "status" and "errormsg" and goto failure */ 774 TAVOR_TNF_FAIL(status, "failed qplist add"); 775 goto mcgattach_fail; 776 } 777 778 /* 779 * Once the temporary MCG has been updated, write the entry 780 * into the appropriate location in the Tavor MCG entry table. 781 * If it's successful, then drop the lock and return success. 782 * Note: In general, this operation shouldn't fail. If it 783 * does, then it is an indication that something (probably in 784 * HW, but maybe in SW) has gone seriously wrong. 785 */ 786 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx, 787 TAVOR_CMD_NOSLEEP_SPIN); 788 if (status != TAVOR_CMD_SUCCESS) { 789 mutex_exit(&state->ts_mcglock); 790 TAVOR_WARNING(state, "failed to write MCG entry"); 791 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: " 792 "%08x\n", status); 793 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail, 794 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 795 tnf_uint, indx, end_indx); 796 TAVOR_TNF_EXIT(tavor_mcg_attach); 797 return (ibc_get_ci_failure(0)); 798 } 799 800 /* 801 * Now that we know all the Tavor firmware accesses have been 802 * successful, we update the current "shadow" MCG entry by 803 * incrementing the "number of attached QPs" count. 804 * 805 * We increment only if the QP is not already part of the 806 * MCG by checking the 'qp_found' flag returned from the 807 * qplist_add above. 808 */ 809 if (!qp_found) { 810 mcg->mcg_num_qps++; 811 812 /* 813 * Increment the refcnt for this QP. Because the QP 814 * was added to this MCG, the refcnt must be 815 * incremented. 816 */ 817 tavor_qp_mcg_refcnt_inc(qp); 818 } 819 820 /* 821 * We drop the lock and return success. 822 */ 823 mutex_exit(&state->ts_mcglock); 824 TAVOR_TNF_EXIT(tavor_mcg_attach); 825 return (DDI_SUCCESS); 826 } 827 828 /* 829 * If we've reached here, then we're at the end of the hash chain. 830 * We need to allocate a new MCG entry, fill it in, write it to Tavor, 831 * and update the previous entry to link the new one to the end of the 832 * chain. 833 */ 834 835 /* 836 * Allocate an MCG table entry. This will be filled in with all 837 * the necessary parameters to define the multicast group. Then it 838 * will be written to the hardware in the next-to-last step below. 839 */ 840 status = tavor_rsrc_alloc(state, TAVOR_MCG, 1, TAVOR_NOSLEEP, &rsrc); 841 if (status != DDI_SUCCESS) { 842 mutex_exit(&state->ts_mcglock); 843 /* Set "status" and "errormsg" and goto failure */ 844 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MCG"); 845 goto mcgattach_fail; 846 } 847 848 /* 849 * Fill in the new entry in the "shadow" MCG list. Note: Just as 850 * it does above, tavor_mcg_setup_new_hdr() also fills in a portion 851 * of the temporary MCG entry (the rest of which will be filled in by 852 * tavor_mcg_qplist_add() below) 853 */ 854 newmcg = &state->ts_mcghdl[rsrc->tr_indx]; 855 tavor_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc); 856 857 /* 858 * Try to add the new QP number to the list. This routine fills in 859 * the final necessary pieces of the temporary MCG. The 860 * "mcg_entry_qplist" pointer is used to point to the portion of the 861 * temporary MCG that holds the QP numbers. If we fail here, we 862 * must undo the previous resource allocation. 863 * 864 * Note: tavor_mcg_qplist_add() can we return SUCCESS if it already 865 * found the QP in the list. In this case, the QP is not added on to 866 * the list again. Check the flag 'qp_found' if this value is needed 867 * to be known. 868 */ 869 status = tavor_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp, 870 &qp_found); 871 if (status != DDI_SUCCESS) { 872 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s)); 873 tavor_rsrc_free(state, &rsrc); 874 mutex_exit(&state->ts_mcglock); 875 /* Set "status" and "errormsg" and goto failure */ 876 TAVOR_TNF_FAIL(status, "failed qplist add"); 877 goto mcgattach_fail; 878 } 879 880 /* 881 * Once the temporary MCG has been updated, write the entry into the 882 * appropriate location in the Tavor MCG entry table. If this is 883 * successful, then we need to chain the previous entry to this one. 884 * Note: In general, this operation shouldn't fail. If it does, then 885 * it is an indication that something (probably in HW, but maybe in 886 * SW) has gone seriously wrong. 887 */ 888 status = tavor_write_mgm_cmd_post(state, mcg_entry, rsrc->tr_indx, 889 TAVOR_CMD_NOSLEEP_SPIN); 890 if (status != TAVOR_CMD_SUCCESS) { 891 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s)); 892 tavor_rsrc_free(state, &rsrc); 893 mutex_exit(&state->ts_mcglock); 894 TAVOR_WARNING(state, "failed to write MCG entry"); 895 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n", 896 status); 897 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail, 898 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 899 tnf_uint, indx, rsrc->tr_indx); 900 TAVOR_TNF_EXIT(tavor_mcg_attach); 901 return (ibc_get_ci_failure(0)); 902 } 903 904 /* 905 * Now read the current MCG entry (the one previously at the end of 906 * hash chain) into the temporary MCG. We are going to update its 907 * "next_gid_indx" now and write the entry back to the MCG table. 908 * Note: In general, this operation shouldn't fail. If it does, then 909 * it is an indication that something (probably in HW, but maybe in SW) 910 * has gone seriously wrong. We will free up the MCG entry resource, 911 * but we will not undo the previously written MCG entry in the HW. 912 * This is OK, though, because the MCG entry is not currently attached 913 * to any hash chain. 914 */ 915 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx, 916 TAVOR_CMD_NOSLEEP_SPIN); 917 if (status != TAVOR_CMD_SUCCESS) { 918 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s)); 919 tavor_rsrc_free(state, &rsrc); 920 mutex_exit(&state->ts_mcglock); 921 TAVOR_WARNING(state, "failed to read MCG entry"); 922 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n", 923 status); 924 TNF_PROBE_2(tavor_mcg_attach_read_mgm_cmd_fail, 925 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 926 tnf_uint, indx, end_indx); 927 TAVOR_TNF_EXIT(tavor_mcg_attach); 928 return (ibc_get_ci_failure(0)); 929 } 930 931 /* 932 * Finally, we update the "next_gid_indx" field in the temporary MCG 933 * and attempt to write the entry back into the Tavor MCG table. If 934 * this succeeds, then we update the "shadow" list to reflect the 935 * change, drop the lock, and return success. Note: In general, this 936 * operation shouldn't fail. If it does, then it is an indication 937 * that something (probably in HW, but maybe in SW) has gone seriously 938 * wrong. Just as we do above, we will free up the MCG entry resource, 939 * but we will not try to undo the previously written MCG entry. This 940 * is OK, though, because (since we failed here to update the end of 941 * the chain) that other entry is not currently attached to any chain. 942 */ 943 mcg_entry->next_gid_indx = rsrc->tr_indx; 944 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx, 945 TAVOR_CMD_NOSLEEP_SPIN); 946 if (status != TAVOR_CMD_SUCCESS) { 947 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s)); 948 tavor_rsrc_free(state, &rsrc); 949 mutex_exit(&state->ts_mcglock); 950 TAVOR_WARNING(state, "failed to write MCG entry"); 951 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n", 952 status); 953 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail, 954 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 955 tnf_uint, indx, end_indx); 956 TAVOR_TNF_EXIT(tavor_mcg_attach); 957 return (ibc_get_ci_failure(0)); 958 } 959 mcg = &state->ts_mcghdl[end_indx]; 960 mcg->mcg_next_indx = rsrc->tr_indx; 961 962 /* 963 * Now that we know all the Tavor firmware accesses have been 964 * successful, we update the new "shadow" MCG entry by incrementing 965 * the "number of attached QPs" count. Then we drop the lock and 966 * return success. 967 */ 968 newmcg->mcg_num_qps++; 969 970 /* 971 * Increment the refcnt for this QP. Because the QP 972 * was added to this MCG, the refcnt must be 973 * incremented. 974 */ 975 tavor_qp_mcg_refcnt_inc(qp); 976 977 mutex_exit(&state->ts_mcglock); 978 TAVOR_TNF_EXIT(tavor_mcg_attach); 979 return (DDI_SUCCESS); 980 981 mcgattach_fail: 982 TNF_PROBE_1(tavor_mcg_attach_fail, TAVOR_TNF_ERROR, "", tnf_string, 983 msg, errormsg); 984 TAVOR_TNF_EXIT(tavor_mcg_attach); 985 return (status); 986 } 987 988 989 /* 990 * tavor_mcg_detach() 991 * Context: Can be called only from user or kernel context. 992 */ 993 int 994 tavor_mcg_detach(tavor_state_t *state, tavor_qphdl_t qp, ib_gid_t gid, 995 ib_lid_t lid) 996 { 997 tavor_hw_mcg_t *mcg_entry; 998 tavor_hw_mcg_qp_list_t *mcg_entry_qplist; 999 tavor_mcghdl_t mcg; 1000 uint64_t mgid_hash; 1001 uint32_t end_indx, prev_indx; 1002 int status; 1003 1004 TAVOR_TNF_ENTER(tavor_mcg_detach); 1005 1006 /* 1007 * Check for invalid Multicast DLID. Specifically, all Multicast 1008 * LIDs should be within a well defined range. If the specified LID 1009 * is outside of that range, then return an error. 1010 */ 1011 if (tavor_mlid_is_valid(lid) == 0) { 1012 TNF_PROBE_0(tavor_mcg_detach_invmlid_fail, TAVOR_TNF_ERROR, ""); 1013 TAVOR_TNF_EXIT(tavor_mcg_detach); 1014 return (IBT_MC_MLID_INVALID); 1015 } 1016 1017 /* 1018 * Compute the MGID hash value. As described above, the MCG table is 1019 * arranged as a number of separate hash chains. This operation 1020 * converts the specified MGID into the starting index of an entry in 1021 * the hash table (i.e. the index for the start of the appropriate 1022 * hash chain). Subsequent operations below will walk the chain 1023 * searching for a matching entry from which to attempt to remove 1024 * the specified QP. 1025 */ 1026 status = tavor_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid, 1027 &mgid_hash, TAVOR_SLEEPFLAG_FOR_CONTEXT()); 1028 if (status != TAVOR_CMD_SUCCESS) { 1029 cmn_err(CE_CONT, "Tavor: MGID_HASH command failed: %08x\n", 1030 status); 1031 TNF_PROBE_1(tavor_mcg_detach_mgid_hash_cmd_fail, 1032 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status); 1033 TAVOR_TNF_EXIT(tavor_mcg_attach); 1034 return (ibc_get_ci_failure(0)); 1035 } 1036 1037 /* 1038 * Grab the multicast group mutex. Then grab the pre-allocated 1039 * temporary buffer used for holding and/or modifying MCG entries. 1040 */ 1041 mutex_enter(&state->ts_mcglock); 1042 mcg_entry = state->ts_mcgtmp; 1043 mcg_entry_qplist = TAVOR_MCG_GET_QPLIST_PTR(mcg_entry); 1044 1045 /* 1046 * Walk through the array of MCG entries starting at "mgid_hash". 1047 * Try to find an MCG entry with a matching MGID. The 1048 * tavor_mcg_walk_mgid_hash() routine walks the list and returns an 1049 * index into the MCG table. The entry at this index is checked to 1050 * determine whether it is a match or not. If it is a match, then 1051 * we continue on to attempt to remove the QP from the MCG. If it 1052 * is not a match (or not a valid MCG entry), then we return an error. 1053 */ 1054 end_indx = tavor_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx); 1055 mcg = &state->ts_mcghdl[end_indx]; 1056 1057 /* 1058 * If MGID == 0 (the hash chain is empty) or if the specified MGID 1059 * does not match the MGID in the current entry, then return 1060 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not 1061 * valid). 1062 */ 1063 if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) || 1064 ((mcg->mcg_mgid_h != gid.gid_prefix) || 1065 (mcg->mcg_mgid_l != gid.gid_guid))) { 1066 mutex_exit(&state->ts_mcglock); 1067 TNF_PROBE_0(tavor_mcg_detach_invmgid_fail, TAVOR_TNF_ERROR, ""); 1068 TAVOR_TNF_EXIT(tavor_mcg_detach); 1069 return (IBT_MC_MGID_INVALID); 1070 } 1071 1072 /* 1073 * Read the current MCG entry into the temporary MCG. Note: In 1074 * general, this operation shouldn't fail. If it does, then it is 1075 * an indication that something (probably in HW, but maybe in SW) 1076 * has gone seriously wrong. 1077 */ 1078 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx, 1079 TAVOR_CMD_NOSLEEP_SPIN); 1080 if (status != TAVOR_CMD_SUCCESS) { 1081 mutex_exit(&state->ts_mcglock); 1082 TAVOR_WARNING(state, "failed to read MCG entry"); 1083 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n", 1084 status); 1085 TNF_PROBE_2(tavor_mcg_detach_read_mgm_cmd_fail, 1086 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1087 tnf_uint, indx, end_indx); 1088 TAVOR_TNF_EXIT(tavor_mcg_attach); 1089 return (ibc_get_ci_failure(0)); 1090 } 1091 1092 /* 1093 * Search the QP number list for a match. If a match is found, then 1094 * remove the entry from the QP list. Otherwise, if no match is found, 1095 * return an error. 1096 */ 1097 status = tavor_mcg_qplist_remove(mcg, mcg_entry_qplist, qp); 1098 if (status != DDI_SUCCESS) { 1099 mutex_exit(&state->ts_mcglock); 1100 TAVOR_TNF_EXIT(tavor_mcg_detach); 1101 return (status); 1102 } 1103 1104 /* 1105 * Decrement the MCG count for this QP. When the 'qp_mcg' 1106 * field becomes 0, then this QP is no longer a member of any 1107 * MCG. 1108 */ 1109 tavor_qp_mcg_refcnt_dec(qp); 1110 1111 /* 1112 * If the current MCG's QP number list is about to be made empty 1113 * ("mcg_num_qps" == 1), then remove the entry itself from the hash 1114 * chain. Otherwise, just write the updated MCG entry back to the 1115 * hardware. In either case, once we successfully update the hardware 1116 * chain, then we decrement the "shadow" list entry's "mcg_num_qps" 1117 * count (or zero out the entire "shadow" list entry) before returning 1118 * success. Note: Zeroing out the "shadow" list entry is done 1119 * inside of tavor_mcg_hash_list_remove(). 1120 */ 1121 if (mcg->mcg_num_qps == 1) { 1122 1123 /* Remove an MCG entry from the hash chain */ 1124 status = tavor_mcg_hash_list_remove(state, end_indx, prev_indx, 1125 mcg_entry); 1126 if (status != DDI_SUCCESS) { 1127 mutex_exit(&state->ts_mcglock); 1128 TAVOR_TNF_EXIT(tavor_mcg_detach); 1129 return (status); 1130 } 1131 1132 } else { 1133 /* 1134 * Write the updated MCG entry back to the Tavor MCG table. 1135 * If this succeeds, then we update the "shadow" list to 1136 * reflect the change (i.e. decrement the "mcg_num_qps"), 1137 * drop the lock, and return success. Note: In general, 1138 * this operation shouldn't fail. If it does, then it is an 1139 * indication that something (probably in HW, but maybe in SW) 1140 * has gone seriously wrong. 1141 */ 1142 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx, 1143 TAVOR_CMD_NOSLEEP_SPIN); 1144 if (status != TAVOR_CMD_SUCCESS) { 1145 mutex_exit(&state->ts_mcglock); 1146 TAVOR_WARNING(state, "failed to write MCG entry"); 1147 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: " 1148 "%08x\n", status); 1149 TNF_PROBE_2(tavor_mcg_detach_write_mgm_cmd_fail, 1150 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1151 tnf_uint, indx, end_indx); 1152 TAVOR_TNF_EXIT(tavor_mcg_detach); 1153 return (ibc_get_ci_failure(0)); 1154 } 1155 mcg->mcg_num_qps--; 1156 } 1157 1158 mutex_exit(&state->ts_mcglock); 1159 TAVOR_TNF_EXIT(tavor_mcg_detach); 1160 return (DDI_SUCCESS); 1161 } 1162 1163 /* 1164 * tavor_qp_mcg_refcnt_inc() 1165 * Context: Can be called from interrupt or base context. 1166 */ 1167 static void 1168 tavor_qp_mcg_refcnt_inc(tavor_qphdl_t qp) 1169 { 1170 /* Increment the QP's MCG reference count */ 1171 mutex_enter(&qp->qp_lock); 1172 qp->qp_mcg_refcnt++; 1173 TNF_PROBE_1_DEBUG(tavor_qp_mcg_refcnt_inc, TAVOR_TNF_TRACE, "", 1174 tnf_uint, refcnt, qp->qp_mcg_refcnt); 1175 mutex_exit(&qp->qp_lock); 1176 } 1177 1178 1179 /* 1180 * tavor_qp_mcg_refcnt_dec() 1181 * Context: Can be called from interrupt or base context. 1182 */ 1183 static void 1184 tavor_qp_mcg_refcnt_dec(tavor_qphdl_t qp) 1185 { 1186 /* Decrement the QP's MCG reference count */ 1187 mutex_enter(&qp->qp_lock); 1188 qp->qp_mcg_refcnt--; 1189 TNF_PROBE_1_DEBUG(tavor_qp_mcg_refcnt_dec, TAVOR_TNF_TRACE, "", 1190 tnf_uint, refcnt, qp->qp_mcg_refcnt); 1191 mutex_exit(&qp->qp_lock); 1192 } 1193 1194 1195 /* 1196 * tavor_mcg_qplist_add() 1197 * Context: Can be called from interrupt or base context. 1198 */ 1199 static int 1200 tavor_mcg_qplist_add(tavor_state_t *state, tavor_mcghdl_t mcg, 1201 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp, 1202 uint_t *qp_found) 1203 { 1204 uint_t qplist_indx; 1205 1206 TAVOR_TNF_ENTER(tavor_mcg_qplist_add); 1207 1208 ASSERT(MUTEX_HELD(&state->ts_mcglock)); 1209 1210 qplist_indx = mcg->mcg_num_qps; 1211 1212 /* 1213 * Determine if we have exceeded the maximum number of QP per 1214 * multicast group. If we have, then return an error 1215 */ 1216 if (qplist_indx >= state->ts_cfg_profile->cp_num_qp_per_mcg) { 1217 TNF_PROBE_0(tavor_mcg_qplist_add_too_many_qps, 1218 TAVOR_TNF_ERROR, ""); 1219 TAVOR_TNF_EXIT(tavor_mcg_qplist_add); 1220 return (IBT_HCA_MCG_QP_EXCEEDED); 1221 } 1222 1223 /* 1224 * Determine if the QP is already attached to this MCG table. If it 1225 * is, then we break out and treat this operation as a NO-OP 1226 */ 1227 for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps; 1228 qplist_indx++) { 1229 if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) { 1230 break; 1231 } 1232 } 1233 1234 /* 1235 * If the QP was already on the list, set 'qp_found' to TRUE. We still 1236 * return SUCCESS in this case, but the qplist will not have been 1237 * updated because the QP was already on the list. 1238 */ 1239 if (qplist_indx < mcg->mcg_num_qps) { 1240 *qp_found = 1; 1241 } else { 1242 /* 1243 * Otherwise, append the new QP number to the end of the 1244 * current QP list. Note: We will increment the "mcg_num_qps" 1245 * field on the "shadow" MCG list entry later (after we know 1246 * that all necessary Tavor firmware accesses have been 1247 * successful). 1248 * 1249 * Set 'qp_found' to 0 so we know the QP was added on to the 1250 * list for sure. 1251 */ 1252 mcg_qplist[qplist_indx].q = TAVOR_MCG_QPN_VALID; 1253 mcg_qplist[qplist_indx].qpn = qp->qp_qpnum; 1254 *qp_found = 0; 1255 } 1256 1257 TAVOR_TNF_EXIT(tavor_mcg_qplist_add); 1258 return (DDI_SUCCESS); 1259 } 1260 1261 1262 1263 /* 1264 * tavor_mcg_qplist_remove() 1265 * Context: Can be called from interrupt or base context. 1266 */ 1267 static int 1268 tavor_mcg_qplist_remove(tavor_mcghdl_t mcg, tavor_hw_mcg_qp_list_t *mcg_qplist, 1269 tavor_qphdl_t qp) 1270 { 1271 uint_t i, qplist_indx; 1272 1273 TAVOR_TNF_ENTER(tavor_mcg_qplist_remove); 1274 1275 /* 1276 * Search the MCG QP list for a matching QPN. When 1277 * it's found, we swap the last entry with the current 1278 * one, set the last entry to zero, decrement the last 1279 * entry, and return. If it's not found, then it's 1280 * and error. 1281 */ 1282 qplist_indx = mcg->mcg_num_qps; 1283 for (i = 0; i < qplist_indx; i++) { 1284 if (mcg_qplist[i].qpn == qp->qp_qpnum) { 1285 mcg_qplist[i] = mcg_qplist[qplist_indx - 1]; 1286 mcg_qplist[qplist_indx - 1].q = TAVOR_MCG_QPN_INVALID; 1287 mcg_qplist[qplist_indx - 1].qpn = 0; 1288 1289 TAVOR_TNF_EXIT(tavor_mcg_qplist_remove); 1290 return (DDI_SUCCESS); 1291 } 1292 } 1293 1294 TNF_PROBE_0(tavor_mcg_qplist_remove_invqphdl_fail, TAVOR_TNF_ERROR, ""); 1295 TAVOR_TNF_EXIT(tavor_mcg_qplist_remove); 1296 return (IBT_QP_HDL_INVALID); 1297 } 1298 1299 1300 /* 1301 * tavor_mcg_walk_mgid_hash() 1302 * Context: Can be called from interrupt or base context. 1303 */ 1304 static uint_t 1305 tavor_mcg_walk_mgid_hash(tavor_state_t *state, uint64_t start_indx, 1306 ib_gid_t mgid, uint_t *p_indx) 1307 { 1308 tavor_mcghdl_t curr_mcghdl; 1309 uint_t curr_indx, prev_indx; 1310 1311 TAVOR_TNF_ENTER(tavor_mcg_walk_mgid_hash); 1312 1313 ASSERT(MUTEX_HELD(&state->ts_mcglock)); 1314 1315 /* Start at the head of the hash chain */ 1316 curr_indx = start_indx; 1317 prev_indx = curr_indx; 1318 curr_mcghdl = &state->ts_mcghdl[curr_indx]; 1319 1320 /* If the first entry in the chain has MGID == 0, then stop */ 1321 if ((curr_mcghdl->mcg_mgid_h == 0) && 1322 (curr_mcghdl->mcg_mgid_l == 0)) { 1323 goto end_mgid_hash_walk; 1324 } 1325 1326 /* If the first entry in the chain matches the MGID, then stop */ 1327 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) && 1328 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) { 1329 goto end_mgid_hash_walk; 1330 } 1331 1332 /* Otherwise, walk the hash chain looking for a match */ 1333 while (curr_mcghdl->mcg_next_indx != 0) { 1334 prev_indx = curr_indx; 1335 curr_indx = curr_mcghdl->mcg_next_indx; 1336 curr_mcghdl = &state->ts_mcghdl[curr_indx]; 1337 1338 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) && 1339 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) { 1340 break; 1341 } 1342 } 1343 1344 end_mgid_hash_walk: 1345 /* 1346 * If necessary, return the index of the previous entry too. This 1347 * is primarily used for detaching a QP from a multicast group. It 1348 * may be necessary, in that case, to delete an MCG entry from the 1349 * hash chain and having the index of the previous entry is helpful. 1350 */ 1351 if (p_indx != NULL) { 1352 *p_indx = prev_indx; 1353 } 1354 TAVOR_TNF_EXIT(tavor_mcg_walk_mgid_hash); 1355 return (curr_indx); 1356 } 1357 1358 1359 /* 1360 * tavor_mcg_setup_new_hdr() 1361 * Context: Can be called from interrupt or base context. 1362 */ 1363 static void 1364 tavor_mcg_setup_new_hdr(tavor_mcghdl_t mcg, tavor_hw_mcg_t *mcg_hdr, 1365 ib_gid_t mgid, tavor_rsrc_t *mcg_rsrc) 1366 { 1367 TAVOR_TNF_ENTER(tavor_mcg_setup_new_hdr); 1368 1369 /* 1370 * Fill in the fields of the "shadow" entry used by software 1371 * to track MCG hardware entry 1372 */ 1373 mcg->mcg_mgid_h = mgid.gid_prefix; 1374 mcg->mcg_mgid_l = mgid.gid_guid; 1375 mcg->mcg_rsrcp = mcg_rsrc; 1376 mcg->mcg_next_indx = 0; 1377 mcg->mcg_num_qps = 0; 1378 1379 /* 1380 * Fill the header fields of the MCG entry (in the temporary copy) 1381 */ 1382 mcg_hdr->mgid_h = mgid.gid_prefix; 1383 mcg_hdr->mgid_l = mgid.gid_guid; 1384 mcg_hdr->next_gid_indx = 0; 1385 1386 TAVOR_TNF_EXIT(tavor_mcg_setup_new_hdr); 1387 } 1388 1389 1390 /* 1391 * tavor_mcg_hash_list_remove() 1392 * Context: Can be called only from user or kernel context. 1393 */ 1394 static int 1395 tavor_mcg_hash_list_remove(tavor_state_t *state, uint_t curr_indx, 1396 uint_t prev_indx, tavor_hw_mcg_t *mcg_entry) 1397 { 1398 tavor_mcghdl_t curr_mcg, prev_mcg, next_mcg; 1399 uint_t next_indx; 1400 int status; 1401 1402 /* Get the pointer to "shadow" list for current entry */ 1403 curr_mcg = &state->ts_mcghdl[curr_indx]; 1404 1405 /* 1406 * If this is the first entry on a hash chain, then attempt to replace 1407 * the entry with the next entry on the chain. If there are no 1408 * subsequent entries on the chain, then this is the only entry and 1409 * should be invalidated. 1410 */ 1411 if (curr_indx == prev_indx) { 1412 1413 /* 1414 * If this is the only entry on the chain, then invalidate it. 1415 * Note: Invalidating an MCG entry means writing all zeros 1416 * to the entry. This is only necessary for those MCG 1417 * entries that are the "head" entries of the individual hash 1418 * chains. Regardless of whether this operation returns 1419 * success or failure, return that result to the caller. 1420 */ 1421 next_indx = curr_mcg->mcg_next_indx; 1422 if (next_indx == 0) { 1423 status = tavor_mcg_entry_invalidate(state, mcg_entry, 1424 curr_indx); 1425 bzero(curr_mcg, sizeof (struct tavor_sw_mcg_list_s)); 1426 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove); 1427 return (status); 1428 } 1429 1430 /* 1431 * Otherwise, this is just the first entry on the chain, so 1432 * grab the next one 1433 */ 1434 next_mcg = &state->ts_mcghdl[next_indx]; 1435 1436 /* 1437 * Read the next MCG entry into the temporary MCG. Note: 1438 * In general, this operation shouldn't fail. If it does, 1439 * then it is an indication that something (probably in HW, 1440 * but maybe in SW) has gone seriously wrong. 1441 */ 1442 status = tavor_read_mgm_cmd_post(state, mcg_entry, next_indx, 1443 TAVOR_CMD_NOSLEEP_SPIN); 1444 if (status != TAVOR_CMD_SUCCESS) { 1445 TAVOR_WARNING(state, "failed to read MCG entry"); 1446 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: " 1447 "%08x\n", status); 1448 TNF_PROBE_2(tavor_mcg_hash_list_rem_read_mgm_cmd_fail, 1449 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1450 tnf_uint, indx, next_indx); 1451 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove); 1452 return (ibc_get_ci_failure(0)); 1453 } 1454 1455 /* 1456 * Copy/Write the temporary MCG back to the hardware MCG list 1457 * using the current index. This essentially removes the 1458 * current MCG entry from the list by writing over it with 1459 * the next one. If this is successful, then we can do the 1460 * same operation for the "shadow" list. And we can also 1461 * free up the Tavor MCG entry resource that was associated 1462 * with the (old) next entry. Note: In general, this 1463 * operation shouldn't fail. If it does, then it is an 1464 * indication that something (probably in HW, but maybe in SW) 1465 * has gone seriously wrong. 1466 */ 1467 status = tavor_write_mgm_cmd_post(state, mcg_entry, curr_indx, 1468 TAVOR_CMD_NOSLEEP_SPIN); 1469 if (status != TAVOR_CMD_SUCCESS) { 1470 TAVOR_WARNING(state, "failed to write MCG entry"); 1471 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: " 1472 "%08x\n", status); 1473 TNF_PROBE_2(tavor_mcg_hash_list_rem_write_mgm_cmd_fail, 1474 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1475 tnf_uint, indx, curr_indx); 1476 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove); 1477 return (ibc_get_ci_failure(0)); 1478 } 1479 1480 /* 1481 * Copy all the software tracking information from the next 1482 * entry on the "shadow" MCG list into the current entry on 1483 * the list. Then invalidate (zero out) the other "shadow" 1484 * list entry. 1485 */ 1486 bcopy(next_mcg, curr_mcg, sizeof (struct tavor_sw_mcg_list_s)); 1487 bzero(next_mcg, sizeof (struct tavor_sw_mcg_list_s)); 1488 1489 /* 1490 * Free up the Tavor MCG entry resource used by the "next" 1491 * MCG entry. That resource is no longer needed by any 1492 * MCG entry which is first on a hash chain (like the "next" 1493 * entry has just become). 1494 */ 1495 tavor_rsrc_free(state, &curr_mcg->mcg_rsrcp); 1496 1497 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove); 1498 return (DDI_SUCCESS); 1499 } 1500 1501 /* 1502 * Else if this is the last entry on the hash chain (or a middle 1503 * entry, then we update the previous entry's "next_gid_index" field 1504 * to make it point instead to the next entry on the chain. By 1505 * skipping over the removed entry in this way, we can then free up 1506 * any resources associated with the current entry. Note: We don't 1507 * need to invalidate the "skipped over" hardware entry because it 1508 * will no be longer connected to any hash chains, and if/when it is 1509 * finally re-used, it will be written with entirely new values. 1510 */ 1511 1512 /* 1513 * Read the next MCG entry into the temporary MCG. Note: In general, 1514 * this operation shouldn't fail. If it does, then it is an 1515 * indication that something (probably in HW, but maybe in SW) has 1516 * gone seriously wrong. 1517 */ 1518 status = tavor_read_mgm_cmd_post(state, mcg_entry, prev_indx, 1519 TAVOR_CMD_NOSLEEP_SPIN); 1520 if (status != TAVOR_CMD_SUCCESS) { 1521 TAVOR_WARNING(state, "failed to read MCG entry"); 1522 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n", 1523 status); 1524 TNF_PROBE_2(tavor_mcg_hash_list_rem_read_mgm_cmd_fail, 1525 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1526 tnf_uint, indx, prev_indx); 1527 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove); 1528 return (ibc_get_ci_failure(0)); 1529 } 1530 1531 /* 1532 * Finally, we update the "next_gid_indx" field in the temporary MCG 1533 * and attempt to write the entry back into the Tavor MCG table. If 1534 * this succeeds, then we update the "shadow" list to reflect the 1535 * change, free up the Tavor MCG entry resource that was associated 1536 * with the current entry, and return success. Note: In general, 1537 * this operation shouldn't fail. If it does, then it is an indication 1538 * that something (probably in HW, but maybe in SW) has gone seriously 1539 * wrong. 1540 */ 1541 mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx; 1542 status = tavor_write_mgm_cmd_post(state, mcg_entry, prev_indx, 1543 TAVOR_CMD_NOSLEEP_SPIN); 1544 if (status != TAVOR_CMD_SUCCESS) { 1545 TAVOR_WARNING(state, "failed to write MCG entry"); 1546 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n", 1547 status); 1548 TNF_PROBE_2(tavor_mcg_hash_list_rem_write_mgm_cmd_fail, 1549 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1550 tnf_uint, indx, prev_indx); 1551 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove); 1552 return (ibc_get_ci_failure(0)); 1553 } 1554 1555 /* 1556 * Get the pointer to the "shadow" MCG list entry for the previous 1557 * MCG. Update its "mcg_next_indx" to point to the next entry 1558 * the one after the current entry. Note: This next index may be 1559 * zero, indicating the end of the list. 1560 */ 1561 prev_mcg = &state->ts_mcghdl[prev_indx]; 1562 prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx; 1563 1564 /* 1565 * Free up the Tavor MCG entry resource used by the current entry. 1566 * This resource is no longer needed because the chain now skips over 1567 * the current entry. Then invalidate (zero out) the current "shadow" 1568 * list entry. 1569 */ 1570 tavor_rsrc_free(state, &curr_mcg->mcg_rsrcp); 1571 bzero(curr_mcg, sizeof (struct tavor_sw_mcg_list_s)); 1572 1573 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove); 1574 return (DDI_SUCCESS); 1575 } 1576 1577 1578 /* 1579 * tavor_mcg_entry_invalidate() 1580 * Context: Can be called only from user or kernel context. 1581 */ 1582 static int 1583 tavor_mcg_entry_invalidate(tavor_state_t *state, tavor_hw_mcg_t *mcg_entry, 1584 uint_t indx) 1585 { 1586 int status; 1587 1588 TAVOR_TNF_ENTER(tavor_mcg_entry_invalidate); 1589 1590 /* 1591 * Invalidate the hardware MCG entry by zeroing out this temporary 1592 * MCG and writing it the the hardware. Note: In general, this 1593 * operation shouldn't fail. If it does, then it is an indication 1594 * that something (probably in HW, but maybe in SW) has gone seriously 1595 * wrong. 1596 */ 1597 bzero(mcg_entry, TAVOR_MCGMEM_SZ(state)); 1598 status = tavor_write_mgm_cmd_post(state, mcg_entry, indx, 1599 TAVOR_CMD_NOSLEEP_SPIN); 1600 if (status != TAVOR_CMD_SUCCESS) { 1601 TAVOR_WARNING(state, "failed to write MCG entry"); 1602 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n", 1603 status); 1604 TNF_PROBE_2(tavor_mcg_entry_invalidate_write_mgm_cmd_fail, 1605 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1606 tnf_uint, indx, indx); 1607 TAVOR_TNF_EXIT(tavor_mcg_entry_invalidate); 1608 return (ibc_get_ci_failure(0)); 1609 } 1610 1611 TAVOR_TNF_EXIT(tavor_mcg_entry_invalidate); 1612 return (DDI_SUCCESS); 1613 } 1614 1615 1616 /* 1617 * tavor_mgid_is_valid() 1618 * Context: Can be called from interrupt or base context. 1619 */ 1620 static int 1621 tavor_mgid_is_valid(ib_gid_t gid) 1622 { 1623 uint_t topbits, flags, scope; 1624 1625 TAVOR_TNF_ENTER(tavor_mgid_is_valid); 1626 1627 /* 1628 * According to IBA 1.1 specification (section 4.1.1) a valid 1629 * "multicast GID" must have its top eight bits set to all ones 1630 */ 1631 topbits = (gid.gid_prefix >> TAVOR_MCG_TOPBITS_SHIFT) & 1632 TAVOR_MCG_TOPBITS_MASK; 1633 if (topbits != TAVOR_MCG_TOPBITS) { 1634 TNF_PROBE_0(tavor_mgid_is_valid_invbits_fail, TAVOR_TNF_ERROR, 1635 ""); 1636 TAVOR_TNF_EXIT(tavor_mgid_is_valid); 1637 return (0); 1638 } 1639 1640 /* 1641 * The next 4 bits are the "flag" bits. These are valid only 1642 * if they are "0" (which correspond to permanently assigned/ 1643 * "well-known" multicast GIDs) or "1" (for so-called "transient" 1644 * multicast GIDs). All other values are reserved. 1645 */ 1646 flags = (gid.gid_prefix >> TAVOR_MCG_FLAGS_SHIFT) & 1647 TAVOR_MCG_FLAGS_MASK; 1648 if (!((flags == TAVOR_MCG_FLAGS_PERM) || 1649 (flags == TAVOR_MCG_FLAGS_NONPERM))) { 1650 TNF_PROBE_1(tavor_mgid_is_valid_invflags_fail, TAVOR_TNF_ERROR, 1651 "", tnf_uint, flags, flags); 1652 TAVOR_TNF_EXIT(tavor_mgid_is_valid); 1653 return (0); 1654 } 1655 1656 /* 1657 * The next 4 bits are the "scope" bits. These are valid only 1658 * if they are "2" (Link-local), "5" (Site-local), "8" 1659 * (Organization-local) or "E" (Global). All other values 1660 * are reserved (or currently unassigned). 1661 */ 1662 scope = (gid.gid_prefix >> TAVOR_MCG_SCOPE_SHIFT) & 1663 TAVOR_MCG_SCOPE_MASK; 1664 if (!((scope == TAVOR_MCG_SCOPE_LINKLOC) || 1665 (scope == TAVOR_MCG_SCOPE_SITELOC) || 1666 (scope == TAVOR_MCG_SCOPE_ORGLOC) || 1667 (scope == TAVOR_MCG_SCOPE_GLOBAL))) { 1668 TNF_PROBE_1(tavor_mgid_is_valid_invscope_fail, TAVOR_TNF_ERROR, 1669 "", tnf_uint, scope, scope); 1670 TAVOR_TNF_EXIT(tavor_mgid_is_valid); 1671 return (0); 1672 } 1673 1674 /* 1675 * If it passes all of the above checks, then we will consider it 1676 * a valid multicast GID. 1677 */ 1678 TAVOR_TNF_EXIT(tavor_mgid_is_valid); 1679 return (1); 1680 } 1681 1682 1683 /* 1684 * tavor_mlid_is_valid() 1685 * Context: Can be called from interrupt or base context. 1686 */ 1687 static int 1688 tavor_mlid_is_valid(ib_lid_t lid) 1689 { 1690 TAVOR_TNF_ENTER(tavor_mlid_is_valid); 1691 1692 /* 1693 * According to IBA 1.1 specification (section 4.1.1) a valid 1694 * "multicast DLID" must be between 0xC000 and 0xFFFE. 1695 */ 1696 if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) { 1697 TNF_PROBE_1(tavor_mlid_is_valid_invdlid_fail, TAVOR_TNF_ERROR, 1698 "", tnf_uint, mlid, lid); 1699 TAVOR_TNF_EXIT(tavor_mlid_is_valid); 1700 return (0); 1701 } 1702 1703 TAVOR_TNF_EXIT(tavor_mlid_is_valid); 1704 return (1); 1705 } 1706 1707 1708 /* 1709 * tavor_pd_alloc() 1710 * Context: Can be called only from user or kernel context. 1711 */ 1712 int 1713 tavor_pd_alloc(tavor_state_t *state, tavor_pdhdl_t *pdhdl, uint_t sleepflag) 1714 { 1715 tavor_rsrc_t *rsrc; 1716 tavor_pdhdl_t pd; 1717 int status; 1718 1719 TAVOR_TNF_ENTER(tavor_pd_alloc); 1720 1721 /* 1722 * Allocate the software structure for tracking the protection domain 1723 * (i.e. the Tavor Protection Domain handle). By default each PD 1724 * structure will have a unique PD number assigned to it. All that 1725 * is necessary is for software to initialize the PD reference count 1726 * (to zero) and return success. 1727 */ 1728 status = tavor_rsrc_alloc(state, TAVOR_PDHDL, 1, sleepflag, &rsrc); 1729 if (status != DDI_SUCCESS) { 1730 TNF_PROBE_0(tavor_pd_alloc_rsrcalloc_fail, TAVOR_TNF_ERROR, ""); 1731 TAVOR_TNF_EXIT(tavor_pd_alloc); 1732 return (IBT_INSUFF_RESOURCE); 1733 } 1734 pd = (tavor_pdhdl_t)rsrc->tr_addr; 1735 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd)) 1736 1737 pd->pd_refcnt = 0; 1738 *pdhdl = pd; 1739 1740 TAVOR_TNF_EXIT(tavor_pd_alloc); 1741 return (DDI_SUCCESS); 1742 } 1743 1744 1745 /* 1746 * tavor_pd_free() 1747 * Context: Can be called only from user or kernel context. 1748 */ 1749 int 1750 tavor_pd_free(tavor_state_t *state, tavor_pdhdl_t *pdhdl) 1751 { 1752 tavor_rsrc_t *rsrc; 1753 tavor_pdhdl_t pd; 1754 1755 TAVOR_TNF_ENTER(tavor_pd_free); 1756 1757 /* 1758 * Pull all the necessary information from the Tavor Protection Domain 1759 * handle. This is necessary here because the resource for the 1760 * PD is going to be freed up as part of this operation. 1761 */ 1762 pd = *pdhdl; 1763 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd)) 1764 rsrc = pd->pd_rsrcp; 1765 1766 /* 1767 * Check the PD reference count. If the reference count is non-zero, 1768 * then it means that this protection domain is still referenced by 1769 * some memory region, queue pair, address handle, or other IB object 1770 * If it is non-zero, then return an error. Otherwise, free the 1771 * Tavor resource and return success. 1772 */ 1773 if (pd->pd_refcnt != 0) { 1774 TNF_PROBE_1(tavor_pd_free_refcnt_fail, TAVOR_TNF_ERROR, "", 1775 tnf_int, refcnt, pd->pd_refcnt); 1776 TAVOR_TNF_EXIT(tavor_pd_free); 1777 return (IBT_PD_IN_USE); 1778 } 1779 1780 /* Free the Tavor Protection Domain handle */ 1781 tavor_rsrc_free(state, &rsrc); 1782 1783 /* Set the pdhdl pointer to NULL and return success */ 1784 *pdhdl = (tavor_pdhdl_t)NULL; 1785 1786 TAVOR_TNF_EXIT(tavor_pd_free); 1787 return (DDI_SUCCESS); 1788 } 1789 1790 1791 /* 1792 * tavor_pd_refcnt_inc() 1793 * Context: Can be called from interrupt or base context. 1794 */ 1795 void 1796 tavor_pd_refcnt_inc(tavor_pdhdl_t pd) 1797 { 1798 /* Increment the protection domain's reference count */ 1799 mutex_enter(&pd->pd_lock); 1800 TNF_PROBE_1_DEBUG(tavor_pd_refcnt_inc, TAVOR_TNF_TRACE, "", 1801 tnf_uint, refcnt, pd->pd_refcnt); 1802 pd->pd_refcnt++; 1803 mutex_exit(&pd->pd_lock); 1804 1805 } 1806 1807 1808 /* 1809 * tavor_pd_refcnt_dec() 1810 * Context: Can be called from interrupt or base context. 1811 */ 1812 void 1813 tavor_pd_refcnt_dec(tavor_pdhdl_t pd) 1814 { 1815 /* Decrement the protection domain's reference count */ 1816 mutex_enter(&pd->pd_lock); 1817 pd->pd_refcnt--; 1818 TNF_PROBE_1_DEBUG(tavor_pd_refcnt_dec, TAVOR_TNF_TRACE, "", 1819 tnf_uint, refcnt, pd->pd_refcnt); 1820 mutex_exit(&pd->pd_lock); 1821 1822 } 1823 1824 1825 /* 1826 * tavor_port_query() 1827 * Context: Can be called only from user or kernel context. 1828 */ 1829 int 1830 tavor_port_query(tavor_state_t *state, uint_t port, ibt_hca_portinfo_t *pi) 1831 { 1832 sm_portinfo_t portinfo; 1833 sm_guidinfo_t guidinfo; 1834 sm_pkey_table_t pkeytable; 1835 ib_gid_t *sgid; 1836 uint_t sgid_max, pkey_max, tbl_size; 1837 int i, j, indx, status; 1838 1839 TAVOR_TNF_ENTER(tavor_port_query); 1840 1841 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi)) 1842 1843 /* Validate that specified port number is legal */ 1844 if (!tavor_portnum_is_valid(state, port)) { 1845 TNF_PROBE_1(tavor_port_query_inv_portnum_fail, 1846 TAVOR_TNF_ERROR, "", tnf_uint, port, port); 1847 TAVOR_TNF_EXIT(tavor_port_query); 1848 return (IBT_HCA_PORT_INVALID); 1849 } 1850 1851 /* 1852 * We use the Tavor MAD_IFC command to post a GetPortInfo MAD 1853 * to the firmware (for the specified port number). This returns 1854 * a full PortInfo MAD (in "portinfo") which we subsequently 1855 * parse to fill in the "ibt_hca_portinfo_t" structure returned 1856 * to the IBTF. 1857 */ 1858 status = tavor_getportinfo_cmd_post(state, port, 1859 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &portinfo); 1860 if (status != TAVOR_CMD_SUCCESS) { 1861 cmn_err(CE_CONT, "Tavor: GetPortInfo (port %02d) command " 1862 "failed: %08x\n", port, status); 1863 TNF_PROBE_1(tavor_port_query_getportinfo_cmd_fail, 1864 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status); 1865 TAVOR_TNF_EXIT(tavor_port_query); 1866 return (ibc_get_ci_failure(0)); 1867 } 1868 1869 /* 1870 * Parse the PortInfo MAD and fill in the IBTF structure 1871 */ 1872 pi->p_base_lid = portinfo.LID; 1873 pi->p_qkey_violations = portinfo.Q_KeyViolations; 1874 pi->p_pkey_violations = portinfo.P_KeyViolations; 1875 pi->p_sm_sl = portinfo.MasterSMSL; 1876 pi->p_sm_lid = portinfo.MasterSMLID; 1877 pi->p_linkstate = portinfo.PortState; 1878 pi->p_port_num = portinfo.LocalPortNum; 1879 pi->p_phys_state = portinfo.PortPhysicalState; 1880 pi->p_width_supported = portinfo.LinkWidthSupported; 1881 pi->p_width_enabled = portinfo.LinkWidthEnabled; 1882 pi->p_width_active = portinfo.LinkWidthActive; 1883 pi->p_speed_supported = portinfo.LinkSpeedSupported; 1884 pi->p_speed_enabled = portinfo.LinkSpeedEnabled; 1885 pi->p_speed_active = portinfo.LinkSpeedActive; 1886 pi->p_mtu = portinfo.MTUCap; 1887 pi->p_lmc = portinfo.LMC; 1888 pi->p_max_vl = portinfo.VLCap; 1889 pi->p_subnet_timeout = portinfo.SubnetTimeOut; 1890 pi->p_msg_sz = ((uint32_t)1 << TAVOR_QP_LOG_MAX_MSGSZ); 1891 tbl_size = state->ts_cfg_profile->cp_log_max_gidtbl; 1892 pi->p_sgid_tbl_sz = (1 << tbl_size); 1893 tbl_size = state->ts_cfg_profile->cp_log_max_pkeytbl; 1894 pi->p_pkey_tbl_sz = (1 << tbl_size); 1895 1896 /* 1897 * Convert InfiniBand-defined port capability flags to the format 1898 * specified by the IBTF 1899 */ 1900 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM) 1901 pi->p_capabilities |= IBT_PORT_CAP_SM; 1902 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED) 1903 pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED; 1904 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD) 1905 pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL; 1906 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD) 1907 pi->p_capabilities |= IBT_PORT_CAP_DM; 1908 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD) 1909 pi->p_capabilities |= IBT_PORT_CAP_VENDOR; 1910 1911 /* 1912 * Fill in the SGID table. Since the only access to the Tavor 1913 * GID tables is through the firmware's MAD_IFC interface, we 1914 * post as many GetGUIDInfo MADs as necessary to read in the entire 1915 * contents of the SGID table (for the specified port). Note: The 1916 * GetGUIDInfo command only gets eight GUIDs per operation. These 1917 * GUIDs are then appended to the GID prefix for the port (from the 1918 * GetPortInfo above) to form the entire SGID table. 1919 */ 1920 for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) { 1921 status = tavor_getguidinfo_cmd_post(state, port, i >> 3, 1922 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &guidinfo); 1923 if (status != TAVOR_CMD_SUCCESS) { 1924 cmn_err(CE_CONT, "Tavor: GetGUIDInfo (port %02d) " 1925 "command failed: %08x\n", port, status); 1926 TNF_PROBE_1(tavor_port_query_getguidinfo_cmd_fail, 1927 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status); 1928 TAVOR_TNF_EXIT(tavor_port_query); 1929 return (ibc_get_ci_failure(0)); 1930 } 1931 1932 /* Figure out how many of the entries are valid */ 1933 sgid_max = min((pi->p_sgid_tbl_sz - i), 8); 1934 for (j = 0; j < sgid_max; j++) { 1935 indx = (i + j); 1936 sgid = &pi->p_sgid_tbl[indx]; 1937 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid)) 1938 sgid->gid_prefix = portinfo.GidPrefix; 1939 sgid->gid_guid = guidinfo.GUIDBlocks[j]; 1940 } 1941 } 1942 1943 /* 1944 * Fill in the PKey table. Just as for the GID tables above, the 1945 * only access to the Tavor PKey tables is through the firmware's 1946 * MAD_IFC interface. We post as many GetPKeyTable MADs as necessary 1947 * to read in the entire contents of the PKey table (for the specified 1948 * port). Note: The GetPKeyTable command only gets 32 PKeys per 1949 * operation. 1950 */ 1951 for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) { 1952 status = tavor_getpkeytable_cmd_post(state, port, i, 1953 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &pkeytable); 1954 if (status != TAVOR_CMD_SUCCESS) { 1955 cmn_err(CE_CONT, "Tavor: GetPKeyTable (port %02d) " 1956 "command failed: %08x\n", port, status); 1957 TNF_PROBE_1(tavor_port_query_getpkeytable_cmd_fail, 1958 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status); 1959 TAVOR_TNF_EXIT(tavor_port_query); 1960 return (ibc_get_ci_failure(0)); 1961 } 1962 1963 /* Figure out how many of the entries are valid */ 1964 pkey_max = min((pi->p_pkey_tbl_sz - i), 32); 1965 for (j = 0; j < pkey_max; j++) { 1966 indx = (i + j); 1967 pi->p_pkey_tbl[indx] = pkeytable.P_KeyTableBlocks[j]; 1968 } 1969 } 1970 1971 TAVOR_TNF_EXIT(tavor_port_query); 1972 return (DDI_SUCCESS); 1973 } 1974 1975 1976 /* 1977 * tavor_port_modify() 1978 * Context: Can be called only from user or kernel context. 1979 */ 1980 /* ARGSUSED */ 1981 int 1982 tavor_port_modify(tavor_state_t *state, uint8_t port, 1983 ibt_port_modify_flags_t flags, uint8_t init_type) 1984 { 1985 sm_portinfo_t portinfo; 1986 uint32_t capmask, reset_qkey; 1987 int status; 1988 1989 TAVOR_TNF_ENTER(tavor_port_modify); 1990 1991 /* 1992 * Return an error if either of the unsupported flags are set 1993 */ 1994 if ((flags & IBT_PORT_SHUTDOWN) || 1995 (flags & IBT_PORT_SET_INIT_TYPE)) { 1996 TNF_PROBE_1(tavor_port_modify_inv_flags_fail, 1997 TAVOR_TNF_ERROR, "", tnf_uint, flags, flags); 1998 TAVOR_TNF_EXIT(tavor_port_modify); 1999 return (IBT_NOT_SUPPORTED); 2000 } 2001 2002 /* 2003 * Determine whether we are trying to reset the QKey counter 2004 */ 2005 reset_qkey = (flags & IBT_PORT_RESET_QKEY) ? 1 : 0; 2006 2007 /* Validate that specified port number is legal */ 2008 if (!tavor_portnum_is_valid(state, port)) { 2009 TNF_PROBE_1(tavor_port_modify_inv_portnum_fail, 2010 TAVOR_TNF_ERROR, "", tnf_uint, port, port); 2011 TAVOR_TNF_EXIT(tavor_port_modify); 2012 return (IBT_HCA_PORT_INVALID); 2013 } 2014 2015 /* 2016 * Use the Tavor MAD_IFC command to post a GetPortInfo MAD to the 2017 * firmware (for the specified port number). This returns a full 2018 * PortInfo MAD (in "portinfo") from which we pull the current 2019 * capability mask. We then modify the capability mask as directed 2020 * by the "pmod_flags" field, and write the updated capability mask 2021 * using the Tavor SET_IB command (below). 2022 */ 2023 status = tavor_getportinfo_cmd_post(state, port, 2024 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &portinfo); 2025 if (status != TAVOR_CMD_SUCCESS) { 2026 TNF_PROBE_1(tavor_port_modify_getportinfo_cmd_fail, 2027 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status); 2028 TAVOR_TNF_EXIT(tavor_port_modify); 2029 return (ibc_get_ci_failure(0)); 2030 } 2031 2032 /* 2033 * Convert InfiniBand-defined port capability flags to the format 2034 * specified by the IBTF. Specifically, we modify the capability 2035 * mask based on the specified values. 2036 */ 2037 capmask = portinfo.CapabilityMask; 2038 2039 if (flags & IBT_PORT_RESET_SM) 2040 capmask &= ~SM_CAP_MASK_IS_SM; 2041 else if (flags & IBT_PORT_SET_SM) 2042 capmask |= SM_CAP_MASK_IS_SM; 2043 2044 if (flags & IBT_PORT_RESET_SNMP) 2045 capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD; 2046 else if (flags & IBT_PORT_SET_SNMP) 2047 capmask |= SM_CAP_MASK_IS_SNMP_SUPPD; 2048 2049 if (flags & IBT_PORT_RESET_DEVMGT) 2050 capmask &= ~SM_CAP_MASK_IS_DM_SUPPD; 2051 else if (flags & IBT_PORT_SET_DEVMGT) 2052 capmask |= SM_CAP_MASK_IS_DM_SUPPD; 2053 2054 if (flags & IBT_PORT_RESET_VENDOR) 2055 capmask &= ~SM_CAP_MASK_IS_VM_SUPPD; 2056 else if (flags & IBT_PORT_SET_VENDOR) 2057 capmask |= SM_CAP_MASK_IS_VM_SUPPD; 2058 2059 /* 2060 * Use the Tavor SET_IB command to update the capability mask and 2061 * (possibly) reset the QKey violation counter for the specified port. 2062 * Note: In general, this operation shouldn't fail. If it does, then 2063 * it is an indication that something (probably in HW, but maybe in 2064 * SW) has gone seriously wrong. 2065 */ 2066 status = tavor_set_ib_cmd_post(state, capmask, port, reset_qkey, 2067 TAVOR_SLEEPFLAG_FOR_CONTEXT()); 2068 if (status != TAVOR_CMD_SUCCESS) { 2069 TAVOR_WARNING(state, "failed to modify port capabilities"); 2070 cmn_err(CE_CONT, "Tavor: SET_IB (port %02d) command failed: " 2071 "%08x\n", port, status); 2072 TNF_PROBE_1(tavor_port_modify_set_ib_cmd_fail, 2073 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status); 2074 TAVOR_TNF_EXIT(tavor_port_modify); 2075 return (ibc_get_ci_failure(0)); 2076 } 2077 2078 TAVOR_TNF_EXIT(tavor_port_modify); 2079 return (DDI_SUCCESS); 2080 } 2081 2082 2083 /* 2084 * tavor_set_addr_path() 2085 * Context: Can be called from interrupt or base context. 2086 * 2087 * Note: This routine is used for two purposes. It is used to fill in the 2088 * Tavor UDAV fields, and it is used to fill in the address path information 2089 * for QPs. Because the two Tavor structures are similar, common fields can 2090 * be filled in here. Because they are slightly different, however, we pass 2091 * an additional flag to indicate which type is being filled. 2092 */ 2093 int 2094 tavor_set_addr_path(tavor_state_t *state, ibt_adds_vect_t *av, 2095 tavor_hw_addr_path_t *path, uint_t type, tavor_qphdl_t qp) 2096 { 2097 uint_t gidtbl_sz; 2098 2099 TAVOR_TNF_ENTER(tavor_set_addr_path); 2100 2101 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av)) 2102 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path)) 2103 2104 path->ml_path = av->av_src_path; 2105 path->rlid = av->av_dlid; 2106 path->sl = av->av_srvl; 2107 2108 /* Port number only valid (in "av_port_num") if this is a UDAV */ 2109 if (type == TAVOR_ADDRPATH_UDAV) { 2110 path->portnum = av->av_port_num; 2111 } 2112 2113 /* 2114 * Validate (and fill in) static rate. 2115 * 2116 * The stat_rate_sup is used to decide how to set the rate and 2117 * if it is zero, the driver uses the old interface. 2118 */ 2119 if (state->ts_devlim.stat_rate_sup) { 2120 if (av->av_srate == IBT_SRATE_20) { 2121 path->max_stat_rate = 0; /* 4x@DDR injection rate */ 2122 } else if (av->av_srate == IBT_SRATE_5) { 2123 path->max_stat_rate = 3; /* 1x@DDR injection rate */ 2124 } else if (av->av_srate == IBT_SRATE_10) { 2125 path->max_stat_rate = 2; /* 4x@SDR injection rate */ 2126 } else if (av->av_srate == IBT_SRATE_2) { 2127 path->max_stat_rate = 1; /* 1x@SDR injection rate */ 2128 } else if (av->av_srate == IBT_SRATE_NOT_SPECIFIED) { 2129 path->max_stat_rate = 0; /* Max */ 2130 } else { 2131 TNF_PROBE_1(tavor_set_addr_path_inv_srate_fail, 2132 TAVOR_TNF_ERROR, "", tnf_uint, srate, av->av_srate); 2133 TAVOR_TNF_EXIT(tavor_set_addr_path); 2134 return (IBT_STATIC_RATE_INVALID); 2135 } 2136 } else { 2137 if (av->av_srate == IBT_SRATE_10) { 2138 path->max_stat_rate = 0; /* 4x@SDR injection rate */ 2139 } else if (av->av_srate == IBT_SRATE_2) { 2140 path->max_stat_rate = 1; /* 1x@SDR injection rate */ 2141 } else if (av->av_srate == IBT_SRATE_NOT_SPECIFIED) { 2142 path->max_stat_rate = 0; /* Max */ 2143 } else { 2144 TNF_PROBE_1(tavor_set_addr_path_inv_srate_fail, 2145 TAVOR_TNF_ERROR, "", tnf_uint, srate, av->av_srate); 2146 TAVOR_TNF_EXIT(tavor_set_addr_path); 2147 return (IBT_STATIC_RATE_INVALID); 2148 } 2149 } 2150 2151 /* 2152 * If this is a QP operation save asoft copy. 2153 */ 2154 if (qp) { 2155 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(qp->qp_save_srate)) 2156 qp->qp_save_srate = av->av_srate; 2157 } 2158 2159 /* If "grh" flag is set, then check for valid SGID index too */ 2160 gidtbl_sz = (1 << state->ts_devlim.log_max_gid); 2161 if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) { 2162 TNF_PROBE_1(tavor_set_addr_path_inv_sgid_ix_fail, 2163 TAVOR_TNF_ERROR, "", tnf_uint, sgid_ix, av->av_sgid_ix); 2164 TAVOR_TNF_EXIT(tavor_set_addr_path); 2165 return (IBT_SGID_INVALID); 2166 } 2167 2168 /* 2169 * Fill in all "global" values regardless of the value in the GRH 2170 * flag. Because "grh" is not set unless "av_send_grh" is set, the 2171 * hardware will ignore the other "global" values as necessary. Note: 2172 * SW does this here to enable later query operations to return 2173 * exactly the same params that were passed when the addr path was 2174 * last written. 2175 */ 2176 path->grh = av->av_send_grh; 2177 if (type == TAVOR_ADDRPATH_QP) { 2178 path->mgid_index = av->av_sgid_ix; 2179 } else { 2180 /* 2181 * For Tavor UDAV, the "mgid_index" field is the index into 2182 * a combined table (not a per-port table). So some extra 2183 * calculations are necessary. 2184 */ 2185 path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) + 2186 av->av_sgid_ix; 2187 } 2188 path->flow_label = av->av_flow; 2189 path->tclass = av->av_tclass; 2190 path->hop_limit = av->av_hop; 2191 path->rgid_h = av->av_dgid.gid_prefix; 2192 2193 /* 2194 * According to Tavor PRM, the (31:0) part of rgid_l must be set to 2195 * "0x2" if the 'grh' or 'g' bit is cleared. It also says that we 2196 * only need to do it for UDAV's. So we enforce that here. 2197 * 2198 * NOTE: The entire 64 bits worth of GUID info is actually being 2199 * preserved (for UDAVs) by the callers of this function 2200 * (tavor_ah_alloc() and tavor_ah_modify()) and as long as the 2201 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are 2202 * "don't care". 2203 */ 2204 if ((path->grh) || (type == TAVOR_ADDRPATH_QP)) { 2205 path->rgid_l = av->av_dgid.gid_guid; 2206 } else { 2207 path->rgid_l = 0x2; 2208 } 2209 2210 TAVOR_TNF_EXIT(tavor_set_addr_path); 2211 return (DDI_SUCCESS); 2212 } 2213 2214 2215 /* 2216 * tavor_get_addr_path() 2217 * Context: Can be called from interrupt or base context. 2218 * 2219 * Note: Just like tavor_set_addr_path() above, this routine is used for two 2220 * purposes. It is used to read in the Tavor UDAV fields, and it is used to 2221 * read in the address path information for QPs. Because the two Tavor 2222 * structures are similar, common fields can be read in here. But because 2223 * they are slightly different, we pass an additional flag to indicate which 2224 * type is being read. 2225 */ 2226 void 2227 tavor_get_addr_path(tavor_state_t *state, tavor_hw_addr_path_t *path, 2228 ibt_adds_vect_t *av, uint_t type, tavor_qphdl_t qp) 2229 { 2230 uint_t gidtbl_sz; 2231 2232 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path)) 2233 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av)) 2234 2235 av->av_src_path = path->ml_path; 2236 av->av_port_num = path->portnum; 2237 av->av_dlid = path->rlid; 2238 av->av_srvl = path->sl; 2239 2240 /* 2241 * Set "av_ipd" value from max_stat_rate. 2242 */ 2243 if (qp) { 2244 /* 2245 * If a QP operation use the soft copy 2246 */ 2247 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(qp->qp_save_srate)) 2248 av->av_srate = qp->qp_save_srate; 2249 } else { 2250 /* 2251 * The stat_rate_sup is used to decide how the srate value is 2252 * set and 2253 * if it is zero, the driver uses the old interface. 2254 */ 2255 if (state->ts_devlim.stat_rate_sup) { 2256 if (path->max_stat_rate == 0) { 2257 av->av_srate = IBT_SRATE_20; /* 4x@DDR rate */ 2258 } else if (path->max_stat_rate == 1) { 2259 av->av_srate = IBT_SRATE_2; /* 1x@SDR rate */ 2260 } else if (path->max_stat_rate == 2) { 2261 av->av_srate = IBT_SRATE_10; /* 4x@SDR rate */ 2262 } else if (path->max_stat_rate == 3) { 2263 av->av_srate = IBT_SRATE_5; /* 1xDDR rate */ 2264 } 2265 } else { 2266 if (path->max_stat_rate == 0) { 2267 av->av_srate = IBT_SRATE_10; /* 4x@SDR rate */ 2268 } else if (path->max_stat_rate == 1) { 2269 av->av_srate = IBT_SRATE_2; /* 1x@SDR rate */ 2270 } 2271 } 2272 } 2273 2274 /* 2275 * Extract all "global" values regardless of the value in the GRH 2276 * flag. Because "av_send_grh" is set only if "grh" is set, software 2277 * knows to ignore the other "global" values as necessary. Note: SW 2278 * does it this way to enable these query operations to return exactly 2279 * the same params that were passed when the addr path was last written. 2280 */ 2281 av->av_send_grh = path->grh; 2282 if (type == TAVOR_ADDRPATH_QP) { 2283 av->av_sgid_ix = path->mgid_index; 2284 } else { 2285 /* 2286 * For Tavor UDAV, the "mgid_index" field is the index into 2287 * a combined table (not a per-port table). So some extra 2288 * calculations are necessary. 2289 */ 2290 gidtbl_sz = (1 << state->ts_devlim.log_max_gid); 2291 av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) * 2292 gidtbl_sz); 2293 } 2294 av->av_flow = path->flow_label; 2295 av->av_tclass = path->tclass; 2296 av->av_hop = path->hop_limit; 2297 av->av_dgid.gid_prefix = path->rgid_h; 2298 av->av_dgid.gid_guid = path->rgid_l; 2299 } 2300 2301 2302 /* 2303 * tavor_portnum_is_valid() 2304 * Context: Can be called from interrupt or base context. 2305 */ 2306 int 2307 tavor_portnum_is_valid(tavor_state_t *state, uint_t portnum) 2308 { 2309 uint_t max_port; 2310 2311 max_port = state->ts_cfg_profile->cp_num_ports; 2312 if ((portnum <= max_port) && (portnum != 0)) { 2313 return (1); 2314 } else { 2315 return (0); 2316 } 2317 } 2318 2319 2320 /* 2321 * tavor_pkeyindex_is_valid() 2322 * Context: Can be called from interrupt or base context. 2323 */ 2324 int 2325 tavor_pkeyindex_is_valid(tavor_state_t *state, uint_t pkeyindx) 2326 { 2327 uint_t max_pkeyindx; 2328 2329 max_pkeyindx = 1 << state->ts_cfg_profile->cp_log_max_pkeytbl; 2330 if (pkeyindx < max_pkeyindx) { 2331 return (1); 2332 } else { 2333 return (0); 2334 } 2335 } 2336 2337 2338 /* 2339 * tavor_queue_alloc() 2340 * Context: Can be called from interrupt or base context. 2341 */ 2342 int 2343 tavor_queue_alloc(tavor_state_t *state, tavor_qalloc_info_t *qa_info, 2344 uint_t sleepflag) 2345 { 2346 ddi_dma_attr_t dma_attr; 2347 int (*callback)(caddr_t); 2348 uint64_t realsize, alloc_mask; 2349 uint_t dma_xfer_mode, type; 2350 int flag, status; 2351 2352 TAVOR_TNF_ENTER(tavor_queue_alloc); 2353 2354 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info)) 2355 2356 /* Set the callback flag appropriately */ 2357 callback = (sleepflag == TAVOR_SLEEP) ? DDI_DMA_SLEEP : 2358 DDI_DMA_DONTWAIT; 2359 2360 /* 2361 * Initialize many of the default DMA attributes. Then set additional 2362 * alignment restrictions as necessary for the queue memory. Also 2363 * respect the configured value for IOMMU bypass 2364 */ 2365 tavor_dma_attr_init(&dma_attr); 2366 dma_attr.dma_attr_align = qa_info->qa_bind_align; 2367 type = state->ts_cfg_profile->cp_iommu_bypass; 2368 if (type == TAVOR_BINDMEM_BYPASS) { 2369 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 2370 } 2371 2372 /* Allocate a DMA handle */ 2373 status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr, callback, NULL, 2374 &qa_info->qa_dmahdl); 2375 if (status != DDI_SUCCESS) { 2376 TNF_PROBE_0(tavor_queue_alloc_dmahdl_fail, TAVOR_TNF_ERROR, ""); 2377 TAVOR_TNF_EXIT(tavor_queue_alloc); 2378 return (DDI_FAILURE); 2379 } 2380 2381 /* 2382 * Determine the amount of memory to allocate, depending on the values 2383 * in "qa_bind_align" and "qa_alloc_align". The problem we are trying 2384 * to solve here is that allocating a DMA handle with IOMMU bypass 2385 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments 2386 * that are less than the page size. Since we may need stricter 2387 * alignments on the memory allocated by ddi_dma_mem_alloc() (e.g. in 2388 * Tavor QP work queue memory allocation), we use the following method 2389 * to calculate how much additional memory to request, and we enforce 2390 * our own alignment on the allocated result. 2391 */ 2392 alloc_mask = qa_info->qa_alloc_align - 1; 2393 if (qa_info->qa_bind_align == qa_info->qa_alloc_align) { 2394 realsize = qa_info->qa_size; 2395 } else { 2396 realsize = qa_info->qa_size + alloc_mask; 2397 } 2398 2399 /* 2400 * If we are to allocate the queue from system memory, then use 2401 * ddi_dma_mem_alloc() to find the space. Otherwise, if we are to 2402 * allocate the queue from locally-attached DDR memory, then use the 2403 * vmem allocator to find the space. In either case, return a pointer 2404 * to the memory range allocated (including any necessary alignment 2405 * adjustments), the "real" memory pointer, the "real" size, and a 2406 * ddi_acc_handle_t to use when reading from/writing to the memory. 2407 */ 2408 if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_NORMAL) { 2409 2410 /* 2411 * Determine whether to map STREAMING or CONSISTENT. This is 2412 * based on the value set in the configuration profile at 2413 * attach time. 2414 */ 2415 dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent; 2416 2417 /* Allocate system memory for the queue */ 2418 status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize, 2419 &state->ts_reg_accattr, dma_xfer_mode, callback, NULL, 2420 (caddr_t *)&qa_info->qa_buf_real, 2421 (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl); 2422 if (status != DDI_SUCCESS) { 2423 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2424 TNF_PROBE_0(tavor_queue_alloc_dma_memalloc_fail, 2425 TAVOR_TNF_ERROR, ""); 2426 TAVOR_TNF_EXIT(tavor_queue_alloc); 2427 return (DDI_FAILURE); 2428 } 2429 2430 /* 2431 * Save temporary copy of the real pointer. (This may be 2432 * modified in the last step below). 2433 */ 2434 qa_info->qa_buf_aligned = qa_info->qa_buf_real; 2435 2436 } else if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_USERLAND) { 2437 2438 /* Allocate userland mappable memory for the queue */ 2439 flag = (sleepflag == TAVOR_SLEEP) ? DDI_UMEM_SLEEP : 2440 DDI_UMEM_NOSLEEP; 2441 qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag, 2442 &qa_info->qa_umemcookie); 2443 if (qa_info->qa_buf_real == NULL) { 2444 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2445 TNF_PROBE_0(tavor_queue_alloc_umem_fail, 2446 TAVOR_TNF_ERROR, ""); 2447 TAVOR_TNF_EXIT(tavor_queue_alloc); 2448 return (DDI_FAILURE); 2449 } 2450 2451 /* 2452 * Save temporary copy of the real pointer. (This may be 2453 * modified in the last step below). 2454 */ 2455 qa_info->qa_buf_aligned = qa_info->qa_buf_real; 2456 2457 } else { /* TAVOR_QUEUE_LOCATION_INDDR */ 2458 2459 /* Allocate DDR memory for the queue */ 2460 flag = (sleepflag == TAVOR_SLEEP) ? VM_SLEEP : VM_NOSLEEP; 2461 qa_info->qa_buf_real = (uint32_t *)vmem_xalloc( 2462 state->ts_ddrvmem, realsize, qa_info->qa_bind_align, 0, 0, 2463 NULL, NULL, flag); 2464 if (qa_info->qa_buf_real == NULL) { 2465 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2466 TNF_PROBE_0(tavor_queue_alloc_vmxa_fail, 2467 TAVOR_TNF_ERROR, ""); 2468 TAVOR_TNF_EXIT(tavor_queue_alloc); 2469 return (DDI_FAILURE); 2470 } 2471 2472 /* 2473 * Since "qa_buf_real" will be a PCI address (the offset into 2474 * the DDR memory), we first need to do some calculations to 2475 * convert it to its kernel mapped address. (Note: This may 2476 * be modified again below, when any additional "alloc" 2477 * alignment constraint is applied). 2478 */ 2479 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t) 2480 state->ts_reg_ddr_baseaddr) + ((uintptr_t) 2481 qa_info->qa_buf_real - state->ts_ddr.ddr_baseaddr)); 2482 qa_info->qa_buf_realsz = realsize; 2483 qa_info->qa_acchdl = state->ts_reg_ddrhdl; 2484 } 2485 2486 /* 2487 * The last step is to ensure that the final address ("qa_buf_aligned") 2488 * has the appropriate "alloc" alignment restriction applied to it 2489 * (if necessary). 2490 */ 2491 if (qa_info->qa_bind_align != qa_info->qa_alloc_align) { 2492 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t) 2493 qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask); 2494 } 2495 2496 TAVOR_TNF_EXIT(tavor_queue_alloc); 2497 return (DDI_SUCCESS); 2498 } 2499 2500 2501 /* 2502 * tavor_queue_free() 2503 * Context: Can be called from interrupt or base context. 2504 */ 2505 void 2506 tavor_queue_free(tavor_state_t *state, tavor_qalloc_info_t *qa_info) 2507 { 2508 TAVOR_TNF_ENTER(tavor_queue_free); 2509 2510 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info)) 2511 2512 /* 2513 * Depending on how (i.e. from where) we allocated the memory for 2514 * this queue, we choose the appropriate method for releasing the 2515 * resources. 2516 */ 2517 if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_NORMAL) { 2518 2519 ddi_dma_mem_free(&qa_info->qa_acchdl); 2520 2521 } else if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_USERLAND) { 2522 2523 ddi_umem_free(qa_info->qa_umemcookie); 2524 2525 } else { /* TAVOR_QUEUE_LOCATION_INDDR */ 2526 2527 vmem_xfree(state->ts_ddrvmem, qa_info->qa_buf_real, 2528 qa_info->qa_buf_realsz); 2529 } 2530 2531 /* Always free the dma handle */ 2532 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2533 2534 TAVOR_TNF_EXIT(tavor_queue_free); 2535 } 2536 2537 2538 /* 2539 * tavor_dmaattr_get() 2540 * Context: Can be called from interrupt or base context. 2541 */ 2542 void 2543 tavor_dma_attr_init(ddi_dma_attr_t *dma_attr) 2544 { 2545 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_attr)) 2546 2547 dma_attr->dma_attr_version = DMA_ATTR_V0; 2548 dma_attr->dma_attr_addr_lo = 0; 2549 dma_attr->dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFFull; 2550 dma_attr->dma_attr_count_max = 0xFFFFFFFFFFFFFFFFull; 2551 dma_attr->dma_attr_align = 1; 2552 dma_attr->dma_attr_burstsizes = 0x3FF; 2553 dma_attr->dma_attr_minxfer = 1; 2554 dma_attr->dma_attr_maxxfer = 0xFFFFFFFFFFFFFFFFull; 2555 dma_attr->dma_attr_seg = 0xFFFFFFFFFFFFFFFFull; 2556 dma_attr->dma_attr_sgllen = 0x7FFFFFFF; 2557 dma_attr->dma_attr_granular = 1; 2558 dma_attr->dma_attr_flags = 0; 2559 } 2560