/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * hermon_misc.c
 *    Hermon Miscellaneous routines - Address Handle, Multicast, Protection
 *    Domain, and port-related operations
 *
 *    Implements all the routines necessary for allocating, freeing, querying
 *    and modifying Address Handles and Protection Domains.  Also implements
 *    all the routines necessary for adding and removing Queue Pairs to/from
 *    Multicast Groups.  Lastly, it implements the routines necessary for
 *    port-related query and modify operations.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>

#include <sys/ib/adapters/hermon/hermon.h>

extern uint32_t hermon_kernel_data_ro;

/* used for helping uniquify fmr pool taskq name */
static uint_t hermon_debug_fmrpool_cnt = 0x00000000;

static int hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
    hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp, uint_t *qp_found);
static int hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
    hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp);
static void hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp);
static void hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp);
static uint_t hermon_mcg_walk_mgid_hash(hermon_state_t *state,
    uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx);
static void hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg,
    hermon_hw_mcg_t *mcg_hdr, ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc);
static int hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
    uint_t prev_indx, hermon_hw_mcg_t *mcg_entry);
static int hermon_mcg_entry_invalidate(hermon_state_t *state,
    hermon_hw_mcg_t *mcg_entry, uint_t indx);
static int hermon_mgid_is_valid(ib_gid_t gid);
static int hermon_mlid_is_valid(ib_lid_t lid);
static void hermon_fmr_processing(void *fmr_args);
static int hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t pool);
static void hermon_fmr_cache_init(hermon_fmrhdl_t fmr);
static void hermon_fmr_cache_fini(hermon_fmrhdl_t fmr);
static int hermon_fmr_avl_compare(const void *q, const void *e);


#define HERMON_MAX_DBR_PAGES_PER_USER   64
#define HERMON_DBR_KEY(index, page) \
    (((uint64_t)index) * HERMON_MAX_DBR_PAGES_PER_USER + (page))

static hermon_udbr_page_t *
hermon_dbr_new_user_page(hermon_state_t *state, uint_t index,
    uint_t page)
{
    hermon_udbr_page_t *pagep;
    ddi_dma_attr_t dma_attr;
    uint_t cookiecnt;
    int i, status;
    uint64_t *p;
    hermon_umap_db_entry_t *umapdb;

    pagep = kmem_alloc(sizeof (*pagep), KM_SLEEP);
    pagep->upg_index = page;
    pagep->upg_nfree = PAGESIZE / sizeof (hermon_dbr_t);
    pagep->upg_firstfree = 0;
    pagep->upg_kvaddr = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP,
        &pagep->upg_umemcookie); /* not HERMON_PAGESIZE here */

    /* link free entries */
    p = (uint64_t *)(void *)pagep->upg_kvaddr;
    for (i = pagep->upg_firstfree; i < pagep->upg_nfree; i++)
        p[i] = i + 1;
    pagep->upg_buf = ddi_umem_iosetup(pagep->upg_umemcookie, 0,
        PAGESIZE, B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);

    hermon_dma_attr_init(state, &dma_attr);
    status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
        DDI_DMA_SLEEP, NULL, &pagep->upg_dmahdl);
    if (status != DDI_SUCCESS) {
        IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: "
            "ddi_dma_alloc_handle failed: %d", status);
        return (NULL);
    }
    status = ddi_dma_buf_bind_handle(pagep->upg_dmahdl,
        pagep->upg_buf, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
        DDI_DMA_SLEEP, NULL, &pagep->upg_dmacookie, &cookiecnt);
    if (status != DDI_SUCCESS) {
        IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: "
            "ddi_dma_buf_bind_handle failed: %d", status);
        ddi_dma_free_handle(&pagep->upg_dmahdl);
        return (NULL);
    }
    ASSERT(cookiecnt == 1);

    /* create db entry for mmap */
    umapdb = hermon_umap_db_alloc(state->hs_instance,
        HERMON_DBR_KEY(index, page), MLNX_UMAP_DBRMEM_RSRC,
        (uint64_t)(uintptr_t)pagep);
    hermon_umap_db_add(umapdb);
    return (pagep);
}
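
/*
 * Illustrative note (not part of the original source): HERMON_DBR_KEY()
 * packs a UAR index and a page number into a single lookup key by
 * reserving HERMON_MAX_DBR_PAGES_PER_USER (64) page slots per index.
 * For example, index 3 and page 2 yield the key 3 * 64 + 2 = 194; the
 * page number can be recovered as (key % 64) and the index as
 * (key / 64).  This keeps every (index, page) pair unique in the umap
 * database used for the mmap lookups below.
 */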

/*ARGSUSED*/
static int
hermon_user_dbr_alloc(hermon_state_t *state, uint_t index,
    ddi_acc_handle_t *acchdl, hermon_dbr_t **vdbr, uint64_t *pdbr,
    uint64_t *mapoffset)
{
    hermon_user_dbr_t *udbr;
    hermon_udbr_page_t *pagep;
    uint_t next_page;
    int j;

    mutex_enter(&state->hs_dbr_lock);
    for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
        if (udbr->udbr_index == index)
            break;
    if (udbr == NULL) {
        udbr = kmem_alloc(sizeof (*udbr), KM_SLEEP);
        udbr->udbr_link = state->hs_user_dbr;
        state->hs_user_dbr = udbr;
        udbr->udbr_index = index;
        udbr->udbr_pagep = NULL;
    }
    pagep = udbr->udbr_pagep;
    next_page = (pagep == NULL) ? 0 : (pagep->upg_index + 1);
    while (pagep != NULL)
        if (pagep->upg_nfree > 0)
            break;
        else
            pagep = pagep->upg_link;
    if (pagep == NULL) {
        pagep = hermon_dbr_new_user_page(state, index, next_page);
        if (pagep == NULL) {
            mutex_exit(&state->hs_dbr_lock);
            return (DDI_FAILURE);
        }
        pagep->upg_link = udbr->udbr_pagep;
        udbr->udbr_pagep = pagep;
    }
    j = pagep->upg_firstfree;   /* index within page */
    pagep->upg_firstfree = ((uint64_t *)(void *)pagep->upg_kvaddr)[j];
    pagep->upg_nfree--;
    ((uint64_t *)(void *)pagep->upg_kvaddr)[j] = 0; /* clear dbr */
    *mapoffset = ((HERMON_DBR_KEY(index, pagep->upg_index) <<
        MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_DBRMEM_RSRC) << PAGESHIFT;
    *vdbr = (hermon_dbr_t *)((uint64_t *)(void *)pagep->upg_kvaddr + j);
    *pdbr = pagep->upg_dmacookie.dmac_laddress + j * sizeof (uint64_t);

    mutex_exit(&state->hs_dbr_lock);
    return (DDI_SUCCESS);
}
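
/*
 * Illustrative walk-through (not part of the original source): the free
 * list is embedded in the page itself.  Immediately after page setup,
 * slot i holds the value i + 1, so with upg_firstfree == 0 the first
 * allocation takes slot 0, reads the stored "next free" value (1) into
 * upg_firstfree, and zeroes the slot for use as a doorbell record.  A
 * later hermon_user_dbr_free() of slot 0 writes the then-current
 * upg_firstfree into slot 0 and makes 0 the list head again - a classic
 * intrusive LIFO free list with no external bookkeeping.
 */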

static void
hermon_user_dbr_free(hermon_state_t *state, uint_t index, hermon_dbr_t *record)
{
    hermon_user_dbr_t *udbr;
    hermon_udbr_page_t *pagep;
    caddr_t kvaddr;
    uint_t dbr_index;
    uint_t max_free = PAGESIZE / sizeof (hermon_dbr_t);

    dbr_index = (uintptr_t)record & PAGEOFFSET; /* offset (not yet index) */
    kvaddr = (caddr_t)record - dbr_index;
    dbr_index /= sizeof (hermon_dbr_t); /* now it's the index */

    mutex_enter(&state->hs_dbr_lock);
    for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
        if (udbr->udbr_index == index)
            break;
    if (udbr == NULL) {
        IBTF_DPRINTF_L2("hermon", "free user dbr: udbr struct not "
            "found for index %x", index);
        mutex_exit(&state->hs_dbr_lock);
        return;
    }
    for (pagep = udbr->udbr_pagep; pagep != NULL; pagep = pagep->upg_link)
        if (pagep->upg_kvaddr == kvaddr)
            break;
    if (pagep == NULL) {
        IBTF_DPRINTF_L2("hermon", "free user dbr: pagep struct not"
            " found for index %x, kvaddr %p, DBR index %x",
            index, kvaddr, dbr_index);
        mutex_exit(&state->hs_dbr_lock);
        return;
    }
    if (pagep->upg_nfree >= max_free) {
        IBTF_DPRINTF_L2("hermon", "free user dbr: overflow: "
            "UCE index %x, DBR index %x", index, dbr_index);
        mutex_exit(&state->hs_dbr_lock);
        return;
    }
    ASSERT(dbr_index < max_free);
    ((uint64_t *)(void *)kvaddr)[dbr_index] = pagep->upg_firstfree;
    pagep->upg_firstfree = dbr_index;
    pagep->upg_nfree++;
    mutex_exit(&state->hs_dbr_lock);

    /* XXX still need to unlink and free struct */
    /* XXX munmap needs to be managed */
}

/*
 * hermon_dbr_page_alloc()
 *    first page allocation - called from attach or open
 *    in this case, we want exactly one page per call, and aligned on a
 *    page - and may need to be mapped to the user for access
 */
int
hermon_dbr_page_alloc(hermon_state_t *state, hermon_dbr_info_t **dinfo)
{
    int status;
    ddi_dma_handle_t dma_hdl;
    ddi_acc_handle_t acc_hdl;
    ddi_dma_attr_t dma_attr;
    ddi_dma_cookie_t cookie;
    uint_t cookie_cnt;
    int i;
    hermon_dbr_info_t *info;
    caddr_t dmaaddr;
    uint64_t dmalen;

    info = kmem_zalloc(sizeof (hermon_dbr_info_t), KM_SLEEP);

    /*
     * Initialize many of the default DMA attributes.  Then set the
     * additional alignment restrictions necessary for the dbr memory,
     * meaning page aligned.  Also use the configured value for IOMMU
     * bypass.
     */
    hermon_dma_attr_init(state, &dma_attr);
    dma_attr.dma_attr_align = PAGESIZE;
    dma_attr.dma_attr_sgllen = 1;   /* make sure only one cookie */

    status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
        DDI_DMA_SLEEP, NULL, &dma_hdl);
    if (status != DDI_SUCCESS) {
        kmem_free((void *)info, sizeof (hermon_dbr_info_t));
        cmn_err(CE_NOTE, "dbr DMA handle alloc failed\n");
        return (DDI_FAILURE);
    }

    status = ddi_dma_mem_alloc(dma_hdl, PAGESIZE,
        &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
        NULL, &dmaaddr, (size_t *)&dmalen, &acc_hdl);
    if (status != DDI_SUCCESS) {
        ddi_dma_free_handle(&dma_hdl);
        cmn_err(CE_CONT, "dbr DMA mem alloc failed(status %d)", status);
        kmem_free((void *)info, sizeof (hermon_dbr_info_t));
        return (DDI_FAILURE);
    }

    /* this memory won't be IB registered, so do the bind here */
    status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
        dmaaddr, (size_t)dmalen, DDI_DMA_RDWR |
        DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt);
    if (status != DDI_SUCCESS) {
        ddi_dma_mem_free(&acc_hdl);
        ddi_dma_free_handle(&dma_hdl);
        kmem_free((void *)info, sizeof (hermon_dbr_info_t));
        cmn_err(CE_CONT, "dbr DMA bind handle failed (status %d)",
            status);
        return (DDI_FAILURE);
    }
    *dinfo = info;      /* Pass back the pointer */

    /* init the info structure with returned info */
    info->dbr_dmahdl = dma_hdl;
    info->dbr_acchdl = acc_hdl;
    info->dbr_page = (hermon_dbr_t *)(void *)dmaaddr;
    info->dbr_link = NULL;
    /* extract the phys addr from the cookie */
    info->dbr_paddr = cookie.dmac_laddress;
    info->dbr_firstfree = 0;
    info->dbr_nfree = HERMON_NUM_DBR_PER_PAGE;
    /* link all DBrs onto the free list */
    for (i = 0; i < HERMON_NUM_DBR_PER_PAGE; i++) {
        info->dbr_page[i] = i + 1;
    }

    return (DDI_SUCCESS);
}
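
/*
 * Illustrative note (not part of the original source): forcing
 * dma_attr_sgllen to 1 together with PAGESIZE alignment guarantees the
 * bind in hermon_dbr_page_alloc() produces exactly one cookie, so
 * info->dbr_paddr describes the whole page and a record's bus address
 * can always be computed as dbr_paddr + index * sizeof (hermon_dbr_t).
 * Assuming the 8-byte record the uint64_t arithmetic elsewhere implies,
 * record 5 of a page at bus address 0x1000 lives at
 * 0x1000 + 5 * 8 = 0x1028.
 */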

/*
 * hermon_dbr_alloc()
 *    DBr record allocation - called from alloc cq/qp/srq
 *    will check for available dbrs in current
 *    page - if needed it will allocate another and link them
 */
int
hermon_dbr_alloc(hermon_state_t *state, uint_t index, ddi_acc_handle_t *acchdl,
    hermon_dbr_t **vdbr, uint64_t *pdbr, uint64_t *mapoffset)
{
    hermon_dbr_t *record = NULL;
    hermon_dbr_info_t *info = NULL;
    uint32_t idx;
    int status;

    if (index != state->hs_kernel_uar_index)
        return (hermon_user_dbr_alloc(state, index, acchdl, vdbr, pdbr,
            mapoffset));

    mutex_enter(&state->hs_dbr_lock);
    for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
        if (info->dbr_nfree != 0)
            break;      /* found a page w/ one available */

    if (info == NULL) { /* did NOT find a page with one available */
        status = hermon_dbr_page_alloc(state, &info);
        if (status != DDI_SUCCESS) {
            /* do error handling */
            mutex_exit(&state->hs_dbr_lock);
            return (DDI_FAILURE);
        }
        /* got a new page, so link it in */
        info->dbr_link = state->hs_kern_dbr;
        state->hs_kern_dbr = info;
    }
    idx = info->dbr_firstfree;
    record = info->dbr_page + idx;
    info->dbr_firstfree = *record;
    info->dbr_nfree--;
    *record = 0;

    *acchdl = info->dbr_acchdl;
    *vdbr = record;
    *pdbr = info->dbr_paddr + idx * sizeof (hermon_dbr_t);
    mutex_exit(&state->hs_dbr_lock);
    return (DDI_SUCCESS);
}
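
/*
 * Usage sketch (hypothetical, not part of the driver): a CQ/QP/SRQ
 * allocation path would obtain a doorbell record roughly like this,
 * where "uarindx" selects the kernel path when it equals
 * hs_kernel_uar_index and the user path otherwise:
 *
 *    ddi_acc_handle_t acchdl;
 *    hermon_dbr_t *vdbr;
 *    uint64_t pdbr, mapoffset;
 *
 *    if (hermon_dbr_alloc(state, uarindx, &acchdl, &vdbr, &pdbr,
 *        &mapoffset) != DDI_SUCCESS)
 *        return (IBT_INSUFF_RESOURCE);
 *
 * On return, vdbr is the kernel virtual address of the record, pdbr is
 * the bus address programmed into the hardware context, and mapoffset
 * (user path only) is the offset a userland library would mmap.
 */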

/*
 * hermon_dbr_free()
 *    DBr record deallocation - called from free cq/qp
 *    will update the counter in the header, and invalidate
 *    the dbr, but will NEVER free pages of dbrs - a small
 *    price to pay, and userland access never would free them anyway
 */
void
hermon_dbr_free(hermon_state_t *state, uint_t indx, hermon_dbr_t *record)
{
    hermon_dbr_t *page;
    hermon_dbr_info_t *info;

    if (indx != state->hs_kernel_uar_index) {
        hermon_user_dbr_free(state, indx, record);
        return;
    }
    page = (hermon_dbr_t *)(uintptr_t)((uintptr_t)record & PAGEMASK);
    mutex_enter(&state->hs_dbr_lock);
    for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
        if (info->dbr_page == page)
            break;
    ASSERT(info != NULL);
    *record = info->dbr_firstfree;
    info->dbr_firstfree = record - info->dbr_page;
    info->dbr_nfree++;
    mutex_exit(&state->hs_dbr_lock);
}

/*
 * hermon_dbr_kern_free()
 *    Context: Can be called only from detach context.
 *
 *    Free all kernel dbr pages.  This includes the freeing of all the dma
 *    resources acquired during the allocation of the pages.
 *
 *    Also, free all the user dbr pages.
 */
void
hermon_dbr_kern_free(hermon_state_t *state)
{
    hermon_dbr_info_t *info, *link;
    hermon_user_dbr_t *udbr, *next;
    hermon_udbr_page_t *pagep, *nextp;
    hermon_umap_db_entry_t *umapdb;
    int instance, status;
    uint64_t value;
    extern hermon_umap_db_t hermon_userland_rsrc_db;

    mutex_enter(&state->hs_dbr_lock);
    for (info = state->hs_kern_dbr; info != NULL; info = link) {
        (void) ddi_dma_unbind_handle(info->dbr_dmahdl);
        ddi_dma_mem_free(&info->dbr_acchdl);    /* free page */
        ddi_dma_free_handle(&info->dbr_dmahdl);
        link = info->dbr_link;
        kmem_free(info, sizeof (hermon_dbr_info_t));
    }

    udbr = state->hs_user_dbr;
    instance = state->hs_instance;
    mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
    while (udbr != NULL) {
        pagep = udbr->udbr_pagep;
        while (pagep != NULL) {
            /* probably need to remove "db" */
            (void) ddi_dma_unbind_handle(pagep->upg_dmahdl);
            ddi_dma_free_handle(&pagep->upg_dmahdl);
            freerbuf(pagep->upg_buf);
            ddi_umem_free(pagep->upg_umemcookie);
            status = hermon_umap_db_find_nolock(instance,
                HERMON_DBR_KEY(udbr->udbr_index,
                pagep->upg_index), MLNX_UMAP_DBRMEM_RSRC,
                &value, HERMON_UMAP_DB_REMOVE, &umapdb);
            if (status == DDI_SUCCESS)
                hermon_umap_db_free(umapdb);
            nextp = pagep->upg_link;
            kmem_free(pagep, sizeof (*pagep));
            pagep = nextp;
        }
        next = udbr->udbr_link;
        kmem_free(udbr, sizeof (*udbr));
        udbr = next;
    }
    mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
    mutex_exit(&state->hs_dbr_lock);
}

/*
 * hermon_ah_alloc()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_ah_alloc(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_adds_vect_t *attr_p, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
{
    hermon_rsrc_t *rsrc;
    hermon_hw_udav_t *udav;
    hermon_ahhdl_t ah;
    int status;

    /*
     * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to
     * indicate that we wish to allocate an "invalid" (i.e. empty)
     * address handle XXX
     */

    /* Validate that specified port number is legal */
    if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
        return (IBT_HCA_PORT_INVALID);
    }

    /*
     * Allocate the software structure for tracking the address handle
     * (i.e. the Hermon Address Handle struct).
     */
    status = hermon_rsrc_alloc(state, HERMON_AHHDL, 1, sleepflag, &rsrc);
    if (status != DDI_SUCCESS) {
        return (IBT_INSUFF_RESOURCE);
    }
    ah = (hermon_ahhdl_t)rsrc->hr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))

    /* Increment the reference count on the protection domain (PD) */
    hermon_pd_refcnt_inc(pd);

    udav = (hermon_hw_udav_t *)kmem_zalloc(sizeof (hermon_hw_udav_t),
        KM_SLEEP);
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav))

    /*
     * Fill in the UDAV data.  We first zero out the UDAV, then populate
     * it by calling hermon_set_addr_path() to fill in the common
     * portions that can be pulled from the "ibt_adds_vect_t" passed in.
     */
    status = hermon_set_addr_path(state, attr_p,
        (hermon_hw_addr_path_t *)udav, HERMON_ADDRPATH_UDAV);
    if (status != DDI_SUCCESS) {
        kmem_free(udav, sizeof (hermon_hw_udav_t));
        hermon_pd_refcnt_dec(pd);
        hermon_rsrc_free(state, &rsrc);
        return (status);
    }
    udav->pd = pd->pd_pdnum;
    udav->sl = attr_p->av_srvl;

    /*
     * Fill in the rest of the Hermon Address Handle struct.
     *
     * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field
     * here because we may need to return it later to the IBTF (as a
     * result of a subsequent query operation).  Unlike the other UDAV
     * parameters, the value of "av_dgid.gid_guid" is not always preserved.
     * The reason for this is described in hermon_set_addr_path().
     */
    ah->ah_rsrcp = rsrc;
    ah->ah_pdhdl = pd;
    ah->ah_udav = udav;
    ah->ah_save_guid = attr_p->av_dgid.gid_guid;
    *ahhdl = ah;

    return (DDI_SUCCESS);
}
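
/*
 * Usage sketch (hypothetical, not part of the driver): an IBTF CI entry
 * point backing ibt_alloc_ah() would drive this routine roughly as:
 *
 *    hermon_ahhdl_t ahhdl;
 *    int status;
 *
 *    status = hermon_ah_alloc(state, pdhdl, &adds_vect, &ahhdl,
 *        HERMON_SLEEPFLAG_FOR_CONTEXT());
 *    if (status != DDI_SUCCESS)
 *        return (status);    (e.g. IBT_HCA_PORT_INVALID)
 *
 * On success the PD refcount has been bumped, so hermon_pd_free() on
 * that PD fails with IBT_PD_IN_USE until hermon_ah_free() is called.
 */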

/*
 * hermon_ah_free()
 *    Context: Can be called only from user or kernel context.
 */
/* ARGSUSED */
int
hermon_ah_free(hermon_state_t *state, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
{
    hermon_rsrc_t *rsrc;
    hermon_pdhdl_t pd;
    hermon_ahhdl_t ah;

    /*
     * Pull all the necessary information from the Hermon Address Handle
     * struct.  This is necessary here because the resource for the
     * AH is going to be freed up as part of this operation.
     */
    ah = *ahhdl;
    mutex_enter(&ah->ah_lock);
    rsrc = ah->ah_rsrcp;
    pd = ah->ah_pdhdl;
    mutex_exit(&ah->ah_lock);
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))

    /* Free the UDAV memory */
    kmem_free(ah->ah_udav, sizeof (hermon_hw_udav_t));

    /* Decrement the reference count on the protection domain (PD) */
    hermon_pd_refcnt_dec(pd);

    /* Free the Hermon Address Handle structure */
    hermon_rsrc_free(state, &rsrc);

    /* Set the ahhdl pointer to NULL and return success */
    *ahhdl = NULL;

    return (DDI_SUCCESS);
}


/*
 * hermon_ah_query()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_ah_query(hermon_state_t *state, hermon_ahhdl_t ah, hermon_pdhdl_t *pd,
    ibt_adds_vect_t *attr_p)
{
    mutex_enter(&ah->ah_lock);
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p))

    /*
     * Pull the PD and UDAV from the Hermon Address Handle structure
     */
    *pd = ah->ah_pdhdl;

    /*
     * Fill in "ibt_adds_vect_t".  We call hermon_get_addr_path() to fill
     * the common portions that can be pulled from the UDAV we pass in.
     *
     * NOTE: We will also fill the "av_dgid.gid_guid" field from the
     * "ah_save_guid" field we have previously saved away.  The reason
     * for this is described in hermon_ah_alloc() and hermon_ah_modify().
     */
    hermon_get_addr_path(state, (hermon_hw_addr_path_t *)ah->ah_udav,
        attr_p, HERMON_ADDRPATH_UDAV);

    attr_p->av_dgid.gid_guid = ah->ah_save_guid;

    mutex_exit(&ah->ah_lock);
    return (DDI_SUCCESS);
}

/*
 * hermon_ah_modify()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_ah_modify(hermon_state_t *state, hermon_ahhdl_t ah,
    ibt_adds_vect_t *attr_p)
{
    hermon_hw_udav_t old_udav;
    uint64_t data_old;
    int status, size, i;

    /* Validate that specified port number is legal */
    if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
        return (IBT_HCA_PORT_INVALID);
    }

    mutex_enter(&ah->ah_lock);

    /* Save a copy of the current UDAV data in old_udav. */
    bcopy(ah->ah_udav, &old_udav, sizeof (hermon_hw_udav_t));

    /*
     * Fill in the new UDAV with the caller's data, passed in via the
     * "ibt_adds_vect_t" structure.
     *
     * NOTE: We also need to save away a copy of the "av_dgid.gid_guid"
     * field here (just as we did during hermon_ah_alloc()) because we
     * may need to return it later to the IBTF (as a result of a
     * subsequent query operation).  As explained in hermon_ah_alloc(),
     * unlike the other UDAV parameters, the value of "av_dgid.gid_guid"
     * is not always preserved.  The reason for this is described in
     * hermon_set_addr_path().
     */
    status = hermon_set_addr_path(state, attr_p,
        (hermon_hw_addr_path_t *)ah->ah_udav, HERMON_ADDRPATH_UDAV);
    if (status != DDI_SUCCESS) {
        mutex_exit(&ah->ah_lock);
        return (status);
    }
    ah->ah_save_guid = attr_p->av_dgid.gid_guid;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(ah->ah_udav)))
    ah->ah_udav->sl = attr_p->av_srvl;

    /*
     * Copy changes into the new UDAV.
     *    Note:  We copy in 64-bit chunks.  For the first two of these
     *    chunks it is necessary to read the current contents of the
     *    UDAV, mask off the modifiable portions (maintaining any
     *    of the "reserved" portions), and then mask on the new data.
     */
    size = sizeof (hermon_hw_udav_t) >> 3;
    for (i = 0; i < size; i++) {
        data_old = ((uint64_t *)&old_udav)[i];

        /*
         * Apply mask to change only the relevant values.
         */
        if (i == 0) {
            data_old = data_old & HERMON_UDAV_MODIFY_MASK0;
        } else if (i == 1) {
            data_old = data_old & HERMON_UDAV_MODIFY_MASK1;
        } else {
            data_old = 0;
        }

        /* Store the updated values to the UDAV */
        ((uint64_t *)ah->ah_udav)[i] |= data_old;
    }

    /*
     * Put the valid PD number back into the UDAV entry, as it
     * might have been clobbered above.
     */
    ah->ah_udav->pd = old_udav.pd;


    mutex_exit(&ah->ah_lock);
    return (DDI_SUCCESS);
}

/*
 * hermon_mcg_attach()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_mcg_attach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
    ib_lid_t lid)
{
    hermon_rsrc_t *rsrc;
    hermon_hw_mcg_t *mcg_entry;
    hermon_hw_mcg_qp_list_t *mcg_entry_qplist;
    hermon_mcghdl_t mcg, newmcg;
    uint64_t mgid_hash;
    uint32_t end_indx;
    int status;
    uint_t qp_found;

    /*
     * Attaching to an MCG is allowed only for UD queue pairs.  Verify
     * that the intended QP is of the appropriate transport type
     */
    if (qp->qp_serv_type != HERMON_QP_UD) {
        return (IBT_QP_SRV_TYPE_INVALID);
    }

    /*
     * Check for invalid Multicast DLID.  Specifically, all Multicast
     * LIDs should be within a well defined range.  If the specified LID
     * is outside of that range, then return an error.
     */
    if (hermon_mlid_is_valid(lid) == 0) {
        return (IBT_MC_MLID_INVALID);
    }
    /*
     * Check for invalid Multicast GID.  All Multicast GIDs should have
     * a well-defined pattern of bits and flags that are allowable.  If
     * the specified GID does not meet the criteria, then return an error.
     */
    if (hermon_mgid_is_valid(gid) == 0) {
        return (IBT_MC_MGID_INVALID);
    }

    /*
     * Compute the MGID hash value.  Since the MCG table is arranged as
     * a number of separate hash chains, this operation converts the
     * specified MGID into the starting index of an entry in the hash
     * table (i.e. the index for the start of the appropriate hash chain).
     * Subsequent operations below will walk the chain searching for the
     * right place to add this new QP.
     */
    status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
        &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    /*
     * Grab the multicast group mutex.  Then grab the pre-allocated
     * temporary buffer used for holding and/or modifying MCG entries.
     * Zero out the temporary MCG entry before we begin.
     */
    mutex_enter(&state->hs_mcglock);
    mcg_entry = state->hs_mcgtmp;
    mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);
    bzero(mcg_entry, HERMON_MCGMEM_SZ(state));

    /*
     * Walk through the array of MCG entries starting at "mgid_hash".
     * Try to find the appropriate place for this new QP to be added.
     * This could happen when the first entry of the chain has MGID == 0
     * (which means that the hash chain is empty), or because we find
     * an entry with the same MGID (in which case we'll add the QP to
     * that MCG), or because we come to the end of the chain (in which
     * case this is the first QP being added to the multicast group that
     * corresponds to the MGID).  The hermon_mcg_walk_mgid_hash() routine
     * walks the list and returns an index into the MCG table.  The entry
     * at this index is then checked to determine which case we have
     * fallen into (see below).  Note:  We are using the "shadow" MCG
     * list (of hermon_mcg_t structs) for this lookup because the real
     * MCG entries are in hardware (and the lookup process would be much
     * more time consuming).
     */
    end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL);
    mcg = &state->hs_mcghdl[end_indx];

    /*
     * If MGID == 0, then the hash chain is empty.  Just fill in the
     * current entry.  Note:  No need to allocate an MCG table entry
     * as all the hash chain "heads" are already preallocated.
     */
    if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) {

        /* Fill in the current entry in the "shadow" MCG list */
        hermon_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL);

        /*
         * Try to add the new QP number to the list.  This (and the
         * above) routine fills in a temporary MCG.  The "mcg_entry"
         * and "mcg_entry_qplist" pointers simply point to different
         * offsets within the same temporary copy of the MCG (for
         * convenience).  Note:  If this fails, we need to invalidate
         * the entries we've already put into the "shadow" list entry
         * above.
         */
        status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
            &qp_found);
        if (status != DDI_SUCCESS) {
            bzero(mcg, sizeof (struct hermon_sw_mcg_list_s));
            mutex_exit(&state->hs_mcglock);
            return (status);
        }
        if (!qp_found)
            mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);
            /* set the member count */

        /*
         * Once the temporary MCG has been filled in, write the entry
         * into the appropriate location in the Hermon MCG entry table.
         * If it's successful, then drop the lock and return success.
         * Note: In general, this operation shouldn't fail.  If it
         * does, then it is an indication that something (probably in
         * HW, but maybe in SW) has gone seriously wrong.  We still
         * want to zero out the entries that we've filled in above
         * (in the hermon_mcg_setup_new_hdr() routine).
         */
        status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
            HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
            bzero(mcg, sizeof (struct hermon_sw_mcg_list_s));
            mutex_exit(&state->hs_mcglock);
            HERMON_WARNING(state, "failed to write MCG entry");
            cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
                "%08x\n", status);
            if (status == HERMON_CMD_INVALID_STATUS) {
                hermon_fm_ereport(state, HCA_SYS_ERR,
                    HCA_ERR_SRV_LOST);
            }
            return (ibc_get_ci_failure(0));
        }

        /*
         * Now that we know all the Hermon firmware accesses have been
         * successful, we update the "shadow" MCG entry by incrementing
         * the "number of attached QPs" count.
         *
         * We increment only if the QP is not already part of the
         * MCG by checking the 'qp_found' flag returned from the
         * qplist_add above.
         */
        if (!qp_found) {
            mcg->mcg_num_qps++;

            /*
             * Increment the refcnt for this QP.  Because the QP
             * was added to this MCG, the refcnt must be
             * incremented.
             */
            hermon_qp_mcg_refcnt_inc(qp);
        }

        /*
         * We drop the lock and return success.
         */
        mutex_exit(&state->hs_mcglock);
        return (DDI_SUCCESS);
    }

    /*
     * If the specified MGID matches the MGID in the current entry, then
     * we need to try to add the QP to the current MCG entry.  In this
     * case, it means that we need to read the existing MCG entry (into
     * the temporary MCG), add the new QP number to the temporary entry
     * (using the same method we used above), and write the entry back
     * to the hardware (same as above).
     */
    if ((mcg->mcg_mgid_h == gid.gid_prefix) &&
        (mcg->mcg_mgid_l == gid.gid_guid)) {

        /*
         * Read the current MCG entry into the temporary MCG.  Note:
         * In general, this operation shouldn't fail.  If it does,
         * then it is an indication that something (probably in HW,
         * but maybe in SW) has gone seriously wrong.
         */
        status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
            HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
            mutex_exit(&state->hs_mcglock);
            HERMON_WARNING(state, "failed to read MCG entry");
            cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
                "%08x\n", status);
            if (status == HERMON_CMD_INVALID_STATUS) {
                hermon_fm_ereport(state, HCA_SYS_ERR,
                    HCA_ERR_SRV_LOST);
            }
            return (ibc_get_ci_failure(0));
        }

        /*
         * Try to add the new QP number to the list.  This routine
         * fills in the necessary pieces of the temporary MCG.  The
         * "mcg_entry_qplist" pointer is used to point to the portion
         * of the temporary MCG that holds the QP numbers.
         *
         * Note:  hermon_mcg_qplist_add() returns SUCCESS if it
         * already found the QP in the list.  In this case, the QP is
         * not added on to the list again.  Check the flag 'qp_found'
         * if this value needs to be known.
         */
        status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
            &qp_found);
        if (status != DDI_SUCCESS) {
            mutex_exit(&state->hs_mcglock);
            return (status);
        }
        if (!qp_found)
            mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);
            /* set the member count */

        /*
         * Once the temporary MCG has been updated, write the entry
         * into the appropriate location in the Hermon MCG entry table.
         * If it's successful, then drop the lock and return success.
         * Note: In general, this operation shouldn't fail.  If it
         * does, then it is an indication that something (probably in
         * HW, but maybe in SW) has gone seriously wrong.
         */
        status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
            HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
            mutex_exit(&state->hs_mcglock);
            HERMON_WARNING(state, "failed to write MCG entry");
            cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
                "%08x\n", status);
            if (status == HERMON_CMD_INVALID_STATUS) {
                hermon_fm_ereport(state, HCA_SYS_ERR,
                    HCA_ERR_SRV_LOST);
            }
            return (ibc_get_ci_failure(0));
        }

        /*
         * Now that we know all the Hermon firmware accesses have been
         * successful, we update the current "shadow" MCG entry by
         * incrementing the "number of attached QPs" count.
         *
         * We increment only if the QP is not already part of the
         * MCG by checking the 'qp_found' flag returned from the
         * qplist_add above.
         */
        if (!qp_found) {
            mcg->mcg_num_qps++;

            /*
             * Increment the refcnt for this QP.  Because the QP
             * was added to this MCG, the refcnt must be
             * incremented.
             */
            hermon_qp_mcg_refcnt_inc(qp);
        }

        /*
         * We drop the lock and return success.
         */
        mutex_exit(&state->hs_mcglock);
        return (DDI_SUCCESS);
    }

    /*
     * If we've reached here, then we're at the end of the hash chain.
     * We need to allocate a new MCG entry, fill it in, write it to Hermon,
     * and update the previous entry to link the new one to the end of the
     * chain.
     */

    /*
     * Allocate an MCG table entry.  This will be filled in with all
     * the necessary parameters to define the multicast group.  Then it
     * will be written to the hardware in the next-to-last step below.
     */
    status = hermon_rsrc_alloc(state, HERMON_MCG, 1, HERMON_NOSLEEP, &rsrc);
    if (status != DDI_SUCCESS) {
        mutex_exit(&state->hs_mcglock);
        return (IBT_INSUFF_RESOURCE);
    }

    /*
     * Fill in the new entry in the "shadow" MCG list.  Note:  Just as
     * it does above, hermon_mcg_setup_new_hdr() also fills in a portion
     * of the temporary MCG entry (the rest of which will be filled in by
     * hermon_mcg_qplist_add() below)
     */
    newmcg = &state->hs_mcghdl[rsrc->hr_indx];
    hermon_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc);

    /*
     * Try to add the new QP number to the list.  This routine fills in
     * the final necessary pieces of the temporary MCG.  The
     * "mcg_entry_qplist" pointer is used to point to the portion of the
     * temporary MCG that holds the QP numbers.  If we fail here, we
     * must undo the previous resource allocation.
     *
     * Note:  hermon_mcg_qplist_add() can return SUCCESS if it already
     * found the QP in the list.  In this case, the QP is not added on to
     * the list again.  Check the flag 'qp_found' if this value needs
     * to be known.
     */
    status = hermon_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp,
        &qp_found);
    if (status != DDI_SUCCESS) {
        bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
        hermon_rsrc_free(state, &rsrc);
        mutex_exit(&state->hs_mcglock);
        return (status);
    }
    mcg_entry->member_cnt = (newmcg->mcg_num_qps + 1);
        /* set the member count */

    /*
     * Once the temporary MCG has been updated, write the entry into the
     * appropriate location in the Hermon MCG entry table.  If this is
     * successful, then we need to chain the previous entry to this one.
     * Note: In general, this operation shouldn't fail.  If it does, then
     * it is an indication that something (probably in HW, but maybe in
     * SW) has gone seriously wrong.
     */
    status = hermon_write_mgm_cmd_post(state, mcg_entry, rsrc->hr_indx,
        HERMON_CMD_NOSLEEP_SPIN);
    if (status != HERMON_CMD_SUCCESS) {
        bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
        hermon_rsrc_free(state, &rsrc);
        mutex_exit(&state->hs_mcglock);
        HERMON_WARNING(state, "failed to write MCG entry");
        cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    /*
     * Now read the current MCG entry (the one previously at the end of
     * hash chain) into the temporary MCG.  We are going to update its
     * "next_gid_indx" now and write the entry back to the MCG table.
     * Note:  In general, this operation shouldn't fail.  If it does, then
     * it is an indication that something (probably in HW, but maybe in SW)
     * has gone seriously wrong.  We will free up the MCG entry resource,
     * but we will not undo the previously written MCG entry in the HW.
     * This is OK, though, because the MCG entry is not currently attached
     * to any hash chain.
     */
    status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
        HERMON_CMD_NOSLEEP_SPIN);
    if (status != HERMON_CMD_SUCCESS) {
        bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
        hermon_rsrc_free(state, &rsrc);
        mutex_exit(&state->hs_mcglock);
        HERMON_WARNING(state, "failed to read MCG entry");
        cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    /*
     * Finally, we update the "next_gid_indx" field in the temporary MCG
     * and attempt to write the entry back into the Hermon MCG table.  If
     * this succeeds, then we update the "shadow" list to reflect the
     * change, drop the lock, and return success.  Note:  In general, this
     * operation shouldn't fail.  If it does, then it is an indication
     * that something (probably in HW, but maybe in SW) has gone seriously
     * wrong.  Just as we do above, we will free up the MCG entry resource,
     * but we will not try to undo the previously written MCG entry.  This
     * is OK, though, because (since we failed here to update the end of
     * the chain) that other entry is not currently attached to any chain.
     */
    mcg_entry->next_gid_indx = rsrc->hr_indx;
    status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
        HERMON_CMD_NOSLEEP_SPIN);
    if (status != HERMON_CMD_SUCCESS) {
        bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
        hermon_rsrc_free(state, &rsrc);
        mutex_exit(&state->hs_mcglock);
        HERMON_WARNING(state, "failed to write MCG entry");
        cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }
    mcg = &state->hs_mcghdl[end_indx];
    mcg->mcg_next_indx = rsrc->hr_indx;

    /*
     * Now that we know all the Hermon firmware accesses have been
     * successful, we update the new "shadow" MCG entry by incrementing
     * the "number of attached QPs" count.  Then we drop the lock and
     * return success.
     */
    newmcg->mcg_num_qps++;

    /*
     * Increment the refcnt for this QP.  Because the QP
     * was added to this MCG, the refcnt must be
     * incremented.
     */
    hermon_qp_mcg_refcnt_inc(qp);

    mutex_exit(&state->hs_mcglock);
    return (DDI_SUCCESS);
}
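
/*
 * Illustrative summary (not part of the original source) of the three
 * attach cases handled above, for a chain whose head is at hash index H:
 *
 *    [H: MGID == 0]                      case 1: head is free, fill it
 *                                        in directly
 *    [H: MGID == G], G is the target     case 2: group exists, append
 *                                        the QP to its QP list
 *    [H] -> [X] -> ... -> tail, no match case 3: walk via next_gid_indx,
 *                                        allocate a new entry, link it
 *                                        at the tail
 *
 * In every case the hardware MGM entry is written first, and the
 * "shadow" hermon_mcg_t counters are updated only after all firmware
 * commands succeed.
 */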


/*
 * hermon_mcg_detach()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_mcg_detach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
    ib_lid_t lid)
{
    hermon_hw_mcg_t *mcg_entry;
    hermon_hw_mcg_qp_list_t *mcg_entry_qplist;
    hermon_mcghdl_t mcg;
    uint64_t mgid_hash;
    uint32_t end_indx, prev_indx;
    int status;

    /*
     * Check for invalid Multicast DLID.  Specifically, all Multicast
     * LIDs should be within a well defined range.  If the specified LID
     * is outside of that range, then return an error.
     */
    if (hermon_mlid_is_valid(lid) == 0) {
        return (IBT_MC_MLID_INVALID);
    }

    /*
     * Compute the MGID hash value.  As described above, the MCG table is
     * arranged as a number of separate hash chains.  This operation
     * converts the specified MGID into the starting index of an entry in
     * the hash table (i.e. the index for the start of the appropriate
     * hash chain).  Subsequent operations below will walk the chain
     * searching for a matching entry from which to attempt to remove
     * the specified QP.
     */
    status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
        &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    /*
     * Grab the multicast group mutex.  Then grab the pre-allocated
     * temporary buffer used for holding and/or modifying MCG entries.
     */
    mutex_enter(&state->hs_mcglock);
    mcg_entry = state->hs_mcgtmp;
    mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);

    /*
     * Walk through the array of MCG entries starting at "mgid_hash".
     * Try to find an MCG entry with a matching MGID.  The
     * hermon_mcg_walk_mgid_hash() routine walks the list and returns an
     * index into the MCG table.  The entry at this index is checked to
     * determine whether it is a match or not.  If it is a match, then
     * we continue on to attempt to remove the QP from the MCG.  If it
     * is not a match (or not a valid MCG entry), then we return an error.
     */
    end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
    mcg = &state->hs_mcghdl[end_indx];

    /*
     * If MGID == 0 (the hash chain is empty) or if the specified MGID
     * does not match the MGID in the current entry, then return
     * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
     * valid).
     */
    if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
        ((mcg->mcg_mgid_h != gid.gid_prefix) ||
        (mcg->mcg_mgid_l != gid.gid_guid))) {
        mutex_exit(&state->hs_mcglock);
        return (IBT_MC_MGID_INVALID);
    }

    /*
     * Read the current MCG entry into the temporary MCG.  Note:  In
     * general, this operation shouldn't fail.  If it does, then it is
     * an indication that something (probably in HW, but maybe in SW)
     * has gone seriously wrong.
     */
    status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
        HERMON_CMD_NOSLEEP_SPIN);
    if (status != HERMON_CMD_SUCCESS) {
        mutex_exit(&state->hs_mcglock);
        HERMON_WARNING(state, "failed to read MCG entry");
        cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    /*
     * Search the QP number list for a match.  If a match is found, then
     * remove the entry from the QP list.  Otherwise, if no match is found,
     * return an error.
     */
    status = hermon_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
    if (status != DDI_SUCCESS) {
        mutex_exit(&state->hs_mcglock);
        return (status);
    }

    /*
     * Decrement the MCG count for this QP.  When the 'qp_mcg'
     * field becomes 0, then this QP is no longer a member of any
     * MCG.
     */
    hermon_qp_mcg_refcnt_dec(qp);

    /*
     * If the current MCG's QP number list is about to be made empty
     * ("mcg_num_qps" == 1), then remove the entry itself from the hash
     * chain.  Otherwise, just write the updated MCG entry back to the
     * hardware.  In either case, once we successfully update the hardware
     * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
     * count (or zero out the entire "shadow" list entry) before returning
     * success.  Note:  Zeroing out the "shadow" list entry is done
     * inside of hermon_mcg_hash_list_remove().
     */
    if (mcg->mcg_num_qps == 1) {

        /* Remove an MCG entry from the hash chain */
        status = hermon_mcg_hash_list_remove(state, end_indx, prev_indx,
            mcg_entry);
        if (status != DDI_SUCCESS) {
            mutex_exit(&state->hs_mcglock);
            return (status);
        }

    } else {
        /*
         * Write the updated MCG entry back to the Hermon MCG table.
         * If this succeeds, then we update the "shadow" list to
         * reflect the change (i.e. decrement the "mcg_num_qps"),
         * drop the lock, and return success.  Note:  In general,
         * this operation shouldn't fail.  If it does, then it is an
         * indication that something (probably in HW, but maybe in SW)
         * has gone seriously wrong.
         */
        mcg_entry->member_cnt = (mcg->mcg_num_qps - 1);
        status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
            HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
            mutex_exit(&state->hs_mcglock);
            HERMON_WARNING(state, "failed to write MCG entry");
            cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
                "%08x\n", status);
            if (status == HERMON_CMD_INVALID_STATUS) {
                hermon_fm_ereport(state, HCA_SYS_ERR,
                    HCA_ERR_SRV_LOST);
            }
            return (ibc_get_ci_failure(0));
        }
        mcg->mcg_num_qps--;
    }

    mutex_exit(&state->hs_mcglock);
    return (DDI_SUCCESS);
}
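
/*
 * Usage sketch (hypothetical, not part of the driver): a UD QP joins
 * and later leaves a multicast group symmetrically:
 *
 *    status = hermon_mcg_attach(state, qphdl, mcg_gid, mcg_lid);
 *    ...
 *    status = hermon_mcg_detach(state, qphdl, mcg_gid, mcg_lid);
 *
 * Each successful attach bumps qp_mcg_refcnt and each successful detach
 * drops it, which gives the rest of the driver a way to refuse to tear
 * down a QP that still belongs to one or more multicast groups.
 */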

/*
 * hermon_qp_mcg_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp)
{
    /* Increment the QP's MCG reference count */
    mutex_enter(&qp->qp_lock);
    qp->qp_mcg_refcnt++;
    mutex_exit(&qp->qp_lock);
}


/*
 * hermon_qp_mcg_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp)
{
    /* Decrement the QP's MCG reference count */
    mutex_enter(&qp->qp_lock);
    qp->qp_mcg_refcnt--;
    mutex_exit(&qp->qp_lock);
}


/*
 * hermon_mcg_qplist_add()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
    hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp,
    uint_t *qp_found)
{
    uint_t qplist_indx;

    ASSERT(MUTEX_HELD(&state->hs_mcglock));

    qplist_indx = mcg->mcg_num_qps;

    /*
     * Determine if we have exceeded the maximum number of QP per
     * multicast group.  If we have, then return an error
     */
    if (qplist_indx >= state->hs_cfg_profile->cp_num_qp_per_mcg) {
        return (IBT_HCA_MCG_QP_EXCEEDED);
    }

    /*
     * Determine if the QP is already attached to this MCG table.  If it
     * is, then we break out and treat this operation as a NO-OP
     */
    for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
        qplist_indx++) {
        if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
            break;
        }
    }

    /*
     * If the QP was already on the list, set 'qp_found' to TRUE.  We still
     * return SUCCESS in this case, but the qplist will not have been
     * updated because the QP was already on the list.
     */
    if (qplist_indx < mcg->mcg_num_qps) {
        *qp_found = 1;
    } else {
        /*
         * Otherwise, append the new QP number to the end of the
         * current QP list.  Note:  We will increment the "mcg_num_qps"
         * field on the "shadow" MCG list entry later (after we know
         * that all necessary Hermon firmware accesses have been
         * successful).
         *
         * Set 'qp_found' to 0 so we know the QP was added on to the
         * list for sure.
         */
        mcg_qplist[qplist_indx].qpn =
            (qp->qp_qpnum | HERMON_MCG_QPN_BLOCK_LB);
        *qp_found = 0;
    }

    return (DDI_SUCCESS);
}



/*
 * hermon_mcg_qplist_remove()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
    hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp)
{
    uint_t i, qplist_indx;

    /*
     * Search the MCG QP list for a matching QPN.  When it's found, we
     * swap the last entry in the list into the matching slot, zero out
     * the (now stale) last entry, and return.  The caller is
     * responsible for decrementing the QP count.  If no match is
     * found, then it's an error.
     */
    qplist_indx = mcg->mcg_num_qps;
    for (i = 0; i < qplist_indx; i++) {
        if (mcg_qplist[i].qpn == qp->qp_qpnum) {
            mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
            mcg_qplist[qplist_indx - 1].qpn = 0;

            return (DDI_SUCCESS);
        }
    }

    return (IBT_QP_HDL_INVALID);
}
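
/*
 * Worked example (illustrative only): removing QPN B from the list
 * [A, B, C, D] with mcg_num_qps == 4 copies D over B, giving
 * [A, D, C, D], then zeroes the stale tail slot, giving [A, D, C, 0].
 * The caller then writes member_cnt = 3 to the hardware and decrements
 * the shadow count, so the list stays dense without shifting every
 * element after the removed one.
 */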


/*
 * hermon_mcg_walk_mgid_hash()
 *    Context: Can be called from interrupt or base context.
 */
static uint_t
hermon_mcg_walk_mgid_hash(hermon_state_t *state, uint64_t start_indx,
    ib_gid_t mgid, uint_t *p_indx)
{
    hermon_mcghdl_t curr_mcghdl;
    uint_t curr_indx, prev_indx;

    ASSERT(MUTEX_HELD(&state->hs_mcglock));

    /* Start at the head of the hash chain */
    curr_indx = (uint_t)start_indx;
    prev_indx = curr_indx;
    curr_mcghdl = &state->hs_mcghdl[curr_indx];

    /* If the first entry in the chain has MGID == 0, then stop */
    if ((curr_mcghdl->mcg_mgid_h == 0) &&
        (curr_mcghdl->mcg_mgid_l == 0)) {
        goto end_mgid_hash_walk;
    }

    /* If the first entry in the chain matches the MGID, then stop */
    if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
        (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
        goto end_mgid_hash_walk;
    }

    /* Otherwise, walk the hash chain looking for a match */
    while (curr_mcghdl->mcg_next_indx != 0) {
        prev_indx = curr_indx;
        curr_indx = curr_mcghdl->mcg_next_indx;
        curr_mcghdl = &state->hs_mcghdl[curr_indx];

        if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
            (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
            break;
        }
    }

end_mgid_hash_walk:
    /*
     * If necessary, return the index of the previous entry too.  This
     * is primarily used for detaching a QP from a multicast group.  It
     * may be necessary, in that case, to delete an MCG entry from the
     * hash chain, and having the index of the previous entry is helpful.
     */
    if (p_indx != NULL) {
        *p_indx = prev_indx;
    }
    return (curr_indx);
}
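
/*
 * Illustrative note (not part of the original source): the return value
 * is overloaded, and callers disambiguate it by re-inspecting the entry.
 * For a chain H -> 12 -> 37, a search for the MGID stored at index 37
 * returns 37 with *p_indx == 12; a search for an MGID not on the chain
 * also returns 37 (the tail), which the caller detects because the
 * MGIDs do not match; and a search on an empty chain returns H itself,
 * whose MGID is 0.
 */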


/*
 * hermon_mcg_setup_new_hdr()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, hermon_hw_mcg_t *mcg_hdr,
    ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc)
{
    /*
     * Fill in the fields of the "shadow" entry used by software
     * to track MCG hardware entry
     */
    mcg->mcg_mgid_h = mgid.gid_prefix;
    mcg->mcg_mgid_l = mgid.gid_guid;
    mcg->mcg_rsrcp = mcg_rsrc;
    mcg->mcg_next_indx = 0;
    mcg->mcg_num_qps = 0;

    /*
     * Fill the header fields of the MCG entry (in the temporary copy)
     */
    mcg_hdr->mgid_h = mgid.gid_prefix;
    mcg_hdr->mgid_l = mgid.gid_guid;
    mcg_hdr->next_gid_indx = 0;
}


/*
 * hermon_mcg_hash_list_remove()
 *    Context: Can be called only from user or kernel context.
 */
static int
hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
    uint_t prev_indx, hermon_hw_mcg_t *mcg_entry)
{
    hermon_mcghdl_t curr_mcg, prev_mcg, next_mcg;
    uint_t next_indx;
    int status;

    /* Get the pointer to "shadow" list for current entry */
    curr_mcg = &state->hs_mcghdl[curr_indx];

    /*
     * If this is the first entry on a hash chain, then attempt to replace
     * the entry with the next entry on the chain.  If there are no
     * subsequent entries on the chain, then this is the only entry and
     * should be invalidated.
     */
    if (curr_indx == prev_indx) {

        /*
         * If this is the only entry on the chain, then invalidate it.
         * Note:  Invalidating an MCG entry means writing all zeros
         * to the entry.  This is only necessary for those MCG
         * entries that are the "head" entries of the individual hash
         * chains.  Regardless of whether this operation returns
         * success or failure, return that result to the caller.
         */
        next_indx = curr_mcg->mcg_next_indx;
        if (next_indx == 0) {
            status = hermon_mcg_entry_invalidate(state, mcg_entry,
                curr_indx);
            bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
            return (status);
        }

        /*
         * Otherwise, this is just the first entry on the chain, so
         * grab the next one
         */
        next_mcg = &state->hs_mcghdl[next_indx];

        /*
         * Read the next MCG entry into the temporary MCG.  Note:
         * In general, this operation shouldn't fail.  If it does,
         * then it is an indication that something (probably in HW,
         * but maybe in SW) has gone seriously wrong.
         */
        status = hermon_read_mgm_cmd_post(state, mcg_entry, next_indx,
            HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
            HERMON_WARNING(state, "failed to read MCG entry");
            cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
                "%08x\n", status);
            if (status == HERMON_CMD_INVALID_STATUS) {
                hermon_fm_ereport(state, HCA_SYS_ERR,
                    HCA_ERR_SRV_LOST);
            }
            return (ibc_get_ci_failure(0));
        }

        /*
         * Copy/Write the temporary MCG back to the hardware MCG list
         * using the current index.  This essentially removes the
         * current MCG entry from the list by writing over it with
         * the next one.  If this is successful, then we can do the
         * same operation for the "shadow" list.  And we can also
         * free up the Hermon MCG entry resource that was associated
         * with the (old) next entry.  Note:  In general, this
         * operation shouldn't fail.  If it does, then it is an
         * indication that something (probably in HW, but maybe in SW)
         * has gone seriously wrong.
         */
        status = hermon_write_mgm_cmd_post(state, mcg_entry, curr_indx,
            HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
            HERMON_WARNING(state, "failed to write MCG entry");
            cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
                "%08x\n", status);
            if (status == HERMON_CMD_INVALID_STATUS) {
                hermon_fm_ereport(state, HCA_SYS_ERR,
                    HCA_ERR_SRV_LOST);
            }
            return (ibc_get_ci_failure(0));
        }

        /*
         * Copy all the software tracking information from the next
         * entry on the "shadow" MCG list into the current entry on
         * the list.  Then invalidate (zero out) the other "shadow"
         * list entry.
         */
        bcopy(next_mcg, curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
        bzero(next_mcg, sizeof (struct hermon_sw_mcg_list_s));

        /*
         * Free up the Hermon MCG entry resource used by the "next"
         * MCG entry.  That resource is no longer needed by any
         * MCG entry which is first on a hash chain (like the "next"
         * entry has just become).
         */
        hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);

        return (DDI_SUCCESS);
    }

    /*
     * Else if this is the last entry on the hash chain (or a middle
     * entry), then we update the previous entry's "next_gid_index" field
     * to make it point instead to the next entry on the chain.  By
     * skipping over the removed entry in this way, we can then free up
     * any resources associated with the current entry.  Note:  We don't
     * need to invalidate the "skipped over" hardware entry because it
     * will no longer be connected to any hash chains, and if/when it is
     * finally re-used, it will be written with entirely new values.
     */

    /*
     * Read the previous MCG entry into the temporary MCG.  Note:  In
     * general, this operation shouldn't fail.  If it does, then it is an
     * indication that something (probably in HW, but maybe in SW) has
     * gone seriously wrong.
     */
    status = hermon_read_mgm_cmd_post(state, mcg_entry, prev_indx,
        HERMON_CMD_NOSLEEP_SPIN);
    if (status != HERMON_CMD_SUCCESS) {
        HERMON_WARNING(state, "failed to read MCG entry");
        cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    /*
     * Finally, we update the "next_gid_indx" field in the temporary MCG
     * and attempt to write the entry back into the Hermon MCG table.  If
     * this succeeds, then we update the "shadow" list to reflect the
     * change, free up the Hermon MCG entry resource that was associated
     * with the current entry, and return success.  Note:  In general,
     * this operation shouldn't fail.  If it does, then it is an indication
     * that something (probably in HW, but maybe in SW) has gone seriously
     * wrong.
     */
    mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
    status = hermon_write_mgm_cmd_post(state, mcg_entry, prev_indx,
        HERMON_CMD_NOSLEEP_SPIN);
    if (status != HERMON_CMD_SUCCESS) {
        HERMON_WARNING(state, "failed to write MCG entry");
        cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR,
                HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    /*
     * Get the pointer to the "shadow" MCG list entry for the previous
     * MCG.  Update its "mcg_next_indx" to point to the next entry,
     * the one after the current entry.  Note:  This next index may be
     * zero, indicating the end of the list.
     */
    prev_mcg = &state->hs_mcghdl[prev_indx];
    prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;

    /*
     * Free up the Hermon MCG entry resource used by the current entry.
     * This resource is no longer needed because the chain now skips over
     * the current entry.  Then invalidate (zero out) the current "shadow"
     * list entry.
     */
    hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
    bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));

    return (DDI_SUCCESS);
}


/*
 * hermon_mcg_entry_invalidate()
 *    Context: Can be called only from user or kernel context.
 */
static int
hermon_mcg_entry_invalidate(hermon_state_t *state, hermon_hw_mcg_t *mcg_entry,
    uint_t indx)
{
    int status;

    /*
     * Invalidate the hardware MCG entry by zeroing out this temporary
     * MCG and writing it to the hardware.  Note:  In general, this
     * operation shouldn't fail.  If it does, then it is an indication
     * that something (probably in HW, but maybe in SW) has gone seriously
     * wrong.
     */
    bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
    status = hermon_write_mgm_cmd_post(state, mcg_entry, indx,
        HERMON_CMD_NOSLEEP_SPIN);
    if (status != HERMON_CMD_SUCCESS) {
        HERMON_WARNING(state, "failed to write MCG entry");
        cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    return (DDI_SUCCESS);
}


/*
 * hermon_mgid_is_valid()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mgid_is_valid(ib_gid_t gid)
{
    uint_t topbits, flags, scope;

    /*
     * According to IBA 1.1 specification (section 4.1.1) a valid
     * "multicast GID" must have its top eight bits set to all ones
     */
    topbits = (gid.gid_prefix >> HERMON_MCG_TOPBITS_SHIFT) &
        HERMON_MCG_TOPBITS_MASK;
    if (topbits != HERMON_MCG_TOPBITS) {
        return (0);
    }

    /*
     * The next 4 bits are the "flag" bits.  These are valid only
     * if they are "0" (which correspond to permanently assigned/
     * "well-known" multicast GIDs) or "1" (for so-called "transient"
     * multicast GIDs).  All other values are reserved.
     */
    flags = (gid.gid_prefix >> HERMON_MCG_FLAGS_SHIFT) &
        HERMON_MCG_FLAGS_MASK;
    if (!((flags == HERMON_MCG_FLAGS_PERM) ||
        (flags == HERMON_MCG_FLAGS_NONPERM))) {
        return (0);
    }

    /*
     * The next 4 bits are the "scope" bits.  These are valid only
     * if they are "2" (Link-local), "5" (Site-local), "8"
     * (Organization-local) or "E" (Global).  All other values
     * are reserved (or currently unassigned).
     */
    scope = (gid.gid_prefix >> HERMON_MCG_SCOPE_SHIFT) &
        HERMON_MCG_SCOPE_MASK;
    if (!((scope == HERMON_MCG_SCOPE_LINKLOC) ||
        (scope == HERMON_MCG_SCOPE_SITELOC) ||
        (scope == HERMON_MCG_SCOPE_ORGLOC) ||
        (scope == HERMON_MCG_SCOPE_GLOBAL))) {
        return (0);
    }

    /*
     * If it passes all of the above checks, then we will consider it
     * a valid multicast GID.
     */
    return (1);
}
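
/*
 * Illustrative example (not part of the original source): for a GID
 * whose prefix begins with 0xff12..., the checks above see topbits ==
 * 0xff (multicast), flags == 0x1 (a transient group), and scope == 0x2
 * (link-local), so the GID is accepted.  A prefix beginning with
 * 0xff13... would fail the scope test, since scope value 0x3 is
 * reserved.
 */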
	/*
	 * The next 4 bits are the "scope" bits.  These are valid only
	 * if they are "2" (Link-local), "5" (Site-local), "8"
	 * (Organization-local) or "E" (Global).  All other values
	 * are reserved (or currently unassigned).
	 */
	scope = (gid.gid_prefix >> HERMON_MCG_SCOPE_SHIFT) &
	    HERMON_MCG_SCOPE_MASK;
	if (!((scope == HERMON_MCG_SCOPE_LINKLOC) ||
	    (scope == HERMON_MCG_SCOPE_SITELOC) ||
	    (scope == HERMON_MCG_SCOPE_ORGLOC) ||
	    (scope == HERMON_MCG_SCOPE_GLOBAL))) {
		return (0);
	}

	/*
	 * If it passes all of the above checks, then we will consider it
	 * a valid multicast GID.
	 */
	return (1);
}


/*
 * hermon_mlid_is_valid()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mlid_is_valid(ib_lid_t lid)
{
	/*
	 * According to the IBA 1.1 specification (section 4.1.1) a valid
	 * "multicast DLID" must be between 0xC000 and 0xFFFE.
	 */
	if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) {
		return (0);
	}

	return (1);
}


/*
 * hermon_pd_alloc()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_pd_alloc(hermon_state_t *state, hermon_pdhdl_t *pdhdl, uint_t sleepflag)
{
	hermon_rsrc_t	*rsrc;
	hermon_pdhdl_t	pd;
	int		status;

	/*
	 * Allocate the software structure for tracking the protection
	 * domain (i.e. the Hermon Protection Domain handle).  By default
	 * each PD structure will have a unique PD number assigned to it.
	 * All that is necessary is for software to initialize the PD
	 * reference count (to zero) and return success.
	 */
	status = hermon_rsrc_alloc(state, HERMON_PDHDL, 1, sleepflag, &rsrc);
	if (status != DDI_SUCCESS) {
		return (IBT_INSUFF_RESOURCE);
	}
	pd = (hermon_pdhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))

	pd->pd_refcnt = 0;
	*pdhdl = pd;

	return (DDI_SUCCESS);
}


/*
 * hermon_pd_free()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_pd_free(hermon_state_t *state, hermon_pdhdl_t *pdhdl)
{
	hermon_rsrc_t	*rsrc;
	hermon_pdhdl_t	pd;

	/*
	 * Pull all the necessary information from the Hermon Protection
	 * Domain handle.  This is necessary here because the resource for
	 * the PD is going to be freed up as part of this operation.
	 */
	pd = *pdhdl;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
	rsrc = pd->pd_rsrcp;

	/*
	 * Check the PD reference count.  If the reference count is
	 * non-zero, then it means that this protection domain is still
	 * referenced by some memory region, queue pair, address handle, or
	 * other IB object.  If it is non-zero, then return an error.
	 * Otherwise, free the Hermon resource and return success.
	 */
	if (pd->pd_refcnt != 0) {
		return (IBT_PD_IN_USE);
	}

	/* Free the Hermon Protection Domain handle */
	hermon_rsrc_free(state, &rsrc);

	/* Set the pdhdl pointer to NULL and return success */
	*pdhdl = (hermon_pdhdl_t)NULL;

	return (DDI_SUCCESS);
}

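/*
 * Usage sketch for the PD routines above (illustrative only; "state"
 * stands for the usual hermon_state_t pointer, and the error handling
 * shown is a hypothetical caller, not code from this driver):
 *
 *	hermon_pdhdl_t	pd;
 *
 *	if (hermon_pd_alloc(state, &pd, HERMON_SLEEP) != DDI_SUCCESS)
 *		return (IBT_INSUFF_RESOURCE);
 *	... create MRs/QPs/AHs that reference "pd" ...
 *	if (hermon_pd_free(state, &pd) == IBT_PD_IN_USE)
 *		... some object still references the PD; free it first ...
 */
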
/*
 * hermon_pd_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_pd_refcnt_inc(hermon_pdhdl_t pd)
{
	/* Increment the protection domain's reference count */
	atomic_inc_32(&pd->pd_refcnt);
}


/*
 * hermon_pd_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_pd_refcnt_dec(hermon_pdhdl_t pd)
{
	/* Decrement the protection domain's reference count */
	atomic_dec_32(&pd->pd_refcnt);
}

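/*
 * Sketch of the reference count protocol implemented by the two helpers
 * above (the object lifecycle shown is a hypothetical caller, not code
 * from this file):
 *
 *	hermon_pd_refcnt_inc(pd);	.. a new MR/QP/AH now references pd
 *	...
 *	hermon_pd_refcnt_dec(pd);	.. that object is freed again
 *
 * Because the count is maintained with atomic_inc_32()/atomic_dec_32(),
 * no mutex is required, which is what makes these helpers safe to call
 * from interrupt context.  A nonzero count is also what makes
 * hermon_pd_free() above fail with IBT_PD_IN_USE.
 */
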
/*
 * hermon_port_query()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_port_query(hermon_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
{
	sm_portinfo_t		portinfo;
	sm_guidinfo_t		guidinfo;
	sm_pkey_table_t		pkeytable;
	ib_gid_t		*sgid;
	uint_t			sgid_max, pkey_max, tbl_size;
	int			i, j, indx, status;
	ib_pkey_t		*pkeyp;
	ib_guid_t		*guidp;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))

	/* Validate that specified port number is legal */
	if (!hermon_portnum_is_valid(state, port)) {
		return (IBT_HCA_PORT_INVALID);
	}
	pkeyp = state->hs_pkey[port - 1];
	guidp = state->hs_guid[port - 1];

	/*
	 * We use the Hermon MAD_IFC command to post a GetPortInfo MAD
	 * to the firmware (for the specified port number).  This returns
	 * a full PortInfo MAD (in "portinfo") which we subsequently
	 * parse to fill in the "ibt_hca_portinfo_t" structure returned
	 * to the IBTF.
	 */
	status = hermon_getportinfo_cmd_post(state, port,
	    HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: GetPortInfo (port %02d) command "
		    "failed: %08x\n", port, status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Parse the PortInfo MAD and fill in the IBTF structure
	 */
	pi->p_base_lid		= portinfo.LID;
	pi->p_qkey_violations	= portinfo.Q_KeyViolations;
	pi->p_pkey_violations	= portinfo.P_KeyViolations;
	pi->p_sm_sl		= portinfo.MasterSMSL;
	pi->p_sm_lid		= portinfo.MasterSMLID;
	pi->p_linkstate		= portinfo.PortState;
	pi->p_port_num		= portinfo.LocalPortNum;
	pi->p_phys_state	= portinfo.PortPhysicalState;
	pi->p_width_supported	= portinfo.LinkWidthSupported;
	pi->p_width_enabled	= portinfo.LinkWidthEnabled;
	pi->p_width_active	= portinfo.LinkWidthActive;
	pi->p_speed_supported	= portinfo.LinkSpeedSupported;
	pi->p_speed_enabled	= portinfo.LinkSpeedEnabled;
	pi->p_speed_active	= portinfo.LinkSpeedActive;
	pi->p_mtu		= portinfo.MTUCap;
	pi->p_lmc		= portinfo.LMC;
	pi->p_max_vl		= portinfo.VLCap;
	pi->p_subnet_timeout	= portinfo.SubnetTimeOut;
	pi->p_msg_sz		= ((uint32_t)1 << HERMON_QP_LOG_MAX_MSGSZ);
	tbl_size = state->hs_cfg_profile->cp_log_max_gidtbl;
	pi->p_sgid_tbl_sz	= (1 << tbl_size);
	tbl_size = state->hs_cfg_profile->cp_log_max_pkeytbl;
	pi->p_pkey_tbl_sz	= (1 << tbl_size);
	state->hs_sn_prefix[port - 1] = portinfo.GidPrefix;

	/*
	 * Convert InfiniBand-defined port capability flags to the format
	 * specified by the IBTF
	 */
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM)
		pi->p_capabilities |= IBT_PORT_CAP_SM;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED)
		pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_DM;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_VENDOR;
	if (portinfo.CapabilityMask & SM_CAP_MASK_IS_CLNT_REREG_SUPPD)
		pi->p_capabilities |= IBT_PORT_CAP_CLNT_REREG;

	/*
	 * Fill in the SGID table.  Since the only access to the Hermon
	 * GID tables is through the firmware's MAD_IFC interface, we
	 * post as many GetGUIDInfo MADs as necessary to read in the entire
	 * contents of the SGID table (for the specified port).  Note:  The
	 * GetGUIDInfo command only gets eight GUIDs per operation.  These
	 * GUIDs are then appended to the GID prefix for the port (from the
	 * GetPortInfo above) to form the entire SGID table.
	 */
	for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) {
		status = hermon_getguidinfo_cmd_post(state, port, i >> 3,
		    HERMON_SLEEPFLAG_FOR_CONTEXT(), &guidinfo);
		if (status != HERMON_CMD_SUCCESS) {
			cmn_err(CE_CONT, "Hermon: GetGUIDInfo (port %02d) "
			    "command failed: %08x\n", port, status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/* Figure out how many of the entries are valid */
		sgid_max = min((pi->p_sgid_tbl_sz - i), 8);
		for (j = 0; j < sgid_max; j++) {
			indx = (i + j);
			sgid = &pi->p_sgid_tbl[indx];
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid))
			sgid->gid_prefix = portinfo.GidPrefix;
			guidp[indx] = sgid->gid_guid =
			    guidinfo.GUIDBlocks[j];
		}
	}

	/*
	 * Fill in the PKey table.  Just as for the GID tables above, the
	 * only access to the Hermon PKey tables is through the firmware's
	 * MAD_IFC interface.  We post as many GetPKeyTable MADs as necessary
	 * to read in the entire contents of the PKey table (for the
	 * specified port).  Note:  The GetPKeyTable command only gets 32
	 * PKeys per operation.
	 */
	for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) {
		status = hermon_getpkeytable_cmd_post(state, port, i,
		    HERMON_SLEEPFLAG_FOR_CONTEXT(), &pkeytable);
		if (status != HERMON_CMD_SUCCESS) {
			cmn_err(CE_CONT, "Hermon: GetPKeyTable (port %02d) "
			    "command failed: %08x\n", port, status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/* Figure out how many of the entries are valid */
		pkey_max = min((pi->p_pkey_tbl_sz - i), 32);
		for (j = 0; j < pkey_max; j++) {
			indx = (i + j);
			pkeyp[indx] = pi->p_pkey_tbl[indx] =
			    pkeytable.P_KeyTableBlocks[j];
		}
	}

	return (DDI_SUCCESS);
}

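/*
 * Usage sketch for hermon_port_query() (illustrative only; the caller
 * and the pre-allocated GID/PKey tables shown are assumptions):
 *
 *	ibt_hca_portinfo_t	pi;
 *
 *	pi.p_sgid_tbl = sgid_tbl;	.. caller-supplied array
 *	pi.p_pkey_tbl = pkey_tbl;	.. caller-supplied array
 *	if (hermon_port_query(state, 1, &pi) == DDI_SUCCESS) {
 *		.. pi now holds the LID, link state, capabilities, and
 *		.. the fully expanded SGID and PKey tables for port 1
 *	}
 */
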
/*
 * hermon_port_modify()
 *    Context: Can be called only from user or kernel context.
 */
/* ARGSUSED */
int
hermon_port_modify(hermon_state_t *state, uint8_t port,
    ibt_port_modify_flags_t flags, uint8_t init_type)
{
	sm_portinfo_t		portinfo;
	uint32_t		capmask;
	int			status;
	hermon_hw_set_port_t	set_port;

	/*
	 * Return an error if either of the unsupported flags is set
	 */
	if ((flags & IBT_PORT_SHUTDOWN) ||
	    (flags & IBT_PORT_SET_INIT_TYPE)) {
		return (IBT_NOT_SUPPORTED);
	}

	bzero(&set_port, sizeof (set_port));

	/*
	 * Determine whether we are trying to reset the QKey counter
	 */
	if (flags & IBT_PORT_RESET_QKEY)
		set_port.rqk = 1;

	/* Validate that specified port number is legal */
	if (!hermon_portnum_is_valid(state, port)) {
		return (IBT_HCA_PORT_INVALID);
	}

	/*
	 * Use the Hermon MAD_IFC command to post a GetPortInfo MAD to the
	 * firmware (for the specified port number).  This returns a full
	 * PortInfo MAD (in "portinfo") from which we pull the current
	 * capability mask.  We then modify the capability mask as directed
	 * by the "pmod_flags" field, and write the updated capability mask
	 * using the Hermon SET_PORT command (below).
	 */
	status = hermon_getportinfo_cmd_post(state, port,
	    HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
	if (status != HERMON_CMD_SUCCESS) {
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Convert InfiniBand-defined port capability flags to the format
	 * specified by the IBTF.  Specifically, we modify the capability
	 * mask based on the specified values.
	 */
	capmask = portinfo.CapabilityMask;

	if (flags & IBT_PORT_RESET_SM)
		capmask &= ~SM_CAP_MASK_IS_SM;
	else if (flags & IBT_PORT_SET_SM)
		capmask |= SM_CAP_MASK_IS_SM;

	if (flags & IBT_PORT_RESET_SNMP)
		capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD;
	else if (flags & IBT_PORT_SET_SNMP)
		capmask |= SM_CAP_MASK_IS_SNMP_SUPPD;

	if (flags & IBT_PORT_RESET_DEVMGT)
		capmask &= ~SM_CAP_MASK_IS_DM_SUPPD;
	else if (flags & IBT_PORT_SET_DEVMGT)
		capmask |= SM_CAP_MASK_IS_DM_SUPPD;

	if (flags & IBT_PORT_RESET_VENDOR)
		capmask &= ~SM_CAP_MASK_IS_VM_SUPPD;
	else if (flags & IBT_PORT_SET_VENDOR)
		capmask |= SM_CAP_MASK_IS_VM_SUPPD;

	set_port.cap_mask = capmask;

	/*
	 * Use the Hermon SET_PORT command to update the capability mask and
	 * (possibly) reset the QKey violation counter for the specified
	 * port.  Note:  In general, this operation shouldn't fail.  If it
	 * does, then it is an indication that something (probably in HW,
	 * but maybe in SW) has gone seriously wrong.
	 */
	status = hermon_set_port_cmd_post(state, &set_port, port,
	    HERMON_SLEEPFLAG_FOR_CONTEXT());
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to modify port capabilities");
		cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
		    "failed: %08x\n", port, status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	return (DDI_SUCCESS);
}

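/*
 * Usage sketch (illustrative): resetting a port's QKey violation counter
 * without changing the capability mask might look like
 *
 *	status = hermon_port_modify(state, 1, IBT_PORT_RESET_QKEY, 0);
 *
 * The "init_type" argument is irrelevant in that call because
 * IBT_PORT_SET_INIT_TYPE is rejected as unsupported above.
 */
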
/*
 * hermon_set_addr_path()
 *    Context: Can be called from interrupt or base context.
 *
 * Note: This routine is used for two purposes.  It is used to fill in the
 * Hermon UDAV fields, and it is used to fill in the address path information
 * for QPs.  Because the two Hermon structures are similar, common fields can
 * be filled in here.  Because they are different, however, we pass an
 * additional flag to indicate which type is being filled in, and we handle
 * each one uniquely.
 */

int hermon_srate_override = -1;	/* allows ease of testing */

int
hermon_set_addr_path(hermon_state_t *state, ibt_adds_vect_t *av,
    hermon_hw_addr_path_t *path, uint_t type)
{
	uint_t		gidtbl_sz;
	hermon_hw_udav_t *udav;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))

	udav = (hermon_hw_udav_t *)(void *)path;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav))
	path->mlid	= av->av_src_path;
	path->rlid	= av->av_dlid;

	switch (av->av_srate) {
	case IBT_SRATE_2:	/* 1xSDR-2.5Gb/s injection rate */
		path->max_stat_rate = 7; break;
	case IBT_SRATE_10:	/* 4xSDR-10.0Gb/s injection rate */
		path->max_stat_rate = 8; break;
	case IBT_SRATE_30:	/* 12xSDR-30Gb/s injection rate */
		path->max_stat_rate = 9; break;
	case IBT_SRATE_5:	/* 1xDDR-5Gb/s injection rate */
		path->max_stat_rate = 10; break;
	case IBT_SRATE_20:	/* 4xDDR-20Gb/s injection rate */
		path->max_stat_rate = 11; break;
	case IBT_SRATE_40:	/* 4xQDR-40Gb/s injection rate */
		path->max_stat_rate = 12; break;
	case IBT_SRATE_60:	/* 12xDDR-60Gb/s injection rate */
		path->max_stat_rate = 13; break;
	case IBT_SRATE_80:	/* 8xQDR-80Gb/s injection rate */
		path->max_stat_rate = 14; break;
	case IBT_SRATE_120:	/* 12xQDR-120Gb/s injection rate */
		path->max_stat_rate = 15; break;
	case IBT_SRATE_NOT_SPECIFIED:	/* Max */
		path->max_stat_rate = 0; break;
	default:
		return (IBT_STATIC_RATE_INVALID);
	}
	if (hermon_srate_override != -1)	/* for evaluating HCA firmware */
		path->max_stat_rate = hermon_srate_override;

	/*
	 * If the "grh" flag is set, then check for a valid SGID index too.
	 * Valid indices range from 0 through (gidtbl_sz - 1).
	 */
	gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
	if ((av->av_send_grh) && (av->av_sgid_ix >= gidtbl_sz)) {
		return (IBT_SGID_INVALID);
	}

	/*
	 * Fill in all "global" values regardless of the value in the GRH
	 * flag.  Because "grh" is not set unless "av_send_grh" is set, the
	 * hardware will ignore the other "global" values as necessary.
	 * Note:  SW does this here to enable later query operations to
	 * return exactly the same params that were passed when the addr
	 * path was last written.
	 */
	path->grh = av->av_send_grh;
	if (type == HERMON_ADDRPATH_QP) {
		path->mgid_index = av->av_sgid_ix;
	} else {
		/*
		 * For Hermon UDAV, the "mgid_index" field is the index into
		 * a combined table (not a per-port table), but having
		 * sections for each port.  So some extra calculations are
		 * necessary.
		 */
		path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) +
		    av->av_sgid_ix;

		udav->portnum = av->av_port_num;
	}

	/*
	 * According to the Hermon PRM, the (31:0) part of rgid_l must be
	 * set to "0x2" if the 'grh' or 'g' bit is cleared.  It also says
	 * that we only need to do it for UDAVs.  So we enforce that here.
	 *
	 * NOTE: The entire 64 bits worth of GUID info is actually being
	 * preserved (for UDAVs) by the callers of this function
	 * (hermon_ah_alloc() and hermon_ah_modify()) and as long as the
	 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are
	 * "don't care".
	 */
	if ((path->grh) || (type == HERMON_ADDRPATH_QP)) {
		path->flow_label	= av->av_flow;
		path->tclass		= av->av_tclass;
		path->hop_limit		= av->av_hop;
		bcopy(&(av->av_dgid.gid_prefix), &(path->rgid_h),
		    sizeof (uint64_t));
		bcopy(&(av->av_dgid.gid_guid), &(path->rgid_l),
		    sizeof (uint64_t));
	} else {
		path->rgid_l	 = 0x2;
		path->flow_label = 0;
		path->tclass	 = 0;
		path->hop_limit	 = 0;
		path->rgid_h	 = 0;
	}
	/* extract the default service level */
	udav->sl = (HERMON_DEF_SCHED_SELECTION & 0x3C) >> 2;

	return (DDI_SUCCESS);
}

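/*
 * Worked example of the UDAV "mgid_index" computation above (all values
 * hypothetical): with log_max_gid = 5 (so gidtbl_sz = 32), av_port_num = 2
 * and av_sgid_ix = 3, the index into the combined GID table is
 *
 *	mgid_index = ((2 - 1) * 32) + 3 = 35
 *
 * hermon_get_addr_path() below performs the inverse computation,
 * recovering av_sgid_ix = 35 - ((2 - 1) * 32) = 3.
 */
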
/*
 * hermon_get_addr_path()
 *    Context: Can be called from interrupt or base context.
 *
 * Note: Just like hermon_set_addr_path() above, this routine is used for two
 * purposes.  It is used to read in the Hermon UDAV fields, and it is used to
 * read in the address path information for QPs.  Because the two Hermon
 * structures are similar, common fields can be read in here.  But because
 * they are slightly different, we pass an additional flag to indicate which
 * type is being read.
 */
void
hermon_get_addr_path(hermon_state_t *state, hermon_hw_addr_path_t *path,
    ibt_adds_vect_t *av, uint_t type)
{
	uint_t		gidtbl_sz;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))

	av->av_src_path	= path->mlid;
	av->av_dlid	= path->rlid;

	/* Set "av_ipd" value from max_stat_rate */
	switch (path->max_stat_rate) {
	case 7:				/* 1xSDR-2.5Gb/s injection rate */
		av->av_srate = IBT_SRATE_2; break;
	case 8:				/* 4xSDR-10.0Gb/s injection rate */
		av->av_srate = IBT_SRATE_10; break;
	case 9:				/* 12xSDR-30Gb/s injection rate */
		av->av_srate = IBT_SRATE_30; break;
	case 10:			/* 1xDDR-5Gb/s injection rate */
		av->av_srate = IBT_SRATE_5; break;
	case 11:			/* 4xDDR-20Gb/s injection rate */
		av->av_srate = IBT_SRATE_20; break;
	case 12:			/* 4xQDR-40Gb/s injection rate */
		av->av_srate = IBT_SRATE_40; break;
	case 13:			/* 12xDDR-60Gb/s injection rate */
		av->av_srate = IBT_SRATE_60; break;
	case 14:			/* 8xQDR-80Gb/s injection rate */
		av->av_srate = IBT_SRATE_80; break;
	case 15:			/* 12xQDR-120Gb/s injection rate */
		av->av_srate = IBT_SRATE_120; break;
	case 0:				/* max */
		av->av_srate = IBT_SRATE_NOT_SPECIFIED; break;
	default:			/* 1x injection rate */
		av->av_srate = IBT_SRATE_1X;
	}

	/*
	 * Extract all "global" values regardless of the value in the GRH
	 * flag.  Because "av_send_grh" is set only if "grh" is set,
	 * software knows to ignore the other "global" values as necessary.
	 * Note:  SW does it this way to enable these query operations to
	 * return exactly the same params that were passed when the addr
	 * path was last written.
	 */
	av->av_send_grh = path->grh;
	if (type == HERMON_ADDRPATH_QP) {
		av->av_sgid_ix = path->mgid_index;
	} else {
		/*
		 * For Hermon UDAV, the "mgid_index" field is the index into
		 * a combined table (not a per-port table).  Read the port
		 * number out of the UDAV first, so that the per-port
		 * section of the combined table can be subtracted back out
		 * of the index.
		 */
		av->av_port_num = ((hermon_hw_udav_t *)(void *)path)->portnum;

		gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
		av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) *
		    gidtbl_sz);
	}
	av->av_flow	= path->flow_label;
	av->av_tclass	= path->tclass;
	av->av_hop	= path->hop_limit;
	/* these copies handle an alignment issue in the Hermon addr path */
	bcopy(&(path->rgid_h), &(av->av_dgid.gid_prefix), sizeof (uint64_t));
	bcopy(&(path->rgid_l), &(av->av_dgid.gid_guid), sizeof (uint64_t));
}


/*
 * hermon_portnum_is_valid()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_portnum_is_valid(hermon_state_t *state, uint_t portnum)
{
	uint_t	max_port;

	max_port = state->hs_cfg_profile->cp_num_ports;
	if ((portnum <= max_port) && (portnum != 0)) {
		return (1);
	} else {
		return (0);
	}
}


/*
 * hermon_pkeyindex_is_valid()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_pkeyindex_is_valid(hermon_state_t *state, uint_t pkeyindx)
{
	uint_t	max_pkeyindx;

	max_pkeyindx = 1 << state->hs_cfg_profile->cp_log_max_pkeytbl;
	if (pkeyindx < max_pkeyindx) {
		return (1);
	} else {
		return (0);
	}
}

/*
 * hermon_queue_alloc()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_queue_alloc(hermon_state_t *state, hermon_qalloc_info_t *qa_info,
    uint_t sleepflag)
{
	ddi_dma_attr_t		dma_attr;
	int			(*callback)(caddr_t);
	uint64_t		realsize, alloc_mask;
	uint_t			type;
	int			flag, status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))

	/* Set the callback flag appropriately */
	callback = (sleepflag == HERMON_SLEEP) ? DDI_DMA_SLEEP :
	    DDI_DMA_DONTWAIT;

	/*
	 * Initialize many of the default DMA attributes.  Then set
	 * additional alignment restrictions as necessary for the queue
	 * memory.  Also respect the configured value for IOMMU bypass.
	 */
	hermon_dma_attr_init(state, &dma_attr);
	dma_attr.dma_attr_align = qa_info->qa_bind_align;
	type = state->hs_cfg_profile->cp_iommu_bypass;
	if (type == HERMON_BINDMEM_BYPASS) {
		dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
	}

	/* Allocate a DMA handle */
	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, callback,
	    NULL, &qa_info->qa_dmahdl);
	if (status != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/*
	 * Determine the amount of memory to allocate, depending on the
	 * values in "qa_bind_align" and "qa_alloc_align".  The problem we
	 * are trying to solve here is that allocating a DMA handle with
	 * IOMMU bypass (DDI_DMA_FORCE_PHYSICAL) constrains us to only
	 * requesting alignments that are less restrictive than the page
	 * size.  Since we may need stricter alignments on the memory
	 * allocated by ddi_dma_mem_alloc() (e.g. in Hermon QP work queue
	 * memory allocation), we use the following method to calculate how
	 * much additional memory to request, and we enforce our own
	 * alignment on the allocated result.
	 */
	alloc_mask = qa_info->qa_alloc_align - 1;
	if (qa_info->qa_bind_align == qa_info->qa_alloc_align) {
		realsize = qa_info->qa_size;
	} else {
		realsize = qa_info->qa_size + alloc_mask;
	}

	/*
	 * If we are to allocate the queue from system memory, then use
	 * ddi_dma_mem_alloc() to find the space.  Otherwise, this is a
	 * userland-mappable memory allocation, so use ddi_umem_alloc().
	 * In either case, return a pointer to the memory range allocated
	 * (including any necessary alignment adjustments), the "real"
	 * memory pointer, the "real" size, and a ddi_acc_handle_t to use
	 * when reading from/writing to the memory.
	 */
	if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
		/* Allocate system memory for the queue */
		status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize,
		    &state->hs_reg_accattr, DDI_DMA_CONSISTENT, callback,
		    NULL, (caddr_t *)&qa_info->qa_buf_real,
		    (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl);
		if (status != DDI_SUCCESS) {
			ddi_dma_free_handle(&qa_info->qa_dmahdl);
			return (DDI_FAILURE);
		}

		/*
		 * Save temporary copy of the real pointer.  (This may be
		 * modified in the last step below).
		 */
		qa_info->qa_buf_aligned = qa_info->qa_buf_real;

		bzero(qa_info->qa_buf_real, qa_info->qa_buf_realsz);

	} else { /* HERMON_QUEUE_LOCATION_USERLAND */

		/* Allocate userland mappable memory for the queue */
		flag = (sleepflag == HERMON_SLEEP) ? DDI_UMEM_SLEEP :
		    DDI_UMEM_NOSLEEP;
		qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag,
		    &qa_info->qa_umemcookie);
		if (qa_info->qa_buf_real == NULL) {
			ddi_dma_free_handle(&qa_info->qa_dmahdl);
			return (DDI_FAILURE);
		}

		/*
		 * Save temporary copy of the real pointer.  (This may be
		 * modified in the last step below).
		 */
		qa_info->qa_buf_aligned = qa_info->qa_buf_real;

	}

	/*
	 * The next to last step is to ensure that the final address
	 * ("qa_buf_aligned") has the appropriate "alloc" alignment
	 * restriction applied to it (if necessary).
	 */
	if (qa_info->qa_bind_align != qa_info->qa_alloc_align) {
		qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
		    qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask);
	}
	/*
	 * The last step is to figure out the offset of the start relative
	 * to the first page of the region - will be used in the eqc/cqc
	 * passed to the HW
	 */
	qa_info->qa_pgoffs = (uint_t)((uintptr_t)
	    qa_info->qa_buf_aligned & HERMON_PAGEMASK);

	return (DDI_SUCCESS);
}


/*
 * hermon_queue_free()
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_queue_free(hermon_qalloc_info_t *qa_info)
{
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))

	/*
	 * Depending on how (i.e. from where) we allocated the memory for
	 * this queue, we choose the appropriate method for releasing the
	 * resources.
	 */
	if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {

		ddi_dma_mem_free(&qa_info->qa_acchdl);

	} else if (qa_info->qa_location == HERMON_QUEUE_LOCATION_USERLAND) {

		ddi_umem_free(qa_info->qa_umemcookie);

	}

	/* Always free the dma handle */
	ddi_dma_free_handle(&qa_info->qa_dmahdl);
}

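/*
 * Worked example of the "alloc" alignment fixup in hermon_queue_alloc()
 * above (addresses hypothetical): with qa_alloc_align = 0x1000, the code
 * over-allocates by alloc_mask = 0xFFF bytes and then rounds up:
 *
 *	qa_buf_real    = 0x70003240
 *	qa_buf_aligned = (0x70003240 + 0xFFF) & ~0xFFF = 0x70004000
 *
 * Because "realsize" included the extra 0xFFF bytes, an aligned buffer of
 * qa_size bytes is guaranteed to fit inside the real allocation.
 */
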
/*
 * hermon_create_fmr_pool()
 *    Create a pool of FMRs.
 *    Context: Can be called from kernel context only.
 */
int
hermon_create_fmr_pool(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_fmr_pool_attr_t *fmr_attr, hermon_fmrhdl_t *fmrpoolp)
{
	hermon_fmrhdl_t	fmrpool;
	hermon_fmr_list_t *fmr, *fmr_next;
	hermon_mrhdl_t	mr;
	char		taskqname[48];
	int		status;
	int		sleep;
	int		i;

	sleep = (fmr_attr->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
	    HERMON_NOSLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		return (IBT_INVALID_PARAM);
	}

	fmrpool = (hermon_fmrhdl_t)kmem_zalloc(sizeof (*fmrpool), sleep);
	if (fmrpool == NULL) {
		status = IBT_INSUFF_RESOURCE;
		goto fail;
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmrpool))

	mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->hs_intrmsi_pri));

	fmrpool->fmr_state	     = state;
	fmrpool->fmr_flush_function  = fmr_attr->fmr_func_hdlr;
	fmrpool->fmr_flush_arg	     = fmr_attr->fmr_func_arg;
	fmrpool->fmr_pool_size	     = 0;
	fmrpool->fmr_cache	     = 0;
	fmrpool->fmr_max_pages	     = fmr_attr->fmr_max_pages_per_fmr;
	fmrpool->fmr_page_sz	     = fmr_attr->fmr_page_sz;
	fmrpool->fmr_dirty_watermark = fmr_attr->fmr_dirty_watermark;
	fmrpool->fmr_dirty_len	     = 0;
	fmrpool->fmr_flags	     = fmr_attr->fmr_flags;

	/* Create taskq to handle cleanup and flush processing */
	(void) snprintf(taskqname, sizeof (taskqname),
	    "fmrpool/%d/%d @ 0x%" PRIx64, fmr_attr->fmr_pool_size,
	    hermon_debug_fmrpool_cnt, (uint64_t)(uintptr_t)fmrpool);
	fmrpool->fmr_taskq = ddi_taskq_create(state->hs_dip, taskqname,
	    HERMON_TASKQ_NTHREADS, TASKQ_DEFAULTPRI, 0);
	if (fmrpool->fmr_taskq == NULL) {
		status = IBT_INSUFF_RESOURCE;
		goto fail1;
	}

	fmrpool->fmr_free_list = NULL;
	fmrpool->fmr_dirty_list = NULL;

	if (fmr_attr->fmr_cache) {
		hermon_fmr_cache_init(fmrpool);
	}

	for (i = 0; i < fmr_attr->fmr_pool_size; i++) {
		status = hermon_mr_alloc_fmr(state, pd, fmrpool, &mr);
		if (status != DDI_SUCCESS) {
			goto fail2;
		}

		fmr = (hermon_fmr_list_t *)kmem_zalloc(
		    sizeof (hermon_fmr_list_t), sleep);
		if (fmr == NULL) {
			/* release the FMR just allocated above */
			(void) hermon_mr_dealloc_fmr(state, &mr);
			status = IBT_INSUFF_RESOURCE;
			goto fail2;
		}
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))

		fmr->fmr	  = mr;
		fmr->fmr_refcnt	  = 0;
		fmr->fmr_remaps	  = 0;
		fmr->fmr_pool	  = fmrpool;
		fmr->fmr_in_cache = 0;
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
		mr->mr_fmr	  = fmr;

		fmr->fmr_next = fmrpool->fmr_free_list;
		fmrpool->fmr_free_list = fmr;
		fmrpool->fmr_pool_size++;
	}

	/* Set to return pool */
	*fmrpoolp = fmrpool;

	return (IBT_SUCCESS);
fail2:
	if (fmrpool->fmr_cache) {
		hermon_fmr_cache_fini(fmrpool);
	}
	for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
		fmr_next = fmr->fmr_next;
		(void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
		kmem_free(fmr, sizeof (hermon_fmr_list_t));
	}
	ddi_taskq_destroy(fmrpool->fmr_taskq);
fail1:
	kmem_free(fmrpool, sizeof (*fmrpool));
fail:
	if (status == DDI_FAILURE) {
		return (ibc_get_ci_failure(0));
	} else {
		return (status);
	}
}

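/*
 * Usage sketch for hermon_create_fmr_pool() (illustrative only; the
 * attribute values are arbitrary and the flush handler is optional):
 *
 *	ibt_fmr_pool_attr_t	attr;
 *	hermon_fmrhdl_t		pool;
 *
 *	attr.fmr_pool_size		= 64;
 *	attr.fmr_max_pages_per_fmr	= 16;
 *	attr.fmr_dirty_watermark	= 8;
 *	attr.fmr_cache			= 1;
 *	attr.fmr_flags			= IBT_MR_SLEEP;
 *	attr.fmr_func_hdlr		= NULL;
 *	status = hermon_create_fmr_pool(state, pd, &attr, &pool);
 */
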
/*
 * hermon_destroy_fmr_pool()
 *    Destroy an FMR pool and free all associated resources.
 *    Context: Can be called from kernel context only.
 */
int
hermon_destroy_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
{
	hermon_fmr_list_t	*fmr, *fmr_next;
	int			status;

	mutex_enter(&fmrpool->fmr_lock);
	status = hermon_fmr_cleanup(state, fmrpool);
	if (status != DDI_SUCCESS) {
		mutex_exit(&fmrpool->fmr_lock);
		return (status);
	}

	if (fmrpool->fmr_cache) {
		hermon_fmr_cache_fini(fmrpool);
	}

	for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
		fmr_next = fmr->fmr_next;

		(void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
		kmem_free(fmr, sizeof (hermon_fmr_list_t));
	}
	mutex_exit(&fmrpool->fmr_lock);

	ddi_taskq_destroy(fmrpool->fmr_taskq);
	mutex_destroy(&fmrpool->fmr_lock);

	kmem_free(fmrpool, sizeof (*fmrpool));
	return (DDI_SUCCESS);
}

/*
 * hermon_flush_fmr_pool()
 *    Ensure that all unmapped FMRs are fully invalidated.
 *    Context: Can be called from kernel context only.
 */
int
hermon_flush_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
{
	int		status;

	/*
	 * Force the unmapping of all entries on the dirty list, regardless
	 * of whether the watermark has been hit yet.
	 */
	/* grab the pool lock */
	mutex_enter(&fmrpool->fmr_lock);
	status = hermon_fmr_cleanup(state, fmrpool);
	mutex_exit(&fmrpool->fmr_lock);
	return (status);
}

/*
 * hermon_register_physical_fmr()
 *    Map memory into an FMR.
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_register_physical_fmr(hermon_state_t *state, hermon_fmrhdl_t fmrpool,
    ibt_pmr_attr_t *mem_pattr, hermon_mrhdl_t *mr,
    ibt_pmr_desc_t *mem_desc_p)
{
	hermon_fmr_list_t	*fmr;
	hermon_fmr_list_t	query;
	avl_index_t		where;
	int			status;

	/* Check length */
	mutex_enter(&fmrpool->fmr_lock);
	if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf >
	    fmrpool->fmr_max_pages)) {
		mutex_exit(&fmrpool->fmr_lock);
		return (IBT_MR_LEN_INVALID);
	}

	mutex_enter(&fmrpool->fmr_cachelock);
	/* lookup in fmr cache */
	/* if exists, grab it, and return it */
	if (fmrpool->fmr_cache) {
		query.fmr_desc.pmd_iova = mem_pattr->pmr_iova;
		query.fmr_desc.pmd_phys_buf_list_sz = mem_pattr->pmr_len;
		fmr = (hermon_fmr_list_t *)avl_find(&fmrpool->fmr_cache_avl,
		    &query, &where);

		/*
		 * If a valid FMR was found in the cache, return that fmr
		 * info
		 */
		if (fmr != NULL) {
			fmr->fmr_refcnt++;
			/* Store pmr desc for use in cache */
			(void) memcpy(mem_desc_p, &fmr->fmr_desc,
			    sizeof (ibt_pmr_desc_t));
			*mr = (hermon_mrhdl_t)fmr->fmr;
			mutex_exit(&fmrpool->fmr_cachelock);
			mutex_exit(&fmrpool->fmr_lock);
			return (DDI_SUCCESS);
		}
	}

	/* FMR does not exist in cache, proceed with registration */

	/* grab next free entry */
	fmr = fmrpool->fmr_free_list;
	if (fmr == NULL) {
		mutex_exit(&fmrpool->fmr_cachelock);
		mutex_exit(&fmrpool->fmr_lock);
		return (IBT_INSUFF_RESOURCE);
	}

	fmrpool->fmr_free_list = fmrpool->fmr_free_list->fmr_next;
	fmr->fmr_next = NULL;

	status = hermon_mr_register_physical_fmr(state, mem_pattr, fmr->fmr,
	    mem_desc_p);
	if (status != DDI_SUCCESS) {
		mutex_exit(&fmrpool->fmr_cachelock);
		mutex_exit(&fmrpool->fmr_lock);
		return (status);
	}

	fmr->fmr_refcnt = 1;
	fmr->fmr_remaps++;

	/* Store pmr desc for use in cache */
	(void) memcpy(&fmr->fmr_desc, mem_desc_p, sizeof (ibt_pmr_desc_t));
	*mr = (hermon_mrhdl_t)fmr->fmr;

	/* Store in cache */
	if (fmrpool->fmr_cache) {
		if (!fmr->fmr_in_cache) {
			avl_insert(&fmrpool->fmr_cache_avl, fmr, where);
			fmr->fmr_in_cache = 1;
		}
	}

	mutex_exit(&fmrpool->fmr_cachelock);
	mutex_exit(&fmrpool->fmr_lock);
	return (DDI_SUCCESS);
}

/*
 * hermon_deregister_fmr()
 *    Unmap an FMR.
 *    Context: Can be called from kernel context only.
 */
int
hermon_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
{
	hermon_fmr_list_t	*fmr;
	hermon_fmrhdl_t		fmrpool;
	int			status;

	fmr = mr->mr_fmr;
	fmrpool = fmr->fmr_pool;

	/* Grab pool lock */
	mutex_enter(&fmrpool->fmr_lock);
	fmr->fmr_refcnt--;

	if (fmr->fmr_refcnt == 0) {
		/*
		 * First, do a bit of invalidation, reducing our exposure to
		 * having this region still registered in hardware.
		 */
		(void) hermon_mr_invalidate_fmr(state, mr);

		/*
		 * If we've exhausted our remaps then add the FMR to the
		 * dirty list, not allowing it to be re-used until we have
		 * done a flush.  Otherwise, simply add it back to the free
		 * list for re-mapping.
		 */
		if (fmr->fmr_remaps <
		    state->hs_cfg_profile->cp_fmr_max_remaps) {
			/* add to free list */
			fmr->fmr_next = fmrpool->fmr_free_list;
			fmrpool->fmr_free_list = fmr;
		} else {
			/* add to dirty list */
			fmr->fmr_next = fmrpool->fmr_dirty_list;
			fmrpool->fmr_dirty_list = fmr;
			fmrpool->fmr_dirty_len++;

			status = ddi_taskq_dispatch(fmrpool->fmr_taskq,
			    hermon_fmr_processing, fmrpool, DDI_NOSLEEP);
			if (status == DDI_FAILURE) {
				mutex_exit(&fmrpool->fmr_lock);
				return (IBT_INSUFF_RESOURCE);
			}
		}
	}
	/* Release pool lock */
	mutex_exit(&fmrpool->fmr_lock);

	return (DDI_SUCCESS);
}


/*
 * hermon_fmr_processing()
 *    If required, perform cleanup.
 *    Context: Called from taskq context only.
 */
static void
hermon_fmr_processing(void *fmr_args)
{
	hermon_fmrhdl_t		fmrpool;
	int			status;

	ASSERT(fmr_args != NULL);

	fmrpool = (hermon_fmrhdl_t)fmr_args;

	/* grab pool lock */
	mutex_enter(&fmrpool->fmr_lock);
	if (fmrpool->fmr_dirty_len >= fmrpool->fmr_dirty_watermark) {
		status = hermon_fmr_cleanup(fmrpool->fmr_state, fmrpool);
		if (status != DDI_SUCCESS) {
			mutex_exit(&fmrpool->fmr_lock);
			return;
		}

		if (fmrpool->fmr_flush_function != NULL) {
			(void) fmrpool->fmr_flush_function(
			    (ibc_fmr_pool_hdl_t)fmrpool,
			    fmrpool->fmr_flush_arg);
		}
	}

	/* let pool lock go */
	mutex_exit(&fmrpool->fmr_lock);
}

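/*
 * FMR lifecycle sketch tying the routines above together (illustrative):
 *
 *	hermon_register_physical_fmr(...)  .. grabs a free FMR and maps it
 *	... post work requests using the returned "mr" handle ...
 *	hermon_deregister_fmr(state, mr);  .. refcnt drops to zero; the
 *					   .. FMR is invalidated and either
 *					   .. reused or queued as dirty
 *
 * Once fmr_dirty_len reaches fmr_dirty_watermark, the taskq dispatched in
 * hermon_deregister_fmr() runs hermon_fmr_processing(), which cleans up
 * the dirty list and invokes the pool's flush callback, if one was given.
 */
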
/*
 * hermon_fmr_cleanup()
 *    Perform cleanup processing, walking the list and performing the MTT
 *    sync operation if required.
 *    Context: Can be called from taskq or base context.
 */
static int
hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
{
	hermon_fmr_list_t	*fmr;
	hermon_fmr_list_t	*fmr_next;
	int			sync_needed;
	int			status;

	ASSERT(MUTEX_HELD(&fmrpool->fmr_lock));

	sync_needed = 0;
	for (fmr = fmrpool->fmr_dirty_list; fmr; fmr = fmr_next) {
		fmr_next = fmr->fmr_next;
		fmr->fmr_remaps = 0;

		(void) hermon_mr_deregister_fmr(state, fmr->fmr);

		/*
		 * Update lists.
		 * - add fmr back to free list
		 * - remove fmr from dirty list
		 */
		fmr->fmr_next = fmrpool->fmr_free_list;
		fmrpool->fmr_free_list = fmr;

		/*
		 * Because we have updated the dirty list, and deregistered
		 * the FMR entry, we do need to sync the TPT, so we set the
		 * 'sync_needed' flag here so we sync once we finish
		 * dirty_list processing.
		 */
		sync_needed = 1;
	}

	fmrpool->fmr_dirty_list = NULL;
	fmrpool->fmr_dirty_len = 0;

	if (sync_needed) {
		status = hermon_sync_tpt_cmd_post(state,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			return (status);
		}
	}

	return (DDI_SUCCESS);
}

/*
 * hermon_fmr_avl_compare()
 *    Context: Can be called from user or kernel context.
 */
static int
hermon_fmr_avl_compare(const void *q, const void *e)
{
	hermon_fmr_list_t *entry, *query;

	entry = (hermon_fmr_list_t *)e;
	query = (hermon_fmr_list_t *)q;

	if (query->fmr_desc.pmd_iova < entry->fmr_desc.pmd_iova) {
		return (-1);
	} else if (query->fmr_desc.pmd_iova > entry->fmr_desc.pmd_iova) {
		return (+1);
	} else {
		return (0);
	}
}


/*
 * hermon_fmr_cache_init()
 *    Context: Can be called from user or kernel context.
 */
static void
hermon_fmr_cache_init(hermon_fmrhdl_t fmr)
{
	/* Initialize the lock used for FMR cache AVL tree access */
	mutex_init(&fmr->fmr_cachelock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(fmr->fmr_state->hs_intrmsi_pri));

	/* Initialize the AVL tree for the FMR cache */
	avl_create(&fmr->fmr_cache_avl, hermon_fmr_avl_compare,
	    sizeof (hermon_fmr_list_t),
	    offsetof(hermon_fmr_list_t, fmr_avlnode));

	fmr->fmr_cache = 1;
}


/*
 * hermon_fmr_cache_fini()
 *    Context: Can be called from user or kernel context.
 */
static void
hermon_fmr_cache_fini(hermon_fmrhdl_t fmr)
{
	void		*cookie;

	/*
	 * Empty all entries (if necessary) and destroy the AVL tree.
	 * The FMRs themselves are freed as part of destroy_pool()
	 */
	cookie = NULL;
	while (((void *)(hermon_fmr_list_t *)avl_destroy_nodes(
	    &fmr->fmr_cache_avl, &cookie)) != NULL) {
		/* loop through */
	}
	avl_destroy(&fmr->fmr_cache_avl);

	/* Destroy the lock used for FMR cache */
	mutex_destroy(&fmr->fmr_cachelock);
}

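/*
 * Note on the FMR cache above: hermon_fmr_avl_compare() orders cache
 * entries by fmr_desc.pmd_iova alone, so the avl_find() done in
 * hermon_register_physical_fmr() treats a mapping as a cache hit when its
 * I/O virtual address matches the request, e.g.
 *
 *	query.fmr_desc.pmd_iova = mem_pattr->pmr_iova;
 *	fmr = avl_find(&fmrpool->fmr_cache_avl, &query, &where);
 */
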
/*
 * hermon_get_dma_cookies()
 *    Return DMA cookies in the pre-allocated paddr_list_p based on the
 *    length needed.
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_get_dma_cookies(hermon_state_t *state, ibt_phys_buf_t *paddr_list_p,
    ibt_va_attr_t *va_attrs, uint_t list_len, uint_t *cookiecnt,
    ibc_ma_hdl_t *ibc_ma_hdl_p)
{
	ddi_dma_handle_t	dma_hdl;
	ddi_dma_attr_t		dma_attr;
	ddi_dma_cookie_t	dmacookie;
	int			(*callback)(caddr_t);
	int			status;
	int			i;

	/* Set the callback flag appropriately */
	callback = (va_attrs->va_flags & IBT_VA_NOSLEEP) ? DDI_DMA_DONTWAIT :
	    DDI_DMA_SLEEP;
	if ((callback == DDI_DMA_SLEEP) &&
	    (HERMON_SLEEP != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		return (IBT_INVALID_PARAM);
	}

	/*
	 * Initialize many of the default DMA attributes, then allocate
	 * the DMA handle (below).
	 */
	hermon_dma_attr_init(state, &dma_attr);

#ifdef __x86
	/*
	 * On x86 we can specify a maximum segment length for our returned
	 * cookies.
	 */
	if (va_attrs->va_flags & IBT_VA_FMR) {
		dma_attr.dma_attr_seg = PAGESIZE - 1;
	}
#endif

	/*
	 * Check to see if the RO flag is set, and if so,
	 * set that bit in the attr structure as well.
	 *
	 * Note:  This function is ONLY called by consumers, and only for
	 *	  data buffers
	 */
	if (hermon_kernel_data_ro == HERMON_RO_ENABLED) {
		dma_attr.dma_attr_flags |= DDI_DMA_RELAXED_ORDERING;
	}

	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
	    callback, NULL, &dma_hdl);
	if (status != DDI_SUCCESS) {
		switch (status) {
		case DDI_DMA_NORESOURCES:
			return (IBT_INSUFF_RESOURCE);
		case DDI_DMA_BADATTR:
		default:
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * Now bind the handle with the correct DMA attributes.
	 */
	if (va_attrs->va_flags & IBT_VA_BUF) {
		status = ddi_dma_buf_bind_handle(dma_hdl, va_attrs->va_buf,
		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT,
		    NULL, &dmacookie, cookiecnt);
	} else {
		status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
		    (caddr_t)(uintptr_t)va_attrs->va_vaddr, va_attrs->va_len,
		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT,
		    NULL, &dmacookie, cookiecnt);
	}
	if (status != DDI_SUCCESS) {
		ddi_dma_free_handle(&dma_hdl);

		switch (status) {
		case DDI_DMA_NORESOURCES:
			return (IBT_INSUFF_RESOURCE);
		case DDI_DMA_TOOBIG:
			return (IBT_INVALID_PARAM);
		case DDI_DMA_PARTIAL_MAP:
		case DDI_DMA_INUSE:
		case DDI_DMA_NOMAPPING:
		default:
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * Verify our physical buffer list (PBL) is large enough to handle
	 * the number of cookies that were returned.
	 */
	if (*cookiecnt > list_len) {
		(void) ddi_dma_unbind_handle(dma_hdl);
		ddi_dma_free_handle(&dma_hdl);
		return (IBT_PBL_TOO_SMALL);
	}

	/*
	 * We store the cookies returned by the DDI into our own PBL.  This
	 * sets the cookies up for later processing (for example, if we want
	 * to split up the cookies into smaller chunks).  We use the laddr
	 * and size fields in each cookie to create each individual entry
	 * (PBE).
	 */

	/*
	 * Store first cookie info first
	 */
	paddr_list_p[0].p_laddr = dmacookie.dmac_laddress;
	paddr_list_p[0].p_size = dmacookie.dmac_size;

	/*
	 * Loop through each cookie, storing each cookie into our physical
	 * buffer list.
	 */
	for (i = 1; i < *cookiecnt; i++) {
		ddi_dma_nextcookie(dma_hdl, &dmacookie);

		paddr_list_p[i].p_laddr = dmacookie.dmac_laddress;
		paddr_list_p[i].p_size = dmacookie.dmac_size;
	}

	/* return handle */
	*ibc_ma_hdl_p = (ibc_ma_hdl_t)dma_hdl;
	return (DDI_SUCCESS);
}

/*
 * hermon_split_dma_cookies()
 *    Split up cookies passed in from paddr_list_p, returning the new list
 *    in the same buffers, based on the pagesize to split the cookies into.
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_split_dma_cookies(hermon_state_t *state, ibt_phys_buf_t *paddr_list,
    ib_memlen_t *paddr_offset, uint_t list_len, uint_t *cookiecnt,
    uint_t pagesize)
{
	uint64_t	pageoffset;
	uint64_t	pagemask;
	uint_t		pageshift;
	uint_t		current_cookiecnt;
	uint_t		cookies_needed;
	uint64_t	last_size, extra_cookie;
	int		i_increment;
	int		i, k;
	int		status;

	/* Setup pagesize calculations */
	pageoffset = pagesize - 1;
	pagemask = (~pageoffset);
	pageshift = highbit(pagesize) - 1;

	/*
	 * Setup first cookie offset based on pagesize requested.
	 */
	*paddr_offset = paddr_list[0].p_laddr & pageoffset;
	paddr_list[0].p_laddr &= pagemask;

	/* Save away the current number of cookies that are passed in */
	current_cookiecnt = *cookiecnt;

	/* Perform splitting up of current cookies into pagesize blocks */
	for (i = 0; i < current_cookiecnt; i += i_increment) {
		/*
		 * If the cookie is smaller than pagesize, or already is
		 * pagesize, then we are already within our limits, so we
		 * skip it.
		 */
		if (paddr_list[i].p_size <= pagesize) {
			i_increment = 1;
			continue;
		}

		/*
		 * If this is our first cookie, then we have to deal with
		 * the offset that may be present in the first address.  So
		 * add that to our size, to calculate potential change to
		 * the last cookie's size.
		 *
		 * Also, calculate the number of cookies that we'll need to
		 * split up this block into.
		 */
		if (i == 0) {
			last_size = (paddr_list[i].p_size + *paddr_offset) &
			    pageoffset;
			cookies_needed = (paddr_list[i].p_size +
			    *paddr_offset) >> pageshift;
		} else {
			last_size = 0;
			cookies_needed = paddr_list[i].p_size >> pageshift;
		}

		/*
		 * If our size is not a multiple of pagesize, we need one
		 * more cookie.
		 */
		if (last_size) {
			extra_cookie = 1;
		} else {
			extra_cookie = 0;
		}

		/*
		 * Split cookie into pagesize chunks, shifting list of
		 * cookies down, using more cookie slots in the PBL if
		 * necessary.
		 */
		status = hermon_dma_cookie_shift(paddr_list, i, list_len,
		    current_cookiecnt - i, cookies_needed + extra_cookie);
		if (status != 0) {
			return (status);
		}

		/*
		 * If the very first cookie, we must take possible offset
		 * into account.
		 */
		if (i == 0) {
			paddr_list[i].p_size = pagesize - *paddr_offset;
		} else {
			paddr_list[i].p_size = pagesize;
		}

		/*
		 * We have shifted the existing cookies down the PBL, now
		 * fill in the blank entries by splitting up our current
		 * block.
		 */
		for (k = 1; k < cookies_needed; k++) {
			paddr_list[i + k].p_laddr =
			    paddr_list[i + k - 1].p_laddr + pagesize;
			paddr_list[i + k].p_size = pagesize;
		}

		/* If we have one extra cookie (of less than pagesize...) */
		if (extra_cookie) {
			paddr_list[i + k].p_laddr =
			    paddr_list[i + k - 1].p_laddr + pagesize;
			paddr_list[i + k].p_size = (size_t)last_size;
		}

		/* Increment cookiecnt appropriately based on cookies used */
		i_increment = cookies_needed + extra_cookie;
		current_cookiecnt += i_increment - 1;
	}

	/* Update to new cookie count */
	*cookiecnt = current_cookiecnt;
	return (DDI_SUCCESS);
}

/*
 * hermon_dma_cookie_shift()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_dma_cookie_shift(ibt_phys_buf_t *paddr_list, int start, int end,
    int cookiecnt, int num_shift)
{
	int		shift_start;
	int		i;

	/* Calculating starting point in the PBL list */
	shift_start = start + cookiecnt - 1;

	/* Check if we're at the end of our PBL list */
	if ((shift_start + num_shift - 1) >= end) {
		return (IBT_PBL_TOO_SMALL);
	}

	for (i = shift_start; i > start; i--) {
		paddr_list[i + num_shift - 1] = paddr_list[i];
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_free_dma_cookies()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_free_dma_cookies(ibc_ma_hdl_t ma_hdl)
{
	ddi_dma_handle_t	dma_hdl;
	int			status;

	dma_hdl = (ddi_dma_handle_t)ma_hdl;

	status = ddi_dma_unbind_handle(dma_hdl);
	if (status != DDI_SUCCESS) {
		return (ibc_get_ci_failure(0));
	}
	ddi_dma_free_handle(&dma_hdl);

	return (DDI_SUCCESS);
}
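/*
 * Worked example of hermon_split_dma_cookies() (values hypothetical): a
 * single 0x2800-byte cookie at laddr 0x80001200, split on a pagesize of
 * 0x1000, produces *paddr_offset = 0x200 and the three cookies
 *
 *	{ 0x80001000, 0xE00 } { 0x80002000, 0x1000 } { 0x80003000, 0xA00 }
 *
 * The first cookie is shortened by the starting offset, the final short
 * cookie carries the remainder, and hermon_dma_cookie_shift() opens up
 * the extra PBL slots that the new cookies occupy.
 */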