1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * hermon_misc.c 28 * Hermon Miscellaneous routines - Address Handle, Multicast, Protection 29 * Domain, and port-related operations 30 * 31 * Implements all the routines necessary for allocating, freeing, querying 32 * and modifying Address Handles and Protection Domains. Also implements 33 * all the routines necessary for adding and removing Queue Pairs to/from 34 * Multicast Groups. Lastly, it implements the routines necessary for 35 * port-related query and modify operations. 36 */ 37 38 #include <sys/types.h> 39 #include <sys/conf.h> 40 #include <sys/ddi.h> 41 #include <sys/sunddi.h> 42 #include <sys/modctl.h> 43 #include <sys/bitmap.h> 44 #include <sys/sysmacros.h> 45 46 #include <sys/ib/adapters/hermon/hermon.h> 47 48 extern int hermon_rdma_debug; 49 int hermon_fmr_verbose = 0; 50 51 static int hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg, 52 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp, uint_t *qp_found); 53 static int hermon_mcg_qplist_remove(hermon_mcghdl_t mcg, 54 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp); 55 static void hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp); 56 static void hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp); 57 static uint_t hermon_mcg_walk_mgid_hash(hermon_state_t *state, 58 uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx); 59 static void hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, 60 hermon_hw_mcg_t *mcg_hdr, ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc); 61 static int hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx, 62 uint_t prev_indx, hermon_hw_mcg_t *mcg_entry); 63 static int hermon_mcg_entry_invalidate(hermon_state_t *state, 64 hermon_hw_mcg_t *mcg_entry, uint_t indx); 65 static int hermon_mgid_is_valid(ib_gid_t gid); 66 static int hermon_mlid_is_valid(ib_lid_t lid); 67 static void hermon_fmr_cleanup(hermon_fmrhdl_t pool); 68 69 70 #define HERMON_MAX_DBR_PAGES_PER_USER 64 71 #define HERMON_DBR_KEY(index, page) \ 72 (((uint64_t)index) * HERMON_MAX_DBR_PAGES_PER_USER + (page)) 73 74 static hermon_udbr_page_t * 75 hermon_dbr_new_user_page(hermon_state_t *state, uint_t index, 76 uint_t page) 77 { 78 hermon_udbr_page_t *pagep; 79 ddi_dma_attr_t dma_attr; 80 uint_t cookiecnt; 81 int status; 82 hermon_umap_db_entry_t *umapdb; 83 ulong_t pagesize = PAGESIZE; 84 85 pagep = kmem_alloc(sizeof (*pagep), KM_SLEEP); 86 pagep->upg_index = page; 87 pagep->upg_nfree = pagesize / sizeof (hermon_dbr_t); 88 89 /* Allocate 1 bit per dbr for free/alloc management (0 => "free") */ 90 pagep->upg_free = kmem_zalloc(pagesize / sizeof (hermon_dbr_t) / 8, 91 KM_SLEEP); 92 pagep->upg_kvaddr = ddi_umem_alloc(pagesize, DDI_UMEM_SLEEP, 93 &pagep->upg_umemcookie); /* not HERMON_PAGESIZE here */ 94 95 pagep->upg_buf = ddi_umem_iosetup(pagep->upg_umemcookie, 0, 96 pagesize, B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP); 97 98 hermon_dma_attr_init(state, &dma_attr); 99 #ifdef __sparc 100 if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS) 101 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 102 #endif 103 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, 104 DDI_DMA_SLEEP, NULL, &pagep->upg_dmahdl); 105 if (status != DDI_SUCCESS) { 106 IBTF_DPRINTF_L2("hermon", "hermon_new_user_page: " 107 "ddi_dma_buf_bind_handle failed: %d", status); 108 return (NULL); 109 } 110 status = ddi_dma_buf_bind_handle(pagep->upg_dmahdl, 111 pagep->upg_buf, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 112 DDI_DMA_SLEEP, NULL, &pagep->upg_dmacookie, &cookiecnt); 113 if (status != DDI_SUCCESS) { 114 IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: " 115 "ddi_dma_buf_bind_handle failed: %d", status); 116 ddi_dma_free_handle(&pagep->upg_dmahdl); 117 return (NULL); 118 } 119 ASSERT(cookiecnt == 1); 120 121 /* create db entry for mmap */ 122 umapdb = hermon_umap_db_alloc(state->hs_instance, 123 HERMON_DBR_KEY(index, page), MLNX_UMAP_DBRMEM_RSRC, 124 (uint64_t)(uintptr_t)pagep); 125 hermon_umap_db_add(umapdb); 126 return (pagep); 127 } 128 129 130 /*ARGSUSED*/ 131 static int 132 hermon_user_dbr_alloc(hermon_state_t *state, uint_t index, 133 ddi_acc_handle_t *acchdl, hermon_dbr_t **vdbr, uint64_t *pdbr, 134 uint64_t *mapoffset) 135 { 136 hermon_user_dbr_t *udbr; 137 hermon_udbr_page_t *pagep; 138 uint_t next_page; 139 int dbr_index; 140 int i1, i2, i3, last; 141 uint64_t u64, mask; 142 143 mutex_enter(&state->hs_dbr_lock); 144 for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link) 145 if (udbr->udbr_index == index) 146 break; 147 if (udbr == NULL) { 148 udbr = kmem_alloc(sizeof (*udbr), KM_SLEEP); 149 udbr->udbr_link = state->hs_user_dbr; 150 state->hs_user_dbr = udbr; 151 udbr->udbr_index = index; 152 udbr->udbr_pagep = NULL; 153 } 154 pagep = udbr->udbr_pagep; 155 next_page = (pagep == NULL) ? 0 : (pagep->upg_index + 1); 156 while (pagep != NULL) 157 if (pagep->upg_nfree > 0) 158 break; 159 else 160 pagep = pagep->upg_link; 161 if (pagep == NULL) { 162 pagep = hermon_dbr_new_user_page(state, index, next_page); 163 if (pagep == NULL) { 164 mutex_exit(&state->hs_dbr_lock); 165 return (DDI_FAILURE); 166 } 167 pagep->upg_link = udbr->udbr_pagep; 168 udbr->udbr_pagep = pagep; 169 } 170 171 /* Since nfree > 0, we're assured the loops below will succeed */ 172 173 /* First, find a 64-bit (not ~0) that has a free dbr */ 174 last = PAGESIZE / sizeof (uint64_t) / 64; 175 mask = ~0ull; 176 for (i1 = 0; i1 < last; i1++) 177 if ((pagep->upg_free[i1] & mask) != mask) 178 break; 179 u64 = pagep->upg_free[i1]; 180 181 /* Second, find a byte (not 0xff) that has a free dbr */ 182 last = sizeof (uint64_t) / sizeof (uint8_t); 183 for (i2 = 0, mask = 0xff; i2 < last; i2++, mask <<= 8) 184 if ((u64 & mask) != mask) 185 break; 186 187 /* Third, find a bit that is free (0) */ 188 for (i3 = 0; i3 < sizeof (uint64_t) / sizeof (uint8_t); i3++) 189 if ((u64 & (1ul << (i3 + 8 * i2))) == 0) 190 break; 191 192 /* Mark it as allocated */ 193 pagep->upg_free[i1] |= (1ul << (i3 + 8 * i2)); 194 195 dbr_index = ((i1 * sizeof (uint64_t)) + i2) * sizeof (uint64_t) + i3; 196 pagep->upg_nfree--; 197 ((uint64_t *)(void *)pagep->upg_kvaddr)[dbr_index] = 0; /* clear dbr */ 198 *mapoffset = ((HERMON_DBR_KEY(index, pagep->upg_index) << 199 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_DBRMEM_RSRC) << PAGESHIFT; 200 *vdbr = (hermon_dbr_t *)((uint64_t *)(void *)pagep->upg_kvaddr + 201 dbr_index); 202 *pdbr = pagep->upg_dmacookie.dmac_laddress + dbr_index * 203 sizeof (uint64_t); 204 205 mutex_exit(&state->hs_dbr_lock); 206 return (DDI_SUCCESS); 207 } 208 209 static void 210 hermon_user_dbr_free(hermon_state_t *state, uint_t index, hermon_dbr_t *record) 211 { 212 hermon_user_dbr_t *udbr; 213 hermon_udbr_page_t *pagep; 214 caddr_t kvaddr; 215 uint_t dbr_index; 216 uint_t max_free = PAGESIZE / sizeof (hermon_dbr_t); 217 int i1, i2; 218 219 dbr_index = (uintptr_t)record & PAGEOFFSET; /* offset (not yet index) */ 220 kvaddr = (caddr_t)record - dbr_index; 221 dbr_index /= sizeof (hermon_dbr_t); /* now it's the index */ 222 223 mutex_enter(&state->hs_dbr_lock); 224 for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link) 225 if (udbr->udbr_index == index) 226 break; 227 if (udbr == NULL) { 228 IBTF_DPRINTF_L2("hermon", "free user dbr: udbr struct not " 229 "found for index %x", index); 230 mutex_exit(&state->hs_dbr_lock); 231 return; 232 } 233 for (pagep = udbr->udbr_pagep; pagep != NULL; pagep = pagep->upg_link) 234 if (pagep->upg_kvaddr == kvaddr) 235 break; 236 if (pagep == NULL) { 237 IBTF_DPRINTF_L2("hermon", "free user dbr: pagep struct not" 238 " found for index %x, kvaddr %p, DBR index %x", 239 index, kvaddr, dbr_index); 240 mutex_exit(&state->hs_dbr_lock); 241 return; 242 } 243 if (pagep->upg_nfree >= max_free) { 244 IBTF_DPRINTF_L2("hermon", "free user dbr: overflow: " 245 "UCE index %x, DBR index %x", index, dbr_index); 246 mutex_exit(&state->hs_dbr_lock); 247 return; 248 } 249 ASSERT(dbr_index < max_free); 250 i1 = dbr_index / 64; 251 i2 = dbr_index % 64; 252 ASSERT((pagep->upg_free[i1] & (1ul << i2)) == (1ul << i2)); 253 pagep->upg_free[i1] &= ~(1ul << i2); 254 pagep->upg_nfree++; 255 mutex_exit(&state->hs_dbr_lock); 256 } 257 258 /* 259 * hermon_dbr_page_alloc() 260 * first page allocation - called from attach or open 261 * in this case, we want exactly one page per call, and aligned on a 262 * page - and may need to be mapped to the user for access 263 */ 264 int 265 hermon_dbr_page_alloc(hermon_state_t *state, hermon_dbr_info_t **dinfo) 266 { 267 int status; 268 ddi_dma_handle_t dma_hdl; 269 ddi_acc_handle_t acc_hdl; 270 ddi_dma_attr_t dma_attr; 271 ddi_dma_cookie_t cookie; 272 uint_t cookie_cnt; 273 int i; 274 hermon_dbr_info_t *info; 275 caddr_t dmaaddr; 276 uint64_t dmalen; 277 ulong_t pagesize = PAGESIZE; 278 279 info = kmem_zalloc(sizeof (hermon_dbr_info_t), KM_SLEEP); 280 281 /* 282 * Initialize many of the default DMA attributes. Then set additional 283 * alignment restrictions if necessary for the dbr memory, meaning 284 * page aligned. Also use the configured value for IOMMU bypass 285 */ 286 hermon_dma_attr_init(state, &dma_attr); 287 dma_attr.dma_attr_align = pagesize; 288 dma_attr.dma_attr_sgllen = 1; /* make sure only one cookie */ 289 #ifdef __sparc 290 if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS) 291 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 292 #endif 293 294 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, 295 DDI_DMA_SLEEP, NULL, &dma_hdl); 296 if (status != DDI_SUCCESS) { 297 kmem_free((void *)info, sizeof (hermon_dbr_info_t)); 298 cmn_err(CE_NOTE, "dbr DMA handle alloc failed\n"); 299 return (DDI_FAILURE); 300 } 301 302 status = ddi_dma_mem_alloc(dma_hdl, pagesize, 303 &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 304 NULL, &dmaaddr, (size_t *)&dmalen, &acc_hdl); 305 if (status != DDI_SUCCESS) { 306 ddi_dma_free_handle(&dma_hdl); 307 cmn_err(CE_CONT, "dbr DMA mem alloc failed(status %d)", status); 308 kmem_free((void *)info, sizeof (hermon_dbr_info_t)); 309 return (DDI_FAILURE); 310 } 311 312 /* this memory won't be IB registered, so do the bind here */ 313 status = ddi_dma_addr_bind_handle(dma_hdl, NULL, 314 dmaaddr, (size_t)dmalen, DDI_DMA_RDWR | 315 DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt); 316 if (status != DDI_SUCCESS) { 317 ddi_dma_mem_free(&acc_hdl); 318 ddi_dma_free_handle(&dma_hdl); 319 kmem_free((void *)info, sizeof (hermon_dbr_info_t)); 320 cmn_err(CE_CONT, "dbr DMA bind handle failed (status %d)", 321 status); 322 return (DDI_FAILURE); 323 } 324 *dinfo = info; /* Pass back the pointer */ 325 326 /* init the info structure with returned info */ 327 info->dbr_dmahdl = dma_hdl; 328 info->dbr_acchdl = acc_hdl; 329 info->dbr_page = (hermon_dbr_t *)(void *)dmaaddr; 330 info->dbr_link = NULL; 331 /* extract the phys addr from the cookie */ 332 info->dbr_paddr = cookie.dmac_laddress; 333 info->dbr_firstfree = 0; 334 info->dbr_nfree = HERMON_NUM_DBR_PER_PAGE; 335 /* link all DBrs onto the free list */ 336 for (i = 0; i < HERMON_NUM_DBR_PER_PAGE; i++) { 337 info->dbr_page[i] = i + 1; 338 } 339 340 return (DDI_SUCCESS); 341 } 342 343 344 /* 345 * hermon_dbr_alloc() 346 * DBr record allocation - called from alloc cq/qp/srq 347 * will check for available dbrs in current 348 * page - if needed it will allocate another and link them 349 */ 350 351 int 352 hermon_dbr_alloc(hermon_state_t *state, uint_t index, ddi_acc_handle_t *acchdl, 353 hermon_dbr_t **vdbr, uint64_t *pdbr, uint64_t *mapoffset) 354 { 355 hermon_dbr_t *record = NULL; 356 hermon_dbr_info_t *info = NULL; 357 uint32_t idx; 358 int status; 359 360 if (index != state->hs_kernel_uar_index) 361 return (hermon_user_dbr_alloc(state, index, acchdl, vdbr, pdbr, 362 mapoffset)); 363 364 mutex_enter(&state->hs_dbr_lock); 365 for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link) 366 if (info->dbr_nfree != 0) 367 break; /* found a page w/ one available */ 368 369 if (info == NULL) { /* did NOT find a page with one available */ 370 status = hermon_dbr_page_alloc(state, &info); 371 if (status != DDI_SUCCESS) { 372 /* do error handling */ 373 mutex_exit(&state->hs_dbr_lock); 374 return (DDI_FAILURE); 375 } 376 /* got a new page, so link it in. */ 377 info->dbr_link = state->hs_kern_dbr; 378 state->hs_kern_dbr = info; 379 } 380 idx = info->dbr_firstfree; 381 record = info->dbr_page + idx; 382 info->dbr_firstfree = *record; 383 info->dbr_nfree--; 384 *record = 0; 385 386 *acchdl = info->dbr_acchdl; 387 *vdbr = record; 388 *pdbr = info->dbr_paddr + idx * sizeof (hermon_dbr_t); 389 mutex_exit(&state->hs_dbr_lock); 390 return (DDI_SUCCESS); 391 } 392 393 /* 394 * hermon_dbr_free() 395 * DBr record deallocation - called from free cq/qp 396 * will update the counter in the header, and invalidate 397 * the dbr, but will NEVER free pages of dbrs - small 398 * price to pay, but userland access never will anyway 399 */ 400 void 401 hermon_dbr_free(hermon_state_t *state, uint_t indx, hermon_dbr_t *record) 402 { 403 hermon_dbr_t *page; 404 hermon_dbr_info_t *info; 405 406 if (indx != state->hs_kernel_uar_index) { 407 hermon_user_dbr_free(state, indx, record); 408 return; 409 } 410 page = (hermon_dbr_t *)(uintptr_t)((uintptr_t)record & PAGEMASK); 411 mutex_enter(&state->hs_dbr_lock); 412 for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link) 413 if (info->dbr_page == page) 414 break; 415 ASSERT(info != NULL); 416 *record = info->dbr_firstfree; 417 info->dbr_firstfree = record - info->dbr_page; 418 info->dbr_nfree++; 419 mutex_exit(&state->hs_dbr_lock); 420 } 421 422 /* 423 * hermon_dbr_kern_free() 424 * Context: Can be called only from detach context. 425 * 426 * Free all kernel dbr pages. This includes the freeing of all the dma 427 * resources acquired during the allocation of the pages. 428 * 429 * Also, free all the user dbr pages. 430 */ 431 void 432 hermon_dbr_kern_free(hermon_state_t *state) 433 { 434 hermon_dbr_info_t *info, *link; 435 hermon_user_dbr_t *udbr, *next; 436 hermon_udbr_page_t *pagep, *nextp; 437 hermon_umap_db_entry_t *umapdb; 438 int instance, status; 439 uint64_t value; 440 extern hermon_umap_db_t hermon_userland_rsrc_db; 441 442 mutex_enter(&state->hs_dbr_lock); 443 for (info = state->hs_kern_dbr; info != NULL; info = link) { 444 (void) ddi_dma_unbind_handle(info->dbr_dmahdl); 445 ddi_dma_mem_free(&info->dbr_acchdl); /* free page */ 446 ddi_dma_free_handle(&info->dbr_dmahdl); 447 link = info->dbr_link; 448 kmem_free(info, sizeof (hermon_dbr_info_t)); 449 } 450 451 udbr = state->hs_user_dbr; 452 instance = state->hs_instance; 453 mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock); 454 while (udbr != NULL) { 455 pagep = udbr->udbr_pagep; 456 while (pagep != NULL) { 457 /* probably need to remove "db" */ 458 (void) ddi_dma_unbind_handle(pagep->upg_dmahdl); 459 ddi_dma_free_handle(&pagep->upg_dmahdl); 460 freerbuf(pagep->upg_buf); 461 ddi_umem_free(pagep->upg_umemcookie); 462 status = hermon_umap_db_find_nolock(instance, 463 HERMON_DBR_KEY(udbr->udbr_index, 464 pagep->upg_index), MLNX_UMAP_DBRMEM_RSRC, 465 &value, HERMON_UMAP_DB_REMOVE, &umapdb); 466 if (status == DDI_SUCCESS) 467 hermon_umap_db_free(umapdb); 468 kmem_free(pagep->upg_free, 469 PAGESIZE / sizeof (hermon_dbr_t) / 8); 470 nextp = pagep->upg_link; 471 kmem_free(pagep, sizeof (*pagep)); 472 pagep = nextp; 473 } 474 next = udbr->udbr_link; 475 kmem_free(udbr, sizeof (*udbr)); 476 udbr = next; 477 } 478 mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock); 479 mutex_exit(&state->hs_dbr_lock); 480 } 481 482 /* 483 * hermon_ah_alloc() 484 * Context: Can be called only from user or kernel context. 485 */ 486 int 487 hermon_ah_alloc(hermon_state_t *state, hermon_pdhdl_t pd, 488 ibt_adds_vect_t *attr_p, hermon_ahhdl_t *ahhdl, uint_t sleepflag) 489 { 490 hermon_rsrc_t *rsrc; 491 hermon_hw_udav_t *udav; 492 hermon_ahhdl_t ah; 493 int status; 494 495 /* 496 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to 497 * indicate that we wish to allocate an "invalid" (i.e. empty) 498 * address handle XXX 499 */ 500 501 /* Validate that specified port number is legal */ 502 if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) { 503 return (IBT_HCA_PORT_INVALID); 504 } 505 506 /* 507 * Allocate the software structure for tracking the address handle 508 * (i.e. the Hermon Address Handle struct). 509 */ 510 status = hermon_rsrc_alloc(state, HERMON_AHHDL, 1, sleepflag, &rsrc); 511 if (status != DDI_SUCCESS) { 512 return (IBT_INSUFF_RESOURCE); 513 } 514 ah = (hermon_ahhdl_t)rsrc->hr_addr; 515 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah)) 516 517 /* Increment the reference count on the protection domain (PD) */ 518 hermon_pd_refcnt_inc(pd); 519 520 udav = (hermon_hw_udav_t *)kmem_zalloc(sizeof (hermon_hw_udav_t), 521 KM_SLEEP); 522 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav)) 523 524 /* 525 * Fill in the UDAV data. We first zero out the UDAV, then populate 526 * it by then calling hermon_set_addr_path() to fill in the common 527 * portions that can be pulled from the "ibt_adds_vect_t" passed in 528 */ 529 status = hermon_set_addr_path(state, attr_p, 530 (hermon_hw_addr_path_t *)udav, HERMON_ADDRPATH_UDAV); 531 if (status != DDI_SUCCESS) { 532 hermon_pd_refcnt_dec(pd); 533 hermon_rsrc_free(state, &rsrc); 534 return (status); 535 } 536 udav->pd = pd->pd_pdnum; 537 udav->sl = attr_p->av_srvl; 538 539 /* 540 * Fill in the rest of the Hermon Address Handle struct. 541 * 542 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field 543 * here because we may need to return it later to the IBTF (as a 544 * result of a subsequent query operation). Unlike the other UDAV 545 * parameters, the value of "av_dgid.gid_guid" is not always preserved. 546 * The reason for this is described in hermon_set_addr_path(). 547 */ 548 ah->ah_rsrcp = rsrc; 549 ah->ah_pdhdl = pd; 550 ah->ah_udav = udav; 551 ah->ah_save_guid = attr_p->av_dgid.gid_guid; 552 *ahhdl = ah; 553 554 return (DDI_SUCCESS); 555 } 556 557 558 /* 559 * hermon_ah_free() 560 * Context: Can be called only from user or kernel context. 561 */ 562 /* ARGSUSED */ 563 int 564 hermon_ah_free(hermon_state_t *state, hermon_ahhdl_t *ahhdl, uint_t sleepflag) 565 { 566 hermon_rsrc_t *rsrc; 567 hermon_pdhdl_t pd; 568 hermon_ahhdl_t ah; 569 570 /* 571 * Pull all the necessary information from the Hermon Address Handle 572 * struct. This is necessary here because the resource for the 573 * AH is going to be freed up as part of this operation. 574 */ 575 ah = *ahhdl; 576 mutex_enter(&ah->ah_lock); 577 rsrc = ah->ah_rsrcp; 578 pd = ah->ah_pdhdl; 579 mutex_exit(&ah->ah_lock); 580 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah)) 581 582 /* Free the UDAV memory */ 583 kmem_free(ah->ah_udav, sizeof (hermon_hw_udav_t)); 584 585 /* Decrement the reference count on the protection domain (PD) */ 586 hermon_pd_refcnt_dec(pd); 587 588 /* Free the Hermon Address Handle structure */ 589 hermon_rsrc_free(state, &rsrc); 590 591 /* Set the ahhdl pointer to NULL and return success */ 592 *ahhdl = NULL; 593 594 return (DDI_SUCCESS); 595 } 596 597 598 /* 599 * hermon_ah_query() 600 * Context: Can be called from interrupt or base context. 601 */ 602 /* ARGSUSED */ 603 int 604 hermon_ah_query(hermon_state_t *state, hermon_ahhdl_t ah, hermon_pdhdl_t *pd, 605 ibt_adds_vect_t *attr_p) 606 { 607 mutex_enter(&ah->ah_lock); 608 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p)) 609 610 /* 611 * Pull the PD and UDAV from the Hermon Address Handle structure 612 */ 613 *pd = ah->ah_pdhdl; 614 615 /* 616 * Fill in "ibt_adds_vect_t". We call hermon_get_addr_path() to fill 617 * the common portions that can be pulled from the UDAV we pass in. 618 * 619 * NOTE: We will also fill the "av_dgid.gid_guid" field from the 620 * "ah_save_guid" field we have previously saved away. The reason 621 * for this is described in hermon_ah_alloc() and hermon_ah_modify(). 622 */ 623 hermon_get_addr_path(state, (hermon_hw_addr_path_t *)ah->ah_udav, 624 attr_p, HERMON_ADDRPATH_UDAV); 625 626 attr_p->av_dgid.gid_guid = ah->ah_save_guid; 627 628 mutex_exit(&ah->ah_lock); 629 return (DDI_SUCCESS); 630 } 631 632 633 /* 634 * hermon_ah_modify() 635 * Context: Can be called from interrupt or base context. 636 */ 637 /* ARGSUSED */ 638 int 639 hermon_ah_modify(hermon_state_t *state, hermon_ahhdl_t ah, 640 ibt_adds_vect_t *attr_p) 641 { 642 hermon_hw_udav_t old_udav; 643 uint64_t data_old; 644 int status, size, i; 645 646 /* Validate that specified port number is legal */ 647 if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) { 648 return (IBT_HCA_PORT_INVALID); 649 } 650 651 mutex_enter(&ah->ah_lock); 652 653 /* Save a copy of the current UDAV data in old_udav. */ 654 bcopy(ah->ah_udav, &old_udav, sizeof (hermon_hw_udav_t)); 655 656 /* 657 * Fill in the new UDAV with the caller's data, passed in via the 658 * "ibt_adds_vect_t" structure. 659 * 660 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid" 661 * field here (just as we did during hermon_ah_alloc()) because we 662 * may need to return it later to the IBTF (as a result of a 663 * subsequent query operation). As explained in hermon_ah_alloc(), 664 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid" 665 * is not always preserved. The reason for this is described in 666 * hermon_set_addr_path(). 667 */ 668 status = hermon_set_addr_path(state, attr_p, 669 (hermon_hw_addr_path_t *)ah->ah_udav, HERMON_ADDRPATH_UDAV); 670 if (status != DDI_SUCCESS) { 671 mutex_exit(&ah->ah_lock); 672 return (status); 673 } 674 ah->ah_save_guid = attr_p->av_dgid.gid_guid; 675 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(ah->ah_udav))) 676 ah->ah_udav->sl = attr_p->av_srvl; 677 678 /* 679 * Copy changes into the new UDAV. 680 * Note: We copy in 64-bit chunks. For the first two of these 681 * chunks it is necessary to read the current contents of the 682 * UDAV, mask off the modifiable portions (maintaining any 683 * of the "reserved" portions), and then mask on the new data. 684 */ 685 size = sizeof (hermon_hw_udav_t) >> 3; 686 for (i = 0; i < size; i++) { 687 data_old = ((uint64_t *)&old_udav)[i]; 688 689 /* 690 * Apply mask to change only the relevant values. 691 */ 692 if (i == 0) { 693 data_old = data_old & HERMON_UDAV_MODIFY_MASK0; 694 } else if (i == 1) { 695 data_old = data_old & HERMON_UDAV_MODIFY_MASK1; 696 } else { 697 data_old = 0; 698 } 699 700 /* Store the updated values to the UDAV */ 701 ((uint64_t *)ah->ah_udav)[i] |= data_old; 702 } 703 704 /* 705 * Put the valid PD number back into the UDAV entry, as it 706 * might have been clobbered above. 707 */ 708 ah->ah_udav->pd = old_udav.pd; 709 710 711 mutex_exit(&ah->ah_lock); 712 return (DDI_SUCCESS); 713 } 714 715 /* 716 * hermon_mcg_attach() 717 * Context: Can be called only from user or kernel context. 718 */ 719 int 720 hermon_mcg_attach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid, 721 ib_lid_t lid) 722 { 723 hermon_rsrc_t *rsrc; 724 hermon_hw_mcg_t *mcg_entry; 725 hermon_hw_mcg_qp_list_t *mcg_entry_qplist; 726 hermon_mcghdl_t mcg, newmcg; 727 uint64_t mgid_hash; 728 uint32_t end_indx; 729 int status; 730 uint_t qp_found; 731 732 /* 733 * It is only allowed to attach MCG to UD queue pairs. Verify 734 * that the intended QP is of the appropriate transport type 735 */ 736 if (qp->qp_serv_type != HERMON_QP_UD) { 737 return (IBT_QP_SRV_TYPE_INVALID); 738 } 739 740 /* 741 * Check for invalid Multicast DLID. Specifically, all Multicast 742 * LIDs should be within a well defined range. If the specified LID 743 * is outside of that range, then return an error. 744 */ 745 if (hermon_mlid_is_valid(lid) == 0) { 746 return (IBT_MC_MLID_INVALID); 747 } 748 /* 749 * Check for invalid Multicast GID. All Multicast GIDs should have 750 * a well-defined pattern of bits and flags that are allowable. If 751 * the specified GID does not meet the criteria, then return an error. 752 */ 753 if (hermon_mgid_is_valid(gid) == 0) { 754 return (IBT_MC_MGID_INVALID); 755 } 756 757 /* 758 * Compute the MGID hash value. Since the MCG table is arranged as 759 * a number of separate hash chains, this operation converts the 760 * specified MGID into the starting index of an entry in the hash 761 * table (i.e. the index for the start of the appropriate hash chain). 762 * Subsequent operations below will walk the chain searching for the 763 * right place to add this new QP. 764 */ 765 status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid, 766 &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT()); 767 if (status != HERMON_CMD_SUCCESS) { 768 cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n", 769 status); 770 if (status == HERMON_CMD_INVALID_STATUS) { 771 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 772 } 773 return (ibc_get_ci_failure(0)); 774 } 775 776 /* 777 * Grab the multicast group mutex. Then grab the pre-allocated 778 * temporary buffer used for holding and/or modifying MCG entries. 779 * Zero out the temporary MCG entry before we begin. 780 */ 781 mutex_enter(&state->hs_mcglock); 782 mcg_entry = state->hs_mcgtmp; 783 mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry); 784 bzero(mcg_entry, HERMON_MCGMEM_SZ(state)); 785 786 /* 787 * Walk through the array of MCG entries starting at "mgid_hash". 788 * Try to find the appropriate place for this new QP to be added. 789 * This could happen when the first entry of the chain has MGID == 0 790 * (which means that the hash chain is empty), or because we find 791 * an entry with the same MGID (in which case we'll add the QP to 792 * that MCG), or because we come to the end of the chain (in which 793 * case this is the first QP being added to the multicast group that 794 * corresponds to the MGID. The hermon_mcg_walk_mgid_hash() routine 795 * walks the list and returns an index into the MCG table. The entry 796 * at this index is then checked to determine which case we have 797 * fallen into (see below). Note: We are using the "shadow" MCG 798 * list (of hermon_mcg_t structs) for this lookup because the real 799 * MCG entries are in hardware (and the lookup process would be much 800 * more time consuming). 801 */ 802 end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL); 803 mcg = &state->hs_mcghdl[end_indx]; 804 805 /* 806 * If MGID == 0, then the hash chain is empty. Just fill in the 807 * current entry. Note: No need to allocate an MCG table entry 808 * as all the hash chain "heads" are already preallocated. 809 */ 810 if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) { 811 812 /* Fill in the current entry in the "shadow" MCG list */ 813 hermon_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL); 814 815 /* 816 * Try to add the new QP number to the list. This (and the 817 * above) routine fills in a temporary MCG. The "mcg_entry" 818 * and "mcg_entry_qplist" pointers simply point to different 819 * offsets within the same temporary copy of the MCG (for 820 * convenience). Note: If this fails, we need to invalidate 821 * the entries we've already put into the "shadow" list entry 822 * above. 823 */ 824 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp, 825 &qp_found); 826 if (status != DDI_SUCCESS) { 827 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s)); 828 mutex_exit(&state->hs_mcglock); 829 return (status); 830 } 831 if (!qp_found) 832 mcg_entry->member_cnt = (mcg->mcg_num_qps + 1); 833 /* set the member count */ 834 835 /* 836 * Once the temporary MCG has been filled in, write the entry 837 * into the appropriate location in the Hermon MCG entry table. 838 * If it's successful, then drop the lock and return success. 839 * Note: In general, this operation shouldn't fail. If it 840 * does, then it is an indication that something (probably in 841 * HW, but maybe in SW) has gone seriously wrong. We still 842 * want to zero out the entries that we've filled in above 843 * (in the hermon_mcg_setup_new_hdr() routine). 844 */ 845 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx, 846 HERMON_CMD_NOSLEEP_SPIN); 847 if (status != HERMON_CMD_SUCCESS) { 848 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s)); 849 mutex_exit(&state->hs_mcglock); 850 HERMON_WARNING(state, "failed to write MCG entry"); 851 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: " 852 "%08x\n", status); 853 if (status == HERMON_CMD_INVALID_STATUS) { 854 hermon_fm_ereport(state, HCA_SYS_ERR, 855 HCA_ERR_SRV_LOST); 856 } 857 return (ibc_get_ci_failure(0)); 858 } 859 860 /* 861 * Now that we know all the Hermon firmware accesses have been 862 * successful, we update the "shadow" MCG entry by incrementing 863 * the "number of attached QPs" count. 864 * 865 * We increment only if the QP is not already part of the 866 * MCG by checking the 'qp_found' flag returned from the 867 * qplist_add above. 868 */ 869 if (!qp_found) { 870 mcg->mcg_num_qps++; 871 872 /* 873 * Increment the refcnt for this QP. Because the QP 874 * was added to this MCG, the refcnt must be 875 * incremented. 876 */ 877 hermon_qp_mcg_refcnt_inc(qp); 878 } 879 880 /* 881 * We drop the lock and return success. 882 */ 883 mutex_exit(&state->hs_mcglock); 884 return (DDI_SUCCESS); 885 } 886 887 /* 888 * If the specified MGID matches the MGID in the current entry, then 889 * we need to try to add the QP to the current MCG entry. In this 890 * case, it means that we need to read the existing MCG entry (into 891 * the temporary MCG), add the new QP number to the temporary entry 892 * (using the same method we used above), and write the entry back 893 * to the hardware (same as above). 894 */ 895 if ((mcg->mcg_mgid_h == gid.gid_prefix) && 896 (mcg->mcg_mgid_l == gid.gid_guid)) { 897 898 /* 899 * Read the current MCG entry into the temporary MCG. Note: 900 * In general, this operation shouldn't fail. If it does, 901 * then it is an indication that something (probably in HW, 902 * but maybe in SW) has gone seriously wrong. 903 */ 904 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx, 905 HERMON_CMD_NOSLEEP_SPIN); 906 if (status != HERMON_CMD_SUCCESS) { 907 mutex_exit(&state->hs_mcglock); 908 HERMON_WARNING(state, "failed to read MCG entry"); 909 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: " 910 "%08x\n", status); 911 if (status == HERMON_CMD_INVALID_STATUS) { 912 hermon_fm_ereport(state, HCA_SYS_ERR, 913 HCA_ERR_SRV_LOST); 914 } 915 return (ibc_get_ci_failure(0)); 916 } 917 918 /* 919 * Try to add the new QP number to the list. This routine 920 * fills in the necessary pieces of the temporary MCG. The 921 * "mcg_entry_qplist" pointer is used to point to the portion 922 * of the temporary MCG that holds the QP numbers. 923 * 924 * Note: hermon_mcg_qplist_add() returns SUCCESS if it 925 * already found the QP in the list. In this case, the QP is 926 * not added on to the list again. Check the flag 'qp_found' 927 * if this value is needed to be known. 928 * 929 */ 930 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp, 931 &qp_found); 932 if (status != DDI_SUCCESS) { 933 mutex_exit(&state->hs_mcglock); 934 return (status); 935 } 936 if (!qp_found) 937 mcg_entry->member_cnt = (mcg->mcg_num_qps + 1); 938 /* set the member count */ 939 940 /* 941 * Once the temporary MCG has been updated, write the entry 942 * into the appropriate location in the Hermon MCG entry table. 943 * If it's successful, then drop the lock and return success. 944 * Note: In general, this operation shouldn't fail. If it 945 * does, then it is an indication that something (probably in 946 * HW, but maybe in SW) has gone seriously wrong. 947 */ 948 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx, 949 HERMON_CMD_NOSLEEP_SPIN); 950 if (status != HERMON_CMD_SUCCESS) { 951 mutex_exit(&state->hs_mcglock); 952 HERMON_WARNING(state, "failed to write MCG entry"); 953 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: " 954 "%08x\n", status); 955 if (status == HERMON_CMD_INVALID_STATUS) { 956 hermon_fm_ereport(state, HCA_SYS_ERR, 957 HCA_ERR_SRV_LOST); 958 } 959 return (ibc_get_ci_failure(0)); 960 } 961 962 /* 963 * Now that we know all the Hermon firmware accesses have been 964 * successful, we update the current "shadow" MCG entry by 965 * incrementing the "number of attached QPs" count. 966 * 967 * We increment only if the QP is not already part of the 968 * MCG by checking the 'qp_found' flag returned 969 * hermon_mcg_walk_mgid_hashfrom the qplist_add above. 970 */ 971 if (!qp_found) { 972 mcg->mcg_num_qps++; 973 974 /* 975 * Increment the refcnt for this QP. Because the QP 976 * was added to this MCG, the refcnt must be 977 * incremented. 978 */ 979 hermon_qp_mcg_refcnt_inc(qp); 980 } 981 982 /* 983 * We drop the lock and return success. 984 */ 985 mutex_exit(&state->hs_mcglock); 986 return (DDI_SUCCESS); 987 } 988 989 /* 990 * If we've reached here, then we're at the end of the hash chain. 991 * We need to allocate a new MCG entry, fill it in, write it to Hermon, 992 * and update the previous entry to link the new one to the end of the 993 * chain. 994 */ 995 996 /* 997 * Allocate an MCG table entry. This will be filled in with all 998 * the necessary parameters to define the multicast group. Then it 999 * will be written to the hardware in the next-to-last step below. 1000 */ 1001 status = hermon_rsrc_alloc(state, HERMON_MCG, 1, HERMON_NOSLEEP, &rsrc); 1002 if (status != DDI_SUCCESS) { 1003 mutex_exit(&state->hs_mcglock); 1004 return (IBT_INSUFF_RESOURCE); 1005 } 1006 1007 /* 1008 * Fill in the new entry in the "shadow" MCG list. Note: Just as 1009 * it does above, hermon_mcg_setup_new_hdr() also fills in a portion 1010 * of the temporary MCG entry (the rest of which will be filled in by 1011 * hermon_mcg_qplist_add() below) 1012 */ 1013 newmcg = &state->hs_mcghdl[rsrc->hr_indx]; 1014 hermon_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc); 1015 1016 /* 1017 * Try to add the new QP number to the list. This routine fills in 1018 * the final necessary pieces of the temporary MCG. The 1019 * "mcg_entry_qplist" pointer is used to point to the portion of the 1020 * temporary MCG that holds the QP numbers. If we fail here, we 1021 * must undo the previous resource allocation. 1022 * 1023 * Note: hermon_mcg_qplist_add() can we return SUCCESS if it already 1024 * found the QP in the list. In this case, the QP is not added on to 1025 * the list again. Check the flag 'qp_found' if this value is needed 1026 * to be known. 1027 */ 1028 status = hermon_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp, 1029 &qp_found); 1030 if (status != DDI_SUCCESS) { 1031 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s)); 1032 hermon_rsrc_free(state, &rsrc); 1033 mutex_exit(&state->hs_mcglock); 1034 return (status); 1035 } 1036 mcg_entry->member_cnt = (newmcg->mcg_num_qps + 1); 1037 /* set the member count */ 1038 1039 /* 1040 * Once the temporary MCG has been updated, write the entry into the 1041 * appropriate location in the Hermon MCG entry table. If this is 1042 * successful, then we need to chain the previous entry to this one. 1043 * Note: In general, this operation shouldn't fail. If it does, then 1044 * it is an indication that something (probably in HW, but maybe in 1045 * SW) has gone seriously wrong. 1046 */ 1047 status = hermon_write_mgm_cmd_post(state, mcg_entry, rsrc->hr_indx, 1048 HERMON_CMD_NOSLEEP_SPIN); 1049 if (status != HERMON_CMD_SUCCESS) { 1050 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s)); 1051 hermon_rsrc_free(state, &rsrc); 1052 mutex_exit(&state->hs_mcglock); 1053 HERMON_WARNING(state, "failed to write MCG entry"); 1054 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n", 1055 status); 1056 if (status == HERMON_CMD_INVALID_STATUS) { 1057 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1058 } 1059 return (ibc_get_ci_failure(0)); 1060 } 1061 1062 /* 1063 * Now read the current MCG entry (the one previously at the end of 1064 * hash chain) into the temporary MCG. We are going to update its 1065 * "next_gid_indx" now and write the entry back to the MCG table. 1066 * Note: In general, this operation shouldn't fail. If it does, then 1067 * it is an indication that something (probably in HW, but maybe in SW) 1068 * has gone seriously wrong. We will free up the MCG entry resource, 1069 * but we will not undo the previously written MCG entry in the HW. 1070 * This is OK, though, because the MCG entry is not currently attached 1071 * to any hash chain. 1072 */ 1073 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx, 1074 HERMON_CMD_NOSLEEP_SPIN); 1075 if (status != HERMON_CMD_SUCCESS) { 1076 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s)); 1077 hermon_rsrc_free(state, &rsrc); 1078 mutex_exit(&state->hs_mcglock); 1079 HERMON_WARNING(state, "failed to read MCG entry"); 1080 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n", 1081 status); 1082 if (status == HERMON_CMD_INVALID_STATUS) { 1083 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1084 } 1085 return (ibc_get_ci_failure(0)); 1086 } 1087 1088 /* 1089 * Finally, we update the "next_gid_indx" field in the temporary MCG 1090 * and attempt to write the entry back into the Hermon MCG table. If 1091 * this succeeds, then we update the "shadow" list to reflect the 1092 * change, drop the lock, and return success. Note: In general, this 1093 * operation shouldn't fail. If it does, then it is an indication 1094 * that something (probably in HW, but maybe in SW) has gone seriously 1095 * wrong. Just as we do above, we will free up the MCG entry resource, 1096 * but we will not try to undo the previously written MCG entry. This 1097 * is OK, though, because (since we failed here to update the end of 1098 * the chain) that other entry is not currently attached to any chain. 1099 */ 1100 mcg_entry->next_gid_indx = rsrc->hr_indx; 1101 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx, 1102 HERMON_CMD_NOSLEEP_SPIN); 1103 if (status != HERMON_CMD_SUCCESS) { 1104 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s)); 1105 hermon_rsrc_free(state, &rsrc); 1106 mutex_exit(&state->hs_mcglock); 1107 HERMON_WARNING(state, "failed to write MCG entry"); 1108 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n", 1109 status); 1110 if (status == HERMON_CMD_INVALID_STATUS) { 1111 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1112 } 1113 return (ibc_get_ci_failure(0)); 1114 } 1115 mcg = &state->hs_mcghdl[end_indx]; 1116 mcg->mcg_next_indx = rsrc->hr_indx; 1117 1118 /* 1119 * Now that we know all the Hermon firmware accesses have been 1120 * successful, we update the new "shadow" MCG entry by incrementing 1121 * the "number of attached QPs" count. Then we drop the lock and 1122 * return success. 1123 */ 1124 newmcg->mcg_num_qps++; 1125 1126 /* 1127 * Increment the refcnt for this QP. Because the QP 1128 * was added to this MCG, the refcnt must be 1129 * incremented. 1130 */ 1131 hermon_qp_mcg_refcnt_inc(qp); 1132 1133 mutex_exit(&state->hs_mcglock); 1134 return (DDI_SUCCESS); 1135 } 1136 1137 1138 /* 1139 * hermon_mcg_detach() 1140 * Context: Can be called only from user or kernel context. 1141 */ 1142 int 1143 hermon_mcg_detach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid, 1144 ib_lid_t lid) 1145 { 1146 hermon_hw_mcg_t *mcg_entry; 1147 hermon_hw_mcg_qp_list_t *mcg_entry_qplist; 1148 hermon_mcghdl_t mcg; 1149 uint64_t mgid_hash; 1150 uint32_t end_indx, prev_indx; 1151 int status; 1152 1153 /* 1154 * Check for invalid Multicast DLID. Specifically, all Multicast 1155 * LIDs should be within a well defined range. If the specified LID 1156 * is outside of that range, then return an error. 1157 */ 1158 if (hermon_mlid_is_valid(lid) == 0) { 1159 return (IBT_MC_MLID_INVALID); 1160 } 1161 1162 /* 1163 * Compute the MGID hash value. As described above, the MCG table is 1164 * arranged as a number of separate hash chains. This operation 1165 * converts the specified MGID into the starting index of an entry in 1166 * the hash table (i.e. the index for the start of the appropriate 1167 * hash chain). Subsequent operations below will walk the chain 1168 * searching for a matching entry from which to attempt to remove 1169 * the specified QP. 1170 */ 1171 status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid, 1172 &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT()); 1173 if (status != HERMON_CMD_SUCCESS) { 1174 cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n", 1175 status); 1176 if (status == HERMON_CMD_INVALID_STATUS) { 1177 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1178 } 1179 return (ibc_get_ci_failure(0)); 1180 } 1181 1182 /* 1183 * Grab the multicast group mutex. Then grab the pre-allocated 1184 * temporary buffer used for holding and/or modifying MCG entries. 1185 */ 1186 mutex_enter(&state->hs_mcglock); 1187 mcg_entry = state->hs_mcgtmp; 1188 mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry); 1189 1190 /* 1191 * Walk through the array of MCG entries starting at "mgid_hash". 1192 * Try to find an MCG entry with a matching MGID. The 1193 * hermon_mcg_walk_mgid_hash() routine walks the list and returns an 1194 * index into the MCG table. The entry at this index is checked to 1195 * determine whether it is a match or not. If it is a match, then 1196 * we continue on to attempt to remove the QP from the MCG. If it 1197 * is not a match (or not a valid MCG entry), then we return an error. 1198 */ 1199 end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx); 1200 mcg = &state->hs_mcghdl[end_indx]; 1201 1202 /* 1203 * If MGID == 0 (the hash chain is empty) or if the specified MGID 1204 * does not match the MGID in the current entry, then return 1205 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not 1206 * valid). 1207 */ 1208 if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) || 1209 ((mcg->mcg_mgid_h != gid.gid_prefix) || 1210 (mcg->mcg_mgid_l != gid.gid_guid))) { 1211 mutex_exit(&state->hs_mcglock); 1212 return (IBT_MC_MGID_INVALID); 1213 } 1214 1215 /* 1216 * Read the current MCG entry into the temporary MCG. Note: In 1217 * general, this operation shouldn't fail. If it does, then it is 1218 * an indication that something (probably in HW, but maybe in SW) 1219 * has gone seriously wrong. 1220 */ 1221 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx, 1222 HERMON_CMD_NOSLEEP_SPIN); 1223 if (status != HERMON_CMD_SUCCESS) { 1224 mutex_exit(&state->hs_mcglock); 1225 HERMON_WARNING(state, "failed to read MCG entry"); 1226 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n", 1227 status); 1228 if (status == HERMON_CMD_INVALID_STATUS) { 1229 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1230 } 1231 return (ibc_get_ci_failure(0)); 1232 } 1233 1234 /* 1235 * Search the QP number list for a match. If a match is found, then 1236 * remove the entry from the QP list. Otherwise, if no match is found, 1237 * return an error. 1238 */ 1239 status = hermon_mcg_qplist_remove(mcg, mcg_entry_qplist, qp); 1240 if (status != DDI_SUCCESS) { 1241 mutex_exit(&state->hs_mcglock); 1242 return (status); 1243 } 1244 1245 /* 1246 * Decrement the MCG count for this QP. When the 'qp_mcg' 1247 * field becomes 0, then this QP is no longer a member of any 1248 * MCG. 1249 */ 1250 hermon_qp_mcg_refcnt_dec(qp); 1251 1252 /* 1253 * If the current MCG's QP number list is about to be made empty 1254 * ("mcg_num_qps" == 1), then remove the entry itself from the hash 1255 * chain. Otherwise, just write the updated MCG entry back to the 1256 * hardware. In either case, once we successfully update the hardware 1257 * chain, then we decrement the "shadow" list entry's "mcg_num_qps" 1258 * count (or zero out the entire "shadow" list entry) before returning 1259 * success. Note: Zeroing out the "shadow" list entry is done 1260 * inside of hermon_mcg_hash_list_remove(). 1261 */ 1262 if (mcg->mcg_num_qps == 1) { 1263 1264 /* Remove an MCG entry from the hash chain */ 1265 status = hermon_mcg_hash_list_remove(state, end_indx, prev_indx, 1266 mcg_entry); 1267 if (status != DDI_SUCCESS) { 1268 mutex_exit(&state->hs_mcglock); 1269 return (status); 1270 } 1271 1272 } else { 1273 /* 1274 * Write the updated MCG entry back to the Hermon MCG table. 1275 * If this succeeds, then we update the "shadow" list to 1276 * reflect the change (i.e. decrement the "mcg_num_qps"), 1277 * drop the lock, and return success. Note: In general, 1278 * this operation shouldn't fail. If it does, then it is an 1279 * indication that something (probably in HW, but maybe in SW) 1280 * has gone seriously wrong. 1281 */ 1282 mcg_entry->member_cnt = (mcg->mcg_num_qps - 1); 1283 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx, 1284 HERMON_CMD_NOSLEEP_SPIN); 1285 if (status != HERMON_CMD_SUCCESS) { 1286 mutex_exit(&state->hs_mcglock); 1287 HERMON_WARNING(state, "failed to write MCG entry"); 1288 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: " 1289 "%08x\n", status); 1290 if (status == HERMON_CMD_INVALID_STATUS) { 1291 hermon_fm_ereport(state, HCA_SYS_ERR, 1292 HCA_ERR_SRV_LOST); 1293 } 1294 return (ibc_get_ci_failure(0)); 1295 } 1296 mcg->mcg_num_qps--; 1297 } 1298 1299 mutex_exit(&state->hs_mcglock); 1300 return (DDI_SUCCESS); 1301 } 1302 1303 /* 1304 * hermon_qp_mcg_refcnt_inc() 1305 * Context: Can be called from interrupt or base context. 1306 */ 1307 static void 1308 hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp) 1309 { 1310 /* Increment the QP's MCG reference count */ 1311 mutex_enter(&qp->qp_lock); 1312 qp->qp_mcg_refcnt++; 1313 mutex_exit(&qp->qp_lock); 1314 } 1315 1316 1317 /* 1318 * hermon_qp_mcg_refcnt_dec() 1319 * Context: Can be called from interrupt or base context. 1320 */ 1321 static void 1322 hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp) 1323 { 1324 /* Decrement the QP's MCG reference count */ 1325 mutex_enter(&qp->qp_lock); 1326 qp->qp_mcg_refcnt--; 1327 mutex_exit(&qp->qp_lock); 1328 } 1329 1330 1331 /* 1332 * hermon_mcg_qplist_add() 1333 * Context: Can be called from interrupt or base context. 1334 */ 1335 static int 1336 hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg, 1337 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp, 1338 uint_t *qp_found) 1339 { 1340 uint_t qplist_indx; 1341 1342 ASSERT(MUTEX_HELD(&state->hs_mcglock)); 1343 1344 qplist_indx = mcg->mcg_num_qps; 1345 1346 /* 1347 * Determine if we have exceeded the maximum number of QP per 1348 * multicast group. If we have, then return an error 1349 */ 1350 if (qplist_indx >= state->hs_cfg_profile->cp_num_qp_per_mcg) { 1351 return (IBT_HCA_MCG_QP_EXCEEDED); 1352 } 1353 1354 /* 1355 * Determine if the QP is already attached to this MCG table. If it 1356 * is, then we break out and treat this operation as a NO-OP 1357 */ 1358 for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps; 1359 qplist_indx++) { 1360 if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) { 1361 break; 1362 } 1363 } 1364 1365 /* 1366 * If the QP was already on the list, set 'qp_found' to TRUE. We still 1367 * return SUCCESS in this case, but the qplist will not have been 1368 * updated because the QP was already on the list. 1369 */ 1370 if (qplist_indx < mcg->mcg_num_qps) { 1371 *qp_found = 1; 1372 } else { 1373 /* 1374 * Otherwise, append the new QP number to the end of the 1375 * current QP list. Note: We will increment the "mcg_num_qps" 1376 * field on the "shadow" MCG list entry later (after we know 1377 * that all necessary Hermon firmware accesses have been 1378 * successful). 1379 * 1380 * Set 'qp_found' to 0 so we know the QP was added on to the 1381 * list for sure. 1382 */ 1383 mcg_qplist[qplist_indx].qpn = 1384 (qp->qp_qpnum | HERMON_MCG_QPN_BLOCK_LB); 1385 *qp_found = 0; 1386 } 1387 1388 return (DDI_SUCCESS); 1389 } 1390 1391 1392 1393 /* 1394 * hermon_mcg_qplist_remove() 1395 * Context: Can be called from interrupt or base context. 1396 */ 1397 static int 1398 hermon_mcg_qplist_remove(hermon_mcghdl_t mcg, 1399 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp) 1400 { 1401 uint_t i, qplist_indx; 1402 1403 /* 1404 * Search the MCG QP list for a matching QPN. When 1405 * it's found, we swap the last entry with the current 1406 * one, set the last entry to zero, decrement the last 1407 * entry, and return. If it's not found, then it's 1408 * and error. 1409 */ 1410 qplist_indx = mcg->mcg_num_qps; 1411 for (i = 0; i < qplist_indx; i++) { 1412 if (mcg_qplist[i].qpn == qp->qp_qpnum) { 1413 mcg_qplist[i] = mcg_qplist[qplist_indx - 1]; 1414 mcg_qplist[qplist_indx - 1].qpn = 0; 1415 1416 return (DDI_SUCCESS); 1417 } 1418 } 1419 1420 return (IBT_QP_HDL_INVALID); 1421 } 1422 1423 1424 /* 1425 * hermon_mcg_walk_mgid_hash() 1426 * Context: Can be called from interrupt or base context. 1427 */ 1428 static uint_t 1429 hermon_mcg_walk_mgid_hash(hermon_state_t *state, uint64_t start_indx, 1430 ib_gid_t mgid, uint_t *p_indx) 1431 { 1432 hermon_mcghdl_t curr_mcghdl; 1433 uint_t curr_indx, prev_indx; 1434 1435 ASSERT(MUTEX_HELD(&state->hs_mcglock)); 1436 1437 /* Start at the head of the hash chain */ 1438 curr_indx = (uint_t)start_indx; 1439 prev_indx = curr_indx; 1440 curr_mcghdl = &state->hs_mcghdl[curr_indx]; 1441 1442 /* If the first entry in the chain has MGID == 0, then stop */ 1443 if ((curr_mcghdl->mcg_mgid_h == 0) && 1444 (curr_mcghdl->mcg_mgid_l == 0)) { 1445 goto end_mgid_hash_walk; 1446 } 1447 1448 /* If the first entry in the chain matches the MGID, then stop */ 1449 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) && 1450 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) { 1451 goto end_mgid_hash_walk; 1452 } 1453 1454 /* Otherwise, walk the hash chain looking for a match */ 1455 while (curr_mcghdl->mcg_next_indx != 0) { 1456 prev_indx = curr_indx; 1457 curr_indx = curr_mcghdl->mcg_next_indx; 1458 curr_mcghdl = &state->hs_mcghdl[curr_indx]; 1459 1460 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) && 1461 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) { 1462 break; 1463 } 1464 } 1465 1466 end_mgid_hash_walk: 1467 /* 1468 * If necessary, return the index of the previous entry too. This 1469 * is primarily used for detaching a QP from a multicast group. It 1470 * may be necessary, in that case, to delete an MCG entry from the 1471 * hash chain and having the index of the previous entry is helpful. 1472 */ 1473 if (p_indx != NULL) { 1474 *p_indx = prev_indx; 1475 } 1476 return (curr_indx); 1477 } 1478 1479 1480 /* 1481 * hermon_mcg_setup_new_hdr() 1482 * Context: Can be called from interrupt or base context. 1483 */ 1484 static void 1485 hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, hermon_hw_mcg_t *mcg_hdr, 1486 ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc) 1487 { 1488 /* 1489 * Fill in the fields of the "shadow" entry used by software 1490 * to track MCG hardware entry 1491 */ 1492 mcg->mcg_mgid_h = mgid.gid_prefix; 1493 mcg->mcg_mgid_l = mgid.gid_guid; 1494 mcg->mcg_rsrcp = mcg_rsrc; 1495 mcg->mcg_next_indx = 0; 1496 mcg->mcg_num_qps = 0; 1497 1498 /* 1499 * Fill the header fields of the MCG entry (in the temporary copy) 1500 */ 1501 mcg_hdr->mgid_h = mgid.gid_prefix; 1502 mcg_hdr->mgid_l = mgid.gid_guid; 1503 mcg_hdr->next_gid_indx = 0; 1504 } 1505 1506 1507 /* 1508 * hermon_mcg_hash_list_remove() 1509 * Context: Can be called only from user or kernel context. 1510 */ 1511 static int 1512 hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx, 1513 uint_t prev_indx, hermon_hw_mcg_t *mcg_entry) 1514 { 1515 hermon_mcghdl_t curr_mcg, prev_mcg, next_mcg; 1516 uint_t next_indx; 1517 int status; 1518 1519 /* Get the pointer to "shadow" list for current entry */ 1520 curr_mcg = &state->hs_mcghdl[curr_indx]; 1521 1522 /* 1523 * If this is the first entry on a hash chain, then attempt to replace 1524 * the entry with the next entry on the chain. If there are no 1525 * subsequent entries on the chain, then this is the only entry and 1526 * should be invalidated. 1527 */ 1528 if (curr_indx == prev_indx) { 1529 1530 /* 1531 * If this is the only entry on the chain, then invalidate it. 1532 * Note: Invalidating an MCG entry means writing all zeros 1533 * to the entry. This is only necessary for those MCG 1534 * entries that are the "head" entries of the individual hash 1535 * chains. Regardless of whether this operation returns 1536 * success or failure, return that result to the caller. 1537 */ 1538 next_indx = curr_mcg->mcg_next_indx; 1539 if (next_indx == 0) { 1540 status = hermon_mcg_entry_invalidate(state, mcg_entry, 1541 curr_indx); 1542 bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s)); 1543 return (status); 1544 } 1545 1546 /* 1547 * Otherwise, this is just the first entry on the chain, so 1548 * grab the next one 1549 */ 1550 next_mcg = &state->hs_mcghdl[next_indx]; 1551 1552 /* 1553 * Read the next MCG entry into the temporary MCG. Note: 1554 * In general, this operation shouldn't fail. If it does, 1555 * then it is an indication that something (probably in HW, 1556 * but maybe in SW) has gone seriously wrong. 1557 */ 1558 status = hermon_read_mgm_cmd_post(state, mcg_entry, next_indx, 1559 HERMON_CMD_NOSLEEP_SPIN); 1560 if (status != HERMON_CMD_SUCCESS) { 1561 HERMON_WARNING(state, "failed to read MCG entry"); 1562 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: " 1563 "%08x\n", status); 1564 if (status == HERMON_CMD_INVALID_STATUS) { 1565 hermon_fm_ereport(state, HCA_SYS_ERR, 1566 HCA_ERR_SRV_LOST); 1567 } 1568 return (ibc_get_ci_failure(0)); 1569 } 1570 1571 /* 1572 * Copy/Write the temporary MCG back to the hardware MCG list 1573 * using the current index. This essentially removes the 1574 * current MCG entry from the list by writing over it with 1575 * the next one. If this is successful, then we can do the 1576 * same operation for the "shadow" list. And we can also 1577 * free up the Hermon MCG entry resource that was associated 1578 * with the (old) next entry. Note: In general, this 1579 * operation shouldn't fail. If it does, then it is an 1580 * indication that something (probably in HW, but maybe in SW) 1581 * has gone seriously wrong. 1582 */ 1583 status = hermon_write_mgm_cmd_post(state, mcg_entry, curr_indx, 1584 HERMON_CMD_NOSLEEP_SPIN); 1585 if (status != HERMON_CMD_SUCCESS) { 1586 HERMON_WARNING(state, "failed to write MCG entry"); 1587 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: " 1588 "%08x\n", status); 1589 if (status == HERMON_CMD_INVALID_STATUS) { 1590 hermon_fm_ereport(state, HCA_SYS_ERR, 1591 HCA_ERR_SRV_LOST); 1592 } 1593 return (ibc_get_ci_failure(0)); 1594 } 1595 1596 /* 1597 * Copy all the software tracking information from the next 1598 * entry on the "shadow" MCG list into the current entry on 1599 * the list. Then invalidate (zero out) the other "shadow" 1600 * list entry. 1601 */ 1602 bcopy(next_mcg, curr_mcg, sizeof (struct hermon_sw_mcg_list_s)); 1603 bzero(next_mcg, sizeof (struct hermon_sw_mcg_list_s)); 1604 1605 /* 1606 * Free up the Hermon MCG entry resource used by the "next" 1607 * MCG entry. That resource is no longer needed by any 1608 * MCG entry which is first on a hash chain (like the "next" 1609 * entry has just become). 1610 */ 1611 hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp); 1612 1613 return (DDI_SUCCESS); 1614 } 1615 1616 /* 1617 * Else if this is the last entry on the hash chain (or a middle 1618 * entry, then we update the previous entry's "next_gid_index" field 1619 * to make it point instead to the next entry on the chain. By 1620 * skipping over the removed entry in this way, we can then free up 1621 * any resources associated with the current entry. Note: We don't 1622 * need to invalidate the "skipped over" hardware entry because it 1623 * will no be longer connected to any hash chains, and if/when it is 1624 * finally re-used, it will be written with entirely new values. 1625 */ 1626 1627 /* 1628 * Read the next MCG entry into the temporary MCG. Note: In general, 1629 * this operation shouldn't fail. If it does, then it is an 1630 * indication that something (probably in HW, but maybe in SW) has 1631 * gone seriously wrong. 1632 */ 1633 status = hermon_read_mgm_cmd_post(state, mcg_entry, prev_indx, 1634 HERMON_CMD_NOSLEEP_SPIN); 1635 if (status != HERMON_CMD_SUCCESS) { 1636 HERMON_WARNING(state, "failed to read MCG entry"); 1637 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n", 1638 status); 1639 if (status == HERMON_CMD_INVALID_STATUS) { 1640 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1641 } 1642 return (ibc_get_ci_failure(0)); 1643 } 1644 1645 /* 1646 * Finally, we update the "next_gid_indx" field in the temporary MCG 1647 * and attempt to write the entry back into the Hermon MCG table. If 1648 * this succeeds, then we update the "shadow" list to reflect the 1649 * change, free up the Hermon MCG entry resource that was associated 1650 * with the current entry, and return success. Note: In general, 1651 * this operation shouldn't fail. If it does, then it is an indication 1652 * that something (probably in HW, but maybe in SW) has gone seriously 1653 * wrong. 1654 */ 1655 mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx; 1656 status = hermon_write_mgm_cmd_post(state, mcg_entry, prev_indx, 1657 HERMON_CMD_NOSLEEP_SPIN); 1658 if (status != HERMON_CMD_SUCCESS) { 1659 HERMON_WARNING(state, "failed to write MCG entry"); 1660 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n", 1661 status); 1662 if (status == HERMON_CMD_INVALID_STATUS) { 1663 hermon_fm_ereport(state, HCA_SYS_ERR, 1664 HCA_ERR_SRV_LOST); 1665 } 1666 return (ibc_get_ci_failure(0)); 1667 } 1668 1669 /* 1670 * Get the pointer to the "shadow" MCG list entry for the previous 1671 * MCG. Update its "mcg_next_indx" to point to the next entry 1672 * the one after the current entry. Note: This next index may be 1673 * zero, indicating the end of the list. 1674 */ 1675 prev_mcg = &state->hs_mcghdl[prev_indx]; 1676 prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx; 1677 1678 /* 1679 * Free up the Hermon MCG entry resource used by the current entry. 1680 * This resource is no longer needed because the chain now skips over 1681 * the current entry. Then invalidate (zero out) the current "shadow" 1682 * list entry. 1683 */ 1684 hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp); 1685 bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s)); 1686 1687 return (DDI_SUCCESS); 1688 } 1689 1690 1691 /* 1692 * hermon_mcg_entry_invalidate() 1693 * Context: Can be called only from user or kernel context. 1694 */ 1695 static int 1696 hermon_mcg_entry_invalidate(hermon_state_t *state, hermon_hw_mcg_t *mcg_entry, 1697 uint_t indx) 1698 { 1699 int status; 1700 1701 /* 1702 * Invalidate the hardware MCG entry by zeroing out this temporary 1703 * MCG and writing it the the hardware. Note: In general, this 1704 * operation shouldn't fail. If it does, then it is an indication 1705 * that something (probably in HW, but maybe in SW) has gone seriously 1706 * wrong. 1707 */ 1708 bzero(mcg_entry, HERMON_MCGMEM_SZ(state)); 1709 status = hermon_write_mgm_cmd_post(state, mcg_entry, indx, 1710 HERMON_CMD_NOSLEEP_SPIN); 1711 if (status != HERMON_CMD_SUCCESS) { 1712 HERMON_WARNING(state, "failed to write MCG entry"); 1713 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n", 1714 status); 1715 if (status == HERMON_CMD_INVALID_STATUS) { 1716 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1717 } 1718 return (ibc_get_ci_failure(0)); 1719 } 1720 1721 return (DDI_SUCCESS); 1722 } 1723 1724 1725 /* 1726 * hermon_mgid_is_valid() 1727 * Context: Can be called from interrupt or base context. 1728 */ 1729 static int 1730 hermon_mgid_is_valid(ib_gid_t gid) 1731 { 1732 uint_t topbits, flags, scope; 1733 1734 /* 1735 * According to IBA 1.1 specification (section 4.1.1) a valid 1736 * "multicast GID" must have its top eight bits set to all ones 1737 */ 1738 topbits = (gid.gid_prefix >> HERMON_MCG_TOPBITS_SHIFT) & 1739 HERMON_MCG_TOPBITS_MASK; 1740 if (topbits != HERMON_MCG_TOPBITS) { 1741 return (0); 1742 } 1743 1744 /* 1745 * The next 4 bits are the "flag" bits. These are valid only 1746 * if they are "0" (which correspond to permanently assigned/ 1747 * "well-known" multicast GIDs) or "1" (for so-called "transient" 1748 * multicast GIDs). All other values are reserved. 1749 */ 1750 flags = (gid.gid_prefix >> HERMON_MCG_FLAGS_SHIFT) & 1751 HERMON_MCG_FLAGS_MASK; 1752 if (!((flags == HERMON_MCG_FLAGS_PERM) || 1753 (flags == HERMON_MCG_FLAGS_NONPERM))) { 1754 return (0); 1755 } 1756 1757 /* 1758 * The next 4 bits are the "scope" bits. These are valid only 1759 * if they are "2" (Link-local), "5" (Site-local), "8" 1760 * (Organization-local) or "E" (Global). All other values 1761 * are reserved (or currently unassigned). 1762 */ 1763 scope = (gid.gid_prefix >> HERMON_MCG_SCOPE_SHIFT) & 1764 HERMON_MCG_SCOPE_MASK; 1765 if (!((scope == HERMON_MCG_SCOPE_LINKLOC) || 1766 (scope == HERMON_MCG_SCOPE_SITELOC) || 1767 (scope == HERMON_MCG_SCOPE_ORGLOC) || 1768 (scope == HERMON_MCG_SCOPE_GLOBAL))) { 1769 return (0); 1770 } 1771 1772 /* 1773 * If it passes all of the above checks, then we will consider it 1774 * a valid multicast GID. 1775 */ 1776 return (1); 1777 } 1778 1779 1780 /* 1781 * hermon_mlid_is_valid() 1782 * Context: Can be called from interrupt or base context. 1783 */ 1784 static int 1785 hermon_mlid_is_valid(ib_lid_t lid) 1786 { 1787 /* 1788 * According to IBA 1.1 specification (section 4.1.1) a valid 1789 * "multicast DLID" must be between 0xC000 and 0xFFFE. 1790 */ 1791 if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) { 1792 return (0); 1793 } 1794 1795 return (1); 1796 } 1797 1798 1799 /* 1800 * hermon_pd_alloc() 1801 * Context: Can be called only from user or kernel context. 1802 */ 1803 int 1804 hermon_pd_alloc(hermon_state_t *state, hermon_pdhdl_t *pdhdl, uint_t sleepflag) 1805 { 1806 hermon_rsrc_t *rsrc; 1807 hermon_pdhdl_t pd; 1808 int status; 1809 1810 /* 1811 * Allocate the software structure for tracking the protection domain 1812 * (i.e. the Hermon Protection Domain handle). By default each PD 1813 * structure will have a unique PD number assigned to it. All that 1814 * is necessary is for software to initialize the PD reference count 1815 * (to zero) and return success. 1816 */ 1817 status = hermon_rsrc_alloc(state, HERMON_PDHDL, 1, sleepflag, &rsrc); 1818 if (status != DDI_SUCCESS) { 1819 return (IBT_INSUFF_RESOURCE); 1820 } 1821 pd = (hermon_pdhdl_t)rsrc->hr_addr; 1822 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd)) 1823 1824 pd->pd_refcnt = 0; 1825 *pdhdl = pd; 1826 1827 return (DDI_SUCCESS); 1828 } 1829 1830 1831 /* 1832 * hermon_pd_free() 1833 * Context: Can be called only from user or kernel context. 1834 */ 1835 int 1836 hermon_pd_free(hermon_state_t *state, hermon_pdhdl_t *pdhdl) 1837 { 1838 hermon_rsrc_t *rsrc; 1839 hermon_pdhdl_t pd; 1840 1841 /* 1842 * Pull all the necessary information from the Hermon Protection Domain 1843 * handle. This is necessary here because the resource for the 1844 * PD is going to be freed up as part of this operation. 1845 */ 1846 pd = *pdhdl; 1847 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd)) 1848 rsrc = pd->pd_rsrcp; 1849 1850 /* 1851 * Check the PD reference count. If the reference count is non-zero, 1852 * then it means that this protection domain is still referenced by 1853 * some memory region, queue pair, address handle, or other IB object 1854 * If it is non-zero, then return an error. Otherwise, free the 1855 * Hermon resource and return success. 1856 */ 1857 if (pd->pd_refcnt != 0) { 1858 return (IBT_PD_IN_USE); 1859 } 1860 1861 /* Free the Hermon Protection Domain handle */ 1862 hermon_rsrc_free(state, &rsrc); 1863 1864 /* Set the pdhdl pointer to NULL and return success */ 1865 *pdhdl = (hermon_pdhdl_t)NULL; 1866 1867 return (DDI_SUCCESS); 1868 } 1869 1870 1871 /* 1872 * hermon_pd_refcnt_inc() 1873 * Context: Can be called from interrupt or base context. 1874 */ 1875 void 1876 hermon_pd_refcnt_inc(hermon_pdhdl_t pd) 1877 { 1878 /* Increment the protection domain's reference count */ 1879 atomic_inc_32(&pd->pd_refcnt); 1880 } 1881 1882 1883 /* 1884 * hermon_pd_refcnt_dec() 1885 * Context: Can be called from interrupt or base context. 1886 */ 1887 void 1888 hermon_pd_refcnt_dec(hermon_pdhdl_t pd) 1889 { 1890 /* Decrement the protection domain's reference count */ 1891 atomic_dec_32(&pd->pd_refcnt); 1892 } 1893 1894 1895 /* 1896 * hermon_port_query() 1897 * Context: Can be called only from user or kernel context. 1898 */ 1899 int 1900 hermon_port_query(hermon_state_t *state, uint_t port, ibt_hca_portinfo_t *pi) 1901 { 1902 sm_portinfo_t portinfo; 1903 sm_guidinfo_t guidinfo; 1904 sm_pkey_table_t pkeytable; 1905 ib_gid_t *sgid; 1906 uint_t sgid_max, pkey_max, tbl_size; 1907 int i, j, indx, status; 1908 ib_pkey_t *pkeyp; 1909 ib_guid_t *guidp; 1910 1911 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi)) 1912 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state)) 1913 1914 /* Validate that specified port number is legal */ 1915 if (!hermon_portnum_is_valid(state, port)) { 1916 return (IBT_HCA_PORT_INVALID); 1917 } 1918 pkeyp = state->hs_pkey[port - 1]; 1919 guidp = state->hs_guid[port - 1]; 1920 1921 /* 1922 * We use the Hermon MAD_IFC command to post a GetPortInfo MAD 1923 * to the firmware (for the specified port number). This returns 1924 * a full PortInfo MAD (in "portinfo") which we subsequently 1925 * parse to fill in the "ibt_hca_portinfo_t" structure returned 1926 * to the IBTF. 1927 */ 1928 status = hermon_getportinfo_cmd_post(state, port, 1929 HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo); 1930 if (status != HERMON_CMD_SUCCESS) { 1931 cmn_err(CE_CONT, "Hermon: GetPortInfo (port %02d) command " 1932 "failed: %08x\n", port, status); 1933 if (status == HERMON_CMD_INVALID_STATUS) { 1934 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1935 } 1936 return (ibc_get_ci_failure(0)); 1937 } 1938 1939 /* 1940 * Parse the PortInfo MAD and fill in the IBTF structure 1941 */ 1942 pi->p_base_lid = portinfo.LID; 1943 pi->p_qkey_violations = portinfo.Q_KeyViolations; 1944 pi->p_pkey_violations = portinfo.P_KeyViolations; 1945 pi->p_sm_sl = portinfo.MasterSMSL; 1946 pi->p_sm_lid = portinfo.MasterSMLID; 1947 pi->p_linkstate = portinfo.PortState; 1948 pi->p_port_num = portinfo.LocalPortNum; 1949 pi->p_phys_state = portinfo.PortPhysicalState; 1950 pi->p_width_supported = portinfo.LinkWidthSupported; 1951 pi->p_width_enabled = portinfo.LinkWidthEnabled; 1952 pi->p_width_active = portinfo.LinkWidthActive; 1953 pi->p_speed_supported = portinfo.LinkSpeedSupported; 1954 pi->p_speed_enabled = portinfo.LinkSpeedEnabled; 1955 pi->p_speed_active = portinfo.LinkSpeedActive; 1956 pi->p_mtu = portinfo.MTUCap; 1957 pi->p_lmc = portinfo.LMC; 1958 pi->p_max_vl = portinfo.VLCap; 1959 pi->p_subnet_timeout = portinfo.SubnetTimeOut; 1960 pi->p_msg_sz = ((uint32_t)1 << HERMON_QP_LOG_MAX_MSGSZ); 1961 tbl_size = state->hs_cfg_profile->cp_log_max_gidtbl; 1962 pi->p_sgid_tbl_sz = (1 << tbl_size); 1963 tbl_size = state->hs_cfg_profile->cp_log_max_pkeytbl; 1964 pi->p_pkey_tbl_sz = (1 << tbl_size); 1965 state->hs_sn_prefix[port - 1] = portinfo.GidPrefix; 1966 1967 /* 1968 * Convert InfiniBand-defined port capability flags to the format 1969 * specified by the IBTF 1970 */ 1971 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM) 1972 pi->p_capabilities |= IBT_PORT_CAP_SM; 1973 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED) 1974 pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED; 1975 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD) 1976 pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL; 1977 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD) 1978 pi->p_capabilities |= IBT_PORT_CAP_DM; 1979 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD) 1980 pi->p_capabilities |= IBT_PORT_CAP_VENDOR; 1981 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_CLNT_REREG_SUPPD) 1982 pi->p_capabilities |= IBT_PORT_CAP_CLNT_REREG; 1983 1984 /* 1985 * Fill in the SGID table. Since the only access to the Hermon 1986 * GID tables is through the firmware's MAD_IFC interface, we 1987 * post as many GetGUIDInfo MADs as necessary to read in the entire 1988 * contents of the SGID table (for the specified port). Note: The 1989 * GetGUIDInfo command only gets eight GUIDs per operation. These 1990 * GUIDs are then appended to the GID prefix for the port (from the 1991 * GetPortInfo above) to form the entire SGID table. 1992 */ 1993 for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) { 1994 status = hermon_getguidinfo_cmd_post(state, port, i >> 3, 1995 HERMON_SLEEPFLAG_FOR_CONTEXT(), &guidinfo); 1996 if (status != HERMON_CMD_SUCCESS) { 1997 cmn_err(CE_CONT, "Hermon: GetGUIDInfo (port %02d) " 1998 "command failed: %08x\n", port, status); 1999 if (status == HERMON_CMD_INVALID_STATUS) { 2000 hermon_fm_ereport(state, HCA_SYS_ERR, 2001 HCA_ERR_SRV_LOST); 2002 } 2003 return (ibc_get_ci_failure(0)); 2004 } 2005 2006 /* Figure out how many of the entries are valid */ 2007 sgid_max = min((pi->p_sgid_tbl_sz - i), 8); 2008 for (j = 0; j < sgid_max; j++) { 2009 indx = (i + j); 2010 sgid = &pi->p_sgid_tbl[indx]; 2011 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid)) 2012 sgid->gid_prefix = portinfo.GidPrefix; 2013 guidp[indx] = sgid->gid_guid = 2014 guidinfo.GUIDBlocks[j]; 2015 } 2016 } 2017 2018 /* 2019 * Fill in the PKey table. Just as for the GID tables above, the 2020 * only access to the Hermon PKey tables is through the firmware's 2021 * MAD_IFC interface. We post as many GetPKeyTable MADs as necessary 2022 * to read in the entire contents of the PKey table (for the specified 2023 * port). Note: The GetPKeyTable command only gets 32 PKeys per 2024 * operation. 2025 */ 2026 for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) { 2027 status = hermon_getpkeytable_cmd_post(state, port, i, 2028 HERMON_SLEEPFLAG_FOR_CONTEXT(), &pkeytable); 2029 if (status != HERMON_CMD_SUCCESS) { 2030 cmn_err(CE_CONT, "Hermon: GetPKeyTable (port %02d) " 2031 "command failed: %08x\n", port, status); 2032 if (status == HERMON_CMD_INVALID_STATUS) { 2033 hermon_fm_ereport(state, HCA_SYS_ERR, 2034 HCA_ERR_SRV_LOST); 2035 } 2036 return (ibc_get_ci_failure(0)); 2037 } 2038 2039 /* Figure out how many of the entries are valid */ 2040 pkey_max = min((pi->p_pkey_tbl_sz - i), 32); 2041 for (j = 0; j < pkey_max; j++) { 2042 indx = (i + j); 2043 pkeyp[indx] = pi->p_pkey_tbl[indx] = 2044 pkeytable.P_KeyTableBlocks[j]; 2045 } 2046 } 2047 2048 return (DDI_SUCCESS); 2049 } 2050 2051 2052 /* 2053 * hermon_port_modify() 2054 * Context: Can be called only from user or kernel context. 2055 */ 2056 /* ARGSUSED */ 2057 int 2058 hermon_port_modify(hermon_state_t *state, uint8_t port, 2059 ibt_port_modify_flags_t flags, uint8_t init_type) 2060 { 2061 sm_portinfo_t portinfo; 2062 uint32_t capmask; 2063 int status; 2064 hermon_hw_set_port_t set_port; 2065 2066 /* 2067 * Return an error if either of the unsupported flags are set 2068 */ 2069 if ((flags & IBT_PORT_SHUTDOWN) || 2070 (flags & IBT_PORT_SET_INIT_TYPE)) { 2071 return (IBT_NOT_SUPPORTED); 2072 } 2073 2074 bzero(&set_port, sizeof (set_port)); 2075 2076 /* 2077 * Determine whether we are trying to reset the QKey counter 2078 */ 2079 if (flags & IBT_PORT_RESET_QKEY) 2080 set_port.rqk = 1; 2081 2082 /* Validate that specified port number is legal */ 2083 if (!hermon_portnum_is_valid(state, port)) { 2084 return (IBT_HCA_PORT_INVALID); 2085 } 2086 2087 /* 2088 * Use the Hermon MAD_IFC command to post a GetPortInfo MAD to the 2089 * firmware (for the specified port number). This returns a full 2090 * PortInfo MAD (in "portinfo") from which we pull the current 2091 * capability mask. We then modify the capability mask as directed 2092 * by the "pmod_flags" field, and write the updated capability mask 2093 * using the Hermon SET_IB command (below). 2094 */ 2095 status = hermon_getportinfo_cmd_post(state, port, 2096 HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo); 2097 if (status != HERMON_CMD_SUCCESS) { 2098 if (status == HERMON_CMD_INVALID_STATUS) { 2099 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 2100 } 2101 return (ibc_get_ci_failure(0)); 2102 } 2103 2104 /* 2105 * Convert InfiniBand-defined port capability flags to the format 2106 * specified by the IBTF. Specifically, we modify the capability 2107 * mask based on the specified values. 2108 */ 2109 capmask = portinfo.CapabilityMask; 2110 2111 if (flags & IBT_PORT_RESET_SM) 2112 capmask &= ~SM_CAP_MASK_IS_SM; 2113 else if (flags & IBT_PORT_SET_SM) 2114 capmask |= SM_CAP_MASK_IS_SM; 2115 2116 if (flags & IBT_PORT_RESET_SNMP) 2117 capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD; 2118 else if (flags & IBT_PORT_SET_SNMP) 2119 capmask |= SM_CAP_MASK_IS_SNMP_SUPPD; 2120 2121 if (flags & IBT_PORT_RESET_DEVMGT) 2122 capmask &= ~SM_CAP_MASK_IS_DM_SUPPD; 2123 else if (flags & IBT_PORT_SET_DEVMGT) 2124 capmask |= SM_CAP_MASK_IS_DM_SUPPD; 2125 2126 if (flags & IBT_PORT_RESET_VENDOR) 2127 capmask &= ~SM_CAP_MASK_IS_VM_SUPPD; 2128 else if (flags & IBT_PORT_SET_VENDOR) 2129 capmask |= SM_CAP_MASK_IS_VM_SUPPD; 2130 2131 set_port.cap_mask = capmask; 2132 2133 /* 2134 * Use the Hermon SET_PORT command to update the capability mask and 2135 * (possibly) reset the QKey violation counter for the specified port. 2136 * Note: In general, this operation shouldn't fail. If it does, then 2137 * it is an indication that something (probably in HW, but maybe in 2138 * SW) has gone seriously wrong. 2139 */ 2140 status = hermon_set_port_cmd_post(state, &set_port, port, 2141 HERMON_SLEEPFLAG_FOR_CONTEXT()); 2142 if (status != HERMON_CMD_SUCCESS) { 2143 HERMON_WARNING(state, "failed to modify port capabilities"); 2144 cmn_err(CE_CONT, "Hermon: SET_IB (port %02d) command failed: " 2145 "%08x\n", port, status); 2146 if (status == HERMON_CMD_INVALID_STATUS) { 2147 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 2148 } 2149 return (ibc_get_ci_failure(0)); 2150 } 2151 2152 return (DDI_SUCCESS); 2153 } 2154 2155 2156 /* 2157 * hermon_set_addr_path() 2158 * Context: Can be called from interrupt or base context. 2159 * 2160 * Note: This routine is used for two purposes. It is used to fill in the 2161 * Hermon UDAV fields, and it is used to fill in the address path information 2162 * for QPs. Because the two Hermon structures are similar, common fields can 2163 * be filled in here. Because they are different, however, we pass 2164 * an additional flag to indicate which type is being filled and do each one 2165 * uniquely 2166 */ 2167 2168 int hermon_srate_override = -1; /* allows ease of testing */ 2169 2170 int 2171 hermon_set_addr_path(hermon_state_t *state, ibt_adds_vect_t *av, 2172 hermon_hw_addr_path_t *path, uint_t type) 2173 { 2174 uint_t gidtbl_sz; 2175 hermon_hw_udav_t *udav; 2176 2177 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av)) 2178 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path)) 2179 2180 udav = (hermon_hw_udav_t *)(void *)path; 2181 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav)) 2182 path->mlid = av->av_src_path; 2183 path->rlid = av->av_dlid; 2184 2185 switch (av->av_srate) { 2186 case IBT_SRATE_2: /* 1xSDR-2.5Gb/s injection rate */ 2187 path->max_stat_rate = 7; break; 2188 case IBT_SRATE_10: /* 4xSDR-10.0Gb/s injection rate */ 2189 path->max_stat_rate = 8; break; 2190 case IBT_SRATE_30: /* 12xSDR-30Gb/s injection rate */ 2191 path->max_stat_rate = 9; break; 2192 case IBT_SRATE_5: /* 1xDDR-5Gb/s injection rate */ 2193 path->max_stat_rate = 10; break; 2194 case IBT_SRATE_20: /* 4xDDR-20Gb/s injection rate */ 2195 path->max_stat_rate = 11; break; 2196 case IBT_SRATE_40: /* 4xQDR-40Gb/s injection rate */ 2197 path->max_stat_rate = 12; break; 2198 case IBT_SRATE_60: /* 12xDDR-60Gb/s injection rate */ 2199 path->max_stat_rate = 13; break; 2200 case IBT_SRATE_80: /* 8xQDR-80Gb/s injection rate */ 2201 path->max_stat_rate = 14; break; 2202 case IBT_SRATE_120: /* 12xQDR-120Gb/s injection rate */ 2203 path->max_stat_rate = 15; break; 2204 case IBT_SRATE_NOT_SPECIFIED: /* Max */ 2205 path->max_stat_rate = 0; break; 2206 default: 2207 return (IBT_STATIC_RATE_INVALID); 2208 } 2209 if (hermon_srate_override != -1) /* for evaluating HCA firmware */ 2210 path->max_stat_rate = hermon_srate_override; 2211 2212 /* If "grh" flag is set, then check for valid SGID index too */ 2213 gidtbl_sz = (1 << state->hs_queryport.log_max_gid); 2214 if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) { 2215 return (IBT_SGID_INVALID); 2216 } 2217 2218 /* 2219 * Fill in all "global" values regardless of the value in the GRH 2220 * flag. Because "grh" is not set unless "av_send_grh" is set, the 2221 * hardware will ignore the other "global" values as necessary. Note: 2222 * SW does this here to enable later query operations to return 2223 * exactly the same params that were passed when the addr path was 2224 * last written. 2225 */ 2226 path->grh = av->av_send_grh; 2227 if (type == HERMON_ADDRPATH_QP) { 2228 path->mgid_index = av->av_sgid_ix; 2229 } else { 2230 /* 2231 * For Hermon UDAV, the "mgid_index" field is the index into 2232 * a combined table (not a per-port table), but having sections 2233 * for each port. So some extra calculations are necessary. 2234 */ 2235 2236 path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) + 2237 av->av_sgid_ix; 2238 2239 udav->portnum = av->av_port_num; 2240 } 2241 2242 /* 2243 * According to Hermon PRM, the (31:0) part of rgid_l must be set to 2244 * "0x2" if the 'grh' or 'g' bit is cleared. It also says that we 2245 * only need to do it for UDAV's. So we enforce that here. 2246 * 2247 * NOTE: The entire 64 bits worth of GUID info is actually being 2248 * preserved (for UDAVs) by the callers of this function 2249 * (hermon_ah_alloc() and hermon_ah_modify()) and as long as the 2250 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are 2251 * "don't care". 2252 */ 2253 if ((path->grh) || (type == HERMON_ADDRPATH_QP)) { 2254 path->flow_label = av->av_flow; 2255 path->tclass = av->av_tclass; 2256 path->hop_limit = av->av_hop; 2257 bcopy(&(av->av_dgid.gid_prefix), &(path->rgid_h), 2258 sizeof (uint64_t)); 2259 bcopy(&(av->av_dgid.gid_guid), &(path->rgid_l), 2260 sizeof (uint64_t)); 2261 } else { 2262 path->rgid_l = 0x2; 2263 path->flow_label = 0; 2264 path->tclass = 0; 2265 path->hop_limit = 0; 2266 path->rgid_h = 0; 2267 } 2268 /* extract the default service level */ 2269 udav->sl = (HERMON_DEF_SCHED_SELECTION & 0x3C) >> 2; 2270 2271 return (DDI_SUCCESS); 2272 } 2273 2274 2275 /* 2276 * hermon_get_addr_path() 2277 * Context: Can be called from interrupt or base context. 2278 * 2279 * Note: Just like hermon_set_addr_path() above, this routine is used for two 2280 * purposes. It is used to read in the Hermon UDAV fields, and it is used to 2281 * read in the address path information for QPs. Because the two Hermon 2282 * structures are similar, common fields can be read in here. But because 2283 * they are slightly different, we pass an additional flag to indicate which 2284 * type is being read. 2285 */ 2286 void 2287 hermon_get_addr_path(hermon_state_t *state, hermon_hw_addr_path_t *path, 2288 ibt_adds_vect_t *av, uint_t type) 2289 { 2290 uint_t gidtbl_sz; 2291 2292 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path)) 2293 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av)) 2294 2295 av->av_src_path = path->mlid; 2296 av->av_dlid = path->rlid; 2297 2298 /* Set "av_ipd" value from max_stat_rate */ 2299 switch (path->max_stat_rate) { 2300 case 7: /* 1xSDR-2.5Gb/s injection rate */ 2301 av->av_srate = IBT_SRATE_2; break; 2302 case 8: /* 4xSDR-10.0Gb/s injection rate */ 2303 av->av_srate = IBT_SRATE_10; break; 2304 case 9: /* 12xSDR-30Gb/s injection rate */ 2305 av->av_srate = IBT_SRATE_30; break; 2306 case 10: /* 1xDDR-5Gb/s injection rate */ 2307 av->av_srate = IBT_SRATE_5; break; 2308 case 11: /* 4xDDR-20Gb/s injection rate */ 2309 av->av_srate = IBT_SRATE_20; break; 2310 case 12: /* xQDR-40Gb/s injection rate */ 2311 av->av_srate = IBT_SRATE_40; break; 2312 case 13: /* 12xDDR-60Gb/s injection rate */ 2313 av->av_srate = IBT_SRATE_60; break; 2314 case 14: /* 8xQDR-80Gb/s injection rate */ 2315 av->av_srate = IBT_SRATE_80; break; 2316 case 15: /* 12xQDR-120Gb/s injection rate */ 2317 av->av_srate = IBT_SRATE_120; break; 2318 case 0: /* max */ 2319 av->av_srate = IBT_SRATE_NOT_SPECIFIED; break; 2320 default: /* 1x injection rate */ 2321 av->av_srate = IBT_SRATE_1X; 2322 } 2323 2324 /* 2325 * Extract all "global" values regardless of the value in the GRH 2326 * flag. Because "av_send_grh" is set only if "grh" is set, software 2327 * knows to ignore the other "global" values as necessary. Note: SW 2328 * does it this way to enable these query operations to return exactly 2329 * the same params that were passed when the addr path was last written. 2330 */ 2331 av->av_send_grh = path->grh; 2332 if (type == HERMON_ADDRPATH_QP) { 2333 av->av_sgid_ix = path->mgid_index; 2334 } else { 2335 /* 2336 * For Hermon UDAV, the "mgid_index" field is the index into 2337 * a combined table (not a per-port table). 2338 */ 2339 gidtbl_sz = (1 << state->hs_queryport.log_max_gid); 2340 av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) * 2341 gidtbl_sz); 2342 2343 av->av_port_num = ((hermon_hw_udav_t *)(void *)path)->portnum; 2344 } 2345 av->av_flow = path->flow_label; 2346 av->av_tclass = path->tclass; 2347 av->av_hop = path->hop_limit; 2348 /* this is for alignment issue w/ the addr path struct in Hermon */ 2349 bcopy(&(path->rgid_h), &(av->av_dgid.gid_prefix), sizeof (uint64_t)); 2350 bcopy(&(path->rgid_l), &(av->av_dgid.gid_guid), sizeof (uint64_t)); 2351 } 2352 2353 2354 /* 2355 * hermon_portnum_is_valid() 2356 * Context: Can be called from interrupt or base context. 2357 */ 2358 int 2359 hermon_portnum_is_valid(hermon_state_t *state, uint_t portnum) 2360 { 2361 uint_t max_port; 2362 2363 max_port = state->hs_cfg_profile->cp_num_ports; 2364 if ((portnum <= max_port) && (portnum != 0)) { 2365 return (1); 2366 } else { 2367 return (0); 2368 } 2369 } 2370 2371 2372 /* 2373 * hermon_pkeyindex_is_valid() 2374 * Context: Can be called from interrupt or base context. 2375 */ 2376 int 2377 hermon_pkeyindex_is_valid(hermon_state_t *state, uint_t pkeyindx) 2378 { 2379 uint_t max_pkeyindx; 2380 2381 max_pkeyindx = 1 << state->hs_cfg_profile->cp_log_max_pkeytbl; 2382 if (pkeyindx < max_pkeyindx) { 2383 return (1); 2384 } else { 2385 return (0); 2386 } 2387 } 2388 2389 2390 /* 2391 * hermon_queue_alloc() 2392 * Context: Can be called from interrupt or base context. 2393 */ 2394 int 2395 hermon_queue_alloc(hermon_state_t *state, hermon_qalloc_info_t *qa_info, 2396 uint_t sleepflag) 2397 { 2398 ddi_dma_attr_t dma_attr; 2399 int (*callback)(caddr_t); 2400 uint64_t realsize, alloc_mask; 2401 int flag, status; 2402 2403 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info)) 2404 2405 /* Set the callback flag appropriately */ 2406 callback = (sleepflag == HERMON_SLEEP) ? DDI_DMA_SLEEP : 2407 DDI_DMA_DONTWAIT; 2408 2409 /* 2410 * Initialize many of the default DMA attributes. Then set additional 2411 * alignment restrictions as necessary for the queue memory. Also 2412 * respect the configured value for IOMMU bypass 2413 */ 2414 hermon_dma_attr_init(state, &dma_attr); 2415 dma_attr.dma_attr_align = qa_info->qa_bind_align; 2416 #ifdef __sparc 2417 if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS) { 2418 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 2419 } 2420 #endif 2421 2422 /* Allocate a DMA handle */ 2423 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, callback, NULL, 2424 &qa_info->qa_dmahdl); 2425 if (status != DDI_SUCCESS) { 2426 return (DDI_FAILURE); 2427 } 2428 2429 /* 2430 * Determine the amount of memory to allocate, depending on the values 2431 * in "qa_bind_align" and "qa_alloc_align". The problem we are trying 2432 * to solve here is that allocating a DMA handle with IOMMU bypass 2433 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments 2434 * that are less restrictive than the page size. Since we may need 2435 * stricter alignments on the memory allocated by ddi_dma_mem_alloc() 2436 * (e.g. in Hermon QP work queue memory allocation), we use the 2437 * following method to calculate how much additional memory to request, 2438 * and we enforce our own alignment on the allocated result. 2439 */ 2440 alloc_mask = qa_info->qa_alloc_align - 1; 2441 if (qa_info->qa_bind_align == qa_info->qa_alloc_align) { 2442 realsize = qa_info->qa_size; 2443 } else { 2444 realsize = qa_info->qa_size + alloc_mask; 2445 } 2446 2447 /* 2448 * If we are to allocate the queue from system memory, then use 2449 * ddi_dma_mem_alloc() to find the space. Otherwise, this is a 2450 * host memory allocation, use ddi_umem_alloc(). In either case, 2451 * return a pointer to the memory range allocated (including any 2452 * necessary alignment adjustments), the "real" memory pointer, 2453 * the "real" size, and a ddi_acc_handle_t to use when reading 2454 * from/writing to the memory. 2455 */ 2456 if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) { 2457 /* Allocate system memory for the queue */ 2458 status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize, 2459 &state->hs_reg_accattr, DDI_DMA_CONSISTENT, callback, NULL, 2460 (caddr_t *)&qa_info->qa_buf_real, 2461 (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl); 2462 if (status != DDI_SUCCESS) { 2463 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2464 return (DDI_FAILURE); 2465 } 2466 2467 /* 2468 * Save temporary copy of the real pointer. (This may be 2469 * modified in the last step below). 2470 */ 2471 qa_info->qa_buf_aligned = qa_info->qa_buf_real; 2472 2473 bzero(qa_info->qa_buf_real, qa_info->qa_buf_realsz); 2474 2475 } else { /* HERMON_QUEUE_LOCATION_USERLAND */ 2476 2477 /* Allocate userland mappable memory for the queue */ 2478 flag = (sleepflag == HERMON_SLEEP) ? DDI_UMEM_SLEEP : 2479 DDI_UMEM_NOSLEEP; 2480 qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag, 2481 &qa_info->qa_umemcookie); 2482 if (qa_info->qa_buf_real == NULL) { 2483 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2484 return (DDI_FAILURE); 2485 } 2486 2487 /* 2488 * Save temporary copy of the real pointer. (This may be 2489 * modified in the last step below). 2490 */ 2491 qa_info->qa_buf_aligned = qa_info->qa_buf_real; 2492 2493 } 2494 2495 /* 2496 * The next to last step is to ensure that the final address 2497 * ("qa_buf_aligned") has the appropriate "alloc" alignment 2498 * restriction applied to it (if necessary). 2499 */ 2500 if (qa_info->qa_bind_align != qa_info->qa_alloc_align) { 2501 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t) 2502 qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask); 2503 } 2504 /* 2505 * The last step is to figure out the offset of the start relative 2506 * to the first page of the region - will be used in the eqc/cqc 2507 * passed to the HW 2508 */ 2509 qa_info->qa_pgoffs = (uint_t)((uintptr_t) 2510 qa_info->qa_buf_aligned & HERMON_PAGEOFFSET); 2511 2512 return (DDI_SUCCESS); 2513 } 2514 2515 2516 /* 2517 * hermon_queue_free() 2518 * Context: Can be called from interrupt or base context. 2519 */ 2520 void 2521 hermon_queue_free(hermon_qalloc_info_t *qa_info) 2522 { 2523 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info)) 2524 2525 /* 2526 * Depending on how (i.e. from where) we allocated the memory for 2527 * this queue, we choose the appropriate method for releasing the 2528 * resources. 2529 */ 2530 if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) { 2531 2532 ddi_dma_mem_free(&qa_info->qa_acchdl); 2533 2534 } else if (qa_info->qa_location == HERMON_QUEUE_LOCATION_USERLAND) { 2535 2536 ddi_umem_free(qa_info->qa_umemcookie); 2537 2538 } 2539 2540 /* Always free the dma handle */ 2541 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2542 } 2543 2544 /* 2545 * hermon_create_fmr_pool() 2546 * Create a pool of FMRs. 2547 * Context: Can be called from kernel context only. 2548 */ 2549 int 2550 hermon_create_fmr_pool(hermon_state_t *state, hermon_pdhdl_t pd, 2551 ibt_fmr_pool_attr_t *fmr_attr, hermon_fmrhdl_t *fmrpoolp) 2552 { 2553 hermon_fmrhdl_t fmrpool; 2554 hermon_fmr_list_t *fmr, *fmr_next; 2555 hermon_mrhdl_t mr; 2556 int status; 2557 int sleep; 2558 int i; 2559 2560 sleep = (fmr_attr->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP : 2561 HERMON_NOSLEEP; 2562 if ((sleep == HERMON_SLEEP) && 2563 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 2564 return (IBT_INVALID_PARAM); 2565 } 2566 2567 fmrpool = (hermon_fmrhdl_t)kmem_zalloc(sizeof (*fmrpool), sleep); 2568 if (fmrpool == NULL) { 2569 status = IBT_INSUFF_RESOURCE; 2570 goto fail; 2571 } 2572 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmrpool)) 2573 2574 mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER, 2575 DDI_INTR_PRI(state->hs_intrmsi_pri)); 2576 mutex_init(&fmrpool->remap_lock, NULL, MUTEX_DRIVER, 2577 DDI_INTR_PRI(state->hs_intrmsi_pri)); 2578 mutex_init(&fmrpool->dirty_lock, NULL, MUTEX_DRIVER, 2579 DDI_INTR_PRI(state->hs_intrmsi_pri)); 2580 2581 fmrpool->fmr_state = state; 2582 fmrpool->fmr_flush_function = fmr_attr->fmr_func_hdlr; 2583 fmrpool->fmr_flush_arg = fmr_attr->fmr_func_arg; 2584 fmrpool->fmr_pool_size = 0; 2585 fmrpool->fmr_max_pages = fmr_attr->fmr_max_pages_per_fmr; 2586 fmrpool->fmr_page_sz = fmr_attr->fmr_page_sz; 2587 fmrpool->fmr_dirty_watermark = fmr_attr->fmr_pool_size / 4; 2588 fmrpool->fmr_dirty_len = 0; 2589 fmrpool->fmr_remap_watermark = fmr_attr->fmr_pool_size / 32; 2590 fmrpool->fmr_remap_len = 0; 2591 fmrpool->fmr_flags = fmr_attr->fmr_flags; 2592 fmrpool->fmr_stat_register = 0; 2593 fmrpool->fmr_max_remaps = state->hs_cfg_profile->cp_fmr_max_remaps; 2594 fmrpool->fmr_remap_gen = 1; 2595 2596 fmrpool->fmr_free_list_tail = &fmrpool->fmr_free_list; 2597 fmrpool->fmr_dirty_list = NULL; 2598 fmrpool->fmr_dirty_list_tail = &fmrpool->fmr_dirty_list; 2599 fmrpool->fmr_remap_list = NULL; 2600 fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list; 2601 fmrpool->fmr_pool_size = fmrpool->fmr_free_len = 2602 fmr_attr->fmr_pool_size; 2603 2604 for (i = 0; i < fmr_attr->fmr_pool_size; i++) { 2605 status = hermon_mr_alloc_fmr(state, pd, fmrpool, &mr); 2606 if (status != DDI_SUCCESS) { 2607 goto fail2; 2608 } 2609 2610 fmr = (hermon_fmr_list_t *)kmem_zalloc( 2611 sizeof (hermon_fmr_list_t), sleep); 2612 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2613 2614 fmr->fmr = mr; 2615 fmr->fmr_remaps = 0; 2616 fmr->fmr_remap_gen = fmrpool->fmr_remap_gen; 2617 fmr->fmr_pool = fmrpool; 2618 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 2619 mr->mr_fmr = fmr; 2620 2621 if (!i) /* address of last entry's link */ 2622 fmrpool->fmr_free_list_tail = &fmr->fmr_next; 2623 fmr->fmr_next = fmrpool->fmr_free_list; 2624 fmrpool->fmr_free_list = fmr; 2625 } 2626 2627 /* Set to return pool */ 2628 *fmrpoolp = fmrpool; 2629 2630 IBTF_DPRINTF_L2("fmr", "create_fmr_pool SUCCESS"); 2631 return (IBT_SUCCESS); 2632 fail2: 2633 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) { 2634 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2635 fmr_next = fmr->fmr_next; 2636 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr); 2637 kmem_free(fmr, sizeof (hermon_fmr_list_t)); 2638 } 2639 kmem_free(fmrpool, sizeof (*fmrpool)); 2640 fail: 2641 *fmrpoolp = NULL; 2642 IBTF_DPRINTF_L2("fmr", "create_fmr_pool FAILED"); 2643 if (status == DDI_FAILURE) { 2644 return (ibc_get_ci_failure(0)); 2645 } else { 2646 return (status); 2647 } 2648 } 2649 2650 /* 2651 * hermon_destroy_fmr_pool() 2652 * Destroy an FMR pool and free all associated resources. 2653 * Context: Can be called from kernel context only. 2654 */ 2655 int 2656 hermon_destroy_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool) 2657 { 2658 hermon_fmr_list_t *fmr, *fmr_next; 2659 2660 mutex_enter(&fmrpool->fmr_lock); 2661 hermon_fmr_cleanup(fmrpool); 2662 2663 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) { 2664 fmr_next = fmr->fmr_next; 2665 2666 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr); 2667 kmem_free(fmr, sizeof (hermon_fmr_list_t)); 2668 2669 --fmrpool->fmr_pool_size; 2670 } 2671 ASSERT(fmrpool->fmr_pool_size == 0); 2672 mutex_exit(&fmrpool->fmr_lock); 2673 2674 mutex_destroy(&fmrpool->fmr_lock); 2675 mutex_destroy(&fmrpool->dirty_lock); 2676 mutex_destroy(&fmrpool->remap_lock); 2677 2678 kmem_free(fmrpool, sizeof (*fmrpool)); 2679 IBTF_DPRINTF_L2("fmr", "destroy_fmr_pool SUCCESS"); 2680 return (DDI_SUCCESS); 2681 } 2682 2683 /* 2684 * hermon_flush_fmr_pool() 2685 * Ensure that all unmapped FMRs are fully invalidated. 2686 * Context: Can be called from kernel context only. 2687 */ 2688 /* ARGSUSED */ 2689 int 2690 hermon_flush_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool) 2691 { 2692 /* 2693 * Force the unmapping of all entries on the dirty list, regardless of 2694 * whether the watermark has been hit yet. 2695 */ 2696 /* grab the pool lock */ 2697 mutex_enter(&fmrpool->fmr_lock); 2698 hermon_fmr_cleanup(fmrpool); 2699 mutex_exit(&fmrpool->fmr_lock); 2700 return (DDI_SUCCESS); 2701 } 2702 2703 /* 2704 * hermon_register_physical_fmr() 2705 * Map memory into FMR 2706 * Context: Can be called from interrupt or base context. 2707 */ 2708 int 2709 hermon_register_physical_fmr(hermon_state_t *state, hermon_fmrhdl_t fmrpool, 2710 ibt_pmr_attr_t *mem_pattr, hermon_mrhdl_t *mr, 2711 ibt_pmr_desc_t *mem_desc_p) 2712 { 2713 hermon_fmr_list_t *fmr; 2714 int status; 2715 2716 /* Check length */ 2717 if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf > 2718 fmrpool->fmr_max_pages)) { 2719 return (IBT_MR_LEN_INVALID); 2720 } 2721 2722 mutex_enter(&fmrpool->fmr_lock); 2723 if (fmrpool->fmr_free_list == NULL) { 2724 if (hermon_fmr_verbose & 2) 2725 IBTF_DPRINTF_L2("fmr", "register needs remap"); 2726 mutex_enter(&fmrpool->remap_lock); 2727 if (fmrpool->fmr_remap_list) { 2728 /* add to free list */ 2729 *(fmrpool->fmr_free_list_tail) = 2730 fmrpool->fmr_remap_list; 2731 fmrpool->fmr_remap_list = NULL; 2732 fmrpool->fmr_free_list_tail = 2733 fmrpool->fmr_remap_list_tail; 2734 2735 /* reset list */ 2736 fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list; 2737 fmrpool->fmr_free_len += fmrpool->fmr_remap_len; 2738 fmrpool->fmr_remap_len = 0; 2739 } 2740 mutex_exit(&fmrpool->remap_lock); 2741 } 2742 if (fmrpool->fmr_free_list == NULL) { 2743 if (hermon_fmr_verbose & 2) 2744 IBTF_DPRINTF_L2("fmr", "register needs cleanup"); 2745 hermon_fmr_cleanup(fmrpool); 2746 } 2747 2748 /* grab next free entry */ 2749 fmr = fmrpool->fmr_free_list; 2750 if (fmr == NULL) { 2751 IBTF_DPRINTF_L2("fmr", "WARNING: no free fmr resource"); 2752 cmn_err(CE_CONT, "no free fmr resource\n"); 2753 mutex_exit(&fmrpool->fmr_lock); 2754 return (IBT_INSUFF_RESOURCE); 2755 } 2756 2757 if ((fmrpool->fmr_free_list = fmr->fmr_next) == NULL) 2758 fmrpool->fmr_free_list_tail = &fmrpool->fmr_free_list; 2759 fmr->fmr_next = NULL; 2760 fmrpool->fmr_stat_register++; 2761 mutex_exit(&fmrpool->fmr_lock); 2762 2763 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2764 status = hermon_mr_register_physical_fmr(state, mem_pattr, fmr->fmr, 2765 mem_desc_p); 2766 if (status != DDI_SUCCESS) { 2767 return (status); 2768 } 2769 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr->fmr)) 2770 if (hermon_rdma_debug & 0x4) 2771 IBTF_DPRINTF_L2("fmr", " reg: mr %p key %x", 2772 fmr->fmr, fmr->fmr->mr_rkey); 2773 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*fmr->fmr)) 2774 if (fmr->fmr_remap_gen != fmrpool->fmr_remap_gen) { 2775 fmr->fmr_remap_gen = fmrpool->fmr_remap_gen; 2776 fmr->fmr_remaps = 0; 2777 } 2778 2779 fmr->fmr_remaps++; 2780 2781 *mr = (hermon_mrhdl_t)fmr->fmr; 2782 2783 return (DDI_SUCCESS); 2784 } 2785 2786 /* 2787 * hermon_deregister_fmr() 2788 * Unmap FMR 2789 * Context: Can be called from kernel context only. 2790 */ 2791 int 2792 hermon_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr) 2793 { 2794 hermon_fmrhdl_t fmrpool; 2795 hermon_fmr_list_t *fmr, **fmrlast; 2796 int len; 2797 2798 fmr = mr->mr_fmr; 2799 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2800 fmrpool = fmr->fmr_pool; 2801 2802 /* mark as owned by software */ 2803 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) 2804 *(uint8_t *)(fmr->fmr->mr_mptrsrcp->hr_addr) = 0xF0; 2805 2806 if (fmr->fmr_remaps < 2807 state->hs_cfg_profile->cp_fmr_max_remaps) { 2808 /* add to remap list */ 2809 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) 2810 if (hermon_rdma_debug & 0x4) 2811 IBTF_DPRINTF_L2("fmr", "dereg: mr %p key %x", 2812 fmr->fmr, fmr->fmr->mr_rkey); 2813 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) 2814 mutex_enter(&fmrpool->remap_lock); 2815 fmr->fmr_next = NULL; 2816 *(fmrpool->fmr_remap_list_tail) = fmr; 2817 fmrpool->fmr_remap_list_tail = &fmr->fmr_next; 2818 fmrpool->fmr_remap_len++; 2819 2820 /* conditionally add remap list back to free list */ 2821 fmrlast = NULL; 2822 if (fmrpool->fmr_remap_len >= 2823 fmrpool->fmr_remap_watermark) { 2824 fmr = fmrpool->fmr_remap_list; 2825 fmrlast = fmrpool->fmr_remap_list_tail; 2826 len = fmrpool->fmr_remap_len; 2827 fmrpool->fmr_remap_len = 0; 2828 fmrpool->fmr_remap_list = NULL; 2829 fmrpool->fmr_remap_list_tail = 2830 &fmrpool->fmr_remap_list; 2831 } 2832 mutex_exit(&fmrpool->remap_lock); 2833 if (fmrlast) { 2834 mutex_enter(&fmrpool->fmr_lock); 2835 *(fmrpool->fmr_free_list_tail) = fmr; 2836 fmrpool->fmr_free_list_tail = fmrlast; 2837 fmrpool->fmr_free_len += len; 2838 mutex_exit(&fmrpool->fmr_lock); 2839 } 2840 } else { 2841 /* add to dirty list */ 2842 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) 2843 if (hermon_rdma_debug & 0x4) 2844 IBTF_DPRINTF_L2("fmr", "dirty: mr %p key %x", 2845 fmr->fmr, fmr->fmr->mr_rkey); 2846 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) 2847 2848 mutex_enter(&fmrpool->dirty_lock); 2849 fmr->fmr_next = NULL; 2850 *(fmrpool->fmr_dirty_list_tail) = fmr; 2851 fmrpool->fmr_dirty_list_tail = &fmr->fmr_next; 2852 fmrpool->fmr_dirty_len++; 2853 2854 if (fmrpool->fmr_dirty_len >= 2855 fmrpool->fmr_dirty_watermark) { 2856 mutex_exit(&fmrpool->dirty_lock); 2857 mutex_enter(&fmrpool->fmr_lock); 2858 hermon_fmr_cleanup(fmrpool); 2859 mutex_exit(&fmrpool->fmr_lock); 2860 } else 2861 mutex_exit(&fmrpool->dirty_lock); 2862 } 2863 return (DDI_SUCCESS); 2864 } 2865 2866 /* 2867 * hermon_fmr_cleanup() 2868 * Context: Called from any context. 2869 */ 2870 static void 2871 hermon_fmr_cleanup(hermon_fmrhdl_t fmrpool) 2872 { 2873 int status; 2874 2875 ASSERT(MUTEX_HELD(&fmrpool->fmr_lock)); 2876 2877 if (fmrpool->fmr_stat_register == 0) 2878 return; 2879 2880 fmrpool->fmr_stat_register = 0; 2881 membar_producer(); 2882 2883 if (hermon_fmr_verbose) 2884 IBTF_DPRINTF_L2("fmr", "TPT_SYNC"); 2885 status = hermon_sync_tpt_cmd_post(fmrpool->fmr_state, 2886 HERMON_CMD_NOSLEEP_SPIN); 2887 if (status != HERMON_CMD_SUCCESS) { 2888 cmn_err(CE_WARN, "fmr SYNC_TPT failed(%x)\n", status); 2889 } 2890 fmrpool->fmr_remap_gen++; 2891 2892 /* add everything back to the free list */ 2893 mutex_enter(&fmrpool->dirty_lock); 2894 if (fmrpool->fmr_dirty_list) { 2895 /* add to free list */ 2896 *(fmrpool->fmr_free_list_tail) = fmrpool->fmr_dirty_list; 2897 fmrpool->fmr_dirty_list = NULL; 2898 fmrpool->fmr_free_list_tail = fmrpool->fmr_dirty_list_tail; 2899 2900 /* reset list */ 2901 fmrpool->fmr_dirty_list_tail = &fmrpool->fmr_dirty_list; 2902 fmrpool->fmr_free_len += fmrpool->fmr_dirty_len; 2903 fmrpool->fmr_dirty_len = 0; 2904 } 2905 mutex_exit(&fmrpool->dirty_lock); 2906 2907 mutex_enter(&fmrpool->remap_lock); 2908 if (fmrpool->fmr_remap_list) { 2909 /* add to free list */ 2910 *(fmrpool->fmr_free_list_tail) = fmrpool->fmr_remap_list; 2911 fmrpool->fmr_remap_list = NULL; 2912 fmrpool->fmr_free_list_tail = fmrpool->fmr_remap_list_tail; 2913 2914 /* reset list */ 2915 fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list; 2916 fmrpool->fmr_free_len += fmrpool->fmr_remap_len; 2917 fmrpool->fmr_remap_len = 0; 2918 } 2919 mutex_exit(&fmrpool->remap_lock); 2920 2921 if (fmrpool->fmr_flush_function != NULL) { 2922 (void) fmrpool->fmr_flush_function( 2923 (ibc_fmr_pool_hdl_t)fmrpool, 2924 fmrpool->fmr_flush_arg); 2925 } 2926 } 2927