1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * hermon_misc.c 29 * Hermon Miscellaneous routines - Address Handle, Multicast, Protection 30 * Domain, and port-related operations 31 * 32 * Implements all the routines necessary for allocating, freeing, querying 33 * and modifying Address Handles and Protection Domains. Also implements 34 * all the routines necessary for adding and removing Queue Pairs to/from 35 * Multicast Groups. Lastly, it implements the routines necessary for 36 * port-related query and modify operations. 
37 */ 38 39 #include <sys/types.h> 40 #include <sys/conf.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/modctl.h> 44 #include <sys/bitmap.h> 45 #include <sys/sysmacros.h> 46 47 #include <sys/ib/adapters/hermon/hermon.h> 48 49 extern uint32_t hermon_kernel_data_ro; 50 extern int hermon_rdma_debug; 51 52 /* used for helping uniquify fmr pool taskq name */ 53 static uint_t hermon_debug_fmrpool_cnt = 0x00000000; 54 55 static int hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg, 56 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp, uint_t *qp_found); 57 static int hermon_mcg_qplist_remove(hermon_mcghdl_t mcg, 58 hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp); 59 static void hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp); 60 static void hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp); 61 static uint_t hermon_mcg_walk_mgid_hash(hermon_state_t *state, 62 uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx); 63 static void hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, 64 hermon_hw_mcg_t *mcg_hdr, ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc); 65 static int hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx, 66 uint_t prev_indx, hermon_hw_mcg_t *mcg_entry); 67 static int hermon_mcg_entry_invalidate(hermon_state_t *state, 68 hermon_hw_mcg_t *mcg_entry, uint_t indx); 69 static int hermon_mgid_is_valid(ib_gid_t gid); 70 static int hermon_mlid_is_valid(ib_lid_t lid); 71 static void hermon_fmr_processing(void *fmr_args); 72 static int hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t pool); 73 static void hermon_fmr_cache_init(hermon_fmrhdl_t fmr); 74 static void hermon_fmr_cache_fini(hermon_fmrhdl_t fmr); 75 static int hermon_fmr_avl_compare(const void *q, const void *e); 76 77 78 #define HERMON_MAX_DBR_PAGES_PER_USER 64 79 #define HERMON_DBR_KEY(index, page) \ 80 (((uint64_t)index) * HERMON_MAX_DBR_PAGES_PER_USER + (page)) 81 82 static hermon_udbr_page_t * 83 hermon_dbr_new_user_page(hermon_state_t *state, 
uint_t index, 84 uint_t page) 85 { 86 hermon_udbr_page_t *pagep; 87 ddi_dma_attr_t dma_attr; 88 uint_t cookiecnt; 89 int status; 90 hermon_umap_db_entry_t *umapdb; 91 92 pagep = kmem_alloc(sizeof (*pagep), KM_SLEEP); 93 pagep->upg_index = page; 94 pagep->upg_nfree = PAGESIZE / sizeof (hermon_dbr_t); 95 96 /* Allocate 1 bit per dbr for free/alloc management (0 => "free") */ 97 pagep->upg_free = kmem_zalloc(PAGESIZE / sizeof (hermon_dbr_t) / 8, 98 KM_SLEEP); 99 pagep->upg_kvaddr = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP, 100 &pagep->upg_umemcookie); /* not HERMON_PAGESIZE here */ 101 102 pagep->upg_buf = ddi_umem_iosetup(pagep->upg_umemcookie, 0, 103 PAGESIZE, B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP); 104 105 hermon_dma_attr_init(state, &dma_attr); 106 #ifdef __sparc 107 if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS) 108 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 109 #endif 110 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, 111 DDI_DMA_SLEEP, NULL, &pagep->upg_dmahdl); 112 if (status != DDI_SUCCESS) { 113 IBTF_DPRINTF_L2("hermon", "hermon_new_user_page: " 114 "ddi_dma_buf_bind_handle failed: %d", status); 115 return (NULL); 116 } 117 status = ddi_dma_buf_bind_handle(pagep->upg_dmahdl, 118 pagep->upg_buf, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 119 DDI_DMA_SLEEP, NULL, &pagep->upg_dmacookie, &cookiecnt); 120 if (status != DDI_SUCCESS) { 121 IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: " 122 "ddi_dma_buf_bind_handle failed: %d", status); 123 ddi_dma_free_handle(&pagep->upg_dmahdl); 124 return (NULL); 125 } 126 ASSERT(cookiecnt == 1); 127 128 /* create db entry for mmap */ 129 umapdb = hermon_umap_db_alloc(state->hs_instance, 130 HERMON_DBR_KEY(index, page), MLNX_UMAP_DBRMEM_RSRC, 131 (uint64_t)(uintptr_t)pagep); 132 hermon_umap_db_add(umapdb); 133 return (pagep); 134 } 135 136 137 /*ARGSUSED*/ 138 static int 139 hermon_user_dbr_alloc(hermon_state_t *state, uint_t index, 140 ddi_acc_handle_t *acchdl, hermon_dbr_t **vdbr, uint64_t 
*pdbr, 141 uint64_t *mapoffset) 142 { 143 hermon_user_dbr_t *udbr; 144 hermon_udbr_page_t *pagep; 145 uint_t next_page; 146 int dbr_index; 147 int i1, i2, i3, last; 148 uint64_t u64, mask; 149 150 mutex_enter(&state->hs_dbr_lock); 151 for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link) 152 if (udbr->udbr_index == index) 153 break; 154 if (udbr == NULL) { 155 udbr = kmem_alloc(sizeof (*udbr), KM_SLEEP); 156 udbr->udbr_link = state->hs_user_dbr; 157 state->hs_user_dbr = udbr; 158 udbr->udbr_index = index; 159 udbr->udbr_pagep = NULL; 160 } 161 pagep = udbr->udbr_pagep; 162 next_page = (pagep == NULL) ? 0 : (pagep->upg_index + 1); 163 while (pagep != NULL) 164 if (pagep->upg_nfree > 0) 165 break; 166 else 167 pagep = pagep->upg_link; 168 if (pagep == NULL) { 169 pagep = hermon_dbr_new_user_page(state, index, next_page); 170 if (pagep == NULL) { 171 mutex_exit(&state->hs_dbr_lock); 172 return (DDI_FAILURE); 173 } 174 pagep->upg_link = udbr->udbr_pagep; 175 udbr->udbr_pagep = pagep; 176 } 177 178 /* Since nfree > 0, we're assured the loops below will succeed */ 179 180 /* First, find a 64-bit (not ~0) that has a free dbr */ 181 last = PAGESIZE / sizeof (uint64_t) / 64; 182 mask = ~0ull; 183 for (i1 = 0; i1 < last; i1++) 184 if ((pagep->upg_free[i1] & mask) != mask) 185 break; 186 u64 = pagep->upg_free[i1]; 187 188 /* Second, find a byte (not 0xff) that has a free dbr */ 189 last = sizeof (uint64_t) / sizeof (uint8_t); 190 for (i2 = 0, mask = 0xff; i2 < last; i2++, mask <<= 8) 191 if ((u64 & mask) != mask) 192 break; 193 194 /* Third, find a bit that is free (0) */ 195 for (i3 = 0; i3 < sizeof (uint64_t) / sizeof (uint8_t); i3++) 196 if ((u64 & (1ul << (i3 + 8 * i2))) == 0) 197 break; 198 199 /* Mark it as allocated */ 200 pagep->upg_free[i1] |= (1ul << (i3 + 8 * i2)); 201 202 dbr_index = ((i1 * sizeof (uint64_t)) + i2) * sizeof (uint64_t) + i3; 203 pagep->upg_nfree--; 204 ((uint64_t *)(void *)pagep->upg_kvaddr)[dbr_index] = 0; /* clear dbr */ 205 
*mapoffset = ((HERMON_DBR_KEY(index, pagep->upg_index) << 206 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_DBRMEM_RSRC) << PAGESHIFT; 207 *vdbr = (hermon_dbr_t *)((uint64_t *)(void *)pagep->upg_kvaddr + 208 dbr_index); 209 *pdbr = pagep->upg_dmacookie.dmac_laddress + dbr_index * 210 sizeof (uint64_t); 211 212 mutex_exit(&state->hs_dbr_lock); 213 return (DDI_SUCCESS); 214 } 215 216 static void 217 hermon_user_dbr_free(hermon_state_t *state, uint_t index, hermon_dbr_t *record) 218 { 219 hermon_user_dbr_t *udbr; 220 hermon_udbr_page_t *pagep; 221 caddr_t kvaddr; 222 uint_t dbr_index; 223 uint_t max_free = PAGESIZE / sizeof (hermon_dbr_t); 224 int i1, i2; 225 226 dbr_index = (uintptr_t)record & PAGEOFFSET; /* offset (not yet index) */ 227 kvaddr = (caddr_t)record - dbr_index; 228 dbr_index /= sizeof (hermon_dbr_t); /* now it's the index */ 229 230 mutex_enter(&state->hs_dbr_lock); 231 for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link) 232 if (udbr->udbr_index == index) 233 break; 234 if (udbr == NULL) { 235 IBTF_DPRINTF_L2("hermon", "free user dbr: udbr struct not " 236 "found for index %x", index); 237 mutex_exit(&state->hs_dbr_lock); 238 return; 239 } 240 for (pagep = udbr->udbr_pagep; pagep != NULL; pagep = pagep->upg_link) 241 if (pagep->upg_kvaddr == kvaddr) 242 break; 243 if (pagep == NULL) { 244 IBTF_DPRINTF_L2("hermon", "free user dbr: pagep struct not" 245 " found for index %x, kvaddr %p, DBR index %x", 246 index, kvaddr, dbr_index); 247 mutex_exit(&state->hs_dbr_lock); 248 return; 249 } 250 if (pagep->upg_nfree >= max_free) { 251 IBTF_DPRINTF_L2("hermon", "free user dbr: overflow: " 252 "UCE index %x, DBR index %x", index, dbr_index); 253 mutex_exit(&state->hs_dbr_lock); 254 return; 255 } 256 ASSERT(dbr_index < max_free); 257 i1 = dbr_index / 64; 258 i2 = dbr_index % 64; 259 ASSERT((pagep->upg_free[i1] & (1ul << i2)) == (1ul << i2)); 260 pagep->upg_free[i1] &= ~(1ul << i2); 261 pagep->upg_nfree++; 262 mutex_exit(&state->hs_dbr_lock); 263 } 264 
265 /* 266 * hermon_dbr_page_alloc() 267 * first page allocation - called from attach or open 268 * in this case, we want exactly one page per call, and aligned on a 269 * page - and may need to be mapped to the user for access 270 */ 271 int 272 hermon_dbr_page_alloc(hermon_state_t *state, hermon_dbr_info_t **dinfo) 273 { 274 int status; 275 ddi_dma_handle_t dma_hdl; 276 ddi_acc_handle_t acc_hdl; 277 ddi_dma_attr_t dma_attr; 278 ddi_dma_cookie_t cookie; 279 uint_t cookie_cnt; 280 int i; 281 hermon_dbr_info_t *info; 282 caddr_t dmaaddr; 283 uint64_t dmalen; 284 285 info = kmem_zalloc(sizeof (hermon_dbr_info_t), KM_SLEEP); 286 287 /* 288 * Initialize many of the default DMA attributes. Then set additional 289 * alignment restrictions if necessary for the dbr memory, meaning 290 * page aligned. Also use the configured value for IOMMU bypass 291 */ 292 hermon_dma_attr_init(state, &dma_attr); 293 dma_attr.dma_attr_align = PAGESIZE; 294 dma_attr.dma_attr_sgllen = 1; /* make sure only one cookie */ 295 #ifdef __sparc 296 if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS) 297 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 298 #endif 299 300 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, 301 DDI_DMA_SLEEP, NULL, &dma_hdl); 302 if (status != DDI_SUCCESS) { 303 kmem_free((void *)info, sizeof (hermon_dbr_info_t)); 304 cmn_err(CE_NOTE, "dbr DMA handle alloc failed\n"); 305 return (DDI_FAILURE); 306 } 307 308 status = ddi_dma_mem_alloc(dma_hdl, PAGESIZE, 309 &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 310 NULL, &dmaaddr, (size_t *)&dmalen, &acc_hdl); 311 if (status != DDI_SUCCESS) { 312 ddi_dma_free_handle(&dma_hdl); 313 cmn_err(CE_CONT, "dbr DMA mem alloc failed(status %d)", status); 314 kmem_free((void *)info, sizeof (hermon_dbr_info_t)); 315 return (DDI_FAILURE); 316 } 317 318 /* this memory won't be IB registered, so do the bind here */ 319 status = ddi_dma_addr_bind_handle(dma_hdl, NULL, 320 dmaaddr, (size_t)dmalen, DDI_DMA_RDWR 
| 321 DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt); 322 if (status != DDI_SUCCESS) { 323 ddi_dma_mem_free(&acc_hdl); 324 ddi_dma_free_handle(&dma_hdl); 325 kmem_free((void *)info, sizeof (hermon_dbr_info_t)); 326 cmn_err(CE_CONT, "dbr DMA bind handle failed (status %d)", 327 status); 328 return (DDI_FAILURE); 329 } 330 *dinfo = info; /* Pass back the pointer */ 331 332 /* init the info structure with returned info */ 333 info->dbr_dmahdl = dma_hdl; 334 info->dbr_acchdl = acc_hdl; 335 info->dbr_page = (hermon_dbr_t *)(void *)dmaaddr; 336 info->dbr_link = NULL; 337 /* extract the phys addr from the cookie */ 338 info->dbr_paddr = cookie.dmac_laddress; 339 info->dbr_firstfree = 0; 340 info->dbr_nfree = HERMON_NUM_DBR_PER_PAGE; 341 /* link all DBrs onto the free list */ 342 for (i = 0; i < HERMON_NUM_DBR_PER_PAGE; i++) { 343 info->dbr_page[i] = i + 1; 344 } 345 346 return (DDI_SUCCESS); 347 } 348 349 350 /* 351 * hermon_dbr_alloc() 352 * DBr record allocation - called from alloc cq/qp/srq 353 * will check for available dbrs in current 354 * page - if needed it will allocate another and link them 355 */ 356 357 int 358 hermon_dbr_alloc(hermon_state_t *state, uint_t index, ddi_acc_handle_t *acchdl, 359 hermon_dbr_t **vdbr, uint64_t *pdbr, uint64_t *mapoffset) 360 { 361 hermon_dbr_t *record = NULL; 362 hermon_dbr_info_t *info = NULL; 363 uint32_t idx; 364 int status; 365 366 if (index != state->hs_kernel_uar_index) 367 return (hermon_user_dbr_alloc(state, index, acchdl, vdbr, pdbr, 368 mapoffset)); 369 370 mutex_enter(&state->hs_dbr_lock); 371 for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link) 372 if (info->dbr_nfree != 0) 373 break; /* found a page w/ one available */ 374 375 if (info == NULL) { /* did NOT find a page with one available */ 376 status = hermon_dbr_page_alloc(state, &info); 377 if (status != DDI_SUCCESS) { 378 /* do error handling */ 379 mutex_exit(&state->hs_dbr_lock); 380 return (DDI_FAILURE); 381 } 382 /* got a new 
page, so link it in. */ 383 info->dbr_link = state->hs_kern_dbr; 384 state->hs_kern_dbr = info; 385 } 386 idx = info->dbr_firstfree; 387 record = info->dbr_page + idx; 388 info->dbr_firstfree = *record; 389 info->dbr_nfree--; 390 *record = 0; 391 392 *acchdl = info->dbr_acchdl; 393 *vdbr = record; 394 *pdbr = info->dbr_paddr + idx * sizeof (hermon_dbr_t); 395 mutex_exit(&state->hs_dbr_lock); 396 return (DDI_SUCCESS); 397 } 398 399 /* 400 * hermon_dbr_free() 401 * DBr record deallocation - called from free cq/qp 402 * will update the counter in the header, and invalidate 403 * the dbr, but will NEVER free pages of dbrs - small 404 * price to pay, but userland access never will anyway 405 */ 406 void 407 hermon_dbr_free(hermon_state_t *state, uint_t indx, hermon_dbr_t *record) 408 { 409 hermon_dbr_t *page; 410 hermon_dbr_info_t *info; 411 412 if (indx != state->hs_kernel_uar_index) { 413 hermon_user_dbr_free(state, indx, record); 414 return; 415 } 416 page = (hermon_dbr_t *)(uintptr_t)((uintptr_t)record & PAGEMASK); 417 mutex_enter(&state->hs_dbr_lock); 418 for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link) 419 if (info->dbr_page == page) 420 break; 421 ASSERT(info != NULL); 422 *record = info->dbr_firstfree; 423 info->dbr_firstfree = record - info->dbr_page; 424 info->dbr_nfree++; 425 mutex_exit(&state->hs_dbr_lock); 426 } 427 428 /* 429 * hermon_dbr_kern_free() 430 * Context: Can be called only from detach context. 431 * 432 * Free all kernel dbr pages. This includes the freeing of all the dma 433 * resources acquired during the allocation of the pages. 434 * 435 * Also, free all the user dbr pages. 
436 */ 437 void 438 hermon_dbr_kern_free(hermon_state_t *state) 439 { 440 hermon_dbr_info_t *info, *link; 441 hermon_user_dbr_t *udbr, *next; 442 hermon_udbr_page_t *pagep, *nextp; 443 hermon_umap_db_entry_t *umapdb; 444 int instance, status; 445 uint64_t value; 446 extern hermon_umap_db_t hermon_userland_rsrc_db; 447 448 mutex_enter(&state->hs_dbr_lock); 449 for (info = state->hs_kern_dbr; info != NULL; info = link) { 450 (void) ddi_dma_unbind_handle(info->dbr_dmahdl); 451 ddi_dma_mem_free(&info->dbr_acchdl); /* free page */ 452 ddi_dma_free_handle(&info->dbr_dmahdl); 453 link = info->dbr_link; 454 kmem_free(info, sizeof (hermon_dbr_info_t)); 455 } 456 457 udbr = state->hs_user_dbr; 458 instance = state->hs_instance; 459 mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock); 460 while (udbr != NULL) { 461 pagep = udbr->udbr_pagep; 462 while (pagep != NULL) { 463 /* probably need to remove "db" */ 464 (void) ddi_dma_unbind_handle(pagep->upg_dmahdl); 465 ddi_dma_free_handle(&pagep->upg_dmahdl); 466 freerbuf(pagep->upg_buf); 467 ddi_umem_free(pagep->upg_umemcookie); 468 status = hermon_umap_db_find_nolock(instance, 469 HERMON_DBR_KEY(udbr->udbr_index, 470 pagep->upg_index), MLNX_UMAP_DBRMEM_RSRC, 471 &value, HERMON_UMAP_DB_REMOVE, &umapdb); 472 if (status == DDI_SUCCESS) 473 hermon_umap_db_free(umapdb); 474 kmem_free(pagep->upg_free, 475 PAGESIZE / sizeof (hermon_dbr_t) / 8); 476 nextp = pagep->upg_link; 477 kmem_free(pagep, sizeof (*pagep)); 478 pagep = nextp; 479 } 480 next = udbr->udbr_link; 481 kmem_free(udbr, sizeof (*udbr)); 482 udbr = next; 483 } 484 mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock); 485 mutex_exit(&state->hs_dbr_lock); 486 } 487 488 /* 489 * hermon_ah_alloc() 490 * Context: Can be called only from user or kernel context. 
491 */ 492 int 493 hermon_ah_alloc(hermon_state_t *state, hermon_pdhdl_t pd, 494 ibt_adds_vect_t *attr_p, hermon_ahhdl_t *ahhdl, uint_t sleepflag) 495 { 496 hermon_rsrc_t *rsrc; 497 hermon_hw_udav_t *udav; 498 hermon_ahhdl_t ah; 499 int status; 500 501 /* 502 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to 503 * indicate that we wish to allocate an "invalid" (i.e. empty) 504 * address handle XXX 505 */ 506 507 /* Validate that specified port number is legal */ 508 if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) { 509 return (IBT_HCA_PORT_INVALID); 510 } 511 512 /* 513 * Allocate the software structure for tracking the address handle 514 * (i.e. the Hermon Address Handle struct). 515 */ 516 status = hermon_rsrc_alloc(state, HERMON_AHHDL, 1, sleepflag, &rsrc); 517 if (status != DDI_SUCCESS) { 518 return (IBT_INSUFF_RESOURCE); 519 } 520 ah = (hermon_ahhdl_t)rsrc->hr_addr; 521 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah)) 522 523 /* Increment the reference count on the protection domain (PD) */ 524 hermon_pd_refcnt_inc(pd); 525 526 udav = (hermon_hw_udav_t *)kmem_zalloc(sizeof (hermon_hw_udav_t), 527 KM_SLEEP); 528 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav)) 529 530 /* 531 * Fill in the UDAV data. We first zero out the UDAV, then populate 532 * it by then calling hermon_set_addr_path() to fill in the common 533 * portions that can be pulled from the "ibt_adds_vect_t" passed in 534 */ 535 status = hermon_set_addr_path(state, attr_p, 536 (hermon_hw_addr_path_t *)udav, HERMON_ADDRPATH_UDAV); 537 if (status != DDI_SUCCESS) { 538 hermon_pd_refcnt_dec(pd); 539 hermon_rsrc_free(state, &rsrc); 540 return (status); 541 } 542 udav->pd = pd->pd_pdnum; 543 udav->sl = attr_p->av_srvl; 544 545 /* 546 * Fill in the rest of the Hermon Address Handle struct. 547 * 548 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field 549 * here because we may need to return it later to the IBTF (as a 550 * result of a subsequent query operation). 
Unlike the other UDAV 551 * parameters, the value of "av_dgid.gid_guid" is not always preserved. 552 * The reason for this is described in hermon_set_addr_path(). 553 */ 554 ah->ah_rsrcp = rsrc; 555 ah->ah_pdhdl = pd; 556 ah->ah_udav = udav; 557 ah->ah_save_guid = attr_p->av_dgid.gid_guid; 558 *ahhdl = ah; 559 560 return (DDI_SUCCESS); 561 } 562 563 564 /* 565 * hermon_ah_free() 566 * Context: Can be called only from user or kernel context. 567 */ 568 /* ARGSUSED */ 569 int 570 hermon_ah_free(hermon_state_t *state, hermon_ahhdl_t *ahhdl, uint_t sleepflag) 571 { 572 hermon_rsrc_t *rsrc; 573 hermon_pdhdl_t pd; 574 hermon_ahhdl_t ah; 575 576 /* 577 * Pull all the necessary information from the Hermon Address Handle 578 * struct. This is necessary here because the resource for the 579 * AH is going to be freed up as part of this operation. 580 */ 581 ah = *ahhdl; 582 mutex_enter(&ah->ah_lock); 583 rsrc = ah->ah_rsrcp; 584 pd = ah->ah_pdhdl; 585 mutex_exit(&ah->ah_lock); 586 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah)) 587 588 /* Free the UDAV memory */ 589 kmem_free(ah->ah_udav, sizeof (hermon_hw_udav_t)); 590 591 /* Decrement the reference count on the protection domain (PD) */ 592 hermon_pd_refcnt_dec(pd); 593 594 /* Free the Hermon Address Handle structure */ 595 hermon_rsrc_free(state, &rsrc); 596 597 /* Set the ahhdl pointer to NULL and return success */ 598 *ahhdl = NULL; 599 600 return (DDI_SUCCESS); 601 } 602 603 604 /* 605 * hermon_ah_query() 606 * Context: Can be called from interrupt or base context. 607 */ 608 /* ARGSUSED */ 609 int 610 hermon_ah_query(hermon_state_t *state, hermon_ahhdl_t ah, hermon_pdhdl_t *pd, 611 ibt_adds_vect_t *attr_p) 612 { 613 mutex_enter(&ah->ah_lock); 614 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p)) 615 616 /* 617 * Pull the PD and UDAV from the Hermon Address Handle structure 618 */ 619 *pd = ah->ah_pdhdl; 620 621 /* 622 * Fill in "ibt_adds_vect_t". 
We call hermon_get_addr_path() to fill 623 * the common portions that can be pulled from the UDAV we pass in. 624 * 625 * NOTE: We will also fill the "av_dgid.gid_guid" field from the 626 * "ah_save_guid" field we have previously saved away. The reason 627 * for this is described in hermon_ah_alloc() and hermon_ah_modify(). 628 */ 629 hermon_get_addr_path(state, (hermon_hw_addr_path_t *)ah->ah_udav, 630 attr_p, HERMON_ADDRPATH_UDAV); 631 632 attr_p->av_dgid.gid_guid = ah->ah_save_guid; 633 634 mutex_exit(&ah->ah_lock); 635 return (DDI_SUCCESS); 636 } 637 638 639 /* 640 * hermon_ah_modify() 641 * Context: Can be called from interrupt or base context. 642 */ 643 /* ARGSUSED */ 644 int 645 hermon_ah_modify(hermon_state_t *state, hermon_ahhdl_t ah, 646 ibt_adds_vect_t *attr_p) 647 { 648 hermon_hw_udav_t old_udav; 649 uint64_t data_old; 650 int status, size, i; 651 652 /* Validate that specified port number is legal */ 653 if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) { 654 return (IBT_HCA_PORT_INVALID); 655 } 656 657 mutex_enter(&ah->ah_lock); 658 659 /* Save a copy of the current UDAV data in old_udav. */ 660 bcopy(ah->ah_udav, &old_udav, sizeof (hermon_hw_udav_t)); 661 662 /* 663 * Fill in the new UDAV with the caller's data, passed in via the 664 * "ibt_adds_vect_t" structure. 665 * 666 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid" 667 * field here (just as we did during hermon_ah_alloc()) because we 668 * may need to return it later to the IBTF (as a result of a 669 * subsequent query operation). As explained in hermon_ah_alloc(), 670 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid" 671 * is not always preserved. The reason for this is described in 672 * hermon_set_addr_path(). 
673 */ 674 status = hermon_set_addr_path(state, attr_p, 675 (hermon_hw_addr_path_t *)ah->ah_udav, HERMON_ADDRPATH_UDAV); 676 if (status != DDI_SUCCESS) { 677 mutex_exit(&ah->ah_lock); 678 return (status); 679 } 680 ah->ah_save_guid = attr_p->av_dgid.gid_guid; 681 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(ah->ah_udav))) 682 ah->ah_udav->sl = attr_p->av_srvl; 683 684 /* 685 * Copy changes into the new UDAV. 686 * Note: We copy in 64-bit chunks. For the first two of these 687 * chunks it is necessary to read the current contents of the 688 * UDAV, mask off the modifiable portions (maintaining any 689 * of the "reserved" portions), and then mask on the new data. 690 */ 691 size = sizeof (hermon_hw_udav_t) >> 3; 692 for (i = 0; i < size; i++) { 693 data_old = ((uint64_t *)&old_udav)[i]; 694 695 /* 696 * Apply mask to change only the relevant values. 697 */ 698 if (i == 0) { 699 data_old = data_old & HERMON_UDAV_MODIFY_MASK0; 700 } else if (i == 1) { 701 data_old = data_old & HERMON_UDAV_MODIFY_MASK1; 702 } else { 703 data_old = 0; 704 } 705 706 /* Store the updated values to the UDAV */ 707 ((uint64_t *)ah->ah_udav)[i] |= data_old; 708 } 709 710 /* 711 * Put the valid PD number back into the UDAV entry, as it 712 * might have been clobbered above. 713 */ 714 ah->ah_udav->pd = old_udav.pd; 715 716 717 mutex_exit(&ah->ah_lock); 718 return (DDI_SUCCESS); 719 } 720 721 /* 722 * hermon_mcg_attach() 723 * Context: Can be called only from user or kernel context. 724 */ 725 int 726 hermon_mcg_attach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid, 727 ib_lid_t lid) 728 { 729 hermon_rsrc_t *rsrc; 730 hermon_hw_mcg_t *mcg_entry; 731 hermon_hw_mcg_qp_list_t *mcg_entry_qplist; 732 hermon_mcghdl_t mcg, newmcg; 733 uint64_t mgid_hash; 734 uint32_t end_indx; 735 int status; 736 uint_t qp_found; 737 738 /* 739 * It is only allowed to attach MCG to UD queue pairs. 
Verify 740 * that the intended QP is of the appropriate transport type 741 */ 742 if (qp->qp_serv_type != HERMON_QP_UD) { 743 return (IBT_QP_SRV_TYPE_INVALID); 744 } 745 746 /* 747 * Check for invalid Multicast DLID. Specifically, all Multicast 748 * LIDs should be within a well defined range. If the specified LID 749 * is outside of that range, then return an error. 750 */ 751 if (hermon_mlid_is_valid(lid) == 0) { 752 return (IBT_MC_MLID_INVALID); 753 } 754 /* 755 * Check for invalid Multicast GID. All Multicast GIDs should have 756 * a well-defined pattern of bits and flags that are allowable. If 757 * the specified GID does not meet the criteria, then return an error. 758 */ 759 if (hermon_mgid_is_valid(gid) == 0) { 760 return (IBT_MC_MGID_INVALID); 761 } 762 763 /* 764 * Compute the MGID hash value. Since the MCG table is arranged as 765 * a number of separate hash chains, this operation converts the 766 * specified MGID into the starting index of an entry in the hash 767 * table (i.e. the index for the start of the appropriate hash chain). 768 * Subsequent operations below will walk the chain searching for the 769 * right place to add this new QP. 770 */ 771 status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid, 772 &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT()); 773 if (status != HERMON_CMD_SUCCESS) { 774 cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n", 775 status); 776 if (status == HERMON_CMD_INVALID_STATUS) { 777 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 778 } 779 return (ibc_get_ci_failure(0)); 780 } 781 782 /* 783 * Grab the multicast group mutex. Then grab the pre-allocated 784 * temporary buffer used for holding and/or modifying MCG entries. 785 * Zero out the temporary MCG entry before we begin. 
786 */ 787 mutex_enter(&state->hs_mcglock); 788 mcg_entry = state->hs_mcgtmp; 789 mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry); 790 bzero(mcg_entry, HERMON_MCGMEM_SZ(state)); 791 792 /* 793 * Walk through the array of MCG entries starting at "mgid_hash". 794 * Try to find the appropriate place for this new QP to be added. 795 * This could happen when the first entry of the chain has MGID == 0 796 * (which means that the hash chain is empty), or because we find 797 * an entry with the same MGID (in which case we'll add the QP to 798 * that MCG), or because we come to the end of the chain (in which 799 * case this is the first QP being added to the multicast group that 800 * corresponds to the MGID. The hermon_mcg_walk_mgid_hash() routine 801 * walks the list and returns an index into the MCG table. The entry 802 * at this index is then checked to determine which case we have 803 * fallen into (see below). Note: We are using the "shadow" MCG 804 * list (of hermon_mcg_t structs) for this lookup because the real 805 * MCG entries are in hardware (and the lookup process would be much 806 * more time consuming). 807 */ 808 end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL); 809 mcg = &state->hs_mcghdl[end_indx]; 810 811 /* 812 * If MGID == 0, then the hash chain is empty. Just fill in the 813 * current entry. Note: No need to allocate an MCG table entry 814 * as all the hash chain "heads" are already preallocated. 815 */ 816 if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) { 817 818 /* Fill in the current entry in the "shadow" MCG list */ 819 hermon_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL); 820 821 /* 822 * Try to add the new QP number to the list. This (and the 823 * above) routine fills in a temporary MCG. The "mcg_entry" 824 * and "mcg_entry_qplist" pointers simply point to different 825 * offsets within the same temporary copy of the MCG (for 826 * convenience). 
Note: If this fails, we need to invalidate 827 * the entries we've already put into the "shadow" list entry 828 * above. 829 */ 830 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp, 831 &qp_found); 832 if (status != DDI_SUCCESS) { 833 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s)); 834 mutex_exit(&state->hs_mcglock); 835 return (status); 836 } 837 if (!qp_found) 838 mcg_entry->member_cnt = (mcg->mcg_num_qps + 1); 839 /* set the member count */ 840 841 /* 842 * Once the temporary MCG has been filled in, write the entry 843 * into the appropriate location in the Hermon MCG entry table. 844 * If it's successful, then drop the lock and return success. 845 * Note: In general, this operation shouldn't fail. If it 846 * does, then it is an indication that something (probably in 847 * HW, but maybe in SW) has gone seriously wrong. We still 848 * want to zero out the entries that we've filled in above 849 * (in the hermon_mcg_setup_new_hdr() routine). 850 */ 851 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx, 852 HERMON_CMD_NOSLEEP_SPIN); 853 if (status != HERMON_CMD_SUCCESS) { 854 bzero(mcg, sizeof (struct hermon_sw_mcg_list_s)); 855 mutex_exit(&state->hs_mcglock); 856 HERMON_WARNING(state, "failed to write MCG entry"); 857 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: " 858 "%08x\n", status); 859 if (status == HERMON_CMD_INVALID_STATUS) { 860 hermon_fm_ereport(state, HCA_SYS_ERR, 861 HCA_ERR_SRV_LOST); 862 } 863 return (ibc_get_ci_failure(0)); 864 } 865 866 /* 867 * Now that we know all the Hermon firmware accesses have been 868 * successful, we update the "shadow" MCG entry by incrementing 869 * the "number of attached QPs" count. 870 * 871 * We increment only if the QP is not already part of the 872 * MCG by checking the 'qp_found' flag returned from the 873 * qplist_add above. 874 */ 875 if (!qp_found) { 876 mcg->mcg_num_qps++; 877 878 /* 879 * Increment the refcnt for this QP. 
Because the QP 880 * was added to this MCG, the refcnt must be 881 * incremented. 882 */ 883 hermon_qp_mcg_refcnt_inc(qp); 884 } 885 886 /* 887 * We drop the lock and return success. 888 */ 889 mutex_exit(&state->hs_mcglock); 890 return (DDI_SUCCESS); 891 } 892 893 /* 894 * If the specified MGID matches the MGID in the current entry, then 895 * we need to try to add the QP to the current MCG entry. In this 896 * case, it means that we need to read the existing MCG entry (into 897 * the temporary MCG), add the new QP number to the temporary entry 898 * (using the same method we used above), and write the entry back 899 * to the hardware (same as above). 900 */ 901 if ((mcg->mcg_mgid_h == gid.gid_prefix) && 902 (mcg->mcg_mgid_l == gid.gid_guid)) { 903 904 /* 905 * Read the current MCG entry into the temporary MCG. Note: 906 * In general, this operation shouldn't fail. If it does, 907 * then it is an indication that something (probably in HW, 908 * but maybe in SW) has gone seriously wrong. 909 */ 910 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx, 911 HERMON_CMD_NOSLEEP_SPIN); 912 if (status != HERMON_CMD_SUCCESS) { 913 mutex_exit(&state->hs_mcglock); 914 HERMON_WARNING(state, "failed to read MCG entry"); 915 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: " 916 "%08x\n", status); 917 if (status == HERMON_CMD_INVALID_STATUS) { 918 hermon_fm_ereport(state, HCA_SYS_ERR, 919 HCA_ERR_SRV_LOST); 920 } 921 return (ibc_get_ci_failure(0)); 922 } 923 924 /* 925 * Try to add the new QP number to the list. This routine 926 * fills in the necessary pieces of the temporary MCG. The 927 * "mcg_entry_qplist" pointer is used to point to the portion 928 * of the temporary MCG that holds the QP numbers. 929 * 930 * Note: hermon_mcg_qplist_add() returns SUCCESS if it 931 * already found the QP in the list. In this case, the QP is 932 * not added on to the list again. Check the flag 'qp_found' 933 * if this value is needed to be known. 
	 *
	 */
	status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
	    &qp_found);
	if (status != DDI_SUCCESS) {
		mutex_exit(&state->hs_mcglock);
		return (status);
	}
	/* set the member count (only if the QP was not already a member) */
	if (!qp_found)
		mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);

	/*
	 * Once the temporary MCG has been updated, write the entry
	 * into the appropriate location in the Hermon MCG entry table.
	 * If it's successful, then drop the lock and return success.
	 * Note: In general, this operation shouldn't fail.  If it
	 * does, then it is an indication that something (probably in
	 * HW, but maybe in SW) has gone seriously wrong.
	 */
	status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&state->hs_mcglock);
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
		    "%08x\n", status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR,
			    HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Now that we know all the Hermon firmware accesses have been
	 * successful, we update the current "shadow" MCG entry by
	 * incrementing the "number of attached QPs" count.
	 *
	 * We increment only if the QP is not already part of the
	 * MCG by checking the 'qp_found' flag returned from the
	 * qplist_add above.
	 */
	if (!qp_found) {
		mcg->mcg_num_qps++;

		/*
		 * Increment the refcnt for this QP.  Because the QP
		 * was added to this MCG, the refcnt must be
		 * incremented.
		 */
		hermon_qp_mcg_refcnt_inc(qp);
	}

	/*
	 * We drop the lock and return success.
	 */
	mutex_exit(&state->hs_mcglock);
	return (DDI_SUCCESS);
	}

	/*
	 * If we've reached here, then we're at the end of the hash chain.
	 * We need to allocate a new MCG entry, fill it in, write it to Hermon,
	 * and update the previous entry to link the new one to the end of the
	 * chain.
	 */

	/*
	 * Allocate an MCG table entry.  This will be filled in with all
	 * the necessary parameters to define the multicast group.  Then it
	 * will be written to the hardware in the next-to-last step below.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MCG, 1, HERMON_NOSLEEP, &rsrc);
	if (status != DDI_SUCCESS) {
		mutex_exit(&state->hs_mcglock);
		return (IBT_INSUFF_RESOURCE);
	}

	/*
	 * Fill in the new entry in the "shadow" MCG list.  Note: Just as
	 * it does above, hermon_mcg_setup_new_hdr() also fills in a portion
	 * of the temporary MCG entry (the rest of which will be filled in by
	 * hermon_mcg_qplist_add() below)
	 */
	newmcg = &state->hs_mcghdl[rsrc->hr_indx];
	hermon_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc);

	/*
	 * Try to add the new QP number to the list.  This routine fills in
	 * the final necessary pieces of the temporary MCG.  The
	 * "mcg_entry_qplist" pointer is used to point to the portion of the
	 * temporary MCG that holds the QP numbers.  If we fail here, we
	 * must undo the previous resource allocation.
	 *
	 * Note: hermon_mcg_qplist_add() can return SUCCESS if it already
	 * found the QP in the list.  In this case, the QP is not added on to
	 * the list again.  Check the flag 'qp_found' if this value is needed
	 * to be known.
1033 */ 1034 status = hermon_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp, 1035 &qp_found); 1036 if (status != DDI_SUCCESS) { 1037 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s)); 1038 hermon_rsrc_free(state, &rsrc); 1039 mutex_exit(&state->hs_mcglock); 1040 return (status); 1041 } 1042 mcg_entry->member_cnt = (newmcg->mcg_num_qps + 1); 1043 /* set the member count */ 1044 1045 /* 1046 * Once the temporary MCG has been updated, write the entry into the 1047 * appropriate location in the Hermon MCG entry table. If this is 1048 * successful, then we need to chain the previous entry to this one. 1049 * Note: In general, this operation shouldn't fail. If it does, then 1050 * it is an indication that something (probably in HW, but maybe in 1051 * SW) has gone seriously wrong. 1052 */ 1053 status = hermon_write_mgm_cmd_post(state, mcg_entry, rsrc->hr_indx, 1054 HERMON_CMD_NOSLEEP_SPIN); 1055 if (status != HERMON_CMD_SUCCESS) { 1056 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s)); 1057 hermon_rsrc_free(state, &rsrc); 1058 mutex_exit(&state->hs_mcglock); 1059 HERMON_WARNING(state, "failed to write MCG entry"); 1060 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n", 1061 status); 1062 if (status == HERMON_CMD_INVALID_STATUS) { 1063 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1064 } 1065 return (ibc_get_ci_failure(0)); 1066 } 1067 1068 /* 1069 * Now read the current MCG entry (the one previously at the end of 1070 * hash chain) into the temporary MCG. We are going to update its 1071 * "next_gid_indx" now and write the entry back to the MCG table. 1072 * Note: In general, this operation shouldn't fail. If it does, then 1073 * it is an indication that something (probably in HW, but maybe in SW) 1074 * has gone seriously wrong. We will free up the MCG entry resource, 1075 * but we will not undo the previously written MCG entry in the HW. 1076 * This is OK, though, because the MCG entry is not currently attached 1077 * to any hash chain. 
1078 */ 1079 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx, 1080 HERMON_CMD_NOSLEEP_SPIN); 1081 if (status != HERMON_CMD_SUCCESS) { 1082 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s)); 1083 hermon_rsrc_free(state, &rsrc); 1084 mutex_exit(&state->hs_mcglock); 1085 HERMON_WARNING(state, "failed to read MCG entry"); 1086 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n", 1087 status); 1088 if (status == HERMON_CMD_INVALID_STATUS) { 1089 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1090 } 1091 return (ibc_get_ci_failure(0)); 1092 } 1093 1094 /* 1095 * Finally, we update the "next_gid_indx" field in the temporary MCG 1096 * and attempt to write the entry back into the Hermon MCG table. If 1097 * this succeeds, then we update the "shadow" list to reflect the 1098 * change, drop the lock, and return success. Note: In general, this 1099 * operation shouldn't fail. If it does, then it is an indication 1100 * that something (probably in HW, but maybe in SW) has gone seriously 1101 * wrong. Just as we do above, we will free up the MCG entry resource, 1102 * but we will not try to undo the previously written MCG entry. This 1103 * is OK, though, because (since we failed here to update the end of 1104 * the chain) that other entry is not currently attached to any chain. 
	 */
	mcg_entry->next_gid_indx = rsrc->hr_indx;
	status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
		hermon_rsrc_free(state, &rsrc);
		mutex_exit(&state->hs_mcglock);
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}
	/* Link the (old) end of the "shadow" hash chain to the new entry */
	mcg = &state->hs_mcghdl[end_indx];
	mcg->mcg_next_indx = rsrc->hr_indx;

	/*
	 * Now that we know all the Hermon firmware accesses have been
	 * successful, we update the new "shadow" MCG entry by incrementing
	 * the "number of attached QPs" count.  Then we drop the lock and
	 * return success.
	 */
	newmcg->mcg_num_qps++;

	/*
	 * Increment the refcnt for this QP.  Because the QP
	 * was added to this MCG, the refcnt must be
	 * incremented.
	 */
	hermon_qp_mcg_refcnt_inc(qp);

	mutex_exit(&state->hs_mcglock);
	return (DDI_SUCCESS);
}


/*
 * hermon_mcg_detach()
 *    Context: Can be called only from user or kernel context.
 *
 *    Detaches the queue pair "qp" from the multicast group identified by
 *    MGID "gid" (with multicast DLID "lid").  Returns DDI_SUCCESS on
 *    success, IBT_MC_MLID_INVALID or IBT_MC_MGID_INVALID for invalid
 *    arguments, or an IBTF CI failure code if a firmware command fails.
 */
int
hermon_mcg_detach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
    ib_lid_t lid)
{
	hermon_hw_mcg_t		*mcg_entry;
	hermon_hw_mcg_qp_list_t	*mcg_entry_qplist;
	hermon_mcghdl_t		mcg;
	uint64_t		mgid_hash;
	uint32_t		end_indx, prev_indx;
	int			status;

	/*
	 * Check for invalid Multicast DLID.  Specifically, all Multicast
	 * LIDs should be within a well defined range.  If the specified LID
	 * is outside of that range, then return an error.
	 */
	if (hermon_mlid_is_valid(lid) == 0) {
		return (IBT_MC_MLID_INVALID);
	}

	/*
	 * Compute the MGID hash value.  As described above, the MCG table is
	 * arranged as a number of separate hash chains.  This operation
	 * converts the specified MGID into the starting index of an entry in
	 * the hash table (i.e. the index for the start of the appropriate
	 * hash chain).  Subsequent operations below will walk the chain
	 * searching for a matching entry from which to attempt to remove
	 * the specified QP.
	 */
	status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
	    &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Grab the multicast group mutex.  Then grab the pre-allocated
	 * temporary buffer used for holding and/or modifying MCG entries.
	 */
	mutex_enter(&state->hs_mcglock);
	mcg_entry = state->hs_mcgtmp;
	mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);

	/*
	 * Walk through the array of MCG entries starting at "mgid_hash".
	 * Try to find an MCG entry with a matching MGID.  The
	 * hermon_mcg_walk_mgid_hash() routine walks the list and returns an
	 * index into the MCG table.  The entry at this index is checked to
	 * determine whether it is a match or not.  If it is a match, then
	 * we continue on to attempt to remove the QP from the MCG.  If it
	 * is not a match (or not a valid MCG entry), then we return an error.
	 */
	end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
	mcg = &state->hs_mcghdl[end_indx];

	/*
	 * If MGID == 0 (the hash chain is empty) or if the specified MGID
	 * does not match the MGID in the current entry, then return
	 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
	 * valid).
	 */
	if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
	    ((mcg->mcg_mgid_h != gid.gid_prefix) ||
	    (mcg->mcg_mgid_l != gid.gid_guid))) {
		mutex_exit(&state->hs_mcglock);
		return (IBT_MC_MGID_INVALID);
	}

	/*
	 * Read the current MCG entry into the temporary MCG.  Note: In
	 * general, this operation shouldn't fail.  If it does, then it is
	 * an indication that something (probably in HW, but maybe in SW)
	 * has gone seriously wrong.
	 */
	status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&state->hs_mcglock);
		HERMON_WARNING(state, "failed to read MCG entry");
		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Search the QP number list for a match.  If a match is found, then
	 * remove the entry from the QP list.  Otherwise, if no match is found,
	 * return an error.
	 */
	status = hermon_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
	if (status != DDI_SUCCESS) {
		mutex_exit(&state->hs_mcglock);
		return (status);
	}

	/*
	 * Decrement the MCG count for this QP.  When the 'qp_mcg'
	 * field becomes 0, then this QP is no longer a member of any
	 * MCG.
	 */
	hermon_qp_mcg_refcnt_dec(qp);

	/*
	 * If the current MCG's QP number list is about to be made empty
	 * ("mcg_num_qps" == 1), then remove the entry itself from the hash
	 * chain.  Otherwise, just write the updated MCG entry back to the
	 * hardware.  In either case, once we successfully update the hardware
	 * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
	 * count (or zero out the entire "shadow" list entry) before returning
	 * success.  Note:  Zeroing out the "shadow" list entry is done
	 * inside of hermon_mcg_hash_list_remove().
	 */
	if (mcg->mcg_num_qps == 1) {

		/* Remove an MCG entry from the hash chain */
		status = hermon_mcg_hash_list_remove(state, end_indx, prev_indx,
		    mcg_entry);
		if (status != DDI_SUCCESS) {
			mutex_exit(&state->hs_mcglock);
			return (status);
		}

	} else {
		/*
		 * Write the updated MCG entry back to the Hermon MCG table.
		 * If this succeeds, then we update the "shadow" list to
		 * reflect the change (i.e. decrement the "mcg_num_qps"),
		 * drop the lock, and return success.  Note:  In general,
		 * this operation shouldn't fail.  If it does, then it is an
		 * indication that something (probably in HW, but maybe in SW)
		 * has gone seriously wrong.
		 */
		mcg_entry->member_cnt = (mcg->mcg_num_qps - 1);
		status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			mutex_exit(&state->hs_mcglock);
			HERMON_WARNING(state, "failed to write MCG entry");
			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}
		mcg->mcg_num_qps--;
	}

	mutex_exit(&state->hs_mcglock);
	return (DDI_SUCCESS);
}

/*
 * hermon_qp_mcg_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 *
 *    Bumps the count of MCGs that "qp" belongs to (under the QP lock).
 */
static void
hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp)
{
	/* Increment the QP's MCG reference count */
	mutex_enter(&qp->qp_lock);
	qp->qp_mcg_refcnt++;
	mutex_exit(&qp->qp_lock);
}


/*
 * hermon_qp_mcg_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 *
 *    Drops the count of MCGs that "qp" belongs to (under the QP lock).
 */
static void
hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp)
{
	/* Decrement the QP's MCG reference count */
	mutex_enter(&qp->qp_lock);
	qp->qp_mcg_refcnt--;
	mutex_exit(&qp->qp_lock);
}


/*
 * hermon_mcg_qplist_add()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
    hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp,
    uint_t *qp_found)
{
	uint_t qplist_indx;

	ASSERT(MUTEX_HELD(&state->hs_mcglock));

	qplist_indx = mcg->mcg_num_qps;

	/*
	 * Determine if we have exceeded the maximum number of QP per
	 * multicast group.
	 * If we have, then return an error.
	 *
	 * NOTE(review): this limit check precedes the duplicate-QP check
	 * below, so re-attaching a QP that is already a member of a full
	 * MCG returns IBT_HCA_MCG_QP_EXCEEDED instead of taking the NO-OP
	 * path — confirm this is the intended behavior.
	 */
	if (qplist_indx >= state->hs_cfg_profile->cp_num_qp_per_mcg) {
		return (IBT_HCA_MCG_QP_EXCEEDED);
	}

	/*
	 * Determine if the QP is already attached to this MCG table.  If it
	 * is, then we break out and treat this operation as a NO-OP
	 */
	for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
	    qplist_indx++) {
		if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
			break;
		}
	}

	/*
	 * If the QP was already on the list, set 'qp_found' to TRUE.  We still
	 * return SUCCESS in this case, but the qplist will not have been
	 * updated because the QP was already on the list.
	 */
	if (qplist_indx < mcg->mcg_num_qps) {
		*qp_found = 1;
	} else {
		/*
		 * Otherwise, append the new QP number to the end of the
		 * current QP list.  Note: We will increment the "mcg_num_qps"
		 * field on the "shadow" MCG list entry later (after we know
		 * that all necessary Hermon firmware accesses have been
		 * successful).
		 *
		 * Set 'qp_found' to 0 so we know the QP was added on to the
		 * list for sure.
		 */
		mcg_qplist[qplist_indx].qpn =
		    (qp->qp_qpnum | HERMON_MCG_QPN_BLOCK_LB);
		*qp_found = 0;
	}

	return (DDI_SUCCESS);
}



/*
 * hermon_mcg_qplist_remove()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
    hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp)
{
	uint_t i, qplist_indx;

	/*
	 * Search the MCG QP list for a matching QPN.  When it's found, we
	 * swap the last entry into the matched slot, zero out the (old)
	 * last entry, and return.  Note: the caller is responsible for
	 * decrementing the "shadow" entry's "mcg_num_qps" count.  If no
	 * match is found, then it's an error.
	 */
	qplist_indx = mcg->mcg_num_qps;
	for (i = 0; i < qplist_indx; i++) {
		if (mcg_qplist[i].qpn == qp->qp_qpnum) {
			mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
			mcg_qplist[qplist_indx - 1].qpn = 0;

			return (DDI_SUCCESS);
		}
	}

	return (IBT_QP_HDL_INVALID);
}


/*
 * hermon_mcg_walk_mgid_hash()
 *    Context: Can be called from interrupt or base context.
 *
 *    Walks the MCG hash chain beginning at "start_indx" looking for an
 *    entry whose MGID matches "mgid".  Returns the index of the matching
 *    entry, or (if no match) the index of the last entry on the chain.
 */
static uint_t
hermon_mcg_walk_mgid_hash(hermon_state_t *state, uint64_t start_indx,
    ib_gid_t mgid, uint_t *p_indx)
{
	hermon_mcghdl_t	curr_mcghdl;
	uint_t		curr_indx, prev_indx;

	ASSERT(MUTEX_HELD(&state->hs_mcglock));

	/* Start at the head of the hash chain */
	curr_indx = (uint_t)start_indx;
	prev_indx = curr_indx;
	curr_mcghdl = &state->hs_mcghdl[curr_indx];

	/* If the first entry in the chain has MGID == 0, then stop */
	if ((curr_mcghdl->mcg_mgid_h == 0) &&
	    (curr_mcghdl->mcg_mgid_l == 0)) {
		goto end_mgid_hash_walk;
	}

	/* If the first entry in the chain matches the MGID, then stop */
	if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
	    (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
		goto end_mgid_hash_walk;
	}

	/* Otherwise, walk the hash chain looking for a match */
	while (curr_mcghdl->mcg_next_indx != 0) {
		prev_indx = curr_indx;
		curr_indx = curr_mcghdl->mcg_next_indx;
		curr_mcghdl = &state->hs_mcghdl[curr_indx];

		if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
		    (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
			break;
		}
	}

end_mgid_hash_walk:
	/*
	 * If necessary, return the index of the previous entry too.  This
	 * is primarily used for detaching a QP from a multicast group.  It
	 * may be necessary, in that case, to delete an MCG entry from the
	 * hash chain and having the index of the previous entry is helpful.
	 */
	if (p_indx != NULL) {
		*p_indx = prev_indx;
	}
	return (curr_indx);
}


/*
 * hermon_mcg_setup_new_hdr()
 *    Context: Can be called from interrupt or base context.
 *
 *    Initializes the "shadow" software entry and the header fields of
 *    the temporary hardware MCG entry for a newly allocated MCG.
 */
static void
hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, hermon_hw_mcg_t *mcg_hdr,
    ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc)
{
	/*
	 * Fill in the fields of the "shadow" entry used by software
	 * to track MCG hardware entry
	 */
	mcg->mcg_mgid_h = mgid.gid_prefix;
	mcg->mcg_mgid_l = mgid.gid_guid;
	mcg->mcg_rsrcp = mcg_rsrc;
	mcg->mcg_next_indx = 0;
	mcg->mcg_num_qps = 0;

	/*
	 * Fill the header fields of the MCG entry (in the temporary copy)
	 */
	mcg_hdr->mgid_h = mgid.gid_prefix;
	mcg_hdr->mgid_l = mgid.gid_guid;
	mcg_hdr->next_gid_indx = 0;
}


/*
 * hermon_mcg_hash_list_remove()
 *    Context: Can be called only from user or kernel context.
 *
 *    Unlinks the MCG entry at "curr_indx" from its hash chain ("prev_indx"
 *    is the index of the preceding chain entry, equal to "curr_indx" when
 *    the entry is the chain head).  Zeroes the corresponding "shadow"
 *    entry and frees its MCG resource on success.
 */
static int
hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
    uint_t prev_indx, hermon_hw_mcg_t *mcg_entry)
{
	hermon_mcghdl_t		curr_mcg, prev_mcg, next_mcg;
	uint_t			next_indx;
	int			status;

	/* Get the pointer to "shadow" list for current entry */
	curr_mcg = &state->hs_mcghdl[curr_indx];

	/*
	 * If this is the first entry on a hash chain, then attempt to replace
	 * the entry with the next entry on the chain.  If there are no
	 * subsequent entries on the chain, then this is the only entry and
	 * should be invalidated.
	 */
	if (curr_indx == prev_indx) {

		/*
		 * If this is the only entry on the chain, then invalidate it.
		 * Note:  Invalidating an MCG entry means writing all zeros
		 * to the entry.  This is only necessary for those MCG
		 * entries that are the "head" entries of the individual hash
		 * chains.  Regardless of whether this operation returns
		 * success or failure, return that result to the caller.
		 */
		next_indx = curr_mcg->mcg_next_indx;
		if (next_indx == 0) {
			status = hermon_mcg_entry_invalidate(state, mcg_entry,
			    curr_indx);
			bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
			return (status);
		}

		/*
		 * Otherwise, this is just the first entry on the chain, so
		 * grab the next one
		 */
		next_mcg = &state->hs_mcghdl[next_indx];

		/*
		 * Read the next MCG entry into the temporary MCG.  Note:
		 * In general, this operation shouldn't fail.  If it does,
		 * then it is an indication that something (probably in HW,
		 * but maybe in SW) has gone seriously wrong.
		 */
		status = hermon_read_mgm_cmd_post(state, mcg_entry, next_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			HERMON_WARNING(state, "failed to read MCG entry");
			cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Copy/Write the temporary MCG back to the hardware MCG list
		 * using the current index.  This essentially removes the
		 * current MCG entry from the list by writing over it with
		 * the next one.  If this is successful, then we can do the
		 * same operation for the "shadow" list.  And we can also
		 * free up the Hermon MCG entry resource that was associated
		 * with the (old) next entry.  Note:  In general, this
		 * operation shouldn't fail.  If it does, then it is an
		 * indication that something (probably in HW, but maybe in SW)
		 * has gone seriously wrong.
		 */
		status = hermon_write_mgm_cmd_post(state, mcg_entry, curr_indx,
		    HERMON_CMD_NOSLEEP_SPIN);
		if (status != HERMON_CMD_SUCCESS) {
			HERMON_WARNING(state, "failed to write MCG entry");
			cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}

		/*
		 * Copy all the software tracking information from the next
		 * entry on the "shadow" MCG list into the current entry on
		 * the list.  Then invalidate (zero out) the other "shadow"
		 * list entry.
		 */
		bcopy(next_mcg, curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
		bzero(next_mcg, sizeof (struct hermon_sw_mcg_list_s));

		/*
		 * Free up the Hermon MCG entry resource used by the "next"
		 * MCG entry.  That resource is no longer needed by any
		 * MCG entry which is first on a hash chain (like the "next"
		 * entry has just become).
		 */
		hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);

		return (DDI_SUCCESS);
	}

	/*
	 * Else if this is the last entry on the hash chain (or a middle
	 * entry), then we update the previous entry's "next_gid_index" field
	 * to make it point instead to the next entry on the chain.  By
	 * skipping over the removed entry in this way, we can then free up
	 * any resources associated with the current entry.  Note:  We don't
	 * need to invalidate the "skipped over" hardware entry because it
	 * will no longer be connected to any hash chains, and if/when it is
	 * finally re-used, it will be written with entirely new values.
	 */

	/*
	 * Read the previous MCG entry into the temporary MCG.  Note:  In
	 * general, this operation shouldn't fail.  If it does, then it is an
	 * indication that something (probably in HW, but maybe in SW) has
	 * gone seriously wrong.
	 */
	status = hermon_read_mgm_cmd_post(state, mcg_entry, prev_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to read MCG entry");
		cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Finally, we update the "next_gid_indx" field in the temporary MCG
	 * and attempt to write the entry back into the Hermon MCG table.  If
	 * this succeeds, then we update the "shadow" list to reflect the
	 * change, free up the Hermon MCG entry resource that was associated
	 * with the current entry, and return success.  Note:  In general,
	 * this operation shouldn't fail.  If it does, then it is an indication
	 * that something (probably in HW, but maybe in SW) has gone seriously
	 * wrong.
	 */
	mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
	status = hermon_write_mgm_cmd_post(state, mcg_entry, prev_indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR,
			    HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Get the pointer to the "shadow" MCG list entry for the previous
	 * MCG.  Update its "mcg_next_indx" to point to the next entry
	 * the one after the current entry.  Note:  This next index may be
	 * zero, indicating the end of the list.
	 */
	prev_mcg = &state->hs_mcghdl[prev_indx];
	prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;

	/*
	 * Free up the Hermon MCG entry resource used by the current entry.
	 * This resource is no longer needed because the chain now skips over
	 * the current entry.  Then invalidate (zero out) the current "shadow"
	 * list entry.
	 */
	hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
	bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));

	return (DDI_SUCCESS);
}


/*
 * hermon_mcg_entry_invalidate()
 *    Context: Can be called only from user or kernel context.
 */
static int
hermon_mcg_entry_invalidate(hermon_state_t *state, hermon_hw_mcg_t *mcg_entry,
    uint_t indx)
{
	int		status;

	/*
	 * Invalidate the hardware MCG entry by zeroing out this temporary
	 * MCG and writing it to the hardware.  Note:  In general, this
	 * operation shouldn't fail.  If it does, then it is an indication
	 * that something (probably in HW, but maybe in SW) has gone seriously
	 * wrong.
	 */
	bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
	status = hermon_write_mgm_cmd_post(state, mcg_entry, indx,
	    HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to write MCG entry");
		cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_mgid_is_valid()
 *    Context: Can be called from interrupt or base context.
 *
 *    Returns 1 if "gid" is a well-formed IBA multicast GID, 0 otherwise.
 */
static int
hermon_mgid_is_valid(ib_gid_t gid)
{
	uint_t		topbits, flags, scope;

	/*
	 * According to IBA 1.1 specification (section 4.1.1) a valid
	 * "multicast GID" must have its top eight bits set to all ones
	 */
	topbits = (gid.gid_prefix >> HERMON_MCG_TOPBITS_SHIFT) &
	    HERMON_MCG_TOPBITS_MASK;
	if (topbits != HERMON_MCG_TOPBITS) {
		return (0);
	}

	/*
	 * The next 4 bits are the "flag" bits.  These are valid only
	 * if they are "0" (which correspond to permanently assigned/
	 * "well-known" multicast GIDs) or "1" (for so-called "transient"
	 * multicast GIDs).  All other values are reserved.
	 */
	flags = (gid.gid_prefix >> HERMON_MCG_FLAGS_SHIFT) &
	    HERMON_MCG_FLAGS_MASK;
	if (!((flags == HERMON_MCG_FLAGS_PERM) ||
	    (flags == HERMON_MCG_FLAGS_NONPERM))) {
		return (0);
	}

	/*
	 * The next 4 bits are the "scope" bits.  These are valid only
	 * if they are "2" (Link-local), "5" (Site-local), "8"
	 * (Organization-local) or "E" (Global).  All other values
	 * are reserved (or currently unassigned).
	 */
	scope = (gid.gid_prefix >> HERMON_MCG_SCOPE_SHIFT) &
	    HERMON_MCG_SCOPE_MASK;
	if (!((scope == HERMON_MCG_SCOPE_LINKLOC) ||
	    (scope == HERMON_MCG_SCOPE_SITELOC) ||
	    (scope == HERMON_MCG_SCOPE_ORGLOC) ||
	    (scope == HERMON_MCG_SCOPE_GLOBAL))) {
		return (0);
	}

	/*
	 * If it passes all of the above checks, then we will consider it
	 * a valid multicast GID.
	 */
	return (1);
}


/*
 * hermon_mlid_is_valid()
 *    Context: Can be called from interrupt or base context.
 *
 *    Returns 1 if "lid" is in the valid multicast DLID range, 0 otherwise.
 */
static int
hermon_mlid_is_valid(ib_lid_t lid)
{
	/*
	 * According to IBA 1.1 specification (section 4.1.1) a valid
	 * "multicast DLID" must be between 0xC000 and 0xFFFE.
	 */
	if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) {
		return (0);
	}

	return (1);
}


/*
 * hermon_pd_alloc()
 *    Context: Can be called only from user or kernel context.
 *
 *    Allocates a Protection Domain handle and returns it through "pdhdl".
 *    Returns DDI_SUCCESS, or IBT_INSUFF_RESOURCE if no PD resource is
 *    available.
 */
int
hermon_pd_alloc(hermon_state_t *state, hermon_pdhdl_t *pdhdl, uint_t sleepflag)
{
	hermon_rsrc_t	*rsrc;
	hermon_pdhdl_t	pd;
	int		status;

	/*
	 * Allocate the software structure for tracking the protection domain
	 * (i.e. the Hermon Protection Domain handle).  By default each PD
	 * structure will have a unique PD number assigned to it.  All that
	 * is necessary is for software to initialize the PD reference count
	 * (to zero) and return success.
	 */
	status = hermon_rsrc_alloc(state, HERMON_PDHDL, 1, sleepflag, &rsrc);
	if (status != DDI_SUCCESS) {
		return (IBT_INSUFF_RESOURCE);
	}
	pd = (hermon_pdhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))

	pd->pd_refcnt = 0;
	*pdhdl = pd;

	return (DDI_SUCCESS);
}


/*
 * hermon_pd_free()
 *    Context: Can be called only from user or kernel context.
 *
 *    Frees the Protection Domain handle and NULLs out "*pdhdl".  Returns
 *    IBT_PD_IN_USE (without freeing) if the PD is still referenced.
 */
int
hermon_pd_free(hermon_state_t *state, hermon_pdhdl_t *pdhdl)
{
	hermon_rsrc_t	*rsrc;
	hermon_pdhdl_t	pd;

	/*
	 * Pull all the necessary information from the Hermon Protection Domain
	 * handle.  This is necessary here because the resource for the
	 * PD is going to be freed up as part of this operation.
	 */
	pd = *pdhdl;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
	rsrc = pd->pd_rsrcp;

	/*
	 * Check the PD reference count.  If the reference count is non-zero,
	 * then it means that this protection domain is still referenced by
	 * some memory region, queue pair, address handle, or other IB object
	 * If it is non-zero, then return an error.  Otherwise, free the
	 * Hermon resource and return success.
	 *
	 * NOTE(review): the refcnt check and the free below are not atomic
	 * with respect to hermon_pd_refcnt_inc(); presumably callers
	 * guarantee no concurrent attach to a PD being freed — confirm.
	 */
	if (pd->pd_refcnt != 0) {
		return (IBT_PD_IN_USE);
	}

	/* Free the Hermon Protection Domain handle */
	hermon_rsrc_free(state, &rsrc);

	/* Set the pdhdl pointer to NULL and return success */
	*pdhdl = (hermon_pdhdl_t)NULL;

	return (DDI_SUCCESS);
}


/*
 * hermon_pd_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_pd_refcnt_inc(hermon_pdhdl_t pd)
{
	/* Increment the protection domain's reference count */
	atomic_inc_32(&pd->pd_refcnt);
}


/*
 * hermon_pd_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_pd_refcnt_dec(hermon_pdhdl_t pd)
{
	/* Decrement the protection domain's reference count */
	atomic_dec_32(&pd->pd_refcnt);
}


/*
 * hermon_port_query()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_port_query(hermon_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
{
	sm_portinfo_t		portinfo;
	sm_guidinfo_t		guidinfo;
	sm_pkey_table_t		pkeytable;
	ib_gid_t		*sgid;
	uint_t			sgid_max, pkey_max, tbl_size;
	int			i, j, indx, status;
	ib_pkey_t		*pkeyp;
	ib_guid_t		*guidp;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))

	/* Validate that specified port number is legal */
	if (!hermon_portnum_is_valid(state, port)) {
		return (IBT_HCA_PORT_INVALID);
	}
	pkeyp = state->hs_pkey[port - 1];
	guidp = state->hs_guid[port - 1];

	/*
	 * We use the Hermon MAD_IFC command to post a GetPortInfo MAD
	 * to the firmware (for the specified port number).  This returns
	 * a full PortInfo MAD (in "portinfo") which we subsequently
	 * parse to fill in the "ibt_hca_portinfo_t" structure returned
	 * to the IBTF.
1933 */ 1934 status = hermon_getportinfo_cmd_post(state, port, 1935 HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo); 1936 if (status != HERMON_CMD_SUCCESS) { 1937 cmn_err(CE_CONT, "Hermon: GetPortInfo (port %02d) command " 1938 "failed: %08x\n", port, status); 1939 if (status == HERMON_CMD_INVALID_STATUS) { 1940 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1941 } 1942 return (ibc_get_ci_failure(0)); 1943 } 1944 1945 /* 1946 * Parse the PortInfo MAD and fill in the IBTF structure 1947 */ 1948 pi->p_base_lid = portinfo.LID; 1949 pi->p_qkey_violations = portinfo.Q_KeyViolations; 1950 pi->p_pkey_violations = portinfo.P_KeyViolations; 1951 pi->p_sm_sl = portinfo.MasterSMSL; 1952 pi->p_sm_lid = portinfo.MasterSMLID; 1953 pi->p_linkstate = portinfo.PortState; 1954 pi->p_port_num = portinfo.LocalPortNum; 1955 pi->p_phys_state = portinfo.PortPhysicalState; 1956 pi->p_width_supported = portinfo.LinkWidthSupported; 1957 pi->p_width_enabled = portinfo.LinkWidthEnabled; 1958 pi->p_width_active = portinfo.LinkWidthActive; 1959 pi->p_speed_supported = portinfo.LinkSpeedSupported; 1960 pi->p_speed_enabled = portinfo.LinkSpeedEnabled; 1961 pi->p_speed_active = portinfo.LinkSpeedActive; 1962 pi->p_mtu = portinfo.MTUCap; 1963 pi->p_lmc = portinfo.LMC; 1964 pi->p_max_vl = portinfo.VLCap; 1965 pi->p_subnet_timeout = portinfo.SubnetTimeOut; 1966 pi->p_msg_sz = ((uint32_t)1 << HERMON_QP_LOG_MAX_MSGSZ); 1967 tbl_size = state->hs_cfg_profile->cp_log_max_gidtbl; 1968 pi->p_sgid_tbl_sz = (1 << tbl_size); 1969 tbl_size = state->hs_cfg_profile->cp_log_max_pkeytbl; 1970 pi->p_pkey_tbl_sz = (1 << tbl_size); 1971 state->hs_sn_prefix[port - 1] = portinfo.GidPrefix; 1972 1973 /* 1974 * Convert InfiniBand-defined port capability flags to the format 1975 * specified by the IBTF 1976 */ 1977 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM) 1978 pi->p_capabilities |= IBT_PORT_CAP_SM; 1979 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED) 1980 pi->p_capabilities |= 
IBT_PORT_CAP_SM_DISABLED; 1981 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD) 1982 pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL; 1983 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD) 1984 pi->p_capabilities |= IBT_PORT_CAP_DM; 1985 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD) 1986 pi->p_capabilities |= IBT_PORT_CAP_VENDOR; 1987 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_CLNT_REREG_SUPPD) 1988 pi->p_capabilities |= IBT_PORT_CAP_CLNT_REREG; 1989 1990 /* 1991 * Fill in the SGID table. Since the only access to the Hermon 1992 * GID tables is through the firmware's MAD_IFC interface, we 1993 * post as many GetGUIDInfo MADs as necessary to read in the entire 1994 * contents of the SGID table (for the specified port). Note: The 1995 * GetGUIDInfo command only gets eight GUIDs per operation. These 1996 * GUIDs are then appended to the GID prefix for the port (from the 1997 * GetPortInfo above) to form the entire SGID table. 1998 */ 1999 for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) { 2000 status = hermon_getguidinfo_cmd_post(state, port, i >> 3, 2001 HERMON_SLEEPFLAG_FOR_CONTEXT(), &guidinfo); 2002 if (status != HERMON_CMD_SUCCESS) { 2003 cmn_err(CE_CONT, "Hermon: GetGUIDInfo (port %02d) " 2004 "command failed: %08x\n", port, status); 2005 if (status == HERMON_CMD_INVALID_STATUS) { 2006 hermon_fm_ereport(state, HCA_SYS_ERR, 2007 HCA_ERR_SRV_LOST); 2008 } 2009 return (ibc_get_ci_failure(0)); 2010 } 2011 2012 /* Figure out how many of the entries are valid */ 2013 sgid_max = min((pi->p_sgid_tbl_sz - i), 8); 2014 for (j = 0; j < sgid_max; j++) { 2015 indx = (i + j); 2016 sgid = &pi->p_sgid_tbl[indx]; 2017 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid)) 2018 sgid->gid_prefix = portinfo.GidPrefix; 2019 guidp[indx] = sgid->gid_guid = 2020 guidinfo.GUIDBlocks[j]; 2021 } 2022 } 2023 2024 /* 2025 * Fill in the PKey table. Just as for the GID tables above, the 2026 * only access to the Hermon PKey tables is through the firmware's 2027 * MAD_IFC interface. 
We post as many GetPKeyTable MADs as necessary 2028 * to read in the entire contents of the PKey table (for the specified 2029 * port). Note: The GetPKeyTable command only gets 32 PKeys per 2030 * operation. 2031 */ 2032 for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) { 2033 status = hermon_getpkeytable_cmd_post(state, port, i, 2034 HERMON_SLEEPFLAG_FOR_CONTEXT(), &pkeytable); 2035 if (status != HERMON_CMD_SUCCESS) { 2036 cmn_err(CE_CONT, "Hermon: GetPKeyTable (port %02d) " 2037 "command failed: %08x\n", port, status); 2038 if (status == HERMON_CMD_INVALID_STATUS) { 2039 hermon_fm_ereport(state, HCA_SYS_ERR, 2040 HCA_ERR_SRV_LOST); 2041 } 2042 return (ibc_get_ci_failure(0)); 2043 } 2044 2045 /* Figure out how many of the entries are valid */ 2046 pkey_max = min((pi->p_pkey_tbl_sz - i), 32); 2047 for (j = 0; j < pkey_max; j++) { 2048 indx = (i + j); 2049 pkeyp[indx] = pi->p_pkey_tbl[indx] = 2050 pkeytable.P_KeyTableBlocks[j]; 2051 } 2052 } 2053 2054 return (DDI_SUCCESS); 2055 } 2056 2057 2058 /* 2059 * hermon_port_modify() 2060 * Context: Can be called only from user or kernel context. 
2061 */ 2062 /* ARGSUSED */ 2063 int 2064 hermon_port_modify(hermon_state_t *state, uint8_t port, 2065 ibt_port_modify_flags_t flags, uint8_t init_type) 2066 { 2067 sm_portinfo_t portinfo; 2068 uint32_t capmask; 2069 int status; 2070 hermon_hw_set_port_t set_port; 2071 2072 /* 2073 * Return an error if either of the unsupported flags are set 2074 */ 2075 if ((flags & IBT_PORT_SHUTDOWN) || 2076 (flags & IBT_PORT_SET_INIT_TYPE)) { 2077 return (IBT_NOT_SUPPORTED); 2078 } 2079 2080 bzero(&set_port, sizeof (set_port)); 2081 2082 /* 2083 * Determine whether we are trying to reset the QKey counter 2084 */ 2085 if (flags & IBT_PORT_RESET_QKEY) 2086 set_port.rqk = 1; 2087 2088 /* Validate that specified port number is legal */ 2089 if (!hermon_portnum_is_valid(state, port)) { 2090 return (IBT_HCA_PORT_INVALID); 2091 } 2092 2093 /* 2094 * Use the Hermon MAD_IFC command to post a GetPortInfo MAD to the 2095 * firmware (for the specified port number). This returns a full 2096 * PortInfo MAD (in "portinfo") from which we pull the current 2097 * capability mask. We then modify the capability mask as directed 2098 * by the "pmod_flags" field, and write the updated capability mask 2099 * using the Hermon SET_IB command (below). 2100 */ 2101 status = hermon_getportinfo_cmd_post(state, port, 2102 HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo); 2103 if (status != HERMON_CMD_SUCCESS) { 2104 if (status == HERMON_CMD_INVALID_STATUS) { 2105 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 2106 } 2107 return (ibc_get_ci_failure(0)); 2108 } 2109 2110 /* 2111 * Convert InfiniBand-defined port capability flags to the format 2112 * specified by the IBTF. Specifically, we modify the capability 2113 * mask based on the specified values. 
2114 */ 2115 capmask = portinfo.CapabilityMask; 2116 2117 if (flags & IBT_PORT_RESET_SM) 2118 capmask &= ~SM_CAP_MASK_IS_SM; 2119 else if (flags & IBT_PORT_SET_SM) 2120 capmask |= SM_CAP_MASK_IS_SM; 2121 2122 if (flags & IBT_PORT_RESET_SNMP) 2123 capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD; 2124 else if (flags & IBT_PORT_SET_SNMP) 2125 capmask |= SM_CAP_MASK_IS_SNMP_SUPPD; 2126 2127 if (flags & IBT_PORT_RESET_DEVMGT) 2128 capmask &= ~SM_CAP_MASK_IS_DM_SUPPD; 2129 else if (flags & IBT_PORT_SET_DEVMGT) 2130 capmask |= SM_CAP_MASK_IS_DM_SUPPD; 2131 2132 if (flags & IBT_PORT_RESET_VENDOR) 2133 capmask &= ~SM_CAP_MASK_IS_VM_SUPPD; 2134 else if (flags & IBT_PORT_SET_VENDOR) 2135 capmask |= SM_CAP_MASK_IS_VM_SUPPD; 2136 2137 set_port.cap_mask = capmask; 2138 2139 /* 2140 * Use the Hermon SET_PORT command to update the capability mask and 2141 * (possibly) reset the QKey violation counter for the specified port. 2142 * Note: In general, this operation shouldn't fail. If it does, then 2143 * it is an indication that something (probably in HW, but maybe in 2144 * SW) has gone seriously wrong. 2145 */ 2146 status = hermon_set_port_cmd_post(state, &set_port, port, 2147 HERMON_SLEEPFLAG_FOR_CONTEXT()); 2148 if (status != HERMON_CMD_SUCCESS) { 2149 HERMON_WARNING(state, "failed to modify port capabilities"); 2150 cmn_err(CE_CONT, "Hermon: SET_IB (port %02d) command failed: " 2151 "%08x\n", port, status); 2152 if (status == HERMON_CMD_INVALID_STATUS) { 2153 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 2154 } 2155 return (ibc_get_ci_failure(0)); 2156 } 2157 2158 return (DDI_SUCCESS); 2159 } 2160 2161 2162 /* 2163 * hermon_set_addr_path() 2164 * Context: Can be called from interrupt or base context. 2165 * 2166 * Note: This routine is used for two purposes. It is used to fill in the 2167 * Hermon UDAV fields, and it is used to fill in the address path information 2168 * for QPs. Because the two Hermon structures are similar, common fields can 2169 * be filled in here. 
Because they are different, however, we pass 2170 * an additional flag to indicate which type is being filled and do each one 2171 * uniquely 2172 */ 2173 2174 int hermon_srate_override = -1; /* allows ease of testing */ 2175 2176 int 2177 hermon_set_addr_path(hermon_state_t *state, ibt_adds_vect_t *av, 2178 hermon_hw_addr_path_t *path, uint_t type) 2179 { 2180 uint_t gidtbl_sz; 2181 hermon_hw_udav_t *udav; 2182 2183 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av)) 2184 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path)) 2185 2186 udav = (hermon_hw_udav_t *)(void *)path; 2187 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav)) 2188 path->mlid = av->av_src_path; 2189 path->rlid = av->av_dlid; 2190 2191 switch (av->av_srate) { 2192 case IBT_SRATE_2: /* 1xSDR-2.5Gb/s injection rate */ 2193 path->max_stat_rate = 7; break; 2194 case IBT_SRATE_10: /* 4xSDR-10.0Gb/s injection rate */ 2195 path->max_stat_rate = 8; break; 2196 case IBT_SRATE_30: /* 12xSDR-30Gb/s injection rate */ 2197 path->max_stat_rate = 9; break; 2198 case IBT_SRATE_5: /* 1xDDR-5Gb/s injection rate */ 2199 path->max_stat_rate = 10; break; 2200 case IBT_SRATE_20: /* 4xDDR-20Gb/s injection rate */ 2201 path->max_stat_rate = 11; break; 2202 case IBT_SRATE_40: /* 4xQDR-40Gb/s injection rate */ 2203 path->max_stat_rate = 12; break; 2204 case IBT_SRATE_60: /* 12xDDR-60Gb/s injection rate */ 2205 path->max_stat_rate = 13; break; 2206 case IBT_SRATE_80: /* 8xQDR-80Gb/s injection rate */ 2207 path->max_stat_rate = 14; break; 2208 case IBT_SRATE_120: /* 12xQDR-120Gb/s injection rate */ 2209 path->max_stat_rate = 15; break; 2210 case IBT_SRATE_NOT_SPECIFIED: /* Max */ 2211 path->max_stat_rate = 0; break; 2212 default: 2213 return (IBT_STATIC_RATE_INVALID); 2214 } 2215 if (hermon_srate_override != -1) /* for evaluating HCA firmware */ 2216 path->max_stat_rate = hermon_srate_override; 2217 2218 /* If "grh" flag is set, then check for valid SGID index too */ 2219 gidtbl_sz = (1 << state->hs_queryport.log_max_gid); 2220 if 
((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) { 2221 return (IBT_SGID_INVALID); 2222 } 2223 2224 /* 2225 * Fill in all "global" values regardless of the value in the GRH 2226 * flag. Because "grh" is not set unless "av_send_grh" is set, the 2227 * hardware will ignore the other "global" values as necessary. Note: 2228 * SW does this here to enable later query operations to return 2229 * exactly the same params that were passed when the addr path was 2230 * last written. 2231 */ 2232 path->grh = av->av_send_grh; 2233 if (type == HERMON_ADDRPATH_QP) { 2234 path->mgid_index = av->av_sgid_ix; 2235 } else { 2236 /* 2237 * For Hermon UDAV, the "mgid_index" field is the index into 2238 * a combined table (not a per-port table), but having sections 2239 * for each port. So some extra calculations are necessary. 2240 */ 2241 2242 path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) + 2243 av->av_sgid_ix; 2244 2245 udav->portnum = av->av_port_num; 2246 } 2247 2248 /* 2249 * According to Hermon PRM, the (31:0) part of rgid_l must be set to 2250 * "0x2" if the 'grh' or 'g' bit is cleared. It also says that we 2251 * only need to do it for UDAV's. So we enforce that here. 2252 * 2253 * NOTE: The entire 64 bits worth of GUID info is actually being 2254 * preserved (for UDAVs) by the callers of this function 2255 * (hermon_ah_alloc() and hermon_ah_modify()) and as long as the 2256 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are 2257 * "don't care". 
2258 */ 2259 if ((path->grh) || (type == HERMON_ADDRPATH_QP)) { 2260 path->flow_label = av->av_flow; 2261 path->tclass = av->av_tclass; 2262 path->hop_limit = av->av_hop; 2263 bcopy(&(av->av_dgid.gid_prefix), &(path->rgid_h), 2264 sizeof (uint64_t)); 2265 bcopy(&(av->av_dgid.gid_guid), &(path->rgid_l), 2266 sizeof (uint64_t)); 2267 } else { 2268 path->rgid_l = 0x2; 2269 path->flow_label = 0; 2270 path->tclass = 0; 2271 path->hop_limit = 0; 2272 path->rgid_h = 0; 2273 } 2274 /* extract the default service level */ 2275 udav->sl = (HERMON_DEF_SCHED_SELECTION & 0x3C) >> 2; 2276 2277 return (DDI_SUCCESS); 2278 } 2279 2280 2281 /* 2282 * hermon_get_addr_path() 2283 * Context: Can be called from interrupt or base context. 2284 * 2285 * Note: Just like hermon_set_addr_path() above, this routine is used for two 2286 * purposes. It is used to read in the Hermon UDAV fields, and it is used to 2287 * read in the address path information for QPs. Because the two Hermon 2288 * structures are similar, common fields can be read in here. But because 2289 * they are slightly different, we pass an additional flag to indicate which 2290 * type is being read. 
2291 */ 2292 void 2293 hermon_get_addr_path(hermon_state_t *state, hermon_hw_addr_path_t *path, 2294 ibt_adds_vect_t *av, uint_t type) 2295 { 2296 uint_t gidtbl_sz; 2297 2298 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path)) 2299 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av)) 2300 2301 av->av_src_path = path->mlid; 2302 av->av_dlid = path->rlid; 2303 2304 /* Set "av_ipd" value from max_stat_rate */ 2305 switch (path->max_stat_rate) { 2306 case 7: /* 1xSDR-2.5Gb/s injection rate */ 2307 av->av_srate = IBT_SRATE_2; break; 2308 case 8: /* 4xSDR-10.0Gb/s injection rate */ 2309 av->av_srate = IBT_SRATE_10; break; 2310 case 9: /* 12xSDR-30Gb/s injection rate */ 2311 av->av_srate = IBT_SRATE_30; break; 2312 case 10: /* 1xDDR-5Gb/s injection rate */ 2313 av->av_srate = IBT_SRATE_5; break; 2314 case 11: /* 4xDDR-20Gb/s injection rate */ 2315 av->av_srate = IBT_SRATE_20; break; 2316 case 12: /* xQDR-40Gb/s injection rate */ 2317 av->av_srate = IBT_SRATE_40; break; 2318 case 13: /* 12xDDR-60Gb/s injection rate */ 2319 av->av_srate = IBT_SRATE_60; break; 2320 case 14: /* 8xQDR-80Gb/s injection rate */ 2321 av->av_srate = IBT_SRATE_80; break; 2322 case 15: /* 12xQDR-120Gb/s injection rate */ 2323 av->av_srate = IBT_SRATE_120; break; 2324 case 0: /* max */ 2325 av->av_srate = IBT_SRATE_NOT_SPECIFIED; break; 2326 default: /* 1x injection rate */ 2327 av->av_srate = IBT_SRATE_1X; 2328 } 2329 2330 /* 2331 * Extract all "global" values regardless of the value in the GRH 2332 * flag. Because "av_send_grh" is set only if "grh" is set, software 2333 * knows to ignore the other "global" values as necessary. Note: SW 2334 * does it this way to enable these query operations to return exactly 2335 * the same params that were passed when the addr path was last written. 
2336 */ 2337 av->av_send_grh = path->grh; 2338 if (type == HERMON_ADDRPATH_QP) { 2339 av->av_sgid_ix = path->mgid_index; 2340 } else { 2341 /* 2342 * For Hermon UDAV, the "mgid_index" field is the index into 2343 * a combined table (not a per-port table). 2344 */ 2345 gidtbl_sz = (1 << state->hs_queryport.log_max_gid); 2346 av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) * 2347 gidtbl_sz); 2348 2349 av->av_port_num = ((hermon_hw_udav_t *)(void *)path)->portnum; 2350 } 2351 av->av_flow = path->flow_label; 2352 av->av_tclass = path->tclass; 2353 av->av_hop = path->hop_limit; 2354 /* this is for alignment issue w/ the addr path struct in Hermon */ 2355 bcopy(&(path->rgid_h), &(av->av_dgid.gid_prefix), sizeof (uint64_t)); 2356 bcopy(&(path->rgid_l), &(av->av_dgid.gid_guid), sizeof (uint64_t)); 2357 } 2358 2359 2360 /* 2361 * hermon_portnum_is_valid() 2362 * Context: Can be called from interrupt or base context. 2363 */ 2364 int 2365 hermon_portnum_is_valid(hermon_state_t *state, uint_t portnum) 2366 { 2367 uint_t max_port; 2368 2369 max_port = state->hs_cfg_profile->cp_num_ports; 2370 if ((portnum <= max_port) && (portnum != 0)) { 2371 return (1); 2372 } else { 2373 return (0); 2374 } 2375 } 2376 2377 2378 /* 2379 * hermon_pkeyindex_is_valid() 2380 * Context: Can be called from interrupt or base context. 2381 */ 2382 int 2383 hermon_pkeyindex_is_valid(hermon_state_t *state, uint_t pkeyindx) 2384 { 2385 uint_t max_pkeyindx; 2386 2387 max_pkeyindx = 1 << state->hs_cfg_profile->cp_log_max_pkeytbl; 2388 if (pkeyindx < max_pkeyindx) { 2389 return (1); 2390 } else { 2391 return (0); 2392 } 2393 } 2394 2395 2396 /* 2397 * hermon_queue_alloc() 2398 * Context: Can be called from interrupt or base context. 
2399 */ 2400 int 2401 hermon_queue_alloc(hermon_state_t *state, hermon_qalloc_info_t *qa_info, 2402 uint_t sleepflag) 2403 { 2404 ddi_dma_attr_t dma_attr; 2405 int (*callback)(caddr_t); 2406 uint64_t realsize, alloc_mask; 2407 int flag, status; 2408 2409 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info)) 2410 2411 /* Set the callback flag appropriately */ 2412 callback = (sleepflag == HERMON_SLEEP) ? DDI_DMA_SLEEP : 2413 DDI_DMA_DONTWAIT; 2414 2415 /* 2416 * Initialize many of the default DMA attributes. Then set additional 2417 * alignment restrictions as necessary for the queue memory. Also 2418 * respect the configured value for IOMMU bypass 2419 */ 2420 hermon_dma_attr_init(state, &dma_attr); 2421 dma_attr.dma_attr_align = qa_info->qa_bind_align; 2422 #ifdef __sparc 2423 if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS) { 2424 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 2425 } 2426 #endif 2427 2428 /* Allocate a DMA handle */ 2429 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, callback, NULL, 2430 &qa_info->qa_dmahdl); 2431 if (status != DDI_SUCCESS) { 2432 return (DDI_FAILURE); 2433 } 2434 2435 /* 2436 * Determine the amount of memory to allocate, depending on the values 2437 * in "qa_bind_align" and "qa_alloc_align". The problem we are trying 2438 * to solve here is that allocating a DMA handle with IOMMU bypass 2439 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments 2440 * that are less restrictive than the page size. Since we may need 2441 * stricter alignments on the memory allocated by ddi_dma_mem_alloc() 2442 * (e.g. in Hermon QP work queue memory allocation), we use the 2443 * following method to calculate how much additional memory to request, 2444 * and we enforce our own alignment on the allocated result. 
2445 */ 2446 alloc_mask = qa_info->qa_alloc_align - 1; 2447 if (qa_info->qa_bind_align == qa_info->qa_alloc_align) { 2448 realsize = qa_info->qa_size; 2449 } else { 2450 realsize = qa_info->qa_size + alloc_mask; 2451 } 2452 2453 /* 2454 * If we are to allocate the queue from system memory, then use 2455 * ddi_dma_mem_alloc() to find the space. Otherwise, this is a 2456 * host memory allocation, use ddi_umem_alloc(). In either case, 2457 * return a pointer to the memory range allocated (including any 2458 * necessary alignment adjustments), the "real" memory pointer, 2459 * the "real" size, and a ddi_acc_handle_t to use when reading 2460 * from/writing to the memory. 2461 */ 2462 if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) { 2463 /* Allocate system memory for the queue */ 2464 status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize, 2465 &state->hs_reg_accattr, DDI_DMA_CONSISTENT, callback, NULL, 2466 (caddr_t *)&qa_info->qa_buf_real, 2467 (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl); 2468 if (status != DDI_SUCCESS) { 2469 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2470 return (DDI_FAILURE); 2471 } 2472 2473 /* 2474 * Save temporary copy of the real pointer. (This may be 2475 * modified in the last step below). 2476 */ 2477 qa_info->qa_buf_aligned = qa_info->qa_buf_real; 2478 2479 bzero(qa_info->qa_buf_real, qa_info->qa_buf_realsz); 2480 2481 } else { /* HERMON_QUEUE_LOCATION_USERLAND */ 2482 2483 /* Allocate userland mappable memory for the queue */ 2484 flag = (sleepflag == HERMON_SLEEP) ? DDI_UMEM_SLEEP : 2485 DDI_UMEM_NOSLEEP; 2486 qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag, 2487 &qa_info->qa_umemcookie); 2488 if (qa_info->qa_buf_real == NULL) { 2489 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2490 return (DDI_FAILURE); 2491 } 2492 2493 /* 2494 * Save temporary copy of the real pointer. (This may be 2495 * modified in the last step below). 
2496 */ 2497 qa_info->qa_buf_aligned = qa_info->qa_buf_real; 2498 2499 } 2500 2501 /* 2502 * The next to last step is to ensure that the final address 2503 * ("qa_buf_aligned") has the appropriate "alloc" alignment 2504 * restriction applied to it (if necessary). 2505 */ 2506 if (qa_info->qa_bind_align != qa_info->qa_alloc_align) { 2507 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t) 2508 qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask); 2509 } 2510 /* 2511 * The last step is to figure out the offset of the start relative 2512 * to the first page of the region - will be used in the eqc/cqc 2513 * passed to the HW 2514 */ 2515 qa_info->qa_pgoffs = (uint_t)((uintptr_t) 2516 qa_info->qa_buf_aligned & HERMON_PAGEOFFSET); 2517 2518 return (DDI_SUCCESS); 2519 } 2520 2521 2522 /* 2523 * hermon_queue_free() 2524 * Context: Can be called from interrupt or base context. 2525 */ 2526 void 2527 hermon_queue_free(hermon_qalloc_info_t *qa_info) 2528 { 2529 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info)) 2530 2531 /* 2532 * Depending on how (i.e. from where) we allocated the memory for 2533 * this queue, we choose the appropriate method for releasing the 2534 * resources. 2535 */ 2536 if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) { 2537 2538 ddi_dma_mem_free(&qa_info->qa_acchdl); 2539 2540 } else if (qa_info->qa_location == HERMON_QUEUE_LOCATION_USERLAND) { 2541 2542 ddi_umem_free(qa_info->qa_umemcookie); 2543 2544 } 2545 2546 /* Always free the dma handle */ 2547 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2548 } 2549 2550 /* 2551 * hermon_create_fmr_pool() 2552 * Create a pool of FMRs. 2553 * Context: Can be called from kernel context only. 
2554 */ 2555 int 2556 hermon_create_fmr_pool(hermon_state_t *state, hermon_pdhdl_t pd, 2557 ibt_fmr_pool_attr_t *fmr_attr, hermon_fmrhdl_t *fmrpoolp) 2558 { 2559 hermon_fmrhdl_t fmrpool; 2560 hermon_fmr_list_t *fmr, *fmr_next; 2561 hermon_mrhdl_t mr; 2562 char taskqname[48]; 2563 int status; 2564 int sleep; 2565 int i; 2566 2567 sleep = (fmr_attr->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP : 2568 HERMON_NOSLEEP; 2569 if ((sleep == HERMON_SLEEP) && 2570 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 2571 return (IBT_INVALID_PARAM); 2572 } 2573 2574 fmrpool = (hermon_fmrhdl_t)kmem_zalloc(sizeof (*fmrpool), sleep); 2575 if (fmrpool == NULL) { 2576 status = IBT_INSUFF_RESOURCE; 2577 goto fail; 2578 } 2579 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmrpool)) 2580 2581 mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER, 2582 DDI_INTR_PRI(state->hs_intrmsi_pri)); 2583 2584 fmrpool->fmr_state = state; 2585 fmrpool->fmr_flush_function = fmr_attr->fmr_func_hdlr; 2586 fmrpool->fmr_flush_arg = fmr_attr->fmr_func_arg; 2587 fmrpool->fmr_pool_size = 0; 2588 fmrpool->fmr_cache = 0; 2589 fmrpool->fmr_max_pages = fmr_attr->fmr_max_pages_per_fmr; 2590 fmrpool->fmr_page_sz = fmr_attr->fmr_page_sz; 2591 fmrpool->fmr_dirty_watermark = fmr_attr->fmr_dirty_watermark; 2592 fmrpool->fmr_dirty_len = 0; 2593 fmrpool->fmr_flags = fmr_attr->fmr_flags; 2594 2595 /* Create taskq to handle cleanup and flush processing */ 2596 (void) snprintf(taskqname, 50, "fmrpool/%d/%d @ 0x%" PRIx64, 2597 fmr_attr->fmr_pool_size, hermon_debug_fmrpool_cnt, 2598 (uint64_t)(uintptr_t)fmrpool); 2599 fmrpool->fmr_taskq = ddi_taskq_create(state->hs_dip, taskqname, 2600 HERMON_TASKQ_NTHREADS, TASKQ_DEFAULTPRI, 0); 2601 if (fmrpool->fmr_taskq == NULL) { 2602 status = IBT_INSUFF_RESOURCE; 2603 goto fail1; 2604 } 2605 2606 fmrpool->fmr_free_list = NULL; 2607 fmrpool->fmr_dirty_list = NULL; 2608 2609 if (fmr_attr->fmr_cache) { 2610 hermon_fmr_cache_init(fmrpool); 2611 } 2612 2613 for (i = 0; i < fmr_attr->fmr_pool_size; i++) { 2614 
status = hermon_mr_alloc_fmr(state, pd, fmrpool, &mr); 2615 if (status != DDI_SUCCESS) { 2616 goto fail2; 2617 } 2618 2619 fmr = (hermon_fmr_list_t *)kmem_zalloc( 2620 sizeof (hermon_fmr_list_t), sleep); 2621 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2622 2623 fmr->fmr = mr; 2624 fmr->fmr_refcnt = 0; 2625 fmr->fmr_remaps = 0; 2626 fmr->fmr_pool = fmrpool; 2627 fmr->fmr_in_cache = 0; 2628 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 2629 mr->mr_fmr = fmr; 2630 2631 fmr->fmr_next = fmrpool->fmr_free_list; 2632 fmrpool->fmr_free_list = fmr; 2633 fmrpool->fmr_pool_size++; 2634 } 2635 2636 /* Set to return pool */ 2637 *fmrpoolp = fmrpool; 2638 2639 return (IBT_SUCCESS); 2640 fail2: 2641 hermon_fmr_cache_fini(fmrpool); 2642 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) { 2643 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr)) 2644 fmr_next = fmr->fmr_next; 2645 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr); 2646 kmem_free(fmr, sizeof (hermon_fmr_list_t)); 2647 } 2648 ddi_taskq_destroy(fmrpool->fmr_taskq); 2649 fail1: 2650 kmem_free(fmrpool, sizeof (*fmrpool)); 2651 fail: 2652 if (status == DDI_FAILURE) { 2653 return (ibc_get_ci_failure(0)); 2654 } else { 2655 return (status); 2656 } 2657 } 2658 2659 /* 2660 * hermon_destroy_fmr_pool() 2661 * Destroy an FMR pool and free all associated resources. 2662 * Context: Can be called from kernel context only. 
2663 */ 2664 int 2665 hermon_destroy_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool) 2666 { 2667 hermon_fmr_list_t *fmr, *fmr_next; 2668 int status; 2669 2670 mutex_enter(&fmrpool->fmr_lock); 2671 status = hermon_fmr_cleanup(state, fmrpool); 2672 if (status != DDI_SUCCESS) { 2673 mutex_exit(&fmrpool->fmr_lock); 2674 return (status); 2675 } 2676 2677 if (fmrpool->fmr_cache) { 2678 hermon_fmr_cache_fini(fmrpool); 2679 } 2680 2681 for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) { 2682 fmr_next = fmr->fmr_next; 2683 2684 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr); 2685 kmem_free(fmr, sizeof (hermon_fmr_list_t)); 2686 } 2687 mutex_exit(&fmrpool->fmr_lock); 2688 2689 ddi_taskq_destroy(fmrpool->fmr_taskq); 2690 mutex_destroy(&fmrpool->fmr_lock); 2691 2692 kmem_free(fmrpool, sizeof (*fmrpool)); 2693 return (DDI_SUCCESS); 2694 } 2695 2696 /* 2697 * hermon_flush_fmr_pool() 2698 * Ensure that all unmapped FMRs are fully invalidated. 2699 * Context: Can be called from kernel context only. 2700 */ 2701 int 2702 hermon_flush_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool) 2703 { 2704 int status; 2705 2706 /* 2707 * Force the unmapping of all entries on the dirty list, regardless of 2708 * whether the watermark has been hit yet. 2709 */ 2710 /* grab the pool lock */ 2711 mutex_enter(&fmrpool->fmr_lock); 2712 status = hermon_fmr_cleanup(state, fmrpool); 2713 mutex_exit(&fmrpool->fmr_lock); 2714 return (status); 2715 } 2716 2717 /* 2718 * hermon_deregister_fmr() 2719 * Map memory into FMR 2720 * Context: Can be called from interrupt or base context. 
2721 */ 2722 int 2723 hermon_register_physical_fmr(hermon_state_t *state, hermon_fmrhdl_t fmrpool, 2724 ibt_pmr_attr_t *mem_pattr, hermon_mrhdl_t *mr, 2725 ibt_pmr_desc_t *mem_desc_p) 2726 { 2727 hermon_fmr_list_t *fmr; 2728 hermon_fmr_list_t query; 2729 avl_index_t where; 2730 int status; 2731 2732 /* Check length */ 2733 mutex_enter(&fmrpool->fmr_lock); 2734 if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf > 2735 fmrpool->fmr_max_pages)) { 2736 mutex_exit(&fmrpool->fmr_lock); 2737 return (IBT_MR_LEN_INVALID); 2738 } 2739 2740 mutex_enter(&fmrpool->fmr_cachelock); 2741 /* lookup in fmr cache */ 2742 /* if exists, grab it, and return it */ 2743 if (fmrpool->fmr_cache) { 2744 query.fmr_desc.pmd_iova = mem_pattr->pmr_iova; 2745 query.fmr_desc.pmd_phys_buf_list_sz = mem_pattr->pmr_len; 2746 fmr = (hermon_fmr_list_t *)avl_find(&fmrpool->fmr_cache_avl, 2747 &query, &where); 2748 2749 /* 2750 * If valid FMR was found in cache, return that fmr info 2751 */ 2752 if (fmr != NULL) { 2753 fmr->fmr_refcnt++; 2754 /* Store pmr desc for use in cache */ 2755 (void) memcpy(mem_desc_p, &fmr->fmr_desc, 2756 sizeof (ibt_pmr_desc_t)); 2757 *mr = (hermon_mrhdl_t)fmr->fmr; 2758 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) 2759 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS( 2760 *(fmr->fmr->mr_mptrsrcp))) 2761 if (hermon_rdma_debug & 0x4) 2762 IBTF_DPRINTF_L2("fmr", " reg cache: mr %p " 2763 "index %x", fmr->fmr, 2764 fmr->fmr->mr_mptrsrcp->hr_indx); 2765 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS( 2766 *(fmr->fmr->mr_mptrsrcp))) 2767 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) 2768 mutex_exit(&fmrpool->fmr_cachelock); 2769 mutex_exit(&fmrpool->fmr_lock); 2770 return (DDI_SUCCESS); 2771 } 2772 } 2773 2774 /* FMR does not exist in cache, proceed with registration */ 2775 2776 /* grab next free entry */ 2777 fmr = fmrpool->fmr_free_list; 2778 if (fmr == NULL) { 2779 IBTF_DPRINTF_L2("fmr", "WARNING: no free fmr resource"); 2780 mutex_exit(&fmrpool->fmr_cachelock); 2781 
mutex_exit(&fmrpool->fmr_lock); 2782 return (IBT_INSUFF_RESOURCE); 2783 } 2784 2785 fmrpool->fmr_free_list = fmrpool->fmr_free_list->fmr_next; 2786 fmr->fmr_next = NULL; 2787 2788 status = hermon_mr_register_physical_fmr(state, mem_pattr, fmr->fmr, 2789 mem_desc_p); 2790 if (status != DDI_SUCCESS) { 2791 mutex_exit(&fmrpool->fmr_cachelock); 2792 mutex_exit(&fmrpool->fmr_lock); 2793 return (status); 2794 } 2795 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr->fmr)) 2796 if (hermon_rdma_debug & 0x4) 2797 IBTF_DPRINTF_L2("fmr", " reg: mr %p key %x", 2798 fmr->fmr, fmr->fmr->mr_rkey); 2799 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*fmr->fmr)) 2800 2801 fmr->fmr_refcnt = 1; 2802 fmr->fmr_remaps++; 2803 2804 /* Store pmr desc for use in cache */ 2805 (void) memcpy(&fmr->fmr_desc, mem_desc_p, sizeof (ibt_pmr_desc_t)); 2806 *mr = (hermon_mrhdl_t)fmr->fmr; 2807 2808 /* Store in cache */ 2809 if (fmrpool->fmr_cache) { 2810 if (!fmr->fmr_in_cache) { 2811 avl_insert(&fmrpool->fmr_cache_avl, fmr, where); 2812 fmr->fmr_in_cache = 1; 2813 } 2814 } 2815 2816 mutex_exit(&fmrpool->fmr_cachelock); 2817 mutex_exit(&fmrpool->fmr_lock); 2818 return (DDI_SUCCESS); 2819 } 2820 2821 /* 2822 * hermon_deregister_fmr() 2823 * Unmap FMR 2824 * Context: Can be called from kernel context only. 2825 */ 2826 int 2827 hermon_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr) 2828 { 2829 hermon_fmr_list_t *fmr; 2830 hermon_fmrhdl_t fmrpool; 2831 int status; 2832 2833 fmr = mr->mr_fmr; 2834 fmrpool = fmr->fmr_pool; 2835 2836 /* Grab pool lock */ 2837 mutex_enter(&fmrpool->fmr_lock); 2838 fmr->fmr_refcnt--; 2839 2840 if (fmr->fmr_refcnt == 0) { 2841 /* 2842 * First, do some bit of invalidation, reducing our exposure to 2843 * having this region still registered in hardware. 2844 */ 2845 (void) hermon_mr_invalidate_fmr(state, mr); 2846 2847 /* 2848 * If we've exhausted our remaps then add the FMR to the dirty 2849 * list, not allowing it to be re-used until we have done a 2850 * flush. 
Otherwise, simply add it back to the free list for 2851 * re-mapping. 2852 */ 2853 if (fmr->fmr_remaps < 2854 state->hs_cfg_profile->cp_fmr_max_remaps) { 2855 /* add to free list */ 2856 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) 2857 if (hermon_rdma_debug & 0x4) 2858 IBTF_DPRINTF_L2("fmr", "dereg: mr %p key %x", 2859 fmr->fmr, fmr->fmr->mr_rkey); 2860 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) 2861 fmr->fmr_next = fmrpool->fmr_free_list; 2862 fmrpool->fmr_free_list = fmr; 2863 } else { 2864 /* add to dirty list */ 2865 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) 2866 if (hermon_rdma_debug & 0x4) 2867 IBTF_DPRINTF_L2("fmr", "dirty: mr %p key %x", 2868 fmr->fmr, fmr->fmr->mr_rkey); 2869 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr))) 2870 fmr->fmr_next = fmrpool->fmr_dirty_list; 2871 fmrpool->fmr_dirty_list = fmr; 2872 fmrpool->fmr_dirty_len++; 2873 2874 status = ddi_taskq_dispatch(fmrpool->fmr_taskq, 2875 hermon_fmr_processing, fmrpool, DDI_NOSLEEP); 2876 if (status == DDI_FAILURE) { 2877 mutex_exit(&fmrpool->fmr_lock); 2878 return (IBT_INSUFF_RESOURCE); 2879 } 2880 } 2881 } 2882 /* Release pool lock */ 2883 mutex_exit(&fmrpool->fmr_lock); 2884 2885 return (DDI_SUCCESS); 2886 } 2887 2888 2889 /* 2890 * hermon_fmr_processing() 2891 * If required, perform cleanup. 2892 * Context: Called from taskq context only. 
2893 */ 2894 static void 2895 hermon_fmr_processing(void *fmr_args) 2896 { 2897 hermon_fmrhdl_t fmrpool; 2898 int status; 2899 2900 ASSERT(fmr_args != NULL); 2901 2902 fmrpool = (hermon_fmrhdl_t)fmr_args; 2903 2904 /* grab pool lock */ 2905 mutex_enter(&fmrpool->fmr_lock); 2906 if (fmrpool->fmr_dirty_len >= fmrpool->fmr_dirty_watermark) { 2907 status = hermon_fmr_cleanup(fmrpool->fmr_state, fmrpool); 2908 if (status != DDI_SUCCESS) { 2909 mutex_exit(&fmrpool->fmr_lock); 2910 return; 2911 } 2912 2913 if (fmrpool->fmr_flush_function != NULL) { 2914 (void) fmrpool->fmr_flush_function( 2915 (ibc_fmr_pool_hdl_t)fmrpool, 2916 fmrpool->fmr_flush_arg); 2917 } 2918 } 2919 2920 /* let pool lock go */ 2921 mutex_exit(&fmrpool->fmr_lock); 2922 } 2923 2924 /* 2925 * hermon_fmr_cleanup() 2926 * Perform cleaning processing, walking the list and performing the MTT sync 2927 * operation if required. 2928 * Context: can be called from taskq or base context. 2929 */ 2930 static int 2931 hermon_fmr_cleanup(hermon_state_t *state, hermon_fmrhdl_t fmrpool) 2932 { 2933 hermon_fmr_list_t *fmr; 2934 hermon_fmr_list_t *fmr_next; 2935 int sync_needed; 2936 int status; 2937 2938 ASSERT(MUTEX_HELD(&fmrpool->fmr_lock)); 2939 2940 sync_needed = 0; 2941 for (fmr = fmrpool->fmr_dirty_list; fmr; fmr = fmr_next) { 2942 fmr_next = fmr->fmr_next; 2943 fmr->fmr_remaps = 0; 2944 2945 (void) hermon_mr_deregister_fmr(state, fmr->fmr); 2946 2947 /* 2948 * Update lists. 2949 * - add fmr back to free list 2950 * - remove fmr from dirty list 2951 */ 2952 fmr->fmr_next = fmrpool->fmr_free_list; 2953 fmrpool->fmr_free_list = fmr; 2954 2955 2956 /* 2957 * Because we have updated the dirty list, and deregistered the 2958 * FMR entry, we do need to sync the TPT, so we set the 2959 * 'sync_needed' flag here so we sync once we finish dirty_list 2960 * processing. 
2961 */ 2962 sync_needed = 1; 2963 } 2964 2965 fmrpool->fmr_dirty_list = NULL; 2966 fmrpool->fmr_dirty_len = 0; 2967 2968 if (sync_needed) { 2969 status = hermon_sync_tpt_cmd_post(state, 2970 HERMON_CMD_NOSLEEP_SPIN); 2971 if (status != HERMON_CMD_SUCCESS) { 2972 return (status); 2973 } 2974 } 2975 2976 return (DDI_SUCCESS); 2977 } 2978 2979 /* 2980 * hermon_fmr_avl_compare() 2981 * Context: Can be called from user or kernel context. 2982 */ 2983 static int 2984 hermon_fmr_avl_compare(const void *q, const void *e) 2985 { 2986 hermon_fmr_list_t *entry, *query; 2987 2988 entry = (hermon_fmr_list_t *)e; 2989 query = (hermon_fmr_list_t *)q; 2990 2991 if (query->fmr_desc.pmd_iova < entry->fmr_desc.pmd_iova) { 2992 return (-1); 2993 } else if (query->fmr_desc.pmd_iova > entry->fmr_desc.pmd_iova) { 2994 return (+1); 2995 } else { 2996 return (0); 2997 } 2998 } 2999 3000 3001 /* 3002 * hermon_fmr_cache_init() 3003 * Context: Can be called from user or kernel context. 3004 */ 3005 static void 3006 hermon_fmr_cache_init(hermon_fmrhdl_t fmr) 3007 { 3008 /* Initialize the lock used for FMR cache AVL tree access */ 3009 mutex_init(&fmr->fmr_cachelock, NULL, MUTEX_DRIVER, 3010 DDI_INTR_PRI(fmr->fmr_state->hs_intrmsi_pri)); 3011 3012 /* Initialize the AVL tree for the FMR cache */ 3013 avl_create(&fmr->fmr_cache_avl, hermon_fmr_avl_compare, 3014 sizeof (hermon_fmr_list_t), 3015 offsetof(hermon_fmr_list_t, fmr_avlnode)); 3016 3017 fmr->fmr_cache = 1; 3018 } 3019 3020 3021 /* 3022 * hermon_fmr_cache_fini() 3023 * Context: Can be called from user or kernel context. 3024 */ 3025 static void 3026 hermon_fmr_cache_fini(hermon_fmrhdl_t fmr) 3027 { 3028 void *cookie; 3029 3030 /* 3031 * Empty all entries (if necessary) and destroy the AVL tree. 
3032 * The FMRs themselves are freed as part of destroy_pool() 3033 */ 3034 cookie = NULL; 3035 while (((void *)(hermon_fmr_list_t *)avl_destroy_nodes( 3036 &fmr->fmr_cache_avl, &cookie)) != NULL) { 3037 /* loop through */ 3038 } 3039 avl_destroy(&fmr->fmr_cache_avl); 3040 3041 /* Destroy the lock used for FMR cache */ 3042 mutex_destroy(&fmr->fmr_cachelock); 3043 } 3044