/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * hermon_mr.c
 *    Hermon Memory Region/Window Routines
 *
 *    Implements all the routines necessary to provide the requisite memory
 *    registration verbs.  These include operations like RegisterMemRegion(),
 *    DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion,
 *    etc., that affect Memory Regions.  It also includes the verbs that
 *    affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
 *    and QueryMemWindow().
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/esunddi.h>

#include <sys/ib/adapters/hermon/hermon.h>

extern uint32_t hermon_kernel_data_ro;
extern uint32_t hermon_user_data_ro;

/*
 * Used by hermon_mr_keycalc() below to fill in the "unconstrained" portion
 * of Hermon memory keys (LKeys and RKeys)
 */
static uint_t hermon_memkey_cnt = 0x00;
#define	HERMON_MEMKEY_SHIFT	24

/*
 * Value written into an MPT entry's status byte to indicate that ownership
 * of the entry currently resides with software (see the ddi_put8() calls
 * in the FMR routines below; writing 0x0 hands the entry back to hardware).
 */
#define	HERMON_MPT_SW_OWNERSHIP	0xF

/*
 * Forward declarations for the file-local worker routines that implement
 * the bulk of the registration/deregistration machinery.
 */
static int hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type);
static int hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op);
static int hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level);
static uint64_t hermon_mr_nummtt_needed(hermon_state_t *state,
    hermon_bind_info_t *bind, uint_t *mtt_pgsize);
static int hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer);
static void hermon_mr_mem_unbind(hermon_state_t *state,
    hermon_bind_info_t *bind);
static int hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
    hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits);
static int hermon_mr_fast_mtt_write_fmr(hermon_rsrc_t *mtt,
    ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits);
static uint_t hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc);
static uint_t hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc);


/*
 * The Hermon umem_lockmemory() callback ops.  When userland memory is
 * registered, these callback ops are specified.
The hermon_umap_umemlock_cb() 85 * callback will be called whenever the memory for the corresponding 86 * ddi_umem_cookie_t is being freed. 87 */ 88 static struct umem_callback_ops hermon_umem_cbops = { 89 UMEM_CALLBACK_VERSION, 90 hermon_umap_umemlock_cb, 91 }; 92 93 94 95 /* 96 * hermon_mr_register() 97 * Context: Can be called from interrupt or base context. 98 */ 99 int 100 hermon_mr_register(hermon_state_t *state, hermon_pdhdl_t pd, 101 ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op, 102 hermon_mpt_rsrc_type_t mpt_type) 103 { 104 hermon_bind_info_t bind; 105 int status; 106 107 /* 108 * Fill in the "bind" struct. This struct provides the majority 109 * of the information that will be used to distinguish between an 110 * "addr" binding (as is the case here) and a "buf" binding (see 111 * below). The "bind" struct is later passed to hermon_mr_mem_bind() 112 * which does most of the "heavy lifting" for the Hermon memory 113 * registration routines. 114 */ 115 bind.bi_type = HERMON_BINDHDL_VADDR; 116 bind.bi_addr = mr_attr->mr_vaddr; 117 bind.bi_len = mr_attr->mr_len; 118 bind.bi_as = mr_attr->mr_as; 119 bind.bi_flags = mr_attr->mr_flags; 120 status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, 121 mpt_type); 122 return (status); 123 } 124 125 126 /* 127 * hermon_mr_register_buf() 128 * Context: Can be called from interrupt or base context. 129 */ 130 int 131 hermon_mr_register_buf(hermon_state_t *state, hermon_pdhdl_t pd, 132 ibt_smr_attr_t *mr_attr, struct buf *buf, hermon_mrhdl_t *mrhdl, 133 hermon_mr_options_t *op, hermon_mpt_rsrc_type_t mpt_type) 134 { 135 hermon_bind_info_t bind; 136 int status; 137 138 /* 139 * Fill in the "bind" struct. This struct provides the majority 140 * of the information that will be used to distinguish between an 141 * "addr" binding (see above) and a "buf" binding (as is the case 142 * here). 
The "bind" struct is later passed to hermon_mr_mem_bind() 143 * which does most of the "heavy lifting" for the Hermon memory 144 * registration routines. Note: We have chosen to provide 145 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is 146 * not set). It is not critical what value we choose here as it need 147 * only be unique for the given RKey (which will happen by default), 148 * so the choice here is somewhat arbitrary. 149 */ 150 bind.bi_type = HERMON_BINDHDL_BUF; 151 bind.bi_buf = buf; 152 if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) { 153 bind.bi_addr = mr_attr->mr_vaddr; 154 } else { 155 bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr; 156 } 157 bind.bi_as = NULL; 158 bind.bi_len = (uint64_t)buf->b_bcount; 159 bind.bi_flags = mr_attr->mr_flags; 160 status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, mpt_type); 161 return (status); 162 } 163 164 165 /* 166 * hermon_mr_register_shared() 167 * Context: Can be called from interrupt or base context. 168 */ 169 int 170 hermon_mr_register_shared(hermon_state_t *state, hermon_mrhdl_t mrhdl, 171 hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new) 172 { 173 hermon_rsrc_t *mpt, *mtt, *rsrc; 174 hermon_umap_db_entry_t *umapdb; 175 hermon_hw_dmpt_t mpt_entry; 176 hermon_mrhdl_t mr; 177 hermon_bind_info_t *bind; 178 ddi_umem_cookie_t umem_cookie; 179 size_t umem_len; 180 caddr_t umem_addr; 181 uint64_t mtt_addr, pgsize_msk; 182 uint_t sleep, mr_is_umem; 183 int status, umem_flags; 184 185 /* 186 * Check the sleep flag. Ensure that it is consistent with the 187 * current thread context (i.e. if we are currently in the interrupt 188 * context, then we shouldn't be attempting to sleep). 189 */ 190 sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? 
HERMON_NOSLEEP : 191 HERMON_SLEEP; 192 if ((sleep == HERMON_SLEEP) && 193 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 194 status = IBT_INVALID_PARAM; 195 goto mrshared_fail; 196 } 197 198 /* Increment the reference count on the protection domain (PD) */ 199 hermon_pd_refcnt_inc(pd); 200 201 /* 202 * Allocate an MPT entry. This will be filled in with all the 203 * necessary parameters to define the shared memory region. 204 * Specifically, it will be made to reference the currently existing 205 * MTT entries and ownership of the MPT will be passed to the hardware 206 * in the last step below. If we fail here, we must undo the 207 * protection domain reference count. 208 */ 209 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt); 210 if (status != DDI_SUCCESS) { 211 status = IBT_INSUFF_RESOURCE; 212 goto mrshared_fail1; 213 } 214 215 /* 216 * Allocate the software structure for tracking the shared memory 217 * region (i.e. the Hermon Memory Region handle). If we fail here, we 218 * must undo the protection domain reference count and the previous 219 * resource allocation. 220 */ 221 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc); 222 if (status != DDI_SUCCESS) { 223 status = IBT_INSUFF_RESOURCE; 224 goto mrshared_fail2; 225 } 226 mr = (hermon_mrhdl_t)rsrc->hr_addr; 227 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 228 229 /* 230 * Setup and validate the memory region access flags. This means 231 * translating the IBTF's enable flags into the access flags that 232 * will be used in later operations. 
233 */ 234 mr->mr_accflag = 0; 235 if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND) 236 mr->mr_accflag |= IBT_MR_WINDOW_BIND; 237 if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE) 238 mr->mr_accflag |= IBT_MR_LOCAL_WRITE; 239 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ) 240 mr->mr_accflag |= IBT_MR_REMOTE_READ; 241 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE) 242 mr->mr_accflag |= IBT_MR_REMOTE_WRITE; 243 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 244 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; 245 246 /* 247 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 248 * from a certain number of "constrained" bits (the least significant 249 * bits) and some number of "unconstrained" bits. The constrained 250 * bits must be set to the index of the entry in the MPT table, but 251 * the unconstrained bits can be set to any value we wish. Note: 252 * if no remote access is required, then the RKey value is not filled 253 * in. Otherwise both Rkey and LKey are given the same value. 254 */ 255 mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); 256 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 257 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 258 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 259 mr->mr_rkey = mr->mr_lkey; 260 } 261 262 /* Grab the MR lock for the current memory region */ 263 mutex_enter(&mrhdl->mr_lock); 264 265 /* 266 * Check here to see if the memory region has already been partially 267 * deregistered as a result of a hermon_umap_umemlock_cb() callback. 268 * If so, this is an error, return failure. 269 */ 270 if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) { 271 mutex_exit(&mrhdl->mr_lock); 272 status = IBT_MR_HDL_INVALID; 273 goto mrshared_fail3; 274 } 275 276 /* 277 * Determine if the original memory was from userland and, if so, pin 278 * the pages (again) with umem_lockmemory(). This will guarantee a 279 * separate callback for each of this shared region's MR handles. 
280 * If this is userland memory, then allocate an entry in the 281 * "userland resources database". This will later be added to 282 * the database (after all further memory registration operations are 283 * successful). If we fail here, we must undo all the above setup. 284 */ 285 mr_is_umem = mrhdl->mr_is_umem; 286 if (mr_is_umem) { 287 umem_len = ptob(btopr(mrhdl->mr_bindinfo.bi_len)); 288 umem_addr = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr & 289 ~PAGEOFFSET); 290 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ | 291 DDI_UMEMLOCK_LONGTERM); 292 status = umem_lockmemory(umem_addr, umem_len, umem_flags, 293 &umem_cookie, &hermon_umem_cbops, NULL); 294 if (status != 0) { 295 mutex_exit(&mrhdl->mr_lock); 296 status = IBT_INSUFF_RESOURCE; 297 goto mrshared_fail3; 298 } 299 300 umapdb = hermon_umap_db_alloc(state->hs_instance, 301 (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC, 302 (uint64_t)(uintptr_t)rsrc); 303 if (umapdb == NULL) { 304 mutex_exit(&mrhdl->mr_lock); 305 status = IBT_INSUFF_RESOURCE; 306 goto mrshared_fail4; 307 } 308 } 309 310 /* 311 * Copy the MTT resource pointer (and additional parameters) from 312 * the original Hermon Memory Region handle. Note: this is normally 313 * where the hermon_mr_mem_bind() routine would be called, but because 314 * we already have bound and filled-in MTT entries it is simply a 315 * matter here of managing the MTT reference count and grabbing the 316 * address of the MTT table entries (for filling in the shared region's 317 * MPT entry). 
318 */ 319 mr->mr_mttrsrcp = mrhdl->mr_mttrsrcp; 320 mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz; 321 mr->mr_bindinfo = mrhdl->mr_bindinfo; 322 mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp; 323 mutex_exit(&mrhdl->mr_lock); 324 bind = &mr->mr_bindinfo; 325 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 326 mtt = mr->mr_mttrsrcp; 327 328 /* 329 * Increment the MTT reference count (to reflect the fact that 330 * the MTT is now shared) 331 */ 332 (void) hermon_mtt_refcnt_inc(mr->mr_mttrefcntp); 333 334 /* 335 * Update the new "bind" virtual address. Do some extra work here 336 * to ensure proper alignment. That is, make sure that the page 337 * offset for the beginning of the old range is the same as the 338 * offset for this new mapping 339 */ 340 pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1); 341 bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) | 342 (mr->mr_bindinfo.bi_addr & pgsize_msk)); 343 344 /* 345 * Fill in the MPT entry. This is the final step before passing 346 * ownership of the MPT entry to the Hermon hardware. We use all of 347 * the information collected/calculated above to fill in the 348 * requisite portions of the MPT. 349 */ 350 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); 351 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0; 352 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 353 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; 354 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0; 355 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 
1 : 0; 356 mpt_entry.lr = 1; 357 mpt_entry.reg_win = HERMON_MPT_IS_REGION; 358 mpt_entry.entity_sz = mr->mr_logmttpgsz; 359 mpt_entry.mem_key = mr->mr_lkey; 360 mpt_entry.pd = pd->pd_pdnum; 361 mpt_entry.start_addr = bind->bi_addr; 362 mpt_entry.reg_win_len = bind->bi_len; 363 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT); 364 mpt_entry.mtt_addr_h = mtt_addr >> 32; 365 mpt_entry.mtt_addr_l = mtt_addr >> 3; 366 367 /* 368 * Write the MPT entry to hardware. Lastly, we pass ownership of 369 * the entry to the hardware. Note: in general, this operation 370 * shouldn't fail. But if it does, we have to undo everything we've 371 * done above before returning error. 372 */ 373 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 374 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep); 375 if (status != HERMON_CMD_SUCCESS) { 376 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n", 377 status); 378 if (status == HERMON_CMD_INVALID_STATUS) { 379 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 380 } 381 status = ibc_get_ci_failure(0); 382 goto mrshared_fail5; 383 } 384 385 /* 386 * Fill in the rest of the Hermon Memory Region handle. Having 387 * successfully transferred ownership of the MPT, we can update the 388 * following fields for use in further operations on the MR. 389 */ 390 mr->mr_mptrsrcp = mpt; 391 mr->mr_mttrsrcp = mtt; 392 mr->mr_mpt_type = HERMON_MPT_DMPT; 393 mr->mr_pdhdl = pd; 394 mr->mr_rsrcp = rsrc; 395 mr->mr_is_umem = mr_is_umem; 396 mr->mr_is_fmr = 0; 397 mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL; 398 mr->mr_umem_cbfunc = NULL; 399 mr->mr_umem_cbarg1 = NULL; 400 mr->mr_umem_cbarg2 = NULL; 401 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey); 402 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey); 403 404 /* 405 * If this is userland memory, then we need to insert the previously 406 * allocated entry into the "userland resources database". 
This will 407 * allow for later coordination between the hermon_umap_umemlock_cb() 408 * callback and hermon_mr_deregister(). 409 */ 410 if (mr_is_umem) { 411 hermon_umap_db_add(umapdb); 412 } 413 414 *mrhdl_new = mr; 415 416 return (DDI_SUCCESS); 417 418 /* 419 * The following is cleanup for all possible failure cases in this routine 420 */ 421 mrshared_fail5: 422 (void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp); 423 if (mr_is_umem) { 424 hermon_umap_db_free(umapdb); 425 } 426 mrshared_fail4: 427 if (mr_is_umem) { 428 ddi_umem_unlock(umem_cookie); 429 } 430 mrshared_fail3: 431 hermon_rsrc_free(state, &rsrc); 432 mrshared_fail2: 433 hermon_rsrc_free(state, &mpt); 434 mrshared_fail1: 435 hermon_pd_refcnt_dec(pd); 436 mrshared_fail: 437 return (status); 438 } 439 440 /* 441 * hermon_mr_alloc_fmr() 442 * Context: Can be called from interrupt or base context. 443 */ 444 int 445 hermon_mr_alloc_fmr(hermon_state_t *state, hermon_pdhdl_t pd, 446 hermon_fmrhdl_t fmr_pool, hermon_mrhdl_t *mrhdl) 447 { 448 hermon_rsrc_t *mpt, *mtt, *rsrc; 449 hermon_hw_dmpt_t mpt_entry; 450 hermon_mrhdl_t mr; 451 hermon_bind_info_t bind; 452 uint64_t mtt_addr; 453 uint64_t nummtt; 454 uint_t sleep, mtt_pgsize_bits; 455 int status; 456 457 /* 458 * Check the sleep flag. Ensure that it is consistent with the 459 * current thread context (i.e. if we are currently in the interrupt 460 * context, then we shouldn't be attempting to sleep). 461 */ 462 sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP : 463 HERMON_NOSLEEP; 464 if ((sleep == HERMON_SLEEP) && 465 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 466 return (IBT_INVALID_PARAM); 467 } 468 469 /* Increment the reference count on the protection domain (PD) */ 470 hermon_pd_refcnt_inc(pd); 471 472 /* 473 * Allocate an MPT entry. This will be filled in with all the 474 * necessary parameters to define the FMR. 
Specifically, it will be 475 * made to reference the currently existing MTT entries and ownership 476 * of the MPT will be passed to the hardware in the last step below. 477 * If we fail here, we must undo the protection domain reference count. 478 */ 479 480 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt); 481 if (status != DDI_SUCCESS) { 482 status = IBT_INSUFF_RESOURCE; 483 goto fmralloc_fail1; 484 } 485 486 /* 487 * Allocate the software structure for tracking the fmr memory 488 * region (i.e. the Hermon Memory Region handle). If we fail here, we 489 * must undo the protection domain reference count and the previous 490 * resource allocation. 491 */ 492 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc); 493 if (status != DDI_SUCCESS) { 494 status = IBT_INSUFF_RESOURCE; 495 goto fmralloc_fail2; 496 } 497 mr = (hermon_mrhdl_t)rsrc->hr_addr; 498 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 499 500 /* 501 * Setup and validate the memory region access flags. This means 502 * translating the IBTF's enable flags into the access flags that 503 * will be used in later operations. 504 */ 505 mr->mr_accflag = 0; 506 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE) 507 mr->mr_accflag |= IBT_MR_LOCAL_WRITE; 508 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ) 509 mr->mr_accflag |= IBT_MR_REMOTE_READ; 510 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE) 511 mr->mr_accflag |= IBT_MR_REMOTE_WRITE; 512 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 513 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; 514 515 /* 516 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 517 * from a certain number of "constrained" bits (the least significant 518 * bits) and some number of "unconstrained" bits. The constrained 519 * bits must be set to the index of the entry in the MPT table, but 520 * the unconstrained bits can be set to any value we wish. Note: 521 * if no remote access is required, then the RKey value is not filled 522 * in. 
Otherwise both Rkey and LKey are given the same value. 523 */ 524 mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); 525 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 526 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 527 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 528 mr->mr_rkey = mr->mr_lkey; 529 } 530 531 /* 532 * Determine number of pages spanned. This routine uses the 533 * information in the "bind" struct to determine the required 534 * number of MTT entries needed (and returns the suggested page size - 535 * as a "power-of-2" - for each MTT entry). 536 */ 537 /* Assume address will be page aligned later */ 538 bind.bi_addr = 0; 539 /* Calculate size based on given max pages */ 540 bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT; 541 nummtt = hermon_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits); 542 543 /* 544 * Allocate the MTT entries. Use the calculations performed above to 545 * allocate the required number of MTT entries. If we fail here, we 546 * must not only undo all the previous resource allocation (and PD 547 * reference count), but we must also unbind the memory. 548 */ 549 status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt); 550 if (status != DDI_SUCCESS) { 551 status = IBT_INSUFF_RESOURCE; 552 goto fmralloc_fail3; 553 } 554 mr->mr_logmttpgsz = mtt_pgsize_bits; 555 556 /* 557 * Fill in the MPT entry. This is the final step before passing 558 * ownership of the MPT entry to the Hermon hardware. We use all of 559 * the information collected/calculated above to fill in the 560 * requisite portions of the MPT. 561 */ 562 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); 563 mpt_entry.en_bind = 0; 564 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 565 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; 566 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0; 567 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 
1 : 0; 568 mpt_entry.lr = 1; 569 mpt_entry.reg_win = HERMON_MPT_IS_REGION; 570 mpt_entry.pd = pd->pd_pdnum; 571 572 mpt_entry.entity_sz = mr->mr_logmttpgsz; 573 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT); 574 mpt_entry.mtt_addr_h = mtt_addr >> 32; 575 mpt_entry.mtt_addr_l = mtt_addr >> 3; 576 mpt_entry.mem_key = mr->mr_lkey; 577 578 /* 579 * FMR sets these to 0 for now. Later during actual fmr registration 580 * these values are filled in. 581 */ 582 mpt_entry.start_addr = 0; 583 mpt_entry.reg_win_len = 0; 584 585 /* 586 * Write the MPT entry to hardware. Lastly, we pass ownership of 587 * the entry to the hardware. Note: in general, this operation 588 * shouldn't fail. But if it does, we have to undo everything we've 589 * done above before returning error. 590 */ 591 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 592 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep); 593 if (status != HERMON_CMD_SUCCESS) { 594 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n", 595 status); 596 if (status == HERMON_CMD_INVALID_STATUS) { 597 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 598 } 599 status = ibc_get_ci_failure(0); 600 goto fmralloc_fail4; 601 } 602 603 /* 604 * Fill in the rest of the Hermon Memory Region handle. Having 605 * successfully transferred ownership of the MPT, we can update the 606 * following fields for use in further operations on the MR. Also, set 607 * that this is an FMR region. 
608 */ 609 mr->mr_mptrsrcp = mpt; 610 mr->mr_mttrsrcp = mtt; 611 mr->mr_mpt_type = HERMON_MPT_DMPT; 612 mr->mr_pdhdl = pd; 613 mr->mr_rsrcp = rsrc; 614 mr->mr_is_fmr = 1; 615 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey); 616 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey); 617 (void) memcpy(&mr->mr_bindinfo, &bind, sizeof (hermon_bind_info_t)); 618 619 *mrhdl = mr; 620 621 return (DDI_SUCCESS); 622 623 /* 624 * The following is cleanup for all possible failure cases in this routine 625 */ 626 fmralloc_fail4: 627 kmem_free(mtt, sizeof (hermon_rsrc_t) * nummtt); 628 fmralloc_fail3: 629 hermon_rsrc_free(state, &rsrc); 630 fmralloc_fail2: 631 hermon_rsrc_free(state, &mpt); 632 fmralloc_fail1: 633 hermon_pd_refcnt_dec(pd); 634 fmralloc_fail: 635 return (status); 636 } 637 638 /* 639 * hermon_mr_register_physical_fmr() 640 * Context: Can be called from interrupt or base context. 641 */ 642 /*ARGSUSED*/ 643 int 644 hermon_mr_register_physical_fmr(hermon_state_t *state, 645 ibt_pmr_attr_t *mem_pattr_p, hermon_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p) 646 { 647 hermon_rsrc_t *mpt; 648 uint64_t *mpt_table; 649 int status; 650 651 mutex_enter(&mr->mr_lock); 652 mpt = mr->mr_mptrsrcp; 653 mpt_table = (uint64_t *)mpt->hr_addr; 654 655 /* Write MPT status to SW bit */ 656 ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0xF); 657 658 /* 659 * Write the mapped addresses into the MTT entries. FMR needs to do 660 * this a little differently, so we call the fmr specific fast mtt 661 * write here. 662 */ 663 status = hermon_mr_fast_mtt_write_fmr(mr->mr_mttrsrcp, mem_pattr_p, 664 mr->mr_logmttpgsz); 665 if (status != DDI_SUCCESS) { 666 mutex_exit(&mr->mr_lock); 667 status = ibc_get_ci_failure(0); 668 goto fmr_reg_fail1; 669 } 670 671 /* 672 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 673 * from a certain number of "constrained" bits (the least significant 674 * bits) and some number of "unconstrained" bits. 
The constrained 675 * bits must be set to the index of the entry in the MPT table, but 676 * the unconstrained bits can be set to any value we wish. Note: 677 * if no remote access is required, then the RKey value is not filled 678 * in. Otherwise both Rkey and LKey are given the same value. 679 */ 680 mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); 681 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 682 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 683 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 684 mr->mr_rkey = mr->mr_lkey; 685 } 686 687 /* write mem key value */ 688 ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[1], mr->mr_lkey); 689 690 /* write length value */ 691 ddi_put64(mpt->hr_acchdl, &mpt_table[3], mem_pattr_p->pmr_len); 692 693 /* write start addr value */ 694 ddi_put64(mpt->hr_acchdl, &mpt_table[2], mem_pattr_p->pmr_iova); 695 696 /* write lkey value */ 697 ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[4], mr->mr_lkey); 698 699 /* Write MPT status to HW bit */ 700 ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0x0); 701 702 /* Fill in return parameters */ 703 mem_desc_p->pmd_lkey = mr->mr_lkey; 704 mem_desc_p->pmd_rkey = mr->mr_rkey; 705 mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova; 706 mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len; 707 708 /* Fill in MR bindinfo struct for later sync or query operations */ 709 mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova; 710 mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT; 711 712 mutex_exit(&mr->mr_lock); 713 714 return (DDI_SUCCESS); 715 716 fmr_reg_fail1: 717 /* 718 * Note, we fail here, and purposely leave the memory ownership in 719 * software. The memory tables may be corrupt, so we leave the region 720 * unregistered. 721 */ 722 return (DDI_FAILURE); 723 } 724 725 726 /* 727 * hermon_mr_deregister() 728 * Context: Can be called from interrupt or base context. 
729 */ 730 /* ARGSUSED */ 731 int 732 hermon_mr_deregister(hermon_state_t *state, hermon_mrhdl_t *mrhdl, uint_t level, 733 uint_t sleep) 734 { 735 hermon_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt; 736 hermon_umap_db_entry_t *umapdb; 737 hermon_pdhdl_t pd; 738 hermon_mrhdl_t mr; 739 hermon_bind_info_t *bind; 740 uint64_t value; 741 int status; 742 uint_t shared_mtt; 743 744 /* 745 * Check the sleep flag. Ensure that it is consistent with the 746 * current thread context (i.e. if we are currently in the interrupt 747 * context, then we shouldn't be attempting to sleep). 748 */ 749 if ((sleep == HERMON_SLEEP) && 750 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 751 status = IBT_INVALID_PARAM; 752 return (status); 753 } 754 755 /* 756 * Pull all the necessary information from the Hermon Memory Region 757 * handle. This is necessary here because the resource for the 758 * MR handle is going to be freed up as part of the this 759 * deregistration 760 */ 761 mr = *mrhdl; 762 mutex_enter(&mr->mr_lock); 763 mpt = mr->mr_mptrsrcp; 764 mtt = mr->mr_mttrsrcp; 765 mtt_refcnt = mr->mr_mttrefcntp; 766 rsrc = mr->mr_rsrcp; 767 pd = mr->mr_pdhdl; 768 bind = &mr->mr_bindinfo; 769 770 /* 771 * Check here if the memory region is really an FMR. If so, this is a 772 * bad thing and we shouldn't be here. Return failure. 773 */ 774 if (mr->mr_is_fmr) { 775 mutex_exit(&mr->mr_lock); 776 return (IBT_INVALID_PARAM); 777 } 778 779 /* 780 * Check here to see if the memory region has already been partially 781 * deregistered as a result of the hermon_umap_umemlock_cb() callback. 782 * If so, then jump to the end and free the remaining resources. 783 */ 784 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) { 785 goto mrdereg_finish_cleanup; 786 } 787 788 /* 789 * We must drop the "mr_lock" here to ensure that both SLEEP and 790 * NOSLEEP calls into the firmware work as expected. 
Also, if two
     * threads are attempting to access this MR (via de-register,
     * re-register, or otherwise), then we allow the firmware to enforce
     * the checking, that only one deregister is valid.
     */
    mutex_exit(&mr->mr_lock);

    /*
     * Reclaim MPT entry from hardware (if necessary).  Since the
     * hermon_mr_deregister() routine is used in the memory region
     * reregistration process as well, it is possible that we will
     * not always wish to reclaim ownership of the MPT.  Check the
     * "level" arg and, if necessary, attempt to reclaim it.  If
     * the ownership transfer fails for any reason, we check to see
     * what command status was returned from the hardware.  The only
     * "expected" error status is the one that indicates an attempt to
     * deregister a memory region that has memory windows bound to it
     */
    if (level >= HERMON_MR_DEREG_ALL) {
        if (mr->mr_mpt_type >= HERMON_MPT_DMPT) {
            status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
                NULL, 0, mpt->hr_indx, sleep);
            if (status != HERMON_CMD_SUCCESS) {
                if (status == HERMON_CMD_REG_BOUND) {
                    /* Memory window(s) still bound to this region */
                    return (IBT_MR_IN_USE);
                } else {
                    cmn_err(CE_CONT, "Hermon: HW2SW_MPT "
                        "command failed: %08x\n", status);
                    if (status ==
                        HERMON_CMD_INVALID_STATUS) {
                        /*
                         * NOTE(review): other failure paths in this
                         * file post HCA_ERR_SRV_LOST here; confirm
                         * DDI_SERVICE_LOST is intended.
                         */
                        hermon_fm_ereport(state,
                            HCA_SYS_ERR,
                            DDI_SERVICE_LOST);
                    }
                    return (IBT_INVALID_PARAM);
                }
            }
        }
    }

    /*
     * Re-grab the mr_lock here.  Since further access to the protected
     * 'mr' structure is needed, and we would have returned previously for
     * the multiple deregistration case, we can safely grab the lock here.
     */
    mutex_enter(&mr->mr_lock);

    /*
     * If the memory had come from userland, then we do a lookup in the
     * "userland resources database".  On success, we free the entry, call
     * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
     * an indication that the umem_lockmemory() callback has called
     * hermon_mr_deregister()), we call ddi_umem_unlock() and invalidate
     * the "mr_umemcookie" field in the MR handle (this will be used
     * later to detect that only partial cleanup still remains to be done
     * on the MR handle).
     */
    if (mr->mr_is_umem) {
        status = hermon_umap_db_find(state->hs_instance,
            (uint64_t)(uintptr_t)mr->mr_umemcookie,
            MLNX_UMAP_MRMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
            &umapdb);
        if (status == DDI_SUCCESS) {
            hermon_umap_db_free(umapdb);
            ddi_umem_unlock(mr->mr_umemcookie);
        } else {
            /* Callback already ran; mark handle partially cleaned */
            ddi_umem_unlock(mr->mr_umemcookie);
            mr->mr_umemcookie = NULL;
        }
    }

    /*
     * Decrement the MTT reference count.  Since the MTT resource
     * may be shared between multiple memory regions (as a result
     * of a "RegisterSharedMR" verb) it is important that we not
     * free up or unbind resources prematurely.  If it's not shared (as
     * indicated by the return status), then free the resource.
     */
    shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt);
    if (!shared_mtt) {
        hermon_rsrc_free(state, &mtt_refcnt);
    }

    /*
     * Free up the MTT entries and unbind the memory.  Here, as above, we
     * attempt to free these resources only if it is appropriate to do so.
     */
    if (!shared_mtt) {
        if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) {
            hermon_mr_mem_unbind(state, bind);
        }
        hermon_rsrc_free(state, &mtt);
    }

    /*
     * If the MR handle has been invalidated, then drop the
     * lock and return success.  Note: This only happens because
     * the umem_lockmemory() callback has been triggered.  The
     * cleanup here is partial, and further cleanup (in a
     * subsequent hermon_mr_deregister() call) will be necessary.
     */
    if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
        mutex_exit(&mr->mr_lock);
        return (DDI_SUCCESS);
    }

mrdereg_finish_cleanup:
    mutex_exit(&mr->mr_lock);

    /* Free the Hermon Memory Region handle */
    hermon_rsrc_free(state, &rsrc);

    /* Free up the MPT entry resource */
    if (mpt != NULL)
        hermon_rsrc_free(state, &mpt);

    /* Decrement the reference count on the protection domain (PD) */
    hermon_pd_refcnt_dec(pd);

    /* Set the mrhdl pointer to NULL and return success */
    *mrhdl = NULL;

    return (DDI_SUCCESS);
}

/*
 * hermon_mr_dealloc_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 * Tear down an FMR region: frees the MTT, MR handle and MPT resources,
 * drops the PD reference, and NULLs the caller's handle.  Unlike
 * hermon_mr_deregister(), no HW2SW_MPT ownership transfer is posted here.
 */
/* ARGSUSED */
int
hermon_mr_dealloc_fmr(hermon_state_t *state, hermon_mrhdl_t *mrhdl)
{
    hermon_rsrc_t *mpt, *mtt, *rsrc;
    hermon_pdhdl_t pd;
    hermon_mrhdl_t mr;

    /*
     * Pull all the necessary information from the Hermon Memory Region
     * handle.  This is necessary here because the resource for the
     * MR handle is going to be freed up as part of this
     * deregistration.
     */
    mr = *mrhdl;
    mutex_enter(&mr->mr_lock);
    mpt = mr->mr_mptrsrcp;
    mtt = mr->mr_mttrsrcp;
    rsrc = mr->mr_rsrcp;
    pd = mr->mr_pdhdl;
    mutex_exit(&mr->mr_lock);

    /* Free the MTT entries */
    hermon_rsrc_free(state, &mtt);

    /* Free the Hermon Memory Region handle */
    hermon_rsrc_free(state, &rsrc);

    /* Free up the MPT entry resource */
    hermon_rsrc_free(state, &mpt);

    /* Decrement the reference count on the protection domain (PD) */
    hermon_pd_refcnt_dec(pd);

    /* Set the mrhdl pointer to NULL and return success */
    *mrhdl = NULL;

    return (DDI_SUCCESS);
}

/*
 * hermon_mr_invalidate_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_invalidate_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
{
    hermon_rsrc_t *mpt;
    uint64_t *mpt_table;

    /*
     * Invalidate the FMR by rewriting its dMPT in place: take the entry
     * to SW ownership, clear the key fields, then hand it back to HW.
     * The byte/word offsets below address the dMPT image directly
     * (status byte in word 0, mem_key in word 1, lkey in word 4).
     */
    mutex_enter(&mr->mr_lock);
    mpt = mr->mr_mptrsrcp;
    mpt_table = (uint64_t *)mpt->hr_addr;

    /* Write MPT status to SW bit (software ownership, see 0xF above) */
    ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0xF);

    /* invalidate mem key value */
    ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[1], 0);

    /* invalidate lkey value */
    ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[4], 0);

    /* Write MPT status to HW bit (hardware ownership) */
    ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0x0);

    mutex_exit(&mr->mr_lock);

    return (DDI_SUCCESS);
}

/*
 * hermon_mr_deregister_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 * Returns the FMR's dMPT to software ownership (status byte only);
 * resource teardown is left to hermon_mr_dealloc_fmr().
 */
/* ARGSUSED */
int
hermon_mr_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
{
    hermon_rsrc_t *mpt;
    uint64_t *mpt_table;

    mutex_enter(&mr->mr_lock);
    mpt = mr->mr_mptrsrcp;
    mpt_table = (uint64_t *)mpt->hr_addr;

    /* Write MPT status to SW bit */
    ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0xF);
    mutex_exit(&mr->mr_lock);

    return (DDI_SUCCESS);
}


/*
 * hermon_mr_query()
 *    Context: Can be called from interrupt or base context.
 *
 * Fills in "attr" with the attributes of memory region "mr" (keys,
 * bounds, PD, access flags, sync requirement).
 */
/* ARGSUSED */
int
hermon_mr_query(hermon_state_t *state, hermon_mrhdl_t mr,
    ibt_mr_query_attr_t *attr)
{
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))

    mutex_enter(&mr->mr_lock);

    /*
     * Check here to see if the memory region has already been partially
     * deregistered as a result of a hermon_umap_umemlock_cb() callback.
     * If so, this is an error, return failure.
     */
    if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
        mutex_exit(&mr->mr_lock);
        return (IBT_MR_HDL_INVALID);
    }

    /* Fill in the queried attributes */
    attr->mr_attr_flags = mr->mr_accflag;
    attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;

    /* Fill in the "local" attributes */
    attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
    attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
    attr->mr_lbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;

    /*
     * Fill in the "remote" attributes (if necessary).  Note: the
     * remote attributes are only valid if the memory region has one
     * or more of the remote access flags set.
     */
    if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
        (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
        (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
        attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
        attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
        attr->mr_rbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;
    }

    /*
     * If region is mapped for streaming (i.e. noncoherent), then set sync
     * is required
     */
    attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
        IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

    mutex_exit(&mr->mr_lock);
    return (DDI_SUCCESS);
}


/*
 * hermon_mr_reregister()
 *    Context: Can be called from interrupt or base context.
 *
 * Reregisters "mr" against the virtual-address range described by
 * "mr_attr"; on success a new MR handle is returned via "mrhdl_new".
 */
int
hermon_mr_reregister(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op)
{
    hermon_bind_info_t bind;
    int status;

    /*
     * Fill in the "bind" struct.  This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (as is the case here) and a "buf" binding (see
     * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
     * which does most of the "heavy lifting" for the Hermon memory
     * registration (and reregistration) routines.
     */
    bind.bi_type = HERMON_BINDHDL_VADDR;
    bind.bi_addr = mr_attr->mr_vaddr;
    bind.bi_len = mr_attr->mr_len;
    bind.bi_as = mr_attr->mr_as;
    bind.bi_flags = mr_attr->mr_flags;
    status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
    return (status);
}


/*
 * hermon_mr_reregister_buf()
 *    Context: Can be called from interrupt or base context.
 *
 * Same as hermon_mr_reregister() but the memory is described by a
 * "struct buf" instead of a virtual-address range.
 */
int
hermon_mr_reregister_buf(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
    hermon_mrhdl_t *mrhdl_new, hermon_mr_options_t *op)
{
    hermon_bind_info_t bind;
    int status;

    /*
     * Fill in the "bind" struct.  This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (see above) and a "buf" binding (as is the case
     * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
     * which does most of the "heavy lifting" for the Hermon memory
     * registration routines.  Note: We have chosen to provide
     * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
     * not set).  It is not critical what value we choose here as it need
     * only be unique for the given RKey (which will happen by default),
     * so the choice here is somewhat arbitrary.
     */
    bind.bi_type = HERMON_BINDHDL_BUF;
    bind.bi_buf = buf;
    if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
        bind.bi_addr = mr_attr->mr_vaddr;
    } else {
        bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
    }
    bind.bi_len = (uint64_t)buf->b_bcount;
    bind.bi_flags = mr_attr->mr_flags;
    bind.bi_as = NULL;
    status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
    return (status);
}


/*
 * hermon_mr_sync()
 *    Context: Can be called from interrupt or base context.
 *
 * Performs ddi_dma_sync() on each of the "num_segs" entries in
 * "mr_segs", after validating the handle and the segment bounds.
 * NOTE(review): loop index "i" is an int compared against the size_t
 * "num_segs" -- benign for realistic counts, but worth confirming.
 */
/* ARGSUSED */
int
hermon_mr_sync(hermon_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
{
    hermon_mrhdl_t mrhdl;
    uint64_t seg_vaddr, seg_len, seg_end;
    uint64_t mr_start, mr_end;
    uint_t type;
    int status, i;

    /* Process each of the ibt_mr_sync_t's */
    for (i = 0; i < num_segs; i++) {
        mrhdl = (hermon_mrhdl_t)mr_segs[i].ms_handle;

        /* Check for valid memory region handle */
        if (mrhdl == NULL) {
            status = IBT_MR_HDL_INVALID;
            goto mrsync_fail;
        }

        mutex_enter(&mrhdl->mr_lock);

        /*
         * Check here to see if the memory region has already been
         * partially deregistered as a result of a
         * hermon_umap_umemlock_cb() callback.  If so, this is an
         * error, return failure.
1171 */ 1172 if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) { 1173 mutex_exit(&mrhdl->mr_lock); 1174 status = IBT_MR_HDL_INVALID; 1175 goto mrsync_fail; 1176 } 1177 1178 /* Check for valid bounds on sync request */ 1179 seg_vaddr = mr_segs[i].ms_vaddr; 1180 seg_len = mr_segs[i].ms_len; 1181 seg_end = seg_vaddr + seg_len - 1; 1182 mr_start = mrhdl->mr_bindinfo.bi_addr; 1183 mr_end = mr_start + mrhdl->mr_bindinfo.bi_len - 1; 1184 if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) { 1185 mutex_exit(&mrhdl->mr_lock); 1186 status = IBT_MR_VA_INVALID; 1187 goto mrsync_fail; 1188 } 1189 if ((seg_end < mr_start) || (seg_end > mr_end)) { 1190 mutex_exit(&mrhdl->mr_lock); 1191 status = IBT_MR_LEN_INVALID; 1192 goto mrsync_fail; 1193 } 1194 1195 /* Determine what type (i.e. direction) for sync */ 1196 if (mr_segs[i].ms_flags & IBT_SYNC_READ) { 1197 type = DDI_DMA_SYNC_FORDEV; 1198 } else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) { 1199 type = DDI_DMA_SYNC_FORCPU; 1200 } else { 1201 mutex_exit(&mrhdl->mr_lock); 1202 status = IBT_INVALID_PARAM; 1203 goto mrsync_fail; 1204 } 1205 1206 (void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl, 1207 (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type); 1208 1209 mutex_exit(&mrhdl->mr_lock); 1210 } 1211 1212 return (DDI_SUCCESS); 1213 1214 mrsync_fail: 1215 return (status); 1216 } 1217 1218 1219 /* 1220 * hermon_mw_alloc() 1221 * Context: Can be called from interrupt or base context. 1222 */ 1223 int 1224 hermon_mw_alloc(hermon_state_t *state, hermon_pdhdl_t pd, ibt_mw_flags_t flags, 1225 hermon_mwhdl_t *mwhdl) 1226 { 1227 hermon_rsrc_t *mpt, *rsrc; 1228 hermon_hw_dmpt_t mpt_entry; 1229 hermon_mwhdl_t mw; 1230 uint_t sleep; 1231 int status; 1232 1233 if (state != NULL) /* XXX - bogus test that is always TRUE */ 1234 return (IBT_INSUFF_RESOURCE); 1235 1236 /* 1237 * Check the sleep flag. Ensure that it is consistent with the 1238 * current thread context (i.e. 
if we are currently in the interrupt 1239 * context, then we shouldn't be attempting to sleep). 1240 */ 1241 sleep = (flags & IBT_MW_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP; 1242 if ((sleep == HERMON_SLEEP) && 1243 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 1244 status = IBT_INVALID_PARAM; 1245 goto mwalloc_fail; 1246 } 1247 1248 /* Increment the reference count on the protection domain (PD) */ 1249 hermon_pd_refcnt_inc(pd); 1250 1251 /* 1252 * Allocate an MPT entry (for use as a memory window). Since the 1253 * Hermon hardware uses the MPT entry for memory regions and for 1254 * memory windows, we will fill in this MPT with all the necessary 1255 * parameters for the memory window. And then (just as we do for 1256 * memory regions) ownership will be passed to the hardware in the 1257 * final step below. If we fail here, we must undo the protection 1258 * domain reference count. 1259 */ 1260 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt); 1261 if (status != DDI_SUCCESS) { 1262 status = IBT_INSUFF_RESOURCE; 1263 goto mwalloc_fail1; 1264 } 1265 1266 /* 1267 * Allocate the software structure for tracking the memory window (i.e. 1268 * the Hermon Memory Window handle). Note: This is actually the same 1269 * software structure used for tracking memory regions, but since many 1270 * of the same properties are needed, only a single structure is 1271 * necessary. If we fail here, we must undo the protection domain 1272 * reference count and the previous resource allocation. 1273 */ 1274 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc); 1275 if (status != DDI_SUCCESS) { 1276 status = IBT_INSUFF_RESOURCE; 1277 goto mwalloc_fail2; 1278 } 1279 mw = (hermon_mwhdl_t)rsrc->hr_addr; 1280 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw)) 1281 1282 /* 1283 * Calculate an "unbound" RKey from MPT index. 
In much the same way 1284 * as we do for memory regions (above), this key is constructed from 1285 * a "constrained" (which depends on the MPT index) and an 1286 * "unconstrained" portion (which may be arbitrarily chosen). 1287 */ 1288 mw->mr_rkey = hermon_mr_keycalc(mpt->hr_indx); 1289 1290 /* 1291 * Fill in the MPT entry. This is the final step before passing 1292 * ownership of the MPT entry to the Hermon hardware. We use all of 1293 * the information collected/calculated above to fill in the 1294 * requisite portions of the MPT. Note: fewer entries in the MPT 1295 * entry are necessary to allocate a memory window. 1296 */ 1297 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); 1298 mpt_entry.reg_win = HERMON_MPT_IS_WINDOW; 1299 mpt_entry.mem_key = mw->mr_rkey; 1300 mpt_entry.pd = pd->pd_pdnum; 1301 mpt_entry.lr = 1; 1302 1303 /* 1304 * Write the MPT entry to hardware. Lastly, we pass ownership of 1305 * the entry to the hardware. Note: in general, this operation 1306 * shouldn't fail. But if it does, we have to undo everything we've 1307 * done above before returning error. 1308 */ 1309 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 1310 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep); 1311 if (status != HERMON_CMD_SUCCESS) { 1312 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n", 1313 status); 1314 if (status == HERMON_CMD_INVALID_STATUS) { 1315 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1316 } 1317 status = ibc_get_ci_failure(0); 1318 goto mwalloc_fail3; 1319 } 1320 1321 /* 1322 * Fill in the rest of the Hermon Memory Window handle. Having 1323 * successfully transferred ownership of the MPT, we can update the 1324 * following fields for use in further operations on the MW. 
1325 */ 1326 mw->mr_mptrsrcp = mpt; 1327 mw->mr_pdhdl = pd; 1328 mw->mr_rsrcp = rsrc; 1329 mw->mr_rkey = hermon_mr_key_swap(mw->mr_rkey); 1330 *mwhdl = mw; 1331 1332 return (DDI_SUCCESS); 1333 1334 mwalloc_fail3: 1335 hermon_rsrc_free(state, &rsrc); 1336 mwalloc_fail2: 1337 hermon_rsrc_free(state, &mpt); 1338 mwalloc_fail1: 1339 hermon_pd_refcnt_dec(pd); 1340 mwalloc_fail: 1341 return (status); 1342 } 1343 1344 1345 /* 1346 * hermon_mw_free() 1347 * Context: Can be called from interrupt or base context. 1348 */ 1349 int 1350 hermon_mw_free(hermon_state_t *state, hermon_mwhdl_t *mwhdl, uint_t sleep) 1351 { 1352 hermon_rsrc_t *mpt, *rsrc; 1353 hermon_mwhdl_t mw; 1354 int status; 1355 hermon_pdhdl_t pd; 1356 1357 /* 1358 * Check the sleep flag. Ensure that it is consistent with the 1359 * current thread context (i.e. if we are currently in the interrupt 1360 * context, then we shouldn't be attempting to sleep). 1361 */ 1362 if ((sleep == HERMON_SLEEP) && 1363 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 1364 status = IBT_INVALID_PARAM; 1365 return (status); 1366 } 1367 1368 /* 1369 * Pull all the necessary information from the Hermon Memory Window 1370 * handle. This is necessary here because the resource for the 1371 * MW handle is going to be freed up as part of the this operation. 1372 */ 1373 mw = *mwhdl; 1374 mutex_enter(&mw->mr_lock); 1375 mpt = mw->mr_mptrsrcp; 1376 rsrc = mw->mr_rsrcp; 1377 pd = mw->mr_pdhdl; 1378 mutex_exit(&mw->mr_lock); 1379 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw)) 1380 1381 /* 1382 * Reclaim the MPT entry from hardware. Note: in general, it is 1383 * unexpected for this operation to return an error. 
 */
    status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
        0, mpt->hr_indx, sleep);
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    /* Free the Hermon Memory Window handle */
    hermon_rsrc_free(state, &rsrc);

    /* Free up the MPT entry resource */
    hermon_rsrc_free(state, &mpt);

    /* Decrement the reference count on the protection domain (PD) */
    hermon_pd_refcnt_dec(pd);

    /* Set the mwhdl pointer to NULL and return success */
    *mwhdl = NULL;

    return (DDI_SUCCESS);
}


/*
 * hermon_mr_keycalc()
 *    Context: Can be called from interrupt or base context.
 *    NOTE:  Produces a key in the form of
 *          KKKKKKKK IIIIIIII IIIIIIII IIIIIIIII
 *    where K == the arbitrary bits and I == the index
 *
 * The "K" byte comes from the file-static hermon_memkey_cnt counter
 * (shifted by HERMON_MEMKEY_SHIFT == 24); the low 24 bits are the MPT
 * index.
 */
uint32_t
hermon_mr_keycalc(uint32_t indx)
{
    uint32_t tmp_key, tmp_indx;

    /*
     * Generate a simple key from counter.  Note:  We increment this
     * static variable _intentionally_ without any kind of mutex around
     * it.  First, single-threading all operations through a single lock
     * would be a bad idea (from a performance point-of-view).  Second,
     * the upper "unconstrained" bits don't really have to be unique
     * because the lower bits are guaranteed to be (although we do make a
     * best effort to ensure that they are).  Third, the window for the
     * race (where both threads read and update the counter at the same
     * time) is incredibly small.
     * And, lastly, we'd like to make this into a "random" key
     */
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hermon_memkey_cnt))
    tmp_key = (hermon_memkey_cnt++) << HERMON_MEMKEY_SHIFT;
    tmp_indx = indx & 0xffffff;
    return (tmp_key | tmp_indx);
}


/*
 * hermon_mr_key_swap()
 *    Context: Can be called from interrupt or base context.
 *    NOTE:  Produces a key in the form of
 *          IIIIIIII IIIIIIII IIIIIIIII KKKKKKKK
 *    where K == the arbitrary bits and I == the index
 *
 * Pure byte-rotation: swaps between the (key[7:0],index[23:0]) form
 * used by the hardware and the (index[23:0],key[7:0]) form handed to
 * consumers.  Applying it twice yields the original value.
 */
uint32_t
hermon_mr_key_swap(uint32_t indx)
{
    /*
     * The memory key format to pass down to the hardware is
     * (key[7:0],index[23:0]), which defines the index to the
     * hardware resource.  When the driver passes this as a memory
     * key, (i.e. to retrieve a resource) the format is
     * (index[23:0],key[7:0]).
     */
    return (((indx >> 24) & 0x000000ff) | ((indx << 8) & 0xffffff00));
}

/*
 * hermon_mr_common_reg()
 *    Context: Can be called from interrupt or base context.
 *
 * Common worker for all the memory registration entry points: binds the
 * memory described by "bind" under PD "pd", allocates MPT/MTT/handle
 * resources (dMPT only when mpt_type == HERMON_MPT_DMPT), and returns
 * the new MR handle via "mrhdl".
 */
static int
hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type)
{
    hermon_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
    hermon_umap_db_entry_t *umapdb;
    hermon_sw_refcnt_t *swrc_tmp;
    hermon_hw_dmpt_t mpt_entry;
    hermon_mrhdl_t mr;
    ibt_mr_flags_t flags;
    hermon_bind_info_t *bh;
    ddi_dma_handle_t bind_dmahdl;
    ddi_umem_cookie_t umem_cookie;
    size_t umem_len;
    caddr_t umem_addr;
    uint64_t mtt_addr, max_sz;
    uint_t sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
    int status, umem_flags, bind_override_addr;

    /*
     * Check the "options" flag.  Currently this flag tells the driver
     * whether or not the region should be bound normally (i.e.
with
     * entries written into the PCI IOMMU), whether it should be
     * registered to bypass the IOMMU, and whether or not the resulting
     * address should be "zero-based" (to aid the alignment restrictions
     * for QPs).
     */
    if (op == NULL) {
        bind_type = HERMON_BINDMEM_NORMAL;
        bind_dmahdl = NULL;
        bind_override_addr = 0;
    } else {
        bind_type = op->mro_bind_type;
        bind_dmahdl = op->mro_bind_dmahdl;
        bind_override_addr = op->mro_bind_override_addr;
    }

    /* check what kind of mpt to use */

    /* Extract the flags field from the hermon_bind_info_t */
    flags = bind->bi_flags;

    /*
     * Check for invalid length.  Check is the length is zero or if the
     * length is larger than the maximum configured value.  Return error
     * if it is.
     */
    max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
    if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
        status = IBT_MR_LEN_INVALID;
        goto mrcommon_fail;
    }

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
    if ((sleep == HERMON_SLEEP) &&
        (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
        status = IBT_INVALID_PARAM;
        goto mrcommon_fail;
    }

    /* Increment the reference count on the protection domain (PD) */
    hermon_pd_refcnt_inc(pd);

    /*
     * Allocate an MPT entry.  This will be filled in with all the
     * necessary parameters to define the memory region.  And then
     * ownership will be passed to the hardware in the final step
     * below.  If we fail here, we must undo the protection domain
     * reference count.
     */
    if (mpt_type == HERMON_MPT_DMPT) {
        status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
        if (status != DDI_SUCCESS) {
            status = IBT_INSUFF_RESOURCE;
            goto mrcommon_fail1;
        }
    } else {
        /* cMPT case: no dMPT entry is allocated (mpt stays NULL) */
        mpt = NULL;
    }

    /*
     * Allocate the software structure for tracking the memory region (i.e.
     * the Hermon Memory Region handle).  If we fail here, we must undo
     * the protection domain reference count and the previous resource
     * allocation.
     */
    status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mrcommon_fail2;
    }
    mr = (hermon_mrhdl_t)rsrc->hr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

    /*
     * Setup and validate the memory region access flags.  This means
     * translating the IBTF's enable flags into the access flags that
     * will be used in later operations.
     */
    mr->mr_accflag = 0;
    if (flags & IBT_MR_ENABLE_WINDOW_BIND)
        mr->mr_accflag |= IBT_MR_WINDOW_BIND;
    if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
        mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
    if (flags & IBT_MR_ENABLE_REMOTE_READ)
        mr->mr_accflag |= IBT_MR_REMOTE_READ;
    if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
        mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
    if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
        mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

    /*
     * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
     * from a certain number of "constrained" bits (the least significant
     * bits) and some number of "unconstrained" bits.  The constrained
     * bits must be set to the index of the entry in the MPT table, but
     * the unconstrained bits can be set to any value we wish.  Note:
     * if no remote access is required, then the RKey value is not filled
     * in.  Otherwise both Rkey and LKey are given the same value.
     */
    if (mpt)
        mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);

    if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
        (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
        (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
        mr->mr_rkey = mr->mr_lkey;
    }

    /*
     * Determine if the memory is from userland and pin the pages
     * with umem_lockmemory() if necessary.
     * Then, if this is userland memory, allocate an entry in the
     * "userland resources database".  This will later be added to
     * the database (after all further memory registration operations are
     * successful).  If we fail here, we must undo the reference counts
     * and the previous resource allocations.
     */
    mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
    if (mr_is_umem) {
        /* Page-align the range before locking it down */
        umem_len = ptob(btopr(bind->bi_len +
            ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
        umem_addr = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
        umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
            DDI_UMEMLOCK_LONGTERM);
        status = umem_lockmemory(umem_addr, umem_len, umem_flags,
            &umem_cookie, &hermon_umem_cbops, NULL);
        if (status != 0) {
            status = IBT_INSUFF_RESOURCE;
            goto mrcommon_fail3;
        }

        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))

        bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
            B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
        if (bind->bi_buf == NULL) {
            status = IBT_INSUFF_RESOURCE;
            goto mrcommon_fail3;
        }
        bind->bi_type = HERMON_BINDHDL_UBUF;
        bind->bi_buf->b_flags |= B_READ;

        _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
        _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

        umapdb = hermon_umap_db_alloc(state->hs_instance,
            (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
            (uint64_t)(uintptr_t)rsrc);
        if (umapdb == NULL) {
            status = IBT_INSUFF_RESOURCE;
            goto mrcommon_fail4;
        }
    }

    /*
     * Setup the bindinfo for the mtt bind call
     */
    bh = &mr->mr_bindinfo;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
    bcopy(bind, bh, sizeof (hermon_bind_info_t));
    bh->bi_bypass = bind_type;
    status = hermon_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
        &mtt_pgsize_bits, mpt != NULL);
    if (status != DDI_SUCCESS) {
        goto mrcommon_fail5;
    }
    mr->mr_logmttpgsz = mtt_pgsize_bits;

    /*
     * Allocate MTT reference count (to track shared memory regions).
     * This reference count resource may never be used on the given
     * memory region, but if it is ever later registered as "shared"
     * memory region then this resource will be necessary.  If we fail
     * here, we do pretty much the same as above to clean up.
     */
    status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
        &mtt_refcnt);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mrcommon_fail6;
    }
    mr->mr_mttrefcntp = mtt_refcnt;
    swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
    HERMON_MTT_REFCNT_INIT(swrc_tmp);

    mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);

    /*
     * Fill in the MPT entry.  This is the final step before passing
     * ownership of the MPT entry to the Hermon hardware.  We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the MPT.  Do this ONLY for DMPTs.
     */
    if (mpt == NULL)
        goto no_passown;

    bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));

    mpt_entry.status = HERMON_MPT_SW_OWNERSHIP;
    mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
    mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
    mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
    mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
    mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
    mpt_entry.lr = 1;
    mpt_entry.phys_addr = 0;
    mpt_entry.reg_win = HERMON_MPT_IS_REGION;

    mpt_entry.entity_sz = mr->mr_logmttpgsz;
    mpt_entry.mem_key = mr->mr_lkey;
    mpt_entry.pd = pd->pd_pdnum;
    mpt_entry.rem_acc_en = 0;
    mpt_entry.fast_reg_en = 0;
    mpt_entry.en_inval = 0;
    mpt_entry.lkey = 0;
    mpt_entry.win_cnt = 0;

    if (bind_override_addr == 0) {
        mpt_entry.start_addr = bh->bi_addr;
    } else {
        /* "zero-based": keep only the in-page offset as start addr */
        bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
        mpt_entry.start_addr = bh->bi_addr;
    }
    mpt_entry.reg_win_len = bh->bi_len;

    mpt_entry.mtt_addr_h = mtt_addr >> 32;  /* only 8 more bits */
    mpt_entry.mtt_addr_l = mtt_addr >> 3;   /* only 29 bits */

    /*
     * Write the MPT entry to hardware.  Lastly, we pass ownership of
     * the entry to the hardware if needed.  Note: in general, this
     * operation shouldn't fail.  But if it does, we have to undo
     * everything we've done above before returning error.
     *
     * For Hermon, this routine (which is common to the contexts) will only
     * set the ownership if needed - the process of passing the context
     * itself to HW will take care of setting up the MPT (based on type
     * and index).
     */

    mpt_entry.bnd_qp = 0;   /* dMPT for a qp, check for window */
    status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
        sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        status = ibc_get_ci_failure(0);
        goto mrcommon_fail7;
    }
no_passown:

    /*
     * Fill in the rest of the Hermon Memory Region handle.  Having
     * successfully transferred ownership of the MPT, we can update the
     * following fields for use in further operations on the MR.
     */
    mr->mr_mttaddr = mtt_addr;

    mr->mr_log2_pgsz = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
    mr->mr_mptrsrcp = mpt;
    mr->mr_mttrsrcp = mtt;
    mr->mr_pdhdl = pd;
    mr->mr_rsrcp = rsrc;
    mr->mr_is_umem = mr_is_umem;
    mr->mr_is_fmr = 0;
    mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
    mr->mr_umem_cbfunc = NULL;
    mr->mr_umem_cbarg1 = NULL;
    mr->mr_umem_cbarg2 = NULL;
    mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
    mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);
    mr->mr_mpt_type = mpt_type;

    /*
     * If this is userland memory, then we need to insert the previously
     * allocated entry into the "userland resources database".  This will
     * allow for later coordination between the hermon_umap_umemlock_cb()
     * callback and hermon_mr_deregister().
     */
    if (mr_is_umem) {
        hermon_umap_db_add(umapdb);
    }

    *mrhdl = mr;

    return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrcommon_fail7:
    hermon_rsrc_free(state, &mtt_refcnt);
mrcommon_fail6:
    hermon_mr_mem_unbind(state, bh);
mrcommon_fail5:
    if (mr_is_umem) {
        hermon_umap_db_free(umapdb);
    }
mrcommon_fail4:
    if (mr_is_umem) {
        /*
         * Free up the memory ddi_umem_iosetup() allocates
         * internally.
         */
        if (bind->bi_type == HERMON_BINDHDL_UBUF) {
            freerbuf(bind->bi_buf);
            _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
            bind->bi_type = HERMON_BINDHDL_NONE;
            _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
        }
        ddi_umem_unlock(umem_cookie);
    }
mrcommon_fail3:
    hermon_rsrc_free(state, &rsrc);
mrcommon_fail2:
    if (mpt != NULL)
        hermon_rsrc_free(state, &mpt);
mrcommon_fail1:
    hermon_pd_refcnt_dec(pd);
mrcommon_fail:
    return (status);
}

/*
 * hermon_mr_mtt_bind()
 *    Context: Can be called from interrupt or base context.
 *
 * Binds the memory in "bind", allocates the required MTT entries, and
 * writes the mapped addresses into them.  Returns the MTT resource via
 * "mtt" and the chosen page size (log2) via "mtt_pgsize_bits".
 */
int
hermon_mr_mtt_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t bind_dmahdl, hermon_rsrc_t **mtt, uint_t *mtt_pgsize_bits,
    uint_t is_buffer)
{
    uint64_t nummtt;
    uint_t sleep;
    int status;

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ?
        HERMON_NOSLEEP : HERMON_SLEEP;
    if ((sleep == HERMON_SLEEP) &&
        (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
        status = IBT_INVALID_PARAM;
        goto mrmttbind_fail;
    }

    /*
     * Bind the memory and determine the mapped addresses.  This is
     * the first of two routines that do all the "heavy lifting" for
     * the Hermon memory registration routines.  The hermon_mr_mem_bind()
     * routine takes the "bind" struct with all its fields filled
     * in and returns a list of DMA cookies (for the PCI mapped addresses
     * corresponding to the specified address region) which are used by
     * the hermon_mr_fast_mtt_write() routine below.  If we fail here, we
     * must undo all the previous resource allocation (and PD reference
     * count).
	 */
	status = hermon_mr_mem_bind(state, bind, bind_dmahdl, sleep, is_buffer);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrmttbind_fail;
	}

	/*
	 * Determine number of pages spanned.  This routine uses the
	 * information in the "bind" struct to determine the required
	 * number of MTT entries needed (and returns the suggested page size -
	 * as a "power-of-2" - for each MTT entry).
	 */
	nummtt = hermon_mr_nummtt_needed(state, bind, mtt_pgsize_bits);

	/*
	 * Allocate the MTT entries.  Use the calculations performed above to
	 * allocate the required number of MTT entries.  If we fail here, we
	 * must not only undo all the previous resource allocation (and PD
	 * reference count), but we must also unbind the memory.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, mtt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrmttbind_fail2;
	}

	/*
	 * Write the mapped addresses into the MTT entries.  This is part two
	 * of the "heavy lifting" routines that we talked about above.  Note:
	 * we pass the suggested page size from the earlier operation here.
	 * And if we fail here, we again do pretty much the same huge clean up.
	 */
	status = hermon_mr_fast_mtt_write(state, *mtt, bind, *mtt_pgsize_bits);
	if (status != DDI_SUCCESS) {
		/*
		 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
		 * only if it detects a HW error during DMA.
		 */
		hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		status = ibc_get_ci_failure(0);
		goto mrmttbind_fail3;
	}
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrmttbind_fail3:
	hermon_rsrc_free(state, mtt);
mrmttbind_fail2:
	hermon_mr_mem_unbind(state, bind);
mrmttbind_fail:
	return (status);
}


/*
 * hermon_mr_mtt_unbind()
 *    Context: Can be called from interrupt or base context.
 *
 *    Undo the work of hermon_mr_mtt_bind(): unbind the DMA mapping and
 *    free the MTT entries.  Always returns DDI_SUCCESS.
 */
int
hermon_mr_mtt_unbind(hermon_state_t *state, hermon_bind_info_t *bind,
    hermon_rsrc_t *mtt)
{
	/*
	 * Free up the MTT entries and unbind the memory.  Here, as above, we
	 * attempt to free these resources only if it is appropriate to do so.
	 * Note: hermon_rsrc_free() is handed the address of our local copy
	 * of the "mtt" pointer; the caller's handle is unaffected.
	 */
	hermon_mr_mem_unbind(state, bind);
	hermon_rsrc_free(state, &mtt);

	return (DDI_SUCCESS);
}


/*
 * hermon_mr_common_rereg()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op)
{
	hermon_rsrc_t		*mpt;
	ibt_mr_attr_flags_t	acc_flags_to_use;
	ibt_mr_flags_t		flags;
	hermon_pdhdl_t		pd_to_use;
	hermon_hw_dmpt_t	mpt_entry;
	uint64_t		mtt_addr_to_use, vaddr_to_use, len_to_use;
	uint_t			sleep, dereg_level;
	int			status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Check here to see if the memory region corresponds to a userland
	 * mapping.  Reregistration of userland memory regions is not
	 * currently supported.  Return failure.
	 */
	if (mr->mr_is_umem) {
		status = IBT_MR_HDL_INVALID;
		goto mrrereg_fail;
	}

	mutex_enter(&mr->mr_lock);

	/* Pull MPT resource pointer from the Hermon Memory Region handle */
	mpt = mr->mr_mptrsrcp;

	/* Extract the flags field from the hermon_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		mutex_exit(&mr->mr_lock);
		status = IBT_INVALID_PARAM;
		goto mrrereg_fail;
	}

	/*
	 * First step is to temporarily invalidate the MPT entry.  This
	 * regains ownership from the hardware, and gives us the opportunity
	 * to modify the entry.  Note: The HW2SW_MPT command returns the
	 * current MPT entry contents.  These are saved away here because
	 * they will be reused in a later step below.  If the region has
	 * bound memory windows, we fail, returning an "in use" error code.
	 * Otherwise, this is an unexpected error and we deregister the
	 * memory region and return error.
	 *
	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		if (status == HERMON_CMD_REG_BOUND) {
			return (IBT_MR_IN_USE);
		} else {
			cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			/*
			 * Call deregister and ensure that all current
			 * resources get freed up
			 */
			if (hermon_mr_deregister(state, &mr,
			    HERMON_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * If we're changing the protection domain, then validate the new one
	 */
	if (flags & IBT_MR_CHANGE_PD) {

		/* Check for valid PD handle pointer */
		if (pd == NULL) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (hermon_mr_deregister(state, &mr,
			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			status = IBT_PD_HDL_INVALID;
			goto mrrereg_fail;
		}

		/* Use the new PD handle in all operations below */
		pd_to_use = pd;

	} else {
		/* Use the current PD handle in all operations below */
		pd_to_use = mr->mr_pdhdl;
	}

	/*
	 * If we're changing access permissions, then validate the new ones
	 */
	if (flags & IBT_MR_CHANGE_ACCESS) {
		/*
		 * Validate the access flags.  Both remote write and remote
		 * atomic require the local write flag to be set
		 */
		if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
		    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
		    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (hermon_mr_deregister(state, &mr,
			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			status = IBT_MR_ACCESS_REQ_INVALID;
			goto mrrereg_fail;
		}

		/*
		 * Setup and validate the memory region access flags.  This
		 * means translating the IBTF's enable flags into the access
		 * flags that will be used in later operations.
		 */
		acc_flags_to_use = 0;
		if (flags & IBT_MR_ENABLE_WINDOW_BIND)
			acc_flags_to_use |= IBT_MR_WINDOW_BIND;
		if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
			acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_READ)
			acc_flags_to_use |= IBT_MR_REMOTE_READ;
		if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
			acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
			acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;

	} else {
		/* Keep the region's current access flags */
		acc_flags_to_use = mr->mr_accflag;
	}

	/*
	 * If we're modifying the translation, then figure out whether
	 * we can reuse the current MTT resources.  This means calling
	 * hermon_mr_rereg_xlat_helper() which does most of the heavy lifting
	 * for the reregistration.  If the current memory region contains
	 * sufficient MTT entries for the new regions, then it will be
	 * reused and filled in.
	 * Otherwise, new entries will be allocated, the old ones will be
	 * freed, and the new entries will be filled in.  Note: If we're not
	 * modifying the translation, then we should already have all the
	 * information we need to update the MPT.  Also note: If
	 * hermon_mr_rereg_xlat_helper() fails, it will return a "dereg_level"
	 * which is the level of cleanup that needs to be passed to
	 * hermon_mr_deregister() to finish the cleanup.
	 */
	if (flags & IBT_MR_CHANGE_TRANSLATION) {
		status = hermon_mr_rereg_xlat_helper(state, mr, bind, op,
		    &mtt_addr_to_use, sleep, &dereg_level);
		if (status != DDI_SUCCESS) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all resources get
			 * properly freed up.
			 */
			if (hermon_mr_deregister(state, &mr, dereg_level,
			    sleep) != DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			goto mrrereg_fail;
		}
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use = mr->mr_bindinfo.bi_len;
	} else {
		/* Translation unchanged: reuse the existing MTT address */
		mtt_addr_to_use = mr->mr_mttaddr;
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use = mr->mr_bindinfo.bi_len;
	}

	/*
	 * Calculate new keys (Lkey, Rkey) from MPT index.  Just like they were
	 * when the region was first registered, each key is formed from
	 * "constrained" bits and "unconstrained" bits.  Note: If no remote
	 * access is required, then the RKey value is not filled in.  Otherwise
	 * both Rkey and LKey are given the same value.
	 */
	mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
	if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	} else
		mr->mr_rkey = 0;

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));

	mpt_entry.status = HERMON_MPT_SW_OWNERSHIP;
	mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND) ? 1 : 0;
	mpt_entry.atomic = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw = (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ? 1 : 0;
	mpt_entry.rr = (acc_flags_to_use & IBT_MR_REMOTE_READ) ? 1 : 0;
	mpt_entry.lw = (acc_flags_to_use & IBT_MR_LOCAL_WRITE) ? 1 : 0;
	/* Local read access is always enabled for a memory region */
	mpt_entry.lr = 1;
	mpt_entry.phys_addr = 0;
	mpt_entry.reg_win = HERMON_MPT_IS_REGION;

	mpt_entry.entity_sz = mr->mr_logmttpgsz;
	mpt_entry.mem_key = mr->mr_lkey;
	mpt_entry.pd = pd_to_use->pd_pdnum;

	mpt_entry.start_addr = vaddr_to_use;
	mpt_entry.reg_win_len = len_to_use;
	/* MTT address is split across high/low fields; low is 8-byte aligned */
	mpt_entry.mtt_addr_h = mtt_addr_to_use >> 32;
	mpt_entry.mtt_addr_l = mtt_addr_to_use >> 3;

	/*
	 * Write the updated MPT entry to hardware
	 *
	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		/*
		 * Call deregister and ensure that all current resources get
		 * properly freed up.  Unnecessary here to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above.
		 */
		if (hermon_mr_deregister(state, &mr,
		    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
			HERMON_WARNING(state, "failed to deregister memory "
			    "region");
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * If we're changing PD, then update their reference counts now.
	 * This means decrementing the reference count on the old PD and
	 * incrementing the reference count on the new PD.
	 */
	if (flags & IBT_MR_CHANGE_PD) {
		hermon_pd_refcnt_dec(mr->mr_pdhdl);
		hermon_pd_refcnt_inc(pd);
	}

	/*
	 * Update the contents of the Hermon Memory Region handle to reflect
	 * what has been changed.
	 */
	mr->mr_pdhdl = pd_to_use;
	mr->mr_accflag = acc_flags_to_use;
	mr->mr_is_umem = 0;
	mr->mr_is_fmr = 0;
	mr->mr_umemcookie = NULL;
	/* Keys are stored byte-swapped for hand-off to consumers */
	mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
	mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);

	/* New MR handle is same as the old */
	*mrhdl_new = mr;
	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);

mrrereg_fail:
	return (status);
}


/*
 * hermon_mr_rereg_xlat_helper
 *    Context: Can be called from interrupt or base context.
 *    Note: This routine expects the "mr_lock" to be held when it
 *    is called.  Upon returning failure, this routine passes information
 *    about what "dereg_level" should be passed to hermon_mr_deregister().
 */
static int
hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level)
{
	hermon_rsrc_t		*mtt, *mtt_refcnt;
	hermon_sw_refcnt_t	*swrc_old, *swrc_new;
	ddi_dma_handle_t	dmahdl;
	uint64_t		nummtt_needed, nummtt_in_currrsrc, max_sz;
	uint_t			mtt_pgsize_bits, bind_type, reuse_dmahdl;
	int			status;

	/* Caller (hermon_mr_common_rereg) must hold the region lock */
	ASSERT(MUTEX_HELD(&mr->mr_lock));

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU) or whether it should be
	 * registered to bypass the IOMMU.
	 */
	if (op == NULL) {
		bind_type = HERMON_BINDMEM_NORMAL;
	} else {
		bind_type = op->mro_bind_type;
	}

	/*
	 * Check for invalid length.  Check is the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		/*
		 * Deregister will be called upon returning failure from this
		 * routine.  This will ensure that all current resources get
		 * properly freed up.  Unnecessary to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above (in hermon_mr_reregister())
		 */
		*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT;

		status = IBT_MR_LEN_INVALID;
		goto mrrereghelp_fail;
	}

	/*
	 * Determine the number of pages necessary for new region and the
	 * number of pages supported by the current MTT resources
	 */
	nummtt_needed = hermon_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
	nummtt_in_currrsrc = mr->mr_mttrsrcp->hr_len >> HERMON_MTT_SIZE_SHIFT;

	/*
	 * Depending on whether we have enough pages or not, the next step is
	 * to fill in a set of MTT entries that reflect the new mapping.  In
	 * the first case below, we already have enough entries.  This means
	 * we need to unbind the memory from the previous mapping, bind the
	 * memory for the new mapping, write the new MTT entries, and update
	 * the mr to reflect the changes.
	 * In the second case below, we do not have enough entries in the
	 * current mapping.  So, in this case, we need not only to unbind the
	 * current mapping, but we need to free up the MTT resources associated
	 * with that mapping.  After we've successfully done that, we continue
	 * by binding the new memory, allocating new MTT entries, writing the
	 * new MTT entries, and updating the mr to reflect the changes.
	 */

	/*
	 * If this region is being shared (i.e. MTT refcount != 1), then we
	 * can't reuse the current MTT resources regardless of their size.
	 * Instead we'll need to alloc new ones (below) just as if there
	 * hadn't been enough room in the current entries.
	 */
	swrc_old = (hermon_sw_refcnt_t *)mr->mr_mttrefcntp->hr_addr;
	if (HERMON_MTT_IS_NOT_SHARED(swrc_old) &&
	    (nummtt_needed <= nummtt_in_currrsrc)) {

		/*
		 * Unbind the old mapping for this memory region, but retain
		 * the ddi_dma_handle_t (if possible) for reuse in the bind
		 * operation below.  Note: If original memory region was
		 * bound for IOMMU bypass and the new region can not use
		 * bypass, then a new DMA handle will be necessary.
		 */
		if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
			mr->mr_bindinfo.bi_free_dmahdl = 0;
			hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = mr->mr_bindinfo.bi_dmahdl;
			reuse_dmahdl = 1;
		} else {
			hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and hermon_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note: When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass = bind_type;
		status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Using the new mapping, but reusing the current MTT
		 * resources, write the updated entries to MTT
		 */
		mtt = mr->mr_mttrsrcp;
		status = hermon_mr_fast_mtt_write(state, mtt, bind,
		    mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine; no need to regain MPT ownership
			 * (already done in hermon_mr_reregister()).  But we
			 * do need to unbind the newly bound memory before
			 * returning.
			 */
			hermon_mr_mem_unbind(state, bind);
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/*
			 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
			 * only if it detects a HW error during DMA.
			 */
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
			status = ibc_get_ci_failure(0);
			goto mrrereghelp_fail;
		}

		/* Put the updated information into the Mem Region handle */
		mr->mr_bindinfo = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;

	} else {
		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not unbind any resources prematurely.
		 */
		if (!HERMON_MTT_IS_SHARED(swrc_old)) {
			/*
			 * Unbind the old mapping for this memory region, but
			 * retain the ddi_dma_handle_t for reuse in the bind
			 * operation below.
			 * Note: This can only be done here because the
			 * region being reregistered is not currently shared.
			 * Also if original memory region was bound for IOMMU
			 * bypass and the new region can not use bypass, then
			 * a new DMA handle will be necessary.
			 */
			if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
				mr->mr_bindinfo.bi_free_dmahdl = 0;
				hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = mr->mr_bindinfo.bi_dmahdl;
				reuse_dmahdl = 1;
			} else {
				hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = NULL;
				reuse_dmahdl = 0;
			}
		} else {
			/* Shared MTT: leave the old mapping untouched */
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and hermon_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note: When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass = bind_type;
		status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Allocate the new MTT entries resource
		 */
		status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt_needed,
		    sleep, &mtt);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine; no need to regain MPT ownership
			 * (already done in hermon_mr_reregister()).  But we
			 * do need to unbind the newly bound memory before
			 * returning.
			 */
			hermon_mr_mem_unbind(state, bind);
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}

		/*
		 * Allocate MTT reference count (to track shared memory
		 * regions).  As mentioned elsewhere above, this reference
		 * count resource may never be used on the given memory region,
		 * but if it is ever later registered as a "shared" memory
		 * region then this resource will be necessary.  Note: This
		 * is only necessary here if the existing memory region is
		 * already being shared (because otherwise we already have
		 * a useable reference count resource).
		 */
		if (HERMON_MTT_IS_SHARED(swrc_old)) {
			status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1,
			    sleep, &mtt_refcnt);
			if (status != DDI_SUCCESS) {
				/*
				 * Deregister will be called upon returning
				 * failure from this routine.  This will ensure
				 * that all current resources get properly
				 * freed up.  Unnecessary to attempt to regain
				 * software ownership of the MPT entry as that
				 * has already been done above (in
				 * hermon_mr_reregister()).  Also unnecessary
				 * to attempt to unbind the memory.
				 *
				 * But we need to unbind the newly bound
				 * memory and free up the newly allocated MTT
				 * entries before returning.
				 */
				hermon_mr_mem_unbind(state, bind);
				hermon_rsrc_free(state, &mtt);
				*dereg_level =
				    HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

				status = IBT_INSUFF_RESOURCE;
				goto mrrereghelp_fail;
			}
			swrc_new = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
			HERMON_MTT_REFCNT_INIT(swrc_new);
		} else {
			/* Not shared: reuse the existing refcount resource */
			mtt_refcnt = mr->mr_mttrefcntp;
		}

		/*
		 * Using the new mapping and the new MTT resources, write the
		 * updated entries to MTT
		 */
		status = hermon_mr_fast_mtt_write(state, mtt, bind,
		    mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine; no need to regain MPT ownership
			 * (already done in hermon_mr_reregister()).  But we
			 * need to unbind the newly bound memory, free up the
			 * newly allocated MTT entries, and (possibly) free
			 * the new MTT reference count resource before
			 * returning.
			 */
			if (HERMON_MTT_IS_SHARED(swrc_old)) {
				hermon_rsrc_free(state, &mtt_refcnt);
			}
			hermon_mr_mem_unbind(state, bind);
			hermon_rsrc_free(state, &mtt);
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}

		/*
		 * Check if the memory region MTT is shared by any other MRs.
2593 * Since the resource may be shared between multiple memory 2594 * regions (as a result of a "RegisterSharedMR()" verb) it is 2595 * important that we not free up any resources prematurely. 2596 */ 2597 if (HERMON_MTT_IS_SHARED(swrc_old)) { 2598 /* Decrement MTT reference count for "old" region */ 2599 (void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp); 2600 } else { 2601 /* Free up the old MTT entries resource */ 2602 hermon_rsrc_free(state, &mr->mr_mttrsrcp); 2603 } 2604 2605 /* Put the updated information into the mrhdl */ 2606 mr->mr_bindinfo = *bind; 2607 mr->mr_logmttpgsz = mtt_pgsize_bits; 2608 mr->mr_mttrsrcp = mtt; 2609 mr->mr_mttrefcntp = mtt_refcnt; 2610 } 2611 2612 /* 2613 * Calculate and return the updated MTT address (in the DDR address 2614 * space). This will be used by the caller (hermon_mr_reregister) in 2615 * the updated MPT entry 2616 */ 2617 *mtt_addr = mtt->hr_indx << HERMON_MTT_SIZE_SHIFT; 2618 2619 return (DDI_SUCCESS); 2620 2621 mrrereghelp_fail: 2622 return (status); 2623 } 2624 2625 2626 /* 2627 * hermon_mr_nummtt_needed() 2628 * Context: Can be called from interrupt or base context. 2629 */ 2630 /* ARGSUSED */ 2631 static uint64_t 2632 hermon_mr_nummtt_needed(hermon_state_t *state, hermon_bind_info_t *bind, 2633 uint_t *mtt_pgsize_bits) 2634 { 2635 uint64_t pg_offset_mask; 2636 uint64_t pg_offset, tmp_length; 2637 2638 /* 2639 * For now we specify the page size as 8Kb (the default page size for 2640 * the sun4u architecture), or 4Kb for x86. Figure out optimal page 2641 * size by examining the dmacookies 2642 */ 2643 *mtt_pgsize_bits = PAGESHIFT; 2644 2645 pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1; 2646 pg_offset = bind->bi_addr & pg_offset_mask; 2647 tmp_length = pg_offset + (bind->bi_len - 1); 2648 return ((tmp_length >> *mtt_pgsize_bits) + 1); 2649 } 2650 2651 2652 /* 2653 * hermon_mr_mem_bind() 2654 * Context: Can be called from interrupt or base context. 
 */
static int
hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer)
{
	ddi_dma_attr_t	dma_attr;
	int		(*callback)(caddr_t);
	int		status;

	/* bi_type must be set to a meaningful value to get a bind handle */
	ASSERT(bind->bi_type == HERMON_BINDHDL_VADDR ||
	    bind->bi_type == HERMON_BINDHDL_BUF ||
	    bind->bi_type == HERMON_BINDHDL_UBUF);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/* Set the callback flag appropriately */
	callback = (sleep == HERMON_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;

	/*
	 * Initialize many of the default DMA attributes.  Then, if we're
	 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
	 */
	if (dmahdl == NULL) {
		hermon_dma_attr_init(state, &dma_attr);
#ifdef	__sparc
		if (bind->bi_bypass == HERMON_BINDMEM_BYPASS) {
			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
		}
#endif

		/* set RO if needed - tunable set and 'is_buffer' is non-0 */
		if (is_buffer) {
			if (! (bind->bi_flags & IBT_MR_DISABLE_RO)) {
				/*
				 * Relaxed ordering is enabled separately for
				 * kernel (non-UBUF) and userland (UBUF)
				 * buffers via the two global tunables.
				 */
				if ((bind->bi_type != HERMON_BINDHDL_UBUF) &&
				    (hermon_kernel_data_ro ==
				    HERMON_RO_ENABLED)) {
					dma_attr.dma_attr_flags |=
					    DDI_DMA_RELAXED_ORDERING;
				}
				if (((bind->bi_type == HERMON_BINDHDL_UBUF) &&
				    (hermon_user_data_ro ==
				    HERMON_RO_ENABLED))) {
					dma_attr.dma_attr_flags |=
					    DDI_DMA_RELAXED_ORDERING;
				}
			}
		}

		/* Allocate a DMA handle for the binding */
		status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
		    callback, NULL, &bind->bi_dmahdl);
		if (status != DDI_SUCCESS) {
			return (status);
		}
		bind->bi_free_dmahdl = 1;

	} else {
		/* Caller supplied a handle; it remains caller-owned */
		bind->bi_dmahdl = dmahdl;
		bind->bi_free_dmahdl = 0;
	}


	/*
	 * Bind the memory to get the PCI mapped addresses.  The decision
	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
	 * is determined by the "bi_type" flag.  Note: if the bind operation
	 * fails then we have to free up the DMA handle and return error.
	 */
	if (bind->bi_type == HERMON_BINDHDL_VADDR) {
		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
		    (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback, NULL,
		    &bind->bi_dmacookie, &bind->bi_cookiecnt);

	} else {  /* HERMON_BINDHDL_BUF or HERMON_BINDHDL_UBUF */

		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
		    bind->bi_buf, (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback,
		    NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
	}
	if (status != DDI_DMA_MAPPED) {
		if (bind->bi_free_dmahdl != 0) {
			ddi_dma_free_handle(&bind->bi_dmahdl);
		}
		return (status);
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_mr_mem_unbind()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_mr_mem_unbind(hermon_state_t *state, hermon_bind_info_t *bind)
{
	int	status;

	/*
	 * In case of HERMON_BINDHDL_UBUF, the memory bi_buf points to
	 * is actually allocated by ddi_umem_iosetup() internally, then
	 * it's required to free it here. Reset bi_type to HERMON_BINDHDL_NONE
	 * not to free it again later.
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	if (bind->bi_type == HERMON_BINDHDL_UBUF) {
		freerbuf(bind->bi_buf);
		bind->bi_type = HERMON_BINDHDL_NONE;
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Unbind the DMA memory for the region
	 *
	 * Note: The only way ddi_dma_unbind_handle() currently
	 * can return an error is if the handle passed in is invalid.
	 * Since this should never happen, we choose to return void
	 * from this function!
If this does return an error, however, 2776 * then we print a warning message to the console. 2777 */ 2778 status = ddi_dma_unbind_handle(bind->bi_dmahdl); 2779 if (status != DDI_SUCCESS) { 2780 HERMON_WARNING(state, "failed to unbind DMA mapping"); 2781 return; 2782 } 2783 2784 /* Free up the DMA handle */ 2785 if (bind->bi_free_dmahdl != 0) { 2786 ddi_dma_free_handle(&bind->bi_dmahdl); 2787 } 2788 } 2789 2790 2791 /* 2792 * hermon_mr_fast_mtt_write() 2793 * Context: Can be called from interrupt or base context. 2794 */ 2795 static int 2796 hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt, 2797 hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits) 2798 { 2799 hermon_icm_table_t *icm_table; 2800 hermon_dma_info_t *dma_info; 2801 uint32_t index1, index2, rindx; 2802 ddi_dma_cookie_t dmacookie; 2803 uint_t cookie_cnt; 2804 uint64_t *mtt_table; 2805 uint64_t mtt_entry; 2806 uint64_t addr, endaddr; 2807 uint64_t pagesize; 2808 offset_t i, start; 2809 uint_t per_span; 2810 int sync_needed; 2811 2812 /* 2813 * XXX According to the PRM, we are to use the WRITE_MTT 2814 * command to write out MTTs. Tavor does not do this, 2815 * instead taking advantage of direct access to the MTTs, 2816 * and knowledge that Mellanox FMR relies on our ability 2817 * to write directly to the MTTs without any further 2818 * notification to the firmware. Likewise, we will choose 2819 * to not use the WRITE_MTT command, but to simply write 2820 * out the MTTs. 
2821 */ 2822 2823 /* Calculate page size from the suggested value passed in */ 2824 pagesize = ((uint64_t)1 << mtt_pgsize_bits); 2825 2826 /* Walk the "cookie list" and fill in the MTT table entries */ 2827 dmacookie = bind->bi_dmacookie; 2828 cookie_cnt = bind->bi_cookiecnt; 2829 2830 icm_table = &state->hs_icm[HERMON_MTT]; 2831 rindx = mtt->hr_indx; 2832 hermon_index(index1, index2, rindx, icm_table, i); 2833 start = i; 2834 2835 per_span = icm_table->span; 2836 dma_info = icm_table->icm_dma[index1] + index2; 2837 mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr; 2838 2839 sync_needed = 0; 2840 while (cookie_cnt-- > 0) { 2841 addr = dmacookie.dmac_laddress; 2842 endaddr = addr + (dmacookie.dmac_size - 1); 2843 addr = addr & ~((uint64_t)pagesize - 1); 2844 2845 while (addr <= endaddr) { 2846 2847 /* 2848 * Fill in the mapped addresses (calculated above) and 2849 * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry. 2850 */ 2851 mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT; 2852 mtt_table[i] = htonll(mtt_entry); 2853 i++; 2854 rindx++; 2855 2856 if (i == per_span) { 2857 2858 (void) ddi_dma_sync(dma_info->dma_hdl, 2859 start * sizeof (hermon_hw_mtt_t), 2860 (i - start) * sizeof (hermon_hw_mtt_t), 2861 DDI_DMA_SYNC_FORDEV); 2862 2863 if ((addr + pagesize > endaddr) && 2864 (cookie_cnt == 0)) 2865 return (DDI_SUCCESS); 2866 2867 hermon_index(index1, index2, rindx, icm_table, 2868 i); 2869 start = i * sizeof (hermon_hw_mtt_t); 2870 dma_info = icm_table->icm_dma[index1] + index2; 2871 mtt_table = 2872 (uint64_t *)(uintptr_t)dma_info->vaddr; 2873 2874 sync_needed = 0; 2875 } else { 2876 sync_needed = 1; 2877 } 2878 2879 addr += pagesize; 2880 if (addr == 0) { 2881 static int do_once = 1; 2882 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", 2883 do_once)) 2884 if (do_once) { 2885 do_once = 0; 2886 cmn_err(CE_NOTE, "probable error in " 2887 "dma_cookie address from caller\n"); 2888 } 2889 break; 2890 } 2891 } 2892 2893 /* 2894 * When we've reached the end of the current DMA 
cookie, 2895 * jump to the next cookie (if there are more) 2896 */ 2897 if (cookie_cnt != 0) { 2898 ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie); 2899 } 2900 } 2901 2902 /* done all the cookies, now sync the memory for the device */ 2903 if (sync_needed) 2904 (void) ddi_dma_sync(dma_info->dma_hdl, 2905 start * sizeof (hermon_hw_mtt_t), 2906 (i - start) * sizeof (hermon_hw_mtt_t), 2907 DDI_DMA_SYNC_FORDEV); 2908 2909 return (DDI_SUCCESS); 2910 } 2911 2912 /* 2913 * hermon_mr_fast_mtt_write_fmr() 2914 * Context: Can be called from interrupt or base context. 2915 */ 2916 static int 2917 hermon_mr_fast_mtt_write_fmr(hermon_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr, 2918 uint32_t mtt_pgsize_bits) 2919 { 2920 uint64_t *mtt_table; 2921 ibt_phys_addr_t *buf; 2922 uint64_t mtt_entry; 2923 uint64_t addr, first_addr, endaddr; 2924 uint64_t pagesize; 2925 int i; 2926 2927 /* Calculate page size from the suggested value passed in */ 2928 pagesize = ((uint64_t)1 << mtt_pgsize_bits); 2929 2930 /* 2931 * Walk the "addr list" and fill in the MTT table entries 2932 */ 2933 mtt_table = (uint64_t *)mtt->hr_addr; 2934 for (i = 0; i < mem_pattr->pmr_num_buf; i++) { 2935 buf = &mem_pattr->pmr_addr_list[i]; 2936 2937 /* 2938 * For first cookie, use the offset field to determine where 2939 * the buffer starts. The end addr is then calculated with the 2940 * offset in mind. 2941 */ 2942 if (i == 0) { 2943 first_addr = addr = buf->p_laddr + 2944 mem_pattr->pmr_offset; 2945 endaddr = addr + (mem_pattr->pmr_buf_sz - 1) - 2946 mem_pattr->pmr_offset; 2947 /* 2948 * For last cookie, determine end addr based on starting 2949 * address and size of the total buffer 2950 */ 2951 } else if (i == mem_pattr->pmr_num_buf - 1) { 2952 addr = buf->p_laddr; 2953 endaddr = addr + (first_addr + mem_pattr->pmr_len & 2954 (mem_pattr->pmr_buf_sz - 1)); 2955 /* 2956 * For the middle cookies case, start and end addr are 2957 * straightforward. 
Just use the laddr, and the size, as all 2958 * middle cookies are a set size. 2959 */ 2960 } else { 2961 addr = buf->p_laddr; 2962 endaddr = addr + (mem_pattr->pmr_buf_sz - 1); 2963 } 2964 2965 addr = addr & ~((uint64_t)pagesize - 1); 2966 while (addr <= endaddr) { 2967 /* 2968 * Fill in the mapped addresses (calculated above) and 2969 * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry. 2970 */ 2971 mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT; 2972 mtt_table[i] = htonll(mtt_entry); 2973 addr += pagesize; 2974 } 2975 } 2976 2977 return (DDI_SUCCESS); 2978 } 2979 2980 2981 /* 2982 * hermon_mtt_refcnt_inc() 2983 * Context: Can be called from interrupt or base context. 2984 */ 2985 static uint_t 2986 hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc) 2987 { 2988 hermon_sw_refcnt_t *rc; 2989 2990 rc = (hermon_sw_refcnt_t *)rsrc->hr_addr; 2991 return (atomic_inc_uint_nv(&rc->swrc_refcnt)); 2992 } 2993 2994 2995 /* 2996 * hermon_mtt_refcnt_dec() 2997 * Context: Can be called from interrupt or base context. 2998 */ 2999 static uint_t 3000 hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc) 3001 { 3002 hermon_sw_refcnt_t *rc; 3003 3004 rc = (hermon_sw_refcnt_t *)rsrc->hr_addr; 3005 return (atomic_dec_uint_nv(&rc->swrc_refcnt)); 3006 } 3007