1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * hermon_mr.c 29 * Hermon Memory Region/Window Routines 30 * 31 * Implements all the routines necessary to provide the requisite memory 32 * registration verbs. These include operations like RegisterMemRegion(), 33 * DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion, 34 * etc., that affect Memory Regions. It also includes the verbs that 35 * affect Memory Windows, including AllocMemWindow(), FreeMemWindow(), 36 * and QueryMemWindow(). 
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/esunddi.h>

#include <sys/ib/adapters/hermon/hermon.h>

/* Tunables defined elsewhere in the driver (read-only mapping controls). */
extern uint32_t hermon_kernel_data_ro;
extern uint32_t hermon_user_data_ro;
extern int hermon_rdma_debug;

/*
 * Used by hermon_mr_keycalc() below to fill in the "unconstrained" portion
 * of Hermon memory keys (LKeys and RKeys).  The counter itself is only a
 * uniquifier; its value carries no hardware meaning.
 */
static uint_t hermon_memkey_cnt = 0x00;
#define	HERMON_MEMKEY_SHIFT	24

/* initial state of an MPT */
#define	HERMON_MPT_SW_OWNERSHIP	0xF	/* memory regions */
#define	HERMON_MPT_FREE		0x3	/* allocate lkey */

/*
 * Forward declarations for the file-local helpers that implement the bulk
 * of the registration/reregistration work.
 */
static int hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type);
static int hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op);
static int hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level);
static uint64_t hermon_mr_nummtt_needed(hermon_state_t *state,
    hermon_bind_info_t *bind, uint_t *mtt_pgsize);
static int hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer);
static void hermon_mr_mem_unbind(hermon_state_t *state,
    hermon_bind_info_t *bind);
static int hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
    hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits);
static int hermon_mr_fast_mtt_write_fmr(hermon_state_t *state,
    hermon_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits);
static uint_t hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc);
static uint_t
hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc); 84 85 86 /* 87 * The Hermon umem_lockmemory() callback ops. When userland memory is 88 * registered, these callback ops are specified. The hermon_umap_umemlock_cb() 89 * callback will be called whenever the memory for the corresponding 90 * ddi_umem_cookie_t is being freed. 91 */ 92 static struct umem_callback_ops hermon_umem_cbops = { 93 UMEM_CALLBACK_VERSION, 94 hermon_umap_umemlock_cb, 95 }; 96 97 98 99 /* 100 * hermon_mr_register() 101 * Context: Can be called from interrupt or base context. 102 */ 103 int 104 hermon_mr_register(hermon_state_t *state, hermon_pdhdl_t pd, 105 ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op, 106 hermon_mpt_rsrc_type_t mpt_type) 107 { 108 hermon_bind_info_t bind; 109 int status; 110 111 /* 112 * Fill in the "bind" struct. This struct provides the majority 113 * of the information that will be used to distinguish between an 114 * "addr" binding (as is the case here) and a "buf" binding (see 115 * below). The "bind" struct is later passed to hermon_mr_mem_bind() 116 * which does most of the "heavy lifting" for the Hermon memory 117 * registration routines. 118 */ 119 bind.bi_type = HERMON_BINDHDL_VADDR; 120 bind.bi_addr = mr_attr->mr_vaddr; 121 bind.bi_len = mr_attr->mr_len; 122 bind.bi_as = mr_attr->mr_as; 123 bind.bi_flags = mr_attr->mr_flags; 124 status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, 125 mpt_type); 126 return (status); 127 } 128 129 130 /* 131 * hermon_mr_register_buf() 132 * Context: Can be called from interrupt or base context. 133 */ 134 int 135 hermon_mr_register_buf(hermon_state_t *state, hermon_pdhdl_t pd, 136 ibt_smr_attr_t *mr_attr, struct buf *buf, hermon_mrhdl_t *mrhdl, 137 hermon_mr_options_t *op, hermon_mpt_rsrc_type_t mpt_type) 138 { 139 hermon_bind_info_t bind; 140 int status; 141 142 /* 143 * Fill in the "bind" struct. 
	 * This struct provides the majority of the information used to
	 * distinguish between an "addr" binding (see above) and a "buf"
	 * binding (as is the case here).  The "bind" struct is later passed
	 * to hermon_mr_mem_bind() which does most of the "heavy lifting" for
	 * the Hermon memory registration routines.  Note: We have chosen to
	 * provide "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA
	 * flag is not set).  It is not critical what value we choose here as
	 * it need only be unique for the given RKey (which will happen by
	 * default), so the choice here is somewhat arbitrary.
	 */
	bind.bi_type = HERMON_BINDHDL_BUF;
	bind.bi_buf = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	/* buf-based bindings carry no address space; length comes from buf */
	bind.bi_as = NULL;
	bind.bi_len = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, mpt_type);
	return (status);
}


/*
 * hermon_mr_register_shared()
 *    Create a new memory region that shares the MTT translations of an
 *    existing region ("mrhdl"), i.e. the RegisterSharedMR verb.  A new MPT
 *    entry is allocated and pointed at the existing MTT entries, whose
 *    reference count is bumped; for userland memory the pages are pinned a
 *    second time so each handle gets its own umem callback.
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_register_shared(hermon_state_t *state, hermon_mrhdl_t mrhdl,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc;
	hermon_umap_db_entry_t	*umapdb;
	hermon_hw_dmpt_t	mpt_entry;
	hermon_mrhdl_t		mr;
	hermon_bind_info_t	*bind;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, pgsize_msk;
	uint_t			sleep, mr_is_umem;
	int			status, umem_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP :
	    HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mrshared_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the shared memory region.
	 * Specifically, it will be made to reference the currently existing
	 * MTT entries and ownership of the MPT will be passed to the hardware
	 * in the last step below.  If we fail here, we must undo the
	 * protection domain reference count.
	 */
	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrshared_fail1;
	}

	/*
	 * Allocate the software structure for tracking the shared memory
	 * region (i.e. the Hermon Memory Region handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrshared_fail2;
	}
	mr = (hermon_mrhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits, which must equal the MPT table index) and some number of
	 * "unconstrained" bits, which may take any value.  Both keys are
	 * given the same value here.
	 */
	mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);

	/* Grab the MR lock for the current (original) memory region */
	mutex_enter(&mrhdl->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
		mutex_exit(&mrhdl->mr_lock);
		status = IBT_MR_HDL_INVALID;
		goto mrshared_fail3;
	}

	/*
	 * Determine if the original memory was from userland and, if so, pin
	 * the pages (again) with umem_lockmemory().  This guarantees a
	 * separate umem callback for each of this shared region's MR handles.
	 * If this is userland memory, also allocate an entry for the
	 * "userland resources database"; it is inserted only after all
	 * further registration steps succeed.  If we fail here, we must undo
	 * all the above setup.
	 */
	mr_is_umem = mrhdl->mr_is_umem;
	if (mr_is_umem) {
		/* round the original range out to whole pages */
		umem_len = ptob(btopr(mrhdl->mr_bindinfo.bi_len));
		umem_addr = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
		    ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &hermon_umem_cbops, NULL);
		if (status != 0) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INSUFF_RESOURCE;
			goto mrshared_fail3;
		}

		umapdb = hermon_umap_db_alloc(state->hs_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INSUFF_RESOURCE;
			goto mrshared_fail4;
		}
	}

	/*
	 * Copy the MTT resource pointer (and additional parameters) from
	 * the original Hermon Memory Region handle.  Note: this is normally
	 * where hermon_mr_mem_bind() would be called, but because we already
	 * have bound and filled-in MTT entries it is simply a matter here of
	 * managing the MTT reference count and grabbing the address of the
	 * MTT table entries (for filling in the shared region's MPT entry).
	 */
	mr->mr_mttrsrcp = mrhdl->mr_mttrsrcp;
	mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
	mr->mr_bindinfo = mrhdl->mr_bindinfo;
	mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
	mutex_exit(&mrhdl->mr_lock);
	bind = &mr->mr_bindinfo;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	mtt = mr->mr_mttrsrcp;

	/*
	 * Increment the MTT reference count (to reflect the fact that
	 * the MTT is now shared)
	 */
	(void) hermon_mtt_refcnt_inc(mr->mr_mttrefcntp);

	/*
	 * Update the new "bind" virtual address.  Do some extra work here
	 * to ensure proper alignment: the page offset at the beginning of
	 * the old range must equal the offset of this new mapping.
	 */
	pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
	bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
	    (mr->mr_bindinfo.bi_addr & pgsize_msk));

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
	mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
	mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
	mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
	mpt_entry.lr = 1;	/* local read is always enabled */
	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
	mpt_entry.entity_sz = mr->mr_logmttpgsz;
	mpt_entry.mem_key = mr->mr_lkey;
	mpt_entry.pd = pd->pd_pdnum;
	mpt_entry.start_addr = bind->bi_addr;
	mpt_entry.reg_win_len = bind->bi_len;
	/* MTT base address split across the high/low MPT fields */
	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
	mpt_entry.mtt_addr_h = mtt_addr >> 32;
	mpt_entry.mtt_addr_l = mtt_addr >> 3;

	/*
	 * Write the MPT entry to hardware, passing ownership of the entry
	 * to the hardware.  Note: in general, this operation shouldn't fail.
	 * But if it does, we have to undo everything we've done above before
	 * returning error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto mrshared_fail5;
	}

	/*
	 * Fill in the rest of the Hermon Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp = mpt;
	mr->mr_mttrsrcp = mtt;
	mr->mr_mpt_type = HERMON_MPT_DMPT;
	mr->mr_pdhdl = pd;
	mr->mr_rsrcp = rsrc;
	mr->mr_is_umem = mr_is_umem;
	mr->mr_is_fmr = 0;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;
	mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
	mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the hermon_umap_umemlock_cb()
	 * callback and hermon_mr_deregister().
	 */
	if (mr_is_umem) {
		hermon_umap_db_add(umapdb);
	}

	*mrhdl_new = mr;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 * (unwound in reverse order of setup).
 */
mrshared_fail5:
	(void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
	if (mr_is_umem) {
		hermon_umap_db_free(umapdb);
	}
mrshared_fail4:
	if (mr_is_umem) {
		ddi_umem_unlock(umem_cookie);
	}
mrshared_fail3:
	hermon_rsrc_free(state, &rsrc);
mrshared_fail2:
	hermon_rsrc_free(state, &mpt);
mrshared_fail1:
	hermon_pd_refcnt_dec(pd);
mrshared_fail:
	return (status);
}

/*
 * hermon_mr_alloc_fmr()
 *    Allocate an MPT plus MTT entries for a fast memory region (FMR) drawn
 *    from the given FMR pool.  The MPT is programmed with fast_reg_en and
 *    handed to hardware with zero start address/length; the actual mapping
 *    is filled in later by hermon_mr_register_physical_fmr().
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_alloc_fmr(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_fmrhdl_t fmr_pool, hermon_mrhdl_t *mrhdl)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc;
	hermon_hw_dmpt_t	mpt_entry;
	hermon_mrhdl_t		mr;
	hermon_bind_info_t	bind;
	uint64_t		mtt_addr;
	uint64_t		nummtt;
	uint_t			sleep, mtt_pgsize_bits;
	int			status;
	offset_t		i;
	hermon_icm_table_t	*icm_table;
	hermon_dma_info_t	*dma_info;
	uint32_t		index1, index2, rindx;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
	    HERMON_NOSLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		return (IBT_INVALID_PARAM);
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the FMR.
Specifically, it will be 478 * made to reference the currently existing MTT entries and ownership 479 * of the MPT will be passed to the hardware in the last step below. 480 * If we fail here, we must undo the protection domain reference count. 481 */ 482 483 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt); 484 if (status != DDI_SUCCESS) { 485 status = IBT_INSUFF_RESOURCE; 486 goto fmralloc_fail1; 487 } 488 489 /* 490 * Allocate the software structure for tracking the fmr memory 491 * region (i.e. the Hermon Memory Region handle). If we fail here, we 492 * must undo the protection domain reference count and the previous 493 * resource allocation. 494 */ 495 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc); 496 if (status != DDI_SUCCESS) { 497 status = IBT_INSUFF_RESOURCE; 498 goto fmralloc_fail2; 499 } 500 mr = (hermon_mrhdl_t)rsrc->hr_addr; 501 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 502 503 /* 504 * Setup and validate the memory region access flags. This means 505 * translating the IBTF's enable flags into the access flags that 506 * will be used in later operations. 507 */ 508 mr->mr_accflag = 0; 509 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE) 510 mr->mr_accflag |= IBT_MR_LOCAL_WRITE; 511 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ) 512 mr->mr_accflag |= IBT_MR_REMOTE_READ; 513 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE) 514 mr->mr_accflag |= IBT_MR_REMOTE_WRITE; 515 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 516 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; 517 518 /* 519 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 520 * from a certain number of "constrained" bits (the least significant 521 * bits) and some number of "unconstrained" bits. The constrained 522 * bits must be set to the index of the entry in the MPT table, but 523 * the unconstrained bits can be set to any value we wish. Note: 524 * if no remote access is required, then the RKey value is not filled 525 * in. 
Otherwise both Rkey and LKey are given the same value. 526 */ 527 mr->mr_fmr_key = 1; /* ready for the next reload */ 528 mr->mr_rkey = mr->mr_lkey = mpt->hr_indx; 529 530 /* 531 * Determine number of pages spanned. This routine uses the 532 * information in the "bind" struct to determine the required 533 * number of MTT entries needed (and returns the suggested page size - 534 * as a "power-of-2" - for each MTT entry). 535 */ 536 /* Assume address will be page aligned later */ 537 bind.bi_addr = 0; 538 /* Calculate size based on given max pages */ 539 bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT; 540 nummtt = hermon_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits); 541 542 /* 543 * Allocate the MTT entries. Use the calculations performed above to 544 * allocate the required number of MTT entries. If we fail here, we 545 * must not only undo all the previous resource allocation (and PD 546 * reference count), but we must also unbind the memory. 547 */ 548 status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt); 549 if (status != DDI_SUCCESS) { 550 status = IBT_INSUFF_RESOURCE; 551 goto fmralloc_fail3; 552 } 553 mr->mr_logmttpgsz = mtt_pgsize_bits; 554 555 /* 556 * Fill in the MPT entry. This is the final step before passing 557 * ownership of the MPT entry to the Hermon hardware. We use all of 558 * the information collected/calculated above to fill in the 559 * requisite portions of the MPT. 560 */ 561 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); 562 mpt_entry.en_bind = 0; 563 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 564 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; 565 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0; 566 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 
1 : 0; 567 mpt_entry.lr = 1; 568 mpt_entry.reg_win = HERMON_MPT_IS_REGION; 569 mpt_entry.pd = pd->pd_pdnum; 570 571 mpt_entry.entity_sz = mr->mr_logmttpgsz; 572 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT); 573 mpt_entry.fast_reg_en = 1; 574 mpt_entry.mtt_size = (uint_t)nummtt; 575 mpt_entry.mtt_addr_h = mtt_addr >> 32; 576 mpt_entry.mtt_addr_l = mtt_addr >> 3; 577 mpt_entry.mem_key = mr->mr_lkey; 578 579 /* 580 * FMR sets these to 0 for now. Later during actual fmr registration 581 * these values are filled in. 582 */ 583 mpt_entry.start_addr = 0; 584 mpt_entry.reg_win_len = 0; 585 586 /* 587 * Write the MPT entry to hardware. Lastly, we pass ownership of 588 * the entry to the hardware. Note: in general, this operation 589 * shouldn't fail. But if it does, we have to undo everything we've 590 * done above before returning error. 591 */ 592 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 593 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep); 594 if (status != HERMON_CMD_SUCCESS) { 595 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n", 596 status); 597 if (status == HERMON_CMD_INVALID_STATUS) { 598 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 599 } 600 status = ibc_get_ci_failure(0); 601 goto fmralloc_fail4; 602 } 603 604 /* 605 * Fill in the rest of the Hermon Memory Region handle. Having 606 * successfully transferred ownership of the MPT, we can update the 607 * following fields for use in further operations on the MR. Also, set 608 * that this is an FMR region. 
609 */ 610 mr->mr_mptrsrcp = mpt; 611 mr->mr_mttrsrcp = mtt; 612 613 mr->mr_mpt_type = HERMON_MPT_DMPT; 614 mr->mr_pdhdl = pd; 615 mr->mr_rsrcp = rsrc; 616 mr->mr_is_fmr = 1; 617 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey); 618 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey); 619 mr->mr_mttaddr = mtt_addr; 620 (void) memcpy(&mr->mr_bindinfo, &bind, sizeof (hermon_bind_info_t)); 621 622 /* initialize hr_addr for use during register/deregister/invalidate */ 623 icm_table = &state->hs_icm[HERMON_DMPT]; 624 rindx = mpt->hr_indx; 625 hermon_index(index1, index2, rindx, icm_table, i); 626 dma_info = icm_table->icm_dma[index1] + index2; 627 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mpt)) 628 mpt->hr_addr = (void *)((uintptr_t)(dma_info->vaddr + i * mpt->hr_len)); 629 630 *mrhdl = mr; 631 632 return (DDI_SUCCESS); 633 634 /* 635 * The following is cleanup for all possible failure cases in this routine 636 */ 637 fmralloc_fail4: 638 kmem_free(mtt, sizeof (hermon_rsrc_t) * nummtt); 639 fmralloc_fail3: 640 hermon_rsrc_free(state, &rsrc); 641 fmralloc_fail2: 642 hermon_rsrc_free(state, &mpt); 643 fmralloc_fail1: 644 hermon_pd_refcnt_dec(pd); 645 fmralloc_fail: 646 return (status); 647 } 648 649 650 /* 651 * hermon_mr_register_physical_fmr() 652 * Context: Can be called from interrupt or base context. 653 */ 654 /*ARGSUSED*/ 655 int 656 hermon_mr_register_physical_fmr(hermon_state_t *state, 657 ibt_pmr_attr_t *mem_pattr_p, hermon_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p) 658 { 659 hermon_rsrc_t *mpt; 660 uint64_t *mpt_table; 661 int status; 662 uint32_t key; 663 664 mutex_enter(&mr->mr_lock); 665 mpt = mr->mr_mptrsrcp; 666 mpt_table = (uint64_t *)mpt->hr_addr; 667 668 /* Write MPT status to SW bit */ 669 *(uint8_t *)mpt_table = 0xF0; 670 671 membar_producer(); 672 673 /* 674 * Write the mapped addresses into the MTT entries. FMR needs to do 675 * this a little differently, so we call the fmr specific fast mtt 676 * write here. 
	 */
	status = hermon_mr_fast_mtt_write_fmr(state, mr->mr_mttrsrcp,
	    mem_pattr_p, mr->mr_logmttpgsz);
	if (status != DDI_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		status = ibc_get_ci_failure(0);
		goto fmr_reg_fail1;
	}

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  The constrained
	 * (least significant) bits must equal the MPT table index; the
	 * unconstrained bits come from mr_fmr_key, which is post-incremented
	 * so the next reload of this FMR yields a fresh key.  Rkey and LKey
	 * are given the same value.
	 */
	key = mpt->hr_indx | (mr->mr_fmr_key++ << HERMON_MEMKEY_SHIFT);
	mr->mr_lkey = mr->mr_rkey = hermon_mr_key_swap(key);

	/* write mem key value */
	*(uint32_t *)&mpt_table[1] = htonl(key);

	/* write length value */
	mpt_table[3] = htonll(mem_pattr_p->pmr_len);

	/* write start addr value */
	mpt_table[2] = htonll(mem_pattr_p->pmr_iova);

	/* write lkey value */
	*(uint32_t *)&mpt_table[4] = htonl(key);

	/* ensure all entry updates are visible before ownership returns */
	membar_producer();

	/* Write MPT status to HW bit (hand the entry back to hardware) */
	*(uint8_t *)mpt_table = 0x00;

	/* Fill in return parameters */
	mem_desc_p->pmd_lkey = mr->mr_lkey;
	mem_desc_p->pmd_rkey = mr->mr_rkey;
	mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova;
	mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len;

	/* Fill in MR bindinfo struct for later sync or query operations */
	mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova;
	mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT;

	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);

fmr_reg_fail1:
	/*
	 * Note, we fail here, and purposely leave the memory ownership in
	 * software.  The memory tables may be corrupt, so we leave the region
	 * unregistered.
	 */
	return (status);
}


/*
 * hermon_mr_deregister()
 *    Tear down a memory region: reclaim the MPT from hardware (per "level"),
 *    release umem locks/database entries for userland memory, drop the MTT
 *    reference (freeing/unbinding only when not shared), and free the MR
 *    software resources.
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_deregister(hermon_state_t *state, hermon_mrhdl_t *mrhdl, uint_t level,
    uint_t sleep)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
	hermon_umap_db_entry_t	*umapdb;
	hermon_pdhdl_t		pd;
	hermon_mrhdl_t		mr;
	hermon_bind_info_t	*bind;
	uint64_t		value;
	int			status;
	uint_t			shared_mtt;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		return (status);
	}

	/*
	 * Pull all the necessary information from the Hermon Memory Region
	 * handle.  This is necessary here because the resource for the
	 * MR handle is going to be freed up as part of this deregistration.
	 */
	mr = *mrhdl;
	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mtt = mr->mr_mttrsrcp;
	mtt_refcnt = mr->mr_mttrefcntp;
	rsrc = mr->mr_rsrcp;
	pd = mr->mr_pdhdl;
	bind = &mr->mr_bindinfo;

	/*
	 * Check here if the memory region is really an FMR.  If so, this is a
	 * bad thing and we shouldn't be here (FMRs go through
	 * hermon_mr_dealloc_fmr()).  Return failure.
	 */
	if (mr->mr_is_fmr) {
		mutex_exit(&mr->mr_lock);
		return (IBT_INVALID_PARAM);
	}

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of the hermon_umap_umemlock_cb() callback.
	 * If so, then jump to the end and free the remaining resources.
796 */ 797 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) { 798 goto mrdereg_finish_cleanup; 799 } 800 if (hermon_rdma_debug & 0x4) 801 IBTF_DPRINTF_L2("mr", "dereg: mr %p key %x", 802 mr, mr->mr_rkey); 803 804 /* 805 * We must drop the "mr_lock" here to ensure that both SLEEP and 806 * NOSLEEP calls into the firmware work as expected. Also, if two 807 * threads are attemping to access this MR (via de-register, 808 * re-register, or otherwise), then we allow the firmware to enforce 809 * the checking, that only one deregister is valid. 810 */ 811 mutex_exit(&mr->mr_lock); 812 813 /* 814 * Reclaim MPT entry from hardware (if necessary). Since the 815 * hermon_mr_deregister() routine is used in the memory region 816 * reregistration process as well, it is possible that we will 817 * not always wish to reclaim ownership of the MPT. Check the 818 * "level" arg and, if necessary, attempt to reclaim it. If 819 * the ownership transfer fails for any reason, we check to see 820 * what command status was returned from the hardware. The only 821 * "expected" error status is the one that indicates an attempt to 822 * deregister a memory region that has memory windows bound to it 823 */ 824 if (level >= HERMON_MR_DEREG_ALL) { 825 if (mr->mr_mpt_type >= HERMON_MPT_DMPT) { 826 status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, 827 NULL, 0, mpt->hr_indx, sleep); 828 if (status != HERMON_CMD_SUCCESS) { 829 if (status == HERMON_CMD_REG_BOUND) { 830 return (IBT_MR_IN_USE); 831 } else { 832 cmn_err(CE_CONT, "Hermon: HW2SW_MPT " 833 "command failed: %08x\n", status); 834 if (status == 835 HERMON_CMD_INVALID_STATUS) { 836 hermon_fm_ereport(state, 837 HCA_SYS_ERR, 838 DDI_SERVICE_LOST); 839 } 840 return (IBT_INVALID_PARAM); 841 } 842 } 843 } 844 } 845 846 /* 847 * Re-grab the mr_lock here. Since further access to the protected 848 * 'mr' structure is needed, and we would have returned previously for 849 * the multiple deregistration case, we can safely grab the lock here. 
850 */ 851 mutex_enter(&mr->mr_lock); 852 853 /* 854 * If the memory had come from userland, then we do a lookup in the 855 * "userland resources database". On success, we free the entry, call 856 * ddi_umem_unlock(), and continue the cleanup. On failure (which is 857 * an indication that the umem_lockmemory() callback has called 858 * hermon_mr_deregister()), we call ddi_umem_unlock() and invalidate 859 * the "mr_umemcookie" field in the MR handle (this will be used 860 * later to detect that only partial cleaup still remains to be done 861 * on the MR handle). 862 */ 863 if (mr->mr_is_umem) { 864 status = hermon_umap_db_find(state->hs_instance, 865 (uint64_t)(uintptr_t)mr->mr_umemcookie, 866 MLNX_UMAP_MRMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE, 867 &umapdb); 868 if (status == DDI_SUCCESS) { 869 hermon_umap_db_free(umapdb); 870 ddi_umem_unlock(mr->mr_umemcookie); 871 } else { 872 ddi_umem_unlock(mr->mr_umemcookie); 873 mr->mr_umemcookie = NULL; 874 } 875 } 876 877 /* 878 * Decrement the MTT reference count. Since the MTT resource 879 * may be shared between multiple memory regions (as a result 880 * of a "RegisterSharedMR" verb) it is important that we not 881 * free up or unbind resources prematurely. If it's not shared (as 882 * indicated by the return status), then free the resource. 883 */ 884 shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt); 885 if (!shared_mtt) { 886 hermon_rsrc_free(state, &mtt_refcnt); 887 } 888 889 /* 890 * Free up the MTT entries and unbind the memory. Here, as above, we 891 * attempt to free these resources only if it is appropriate to do so. 892 */ 893 if (!shared_mtt) { 894 if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) { 895 hermon_mr_mem_unbind(state, bind); 896 } 897 hermon_rsrc_free(state, &mtt); 898 } 899 900 /* 901 * If the MR handle has been invalidated, then drop the 902 * lock and return success. Note: This only happens because 903 * the umem_lockmemory() callback has been triggered. 
The
 * cleanup here is partial, and further cleanup (in a
 * subsequent hermon_mr_deregister() call) will be necessary.
 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		return (DDI_SUCCESS);
	}

mrdereg_finish_cleanup:
	mutex_exit(&mr->mr_lock);

	/* Free the Hermon Memory Region handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	if (mpt != NULL)
		hermon_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the mrhdl pointer to NULL and return success */
	*mrhdl = NULL;

	return (DDI_SUCCESS);
}

/*
 * hermon_mr_dealloc_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 *    Frees the driver-side resources (MTT, MR handle, MPT, PD refcnt) of an
 *    FMR.  Note: unlike hermon_mr_deregister(), no HW2SW_MPT command is
 *    posted here -- NOTE(review): presumably the MPT was already returned to
 *    software ownership via hermon_mr_deregister_fmr(); confirm with callers.
 */
/* ARGSUSED */
int
hermon_mr_dealloc_fmr(hermon_state_t *state, hermon_mrhdl_t *mrhdl)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc;
	hermon_pdhdl_t		pd;
	hermon_mrhdl_t		mr;

	/*
	 * Pull all the necessary information from the Hermon Memory Region
	 * handle.  This is necessary here because the resource for the
	 * MR handle is going to be freed up as part of the this
	 * deregistration
	 */
	mr	= *mrhdl;
	mutex_enter(&mr->mr_lock);
	mpt	= mr->mr_mptrsrcp;
	mtt	= mr->mr_mttrsrcp;
	rsrc	= mr->mr_rsrcp;
	pd	= mr->mr_pdhdl;
	mutex_exit(&mr->mr_lock);

	/* Free the MTT entries */
	hermon_rsrc_free(state, &mtt);

	/* Free the Hermon Memory Region handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	hermon_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the mrhdl pointer to NULL and return success */
	*mrhdl = NULL;

	return (DDI_SUCCESS);
}

/*
 * hermon_mr_invalidate_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 *    Invalidates an FMR by rewriting its MPT entry directly through the
 *    mapped address (mpt->hr_addr): the MPT is first marked as software-
 *    owned, the memory key and lkey words are cleared, and the entry is
 *    then handed back to hardware.  The membar_producer() calls order the
 *    status-byte store ahead of (and after) the key stores as observed by
 *    the HCA.
 */
/* ARGSUSED */
int
hermon_mr_invalidate_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
{
	hermon_rsrc_t		*mpt;
	uint64_t		*mpt_table;

	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mpt_table = (uint64_t *)mpt->hr_addr;

	/* Write MPT status to SW bit */
	*(uint8_t *)&mpt_table[0] = 0xF0;

	membar_producer();

	/* invalidate mem key value */
	*(uint32_t *)&mpt_table[1] = 0;

	/* invalidate lkey value */
	*(uint32_t *)&mpt_table[4] = 0;

	membar_producer();

	/* Write MPT status to HW bit */
	*(uint8_t *)&mpt_table[0] = 0x00;

	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);
}

/*
 * hermon_mr_deregister_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
{
	hermon_rsrc_t		*mpt;
	uint64_t		*mpt_table;

	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mpt_table = (uint64_t *)mpt->hr_addr;

	/*
	 * Write MPT status to SW bit.  This only flips the ownership byte of
	 * the MPT entry; no driver resources are released here (that happens
	 * later, in hermon_mr_dealloc_fmr()).
	 */
	*(uint8_t *)&mpt_table[0] = 0xF0;

	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);
}


/*
 * hermon_mr_query()
 *    Context: Can be called from interrupt or base context.
 *
 *    Refreshes the software MR state from the hardware MPT entry (via a
 *    QUERY_MPT command) and fills in the caller's ibt_mr_query_attr_t.
 */
/* ARGSUSED */
int
hermon_mr_query(hermon_state_t *state, hermon_mrhdl_t mr,
    ibt_mr_query_attr_t *attr)
{
	int			status;
	hermon_hw_dmpt_t	mpt_entry;
	uint32_t		lkey;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))

	mutex_enter(&mr->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/*
	 * mr_lkey is stored in (index[23:0],key[7:0]) form (see
	 * hermon_mr_key_swap()), so ">> 8" extracts the MPT index for the
	 * QUERY_MPT command.
	 */
	status = hermon_cmn_query_cmd_post(state, QUERY_MPT, 0,
	    mr->mr_lkey >> 8, &mpt_entry, sizeof (hermon_hw_dmpt_t),
	    HERMON_NOSLEEP);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: QUERY_MPT failed: status %x", status);
		mutex_exit(&mr->mr_lock);
		return (ibc_get_ci_failure(0));
	}

	/* Update the mr sw struct from the hw struct. */
	lkey = mpt_entry.mem_key;
	mr->mr_lkey = mr->mr_rkey = (lkey >> 8) | (lkey << 24);
	mr->mr_bindinfo.bi_addr = mpt_entry.start_addr;
	mr->mr_bindinfo.bi_len = mpt_entry.reg_win_len;
	mr->mr_accflag = (mr->mr_accflag & IBT_MR_RO_DISABLED) |
	    (mpt_entry.lw ? IBT_MR_LOCAL_WRITE : 0) |
	    (mpt_entry.rr ? IBT_MR_REMOTE_READ : 0) |
	    (mpt_entry.rw ? IBT_MR_REMOTE_WRITE : 0) |
	    (mpt_entry.atomic ? IBT_MR_REMOTE_ATOMIC : 0) |
	    (mpt_entry.en_bind ? IBT_MR_WINDOW_BIND : 0);
	mr->mr_mttaddr = ((uint64_t)mpt_entry.mtt_addr_h << 32) |
	    (mpt_entry.mtt_addr_l << 3);
	mr->mr_logmttpgsz = mpt_entry.entity_sz;

	/* Fill in the queried attributes */
	attr->mr_lkey_state =
	    (mpt_entry.status == HERMON_MPT_FREE) ? IBT_KEY_FREE :
	    (mpt_entry.status == HERMON_MPT_SW_OWNERSHIP) ? IBT_KEY_INVALID :
	    IBT_KEY_VALID;
	attr->mr_phys_buf_list_sz = mpt_entry.mtt_size;
	attr->mr_attr_flags = mr->mr_accflag;
	attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;

	/* Fill in the "local" attributes */
	attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
	attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
	attr->mr_lbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;

	/*
	 * Fill in the "remote" attributes (if necessary).  Note: the
	 * remote attributes are only valid if the memory region has one
	 * or more of the remote access flags set.
	 */
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
		attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
		attr->mr_rbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;
	}

	/*
	 * If region is mapped for streaming (i.e. noncoherent), then set sync
	 * is required
	 */
	attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
	    IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

	mutex_exit(&mr->mr_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_mr_reregister()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_reregister(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op)
{
	hermon_bind_info_t	bind;
	int			status;

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (as is the case here) and a "buf" binding (see
	 * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Hermon memory
	 * registration (and reregistration) routines.
	 */
	bind.bi_type  = HERMON_BINDHDL_VADDR;
	bind.bi_addr  = mr_attr->mr_vaddr;
	bind.bi_len   = mr_attr->mr_len;
	bind.bi_as    = mr_attr->mr_as;
	bind.bi_flags = mr_attr->mr_flags;
	/* All real work is done by the common reregistration path. */
	status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
	return (status);
}


/*
 * hermon_mr_reregister_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_reregister_buf(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
    hermon_mrhdl_t *mrhdl_new, hermon_mr_options_t *op)
{
	hermon_bind_info_t	bind;
	int			status;

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (see above) and a "buf" binding (as is the case
	 * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Hermon memory
	 * registration routines.  Note:  We have chosen to provide
	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
	 * not set).  It is not critical what value we choose here as it need
	 * only be unique for the given RKey (which will happen by default),
	 * so the choice here is somewhat arbitrary.
	 */
	bind.bi_type  = HERMON_BINDHDL_BUF;
	bind.bi_buf   = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr  = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	bind.bi_len   = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	bind.bi_as    = NULL;
	status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
	return (status);
}


/*
 * hermon_mr_sync()
 *    Context: Can be called from interrupt or base context.
 *
 *    Validates each requested segment against its MR's registered bounds,
 *    then ddi_dma_sync()s the covered range (DDI_DMA_SYNC_FORDEV for
 *    IBT_SYNC_READ, DDI_DMA_SYNC_FORCPU for IBT_SYNC_WRITE).
 */
/* ARGSUSED */
int
hermon_mr_sync(hermon_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
{
	hermon_mrhdl_t		mrhdl;
	uint64_t		seg_vaddr, seg_len, seg_end;
	uint64_t		mr_start, mr_end;
	uint_t			type;
	int			status, i;

	/* Process each of the ibt_mr_sync_t's */
	for (i = 0; i < num_segs; i++) {
		mrhdl = (hermon_mrhdl_t)mr_segs[i].ms_handle;

		/* Check for valid memory region handle */
		if (mrhdl == NULL) {
			status = IBT_MR_HDL_INVALID;
			goto mrsync_fail;
		}

		mutex_enter(&mrhdl->mr_lock);

		/*
		 * Check here to see if the memory region has already been
		 * partially deregistered as a result of a
		 * hermon_umap_umemlock_cb() callback.  If so, this is an
		 * error, return failure.
1225 */ 1226 if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) { 1227 mutex_exit(&mrhdl->mr_lock); 1228 status = IBT_MR_HDL_INVALID; 1229 goto mrsync_fail; 1230 } 1231 1232 /* Check for valid bounds on sync request */ 1233 seg_vaddr = mr_segs[i].ms_vaddr; 1234 seg_len = mr_segs[i].ms_len; 1235 seg_end = seg_vaddr + seg_len - 1; 1236 mr_start = mrhdl->mr_bindinfo.bi_addr; 1237 mr_end = mr_start + mrhdl->mr_bindinfo.bi_len - 1; 1238 if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) { 1239 mutex_exit(&mrhdl->mr_lock); 1240 status = IBT_MR_VA_INVALID; 1241 goto mrsync_fail; 1242 } 1243 if ((seg_end < mr_start) || (seg_end > mr_end)) { 1244 mutex_exit(&mrhdl->mr_lock); 1245 status = IBT_MR_LEN_INVALID; 1246 goto mrsync_fail; 1247 } 1248 1249 /* Determine what type (i.e. direction) for sync */ 1250 if (mr_segs[i].ms_flags & IBT_SYNC_READ) { 1251 type = DDI_DMA_SYNC_FORDEV; 1252 } else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) { 1253 type = DDI_DMA_SYNC_FORCPU; 1254 } else { 1255 mutex_exit(&mrhdl->mr_lock); 1256 status = IBT_INVALID_PARAM; 1257 goto mrsync_fail; 1258 } 1259 1260 (void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl, 1261 (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type); 1262 1263 mutex_exit(&mrhdl->mr_lock); 1264 } 1265 1266 return (DDI_SUCCESS); 1267 1268 mrsync_fail: 1269 return (status); 1270 } 1271 1272 1273 /* 1274 * hermon_mw_alloc() 1275 * Context: Can be called from interrupt or base context. 1276 */ 1277 int 1278 hermon_mw_alloc(hermon_state_t *state, hermon_pdhdl_t pd, ibt_mw_flags_t flags, 1279 hermon_mwhdl_t *mwhdl) 1280 { 1281 hermon_rsrc_t *mpt, *rsrc; 1282 hermon_hw_dmpt_t mpt_entry; 1283 hermon_mwhdl_t mw; 1284 uint_t sleep; 1285 int status; 1286 1287 if (state != NULL) /* XXX - bogus test that is always TRUE */ 1288 return (IBT_INSUFF_RESOURCE); 1289 1290 /* 1291 * Check the sleep flag. Ensure that it is consistent with the 1292 * current thread context (i.e. 
if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MW_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mwalloc_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry (for use as a memory window).  Since the
	 * Hermon hardware uses the MPT entry for memory regions and for
	 * memory windows, we will fill in this MPT with all the necessary
	 * parameters for the memory window.  And then (just as we do for
	 * memory regions) ownership will be passed to the hardware in the
	 * final step below.  If we fail here, we must undo the protection
	 * domain reference count.
	 */
	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mwalloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory window (i.e.
	 * the Hermon Memory Window handle).  Note:  This is actually the same
	 * software structure used for tracking memory regions, but since many
	 * of the same properties are needed, only a single structure is
	 * necessary.  If we fail here, we must undo the protection domain
	 * reference count and the previous resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mwalloc_fail2;
	}
	mw = (hermon_mwhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Calculate an "unbound" RKey from MPT index.  In much the same way
	 * as we do for memory regions (above), this key is constructed from
	 * a "constrained" (which depends on the MPT index) and an
	 * "unconstrained" portion (which may be arbitrarily chosen).
	 */
	mw->mr_rkey = hermon_mr_keycalc(mpt->hr_indx);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.  Note:  fewer entries in the MPT
	 * entry are necessary to allocate a memory window.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
	mpt_entry.reg_win	= HERMON_MPT_IS_WINDOW;
	mpt_entry.mem_key	= mw->mr_rkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.lr		= 1;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto mwalloc_fail3;
	}

	/*
	 * Fill in the rest of the Hermon Memory Window handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MW.
	 */
	mw->mr_mptrsrcp	= mpt;
	mw->mr_pdhdl	= pd;
	mw->mr_rsrcp	= rsrc;
	/* Swap the RKey into the (index,key) form handed out to consumers */
	mw->mr_rkey	= hermon_mr_key_swap(mw->mr_rkey);
	*mwhdl = mw;

	return (DDI_SUCCESS);

mwalloc_fail3:
	hermon_rsrc_free(state, &rsrc);
mwalloc_fail2:
	hermon_rsrc_free(state, &mpt);
mwalloc_fail1:
	hermon_pd_refcnt_dec(pd);
mwalloc_fail:
	return (status);
}


/*
 * hermon_mw_free()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mw_free(hermon_state_t *state, hermon_mwhdl_t *mwhdl, uint_t sleep)
{
	hermon_rsrc_t		*mpt, *rsrc;
	hermon_mwhdl_t		mw;
	int			status;
	hermon_pdhdl_t		pd;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		return (status);
	}

	/*
	 * Pull all the necessary information from the Hermon Memory Window
	 * handle.  This is necessary here because the resource for the
	 * MW handle is going to be freed up as part of the this operation.
	 */
	mw = *mwhdl;
	mutex_enter(&mw->mr_lock);
	mpt	= mw->mr_mptrsrcp;
	rsrc	= mw->mr_rsrcp;
	pd	= mw->mr_pdhdl;
	mutex_exit(&mw->mr_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Reclaim the MPT entry from hardware.  Note: in general, it is
	 * unexpected for this operation to return an error.
 */
	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
	    0, mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		/*
		 * NOTE(review): on HW2SW_MPT failure the MW handle, MPT and
		 * PD refcnt are left intact (not freed) -- confirm callers
		 * treat this as a retryable/fatal condition appropriately.
		 */
		return (ibc_get_ci_failure(0));
	}

	/* Free the Hermon Memory Window handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	hermon_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the mwhdl pointer to NULL and return success */
	*mwhdl = NULL;

	return (DDI_SUCCESS);
}


/*
 * hermon_mr_keycalc()
 *    Context: Can be called from interrupt or base context.
 *    NOTE:  Produces a key in the form of
 *	KKKKKKKK IIIIIIII IIIIIIII IIIIIIIII
 *    where K == the arbitrary bits and I == the index
 */
uint32_t
hermon_mr_keycalc(uint32_t indx)
{
	uint32_t	tmp_key, tmp_indx;

	/*
	 * Generate a simple key from counter.  Note:  We increment this
	 * static variable _intentionally_ without any kind of mutex around
	 * it.  First, single-threading all operations through a single lock
	 * would be a bad idea (from a performance point-of-view).  Second,
	 * the upper "unconstrained" bits don't really have to be unique
	 * because the lower bits are guaranteed to be (although we do make a
	 * best effort to ensure that they are).  Third, the window for the
	 * race (where both threads read and update the counter at the same
	 * time) is incredibly small.
	 * And, lastly, we'd like to make this into a "random" key
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hermon_memkey_cnt))
	/* Upper 8 bits: rolling counter; lower 24 bits: MPT index */
	tmp_key = (hermon_memkey_cnt++) << HERMON_MEMKEY_SHIFT;
	tmp_indx = indx & 0xffffff;
	return (tmp_key | tmp_indx);
}


/*
 * hermon_mr_key_swap()
 *    Context: Can be called from interrupt or base context.
 *    NOTE:  Produces a key in the form of
 *	IIIIIIII IIIIIIII IIIIIIIII KKKKKKKK
 *    where K == the arbitrary bits and I == the index
 */
uint32_t
hermon_mr_key_swap(uint32_t indx)
{
	/*
	 * The memory key format to pass down to the hardware is
	 * (key[7:0],index[23:0]), which defines the index to the
	 * hardware resource. When the driver passes this as a memory
	 * key, (i.e. to retrieve a resource) the format is
	 * (index[23:0],key[7:0]).
	 */
	return (((indx >> 24) & 0x000000ff) | ((indx << 8) & 0xffffff00));
}

/*
 * hermon_mr_common_reg()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
	hermon_umap_db_entry_t	*umapdb;
	hermon_sw_refcnt_t	*swrc_tmp;
	hermon_hw_dmpt_t	mpt_entry;
	hermon_mrhdl_t		mr;
	ibt_mr_flags_t		flags;
	hermon_bind_info_t	*bh;
	ddi_dma_handle_t	bind_dmahdl;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, max_sz;
	uint_t			sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
	int			status, umem_flags, bind_override_addr;

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e.
with
	 * entries written into the PCI IOMMU), whether it should be
	 * registered to bypass the IOMMU, and whether or not the resulting
	 * address should be "zero-based" (to aid the alignment restrictions
	 * for QPs).
	 */
	if (op == NULL) {
		bind_type   = HERMON_BINDMEM_NORMAL;
		bind_dmahdl = NULL;
		bind_override_addr = 0;
	} else {
		bind_type	   = op->mro_bind_type;
		bind_dmahdl	   = op->mro_bind_dmahdl;
		bind_override_addr = op->mro_bind_override_addr;
	}

	/* check what kind of mpt to use */

	/* Extract the flags field from the hermon_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check for invalid length.  Check is the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		status = IBT_MR_LEN_INVALID;
		goto mrcommon_fail;
	}

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mrcommon_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the memory region.  And then
	 * ownership will be passed to the hardware in the final step
	 * below.  If we fail here, we must undo the protection domain
	 * reference count.
	 */
	if (mpt_type == HERMON_MPT_DMPT) {
		status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
		if (status != DDI_SUCCESS) {
			status = IBT_INSUFF_RESOURCE;
			goto mrcommon_fail1;
		}
	} else {
		mpt = NULL;
	}

	/*
	 * Allocate the software structure for tracking the memory region (i.e.
	 * the Hermon Memory Region handle).  If we fail here, we must undo
	 * the protection domain reference count and the previous resource
	 * allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrcommon_fail2;
	}
	mr = (hermon_mrhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	/* Keys are only generated when a dMPT was allocated above */
	if (mpt)
		mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);

	/*
	 * Determine if the memory is from userland and pin the pages
	 * with umem_lockmemory() if necessary.
	 * Then, if this is userland memory, allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo the reference counts
	 * and the previous resource allocations.
	 */
	mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
	if (mr_is_umem) {
		umem_len   = ptob(btopr(bind->bi_len +
		    ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
		umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &hermon_umem_cbops, NULL);
		if (status != 0) {
			status = IBT_INSUFF_RESOURCE;
			goto mrcommon_fail3;
		}

		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))

		bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
		    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
		if (bind->bi_buf == NULL) {
			status = IBT_INSUFF_RESOURCE;
			goto mrcommon_fail3;
		}
		bind->bi_type = HERMON_BINDHDL_UBUF;
		bind->bi_buf->b_flags |= B_READ;

		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

		umapdb = hermon_umap_db_alloc(state->hs_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			status = IBT_INSUFF_RESOURCE;
			goto mrcommon_fail4;
		}
	}

	/*
	 * Setup the bindinfo for the mtt bind call
	 */
	bh = &mr->mr_bindinfo;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
	bcopy(bind, bh, sizeof (hermon_bind_info_t));
	bh->bi_bypass = bind_type;
	status = hermon_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
	    &mtt_pgsize_bits, mpt != NULL);
	if (status != DDI_SUCCESS) {
		/*
		 * When mtt_bind fails, freerbuf has already been done,
		 * so make sure not to call it again.
		 */
		bind->bi_type = bh->bi_type;
		goto mrcommon_fail5;
	}
	mr->mr_logmttpgsz = mtt_pgsize_bits;

	/*
	 * Allocate MTT reference count (to track shared memory regions).
	 * This reference count resource may never be used on the given
	 * memory region, but if it is ever later registered as "shared"
	 * memory region then this resource will be necessary.  If we fail
	 * here, we do pretty much the same as above to clean up.
	 */
	status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
	    &mtt_refcnt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrcommon_fail6;
	}
	mr->mr_mttrefcntp = mtt_refcnt;
	swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
	HERMON_MTT_REFCNT_INIT(swrc_tmp);

	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.  Do this ONLY for DMPTs.
	 */
	if (mpt == NULL)
		goto no_passown;

	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));

	mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.phys_addr = 0;
	mpt_entry.reg_win = HERMON_MPT_IS_REGION;

	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.rem_acc_en = 0;
	mpt_entry.fast_reg_en = 0;
	mpt_entry.en_inval = 0;
	mpt_entry.lkey = 0;
	mpt_entry.win_cnt = 0;

	if (bind_override_addr == 0) {
		mpt_entry.start_addr = bh->bi_addr;
	} else {
		bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
		mpt_entry.start_addr = bh->bi_addr;
	}
	mpt_entry.reg_win_len	= bh->bi_len;

	mpt_entry.mtt_addr_h = mtt_addr >> 32;	/* only 8 more bits */
	mpt_entry.mtt_addr_l = mtt_addr >> 3;	/* only 29 bits */

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware if needed.  Note: in general, this
	 * operation shouldn't fail.  But if it does, we have to undo
	 * everything we've done above before returning error.
	 *
	 * For Hermon, this routine (which is common to the contexts) will only
	 * set the ownership if needed - the process of passing the context
	 * itself to HW will take care of setting up the MPT (based on type
	 * and index).
	 */

	mpt_entry.bnd_qp = 0;	/* dMPT for a qp, check for window */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto mrcommon_fail7;
	}
	if (hermon_rdma_debug & 0x4)
		IBTF_DPRINTF_L2("mr", " reg: mr %p key %x",
		    mr, hermon_mr_key_swap(mr->mr_rkey));
no_passown:

	/*
	 * Fill in the rest of the Hermon Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mttaddr	   = mtt_addr;

	mr->mr_log2_pgsz   = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
	mr->mr_mptrsrcp	   = mpt;
	mr->mr_mttrsrcp	   = mtt;
	mr->mr_pdhdl	   = pd;
	mr->mr_rsrcp	   = rsrc;
	mr->mr_is_umem	   = mr_is_umem;
	mr->mr_is_fmr	   = 0;
	mr->mr_umemcookie  = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;
	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);
	mr->mr_mpt_type	   = mpt_type;

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the hermon_umap_umemlock_cb()
	 * callback and hermon_mr_deregister().
	 */
	if (mr_is_umem) {
		hermon_umap_db_add(umapdb);
	}

	*mrhdl = mr;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 * (labels unwind in strict reverse order of the allocations above)
 */
mrcommon_fail7:
	hermon_rsrc_free(state, &mtt_refcnt);
mrcommon_fail6:
	hermon_mr_mem_unbind(state, bh);
	bind->bi_type = bh->bi_type;
mrcommon_fail5:
	if (mr_is_umem) {
		hermon_umap_db_free(umapdb);
	}
mrcommon_fail4:
	if (mr_is_umem) {
		/*
		 * Free up the memory ddi_umem_iosetup() allocates
		 * internally.
		 */
		if (bind->bi_type == HERMON_BINDHDL_UBUF) {
			freerbuf(bind->bi_buf);
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
			bind->bi_type = HERMON_BINDHDL_NONE;
			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
		}
		ddi_umem_unlock(umem_cookie);
	}
mrcommon_fail3:
	hermon_rsrc_free(state, &rsrc);
mrcommon_fail2:
	if (mpt != NULL)
		hermon_rsrc_free(state, &mpt);
mrcommon_fail1:
	hermon_pd_refcnt_dec(pd);
mrcommon_fail:
	return (status);
}

/*
 * hermon_mr_mtt_bind()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_mtt_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t bind_dmahdl, hermon_rsrc_t **mtt, uint_t *mtt_pgsize_bits,
    uint_t is_buffer)
{
	uint64_t		nummtt;
	uint_t			sleep;
	int			status;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ?
	    HERMON_NOSLEEP : HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mrmttbind_fail;
	}

	/*
	 * Bind the memory and determine the mapped addresses.
This is 1907 * the first of two routines that do all the "heavy lifting" for 1908 * the Hermon memory registration routines. The hermon_mr_mem_bind() 1909 * routine takes the "bind" struct with all its fields filled 1910 * in and returns a list of DMA cookies (for the PCI mapped addresses 1911 * corresponding to the specified address region) which are used by 1912 * the hermon_mr_fast_mtt_write() routine below. If we fail here, we 1913 * must undo all the previous resource allocation (and PD reference 1914 * count). 1915 */ 1916 status = hermon_mr_mem_bind(state, bind, bind_dmahdl, sleep, is_buffer); 1917 if (status != DDI_SUCCESS) { 1918 status = IBT_INSUFF_RESOURCE; 1919 goto mrmttbind_fail; 1920 } 1921 1922 /* 1923 * Determine number of pages spanned. This routine uses the 1924 * information in the "bind" struct to determine the required 1925 * number of MTT entries needed (and returns the suggested page size - 1926 * as a "power-of-2" - for each MTT entry). 1927 */ 1928 nummtt = hermon_mr_nummtt_needed(state, bind, mtt_pgsize_bits); 1929 1930 /* 1931 * Allocate the MTT entries. Use the calculations performed above to 1932 * allocate the required number of MTT entries. If we fail here, we 1933 * must not only undo all the previous resource allocation (and PD 1934 * reference count), but we must also unbind the memory. 1935 */ 1936 status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, mtt); 1937 if (status != DDI_SUCCESS) { 1938 status = IBT_INSUFF_RESOURCE; 1939 goto mrmttbind_fail2; 1940 } 1941 1942 /* 1943 * Write the mapped addresses into the MTT entries. This is part two 1944 * of the "heavy lifting" routines that we talked about above. Note: 1945 * we pass the suggested page size from the earlier operation here. 1946 * And if we fail here, we again do pretty much the same huge clean up. 
1947 */ 1948 status = hermon_mr_fast_mtt_write(state, *mtt, bind, *mtt_pgsize_bits); 1949 if (status != DDI_SUCCESS) { 1950 /* 1951 * hermon_mr_fast_mtt_write() returns DDI_FAILURE 1952 * only if it detects a HW error during DMA. 1953 */ 1954 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1955 status = ibc_get_ci_failure(0); 1956 goto mrmttbind_fail3; 1957 } 1958 return (DDI_SUCCESS); 1959 1960 /* 1961 * The following is cleanup for all possible failure cases in this routine 1962 */ 1963 mrmttbind_fail3: 1964 hermon_rsrc_free(state, mtt); 1965 mrmttbind_fail2: 1966 hermon_mr_mem_unbind(state, bind); 1967 mrmttbind_fail: 1968 return (status); 1969 } 1970 1971 1972 /* 1973 * hermon_mr_mtt_unbind() 1974 * Context: Can be called from interrupt or base context. 1975 */ 1976 int 1977 hermon_mr_mtt_unbind(hermon_state_t *state, hermon_bind_info_t *bind, 1978 hermon_rsrc_t *mtt) 1979 { 1980 /* 1981 * Free up the MTT entries and unbind the memory. Here, as above, we 1982 * attempt to free these resources only if it is appropriate to do so. 1983 */ 1984 hermon_mr_mem_unbind(state, bind); 1985 hermon_rsrc_free(state, &mtt); 1986 1987 return (DDI_SUCCESS); 1988 } 1989 1990 1991 /* 1992 * hermon_mr_common_rereg() 1993 * Context: Can be called from interrupt or base context. 1994 */ 1995 static int 1996 hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr, 1997 hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new, 1998 hermon_mr_options_t *op) 1999 { 2000 hermon_rsrc_t *mpt; 2001 ibt_mr_attr_flags_t acc_flags_to_use; 2002 ibt_mr_flags_t flags; 2003 hermon_pdhdl_t pd_to_use; 2004 hermon_hw_dmpt_t mpt_entry; 2005 uint64_t mtt_addr_to_use, vaddr_to_use, len_to_use; 2006 uint_t sleep, dereg_level; 2007 int status; 2008 2009 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 2010 2011 /* 2012 * Check here to see if the memory region corresponds to a userland 2013 * mapping. Reregistration of userland memory regions is not 2014 * currently supported. 
	 * Return failure.
	 */
	if (mr->mr_is_umem) {
		status = IBT_MR_HDL_INVALID;
		goto mrrereg_fail;
	}

	/* Hold the region lock across the entire ownership round-trip */
	mutex_enter(&mr->mr_lock);

	/* Pull MPT resource pointer from the Hermon Memory Region handle */
	mpt = mr->mr_mptrsrcp;

	/* Extract the flags field from the hermon_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		mutex_exit(&mr->mr_lock);
		status = IBT_INVALID_PARAM;
		goto mrrereg_fail;
	}

	/*
	 * First step is to temporarily invalidate the MPT entry.  This
	 * regains ownership from the hardware, and gives us the opportunity
	 * to modify the entry.  Note: The HW2SW_MPT command returns the
	 * current MPT entry contents.  These are saved away here because
	 * they will be reused in a later step below.  If the region has
	 * bound memory windows that we fail returning an "in use" error code.
	 * Otherwise, this is an unexpected error and we deregister the
	 * memory region and return error.
	 *
	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		if (status == HERMON_CMD_REG_BOUND) {
			return (IBT_MR_IN_USE);
		} else {
			cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			/*
			 * Call deregister and ensure that all current
			 * resources get freed up
			 */
			if (hermon_mr_deregister(state, &mr,
			    HERMON_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * If we're changing the protection domain, then validate the new one
	 */
	if (flags & IBT_MR_CHANGE_PD) {

		/* Check for valid PD handle pointer */
		if (pd == NULL) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (hermon_mr_deregister(state, &mr,
			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			status = IBT_PD_HDL_INVALID;
			goto mrrereg_fail;
		}

		/* Use the new PD handle in all operations below */
		pd_to_use = pd;

	} else {
		/* Use the current PD handle in all operations below */
		pd_to_use = mr->mr_pdhdl;
	}

	/*
	 * If we're changing access permissions, then validate the new ones
	 */
	if (flags & IBT_MR_CHANGE_ACCESS) {
		/*
		 * Validate the access flags.  Both remote write and remote
		 * atomic require the local write flag to be set
		 */
		if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
		    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
		    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (hermon_mr_deregister(state, &mr,
			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			status = IBT_MR_ACCESS_REQ_INVALID;
			goto mrrereg_fail;
		}

		/*
		 * Setup and validate the memory region access flags.  This
		 * means translating the IBTF's enable flags into the access
		 * flags that will be used in later operations.
		 */
		acc_flags_to_use = 0;
		if (flags & IBT_MR_ENABLE_WINDOW_BIND)
			acc_flags_to_use |= IBT_MR_WINDOW_BIND;
		if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
			acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_READ)
			acc_flags_to_use |= IBT_MR_REMOTE_READ;
		if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
			acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
			acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;

	} else {
		/* Keep the region's current access flags */
		acc_flags_to_use = mr->mr_accflag;
	}

	/*
	 * If we're modifying the translation, then figure out whether
	 * we can reuse the current MTT resources.  This means calling
	 * hermon_mr_rereg_xlat_helper() which does most of the heavy lifting
	 * for the reregistration.  If the current memory region contains
	 * sufficient MTT entries for the new regions, then it will be
	 * reused and filled in.  Otherwise, new entries will be allocated,
	 * the old ones will be freed, and the new entries will be filled
	 * in.  Note: If we're not modifying the translation, then we
	 * should already have all the information we need to update the MPT.
	 * Also note: If hermon_mr_rereg_xlat_helper() fails, it will return
	 * a "dereg_level" which is the level of cleanup that needs to be
	 * passed to hermon_mr_deregister() to finish the cleanup.
	 */
	if (flags & IBT_MR_CHANGE_TRANSLATION) {
		status = hermon_mr_rereg_xlat_helper(state, mr, bind, op,
		    &mtt_addr_to_use, sleep, &dereg_level);
		if (status != DDI_SUCCESS) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all resources get
			 * properly freed up.
			 */
			if (hermon_mr_deregister(state, &mr, dereg_level,
			    sleep) != DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			goto mrrereg_fail;
		}
		/* Helper updated mr_bindinfo; pick up the new vaddr/len */
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use = mr->mr_bindinfo.bi_len;
	} else {
		/* Translation unchanged: reuse the existing MTT address */
		mtt_addr_to_use = mr->mr_mttaddr;
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use = mr->mr_bindinfo.bi_len;
	}

	/*
	 * Calculate new keys (Lkey, Rkey) from MPT index.  Just like they were
	 * when the region was first registered, each key is formed from
	 * "constrained" bits and "unconstrained" bits.  Note: If no remote
	 * access is required, then the RKey value is not filled in.  Otherwise
	 * both Rkey and LKey are given the same value.
	 */
	mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
	if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	} else
		mr->mr_rkey = 0;

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));

	mpt_entry.status = HERMON_MPT_SW_OWNERSHIP;
	mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND) ? 1 : 0;
	mpt_entry.atomic = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw = (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ? 1 : 0;
	mpt_entry.rr = (acc_flags_to_use & IBT_MR_REMOTE_READ) ? 1 : 0;
	mpt_entry.lw = (acc_flags_to_use & IBT_MR_LOCAL_WRITE) ? 1 : 0;
	mpt_entry.lr = 1;	/* local read is always enabled */
	mpt_entry.phys_addr = 0;
	mpt_entry.reg_win = HERMON_MPT_IS_REGION;

	mpt_entry.entity_sz = mr->mr_logmttpgsz;
	mpt_entry.mem_key = mr->mr_lkey;
	mpt_entry.pd = pd_to_use->pd_pdnum;

	mpt_entry.start_addr = vaddr_to_use;
	mpt_entry.reg_win_len = len_to_use;
	/* MTT address is split into high 32 bits and 8-byte-aligned low bits */
	mpt_entry.mtt_addr_h = mtt_addr_to_use >> 32;
	mpt_entry.mtt_addr_l = mtt_addr_to_use >> 3;

	/*
	 * Write the updated MPT entry to hardware
	 *
	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		/*
		 * Call deregister and ensure that all current resources get
		 * properly freed up.  Unnecessary here to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above.
		 */
		if (hermon_mr_deregister(state, &mr,
		    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
			HERMON_WARNING(state, "failed to deregister memory "
			    "region");
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * If we're changing PD, then update their reference counts now.
	 * This means decrementing the reference count on the old PD and
	 * incrementing the reference count on the new PD.
	 */
	if (flags & IBT_MR_CHANGE_PD) {
		hermon_pd_refcnt_dec(mr->mr_pdhdl);
		hermon_pd_refcnt_inc(pd);
	}

	/*
	 * Update the contents of the Hermon Memory Region handle to reflect
	 * what has been changed.
	 */
	mr->mr_pdhdl = pd_to_use;
	mr->mr_accflag = acc_flags_to_use;
	mr->mr_is_umem = 0;
	mr->mr_is_fmr = 0;
	mr->mr_umemcookie = NULL;
	/* Store the keys in their hardware (byte-swapped) form */
	mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
	mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);

	/* New MR handle is same as the old */
	*mrhdl_new = mr;
	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);

mrrereg_fail:
	return (status);
}


/*
 * hermon_mr_rereg_xlat_helper
 *    Context: Can be called from interrupt or base context.
 *    Note: This routine expects the "mr_lock" to be held when it
 *    is called.  Upon returning failure, this routine passes information
 *    about what "dereg_level" should be passed to hermon_mr_deregister().
 */
static int
hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level)
{
	hermon_rsrc_t		*mtt, *mtt_refcnt;
	hermon_sw_refcnt_t	*swrc_old, *swrc_new;
	ddi_dma_handle_t	dmahdl;
	uint64_t		nummtt_needed, nummtt_in_currrsrc, max_sz;
	uint_t			mtt_pgsize_bits, bind_type, reuse_dmahdl;
	int			status;

	ASSERT(MUTEX_HELD(&mr->mr_lock));

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU) or whether it should be
	 * registered to bypass the IOMMU.
	 */
	if (op == NULL) {
		bind_type = HERMON_BINDMEM_NORMAL;
	} else {
		bind_type = op->mro_bind_type;
	}

	/*
	 * Check for invalid length.  Check is the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		/*
		 * Deregister will be called upon returning failure from this
		 * routine.  This will ensure that all current resources get
		 * properly freed up.  Unnecessary to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above (in hermon_mr_reregister())
		 */
		*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT;

		status = IBT_MR_LEN_INVALID;
		goto mrrereghelp_fail;
	}

	/*
	 * Determine the number of pages necessary for new region and the
	 * number of pages supported by the current MTT resources
	 */
	nummtt_needed = hermon_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
	nummtt_in_currrsrc = mr->mr_mttrsrcp->hr_len >> HERMON_MTT_SIZE_SHIFT;

	/*
	 * Depending on whether we have enough pages or not, the next step is
	 * to fill in a set of MTT entries that reflect the new mapping.  In
	 * the first case below, we already have enough entries.  This means
	 * we need to unbind the memory from the previous mapping, bind the
	 * memory for the new mapping, write the new MTT entries, and update
	 * the mr to reflect the changes.
	 * In the second case below, we do not have enough entries in the
	 * current mapping.  So, in this case, we need not only to unbind the
	 * current mapping, but we need to free up the MTT resources associated
	 * with that mapping.  After we've successfully done that, we continue
	 * by binding the new memory, allocating new MTT entries, writing the
	 * new MTT entries, and updating the mr to reflect the changes.
	 */

	/*
	 * If this region is being shared (i.e. MTT refcount != 1), then we
	 * can't reuse the current MTT resources regardless of their size.
	 * Instead we'll need to alloc new ones (below) just as if there
	 * hadn't been enough room in the current entries.
	 */
	swrc_old = (hermon_sw_refcnt_t *)mr->mr_mttrefcntp->hr_addr;
	if (HERMON_MTT_IS_NOT_SHARED(swrc_old) &&
	    (nummtt_needed <= nummtt_in_currrsrc)) {

		/*
		 * Unbind the old mapping for this memory region, but retain
		 * the ddi_dma_handle_t (if possible) for reuse in the bind
		 * operation below.  Note: If original memory region was
		 * bound for IOMMU bypass and the new region can not use
		 * bypass, then a new DMA handle will be necessary.
		 */
		if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
			/* Keep the handle alive across the unbind */
			mr->mr_bindinfo.bi_free_dmahdl = 0;
			hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = mr->mr_bindinfo.bi_dmahdl;
			reuse_dmahdl = 1;
		} else {
			hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and hermon_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note: When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass = bind_type;
		status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Using the new mapping, but reusing the current MTT
		 * resources, write the updated entries to MTT
		 */
		mtt = mr->mr_mttrsrcp;
		status = hermon_mr_fast_mtt_write(state, mtt, bind,
		    mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we do need to unbind the newly bound memory
			 * before returning.
			 */
			hermon_mr_mem_unbind(state, bind);
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/*
			 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
			 * only if it detects a HW error during DMA.
			 */
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
			status = ibc_get_ci_failure(0);
			goto mrrereghelp_fail;
		}

		/* Put the updated information into the Mem Region handle */
		mr->mr_bindinfo = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;

	} else {
		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not unbind any resources prematurely.
		 */
		if (!HERMON_MTT_IS_SHARED(swrc_old)) {
			/*
			 * Unbind the old mapping for this memory region, but
			 * retain the ddi_dma_handle_t for reuse in the bind
			 * operation below.  Note: This can only be done here
			 * because the region being reregistered is not
			 * currently shared.  Also if original memory region
			 * was bound for IOMMU bypass and the new region can
			 * not use bypass, then a new DMA handle will be
			 * necessary.
			 */
			if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
				mr->mr_bindinfo.bi_free_dmahdl = 0;
				hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = mr->mr_bindinfo.bi_dmahdl;
				reuse_dmahdl = 1;
			} else {
				hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = NULL;
				reuse_dmahdl = 0;
			}
		} else {
			/* Shared: leave the old mapping untouched for now */
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and hermon_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note: When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass = bind_type;
		status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Allocate the new MTT entries resource
		 */
		status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt_needed,
		    sleep, &mtt);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we do need to unbind the newly bound memory
			 * before returning.
			 */
			hermon_mr_mem_unbind(state, bind);
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}

		/*
		 * Allocate MTT reference count (to track shared memory
		 * regions).  As mentioned elsewhere above, this reference
		 * count resource may never be used on the given memory region,
		 * but if it is ever later registered as a "shared" memory
		 * region then this resource will be necessary.  Note: This
		 * is only necessary here if the existing memory region is
		 * already being shared (because otherwise we already have
		 * a useable reference count resource).
		 */
		if (HERMON_MTT_IS_SHARED(swrc_old)) {
			status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1,
			    sleep, &mtt_refcnt);
			if (status != DDI_SUCCESS) {
				/*
				 * Deregister will be called upon returning
				 * failure from this routine.  This will ensure
				 * that all current resources get properly
				 * freed up.  Unnecessary to attempt to regain
				 * software ownership of the MPT entry as that
				 * has already been done above (in
				 * hermon_mr_reregister()).  Also unnecessary
				 * to attempt to unbind the memory.
				 *
				 * But we need to unbind the newly bound
				 * memory and free up the newly allocated MTT
				 * entries before returning.
				 */
				hermon_mr_mem_unbind(state, bind);
				hermon_rsrc_free(state, &mtt);
				*dereg_level =
				    HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

				status = IBT_INSUFF_RESOURCE;
				goto mrrereghelp_fail;
			}
			swrc_new = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
			HERMON_MTT_REFCNT_INIT(swrc_new);
		} else {
			mtt_refcnt = mr->mr_mttrefcntp;
		}

		/*
		 * Using the new mapping and the new MTT resources, write the
		 * updated entries to MTT
		 */
		status = hermon_mr_fast_mtt_write(state, mtt, bind,
		    mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we need to unbind the newly bound memory,
			 * free up the newly allocated MTT entries, and
			 * (possibly) free the new MTT reference count
			 * resource before returning.
			 */
			if (HERMON_MTT_IS_SHARED(swrc_old)) {
				hermon_rsrc_free(state, &mtt_refcnt);
			}
			hermon_mr_mem_unbind(state, bind);
			hermon_rsrc_free(state, &mtt);
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}

		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not free up any resources prematurely.
		 */
		if (HERMON_MTT_IS_SHARED(swrc_old)) {
			/* Decrement MTT reference count for "old" region */
			(void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
		} else {
			/* Free up the old MTT entries resource */
			hermon_rsrc_free(state, &mr->mr_mttrsrcp);
		}

		/* Put the updated information into the mrhdl */
		mr->mr_bindinfo = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;
		mr->mr_mttrsrcp = mtt;
		mr->mr_mttrefcntp = mtt_refcnt;
	}

	/*
	 * Calculate and return the updated MTT address (in the DDR address
	 * space).  This will be used by the caller (hermon_mr_reregister) in
	 * the updated MPT entry
	 */
	*mtt_addr = mtt->hr_indx << HERMON_MTT_SIZE_SHIFT;

	return (DDI_SUCCESS);

mrrereghelp_fail:
	return (status);
}


/*
 * hermon_mr_nummtt_needed()
 *    Context: Can be called from interrupt or base context.
 *
 *    Returns the number of MTT entries (pages) needed to cover the range
 *    [bi_addr, bi_addr + bi_len), and stores the log2 page size used for
 *    each entry in "*mtt_pgsize_bits".
 */
/* ARGSUSED */
static uint64_t
hermon_mr_nummtt_needed(hermon_state_t *state, hermon_bind_info_t *bind,
    uint_t *mtt_pgsize_bits)
{
	uint64_t	pg_offset_mask;
	uint64_t	pg_offset, tmp_length;

	/*
	 * For now we specify the page size as 8Kb (the default page size for
	 * the sun4u architecture), or 4Kb for x86.  Figure out optimal page
	 * size by examining the dmacookies
	 */
	*mtt_pgsize_bits = PAGESHIFT;

	/* Count pages spanned, accounting for the offset into the first page */
	pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
	pg_offset = bind->bi_addr & pg_offset_mask;
	tmp_length = pg_offset + (bind->bi_len - 1);
	return ((tmp_length >> *mtt_pgsize_bits) + 1);
}


/*
 * hermon_mr_mem_bind()
 *    Context: Can be called from interrupt or base context.
 *
 *    Allocates a DMA handle for "bind" (unless the caller passes one in via
 *    "dmahdl") and binds the memory described by "bind" to obtain the DMA
 *    cookie list.  Returns DDI_SUCCESS, or the underlying DDI error status
 *    on failure (with any handle this routine allocated freed again).
 */
static int
hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer)
{
	ddi_dma_attr_t	dma_attr;
	int		(*callback)(caddr_t);
	int		status;

	/* bi_type must be set to a meaningful value to get a bind handle */
	ASSERT(bind->bi_type == HERMON_BINDHDL_VADDR ||
	    bind->bi_type == HERMON_BINDHDL_BUF ||
	    bind->bi_type == HERMON_BINDHDL_UBUF);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/* Set the callback flag appropriately */
	callback = (sleep == HERMON_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;

	/*
	 * Initialize many of the default DMA attributes.  Then, if we're
	 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
	 */
	if (dmahdl == NULL) {
		hermon_dma_attr_init(state, &dma_attr);
#ifdef	__sparc
		if (bind->bi_bypass == HERMON_BINDMEM_BYPASS) {
			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
		}
#endif

		/* set RO if needed - tunable set and 'is_buffer' is non-0 */
		if (is_buffer) {
			if (! (bind->bi_flags & IBT_MR_DISABLE_RO)) {
				/*
				 * Relaxed ordering is enabled separately for
				 * kernel and user buffers via the
				 * hermon_kernel_data_ro/hermon_user_data_ro
				 * tunables.
				 */
				if ((bind->bi_type != HERMON_BINDHDL_UBUF) &&
				    (hermon_kernel_data_ro ==
				    HERMON_RO_ENABLED)) {
					dma_attr.dma_attr_flags |=
					    DDI_DMA_RELAXED_ORDERING;
				}
				if (((bind->bi_type == HERMON_BINDHDL_UBUF) &&
				    (hermon_user_data_ro ==
				    HERMON_RO_ENABLED))) {
					dma_attr.dma_attr_flags |=
					    DDI_DMA_RELAXED_ORDERING;
				}
			}
		}

		/* Allocate a DMA handle for the binding */
		status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
		    callback, NULL, &bind->bi_dmahdl);
		if (status != DDI_SUCCESS) {
			return (status);
		}
		/* We own this handle; it must be freed at unbind time */
		bind->bi_free_dmahdl = 1;

	} else {
		/* Caller-supplied handle; caller retains ownership */
		bind->bi_dmahdl = dmahdl;
		bind->bi_free_dmahdl = 0;
	}


	/*
	 * Bind the memory to get the PCI mapped addresses.  The decision
	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
	 * is determined by the "bi_type" flag.  Note: if the bind operation
	 * fails then we have to free up the DMA handle and return error.
	 */
	if (bind->bi_type == HERMON_BINDHDL_VADDR) {
		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
		    (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback, NULL,
		    &bind->bi_dmacookie, &bind->bi_cookiecnt);

	} else {  /* HERMON_BINDHDL_BUF or HERMON_BINDHDL_UBUF */

		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
		    bind->bi_buf, (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback,
		    NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
	}
	if (status != DDI_DMA_MAPPED) {
		if (bind->bi_free_dmahdl != 0) {
			ddi_dma_free_handle(&bind->bi_dmahdl);
		}
		return (status);
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_mr_mem_unbind()
 *    Context: Can be called from interrupt or base context.
 *
 *    Inverse of hermon_mr_mem_bind(): releases any UBUF buffer allocated by
 *    ddi_umem_iosetup(), unbinds the DMA mapping, and frees the DMA handle
 *    if this binding owns it (bi_free_dmahdl).
 */
static void
hermon_mr_mem_unbind(hermon_state_t *state, hermon_bind_info_t *bind)
{
	int	status;

	/*
	 * In case of HERMON_BINDHDL_UBUF, the memory bi_buf points to
	 * is actually allocated by ddi_umem_iosetup() internally, then
	 * it's required to free it here. Reset bi_type to HERMON_BINDHDL_NONE
	 * not to free it again later.
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	if (bind->bi_type == HERMON_BINDHDL_UBUF) {
		freerbuf(bind->bi_buf);
		bind->bi_type = HERMON_BINDHDL_NONE;
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Unbind the DMA memory for the region
	 *
	 * Note: The only way ddi_dma_unbind_handle() currently
	 * can return an error is if the handle passed in is invalid.
	 * Since this should never happen, we choose to return void
	 * from this function!  If this does return an error, however,
	 * then we print a warning message to the console.
	 */
	status = ddi_dma_unbind_handle(bind->bi_dmahdl);
	if (status != DDI_SUCCESS) {
		HERMON_WARNING(state, "failed to unbind DMA mapping");
		return;
	}

	/* Free up the DMA handle */
	if (bind->bi_free_dmahdl != 0) {
		ddi_dma_free_handle(&bind->bi_dmahdl);
	}
}


/*
 * hermon_mr_fast_mtt_write()
 *    Context: Can be called from interrupt or base context.
 *
 *    Walks the DMA cookie list in "bind" and writes one MTT entry per page
 *    directly into the ICM-backed MTT table (syncing each span for the
 *    device), rather than going through firmware.
 */
static int
hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
    hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits)
{
	hermon_icm_table_t	*icm_table;
	hermon_dma_info_t	*dma_info;
	uint32_t		index1, index2, rindx;
	ddi_dma_cookie_t	dmacookie;
	uint_t			cookie_cnt;
	uint64_t		*mtt_table;
	uint64_t		mtt_entry;
	uint64_t		addr, endaddr;
	uint64_t		pagesize;
	offset_t		i, start;
	uint_t			per_span;
	int			sync_needed;

	/*
	 * XXX According to the PRM, we are to use the WRITE_MTT
	 * command to write out MTTs. Tavor does not do this,
	 * instead taking advantage of direct access to the MTTs,
	 * and knowledge that Mellanox FMR relies on our ability
	 * to write directly to the MTTs without any further
	 * notification to the firmware. Likewise, we will choose
	 * to not use the WRITE_MTT command, but to simply write
	 * out the MTTs.
2878 */ 2879 2880 /* Calculate page size from the suggested value passed in */ 2881 pagesize = ((uint64_t)1 << mtt_pgsize_bits); 2882 2883 /* Walk the "cookie list" and fill in the MTT table entries */ 2884 dmacookie = bind->bi_dmacookie; 2885 cookie_cnt = bind->bi_cookiecnt; 2886 2887 icm_table = &state->hs_icm[HERMON_MTT]; 2888 rindx = mtt->hr_indx; 2889 hermon_index(index1, index2, rindx, icm_table, i); 2890 start = i; 2891 2892 per_span = icm_table->span; 2893 dma_info = icm_table->icm_dma[index1] + index2; 2894 mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr; 2895 2896 sync_needed = 0; 2897 while (cookie_cnt-- > 0) { 2898 addr = dmacookie.dmac_laddress; 2899 endaddr = addr + (dmacookie.dmac_size - 1); 2900 addr = addr & ~((uint64_t)pagesize - 1); 2901 2902 while (addr <= endaddr) { 2903 2904 /* 2905 * Fill in the mapped addresses (calculated above) and 2906 * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry. 2907 */ 2908 mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT; 2909 mtt_table[i] = htonll(mtt_entry); 2910 i++; 2911 rindx++; 2912 2913 if (i == per_span) { 2914 2915 (void) ddi_dma_sync(dma_info->dma_hdl, 2916 start * sizeof (hermon_hw_mtt_t), 2917 (i - start) * sizeof (hermon_hw_mtt_t), 2918 DDI_DMA_SYNC_FORDEV); 2919 2920 if ((addr + pagesize > endaddr) && 2921 (cookie_cnt == 0)) 2922 return (DDI_SUCCESS); 2923 2924 hermon_index(index1, index2, rindx, icm_table, 2925 i); 2926 start = i * sizeof (hermon_hw_mtt_t); 2927 dma_info = icm_table->icm_dma[index1] + index2; 2928 mtt_table = 2929 (uint64_t *)(uintptr_t)dma_info->vaddr; 2930 2931 sync_needed = 0; 2932 } else { 2933 sync_needed = 1; 2934 } 2935 2936 addr += pagesize; 2937 if (addr == 0) { 2938 static int do_once = 1; 2939 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", 2940 do_once)) 2941 if (do_once) { 2942 do_once = 0; 2943 cmn_err(CE_NOTE, "probable error in " 2944 "dma_cookie address from caller\n"); 2945 } 2946 break; 2947 } 2948 } 2949 2950 /* 2951 * When we've reached the end of the current DMA 
cookie, 2952 * jump to the next cookie (if there are more) 2953 */ 2954 if (cookie_cnt != 0) { 2955 ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie); 2956 } 2957 } 2958 2959 /* done all the cookies, now sync the memory for the device */ 2960 if (sync_needed) 2961 (void) ddi_dma_sync(dma_info->dma_hdl, 2962 start * sizeof (hermon_hw_mtt_t), 2963 (i - start) * sizeof (hermon_hw_mtt_t), 2964 DDI_DMA_SYNC_FORDEV); 2965 2966 return (DDI_SUCCESS); 2967 } 2968 2969 /* 2970 * hermon_mr_fast_mtt_write_fmr() 2971 * Context: Can be called from interrupt or base context. 2972 */ 2973 /* ARGSUSED */ 2974 static int 2975 hermon_mr_fast_mtt_write_fmr(hermon_state_t *state, hermon_rsrc_t *mtt, 2976 ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits) 2977 { 2978 hermon_icm_table_t *icm_table; 2979 hermon_dma_info_t *dma_info; 2980 uint32_t index1, index2, rindx; 2981 uint64_t *mtt_table; 2982 offset_t i, j; 2983 uint_t per_span; 2984 2985 icm_table = &state->hs_icm[HERMON_MTT]; 2986 rindx = mtt->hr_indx; 2987 hermon_index(index1, index2, rindx, icm_table, i); 2988 per_span = icm_table->span; 2989 dma_info = icm_table->icm_dma[index1] + index2; 2990 mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr; 2991 2992 /* 2993 * Fill in the MTT table entries 2994 */ 2995 for (j = 0; j < mem_pattr->pmr_num_buf; j++) { 2996 mtt_table[i] = mem_pattr->pmr_addr_list[j].p_laddr; 2997 i++; 2998 rindx++; 2999 if (i == per_span) { 3000 hermon_index(index1, index2, rindx, icm_table, i); 3001 dma_info = icm_table->icm_dma[index1] + index2; 3002 mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr; 3003 } 3004 } 3005 3006 return (DDI_SUCCESS); 3007 } 3008 3009 3010 /* 3011 * hermon_mtt_refcnt_inc() 3012 * Context: Can be called from interrupt or base context. 
3013 */ 3014 static uint_t 3015 hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc) 3016 { 3017 hermon_sw_refcnt_t *rc; 3018 3019 rc = (hermon_sw_refcnt_t *)rsrc->hr_addr; 3020 return (atomic_inc_uint_nv(&rc->swrc_refcnt)); 3021 } 3022 3023 3024 /* 3025 * hermon_mtt_refcnt_dec() 3026 * Context: Can be called from interrupt or base context. 3027 */ 3028 static uint_t 3029 hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc) 3030 { 3031 hermon_sw_refcnt_t *rc; 3032 3033 rc = (hermon_sw_refcnt_t *)rsrc->hr_addr; 3034 return (atomic_dec_uint_nv(&rc->swrc_refcnt)); 3035 } 3036