/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * tavor_mr.c
 *    Tavor Memory Region/Window Routines
 *
 *    Implements all the routines necessary to provide the requisite memory
 *    registration verbs.  These include operations like RegisterMemRegion(),
 *    DeregisterMemRegion(), ReregisterMemRegion(), RegisterSharedMemRegion(),
 *    etc., that affect Memory Regions.  It also includes the verbs that
 *    affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
 *    and QueryMemWindow().
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/esunddi.h>

#include <sys/ib/adapters/tavor/tavor.h>


/*
 * Used by tavor_mr_keycalc() below to fill in the "unconstrained" portion
 * of Tavor memory keys (LKeys and RKeys)
 */
static uint_t tavor_debug_memkey_cnt = 0x00000000;

static int tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op);
static int tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op);
static int tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level);
static uint64_t tavor_mr_nummtt_needed(tavor_state_t *state,
    tavor_bind_info_t *bind, uint_t *mtt_pgsize);
static int tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep);
static void tavor_mr_mem_unbind(tavor_state_t *state,
    tavor_bind_info_t *bind);
static int tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind,
    uint32_t mtt_pgsize_bits);
static int tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc);
static int tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc);

/*
 * The Tavor umem_lockmemory() callback ops.  When userland memory is
 * registered, these callback ops are specified.  The tavor_umap_umemlock_cb()
 * callback will be called whenever the memory for the corresponding
 * ddi_umem_cookie_t is being freed.
 */
static struct umem_callback_ops tavor_umem_cbops = {
    UMEM_CALLBACK_VERSION,
    tavor_umap_umemlock_cb,
};


/*
 * tavor_mr_register()
 *    Context: Can be called from interrupt or base context.
 */
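/*
 * For illustration only: a kernel caller registering a kernel virtual
 * buffer might fill in the attribute struct along these lines (the
 * variable names here are hypothetical, a minimal sketch):
 *
 *    ibt_mr_attr_t attr;
 *
 *    attr.mr_vaddr = (uint64_t)(uintptr_t)kaddr;
 *    attr.mr_len   = (uint64_t)len;
 *    attr.mr_as    = NULL;
 *    attr.mr_flags = IBT_MR_NOSLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
 *    status = tavor_mr_register(state, pd, &attr, &mrhdl, NULL);
 */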
int
tavor_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
{
    tavor_bind_info_t bind;
    int status;

    TAVOR_TNF_ENTER(tavor_mr_register);

    /*
     * Fill in the "bind" struct.  This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (as is the case here) and a "buf" binding (see
     * below).  The "bind" struct is later passed to tavor_mr_mem_bind()
     * which does most of the "heavy lifting" for the Tavor memory
     * registration routines.
     */
    bind.bi_type = TAVOR_BINDHDL_VADDR;
    bind.bi_addr = mr_attr->mr_vaddr;
    bind.bi_len = mr_attr->mr_len;
    bind.bi_as = mr_attr->mr_as;
    bind.bi_flags = mr_attr->mr_flags;
    status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op);
    if (status != DDI_SUCCESS) {
        TNF_PROBE_0(tavor_mr_register_cmnreg_fail,
            TAVOR_TNF_ERROR, "");
        TAVOR_TNF_EXIT(tavor_mr_register);
        return (status);
    }

    TAVOR_TNF_EXIT(tavor_mr_register);
    return (DDI_SUCCESS);
}


/*
 * tavor_mr_register_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_register_buf(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_smr_attr_t *mr_attr, struct buf *buf, tavor_mrhdl_t *mrhdl,
    tavor_mr_options_t *op)
{
    tavor_bind_info_t bind;
    int status;

    TAVOR_TNF_ENTER(tavor_mr_register_buf);

    /*
     * Fill in the "bind" struct.  This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (see above) and a "buf" binding (as is the case
     * here).  The "bind" struct is later passed to tavor_mr_mem_bind()
     * which does most of the "heavy lifting" for the Tavor memory
     * registration routines.  Note: We have chosen to provide
     * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
     * not set).  It is not critical what value we choose here as it need
     * only be unique for the given RKey (which will happen by default),
     * so the choice here is somewhat arbitrary.
     */
    bind.bi_type = TAVOR_BINDHDL_BUF;
    bind.bi_buf = buf;
    if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
        bind.bi_addr = mr_attr->mr_vaddr;
    } else {
        bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
    }
    bind.bi_as = NULL;
    bind.bi_len = (uint64_t)buf->b_bcount;
    bind.bi_flags = mr_attr->mr_flags;
    status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op);
    if (status != DDI_SUCCESS) {
        TNF_PROBE_0(tavor_mr_register_buf_cmnreg_fail,
            TAVOR_TNF_ERROR, "");
        TAVOR_TNF_EXIT(tavor_mr_register_buf);
        return (status);
    }

    TAVOR_TNF_EXIT(tavor_mr_register_buf);
    return (DDI_SUCCESS);
}


/*
 * tavor_mr_register_shared()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_register_shared(tavor_state_t *state, tavor_mrhdl_t mrhdl,
    tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new)
{
    tavor_rsrc_pool_info_t *rsrc_pool;
    tavor_rsrc_t *mpt, *mtt, *rsrc;
    tavor_umap_db_entry_t *umapdb;
    tavor_hw_mpt_t mpt_entry;
    tavor_mrhdl_t mr;
    tavor_bind_info_t *bind;
    ddi_umem_cookie_t umem_cookie;
    size_t umem_len;
    caddr_t umem_addr;
    uint64_t mtt_addr, mtt_ddrbaseaddr, pgsize_msk;
    uint_t sleep, mr_is_umem;
    int status, umem_flags;
    char *errormsg;

    TAVOR_TNF_ENTER(tavor_mr_register_shared);

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP :
        TAVOR_SLEEP;
    if ((sleep == TAVOR_SLEEP) &&
        (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
        goto mrshared_fail;
    }

    /* Increment the reference count on the protection domain (PD) */
    tavor_pd_refcnt_inc(pd);

    /*
     * Allocate an MPT entry.  This will be filled in with all the
     * necessary parameters to define the shared memory region.
     * Specifically, it will be made to reference the currently existing
     * MTT entries and ownership of the MPT will be passed to the hardware
     * in the last step below.  If we fail here, we must undo the
     * protection domain reference count.
     */
    status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
    if (status != DDI_SUCCESS) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
        goto mrshared_fail1;
    }

    /*
     * Allocate the software structure for tracking the shared memory
     * region (i.e. the Tavor Memory Region handle).  If we fail here, we
     * must undo the protection domain reference count and the previous
     * resource allocation.
     */
    status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
    if (status != DDI_SUCCESS) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
        goto mrshared_fail2;
    }
    mr = (tavor_mrhdl_t)rsrc->tr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

    /*
     * Setup and validate the memory region access flags.  This means
     * translating the IBTF's enable flags into the access flags that
     * will be used in later operations.
     */
    mr->mr_accflag = 0;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
        mr->mr_accflag |= IBT_MR_WINDOW_BIND;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
        mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
        mr->mr_accflag |= IBT_MR_REMOTE_READ;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
        mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
        mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

    /*
     * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
     * from a certain number of "constrained" bits (the least significant
     * bits) and some number of "unconstrained" bits.  The constrained
     * bits must be set to the index of the entry in the MPT table, but
     * the unconstrained bits can be set to any value we wish.  Note:
     * if no remote access is required, then the RKey value is not filled
     * in.  Otherwise both Rkey and LKey are given the same value.
     */
    tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
    if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
        (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
        (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
        mr->mr_rkey = mr->mr_lkey;
    }

    /* Grab the MR lock for the current memory region */
    mutex_enter(&mrhdl->mr_lock);

    /*
     * Check here to see if the memory region has already been partially
     * deregistered as a result of a tavor_umap_umemlock_cb() callback.
     * If so, this is an error, return failure.
     */
    if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
        mutex_exit(&mrhdl->mr_lock);
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
        goto mrshared_fail3;
    }

    /*
     * Determine if the original memory was from userland and, if so, pin
     * the pages (again) with umem_lockmemory().  This will guarantee a
     * separate callback for each of this shared region's MR handles.
     * If this is userland memory, then allocate an entry in the
     * "userland resources database".  This will later be added to
     * the database (after all further memory registration operations are
     * successful).  If we fail here, we must undo all the above setup.
     */
    mr_is_umem = mrhdl->mr_is_umem;
    if (mr_is_umem) {
        umem_len = ptob(btopr(mrhdl->mr_bindinfo.bi_len +
            ((uintptr_t)mrhdl->mr_bindinfo.bi_addr & PAGEOFFSET)));
        umem_addr = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
            ~PAGEOFFSET);
        umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
            DDI_UMEMLOCK_LONGTERM);
        status = umem_lockmemory(umem_addr, umem_len, umem_flags,
            &umem_cookie, &tavor_umem_cbops, NULL);
        if (status != 0) {
            mutex_exit(&mrhdl->mr_lock);
            /* Set "status" and "errormsg" and goto failure */
            TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
            goto mrshared_fail3;
        }

        umapdb = tavor_umap_db_alloc(state->ts_instance,
            (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
            (uint64_t)(uintptr_t)rsrc);
        if (umapdb == NULL) {
            mutex_exit(&mrhdl->mr_lock);
            /* Set "status" and "errormsg" and goto failure */
            TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
            goto mrshared_fail4;
        }
    }
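    /*
     * Note that the ptob(btopr()) arithmetic above always locks down
     * whole pages: for example, assuming 4KB pages, a bi_addr of
     * 0x12345678 and a bi_len of 0x100 yield umem_addr 0x12345000 and
     * umem_len 0x1000 (one full page).
     */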
    /*
     * Copy the MTT resource pointer (and additional parameters) from
     * the original Tavor Memory Region handle.  Note: this is normally
     * where the tavor_mr_mem_bind() routine would be called, but because
     * we already have bound and filled-in MTT entries it is simply a
     * matter here of managing the MTT reference count and grabbing the
     * address of the MTT table entries (for filling in the shared region's
     * MPT entry).
     */
    mr->mr_mttrsrcp = mrhdl->mr_mttrsrcp;
    mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
    mr->mr_bindinfo = mrhdl->mr_bindinfo;
    mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
    mutex_exit(&mrhdl->mr_lock);
    bind = &mr->mr_bindinfo;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
    mtt = mr->mr_mttrsrcp;

    /*
     * Increment the MTT reference count (to reflect the fact that
     * the MTT is now shared)
     */
    (void) tavor_mtt_refcnt_inc(mr->mr_mttrefcntp);

    /*
     * Update the new "bind" virtual address.  Do some extra work here
     * to ensure proper alignment.  That is, make sure that the page
     * offset for the beginning of the old range is the same as the
     * offset for this new mapping.
     */
    pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
    bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
        (mr->mr_bindinfo.bi_addr & pgsize_msk));

    /*
     * Get the base address for the MTT table.  This will be necessary
     * in the next step when we are setting up the MPT entry.
     */
    rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
    mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

    /*
     * Fill in the MPT entry.  This is the final step before passing
     * ownership of the MPT entry to the Tavor hardware.  We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the MPT.
     */
    bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
    mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE;
    mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
    mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
    mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
    mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
    mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
    mpt_entry.lr = 1;
    mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
    mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
    mpt_entry.mem_key = mr->mr_lkey;
    mpt_entry.pd = pd->pd_pdnum;
    mpt_entry.start_addr = bind->bi_addr;
    mpt_entry.reg_win_len = bind->bi_len;
    mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND;
    mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
    mpt_entry.mttseg_addr_h = mtt_addr >> 32;
    mpt_entry.mttseg_addr_l = mtt_addr >> 6;
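    /*
     * The MTT address just computed is programmed into the MPT in two
     * pieces: mttseg_addr_h holds the upper 32 bits, while mttseg_addr_l
     * holds the address in units of 64 bytes (the shift by 6 drops the
     * low six bits, which are zero because MTT segments are allocated
     * on whole, 64-byte-aligned cachelines).
     */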
    /*
     * Write the MPT entry to hardware.  Lastly, we pass ownership of
     * the entry to the hardware.  Note: in general, this operation
     * shouldn't fail.  But if it does, we have to undo everything we've
     * done above before returning error.
     */
    status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
        sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
    if (status != TAVOR_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
            status);
        TNF_PROBE_1(tavor_mr_register_shared_sw2hw_mpt_cmd_fail,
            TAVOR_TNF_ERROR, "", tnf_uint, status, status);
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
            "tavor SW2HW_MPT command");
        goto mrshared_fail5;
    }

    /*
     * Fill in the rest of the Tavor Memory Region handle.  Having
     * successfully transferred ownership of the MPT, we can update the
     * following fields for use in further operations on the MR.
     */
    mr->mr_mptrsrcp = mpt;
    mr->mr_mttrsrcp = mtt;
    mr->mr_pdhdl = pd;
    mr->mr_rsrcp = rsrc;
    mr->mr_is_umem = mr_is_umem;
    mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
    mr->mr_umem_cbfunc = NULL;
    mr->mr_umem_cbarg1 = NULL;
    mr->mr_umem_cbarg2 = NULL;

    /*
     * If this is userland memory, then we need to insert the previously
     * allocated entry into the "userland resources database".  This will
     * allow for later coordination between the tavor_umap_umemlock_cb()
     * callback and tavor_mr_deregister().
     */
    if (mr_is_umem) {
        tavor_umap_db_add(umapdb);
    }

    *mrhdl_new = mr;

    TAVOR_TNF_EXIT(tavor_mr_register_shared);
    return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrshared_fail5:
    (void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
    if (mr_is_umem) {
        tavor_umap_db_free(umapdb);
    }
mrshared_fail4:
    if (mr_is_umem) {
        ddi_umem_unlock(umem_cookie);
    }
mrshared_fail3:
    tavor_rsrc_free(state, &rsrc);
mrshared_fail2:
    tavor_rsrc_free(state, &mpt);
mrshared_fail1:
    tavor_pd_refcnt_dec(pd);
mrshared_fail:
    TNF_PROBE_1(tavor_mr_register_shared_fail, TAVOR_TNF_ERROR, "",
        tnf_string, msg, errormsg);
    TAVOR_TNF_EXIT(tavor_mr_register_shared);
    return (status);
}


/*
 * tavor_mr_deregister()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_deregister(tavor_state_t *state, tavor_mrhdl_t *mrhdl, uint_t level,
    uint_t sleep)
{
    tavor_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
    tavor_umap_db_entry_t *umapdb;
    tavor_pdhdl_t pd;
    tavor_mrhdl_t mr;
    tavor_bind_info_t *bind;
    uint64_t value;
    int status, shared_mtt;
    char *errormsg;

    TAVOR_TNF_ENTER(tavor_mr_deregister);

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    if ((sleep == TAVOR_SLEEP) &&
        (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
        TNF_PROBE_1(tavor_mr_deregister_fail, TAVOR_TNF_ERROR, "",
            tnf_string, msg, errormsg);
        TAVOR_TNF_EXIT(tavor_mr_deregister);
        return (status);
    }

    /*
     * Pull all the necessary information from the Tavor Memory Region
     * handle.  This is necessary here because the resource for the
     * MR handle is going to be freed up as part of this
     * deregistration.
     */
    mr = *mrhdl;
    mutex_enter(&mr->mr_lock);
    mpt = mr->mr_mptrsrcp;
    mtt = mr->mr_mttrsrcp;
    mtt_refcnt = mr->mr_mttrefcntp;
    rsrc = mr->mr_rsrcp;
    pd = mr->mr_pdhdl;
    bind = &mr->mr_bindinfo;

    /*
     * Check here to see if the memory region has already been partially
     * deregistered as a result of the tavor_umap_umemlock_cb() callback.
     * If so, then jump to the end and free the remaining resources.
     */
    if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
        goto mrdereg_finish_cleanup;
    }

    /*
     * We must drop the "mr_lock" here to ensure that both SLEEP and
     * NOSLEEP calls into the firmware work as expected.  Also, if two
     * threads are attempting to access this MR (via de-register,
     * re-register, or otherwise), then we allow the firmware to enforce
     * the checking, that only one deregister is valid.
     */
    mutex_exit(&mr->mr_lock);
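    /*
     * For example, if two threads race to deregister the same region,
     * both may issue the HW2SW_MPT command below, but only one can
     * successfully reclaim ownership; the loser gets a command failure
     * back from the firmware and returns an error instead of freeing
     * the resources a second time.
     */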
    /*
     * Reclaim MPT entry from hardware (if necessary).  Since the
     * tavor_mr_deregister() routine is used in the memory region
     * reregistration process as well, it is possible that we will
     * not always wish to reclaim ownership of the MPT.  Check the
     * "level" arg and, if necessary, attempt to reclaim it.  If
     * the ownership transfer fails for any reason, we check to see
     * what command status was returned from the hardware.  The only
     * "expected" error status is the one that indicates an attempt to
     * deregister a memory region that has memory windows bound to it.
     */
    if (level >= TAVOR_MR_DEREG_ALL) {
        status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT,
            NULL, 0, mpt->tr_indx, sleep);
        if (status != TAVOR_CMD_SUCCESS) {
            if (status == TAVOR_CMD_REG_BOUND) {
                TAVOR_TNF_EXIT(tavor_mr_deregister);
                return (IBT_MR_IN_USE);
            } else {
                cmn_err(CE_CONT, "Tavor: HW2SW_MPT command "
                    "failed: %08x\n", status);
                TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail,
                    TAVOR_TNF_ERROR, "", tnf_uint, status,
                    status);
                TAVOR_TNF_EXIT(tavor_mr_deregister);
                return (IBT_INVALID_PARAM);
            }
        }
    }

    /*
     * Re-grab the mr_lock here.  Since further access to the protected
     * 'mr' structure is needed, and we would have returned previously for
     * the multiple deregistration case, we can safely grab the lock here.
     */
    mutex_enter(&mr->mr_lock);

    /*
     * If the memory had come from userland, then we do a lookup in the
     * "userland resources database".  On success, we free the entry, call
     * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
     * an indication that the umem_lockmemory() callback has called
     * tavor_mr_deregister()), we call ddi_umem_unlock() and invalidate
     * the "mr_umemcookie" field in the MR handle (this will be used
     * later to detect that only partial cleanup still remains to be done
     * on the MR handle).
     */
    if (mr->mr_is_umem) {
        status = tavor_umap_db_find(state->ts_instance,
            (uint64_t)(uintptr_t)mr->mr_umemcookie,
            MLNX_UMAP_MRMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
            &umapdb);
        if (status == DDI_SUCCESS) {
            tavor_umap_db_free(umapdb);
            ddi_umem_unlock(mr->mr_umemcookie);
        } else {
            ddi_umem_unlock(mr->mr_umemcookie);
            mr->mr_umemcookie = NULL;
        }
    }
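    /*
     * For example, if a region created with RegisterMemRegion() was
     * later shared via RegisterSharedMemRegion(), the MTT reference
     * count is two: the first deregistration below drops it to one and
     * frees nothing, while the second sees the count reach zero and
     * frees the MTT entries and unbinds the memory.
     */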
    /* mtt_refcnt is NULL in the case of tavor_dma_mr_register() */
    if (mtt_refcnt != NULL) {
        /*
         * Decrement the MTT reference count.  Since the MTT resource
         * may be shared between multiple memory regions (as a result
         * of a "RegisterSharedMR" verb) it is important that we not
         * free up or unbind resources prematurely.  If it's not shared
         * (as indicated by the return status), then free the resource.
         */
        shared_mtt = tavor_mtt_refcnt_dec(mtt_refcnt);
        if (!shared_mtt) {
            tavor_rsrc_free(state, &mtt_refcnt);
        }

        /*
         * Free up the MTT entries and unbind the memory.  Here,
         * as above, we attempt to free these resources only if
         * it is appropriate to do so.
         */
        if (!shared_mtt) {
            if (level >= TAVOR_MR_DEREG_NO_HW2SW_MPT) {
                tavor_mr_mem_unbind(state, bind);
            }
            tavor_rsrc_free(state, &mtt);
        }
    }

    /*
     * If the MR handle has been invalidated, then drop the
     * lock and return success.  Note: This only happens because
     * the umem_lockmemory() callback has been triggered.  The
     * cleanup here is partial, and further cleanup (in a
     * subsequent tavor_mr_deregister() call) will be necessary.
     */
    if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
        mutex_exit(&mr->mr_lock);
        TAVOR_TNF_EXIT(tavor_mr_deregister);
        return (DDI_SUCCESS);
    }

mrdereg_finish_cleanup:
    mutex_exit(&mr->mr_lock);

    /* Free the Tavor Memory Region handle */
    tavor_rsrc_free(state, &rsrc);

    /* Free up the MPT entry resource */
    tavor_rsrc_free(state, &mpt);

    /* Decrement the reference count on the protection domain (PD) */
    tavor_pd_refcnt_dec(pd);

    /* Set the mrhdl pointer to NULL and return success */
    *mrhdl = NULL;

    TAVOR_TNF_EXIT(tavor_mr_deregister);
    return (DDI_SUCCESS);
}


/*
 * tavor_mr_query()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mr,
    ibt_mr_query_attr_t *attr)
{
    TAVOR_TNF_ENTER(tavor_mr_query);

    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))

    mutex_enter(&mr->mr_lock);

    /*
     * Check here to see if the memory region has already been partially
     * deregistered as a result of a tavor_umap_umemlock_cb() callback.
     * If so, this is an error, return failure.
     */
    if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
        mutex_exit(&mr->mr_lock);
        TNF_PROBE_0(tavor_mr_query_inv_mrhdl_fail, TAVOR_TNF_ERROR, "");
        TAVOR_TNF_EXIT(tavor_mr_query);
        return (IBT_MR_HDL_INVALID);
    }

    /* Fill in the queried attributes */
    attr->mr_attr_flags = mr->mr_accflag;
    attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;

    /* Fill in the "local" attributes */
    attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
    attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
    attr->mr_lbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;

    /*
     * Fill in the "remote" attributes (if necessary).  Note: the
     * remote attributes are only valid if the memory region has one
     * or more of the remote access flags set.
     */
    if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
        (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
        (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
        attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
        attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
        attr->mr_rbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;
    }

    /*
     * If the region is mapped for streaming (i.e. noncoherent), then
     * flag that a sync is required
     */
    attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
        IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

    mutex_exit(&mr->mr_lock);
    TAVOR_TNF_EXIT(tavor_mr_query);
    return (DDI_SUCCESS);
}


/*
 * tavor_mr_reregister()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_reregister(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op)
{
    tavor_bind_info_t bind;
    int status;

    TAVOR_TNF_ENTER(tavor_mr_reregister);

    /*
     * Fill in the "bind" struct.  This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (as is the case here) and a "buf" binding (see
     * below).  The "bind" struct is later passed to tavor_mr_mem_bind()
     * which does most of the "heavy lifting" for the Tavor memory
     * registration (and reregistration) routines.
     */
    bind.bi_type = TAVOR_BINDHDL_VADDR;
    bind.bi_addr = mr_attr->mr_vaddr;
    bind.bi_len = mr_attr->mr_len;
    bind.bi_as = mr_attr->mr_as;
    bind.bi_flags = mr_attr->mr_flags;
    status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
    if (status != DDI_SUCCESS) {
        TNF_PROBE_0(tavor_mr_reregister_cmnreg_fail,
            TAVOR_TNF_ERROR, "");
        TAVOR_TNF_EXIT(tavor_mr_reregister);
        return (status);
    }

    TAVOR_TNF_EXIT(tavor_mr_reregister);
    return (DDI_SUCCESS);
}


/*
 * tavor_mr_reregister_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_reregister_buf(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
    tavor_mrhdl_t *mrhdl_new, tavor_mr_options_t *op)
{
    tavor_bind_info_t bind;
    int status;

    TAVOR_TNF_ENTER(tavor_mr_reregister_buf);

    /*
     * Fill in the "bind" struct.  This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (see above) and a "buf" binding (as is the case
     * here).  The "bind" struct is later passed to tavor_mr_mem_bind()
     * which does most of the "heavy lifting" for the Tavor memory
     * registration routines.  Note: We have chosen to provide
     * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
     * not set).  It is not critical what value we choose here as it need
     * only be unique for the given RKey (which will happen by default),
     * so the choice here is somewhat arbitrary.
     */
    bind.bi_type = TAVOR_BINDHDL_BUF;
    bind.bi_buf = buf;
    if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
        bind.bi_addr = mr_attr->mr_vaddr;
    } else {
        bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
    }
    bind.bi_len = (uint64_t)buf->b_bcount;
    bind.bi_flags = mr_attr->mr_flags;
    bind.bi_as = NULL;
    status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
    if (status != DDI_SUCCESS) {
        TNF_PROBE_0(tavor_mr_reregister_buf_cmnreg_fail,
            TAVOR_TNF_ERROR, "");
        TAVOR_TNF_EXIT(tavor_mr_reregister_buf);
        return (status);
    }

    TAVOR_TNF_EXIT(tavor_mr_reregister_buf);
    return (DDI_SUCCESS);
}


/*
 * tavor_mr_sync()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_sync(tavor_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
{
    tavor_mrhdl_t mrhdl;
    uint64_t seg_vaddr, seg_len, seg_end;
    uint64_t mr_start, mr_end;
    uint_t type;
    int status, i;
    char *errormsg;

    TAVOR_TNF_ENTER(tavor_mr_sync);

    /* Process each of the ibt_mr_sync_t's */
    for (i = 0; i < num_segs; i++) {
        mrhdl = (tavor_mrhdl_t)mr_segs[i].ms_handle;

        /* Check for valid memory region handle */
        if (mrhdl == NULL) {
            /* Set "status" and "errormsg" and goto failure */
            TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
            goto mrsync_fail;
        }

        mutex_enter(&mrhdl->mr_lock);

        /*
         * Check here to see if the memory region has already been
         * partially deregistered as a result of a
         * tavor_umap_umemlock_cb() callback.  If so, this is an
         * error, return failure.
         */
        if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
            mutex_exit(&mrhdl->mr_lock);
            /* Set "status" and "errormsg" and goto failure */
            TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl2");
            goto mrsync_fail;
        }

        /* Check for valid bounds on sync request */
        seg_vaddr = mr_segs[i].ms_vaddr;
        seg_len = mr_segs[i].ms_len;
        seg_end = seg_vaddr + seg_len - 1;
        mr_start = mrhdl->mr_bindinfo.bi_addr;
        mr_end = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
        if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
            mutex_exit(&mrhdl->mr_lock);
            /* Set "status" and "errormsg" and goto failure */
            TAVOR_TNF_FAIL(IBT_MR_VA_INVALID, "invalid vaddr");
            goto mrsync_fail;
        }
        if ((seg_end < mr_start) || (seg_end > mr_end)) {
            mutex_exit(&mrhdl->mr_lock);
            /* Set "status" and "errormsg" and goto failure */
            TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
            goto mrsync_fail;
        }

        /* Determine what type (i.e. direction) for sync */
        if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
            type = DDI_DMA_SYNC_FORDEV;
        } else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
            type = DDI_DMA_SYNC_FORCPU;
        } else {
            mutex_exit(&mrhdl->mr_lock);
            /* Set "status" and "errormsg" and goto failure */
            TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sync type");
            goto mrsync_fail;
        }

        (void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
            (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);
        mutex_exit(&mrhdl->mr_lock);
    }

    TAVOR_TNF_EXIT(tavor_mr_sync);
    return (DDI_SUCCESS);

mrsync_fail:
    TNF_PROBE_1(tavor_mr_sync_fail, TAVOR_TNF_ERROR, "", tnf_string, msg,
        errormsg);
    TAVOR_TNF_EXIT(tavor_mr_sync);
    return (status);
}
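/*
 * For illustration only: a caller syncing a noncoherent region before a
 * device read might build a single-segment request along these lines
 * (the variable names here are hypothetical, a minimal sketch):
 *
 *    ibt_mr_sync_t seg;
 *
 *    seg.ms_handle = (ibt_mr_hdl_t)mrhdl;
 *    seg.ms_vaddr  = vaddr;
 *    seg.ms_len    = len;
 *    seg.ms_flags  = IBT_SYNC_READ;
 *    status = tavor_mr_sync(state, &seg, 1);
 *
 * IBT_SYNC_READ maps to DDI_DMA_SYNC_FORDEV above, and the offset passed
 * to ddi_dma_sync() is the segment's displacement from the region start.
 */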
/*
 * tavor_mw_alloc()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mw_alloc(tavor_state_t *state, tavor_pdhdl_t pd, ibt_mw_flags_t flags,
    tavor_mwhdl_t *mwhdl)
{
    tavor_rsrc_t *mpt, *rsrc;
    tavor_hw_mpt_t mpt_entry;
    tavor_mwhdl_t mw;
    uint_t sleep;
    int status;
    char *errormsg;

    TAVOR_TNF_ENTER(tavor_mw_alloc);

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (flags & IBT_MW_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP;
    if ((sleep == TAVOR_SLEEP) &&
        (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
        goto mwalloc_fail;
    }

    /* Increment the reference count on the protection domain (PD) */
    tavor_pd_refcnt_inc(pd);

    /*
     * Allocate an MPT entry (for use as a memory window).  Since the
     * Tavor hardware uses the MPT entry for memory regions and for
     * memory windows, we will fill in this MPT with all the necessary
     * parameters for the memory window.  And then (just as we do for
     * memory regions) ownership will be passed to the hardware in the
     * final step below.  If we fail here, we must undo the protection
     * domain reference count.
     */
    status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
    if (status != DDI_SUCCESS) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
        goto mwalloc_fail1;
    }

    /*
     * Allocate the software structure for tracking the memory window (i.e.
     * the Tavor Memory Window handle).  Note: This is actually the same
     * software structure used for tracking memory regions, but since many
     * of the same properties are needed, only a single structure is
     * necessary.  If we fail here, we must undo the protection domain
     * reference count and the previous resource allocation.
     */
    status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
    if (status != DDI_SUCCESS) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
        goto mwalloc_fail2;
    }
    mw = (tavor_mwhdl_t)rsrc->tr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

    /*
     * Calculate an "unbound" RKey from MPT index.  In much the same way
     * as we do for memory regions (above), this key is constructed from
     * a "constrained" portion (which depends on the MPT index) and an
     * "unconstrained" portion (which may be arbitrarily chosen).
     */
    tavor_mr_keycalc(state, mpt->tr_indx, &mw->mr_rkey);

    /*
     * Fill in the MPT entry.  This is the final step before passing
     * ownership of the MPT entry to the Tavor hardware.  We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the MPT.  Note: fewer fields in the MPT
     * entry are necessary to allocate a memory window.
     */
    bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
    mpt_entry.reg_win = TAVOR_MPT_IS_WINDOW;
    mpt_entry.mem_key = mw->mr_rkey;
    mpt_entry.pd = pd->pd_pdnum;

    /*
     * Write the MPT entry to hardware.  Lastly, we pass ownership of
     * the entry to the hardware.  Note: in general, this operation
     * shouldn't fail.  But if it does, we have to undo everything we've
     * done above before returning error.
     */
    status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
        sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
    if (status != TAVOR_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
            status);
        TNF_PROBE_1(tavor_mw_alloc_sw2hw_mpt_cmd_fail,
            TAVOR_TNF_ERROR, "", tnf_uint, status, status);
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
            "tavor SW2HW_MPT command");
        goto mwalloc_fail3;
    }

    /*
     * Fill in the rest of the Tavor Memory Window handle.  Having
     * successfully transferred ownership of the MPT, we can update the
     * following fields for use in further operations on the MW.
     */
    mw->mr_mptrsrcp = mpt;
    mw->mr_pdhdl = pd;
    mw->mr_rsrcp = rsrc;
    *mwhdl = mw;

    TAVOR_TNF_EXIT(tavor_mw_alloc);
    return (DDI_SUCCESS);

mwalloc_fail3:
    tavor_rsrc_free(state, &rsrc);
mwalloc_fail2:
    tavor_rsrc_free(state, &mpt);
mwalloc_fail1:
    tavor_pd_refcnt_dec(pd);
mwalloc_fail:
    TNF_PROBE_1(tavor_mw_alloc_fail, TAVOR_TNF_ERROR, "",
        tnf_string, msg, errormsg);
    TAVOR_TNF_EXIT(tavor_mw_alloc);
    return (status);
}


/*
 * tavor_mw_free()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mw_free(tavor_state_t *state, tavor_mwhdl_t *mwhdl, uint_t sleep)
{
    tavor_rsrc_t *mpt, *rsrc;
    tavor_mwhdl_t mw;
    int status;
    char *errormsg;
    tavor_pdhdl_t pd;

    TAVOR_TNF_ENTER(tavor_mw_free);

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    if ((sleep == TAVOR_SLEEP) &&
        (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
        TNF_PROBE_1(tavor_mw_free_fail, TAVOR_TNF_ERROR, "",
            tnf_string, msg, errormsg);
        TAVOR_TNF_EXIT(tavor_mw_free);
        return (status);
    }

    /*
     * Pull all the necessary information from the Tavor Memory Window
     * handle.  This is necessary here because the resource for the
     * MW handle is going to be freed up as part of this operation.
     */
    mw = *mwhdl;
    mutex_enter(&mw->mr_lock);
    mpt = mw->mr_mptrsrcp;
    rsrc = mw->mr_rsrcp;
    pd = mw->mr_pdhdl;
    mutex_exit(&mw->mr_lock);
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

    /*
     * Reclaim the MPT entry from hardware.  Note: in general, it is
     * unexpected for this operation to return an error.
     */
    status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
        0, mpt->tr_indx, sleep);
    if (status != TAVOR_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: %08x\n",
            status);
        TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail, TAVOR_TNF_ERROR, "",
            tnf_uint, status, status);
        TAVOR_TNF_EXIT(tavor_mw_free);
        return (IBT_INVALID_PARAM);
    }

    /* Free the Tavor Memory Window handle */
    tavor_rsrc_free(state, &rsrc);

    /* Free up the MPT entry resource */
    tavor_rsrc_free(state, &mpt);

    /* Decrement the reference count on the protection domain (PD) */
    tavor_pd_refcnt_dec(pd);

    /* Set the mwhdl pointer to NULL and return success */
    *mwhdl = NULL;

    TAVOR_TNF_EXIT(tavor_mw_free);
    return (DDI_SUCCESS);
}


/*
 * tavor_mr_keycalc()
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key)
{
    uint32_t tmp, log_num_mpt;

    /*
     * Generate a simple key from counter.  Note:  We increment this
     * static variable _intentionally_ without any kind of mutex around
     * it.  First, single-threading all operations through a single lock
     * would be a bad idea (from a performance point-of-view).  Second,
     * the upper "unconstrained" bits don't really have to be unique
     * because the lower bits are guaranteed to be (although we do make a
     * best effort to ensure that they are).  Third, the window for the
     * race (where both threads read and update the counter at the same
     * time) is incredibly small.
     * And, lastly, we'd like to make this into a "random" key XXX
     */
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(tavor_debug_memkey_cnt))
    log_num_mpt = state->ts_cfg_profile->cp_log_num_mpt;
    tmp = (tavor_debug_memkey_cnt++) << log_num_mpt;
    *key = tmp | indx;
}
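/*
 * For example, if cp_log_num_mpt were 17 (an illustrative value), the
 * first key generated for MPT index 0x1A would be
 * (0 << 17) | 0x1A = 0x0000001A; once the counter has ticked, the next
 * key for that same index would be (1 << 17) | 0x1A = 0x0002001A.  The
 * low "constrained" bits always equal the MPT index; only the upper
 * "unconstrained" bits vary.
 */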
/*
 * tavor_mr_common_reg()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
{
    tavor_rsrc_pool_info_t *rsrc_pool;
    tavor_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
    tavor_umap_db_entry_t *umapdb;
    tavor_sw_refcnt_t *swrc_tmp;
    tavor_hw_mpt_t mpt_entry;
    tavor_mrhdl_t mr;
    ibt_mr_flags_t flags;
    tavor_bind_info_t *bh;
    ddi_dma_handle_t bind_dmahdl;
    ddi_umem_cookie_t umem_cookie;
    size_t umem_len;
    caddr_t umem_addr;
    uint64_t mtt_addr, mtt_ddrbaseaddr, max_sz;
    uint_t sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
    int status, umem_flags, bind_override_addr;
    char *errormsg;

    TAVOR_TNF_ENTER(tavor_mr_common_reg);

    /*
     * Check the "options" flag.  Currently this flag tells the driver
     * whether or not the region should be bound normally (i.e. with
     * entries written into the PCI IOMMU), whether it should be
     * registered to bypass the IOMMU, and whether or not the resulting
     * address should be "zero-based" (to aid the alignment restrictions
     * for QPs).
     */
    if (op == NULL) {
        bind_type = TAVOR_BINDMEM_NORMAL;
        bind_dmahdl = NULL;
        bind_override_addr = 0;
    } else {
        bind_type = op->mro_bind_type;
        bind_dmahdl = op->mro_bind_dmahdl;
        bind_override_addr = op->mro_bind_override_addr;
    }

    /* Extract the flags field from the tavor_bind_info_t */
    flags = bind->bi_flags;

    /*
     * Check for invalid length.  Check if the length is zero or if the
     * length is larger than the maximum configured value.  Return error
     * if it is.
     */
    max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz);
    if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
        goto mrcommon_fail;
    }
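    /*
     * For example, if cp_log_max_mrw_sz were 36 (an illustrative value),
     * max_sz would be 64GB, and a zero-length request or anything larger
     * than 64GB would be rejected here with IBT_MR_LEN_INVALID.
     */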
    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP;
    if ((sleep == TAVOR_SLEEP) &&
        (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
        goto mrcommon_fail;
    }

    /*
     * Get the base address for the MTT table.  This will be necessary
     * below when we are setting up the MPT entry.
     */
    rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
    mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

    /* Increment the reference count on the protection domain (PD) */
    tavor_pd_refcnt_inc(pd);

    /*
     * Allocate an MPT entry.  This will be filled in with all the
     * necessary parameters to define the memory region.  And then
     * ownership will be passed to the hardware in the final step
     * below.  If we fail here, we must undo the protection domain
     * reference count.
     */
    status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
    if (status != DDI_SUCCESS) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
        goto mrcommon_fail1;
    }

    /*
     * Allocate the software structure for tracking the memory region (i.e.
     * the Tavor Memory Region handle).  If we fail here, we must undo
     * the protection domain reference count and the previous resource
     * allocation.
     */
    status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
    if (status != DDI_SUCCESS) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
        goto mrcommon_fail2;
    }
    mr = (tavor_mrhdl_t)rsrc->tr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

    /*
     * Setup and validate the memory region access flags.  This means
     * translating the IBTF's enable flags into the access flags that
     * will be used in later operations.
     */
    mr->mr_accflag = 0;
    if (flags & IBT_MR_ENABLE_WINDOW_BIND)
        mr->mr_accflag |= IBT_MR_WINDOW_BIND;
    if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
        mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
    if (flags & IBT_MR_ENABLE_REMOTE_READ)
        mr->mr_accflag |= IBT_MR_REMOTE_READ;
    if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
        mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
    if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
        mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

    /*
     * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
     * from a certain number of "constrained" bits (the least significant
     * bits) and some number of "unconstrained" bits.  The constrained
     * bits must be set to the index of the entry in the MPT table, but
     * the unconstrained bits can be set to any value we wish.  Note:
     * if no remote access is required, then the RKey value is not filled
     * in.  Otherwise both Rkey and LKey are given the same value.
     */
    tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
    if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
        (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
        (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
        mr->mr_rkey = mr->mr_lkey;
    }

    /*
     * Determine if the memory is from userland and pin the pages
     * with umem_lockmemory() if necessary.
     * Then, if this is userland memory, allocate an entry in the
     * "userland resources database".  This will later be added to
     * the database (after all further memory registration operations are
     * successful).  If we fail here, we must undo the reference counts
     * and the previous resource allocations.
     */
    mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ?
        1 : 0);
    if (mr_is_umem) {
        umem_len = ptob(btopr(bind->bi_len +
            ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
        umem_addr = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
        umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
            DDI_UMEMLOCK_LONGTERM);
        status = umem_lockmemory(umem_addr, umem_len, umem_flags,
            &umem_cookie, &tavor_umem_cbops, NULL);
        if (status != 0) {
            /* Set "status" and "errormsg" and goto failure */
            TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
            goto mrcommon_fail3;
        }

        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))

        bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
            B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
        if (bind->bi_buf == NULL) {
            /* Set "status" and "errormsg" and goto failure */
            TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed iosetup");
            goto mrcommon_fail3;
        }
        bind->bi_type = TAVOR_BINDHDL_UBUF;
        bind->bi_buf->b_flags |= B_READ;

        _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
        _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

        umapdb = tavor_umap_db_alloc(state->ts_instance,
            (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
            (uint64_t)(uintptr_t)rsrc);
        if (umapdb == NULL) {
            /* Set "status" and "errormsg" and goto failure */
            TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
            goto mrcommon_fail4;
        }
    }

    /*
     * Setup the bindinfo for the mtt bind call
     */
    bh = &mr->mr_bindinfo;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
    bcopy(bind, bh, sizeof (tavor_bind_info_t));
    bh->bi_bypass = bind_type;
    status = tavor_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
        &mtt_pgsize_bits);
    if (status != DDI_SUCCESS) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(status, "failed mtt bind");
        /*
         * When mtt_bind fails, freerbuf has already been done,
         * so make sure not to call it again.
         */
        bind->bi_type = bh->bi_type;
        goto mrcommon_fail5;
    }
    mr->mr_logmttpgsz = mtt_pgsize_bits;

    /*
     * Allocate MTT reference count (to track shared memory regions).
     * This reference count resource may never be used on the given
     * memory region, but if it is ever later registered as "shared"
     * memory region then this resource will be necessary.  If we fail
     * here, we do pretty much the same as above to clean up.
     */
    status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1, sleep,
        &mtt_refcnt);
    if (status != DDI_SUCCESS) {
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed reference count");
        goto mrcommon_fail6;
    }
    mr->mr_mttrefcntp = mtt_refcnt;
    swrc_tmp = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
    TAVOR_MTT_REFCNT_INIT(swrc_tmp);

    /*
     * Fill in the MPT entry.  This is the final step before passing
     * ownership of the MPT entry to the Tavor hardware.  We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the MPT.
     */
    bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
    mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE;
    mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
    mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
    mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
    mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
    mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
    mpt_entry.lr = 1;
    mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
    mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
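    /*
     * The page_sz field is encoded relative to 4KB (2^12): an MTT page
     * size of 4KB is written as 0, 8KB as 1, and so on, which is why
     * 0xC is subtracted from the log2 MTT page size above.
     */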
    mpt_entry.mem_key = mr->mr_lkey;
    mpt_entry.pd = pd->pd_pdnum;
    if (bind_override_addr == 0) {
        mpt_entry.start_addr = bh->bi_addr;
    } else {
        bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
        mpt_entry.start_addr = bh->bi_addr;
    }
    mpt_entry.reg_win_len = bh->bi_len;
    mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND;
    mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
    mpt_entry.mttseg_addr_h = mtt_addr >> 32;
    mpt_entry.mttseg_addr_l = mtt_addr >> 6;

    /*
     * Write the MPT entry to hardware.  Lastly, we pass ownership of
     * the entry to the hardware.  Note: in general, this operation
     * shouldn't fail.  But if it does, we have to undo everything we've
     * done above before returning error.
     */
    status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
        sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
    if (status != TAVOR_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
            status);
        TNF_PROBE_1(tavor_mr_common_reg_sw2hw_mpt_cmd_fail,
            TAVOR_TNF_ERROR, "", tnf_uint, status, status);
        /* Set "status" and "errormsg" and goto failure */
        TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
            "tavor SW2HW_MPT command");
        goto mrcommon_fail7;
    }

    /*
     * Fill in the rest of the Tavor Memory Region handle.  Having
     * successfully transferred ownership of the MPT, we can update the
     * following fields for use in further operations on the MR.
     */
    mr->mr_mptrsrcp = mpt;
    mr->mr_mttrsrcp = mtt;
    mr->mr_pdhdl = pd;
    mr->mr_rsrcp = rsrc;
    mr->mr_is_umem = mr_is_umem;
    mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
    mr->mr_umem_cbfunc = NULL;
    mr->mr_umem_cbarg1 = NULL;
    mr->mr_umem_cbarg2 = NULL;

    /*
     * If this is userland memory, then we need to insert the previously
     * allocated entry into the "userland resources database".  This will
     * allow for later coordination between the tavor_umap_umemlock_cb()
     * callback and tavor_mr_deregister().
     */
    if (mr_is_umem) {
        tavor_umap_db_add(umapdb);
    }

    *mrhdl = mr;

    TAVOR_TNF_EXIT(tavor_mr_common_reg);
    return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrcommon_fail7:
    tavor_rsrc_free(state, &mtt_refcnt);
mrcommon_fail6:
    tavor_rsrc_free(state, &mtt);
    tavor_mr_mem_unbind(state, bh);
    bind->bi_type = bh->bi_type;
mrcommon_fail5:
    if (mr_is_umem) {
        tavor_umap_db_free(umapdb);
    }
mrcommon_fail4:
    if (mr_is_umem) {
        /*
         * Free up the memory ddi_umem_iosetup() allocates
         * internally.
         */
        if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
            freerbuf(bind->bi_buf);
            _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
            bind->bi_type = TAVOR_BINDHDL_NONE;
            _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
        }
        ddi_umem_unlock(umem_cookie);
    }
mrcommon_fail3:
    tavor_rsrc_free(state, &rsrc);
mrcommon_fail2:
    tavor_rsrc_free(state, &mpt);
mrcommon_fail1:
    tavor_pd_refcnt_dec(pd);
mrcommon_fail:
    TNF_PROBE_1(tavor_mr_common_reg_fail, TAVOR_TNF_ERROR, "",
        tnf_string, msg, errormsg);
    TAVOR_TNF_EXIT(tavor_mr_common_reg);
    return (status);
}

int
tavor_dma_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_dmr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl)
{
    tavor_rsrc_t *mpt, *rsrc;
    tavor_hw_mpt_t mpt_entry;
    tavor_mrhdl_t mr;
    ibt_mr_flags_t flags;
    uint_t sleep;
    int status;

    /* Extract the flags field */
    flags = mr_attr->dmr_flags;

    /*
     * Check the sleep flag.  Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP;
    if ((sleep == TAVOR_SLEEP) &&
        (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
        status = IBT_INVALID_PARAM;
        goto mrcommon_fail;
    }

    /* Increment the reference count on the protection domain (PD) */
    tavor_pd_refcnt_inc(pd);

    /*
     * Allocate an MPT entry.  This will be filled in with all the
     * necessary parameters to define the memory region.  And then
     * ownership will be passed to the hardware in the final step
     * below.  If we fail here, we must undo the protection domain
     * reference count.
     */
    status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mrcommon_fail1;
    }

    /*
     * Allocate the software structure for tracking the memory region (i.e.
     * the Tavor Memory Region handle).  If we fail here, we must undo
     * the protection domain reference count and the previous resource
     * allocation.
     */
    status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mrcommon_fail2;
    }
    mr = (tavor_mrhdl_t)rsrc->tr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
    bzero(mr, sizeof (*mr));

    /*
     * Setup and validate the memory region access flags.  This means
     * translating the IBTF's enable flags into the access flags that
     * will be used in later operations.
     */
    mr->mr_accflag = 0;
    if (flags & IBT_MR_ENABLE_WINDOW_BIND)
        mr->mr_accflag |= IBT_MR_WINDOW_BIND;
    if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
        mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
    if (flags & IBT_MR_ENABLE_REMOTE_READ)
        mr->mr_accflag |= IBT_MR_REMOTE_READ;
    if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
        mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
    if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
        mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

    /*
     * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
     * from a certain number of "constrained" bits (the least significant
     * bits) and some number of "unconstrained" bits.  The constrained
     * bits must be set to the index of the entry in the MPT table, but
     * the unconstrained bits can be set to any value we wish.  Note:
     * if no remote access is required, then the RKey value is not filled
     * in.  Otherwise both Rkey and LKey are given the same value.
     */
    tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
    if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
        (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
        (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
        mr->mr_rkey = mr->mr_lkey;
    }

    /*
     * Fill in the MPT entry.  This is the final step before passing
     * ownership of the MPT entry to the Tavor hardware.  We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the MPT.
     */
    bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));

    mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE;
    mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
    mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
    mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
    mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
    mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
    mpt_entry.lr = 1;
    mpt_entry.phys_addr = 1;	/* critical bit for this */
    mpt_entry.reg_win = TAVOR_MPT_IS_REGION;

    mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
    mpt_entry.mem_key = mr->mr_lkey;
    mpt_entry.pd = pd->pd_pdnum;
    mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND;

    mpt_entry.start_addr = mr_attr->dmr_paddr;
    mpt_entry.reg_win_len = mr_attr->dmr_len;

    mpt_entry.mttseg_addr_h = 0;
    mpt_entry.mttseg_addr_l = 0;

    /*
     * Write the MPT entry to hardware.  Lastly, we pass ownership of
     * the entry to the hardware if needed.  Note: in general, this
     * operation shouldn't fail.  But if it does, we have to undo
     * everything we've done above before returning error.
     *
     * For Tavor, this routine (which is common to the contexts) will only
     * set the ownership if needed - the process of passing the context
     * itself to HW will take care of setting up the MPT (based on type
     * and index).
     */

    status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
        sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
    if (status != TAVOR_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
            status);
        status = ibc_get_ci_failure(0);
        goto mrcommon_fail7;
    }

    /*
     * Fill in the rest of the Tavor Memory Region handle.  Having
     * successfully transferred ownership of the MPT, we can update the
     * following fields for use in further operations on the MR.
     */
    mr->mr_mptrsrcp = mpt;
    mr->mr_mttrsrcp = NULL;
    mr->mr_pdhdl = pd;
    mr->mr_rsrcp = rsrc;
    mr->mr_is_umem = 0;
    mr->mr_umemcookie = NULL;
    mr->mr_umem_cbfunc = NULL;
    mr->mr_umem_cbarg1 = NULL;
    mr->mr_umem_cbarg2 = NULL;

    *mrhdl = mr;

    return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrcommon_fail7:
    tavor_rsrc_free(state, &rsrc);
mrcommon_fail2:
    tavor_rsrc_free(state, &mpt);
mrcommon_fail1:
    tavor_pd_refcnt_dec(pd);
mrcommon_fail:
    return (status);
}
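/*
 * For illustration only: a caller registering a physical address range
 * might fill in the DMA attribute struct along these lines (the
 * variable names here are hypothetical, a minimal sketch):
 *
 *    ibt_dmr_attr_t dmr;
 *
 *    dmr.dmr_paddr = paddr;
 *    dmr.dmr_len   = len;
 *    dmr.dmr_flags = IBT_MR_NOSLEEP | IBT_MR_ENABLE_REMOTE_WRITE;
 *    status = tavor_dma_mr_register(state, pd, &dmr, &mrhdl);
 *
 * Unlike the MTT-backed path above, this path programs the physical
 * address directly into the MPT (phys_addr = 1) and consumes no MTTs.
 */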

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));

	mpt_entry.m_io	  = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.phys_addr = 1;	/* critical bit for this type of MR */
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;

	mpt_entry.page_sz	= mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.win_cnt_limit	= TAVOR_UNLIMITED_WIN_BIND;

	mpt_entry.start_addr	= mr_attr->dmr_paddr;
	mpt_entry.reg_win_len	= mr_attr->dmr_len;

	mpt_entry.mttseg_addr_h	= 0;
	mpt_entry.mttseg_addr_l	= 0;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware if needed.  Note: in general, this
	 * operation shouldn't fail.  But if it does, we have to undo
	 * everything we've done above before returning error.
	 *
	 * For Tavor, this routine (which is common to the contexts) will only
	 * set the ownership if needed - the process of passing the context
	 * itself to HW will take care of setting up the MPT (based on type
	 * and index).
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		status = ibc_get_ci_failure(0);
		goto mrcommon_fail7;
	}

	/*
	 * Fill in the rest of the Tavor Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp	   = mpt;
	mr->mr_mttrsrcp	   = NULL;
	mr->mr_pdhdl	   = pd;
	mr->mr_rsrcp	   = rsrc;
	mr->mr_is_umem	   = 0;
	mr->mr_umemcookie  = NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;

	*mrhdl = mr;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrcommon_fail7:
	tavor_rsrc_free(state, &rsrc);
mrcommon_fail2:
	tavor_rsrc_free(state, &mpt);
mrcommon_fail1:
	tavor_pd_refcnt_dec(pd);
mrcommon_fail:
	return (status);
}
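

/*
 * A note on MPT ownership as used in this file: an MPT entry is built by
 * software and handed to the hardware with the SW2HW_MPT command (as
 * above).  Any later modification (e.g. the reregistration path below)
 * must first reclaim the entry with HW2SW_MPT, modify it, and then hand
 * it back to the hardware with another SW2HW_MPT.
 */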


/*
 * tavor_mr_mtt_bind()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_mtt_bind(tavor_state_t *state, tavor_bind_info_t *bind,
    ddi_dma_handle_t bind_dmahdl, tavor_rsrc_t **mtt, uint_t *mtt_pgsize_bits)
{
	uint64_t	nummtt;
	uint_t		sleep;
	int		status;
	char		*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_mtt_bind);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP :
	    TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrmttbind_fail;
	}

	/*
	 * Bind the memory and determine the mapped addresses.  This is
	 * the first of two routines that do all the "heavy lifting" for
	 * the Tavor memory registration routines.  The tavor_mr_mem_bind()
	 * routine takes the "bind" struct with all its fields filled
	 * in and returns a list of DMA cookies (for the PCI mapped addresses
	 * corresponding to the specified address region) which are used by
	 * the tavor_mr_fast_mtt_write() routine below.  If we fail here, we
	 * must undo all the previous resource allocation (and PD reference
	 * count).
	 */
	status = tavor_mr_mem_bind(state, bind, bind_dmahdl, sleep);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
		goto mrmttbind_fail;
	}

	/*
	 * Determine the number of pages spanned.  This routine uses the
	 * information in the "bind" struct to determine the required number
	 * of MTT entries (and returns the suggested page size, as a
	 * "power-of-2", for each MTT entry).
	 */
	nummtt = tavor_mr_nummtt_needed(state, bind, mtt_pgsize_bits);

	/*
	 * Allocate the MTT entries.  Use the calculations performed above to
	 * allocate the required number of MTT entries.  Note: MTT entries are
	 * allocated in "MTT segments" which consist of complete cachelines
	 * (i.e. 8 entries, 16 entries, etc.), so the TAVOR_NUMMTT_TO_MTTSEG()
	 * macro is used to do the proper conversion.  If we fail here, we
	 * must not only undo all the previous resource allocation (and PD
	 * reference count), but we must also unbind the memory.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MTT,
	    TAVOR_NUMMTT_TO_MTTSEG(nummtt), sleep, mtt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT");
		goto mrmttbind_fail2;
	}
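
	/*
	 * Illustrative example of the segment rounding above (the segment
	 * size here is hypothetical): if an MTT segment held 8 entries,
	 * then a region needing 9 MTT entries would round up to 2 segments
	 * (16 entries), with the excess entries simply left unused.
	 */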

	/*
	 * Write the mapped addresses into the MTT entries.  This is part two
	 * of the "heavy lifting" routines that we talked about above.  Note:
	 * we pass the suggested page size from the earlier operation here.
	 * If we fail here, we must perform essentially the same cleanup as
	 * above.
	 */
	status = tavor_mr_fast_mtt_write(*mtt, bind, *mtt_pgsize_bits);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed write mtt");
		goto mrmttbind_fail3;
	}
	TAVOR_TNF_EXIT(tavor_mr_mtt_bind);
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrmttbind_fail3:
	tavor_rsrc_free(state, mtt);
mrmttbind_fail2:
	tavor_mr_mem_unbind(state, bind);
mrmttbind_fail:
	TNF_PROBE_1(tavor_mr_mtt_bind_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_mtt_bind);
	return (status);
}


/*
 * tavor_mr_mtt_unbind()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_mtt_unbind(tavor_state_t *state, tavor_bind_info_t *bind,
    tavor_rsrc_t *mtt)
{
	TAVOR_TNF_ENTER(tavor_mr_mtt_unbind);

	/*
	 * Free up the MTT entries and unbind the memory.  Here, as above, we
	 * attempt to free these resources only if it is appropriate to do so.
	 */
	tavor_mr_mem_unbind(state, bind);
	tavor_rsrc_free(state, &mtt);

	TAVOR_TNF_EXIT(tavor_mr_mtt_unbind);
	return (DDI_SUCCESS);
}
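

/*
 * Illustrative pairing of the two routines above (a sketch only, assuming
 * a caller-owned and fully filled-in tavor_bind_info_t "bind"):
 *
 *	tavor_rsrc_t	*mtt;
 *	uint_t		pgsize_bits;
 *
 *	if (tavor_mr_mtt_bind(state, &bind, NULL, &mtt,
 *	    &pgsize_bits) == DDI_SUCCESS) {
 *		... use mtt->tr_indx to build an MPT entry ...
 *		(void) tavor_mr_mtt_unbind(state, &bind, mtt);
 *	}
 */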


/*
 * tavor_mr_common_rereg()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op)
{
	tavor_rsrc_t		*mpt;
	ibt_mr_attr_flags_t	acc_flags_to_use;
	ibt_mr_flags_t		flags;
	tavor_pdhdl_t		pd_to_use;
	tavor_hw_mpt_t		mpt_entry;
	uint64_t		mtt_addr_to_use, vaddr_to_use, len_to_use;
	uint_t			sleep, dereg_level;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_common_rereg);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Check here to see if the memory region corresponds to a userland
	 * mapping.  Reregistration of userland memory regions is not
	 * currently supported.  Return failure.  XXX
	 */
	if (mr->mr_is_umem) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
		goto mrrereg_fail;
	}

	mutex_enter(&mr->mr_lock);

	/* Pull MPT resource pointer from the Tavor Memory Region handle */
	mpt = mr->mr_mptrsrcp;

	/* Extract the flags field from the tavor_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		mutex_exit(&mr->mr_lock);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrrereg_fail;
	}

	/*
	 * First step is to temporarily invalidate the MPT entry.  This
	 * regains ownership from the hardware, and gives us the opportunity
	 * to modify the entry.  Note: The HW2SW_MPT command returns the
	 * current MPT entry contents.  These are saved away here because
	 * they will be reused in a later step below.  If the region has
	 * bound memory windows, then we fail and return an "in use" error
	 * code.  On any other (unexpected) error, we deregister the memory
	 * region and return error.
	 *
	 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		if (status == TAVOR_CMD_REG_BOUND) {
			TAVOR_TNF_EXIT(tavor_mr_common_rereg);
			return (IBT_MR_IN_USE);
		} else {
			cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: "
			    "%08x\n", status);

			/*
			 * Call deregister and ensure that all current
			 * resources get freed up
			 */
			if (tavor_mr_deregister(state, &mr,
			    TAVOR_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}
			TNF_PROBE_1(tavor_mr_common_rereg_hw2sw_mpt_cmd_fail,
			    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
			TAVOR_TNF_EXIT(tavor_mr_common_rereg);
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * If we're changing the protection domain, then validate the new one
	 */
	if (flags & IBT_MR_CHANGE_PD) {

		/* Check for valid PD handle pointer */
		if (pd == NULL) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (tavor_mr_deregister(state, &mr,
			    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle");
			goto mrrereg_fail;
		}

		/* Use the new PD handle in all operations below */
		pd_to_use = pd;

	} else {
		/* Use the current PD handle in all operations below */
		pd_to_use = mr->mr_pdhdl;
	}

	/*
	 * If we're changing access permissions, then validate the new ones
	 */
	if (flags & IBT_MR_CHANGE_ACCESS) {
		/*
		 * Validate the access flags.  Both remote write and remote
		 * atomic require the local write flag to be set
		 */
		if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
		    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
		    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (tavor_mr_deregister(state, &mr,
			    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_ACCESS_REQ_INVALID,
			    "invalid access flags");
			goto mrrereg_fail;
		}

		/*
		 * Setup and validate the memory region access flags.  This
		 * means translating the IBTF's enable flags into the access
		 * flags that will be used in later operations.
		 */
		acc_flags_to_use = 0;
		if (flags & IBT_MR_ENABLE_WINDOW_BIND)
			acc_flags_to_use |= IBT_MR_WINDOW_BIND;
		if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
			acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_READ)
			acc_flags_to_use |= IBT_MR_REMOTE_READ;
		if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
			acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
			acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;

	} else {
		acc_flags_to_use = mr->mr_accflag;
	}

	/*
	 * If we're modifying the translation, then figure out whether
	 * we can reuse the current MTT resources.  This means calling
	 * tavor_mr_rereg_xlat_helper() which does most of the heavy lifting
	 * for the reregistration.  If the current memory region contains
	 * sufficient MTT entries for the new region, then it will be
	 * reused and filled in.  Otherwise, new entries will be allocated,
	 * the old ones will be freed, and the new entries will be filled
	 * in.  Note: If we're not modifying the translation, then we
	 * should already have all the information we need to update the MPT.
	 * Also note: If tavor_mr_rereg_xlat_helper() fails, it will return
	 * a "dereg_level" which is the level of cleanup that needs to be
	 * passed to tavor_mr_deregister() to finish the cleanup.
	 */
	if (flags & IBT_MR_CHANGE_TRANSLATION) {
		status = tavor_mr_rereg_xlat_helper(state, mr, bind, op,
		    &mtt_addr_to_use, sleep, &dereg_level);
		if (status != DDI_SUCCESS) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all resources get
			 * properly freed up.
			 */
			if (tavor_mr_deregister(state, &mr, dereg_level,
			    sleep) != DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(status, "failed rereg helper");
			goto mrrereg_fail;
		}
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use = mr->mr_bindinfo.bi_len;
	} else {
		mtt_addr_to_use = (((uint64_t)mpt_entry.mttseg_addr_h << 32) |
		    ((uint64_t)mpt_entry.mttseg_addr_l << 6));
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use = mr->mr_bindinfo.bi_len;
	}
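
	/*
	 * Worked example for the address reconstruction above (values are
	 * hypothetical): the MTT segment address is kept in the MPT as a
	 * high 32-bit field plus a low field stored in units of 64 bytes
	 * (hence the shift by 6).  With mttseg_addr_h == 0x1 and
	 * mttseg_addr_l == 0x10, the reconstructed address would be
	 * (0x1 << 32) | (0x10 << 6) == 0x100000400.
	 */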

	/*
	 * Calculate new keys (LKey, RKey) from MPT index.  Just like they
	 * were when the region was first registered, each key is formed from
	 * "constrained" bits and "unconstrained" bits.  Note: If no remote
	 * access is required, then the RKey value is not filled in.
	 * Otherwise both RKey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Update the MPT entry with the new information.  Some of this
	 * information is retained from the previous operation, some of
	 * it is new based on request.
	 */
	mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND) ? 1 : 0;
	mpt_entry.atomic  = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ? 1 : 0;
	mpt_entry.rr	  = (acc_flags_to_use & IBT_MR_REMOTE_READ) ? 1 : 0;
	mpt_entry.lw	  = (acc_flags_to_use & IBT_MR_LOCAL_WRITE) ? 1 : 0;
	mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key = mr->mr_lkey;
	mpt_entry.pd	  = pd_to_use->pd_pdnum;
	mpt_entry.start_addr	= vaddr_to_use;
	mpt_entry.reg_win_len	= len_to_use;
	mpt_entry.mttseg_addr_h	= mtt_addr_to_use >> 32;
	mpt_entry.mttseg_addr_l	= mtt_addr_to_use >> 6;

	/*
	 * Write the updated MPT entry to hardware
	 *
	 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		/*
		 * Call deregister and ensure that all current resources get
		 * properly freed up.  Unnecessary here to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above.
		 */
		if (tavor_mr_deregister(state, &mr,
		    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
			TAVOR_WARNING(state, "failed to deregister memory "
			    "region");
		}
		TNF_PROBE_1(tavor_mr_common_rereg_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		TAVOR_TNF_EXIT(tavor_mr_common_rereg);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * If we're changing PD, then update the reference counts now.
	 * This means decrementing the reference count on the old PD and
	 * incrementing the reference count on the new PD.
	 */
	if (flags & IBT_MR_CHANGE_PD) {
		tavor_pd_refcnt_dec(mr->mr_pdhdl);
		tavor_pd_refcnt_inc(pd);
	}

	/*
	 * Update the contents of the Tavor Memory Region handle to reflect
	 * what has been changed.
	 */
	mr->mr_pdhdl	  = pd_to_use;
	mr->mr_accflag	  = acc_flags_to_use;
	mr->mr_is_umem	  = 0;
	mr->mr_umemcookie = NULL;

	/* New MR handle is same as the old */
	*mrhdl_new = mr;
	mutex_exit(&mr->mr_lock);

	TAVOR_TNF_EXIT(tavor_mr_common_rereg);
	return (DDI_SUCCESS);

mrrereg_fail:
	TNF_PROBE_1(tavor_mr_common_rereg_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_common_rereg);
	return (status);
}


/*
 * tavor_mr_rereg_xlat_helper()
 *    Context: Can be called from interrupt or base context.
 *    Note: This routine expects the "mr_lock" to be held when it
 *    is called.  Upon returning failure, this routine passes information
 *    about what "dereg_level" should be passed to tavor_mr_deregister().
 */
static int
tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mtt, *mtt_refcnt;
	tavor_sw_refcnt_t	*swrc_old, *swrc_new;
	ddi_dma_handle_t	dmahdl;
	uint64_t		nummtt_needed, nummtt_in_currrsrc, max_sz;
	uint64_t		mtt_ddrbaseaddr;
	uint_t			mtt_pgsize_bits, bind_type, reuse_dmahdl;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_rereg_xlat_helper);

	ASSERT(MUTEX_HELD(&mr->mr_lock));

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU) or whether it should be
	 * registered to bypass the IOMMU.
	 */
	if (op == NULL) {
		bind_type = TAVOR_BINDMEM_NORMAL;
	} else {
		bind_type = op->mro_bind_type;
	}

	/*
	 * Check for invalid length.  The check fails if the length is zero
	 * or if it is larger than the maximum configured value.  Return
	 * error in that case.
	 */
	max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		/*
		 * Deregister will be called upon returning failure from this
		 * routine.  This will ensure that all current resources get
		 * properly freed up.  Unnecessary to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above (in tavor_mr_reregister())
		 */
		*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT;

		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
		goto mrrereghelp_fail;
	}
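
	/*
	 * For example (the profile value here is hypothetical): if
	 * cp_log_max_mrw_sz were 36, then max_sz would be 64GB, and any
	 * bind request with a bi_len of zero or greater than 64GB would be
	 * rejected with IBT_MR_LEN_INVALID.
	 */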

	/*
	 * Determine the number of pages necessary for the new region and the
	 * number of pages supported by the current MTT resources
	 */
	nummtt_needed = tavor_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
	nummtt_in_currrsrc = mr->mr_mttrsrcp->tr_len >> TAVOR_MTT_SIZE_SHIFT;

	/*
	 * Depending on whether we have enough pages or not, the next step is
	 * to fill in a set of MTT entries that reflect the new mapping.  In
	 * the first case below, we already have enough entries.  This means
	 * we need to unbind the memory from the previous mapping, bind the
	 * memory for the new mapping, write the new MTT entries, and update
	 * the mr to reflect the changes.
	 * In the second case below, we do not have enough entries in the
	 * current mapping.  So, in this case, we need not only to unbind the
	 * current mapping, but we need to free up the MTT resources associated
	 * with that mapping.  After we've successfully done that, we continue
	 * by binding the new memory, allocating new MTT entries, writing the
	 * new MTT entries, and updating the mr to reflect the changes.
	 */

	/*
	 * If this region is being shared (i.e. MTT refcount != 1), then we
	 * can't reuse the current MTT resources regardless of their size.
	 * Instead we'll need to alloc new ones (below) just as if there
	 * hadn't been enough room in the current entries.
	 */
	swrc_old = (tavor_sw_refcnt_t *)mr->mr_mttrefcntp->tr_addr;
	if (TAVOR_MTT_IS_NOT_SHARED(swrc_old) &&
	    (nummtt_needed <= nummtt_in_currrsrc)) {

		/*
		 * Unbind the old mapping for this memory region, but retain
		 * the ddi_dma_handle_t (if possible) for reuse in the bind
		 * operation below.  Note: If the original memory region was
		 * bound for IOMMU bypass and the new region can not use
		 * bypass, then a new DMA handle will be necessary.
		 */
		if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
			mr->mr_bindinfo.bi_free_dmahdl = 0;
			tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = mr->mr_bindinfo.bi_dmahdl;
			reuse_dmahdl = 1;
		} else {
			tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and tavor_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note: When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass = bind_type;
		status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Using the new mapping, but reusing the current MTT
		 * resources, write the updated entries to MTT
		 */
		mtt = mr->mr_mttrsrcp;
		status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we do need to unbind the newly bound memory
			 * before returning.
			 */
			tavor_mr_mem_unbind(state, bind);
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
			    "failed write mtt");
			goto mrrereghelp_fail;
		}

		/* Put the updated information into the Mem Region handle */
		mr->mr_bindinfo = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;

	} else {
		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not unbind any resources prematurely.
		 */
		if (!TAVOR_MTT_IS_SHARED(swrc_old)) {
			/*
			 * Unbind the old mapping for this memory region, but
			 * retain the ddi_dma_handle_t for reuse in the bind
			 * operation below.  Note: This can only be done here
			 * because the region being reregistered is not
			 * currently shared.  Also if the original memory
			 * region was bound for IOMMU bypass and the new
			 * region can not use bypass, then a new DMA handle
			 * will be necessary.
			 */
			if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
				mr->mr_bindinfo.bi_free_dmahdl = 0;
				tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = mr->mr_bindinfo.bi_dmahdl;
				reuse_dmahdl = 1;
			} else {
				tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = NULL;
				reuse_dmahdl = 0;
			}
		} else {
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and tavor_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note: When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass = bind_type;
		status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Allocate the new MTT entries resource
		 */
		status = tavor_rsrc_alloc(state, TAVOR_MTT,
		    TAVOR_NUMMTT_TO_MTTSEG(nummtt_needed), sleep, &mtt);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we do need to unbind the newly bound memory
			 * before returning.
			 */
			tavor_mr_mem_unbind(state, bind);
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT");
			goto mrrereghelp_fail;
		}

		/*
		 * Allocate MTT reference count (to track shared memory
		 * regions).  As mentioned elsewhere above, this reference
		 * count resource may never be used on the given memory
		 * region, but if it is ever later registered as a "shared"
		 * memory region then this resource will be necessary.  Note:
		 * This is only necessary here if the existing memory region
		 * is already being shared (because otherwise we already have
		 * a usable reference count resource).
		 */
		if (TAVOR_MTT_IS_SHARED(swrc_old)) {
			status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1,
			    sleep, &mtt_refcnt);
			if (status != DDI_SUCCESS) {
				/*
				 * Deregister will be called upon returning
				 * failure from this routine.  This will ensure
				 * that all current resources get properly
				 * freed up.  Unnecessary to attempt to regain
				 * software ownership of the MPT entry as that
				 * has already been done above (in
				 * tavor_mr_reregister()).  Also unnecessary
				 * to attempt to unbind the memory.
				 *
				 * But we need to unbind the newly bound
				 * memory and free up the newly allocated MTT
				 * entries before returning.
				 */
				tavor_mr_mem_unbind(state, bind);
				tavor_rsrc_free(state, &mtt);
				*dereg_level =
				    TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

				/* Set "status"/"errormsg", goto failure */
				TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
				    "failed reference count");
				goto mrrereghelp_fail;
			}
			swrc_new = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
			TAVOR_MTT_REFCNT_INIT(swrc_new);
		} else {
			mtt_refcnt = mr->mr_mttrefcntp;
		}

		/*
		 * Using the new mapping and the new MTT resources, write the
		 * updated entries to MTT
		 */
		status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we need to unbind the newly bound memory,
			 * free up the newly allocated MTT entries, and
			 * (possibly) free the new MTT reference count
			 * resource before returning.
			 */
			if (TAVOR_MTT_IS_SHARED(swrc_old)) {
				tavor_rsrc_free(state, &mtt_refcnt);
			}
			tavor_mr_mem_unbind(state, bind);
			tavor_rsrc_free(state, &mtt);
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed write mtt");
			goto mrrereghelp_fail;
		}

		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not free up any resources prematurely.
		 */
		if (TAVOR_MTT_IS_SHARED(swrc_old)) {
			/* Decrement MTT reference count for "old" region */
			(void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
		} else {
			/* Free up the old MTT entries resource */
			tavor_rsrc_free(state, &mr->mr_mttrsrcp);
		}

		/* Put the updated information into the mrhdl */
		mr->mr_bindinfo	  = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;
		mr->mr_mttrsrcp	  = mtt;
		mr->mr_mttrefcntp = mtt_refcnt;
	}

	/*
	 * Calculate and return the updated MTT address (in the DDR address
	 * space).  This will be used by the caller (tavor_mr_reregister) in
	 * the updated MPT entry
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;
	*mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx <<
	    TAVOR_MTT_SIZE_SHIFT);

	TAVOR_TNF_EXIT(tavor_mr_rereg_xlat_helper);
	return (DDI_SUCCESS);

mrrereghelp_fail:
	TNF_PROBE_1(tavor_mr_rereg_xlat_helper_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_rereg_xlat_helper);
	return (status);
}


/*
 * tavor_mr_nummtt_needed()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static uint64_t
tavor_mr_nummtt_needed(tavor_state_t *state, tavor_bind_info_t *bind,
    uint_t *mtt_pgsize_bits)
{
	uint64_t	pg_offset_mask;
	uint64_t	pg_offset, tmp_length;

	/*
	 * For now we specify the page size as 8Kb (the default page size for
	 * the sun4u architecture), or 4Kb for x86.  Figure out optimal page
	 * size by examining the dmacookies XXX
	 */
	*mtt_pgsize_bits = PAGESHIFT;

	pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
	pg_offset = bind->bi_addr & pg_offset_mask;
	tmp_length = pg_offset + (bind->bi_len - 1);
	return ((tmp_length >> *mtt_pgsize_bits) + 1);
}
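

/*
 * Worked example for the calculation above: with 8Kb pages
 * (PAGESHIFT == 13 on sun4u), a 10240-byte region starting at page
 * offset 0x200 gives:
 *
 *	pg_offset  = 0x200
 *	tmp_length = 0x200 + (10240 - 1) = 0x29ff
 *	nummtt     = (0x29ff >> 13) + 1 = 2
 *
 * i.e. the region touches two pages and so needs two MTT entries.
 */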


/*
 * tavor_mr_mem_bind()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep)
{
	ddi_dma_attr_t	dma_attr;
	int		(*callback)(caddr_t);
	uint_t		dma_xfer_mode;
	int		status;

	/* bi_type must be set to a meaningful value to get a bind handle */
	ASSERT(bind->bi_type == TAVOR_BINDHDL_VADDR ||
	    bind->bi_type == TAVOR_BINDHDL_BUF ||
	    bind->bi_type == TAVOR_BINDHDL_UBUF);

	TAVOR_TNF_ENTER(tavor_mr_mem_bind);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/* Set the callback flag appropriately */
	callback = (sleep == TAVOR_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;

	/* Determine whether to map STREAMING or CONSISTENT */
	dma_xfer_mode = (bind->bi_flags & IBT_MR_NONCOHERENT) ?
	    DDI_DMA_STREAMING : DDI_DMA_CONSISTENT;

	/*
	 * Initialize many of the default DMA attributes.  Then, if we're
	 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
	 */
	if (dmahdl == NULL) {
		tavor_dma_attr_init(&dma_attr);
#ifdef	__sparc
		/*
		 * First, disable streaming and switch to consistent if
		 * configured to do so and IOMMU BYPASS is enabled.
		 */
		if (state->ts_cfg_profile->cp_disable_streaming_on_bypass &&
		    dma_xfer_mode == DDI_DMA_STREAMING &&
		    bind->bi_bypass == TAVOR_BINDMEM_BYPASS) {
			dma_xfer_mode = DDI_DMA_CONSISTENT;
		}

		/*
		 * Then, if the mapping is (now) consistent, request bypass
		 * by setting the DDI_DMA_FORCE_PHYSICAL flag.  If streaming
		 * is still specified, "bypass" is not allowed and the flag
		 * is left unset.
		 */
		if ((dma_xfer_mode == DDI_DMA_CONSISTENT) &&
		    (bind->bi_bypass == TAVOR_BINDMEM_BYPASS)) {
			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
		}
#endif
		/* Allocate a DMA handle for the binding */
		status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr,
		    callback, NULL, &bind->bi_dmahdl);
		if (status != DDI_SUCCESS) {
			TNF_PROBE_0(tavor_mr_mem_bind_dmahdl_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_mr_mem_bind);
			return (status);
		}
		bind->bi_free_dmahdl = 1;

	} else {
		bind->bi_dmahdl = dmahdl;
		bind->bi_free_dmahdl = 0;
	}

	/*
	 * Bind the memory to get the PCI mapped addresses.  The decision
	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
	 * is determined by the "bi_type" flag.  Note: if the bind operation
	 * fails then we have to free up the DMA handle and return error.
	 */
	if (bind->bi_type == TAVOR_BINDHDL_VADDR) {
		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
		    (DDI_DMA_RDWR | dma_xfer_mode), callback, NULL,
		    &bind->bi_dmacookie, &bind->bi_cookiecnt);
	} else {  /* TAVOR_BINDHDL_BUF || TAVOR_BINDHDL_UBUF */
		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
		    bind->bi_buf, (DDI_DMA_RDWR | dma_xfer_mode), callback,
		    NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
	}

	if (status != DDI_DMA_MAPPED) {
		if (bind->bi_free_dmahdl != 0) {
			ddi_dma_free_handle(&bind->bi_dmahdl);
		}
		TNF_PROBE_0(tavor_mr_mem_bind_dmabind_fail, TAVOR_TNF_ERROR,
		    "");
		TAVOR_TNF_EXIT(tavor_mr_mem_bind);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_mem_bind);
	return (DDI_SUCCESS);
}
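

/*
 * Illustrative only (a sketch, not driver code): a minimal "addr"-type
 * bind might be set up as follows, with "vaddr" and "len" standing in
 * for a caller-supplied address and length:
 *
 *	tavor_bind_info_t	bind;
 *
 *	bind.bi_type   = TAVOR_BINDHDL_VADDR;
 *	bind.bi_addr   = vaddr;
 *	bind.bi_len    = len;
 *	bind.bi_flags  = IBT_MR_NOSLEEP;
 *	bind.bi_bypass = TAVOR_BINDMEM_NORMAL;
 *	if (tavor_mr_mem_bind(state, &bind, NULL, TAVOR_NOSLEEP) ==
 *	    DDI_SUCCESS) {
 *		... walk bind.bi_dmacookie / bind.bi_cookiecnt ...
 *		tavor_mr_mem_unbind(state, &bind);
 *	}
 *
 * The DMA handle allocated internally is freed by the unbind because
 * tavor_mr_mem_bind() sets "bi_free_dmahdl" when it allocates one.
 */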


/*
 * tavor_mr_mem_unbind()
 *    Context: Can be called from interrupt or base context.
 */
static void
tavor_mr_mem_unbind(tavor_state_t *state, tavor_bind_info_t *bind)
{
	int	status;

	TAVOR_TNF_ENTER(tavor_mr_mem_unbind);

	/*
	 * In the TAVOR_BINDHDL_UBUF case, the memory that bi_buf points to
	 * was actually allocated internally by ddi_umem_iosetup(), so it
	 * must be freed here.  Reset bi_type to TAVOR_BINDHDL_NONE so that
	 * it is not freed again later.
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
		freerbuf(bind->bi_buf);
		bind->bi_type = TAVOR_BINDHDL_NONE;
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Unbind the DMA memory for the region
	 *
	 * Note: The only way ddi_dma_unbind_handle() currently
	 * can return an error is if the handle passed in is invalid.
	 * Since this should never happen, we choose to return void
	 * from this function!  If this does return an error, however,
	 * then we print a warning message to the console.
	 */
	status = ddi_dma_unbind_handle(bind->bi_dmahdl);
	if (status != DDI_SUCCESS) {
		TAVOR_WARNING(state, "failed to unbind DMA mapping");
		TNF_PROBE_0(tavor_mr_mem_unbind_dmaunbind_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
		return;
	}

	/* Free up the DMA handle */
	if (bind->bi_free_dmahdl != 0) {
		ddi_dma_free_handle(&bind->bi_dmahdl);
	}

	TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
}


/*
 * tavor_mr_fast_mtt_write()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind,
    uint32_t mtt_pgsize_bits)
{
	ddi_dma_cookie_t	dmacookie;
	uint_t			cookie_cnt;
	uint64_t		*mtt_table;
	uint64_t		mtt_entry;
	uint64_t		addr, endaddr;
	uint64_t		pagesize;
	int			i;

	TAVOR_TNF_ENTER(tavor_mr_fast_mtt_write);

	/* Calculate page size from the suggested value passed in */
	pagesize = ((uint64_t)1 << mtt_pgsize_bits);

	/*
	 * Walk the "cookie list" and fill in the MTT table entries
	 */
	i = 0;
	mtt_table = (uint64_t *)mtt->tr_addr;
	dmacookie = bind->bi_dmacookie;
	cookie_cnt = bind->bi_cookiecnt;
	while (cookie_cnt-- > 0) {
		addr	= dmacookie.dmac_laddress;
		endaddr	= addr + (dmacookie.dmac_size - 1);
		addr	= addr & ~((uint64_t)pagesize - 1);
		while (addr <= endaddr) {
			/*
			 * Fill in the mapped addresses (calculated above) and
			 * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry.
			 */
			mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET;
			ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry);
			addr += pagesize;
			i++;

			if (addr == 0) {
				static int do_once = 1;
				_NOTE(SCHEME_PROTECTS_DATA("safe sharing",
				    do_once))
				if (do_once) {
					do_once = 0;
					cmn_err(CE_NOTE, "probable error in "
					    "dma_cookie address from caller\n");
				}
				break;
			}
		}

		/*
		 * When we've reached the end of the current DMA cookie,
		 * jump to the next cookie (if there are more)
		 */
		if (cookie_cnt != 0) {
			ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
		}
	}

	TAVOR_TNF_EXIT(tavor_mr_fast_mtt_write);
	return (DDI_SUCCESS);
}
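

/*
 * Worked example of the loop above (values hypothetical): a single
 * cookie with dmac_laddress == 0x10000200 and dmac_size == 0x4000
 * (16Kb), written with an 8Kb MTT page size (mtt_pgsize_bits == 13),
 * is first aligned down to 0x10000000.  The end address is 0x100041ff,
 * so three entries get written:
 *
 *	mtt_table[0] = 0x10000000 | TAVOR_MTT_ENTRY_PRESET
 *	mtt_table[1] = 0x10002000 | TAVOR_MTT_ENTRY_PRESET
 *	mtt_table[2] = 0x10004000 | TAVOR_MTT_ENTRY_PRESET
 */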


/*
 * tavor_mtt_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc)
{
	tavor_sw_refcnt_t	*rc;
	uint32_t		cnt;

	rc = (tavor_sw_refcnt_t *)rsrc->tr_addr;

	/* Increment the MTT's reference count */
	mutex_enter(&rc->swrc_lock);
	TNF_PROBE_1_DEBUG(tavor_mtt_refcnt_inc, TAVOR_TNF_TRACE, "",
	    tnf_uint, refcnt, rc->swrc_refcnt);
	cnt = rc->swrc_refcnt++;
	mutex_exit(&rc->swrc_lock);

	return (cnt);
}


/*
 * tavor_mtt_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc)
{
	tavor_sw_refcnt_t	*rc;
	uint32_t		cnt;

	rc = (tavor_sw_refcnt_t *)rsrc->tr_addr;

	/* Decrement the MTT's reference count */
	mutex_enter(&rc->swrc_lock);
	cnt = --rc->swrc_refcnt;
	TNF_PROBE_1_DEBUG(tavor_mtt_refcnt_dec, TAVOR_TNF_TRACE, "",
	    tnf_uint, refcnt, rc->swrc_refcnt);
	mutex_exit(&rc->swrc_lock);

	return (cnt);
}
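

/*
 * Note on the return values above: tavor_mtt_refcnt_inc() returns the
 * count as it was before the increment (post-increment of swrc_refcnt),
 * while tavor_mtt_refcnt_dec() returns the count after the decrement.
 * For example, with a count of 1, inc() returns 1 (leaving 2) and a
 * subsequent dec() returns 1 (leaving 1).  A dec() return value of 0
 * therefore means the last reference was just released.
 */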