1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * tavor_mr.c 28 * Tavor Memory Region/Window Routines 29 * 30 * Implements all the routines necessary to provide the requisite memory 31 * registration verbs. These include operations like RegisterMemRegion(), 32 * DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion, 33 * etc., that affect Memory Regions. It also includes the verbs that 34 * affect Memory Windows, including AllocMemWindow(), FreeMemWindow(), 35 * and QueryMemWindow(). 
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/esunddi.h>

#include <sys/ib/adapters/tavor/tavor.h>


/*
 * Used by tavor_mr_keycalc() below to fill in the "unconstrained" portion
 * of Tavor memory keys (LKeys and RKeys).  Incremented (deliberately
 * without locking) on each key calculation; see tavor_mr_keycalc() for
 * the rationale.
 */
static uint_t tavor_debug_memkey_cnt = 0x00000000;

/* Common registration/reregistration workers and helpers (defined below) */
static int tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op);
static int tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op);
static int tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level);
static uint64_t tavor_mr_nummtt_needed(tavor_state_t *state,
    tavor_bind_info_t *bind, uint_t *mtt_pgsize);
static int tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep);
static void tavor_mr_mem_unbind(tavor_state_t *state,
    tavor_bind_info_t *bind);
static int tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind,
    uint32_t mtt_pgsize_bits);
static int tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc);
static int tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc);

/*
 * The Tavor umem_lockmemory() callback ops.  When userland memory is
 * registered, these callback ops are specified.  The tavor_umap_umemlock_cb()
 * callback will be called whenever the memory for the corresponding
 * ddi_umem_cookie_t is being freed.
 */
static struct umem_callback_ops tavor_umem_cbops = {
	UMEM_CALLBACK_VERSION,
	tavor_umap_umemlock_cb,
};


/*
 * tavor_mr_register()
 *    Context: Can be called from interrupt or base context.
88 */ 89 int 90 tavor_mr_register(tavor_state_t *state, tavor_pdhdl_t pd, 91 ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op) 92 { 93 tavor_bind_info_t bind; 94 int status; 95 96 /* 97 * Fill in the "bind" struct. This struct provides the majority 98 * of the information that will be used to distinguish between an 99 * "addr" binding (as is the case here) and a "buf" binding (see 100 * below). The "bind" struct is later passed to tavor_mr_mem_bind() 101 * which does most of the "heavy lifting" for the Tavor memory 102 * registration routines. 103 */ 104 bind.bi_type = TAVOR_BINDHDL_VADDR; 105 bind.bi_addr = mr_attr->mr_vaddr; 106 bind.bi_len = mr_attr->mr_len; 107 bind.bi_as = mr_attr->mr_as; 108 bind.bi_flags = mr_attr->mr_flags; 109 status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op); 110 111 return (status); 112 } 113 114 115 /* 116 * tavor_mr_register_buf() 117 * Context: Can be called from interrupt or base context. 118 */ 119 int 120 tavor_mr_register_buf(tavor_state_t *state, tavor_pdhdl_t pd, 121 ibt_smr_attr_t *mr_attr, struct buf *buf, tavor_mrhdl_t *mrhdl, 122 tavor_mr_options_t *op) 123 { 124 tavor_bind_info_t bind; 125 int status; 126 127 /* 128 * Fill in the "bind" struct. This struct provides the majority 129 * of the information that will be used to distinguish between an 130 * "addr" binding (see above) and a "buf" binding (as is the case 131 * here). The "bind" struct is later passed to tavor_mr_mem_bind() 132 * which does most of the "heavy lifting" for the Tavor memory 133 * registration routines. Note: We have chosen to provide 134 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is 135 * not set). It is not critical what value we choose here as it need 136 * only be unique for the given RKey (which will happen by default), 137 * so the choice here is somewhat arbitrary. 
138 */ 139 bind.bi_type = TAVOR_BINDHDL_BUF; 140 bind.bi_buf = buf; 141 if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) { 142 bind.bi_addr = mr_attr->mr_vaddr; 143 } else { 144 bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr; 145 } 146 bind.bi_as = NULL; 147 bind.bi_len = (uint64_t)buf->b_bcount; 148 bind.bi_flags = mr_attr->mr_flags; 149 status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op); 150 151 return (status); 152 } 153 154 155 /* 156 * tavor_mr_register_shared() 157 * Context: Can be called from interrupt or base context. 158 */ 159 int 160 tavor_mr_register_shared(tavor_state_t *state, tavor_mrhdl_t mrhdl, 161 tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new) 162 { 163 tavor_rsrc_pool_info_t *rsrc_pool; 164 tavor_rsrc_t *mpt, *mtt, *rsrc; 165 tavor_umap_db_entry_t *umapdb; 166 tavor_hw_mpt_t mpt_entry; 167 tavor_mrhdl_t mr; 168 tavor_bind_info_t *bind; 169 ddi_umem_cookie_t umem_cookie; 170 size_t umem_len; 171 caddr_t umem_addr; 172 uint64_t mtt_addr, mtt_ddrbaseaddr, pgsize_msk; 173 uint_t sleep, mr_is_umem; 174 int status, umem_flags; 175 176 /* 177 * Check the sleep flag. Ensure that it is consistent with the 178 * current thread context (i.e. if we are currently in the interrupt 179 * context, then we shouldn't be attempting to sleep). 180 */ 181 sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP : 182 TAVOR_SLEEP; 183 if ((sleep == TAVOR_SLEEP) && 184 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 185 goto mrshared_fail; 186 } 187 188 /* Increment the reference count on the protection domain (PD) */ 189 tavor_pd_refcnt_inc(pd); 190 191 /* 192 * Allocate an MPT entry. This will be filled in with all the 193 * necessary parameters to define the shared memory region. 194 * Specifically, it will be made to reference the currently existing 195 * MTT entries and ownership of the MPT will be passed to the hardware 196 * in the last step below. If we fail here, we must undo the 197 * protection domain reference count. 
198 */ 199 status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt); 200 if (status != DDI_SUCCESS) { 201 goto mrshared_fail1; 202 } 203 204 /* 205 * Allocate the software structure for tracking the shared memory 206 * region (i.e. the Tavor Memory Region handle). If we fail here, we 207 * must undo the protection domain reference count and the previous 208 * resource allocation. 209 */ 210 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc); 211 if (status != DDI_SUCCESS) { 212 goto mrshared_fail2; 213 } 214 mr = (tavor_mrhdl_t)rsrc->tr_addr; 215 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 216 217 /* 218 * Setup and validate the memory region access flags. This means 219 * translating the IBTF's enable flags into the access flags that 220 * will be used in later operations. 221 */ 222 mr->mr_accflag = 0; 223 if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND) 224 mr->mr_accflag |= IBT_MR_WINDOW_BIND; 225 if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE) 226 mr->mr_accflag |= IBT_MR_LOCAL_WRITE; 227 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ) 228 mr->mr_accflag |= IBT_MR_REMOTE_READ; 229 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE) 230 mr->mr_accflag |= IBT_MR_REMOTE_WRITE; 231 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 232 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; 233 234 /* 235 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 236 * from a certain number of "constrained" bits (the least significant 237 * bits) and some number of "unconstrained" bits. The constrained 238 * bits must be set to the index of the entry in the MPT table, but 239 * the unconstrained bits can be set to any value we wish. Note: 240 * if no remote access is required, then the RKey value is not filled 241 * in. Otherwise both Rkey and LKey are given the same value. 
242 */ 243 tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey); 244 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 245 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 246 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 247 mr->mr_rkey = mr->mr_lkey; 248 } 249 250 /* Grab the MR lock for the current memory region */ 251 mutex_enter(&mrhdl->mr_lock); 252 253 /* 254 * Check here to see if the memory region has already been partially 255 * deregistered as a result of a tavor_umap_umemlock_cb() callback. 256 * If so, this is an error, return failure. 257 */ 258 if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) { 259 mutex_exit(&mrhdl->mr_lock); 260 goto mrshared_fail3; 261 } 262 263 /* 264 * Determine if the original memory was from userland and, if so, pin 265 * the pages (again) with umem_lockmemory(). This will guarantee a 266 * separate callback for each of this shared region's MR handles. 267 * If this is userland memory, then allocate an entry in the 268 * "userland resources database". This will later be added to 269 * the database (after all further memory registration operations are 270 * successful). If we fail here, we must undo all the above setup. 
271 */ 272 mr_is_umem = mrhdl->mr_is_umem; 273 if (mr_is_umem) { 274 umem_len = ptob(btopr(mrhdl->mr_bindinfo.bi_len + 275 ((uintptr_t)mrhdl->mr_bindinfo.bi_addr & PAGEOFFSET))); 276 umem_addr = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr & 277 ~PAGEOFFSET); 278 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ | 279 DDI_UMEMLOCK_LONGTERM); 280 status = umem_lockmemory(umem_addr, umem_len, umem_flags, 281 &umem_cookie, &tavor_umem_cbops, NULL); 282 if (status != 0) { 283 mutex_exit(&mrhdl->mr_lock); 284 goto mrshared_fail3; 285 } 286 287 umapdb = tavor_umap_db_alloc(state->ts_instance, 288 (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC, 289 (uint64_t)(uintptr_t)rsrc); 290 if (umapdb == NULL) { 291 mutex_exit(&mrhdl->mr_lock); 292 goto mrshared_fail4; 293 } 294 } 295 296 /* 297 * Copy the MTT resource pointer (and additional parameters) from 298 * the original Tavor Memory Region handle. Note: this is normally 299 * where the tavor_mr_mem_bind() routine would be called, but because 300 * we already have bound and filled-in MTT entries it is simply a 301 * matter here of managing the MTT reference count and grabbing the 302 * address of the MTT table entries (for filling in the shared region's 303 * MPT entry). 304 */ 305 mr->mr_mttrsrcp = mrhdl->mr_mttrsrcp; 306 mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz; 307 mr->mr_bindinfo = mrhdl->mr_bindinfo; 308 mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp; 309 mutex_exit(&mrhdl->mr_lock); 310 bind = &mr->mr_bindinfo; 311 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 312 mtt = mr->mr_mttrsrcp; 313 314 /* 315 * Increment the MTT reference count (to reflect the fact that 316 * the MTT is now shared) 317 */ 318 (void) tavor_mtt_refcnt_inc(mr->mr_mttrefcntp); 319 320 /* 321 * Update the new "bind" virtual address. Do some extra work here 322 * to ensure proper alignment. 
That is, make sure that the page 323 * offset for the beginning of the old range is the same as the 324 * offset for this new mapping 325 */ 326 pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1); 327 bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) | 328 (mr->mr_bindinfo.bi_addr & pgsize_msk)); 329 330 /* 331 * Get the base address for the MTT table. This will be necessary 332 * in the next step when we are setting up the MPT entry. 333 */ 334 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT]; 335 mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset; 336 337 /* 338 * Fill in the MPT entry. This is the final step before passing 339 * ownership of the MPT entry to the Tavor hardware. We use all of 340 * the information collected/calculated above to fill in the 341 * requisite portions of the MPT. 342 */ 343 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t)); 344 mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE; 345 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0; 346 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 347 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; 348 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0; 349 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0; 350 mpt_entry.lr = 1; 351 mpt_entry.reg_win = TAVOR_MPT_IS_REGION; 352 mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC; 353 mpt_entry.mem_key = mr->mr_lkey; 354 mpt_entry.pd = pd->pd_pdnum; 355 mpt_entry.start_addr = bind->bi_addr; 356 mpt_entry.reg_win_len = bind->bi_len; 357 mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND; 358 mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT); 359 mpt_entry.mttseg_addr_h = mtt_addr >> 32; 360 mpt_entry.mttseg_addr_l = mtt_addr >> 6; 361 362 /* 363 * Write the MPT entry to hardware. Lastly, we pass ownership of 364 * the entry to the hardware. Note: in general, this operation 365 * shouldn't fail. 
But if it does, we have to undo everything we've 366 * done above before returning error. 367 */ 368 status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 369 sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep); 370 if (status != TAVOR_CMD_SUCCESS) { 371 cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n", 372 status); 373 goto mrshared_fail5; 374 } 375 376 /* 377 * Fill in the rest of the Tavor Memory Region handle. Having 378 * successfully transferred ownership of the MPT, we can update the 379 * following fields for use in further operations on the MR. 380 */ 381 mr->mr_mptrsrcp = mpt; 382 mr->mr_mttrsrcp = mtt; 383 mr->mr_pdhdl = pd; 384 mr->mr_rsrcp = rsrc; 385 mr->mr_is_umem = mr_is_umem; 386 mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL; 387 mr->mr_umem_cbfunc = NULL; 388 mr->mr_umem_cbarg1 = NULL; 389 mr->mr_umem_cbarg2 = NULL; 390 391 /* 392 * If this is userland memory, then we need to insert the previously 393 * allocated entry into the "userland resources database". This will 394 * allow for later coordination between the tavor_umap_umemlock_cb() 395 * callback and tavor_mr_deregister(). 396 */ 397 if (mr_is_umem) { 398 tavor_umap_db_add(umapdb); 399 } 400 401 *mrhdl_new = mr; 402 403 return (DDI_SUCCESS); 404 405 /* 406 * The following is cleanup for all possible failure cases in this routine 407 */ 408 mrshared_fail5: 409 (void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp); 410 if (mr_is_umem) { 411 tavor_umap_db_free(umapdb); 412 } 413 mrshared_fail4: 414 if (mr_is_umem) { 415 ddi_umem_unlock(umem_cookie); 416 } 417 mrshared_fail3: 418 tavor_rsrc_free(state, &rsrc); 419 mrshared_fail2: 420 tavor_rsrc_free(state, &mpt); 421 mrshared_fail1: 422 tavor_pd_refcnt_dec(pd); 423 mrshared_fail: 424 return (status); 425 } 426 427 428 /* 429 * tavor_mr_deregister() 430 * Context: Can be called from interrupt or base context. 
431 */ 432 /* ARGSUSED */ 433 int 434 tavor_mr_deregister(tavor_state_t *state, tavor_mrhdl_t *mrhdl, uint_t level, 435 uint_t sleep) 436 { 437 tavor_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt; 438 tavor_umap_db_entry_t *umapdb; 439 tavor_pdhdl_t pd; 440 tavor_mrhdl_t mr; 441 tavor_bind_info_t *bind; 442 uint64_t value; 443 int status, shared_mtt; 444 445 /* 446 * Check the sleep flag. Ensure that it is consistent with the 447 * current thread context (i.e. if we are currently in the interrupt 448 * context, then we shouldn't be attempting to sleep). 449 */ 450 if ((sleep == TAVOR_SLEEP) && 451 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 452 return (status); 453 } 454 455 /* 456 * Pull all the necessary information from the Tavor Memory Region 457 * handle. This is necessary here because the resource for the 458 * MR handle is going to be freed up as part of the this 459 * deregistration 460 */ 461 mr = *mrhdl; 462 mutex_enter(&mr->mr_lock); 463 mpt = mr->mr_mptrsrcp; 464 mtt = mr->mr_mttrsrcp; 465 mtt_refcnt = mr->mr_mttrefcntp; 466 rsrc = mr->mr_rsrcp; 467 pd = mr->mr_pdhdl; 468 bind = &mr->mr_bindinfo; 469 470 /* 471 * Check here to see if the memory region has already been partially 472 * deregistered as a result of the tavor_umap_umemlock_cb() callback. 473 * If so, then jump to the end and free the remaining resources. 474 */ 475 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) { 476 goto mrdereg_finish_cleanup; 477 } 478 479 /* 480 * We must drop the "mr_lock" here to ensure that both SLEEP and 481 * NOSLEEP calls into the firmware work as expected. Also, if two 482 * threads are attemping to access this MR (via de-register, 483 * re-register, or otherwise), then we allow the firmware to enforce 484 * the checking, that only one deregister is valid. 485 */ 486 mutex_exit(&mr->mr_lock); 487 488 /* 489 * Reclaim MPT entry from hardware (if necessary). 
Since the 490 * tavor_mr_deregister() routine is used in the memory region 491 * reregistration process as well, it is possible that we will 492 * not always wish to reclaim ownership of the MPT. Check the 493 * "level" arg and, if necessary, attempt to reclaim it. If 494 * the ownership transfer fails for any reason, we check to see 495 * what command status was returned from the hardware. The only 496 * "expected" error status is the one that indicates an attempt to 497 * deregister a memory region that has memory windows bound to it 498 */ 499 if (level >= TAVOR_MR_DEREG_ALL) { 500 status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, 501 NULL, 0, mpt->tr_indx, sleep); 502 if (status != TAVOR_CMD_SUCCESS) { 503 if (status == TAVOR_CMD_REG_BOUND) { 504 return (IBT_MR_IN_USE); 505 } else { 506 cmn_err(CE_CONT, "Tavor: HW2SW_MPT command " 507 "failed: %08x\n", status); 508 return (IBT_INVALID_PARAM); 509 } 510 } 511 } 512 513 /* 514 * Re-grab the mr_lock here. Since further access to the protected 515 * 'mr' structure is needed, and we would have returned previously for 516 * the multiple deregistration case, we can safely grab the lock here. 517 */ 518 mutex_enter(&mr->mr_lock); 519 520 /* 521 * If the memory had come from userland, then we do a lookup in the 522 * "userland resources database". On success, we free the entry, call 523 * ddi_umem_unlock(), and continue the cleanup. On failure (which is 524 * an indication that the umem_lockmemory() callback has called 525 * tavor_mr_deregister()), we call ddi_umem_unlock() and invalidate 526 * the "mr_umemcookie" field in the MR handle (this will be used 527 * later to detect that only partial cleaup still remains to be done 528 * on the MR handle). 
529 */ 530 if (mr->mr_is_umem) { 531 status = tavor_umap_db_find(state->ts_instance, 532 (uint64_t)(uintptr_t)mr->mr_umemcookie, 533 MLNX_UMAP_MRMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE, 534 &umapdb); 535 if (status == DDI_SUCCESS) { 536 tavor_umap_db_free(umapdb); 537 ddi_umem_unlock(mr->mr_umemcookie); 538 } else { 539 ddi_umem_unlock(mr->mr_umemcookie); 540 mr->mr_umemcookie = NULL; 541 } 542 } 543 544 /* mtt_refcnt is NULL in the case of tavor_dma_mr_register() */ 545 if (mtt_refcnt != NULL) { 546 /* 547 * Decrement the MTT reference count. Since the MTT resource 548 * may be shared between multiple memory regions (as a result 549 * of a "RegisterSharedMR" verb) it is important that we not 550 * free up or unbind resources prematurely. If it's not shared 551 * (as indicated by the return status), then free the resource. 552 */ 553 shared_mtt = tavor_mtt_refcnt_dec(mtt_refcnt); 554 if (!shared_mtt) { 555 tavor_rsrc_free(state, &mtt_refcnt); 556 } 557 558 /* 559 * Free up the MTT entries and unbind the memory. Here, 560 * as above, we attempt to free these resources only if 561 * it is appropriate to do so. 562 */ 563 if (!shared_mtt) { 564 if (level >= TAVOR_MR_DEREG_NO_HW2SW_MPT) { 565 tavor_mr_mem_unbind(state, bind); 566 } 567 tavor_rsrc_free(state, &mtt); 568 } 569 } 570 571 /* 572 * If the MR handle has been invalidated, then drop the 573 * lock and return success. Note: This only happens because 574 * the umem_lockmemory() callback has been triggered. The 575 * cleanup here is partial, and further cleanup (in a 576 * subsequent tavor_mr_deregister() call) will be necessary. 
577 */ 578 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) { 579 mutex_exit(&mr->mr_lock); 580 return (DDI_SUCCESS); 581 } 582 583 mrdereg_finish_cleanup: 584 mutex_exit(&mr->mr_lock); 585 586 /* Free the Tavor Memory Region handle */ 587 tavor_rsrc_free(state, &rsrc); 588 589 /* Free up the MPT entry resource */ 590 tavor_rsrc_free(state, &mpt); 591 592 /* Decrement the reference count on the protection domain (PD) */ 593 tavor_pd_refcnt_dec(pd); 594 595 /* Set the mrhdl pointer to NULL and return success */ 596 *mrhdl = NULL; 597 598 return (DDI_SUCCESS); 599 } 600 601 602 /* 603 * tavor_mr_query() 604 * Context: Can be called from interrupt or base context. 605 */ 606 /* ARGSUSED */ 607 int 608 tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mr, 609 ibt_mr_query_attr_t *attr) 610 { 611 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr)) 612 613 mutex_enter(&mr->mr_lock); 614 615 /* 616 * Check here to see if the memory region has already been partially 617 * deregistered as a result of a tavor_umap_umemlock_cb() callback. 618 * If so, this is an error, return failure. 619 */ 620 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) { 621 mutex_exit(&mr->mr_lock); 622 return (IBT_MR_HDL_INVALID); 623 } 624 625 /* Fill in the queried attributes */ 626 attr->mr_attr_flags = mr->mr_accflag; 627 attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl; 628 629 /* Fill in the "local" attributes */ 630 attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey; 631 attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr; 632 attr->mr_lbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len; 633 634 /* 635 * Fill in the "remote" attributes (if necessary). Note: the 636 * remote attributes are only valid if the memory region has one 637 * or more of the remote access flags set. 
638 */ 639 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 640 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 641 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 642 attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey; 643 attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr; 644 attr->mr_rbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len; 645 } 646 647 /* 648 * If region is mapped for streaming (i.e. noncoherent), then set sync 649 * is required 650 */ 651 attr->mr_sync_required = (mr->mr_bindinfo.bi_flags & 652 IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE; 653 654 mutex_exit(&mr->mr_lock); 655 return (DDI_SUCCESS); 656 } 657 658 659 /* 660 * tavor_mr_reregister() 661 * Context: Can be called from interrupt or base context. 662 */ 663 int 664 tavor_mr_reregister(tavor_state_t *state, tavor_mrhdl_t mr, 665 tavor_pdhdl_t pd, ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new, 666 tavor_mr_options_t *op) 667 { 668 tavor_bind_info_t bind; 669 int status; 670 671 /* 672 * Fill in the "bind" struct. This struct provides the majority 673 * of the information that will be used to distinguish between an 674 * "addr" binding (as is the case here) and a "buf" binding (see 675 * below). The "bind" struct is later passed to tavor_mr_mem_bind() 676 * which does most of the "heavy lifting" for the Tavor memory 677 * registration (and reregistration) routines. 678 */ 679 bind.bi_type = TAVOR_BINDHDL_VADDR; 680 bind.bi_addr = mr_attr->mr_vaddr; 681 bind.bi_len = mr_attr->mr_len; 682 bind.bi_as = mr_attr->mr_as; 683 bind.bi_flags = mr_attr->mr_flags; 684 status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op); 685 686 return (status); 687 } 688 689 690 /* 691 * tavor_mr_reregister_buf() 692 * Context: Can be called from interrupt or base context. 
693 */ 694 int 695 tavor_mr_reregister_buf(tavor_state_t *state, tavor_mrhdl_t mr, 696 tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf, 697 tavor_mrhdl_t *mrhdl_new, tavor_mr_options_t *op) 698 { 699 tavor_bind_info_t bind; 700 int status; 701 702 /* 703 * Fill in the "bind" struct. This struct provides the majority 704 * of the information that will be used to distinguish between an 705 * "addr" binding (see above) and a "buf" binding (as is the case 706 * here). The "bind" struct is later passed to tavor_mr_mem_bind() 707 * which does most of the "heavy lifting" for the Tavor memory 708 * registration routines. Note: We have chosen to provide 709 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is 710 * not set). It is not critical what value we choose here as it need 711 * only be unique for the given RKey (which will happen by default), 712 * so the choice here is somewhat arbitrary. 713 */ 714 bind.bi_type = TAVOR_BINDHDL_BUF; 715 bind.bi_buf = buf; 716 if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) { 717 bind.bi_addr = mr_attr->mr_vaddr; 718 } else { 719 bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr; 720 } 721 bind.bi_len = (uint64_t)buf->b_bcount; 722 bind.bi_flags = mr_attr->mr_flags; 723 bind.bi_as = NULL; 724 status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op); 725 726 return (status); 727 } 728 729 730 /* 731 * tavor_mr_sync() 732 * Context: Can be called from interrupt or base context. 
733 */ 734 /* ARGSUSED */ 735 int 736 tavor_mr_sync(tavor_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs) 737 { 738 tavor_mrhdl_t mrhdl; 739 uint64_t seg_vaddr, seg_len, seg_end; 740 uint64_t mr_start, mr_end; 741 uint_t type; 742 int status, i; 743 744 /* Process each of the ibt_mr_sync_t's */ 745 for (i = 0; i < num_segs; i++) { 746 mrhdl = (tavor_mrhdl_t)mr_segs[i].ms_handle; 747 748 /* Check for valid memory region handle */ 749 if (mrhdl == NULL) { 750 goto mrsync_fail; 751 } 752 753 mutex_enter(&mrhdl->mr_lock); 754 755 /* 756 * Check here to see if the memory region has already been 757 * partially deregistered as a result of a 758 * tavor_umap_umemlock_cb() callback. If so, this is an 759 * error, return failure. 760 */ 761 if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) { 762 mutex_exit(&mrhdl->mr_lock); 763 goto mrsync_fail; 764 } 765 766 /* Check for valid bounds on sync request */ 767 seg_vaddr = mr_segs[i].ms_vaddr; 768 seg_len = mr_segs[i].ms_len; 769 seg_end = seg_vaddr + seg_len - 1; 770 mr_start = mrhdl->mr_bindinfo.bi_addr; 771 mr_end = mr_start + mrhdl->mr_bindinfo.bi_len - 1; 772 if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) { 773 mutex_exit(&mrhdl->mr_lock); 774 goto mrsync_fail; 775 } 776 if ((seg_end < mr_start) || (seg_end > mr_end)) { 777 mutex_exit(&mrhdl->mr_lock); 778 goto mrsync_fail; 779 } 780 781 /* Determine what type (i.e. direction) for sync */ 782 if (mr_segs[i].ms_flags & IBT_SYNC_READ) { 783 type = DDI_DMA_SYNC_FORDEV; 784 } else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) { 785 type = DDI_DMA_SYNC_FORCPU; 786 } else { 787 mutex_exit(&mrhdl->mr_lock); 788 goto mrsync_fail; 789 } 790 791 (void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl, 792 (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type); 793 mutex_exit(&mrhdl->mr_lock); 794 } 795 796 return (DDI_SUCCESS); 797 798 mrsync_fail: 799 return (status); 800 } 801 802 803 /* 804 * tavor_mw_alloc() 805 * Context: Can be called from interrupt or base context. 
806 */ 807 int 808 tavor_mw_alloc(tavor_state_t *state, tavor_pdhdl_t pd, ibt_mw_flags_t flags, 809 tavor_mwhdl_t *mwhdl) 810 { 811 tavor_rsrc_t *mpt, *rsrc; 812 tavor_hw_mpt_t mpt_entry; 813 tavor_mwhdl_t mw; 814 uint_t sleep; 815 int status; 816 817 /* 818 * Check the sleep flag. Ensure that it is consistent with the 819 * current thread context (i.e. if we are currently in the interrupt 820 * context, then we shouldn't be attempting to sleep). 821 */ 822 sleep = (flags & IBT_MW_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP; 823 if ((sleep == TAVOR_SLEEP) && 824 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 825 goto mwalloc_fail; 826 } 827 828 /* Increment the reference count on the protection domain (PD) */ 829 tavor_pd_refcnt_inc(pd); 830 831 /* 832 * Allocate an MPT entry (for use as a memory window). Since the 833 * Tavor hardware uses the MPT entry for memory regions and for 834 * memory windows, we will fill in this MPT with all the necessary 835 * parameters for the memory window. And then (just as we do for 836 * memory regions) ownership will be passed to the hardware in the 837 * final step below. If we fail here, we must undo the protection 838 * domain reference count. 839 */ 840 status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt); 841 if (status != DDI_SUCCESS) { 842 goto mwalloc_fail1; 843 } 844 845 /* 846 * Allocate the software structure for tracking the memory window (i.e. 847 * the Tavor Memory Window handle). Note: This is actually the same 848 * software structure used for tracking memory regions, but since many 849 * of the same properties are needed, only a single structure is 850 * necessary. If we fail here, we must undo the protection domain 851 * reference count and the previous resource allocation. 
852 */ 853 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc); 854 if (status != DDI_SUCCESS) { 855 goto mwalloc_fail2; 856 } 857 mw = (tavor_mwhdl_t)rsrc->tr_addr; 858 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw)) 859 860 /* 861 * Calculate an "unbound" RKey from MPT index. In much the same way 862 * as we do for memory regions (above), this key is constructed from 863 * a "constrained" (which depends on the MPT index) and an 864 * "unconstrained" portion (which may be arbitrarily chosen). 865 */ 866 tavor_mr_keycalc(state, mpt->tr_indx, &mw->mr_rkey); 867 868 /* 869 * Fill in the MPT entry. This is the final step before passing 870 * ownership of the MPT entry to the Tavor hardware. We use all of 871 * the information collected/calculated above to fill in the 872 * requisite portions of the MPT. Note: fewer entries in the MPT 873 * entry are necessary to allocate a memory window. 874 */ 875 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t)); 876 mpt_entry.reg_win = TAVOR_MPT_IS_WINDOW; 877 mpt_entry.mem_key = mw->mr_rkey; 878 mpt_entry.pd = pd->pd_pdnum; 879 880 /* 881 * Write the MPT entry to hardware. Lastly, we pass ownership of 882 * the entry to the hardware. Note: in general, this operation 883 * shouldn't fail. But if it does, we have to undo everything we've 884 * done above before returning error. 885 */ 886 status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 887 sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep); 888 if (status != TAVOR_CMD_SUCCESS) { 889 cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n", 890 status); 891 goto mwalloc_fail3; 892 } 893 894 /* 895 * Fill in the rest of the Tavor Memory Window handle. Having 896 * successfully transferred ownership of the MPT, we can update the 897 * following fields for use in further operations on the MW. 
898 */ 899 mw->mr_mptrsrcp = mpt; 900 mw->mr_pdhdl = pd; 901 mw->mr_rsrcp = rsrc; 902 *mwhdl = mw; 903 904 return (DDI_SUCCESS); 905 906 mwalloc_fail3: 907 tavor_rsrc_free(state, &rsrc); 908 mwalloc_fail2: 909 tavor_rsrc_free(state, &mpt); 910 mwalloc_fail1: 911 tavor_pd_refcnt_dec(pd); 912 mwalloc_fail: 913 return (status); 914 } 915 916 917 /* 918 * tavor_mw_free() 919 * Context: Can be called from interrupt or base context. 920 */ 921 int 922 tavor_mw_free(tavor_state_t *state, tavor_mwhdl_t *mwhdl, uint_t sleep) 923 { 924 tavor_rsrc_t *mpt, *rsrc; 925 tavor_mwhdl_t mw; 926 int status; 927 tavor_pdhdl_t pd; 928 929 /* 930 * Check the sleep flag. Ensure that it is consistent with the 931 * current thread context (i.e. if we are currently in the interrupt 932 * context, then we shouldn't be attempting to sleep). 933 */ 934 if ((sleep == TAVOR_SLEEP) && 935 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 936 return (status); 937 } 938 939 /* 940 * Pull all the necessary information from the Tavor Memory Window 941 * handle. This is necessary here because the resource for the 942 * MW handle is going to be freed up as part of the this operation. 943 */ 944 mw = *mwhdl; 945 mutex_enter(&mw->mr_lock); 946 mpt = mw->mr_mptrsrcp; 947 rsrc = mw->mr_rsrcp; 948 pd = mw->mr_pdhdl; 949 mutex_exit(&mw->mr_lock); 950 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw)) 951 952 /* 953 * Reclaim the MPT entry from hardware. Note: in general, it is 954 * unexpected for this operation to return an error. 
955 */ 956 status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL, 957 0, mpt->tr_indx, sleep); 958 if (status != TAVOR_CMD_SUCCESS) { 959 cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: %08x\n", 960 status); 961 return (IBT_INVALID_PARAM); 962 } 963 964 /* Free the Tavor Memory Window handle */ 965 tavor_rsrc_free(state, &rsrc); 966 967 /* Free up the MPT entry resource */ 968 tavor_rsrc_free(state, &mpt); 969 970 /* Decrement the reference count on the protection domain (PD) */ 971 tavor_pd_refcnt_dec(pd); 972 973 /* Set the mwhdl pointer to NULL and return success */ 974 *mwhdl = NULL; 975 976 return (DDI_SUCCESS); 977 } 978 979 980 /* 981 * tavor_mr_keycalc() 982 * Context: Can be called from interrupt or base context. 983 */ 984 void 985 tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key) 986 { 987 uint32_t tmp, log_num_mpt; 988 989 /* 990 * Generate a simple key from counter. Note: We increment this 991 * static variable _intentionally_ without any kind of mutex around 992 * it. First, single-threading all operations through a single lock 993 * would be a bad idea (from a performance point-of-view). Second, 994 * the upper "unconstrained" bits don't really have to be unique 995 * because the lower bits are guaranteed to be (although we do make a 996 * best effort to ensure that they are). Third, the window for the 997 * race (where both threads read and update the counter at the same 998 * time) is incredibly small. 999 * And, lastly, we'd like to make this into a "random" key XXX 1000 */ 1001 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(tavor_debug_memkey_cnt)) 1002 log_num_mpt = state->ts_cfg_profile->cp_log_num_mpt; 1003 tmp = (tavor_debug_memkey_cnt++) << log_num_mpt; 1004 *key = tmp | indx; 1005 } 1006 1007 1008 /* 1009 * tavor_mr_common_reg() 1010 * Context: Can be called from interrupt or base context. 
1011 */ 1012 static int 1013 tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd, 1014 tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op) 1015 { 1016 tavor_rsrc_pool_info_t *rsrc_pool; 1017 tavor_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt; 1018 tavor_umap_db_entry_t *umapdb; 1019 tavor_sw_refcnt_t *swrc_tmp; 1020 tavor_hw_mpt_t mpt_entry; 1021 tavor_mrhdl_t mr; 1022 ibt_mr_flags_t flags; 1023 tavor_bind_info_t *bh; 1024 ddi_dma_handle_t bind_dmahdl; 1025 ddi_umem_cookie_t umem_cookie; 1026 size_t umem_len; 1027 caddr_t umem_addr; 1028 uint64_t mtt_addr, mtt_ddrbaseaddr, max_sz; 1029 uint_t sleep, mtt_pgsize_bits, bind_type, mr_is_umem; 1030 int status, umem_flags, bind_override_addr; 1031 1032 /* 1033 * Check the "options" flag. Currently this flag tells the driver 1034 * whether or not the region should be bound normally (i.e. with 1035 * entries written into the PCI IOMMU), whether it should be 1036 * registered to bypass the IOMMU, and whether or not the resulting 1037 * address should be "zero-based" (to aid the alignment restrictions 1038 * for QPs). 1039 */ 1040 if (op == NULL) { 1041 bind_type = TAVOR_BINDMEM_NORMAL; 1042 bind_dmahdl = NULL; 1043 bind_override_addr = 0; 1044 } else { 1045 bind_type = op->mro_bind_type; 1046 bind_dmahdl = op->mro_bind_dmahdl; 1047 bind_override_addr = op->mro_bind_override_addr; 1048 } 1049 1050 /* Extract the flags field from the tavor_bind_info_t */ 1051 flags = bind->bi_flags; 1052 1053 /* 1054 * Check for invalid length. Check is the length is zero or if the 1055 * length is larger than the maximum configured value. Return error 1056 * if it is. 1057 */ 1058 max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz); 1059 if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) { 1060 goto mrcommon_fail; 1061 } 1062 1063 /* 1064 * Check the sleep flag. Ensure that it is consistent with the 1065 * current thread context (i.e. 
if we are currently in the interrupt 1066 * context, then we shouldn't be attempting to sleep). 1067 */ 1068 sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP; 1069 if ((sleep == TAVOR_SLEEP) && 1070 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 1071 goto mrcommon_fail; 1072 } 1073 1074 /* 1075 * Get the base address for the MTT table. This will be necessary 1076 * below when we are setting up the MPT entry. 1077 */ 1078 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT]; 1079 mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset; 1080 1081 /* Increment the reference count on the protection domain (PD) */ 1082 tavor_pd_refcnt_inc(pd); 1083 1084 /* 1085 * Allocate an MPT entry. This will be filled in with all the 1086 * necessary parameters to define the memory region. And then 1087 * ownership will be passed to the hardware in the final step 1088 * below. If we fail here, we must undo the protection domain 1089 * reference count. 1090 */ 1091 status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt); 1092 if (status != DDI_SUCCESS) { 1093 goto mrcommon_fail1; 1094 } 1095 1096 /* 1097 * Allocate the software structure for tracking the memory region (i.e. 1098 * the Tavor Memory Region handle). If we fail here, we must undo 1099 * the protection domain reference count and the previous resource 1100 * allocation. 1101 */ 1102 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc); 1103 if (status != DDI_SUCCESS) { 1104 goto mrcommon_fail2; 1105 } 1106 mr = (tavor_mrhdl_t)rsrc->tr_addr; 1107 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 1108 1109 /* 1110 * Setup and validate the memory region access flags. This means 1111 * translating the IBTF's enable flags into the access flags that 1112 * will be used in later operations. 
1113 */ 1114 mr->mr_accflag = 0; 1115 if (flags & IBT_MR_ENABLE_WINDOW_BIND) 1116 mr->mr_accflag |= IBT_MR_WINDOW_BIND; 1117 if (flags & IBT_MR_ENABLE_LOCAL_WRITE) 1118 mr->mr_accflag |= IBT_MR_LOCAL_WRITE; 1119 if (flags & IBT_MR_ENABLE_REMOTE_READ) 1120 mr->mr_accflag |= IBT_MR_REMOTE_READ; 1121 if (flags & IBT_MR_ENABLE_REMOTE_WRITE) 1122 mr->mr_accflag |= IBT_MR_REMOTE_WRITE; 1123 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 1124 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; 1125 1126 /* 1127 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 1128 * from a certain number of "constrained" bits (the least significant 1129 * bits) and some number of "unconstrained" bits. The constrained 1130 * bits must be set to the index of the entry in the MPT table, but 1131 * the unconstrained bits can be set to any value we wish. Note: 1132 * if no remote access is required, then the RKey value is not filled 1133 * in. Otherwise both Rkey and LKey are given the same value. 1134 */ 1135 tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey); 1136 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 1137 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 1138 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 1139 mr->mr_rkey = mr->mr_lkey; 1140 } 1141 1142 /* 1143 * Determine if the memory is from userland and pin the pages 1144 * with umem_lockmemory() if necessary. 1145 * Then, if this is userland memory, allocate an entry in the 1146 * "userland resources database". This will later be added to 1147 * the database (after all further memory registration operations are 1148 * successful). If we fail here, we must undo the reference counts 1149 * and the previous resource allocations. 1150 */ 1151 mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 
1 : 0); 1152 if (mr_is_umem) { 1153 umem_len = ptob(btopr(bind->bi_len + 1154 ((uintptr_t)bind->bi_addr & PAGEOFFSET))); 1155 umem_addr = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET); 1156 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ | 1157 DDI_UMEMLOCK_LONGTERM); 1158 status = umem_lockmemory(umem_addr, umem_len, umem_flags, 1159 &umem_cookie, &tavor_umem_cbops, NULL); 1160 if (status != 0) { 1161 goto mrcommon_fail3; 1162 } 1163 1164 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 1165 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf)) 1166 1167 bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len, 1168 B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP); 1169 if (bind->bi_buf == NULL) { 1170 goto mrcommon_fail3; 1171 } 1172 bind->bi_type = TAVOR_BINDHDL_UBUF; 1173 bind->bi_buf->b_flags |= B_READ; 1174 1175 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf)) 1176 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind)) 1177 1178 umapdb = tavor_umap_db_alloc(state->ts_instance, 1179 (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC, 1180 (uint64_t)(uintptr_t)rsrc); 1181 if (umapdb == NULL) { 1182 goto mrcommon_fail4; 1183 } 1184 } 1185 1186 /* 1187 * Setup the bindinfo for the mtt bind call 1188 */ 1189 bh = &mr->mr_bindinfo; 1190 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh)) 1191 bcopy(bind, bh, sizeof (tavor_bind_info_t)); 1192 bh->bi_bypass = bind_type; 1193 status = tavor_mr_mtt_bind(state, bh, bind_dmahdl, &mtt, 1194 &mtt_pgsize_bits); 1195 if (status != DDI_SUCCESS) { 1196 /* 1197 * When mtt_bind fails, freerbuf has already been done, 1198 * so make sure not to call it again. 1199 */ 1200 bind->bi_type = bh->bi_type; 1201 goto mrcommon_fail5; 1202 } 1203 mr->mr_logmttpgsz = mtt_pgsize_bits; 1204 1205 /* 1206 * Allocate MTT reference count (to track shared memory regions). 1207 * This reference count resource may never be used on the given 1208 * memory region, but if it is ever later registered as "shared" 1209 * memory region then this resource will be necessary. 
If we fail 1210 * here, we do pretty much the same as above to clean up. 1211 */ 1212 status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1, sleep, 1213 &mtt_refcnt); 1214 if (status != DDI_SUCCESS) { 1215 goto mrcommon_fail6; 1216 } 1217 mr->mr_mttrefcntp = mtt_refcnt; 1218 swrc_tmp = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr; 1219 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp)) 1220 TAVOR_MTT_REFCNT_INIT(swrc_tmp); 1221 1222 /* 1223 * Fill in the MPT entry. This is the final step before passing 1224 * ownership of the MPT entry to the Tavor hardware. We use all of 1225 * the information collected/calculated above to fill in the 1226 * requisite portions of the MPT. 1227 */ 1228 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t)); 1229 mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE; 1230 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0; 1231 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 1232 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; 1233 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0; 1234 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0; 1235 mpt_entry.lr = 1; 1236 mpt_entry.reg_win = TAVOR_MPT_IS_REGION; 1237 mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC; 1238 mpt_entry.mem_key = mr->mr_lkey; 1239 mpt_entry.pd = pd->pd_pdnum; 1240 if (bind_override_addr == 0) { 1241 mpt_entry.start_addr = bh->bi_addr; 1242 } else { 1243 bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1); 1244 mpt_entry.start_addr = bh->bi_addr; 1245 } 1246 mpt_entry.reg_win_len = bh->bi_len; 1247 mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND; 1248 mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT); 1249 mpt_entry.mttseg_addr_h = mtt_addr >> 32; 1250 mpt_entry.mttseg_addr_l = mtt_addr >> 6; 1251 1252 /* 1253 * Write the MPT entry to hardware. Lastly, we pass ownership of 1254 * the entry to the hardware. Note: in general, this operation 1255 * shouldn't fail. 
But if it does, we have to undo everything we've 1256 * done above before returning error. 1257 */ 1258 status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 1259 sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep); 1260 if (status != TAVOR_CMD_SUCCESS) { 1261 cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n", 1262 status); 1263 goto mrcommon_fail7; 1264 } 1265 1266 /* 1267 * Fill in the rest of the Tavor Memory Region handle. Having 1268 * successfully transferred ownership of the MPT, we can update the 1269 * following fields for use in further operations on the MR. 1270 */ 1271 mr->mr_mptrsrcp = mpt; 1272 mr->mr_mttrsrcp = mtt; 1273 mr->mr_pdhdl = pd; 1274 mr->mr_rsrcp = rsrc; 1275 mr->mr_is_umem = mr_is_umem; 1276 mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL; 1277 mr->mr_umem_cbfunc = NULL; 1278 mr->mr_umem_cbarg1 = NULL; 1279 mr->mr_umem_cbarg2 = NULL; 1280 1281 /* 1282 * If this is userland memory, then we need to insert the previously 1283 * allocated entry into the "userland resources database". This will 1284 * allow for later coordination between the tavor_umap_umemlock_cb() 1285 * callback and tavor_mr_deregister(). 1286 */ 1287 if (mr_is_umem) { 1288 tavor_umap_db_add(umapdb); 1289 } 1290 1291 *mrhdl = mr; 1292 1293 return (DDI_SUCCESS); 1294 1295 /* 1296 * The following is cleanup for all possible failure cases in this routine 1297 */ 1298 mrcommon_fail7: 1299 tavor_rsrc_free(state, &mtt_refcnt); 1300 mrcommon_fail6: 1301 tavor_rsrc_free(state, &mtt); 1302 tavor_mr_mem_unbind(state, bh); 1303 bind->bi_type = bh->bi_type; 1304 mrcommon_fail5: 1305 if (mr_is_umem) { 1306 tavor_umap_db_free(umapdb); 1307 } 1308 mrcommon_fail4: 1309 if (mr_is_umem) { 1310 /* 1311 * Free up the memory ddi_umem_iosetup() allocates 1312 * internally. 
1313 */ 1314 if (bind->bi_type == TAVOR_BINDHDL_UBUF) { 1315 freerbuf(bind->bi_buf); 1316 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 1317 bind->bi_type = TAVOR_BINDHDL_NONE; 1318 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind)) 1319 } 1320 ddi_umem_unlock(umem_cookie); 1321 } 1322 mrcommon_fail3: 1323 tavor_rsrc_free(state, &rsrc); 1324 mrcommon_fail2: 1325 tavor_rsrc_free(state, &mpt); 1326 mrcommon_fail1: 1327 tavor_pd_refcnt_dec(pd); 1328 mrcommon_fail: 1329 return (status); 1330 } 1331 1332 int 1333 tavor_dma_mr_register(tavor_state_t *state, tavor_pdhdl_t pd, 1334 ibt_dmr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl) 1335 { 1336 tavor_rsrc_t *mpt, *rsrc; 1337 tavor_hw_mpt_t mpt_entry; 1338 tavor_mrhdl_t mr; 1339 ibt_mr_flags_t flags; 1340 uint_t sleep; 1341 int status; 1342 1343 /* Extract the flags field */ 1344 flags = mr_attr->dmr_flags; 1345 1346 /* 1347 * Check the sleep flag. Ensure that it is consistent with the 1348 * current thread context (i.e. if we are currently in the interrupt 1349 * context, then we shouldn't be attempting to sleep). 1350 */ 1351 sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP; 1352 if ((sleep == TAVOR_SLEEP) && 1353 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 1354 status = IBT_INVALID_PARAM; 1355 goto mrcommon_fail; 1356 } 1357 1358 /* Increment the reference count on the protection domain (PD) */ 1359 tavor_pd_refcnt_inc(pd); 1360 1361 /* 1362 * Allocate an MPT entry. This will be filled in with all the 1363 * necessary parameters to define the memory region. And then 1364 * ownership will be passed to the hardware in the final step 1365 * below. If we fail here, we must undo the protection domain 1366 * reference count. 1367 */ 1368 status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt); 1369 if (status != DDI_SUCCESS) { 1370 status = IBT_INSUFF_RESOURCE; 1371 goto mrcommon_fail1; 1372 } 1373 1374 /* 1375 * Allocate the software structure for tracking the memory region (i.e. 1376 * the Tavor Memory Region handle). 
If we fail here, we must undo 1377 * the protection domain reference count and the previous resource 1378 * allocation. 1379 */ 1380 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc); 1381 if (status != DDI_SUCCESS) { 1382 status = IBT_INSUFF_RESOURCE; 1383 goto mrcommon_fail2; 1384 } 1385 mr = (tavor_mrhdl_t)rsrc->tr_addr; 1386 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 1387 bzero(mr, sizeof (*mr)); 1388 1389 /* 1390 * Setup and validate the memory region access flags. This means 1391 * translating the IBTF's enable flags into the access flags that 1392 * will be used in later operations. 1393 */ 1394 mr->mr_accflag = 0; 1395 if (flags & IBT_MR_ENABLE_WINDOW_BIND) 1396 mr->mr_accflag |= IBT_MR_WINDOW_BIND; 1397 if (flags & IBT_MR_ENABLE_LOCAL_WRITE) 1398 mr->mr_accflag |= IBT_MR_LOCAL_WRITE; 1399 if (flags & IBT_MR_ENABLE_REMOTE_READ) 1400 mr->mr_accflag |= IBT_MR_REMOTE_READ; 1401 if (flags & IBT_MR_ENABLE_REMOTE_WRITE) 1402 mr->mr_accflag |= IBT_MR_REMOTE_WRITE; 1403 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 1404 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; 1405 1406 /* 1407 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 1408 * from a certain number of "constrained" bits (the least significant 1409 * bits) and some number of "unconstrained" bits. The constrained 1410 * bits must be set to the index of the entry in the MPT table, but 1411 * the unconstrained bits can be set to any value we wish. Note: 1412 * if no remote access is required, then the RKey value is not filled 1413 * in. Otherwise both Rkey and LKey are given the same value. 1414 */ 1415 tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey); 1416 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 1417 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 1418 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 1419 mr->mr_rkey = mr->mr_lkey; 1420 } 1421 1422 /* 1423 * Fill in the MPT entry. This is the final step before passing 1424 * ownership of the MPT entry to the Tavor hardware. 
We use all of 1425 * the information collected/calculated above to fill in the 1426 * requisite portions of the MPT. 1427 */ 1428 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t)); 1429 1430 mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE; 1431 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0; 1432 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 1433 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; 1434 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0; 1435 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0; 1436 mpt_entry.lr = 1; 1437 mpt_entry.phys_addr = 1; /* critical bit for this */ 1438 mpt_entry.reg_win = TAVOR_MPT_IS_REGION; 1439 1440 mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC; 1441 mpt_entry.mem_key = mr->mr_lkey; 1442 mpt_entry.pd = pd->pd_pdnum; 1443 mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND; 1444 1445 mpt_entry.start_addr = mr_attr->dmr_paddr; 1446 mpt_entry.reg_win_len = mr_attr->dmr_len; 1447 1448 mpt_entry.mttseg_addr_h = 0; 1449 mpt_entry.mttseg_addr_l = 0; 1450 1451 /* 1452 * Write the MPT entry to hardware. Lastly, we pass ownership of 1453 * the entry to the hardware if needed. Note: in general, this 1454 * operation shouldn't fail. But if it does, we have to undo 1455 * everything we've done above before returning error. 1456 * 1457 * For Tavor, this routine (which is common to the contexts) will only 1458 * set the ownership if needed - the process of passing the context 1459 * itself to HW will take care of setting up the MPT (based on type 1460 * and index). 1461 */ 1462 1463 status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 1464 sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep); 1465 if (status != TAVOR_CMD_SUCCESS) { 1466 cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n", 1467 status); 1468 status = ibc_get_ci_failure(0); 1469 goto mrcommon_fail7; 1470 } 1471 1472 /* 1473 * Fill in the rest of the Tavor Memory Region handle. 
Having 1474 * successfully transferred ownership of the MPT, we can update the 1475 * following fields for use in further operations on the MR. 1476 */ 1477 mr->mr_mptrsrcp = mpt; 1478 mr->mr_mttrsrcp = NULL; 1479 mr->mr_pdhdl = pd; 1480 mr->mr_rsrcp = rsrc; 1481 mr->mr_is_umem = 0; 1482 mr->mr_umemcookie = NULL; 1483 mr->mr_umem_cbfunc = NULL; 1484 mr->mr_umem_cbarg1 = NULL; 1485 mr->mr_umem_cbarg2 = NULL; 1486 1487 *mrhdl = mr; 1488 1489 return (DDI_SUCCESS); 1490 1491 /* 1492 * The following is cleanup for all possible failure cases in this routine 1493 */ 1494 mrcommon_fail7: 1495 tavor_rsrc_free(state, &rsrc); 1496 mrcommon_fail2: 1497 tavor_rsrc_free(state, &mpt); 1498 mrcommon_fail1: 1499 tavor_pd_refcnt_dec(pd); 1500 mrcommon_fail: 1501 return (status); 1502 } 1503 1504 /* 1505 * tavor_mr_mtt_bind() 1506 * Context: Can be called from interrupt or base context. 1507 */ 1508 int 1509 tavor_mr_mtt_bind(tavor_state_t *state, tavor_bind_info_t *bind, 1510 ddi_dma_handle_t bind_dmahdl, tavor_rsrc_t **mtt, uint_t *mtt_pgsize_bits) 1511 { 1512 uint64_t nummtt; 1513 uint_t sleep; 1514 int status; 1515 1516 /* 1517 * Check the sleep flag. Ensure that it is consistent with the 1518 * current thread context (i.e. if we are currently in the interrupt 1519 * context, then we shouldn't be attempting to sleep). 1520 */ 1521 sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP; 1522 if ((sleep == TAVOR_SLEEP) && 1523 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 1524 goto mrmttbind_fail; 1525 } 1526 1527 /* 1528 * Bind the memory and determine the mapped addresses. This is 1529 * the first of two routines that do all the "heavy lifting" for 1530 * the Tavor memory registration routines. 
The tavor_mr_mem_bind()
 * routine takes the "bind" struct with all its fields filled
 * in and returns a list of DMA cookies (for the PCI mapped addresses
 * corresponding to the specified address region) which are used by
 * the tavor_mr_fast_mtt_write() routine below.  If we fail here, we
 * must undo all the previous resource allocation (and PD reference
 * count).
 */
	status = tavor_mr_mem_bind(state, bind, bind_dmahdl, sleep);
	if (status != DDI_SUCCESS) {
		goto mrmttbind_fail;
	}

	/*
	 * Determine number of pages spanned.  This routine uses the
	 * information in the "bind" struct to determine the required
	 * number of MTT entries needed (and returns the suggested page size -
	 * as a "power-of-2" - for each MTT entry).
	 */
	nummtt = tavor_mr_nummtt_needed(state, bind, mtt_pgsize_bits);

	/*
	 * Allocate the MTT entries.  Use the calculations performed above to
	 * allocate the required number of MTT entries.  Note: MTT entries are
	 * allocated in "MTT segments" which consist of complete cachelines
	 * (i.e. 8 entries, 16 entries, etc.)  So the TAVOR_NUMMTT_TO_MTTSEG()
	 * macro is used to do the proper conversion.  If we fail here, we
	 * must not only undo all the previous resource allocation (and PD
	 * reference count), but we must also unbind the memory.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MTT,
	    TAVOR_NUMMTT_TO_MTTSEG(nummtt), sleep, mtt);
	if (status != DDI_SUCCESS) {
		goto mrmttbind_fail2;
	}

	/*
	 * Write the mapped addresses into the MTT entries.  This is part two
	 * of the "heavy lifting" routines that we talked about above.  Note:
	 * we pass the suggested page size from the earlier operation here.
	 * And if we fail here, we again do pretty much the same huge clean up.
 */
	status = tavor_mr_fast_mtt_write(*mtt, bind, *mtt_pgsize_bits);
	if (status != DDI_SUCCESS) {
		goto mrmttbind_fail3;
	}
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrmttbind_fail3:
	tavor_rsrc_free(state, mtt);
mrmttbind_fail2:
	tavor_mr_mem_unbind(state, bind);
mrmttbind_fail:
	return (status);
}


/*
 * tavor_mr_mtt_unbind()
 *    Context: Can be called from interrupt or base context.
 *
 *    Inverse of tavor_mr_mtt_bind(): unbinds the memory described by
 *    "bind" and releases the MTT entries that mapped it.
 */
int
tavor_mr_mtt_unbind(tavor_state_t *state, tavor_bind_info_t *bind,
    tavor_rsrc_t *mtt)
{
	/*
	 * Free up the MTT entries and unbind the memory.  Here, as above, we
	 * attempt to free these resources only if it is appropriate to do so.
	 */
	tavor_mr_mem_unbind(state, bind);
	tavor_rsrc_free(state, &mtt);

	return (DDI_SUCCESS);
}


/*
 * tavor_mr_common_rereg()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op)
{
	tavor_rsrc_t		*mpt;
	ibt_mr_attr_flags_t	acc_flags_to_use;
	ibt_mr_flags_t		flags;
	tavor_pdhdl_t		pd_to_use;
	tavor_hw_mpt_t		mpt_entry;
	uint64_t		mtt_addr_to_use, vaddr_to_use, len_to_use;
	uint_t			sleep, dereg_level;
	int			status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Check here to see if the memory region corresponds to a userland
	 * mapping.  Reregistration of userland memory regions is not
	 * currently supported.  Return failure.
XXX 1633 */ 1634 if (mr->mr_is_umem) { 1635 goto mrrereg_fail; 1636 } 1637 1638 mutex_enter(&mr->mr_lock); 1639 1640 /* Pull MPT resource pointer from the Tavor Memory Region handle */ 1641 mpt = mr->mr_mptrsrcp; 1642 1643 /* Extract the flags field from the tavor_bind_info_t */ 1644 flags = bind->bi_flags; 1645 1646 /* 1647 * Check the sleep flag. Ensure that it is consistent with the 1648 * current thread context (i.e. if we are currently in the interrupt 1649 * context, then we shouldn't be attempting to sleep). 1650 */ 1651 sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP; 1652 if ((sleep == TAVOR_SLEEP) && 1653 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 1654 mutex_exit(&mr->mr_lock); 1655 goto mrrereg_fail; 1656 } 1657 1658 /* 1659 * First step is to temporarily invalidate the MPT entry. This 1660 * regains ownership from the hardware, and gives us the opportunity 1661 * to modify the entry. Note: The HW2SW_MPT command returns the 1662 * current MPT entry contents. These are saved away here because 1663 * they will be reused in a later step below. If the region has 1664 * bound memory windows that we fail returning an "in use" error code. 1665 * Otherwise, this is an unexpected error and we deregister the 1666 * memory region and return error. 1667 * 1668 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect 1669 * against holding the lock around this rereg call in all contexts. 
1670 */ 1671 status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry, 1672 sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN); 1673 if (status != TAVOR_CMD_SUCCESS) { 1674 mutex_exit(&mr->mr_lock); 1675 if (status == TAVOR_CMD_REG_BOUND) { 1676 return (IBT_MR_IN_USE); 1677 } else { 1678 cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: " 1679 "%08x\n", status); 1680 1681 /* 1682 * Call deregister and ensure that all current 1683 * resources get freed up 1684 */ 1685 if (tavor_mr_deregister(state, &mr, 1686 TAVOR_MR_DEREG_ALL, sleep) != DDI_SUCCESS) { 1687 TAVOR_WARNING(state, "failed to deregister " 1688 "memory region"); 1689 } 1690 return (ibc_get_ci_failure(0)); 1691 } 1692 } 1693 1694 /* 1695 * If we're changing the protection domain, then validate the new one 1696 */ 1697 if (flags & IBT_MR_CHANGE_PD) { 1698 1699 /* Check for valid PD handle pointer */ 1700 if (pd == NULL) { 1701 mutex_exit(&mr->mr_lock); 1702 /* 1703 * Call deregister and ensure that all current 1704 * resources get properly freed up. Unnecessary 1705 * here to attempt to regain software ownership 1706 * of the MPT entry as that has already been 1707 * done above. 1708 */ 1709 if (tavor_mr_deregister(state, &mr, 1710 TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) != 1711 DDI_SUCCESS) { 1712 TAVOR_WARNING(state, "failed to deregister " 1713 "memory region"); 1714 } 1715 goto mrrereg_fail; 1716 } 1717 1718 /* Use the new PD handle in all operations below */ 1719 pd_to_use = pd; 1720 1721 } else { 1722 /* Use the current PD handle in all operations below */ 1723 pd_to_use = mr->mr_pdhdl; 1724 } 1725 1726 /* 1727 * If we're changing access permissions, then validate the new ones 1728 */ 1729 if (flags & IBT_MR_CHANGE_ACCESS) { 1730 /* 1731 * Validate the access flags. 
Both remote write and remote 1732 * atomic require the local write flag to be set 1733 */ 1734 if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) || 1735 (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) && 1736 !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) { 1737 mutex_exit(&mr->mr_lock); 1738 /* 1739 * Call deregister and ensure that all current 1740 * resources get properly freed up. Unnecessary 1741 * here to attempt to regain software ownership 1742 * of the MPT entry as that has already been 1743 * done above. 1744 */ 1745 if (tavor_mr_deregister(state, &mr, 1746 TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) != 1747 DDI_SUCCESS) { 1748 TAVOR_WARNING(state, "failed to deregister " 1749 "memory region"); 1750 } 1751 goto mrrereg_fail; 1752 } 1753 1754 /* 1755 * Setup and validate the memory region access flags. This 1756 * means translating the IBTF's enable flags into the access 1757 * flags that will be used in later operations. 1758 */ 1759 acc_flags_to_use = 0; 1760 if (flags & IBT_MR_ENABLE_WINDOW_BIND) 1761 acc_flags_to_use |= IBT_MR_WINDOW_BIND; 1762 if (flags & IBT_MR_ENABLE_LOCAL_WRITE) 1763 acc_flags_to_use |= IBT_MR_LOCAL_WRITE; 1764 if (flags & IBT_MR_ENABLE_REMOTE_READ) 1765 acc_flags_to_use |= IBT_MR_REMOTE_READ; 1766 if (flags & IBT_MR_ENABLE_REMOTE_WRITE) 1767 acc_flags_to_use |= IBT_MR_REMOTE_WRITE; 1768 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 1769 acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC; 1770 1771 } else { 1772 acc_flags_to_use = mr->mr_accflag; 1773 } 1774 1775 /* 1776 * If we're modifying the translation, then figure out whether 1777 * we can reuse the current MTT resources. This means calling 1778 * tavor_mr_rereg_xlat_helper() which does most of the heavy lifting 1779 * for the reregistration. If the current memory region contains 1780 * sufficient MTT entries for the new regions, then it will be 1781 * reused and filled in. Otherwise, new entries will be allocated, 1782 * the old ones will be freed, and the new entries will be filled 1783 * in. 
Note: If we're not modifying the translation, then we 1784 * should already have all the information we need to update the MPT. 1785 * Also note: If tavor_mr_rereg_xlat_helper() fails, it will return 1786 * a "dereg_level" which is the level of cleanup that needs to be 1787 * passed to tavor_mr_deregister() to finish the cleanup. 1788 */ 1789 if (flags & IBT_MR_CHANGE_TRANSLATION) { 1790 status = tavor_mr_rereg_xlat_helper(state, mr, bind, op, 1791 &mtt_addr_to_use, sleep, &dereg_level); 1792 if (status != DDI_SUCCESS) { 1793 mutex_exit(&mr->mr_lock); 1794 /* 1795 * Call deregister and ensure that all resources get 1796 * properly freed up. 1797 */ 1798 if (tavor_mr_deregister(state, &mr, dereg_level, 1799 sleep) != DDI_SUCCESS) { 1800 TAVOR_WARNING(state, "failed to deregister " 1801 "memory region"); 1802 } 1803 1804 goto mrrereg_fail; 1805 } 1806 vaddr_to_use = mr->mr_bindinfo.bi_addr; 1807 len_to_use = mr->mr_bindinfo.bi_len; 1808 } else { 1809 mtt_addr_to_use = (((uint64_t)mpt_entry.mttseg_addr_h << 32) | 1810 ((uint64_t)mpt_entry.mttseg_addr_l << 6)); 1811 vaddr_to_use = mr->mr_bindinfo.bi_addr; 1812 len_to_use = mr->mr_bindinfo.bi_len; 1813 } 1814 1815 /* 1816 * Calculate new keys (Lkey, Rkey) from MPT index. Just like they were 1817 * when the region was first registered, each key is formed from 1818 * "constrained" bits and "unconstrained" bits. Note: If no remote 1819 * access is required, then the RKey value is not filled in. Otherwise 1820 * both Rkey and LKey are given the same value. 1821 */ 1822 tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey); 1823 if ((acc_flags_to_use & IBT_MR_REMOTE_READ) || 1824 (acc_flags_to_use & IBT_MR_REMOTE_WRITE) || 1825 (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) { 1826 mr->mr_rkey = mr->mr_lkey; 1827 } 1828 1829 /* 1830 * Update the MPT entry with the new information. Some of this 1831 * information is retained from the previous operation, some of 1832 * it is new based on request. 
1833 */ 1834 mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND) ? 1 : 0; 1835 mpt_entry.atomic = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 1836 mpt_entry.rw = (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ? 1 : 0; 1837 mpt_entry.rr = (acc_flags_to_use & IBT_MR_REMOTE_READ) ? 1 : 0; 1838 mpt_entry.lw = (acc_flags_to_use & IBT_MR_LOCAL_WRITE) ? 1 : 0; 1839 mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC; 1840 mpt_entry.mem_key = mr->mr_lkey; 1841 mpt_entry.pd = pd_to_use->pd_pdnum; 1842 mpt_entry.start_addr = vaddr_to_use; 1843 mpt_entry.reg_win_len = len_to_use; 1844 mpt_entry.mttseg_addr_h = mtt_addr_to_use >> 32; 1845 mpt_entry.mttseg_addr_l = mtt_addr_to_use >> 6; 1846 1847 /* 1848 * Write the updated MPT entry to hardware 1849 * 1850 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect 1851 * against holding the lock around this rereg call in all contexts. 1852 */ 1853 status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 1854 sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN); 1855 if (status != TAVOR_CMD_SUCCESS) { 1856 mutex_exit(&mr->mr_lock); 1857 cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n", 1858 status); 1859 /* 1860 * Call deregister and ensure that all current resources get 1861 * properly freed up. Unnecessary here to attempt to regain 1862 * software ownership of the MPT entry as that has already 1863 * been done above. 1864 */ 1865 if (tavor_mr_deregister(state, &mr, 1866 TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) { 1867 TAVOR_WARNING(state, "failed to deregister memory " 1868 "region"); 1869 } 1870 return (ibc_get_ci_failure(0)); 1871 } 1872 1873 /* 1874 * If we're changing PD, then update their reference counts now. 1875 * This means decrementing the reference count on the old PD and 1876 * incrementing the reference count on the new PD. 
	 */
	if (flags & IBT_MR_CHANGE_PD) {
		tavor_pd_refcnt_dec(mr->mr_pdhdl);
		tavor_pd_refcnt_inc(pd);
	}

	/*
	 * Update the contents of the Tavor Memory Region handle to reflect
	 * what has been changed.  Note: after a reregister the region is no
	 * longer tracked as a userland (umem) mapping.
	 */
	mr->mr_pdhdl = pd_to_use;
	mr->mr_accflag = acc_flags_to_use;
	mr->mr_is_umem = 0;
	mr->mr_umemcookie = NULL;

	/* New MR handle is same as the old */
	*mrhdl_new = mr;
	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);

mrrereg_fail:
	return (status);
}


/*
 * tavor_mr_rereg_xlat_helper
 *    Context: Can be called from interrupt or base context.
 *    Note: This routine expects the "mr_lock" to be held when it
 *    is called.  Upon returning failure, this routine passes information
 *    about what "dereg_level" should be passed to tavor_mr_deregister().
 */
static int
tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mtt, *mtt_refcnt;
	tavor_sw_refcnt_t	*swrc_old, *swrc_new;
	ddi_dma_handle_t	dmahdl;
	uint64_t		nummtt_needed, nummtt_in_currrsrc, max_sz;
	uint64_t		mtt_ddrbaseaddr;
	uint_t			mtt_pgsize_bits, bind_type, reuse_dmahdl;
	int			status;

	ASSERT(MUTEX_HELD(&mr->mr_lock));

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU) or whether it should be
	 * registered to bypass the IOMMU.
	 */
	if (op == NULL) {
		bind_type = TAVOR_BINDMEM_NORMAL;
	} else {
		bind_type = op->mro_bind_type;
	}

	/*
	 * Check for invalid length.  Check if the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		/*
		 * Deregister will be called upon returning failure from this
		 * routine.  This will ensure that all current resources get
		 * properly freed up.  Unnecessary to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above (in tavor_mr_reregister())
		 */
		*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT;

		goto mrrereghelp_fail;
	}

	/*
	 * Determine the number of pages necessary for new region and the
	 * number of pages supported by the current MTT resources
	 */
	nummtt_needed = tavor_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
	nummtt_in_currrsrc = mr->mr_mttrsrcp->tr_len >> TAVOR_MTT_SIZE_SHIFT;

	/*
	 * Depending on whether we have enough pages or not, the next step is
	 * to fill in a set of MTT entries that reflect the new mapping.  In
	 * the first case below, we already have enough entries.  This means
	 * we need to unbind the memory from the previous mapping, bind the
	 * memory for the new mapping, write the new MTT entries, and update
	 * the mr to reflect the changes.
	 * In the second case below, we do not have enough entries in the
	 * current mapping.  So, in this case, we need not only to unbind the
	 * current mapping, but we need to free up the MTT resources associated
	 * with that mapping.  After we've successfully done that, we continue
	 * by binding the new memory, allocating new MTT entries, writing the
	 * new MTT entries, and updating the mr to reflect the changes.
	 */

	/*
	 * If this region is being shared (i.e. MTT refcount != 1), then we
	 * can't reuse the current MTT resources regardless of their size.
	 * Instead we'll need to alloc new ones (below) just as if there
	 * hadn't been enough room in the current entries.
	 */
	swrc_old = (tavor_sw_refcnt_t *)mr->mr_mttrefcntp->tr_addr;
	if (TAVOR_MTT_IS_NOT_SHARED(swrc_old) &&
	    (nummtt_needed <= nummtt_in_currrsrc)) {

		/*
		 * Unbind the old mapping for this memory region, but retain
		 * the ddi_dma_handle_t (if possible) for reuse in the bind
		 * operation below.  Note: If original memory region was
		 * bound for IOMMU bypass and the new region can not use
		 * bypass, then a new DMA handle will be necessary.
		 */
		if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
			/* Keep the DMA handle alive across the unbind */
			mr->mr_bindinfo.bi_free_dmahdl = 0;
			tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = mr->mr_bindinfo.bi_dmahdl;
			reuse_dmahdl = 1;
		} else {
			tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and tavor_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note: When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass = bind_type;
		status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Using the new mapping, but reusing the current MTT
		 * resources, write the updated entries to MTT
		 */
		mtt = mr->mr_mttrsrcp;
		status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we do need to unbind the newly bound memory
			 * before returning.
			 */
			tavor_mr_mem_unbind(state, bind);
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			goto mrrereghelp_fail;
		}

		/* Put the updated information into the Mem Region handle */
		mr->mr_bindinfo = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;

	} else {
		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not unbind any resources prematurely.
		 */
		if (!TAVOR_MTT_IS_SHARED(swrc_old)) {
			/*
			 * Unbind the old mapping for this memory region, but
			 * retain the ddi_dma_handle_t for reuse in the bind
			 * operation below.  Note: This can only be done here
			 * because the region being reregistered is not
			 * currently shared.  Also if original memory region
			 * was bound for IOMMU bypass and the new region can
			 * not use bypass, then a new DMA handle will be
			 * necessary.
			 */
			if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
				/* Keep the DMA handle alive across unbind */
				mr->mr_bindinfo.bi_free_dmahdl = 0;
				tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = mr->mr_bindinfo.bi_dmahdl;
				reuse_dmahdl = 1;
			} else {
				tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = NULL;
				reuse_dmahdl = 0;
			}
		} else {
			/* Shared MTT: leave the existing mapping untouched */
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and tavor_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note: When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass = bind_type;
		status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Allocate the new MTT entries resource
		 */
		status = tavor_rsrc_alloc(state, TAVOR_MTT,
		    TAVOR_NUMMTT_TO_MTTSEG(nummtt_needed), sleep, &mtt);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we do need to unbind the newly bound memory
			 * before returning.
			 */
			tavor_mr_mem_unbind(state, bind);
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			goto mrrereghelp_fail;
		}

		/*
		 * Allocate MTT reference count (to track shared memory
		 * regions).  As mentioned elsewhere above, this reference
		 * count resource may never be used on the given memory region,
		 * but if it is ever later registered as a "shared" memory
		 * region then this resource will be necessary.  Note: This
		 * is only necessary here if the existing memory region is
		 * already being shared (because otherwise we already have
		 * a useable reference count resource).
		 */
		if (TAVOR_MTT_IS_SHARED(swrc_old)) {
			status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1,
			    sleep, &mtt_refcnt);
			if (status != DDI_SUCCESS) {
				/*
				 * Deregister will be called upon returning
				 * failure from this routine.  This will ensure
				 * that all current resources get properly
				 * freed up.  Unnecessary to attempt to regain
				 * software ownership of the MPT entry as that
				 * has already been done above (in
				 * tavor_mr_reregister()).  Also unnecessary
				 * to attempt to unbind the memory.
				 *
				 * But we need to unbind the newly bound
				 * memory and free up the newly allocated MTT
				 * entries before returning.
				 */
				tavor_mr_mem_unbind(state, bind);
				tavor_rsrc_free(state, &mtt);
				*dereg_level =
				    TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

				goto mrrereghelp_fail;
			}
			swrc_new = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
			TAVOR_MTT_REFCNT_INIT(swrc_new);
		} else {
			/* Not shared: reuse the existing refcount resource */
			mtt_refcnt = mr->mr_mttrefcntp;
		}

		/*
		 * Using the new mapping and the new MTT resources, write the
		 * updated entries to MTT
		 */
		status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we need to unbind the newly bound memory,
			 * free up the newly allocated MTT entries, and
			 * (possibly) free the new MTT reference count
			 * resource before returning.
			 */
			if (TAVOR_MTT_IS_SHARED(swrc_old)) {
				tavor_rsrc_free(state, &mtt_refcnt);
			}
			tavor_mr_mem_unbind(state, bind);
			tavor_rsrc_free(state, &mtt);
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			goto mrrereghelp_fail;
		}

		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not free up any resources prematurely.
		 */
		if (TAVOR_MTT_IS_SHARED(swrc_old)) {
			/* Decrement MTT reference count for "old" region */
			(void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
		} else {
			/* Free up the old MTT entries resource */
			tavor_rsrc_free(state, &mr->mr_mttrsrcp);
		}

		/* Put the updated information into the mrhdl */
		mr->mr_bindinfo = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;
		mr->mr_mttrsrcp = mtt;
		mr->mr_mttrefcntp = mtt_refcnt;
	}

	/*
	 * Calculate and return the updated MTT address (in the DDR address
	 * space).  This will be used by the caller (tavor_mr_reregister) in
	 * the updated MPT entry
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;
	*mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx <<
	    TAVOR_MTT_SIZE_SHIFT);

	return (DDI_SUCCESS);

mrrereghelp_fail:
	return (status);
}


/*
 * tavor_mr_nummtt_needed()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static uint64_t
tavor_mr_nummtt_needed(tavor_state_t *state, tavor_bind_info_t *bind,
    uint_t *mtt_pgsize_bits)
{
	uint64_t	pg_offset_mask;
	uint64_t	pg_offset, tmp_length;

	/*
	 * For now we specify the page size as 8Kb (the default page size for
	 * the sun4u architecture), or 4Kb for x86.  Figure out optimal page
	 * size by examining the dmacookies XXX
	 */
	*mtt_pgsize_bits = PAGESHIFT;

	/* Count of pages spanned by [bi_addr, bi_addr + bi_len) */
	pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
	pg_offset = bind->bi_addr & pg_offset_mask;
	tmp_length = pg_offset + (bind->bi_len - 1);
	return ((tmp_length >> *mtt_pgsize_bits) + 1);
}


/*
 * tavor_mr_mem_bind()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep)
{
	ddi_dma_attr_t	dma_attr;
	int		(*callback)(caddr_t);
	uint_t		dma_xfer_mode;
	int		status;

	/* bi_type must be set to a meaningful value to get a bind handle */
	ASSERT(bind->bi_type == TAVOR_BINDHDL_VADDR ||
	    bind->bi_type == TAVOR_BINDHDL_BUF ||
	    bind->bi_type == TAVOR_BINDHDL_UBUF);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/* Set the callback flag appropriately */
	callback = (sleep == TAVOR_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;

	/* Determine whether to map STREAMING or CONSISTENT */
	dma_xfer_mode = (bind->bi_flags & IBT_MR_NONCOHERENT) ?
	    DDI_DMA_STREAMING : DDI_DMA_CONSISTENT;

	/*
	 * Initialize many of the default DMA attributes.  Then, if we're
	 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
	 */
	if (dmahdl == NULL) {
		tavor_dma_attr_init(&dma_attr);
#ifdef	__sparc
		/*
		 * First, disable streaming and switch to consistent if
		 * configured to do so and IOMMU BYPASS is enabled.
		 */
		if (state->ts_cfg_profile->cp_disable_streaming_on_bypass &&
		    dma_xfer_mode == DDI_DMA_STREAMING &&
		    bind->bi_bypass == TAVOR_BINDMEM_BYPASS) {
			dma_xfer_mode = DDI_DMA_CONSISTENT;
		}

		/*
		 * Then, if streaming is still specified, then "bypass" is not
		 * allowed.  (That is, DDI_DMA_FORCE_PHYSICAL is only
		 * requested when the transfer mode ended up CONSISTENT.)
		 */
		if ((dma_xfer_mode == DDI_DMA_CONSISTENT) &&
		    (bind->bi_bypass == TAVOR_BINDMEM_BYPASS)) {
			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
		}
#endif
		/* Allocate a DMA handle for the binding */
		status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr,
		    callback, NULL, &bind->bi_dmahdl);
		if (status != DDI_SUCCESS) {
			return (status);
		}
		bind->bi_free_dmahdl = 1;

	} else {
		/* Caller supplied a handle to reuse; don't free it on error */
		bind->bi_dmahdl = dmahdl;
		bind->bi_free_dmahdl = 0;
	}

	/*
	 * Bind the memory to get the PCI mapped addresses.  The decision
	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
	 * is determined by the "bi_type" flag.  Note: if the bind operation
	 * fails then we have to free up the DMA handle and return error.
	 */
	if (bind->bi_type == TAVOR_BINDHDL_VADDR) {
		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
		    (DDI_DMA_RDWR | dma_xfer_mode), callback, NULL,
		    &bind->bi_dmacookie, &bind->bi_cookiecnt);
	} else {  /* TAVOR_BINDHDL_BUF || TAVOR_BINDHDL_UBUF */
		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
		    bind->bi_buf, (DDI_DMA_RDWR | dma_xfer_mode), callback,
		    NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
	}

	if (status != DDI_DMA_MAPPED) {
		if (bind->bi_free_dmahdl != 0) {
			ddi_dma_free_handle(&bind->bi_dmahdl);
		}
		return (status);
	}

	return (DDI_SUCCESS);
}


/*
 * tavor_mr_mem_unbind()
 *    Context: Can be called from interrupt or base context.
 */
static void
tavor_mr_mem_unbind(tavor_state_t *state, tavor_bind_info_t *bind)
{
	int	status;

	/*
	 * In case of TAVOR_BINDHDL_UBUF, the memory bi_buf points to
	 * is actually allocated by ddi_umem_iosetup() internally, then
	 * it's required to free it here.
	 * Reset bi_type to TAVOR_BINDHDL_NONE so that it is
	 * not freed again later.
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
		freerbuf(bind->bi_buf);
		bind->bi_type = TAVOR_BINDHDL_NONE;
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Unbind the DMA memory for the region
	 *
	 * Note: The only way ddi_dma_unbind_handle() currently
	 * can return an error is if the handle passed in is invalid.
	 * Since this should never happen, we choose to return void
	 * from this function!  If this does return an error, however,
	 * then we print a warning message to the console.
	 */
	status = ddi_dma_unbind_handle(bind->bi_dmahdl);
	if (status != DDI_SUCCESS) {
		TAVOR_WARNING(state, "failed to unbind DMA mapping");
		return;
	}

	/* Free up the DMA handle */
	if (bind->bi_free_dmahdl != 0) {
		ddi_dma_free_handle(&bind->bi_dmahdl);
	}
}


/*
 * tavor_mr_fast_mtt_write()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind,
    uint32_t mtt_pgsize_bits)
{
	ddi_dma_cookie_t	dmacookie;
	uint_t			cookie_cnt;
	uint64_t		*mtt_table;
	uint64_t		mtt_entry;
	uint64_t		addr, endaddr;
	uint64_t		pagesize;
	int			i;

	/* Calculate page size from the suggested value passed in */
	pagesize = ((uint64_t)1 << mtt_pgsize_bits);

	/*
	 * Walk the "cookie list" and fill in the MTT table entries
	 */
	i = 0;
	mtt_table = (uint64_t *)mtt->tr_addr;
	dmacookie = bind->bi_dmacookie;
	cookie_cnt = bind->bi_cookiecnt;
	while (cookie_cnt-- > 0) {
		addr = dmacookie.dmac_laddress;
		endaddr = addr + (dmacookie.dmac_size - 1);
		/* Round the start address down to a page boundary */
		addr = addr & ~((uint64_t)pagesize - 1);
		while (addr <= endaddr) {
			/*
			 * Fill in the mapped addresses (calculated above) and
			 * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry.
			 */
			mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET;
			ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry);
			addr += pagesize;
			i++;

			if (addr == 0) {
				/*
				 * The page address wrapped around zero, so
				 * the cookie handed to us was almost
				 * certainly bogus.  Warn once and stop
				 * walking this cookie.
				 */
				static int do_once = 1;
				_NOTE(SCHEME_PROTECTS_DATA("safe sharing",
				    do_once))
				if (do_once) {
					do_once = 0;
					cmn_err(CE_NOTE, "probable error in "
					    "dma_cookie address from caller\n");
				}
				break;
			}
		}

		/*
		 * When we've reached the end of the current DMA cookie,
		 * jump to the next cookie (if there are more)
		 */
		if (cookie_cnt != 0) {
			ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
		}
	}

	return (DDI_SUCCESS);
}

/*
 * tavor_mtt_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
2503 */ 2504 static int 2505 tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc) 2506 { 2507 tavor_sw_refcnt_t *rc; 2508 uint32_t cnt; 2509 2510 rc = (tavor_sw_refcnt_t *)rsrc->tr_addr; 2511 2512 /* Increment the MTT's reference count */ 2513 mutex_enter(&rc->swrc_lock); 2514 cnt = rc->swrc_refcnt++; 2515 mutex_exit(&rc->swrc_lock); 2516 2517 return (cnt); 2518 } 2519 2520 2521 /* 2522 * tavor_mtt_refcnt_dec() 2523 * Context: Can be called from interrupt or base context. 2524 */ 2525 static int 2526 tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc) 2527 { 2528 tavor_sw_refcnt_t *rc; 2529 uint32_t cnt; 2530 2531 rc = (tavor_sw_refcnt_t *)rsrc->tr_addr; 2532 2533 /* Decrement the MTT's reference count */ 2534 mutex_enter(&rc->swrc_lock); 2535 cnt = --rc->swrc_refcnt; 2536 mutex_exit(&rc->swrc_lock); 2537 2538 return (cnt); 2539 } 2540