1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * tavor_mr.c 29 * Tavor Memory Region/Window Routines 30 * 31 * Implements all the routines necessary to provide the requisite memory 32 * registration verbs. These include operations like RegisterMemRegion(), 33 * DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion, 34 * etc., that affect Memory Regions. It also includes the verbs that 35 * affect Memory Windows, including AllocMemWindow(), FreeMemWindow(), 36 * and QueryMemWindow(). 
37 */ 38 39 #include <sys/types.h> 40 #include <sys/conf.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/modctl.h> 44 #include <sys/esunddi.h> 45 46 #include <sys/ib/adapters/tavor/tavor.h> 47 48 49 /* 50 * Used by tavor_mr_keycalc() below to fill in the "unconstrained" portion 51 * of Tavor memory keys (LKeys and RKeys) 52 */ 53 static uint_t tavor_debug_memkey_cnt = 0x00000000; 54 55 static int tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd, 56 tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op); 57 static int tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr, 58 tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new, 59 tavor_mr_options_t *op); 60 static int tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr, 61 tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr, 62 uint_t sleep, uint_t *dereg_level); 63 static uint64_t tavor_mr_nummtt_needed(tavor_state_t *state, 64 tavor_bind_info_t *bind, uint_t *mtt_pgsize); 65 static int tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind, 66 ddi_dma_handle_t dmahdl, uint_t sleep); 67 static void tavor_mr_mem_unbind(tavor_state_t *state, 68 tavor_bind_info_t *bind); 69 static int tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind, 70 uint32_t mtt_pgsize_bits); 71 static int tavor_mr_fast_mtt_write_fmr(tavor_rsrc_t *mtt, 72 ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits); 73 static int tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc); 74 static int tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc); 75 76 /* 77 * The Tavor umem_lockmemory() callback ops. When userland memory is 78 * registered, these callback ops are specified. The tavor_umap_umemlock_cb() 79 * callback will be called whenever the memory for the corresponding 80 * ddi_umem_cookie_t is being freed. 
 */
static struct umem_callback_ops tavor_umem_cbops = {
	UMEM_CALLBACK_VERSION,
	tavor_umap_umemlock_cb,
};


/*
 * tavor_mr_register()
 *    Context: Can be called from interrupt or base context.
 *
 * Register a memory region described by a virtual address range
 * ("mr_vaddr"/"mr_len" in an address space).  Thin wrapper that packages
 * the attributes into a tavor_bind_info_t and defers to
 * tavor_mr_common_reg().
 */
int
tavor_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
{
	tavor_bind_info_t	bind;
	int			status;

	TAVOR_TNF_ENTER(tavor_mr_register);

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (as is the case here) and a "buf" binding (see
	 * below).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration routines.
	 */
	bind.bi_type  = TAVOR_BINDHDL_VADDR;
	bind.bi_addr  = mr_attr->mr_vaddr;
	bind.bi_len   = mr_attr->mr_len;
	bind.bi_as    = mr_attr->mr_as;
	bind.bi_flags = mr_attr->mr_flags;
	status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_mr_register_cmnreg_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_register);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_register);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_register_buf()
 *    Context: Can be called from interrupt or base context.
 *
 * Register a memory region described by a struct buf.  Like
 * tavor_mr_register() this only builds a tavor_bind_info_t and defers
 * to tavor_mr_common_reg().
 */
int
tavor_mr_register_buf(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_smr_attr_t *mr_attr, struct buf *buf, tavor_mrhdl_t *mrhdl,
    tavor_mr_options_t *op)
{
	tavor_bind_info_t	bind;
	int			status;

	TAVOR_TNF_ENTER(tavor_mr_register_buf);

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (see above) and a "buf" binding (as is the case
	 * here).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration routines.  Note: We have chosen to provide
	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
	 * not set).  It is not critical what value we choose here as it need
	 * only be unique for the given RKey (which will happen by default),
	 * so the choice here is somewhat arbitrary.
	 */
	bind.bi_type = TAVOR_BINDHDL_BUF;
	bind.bi_buf  = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	bind.bi_as    = NULL;
	bind.bi_len   = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_mr_register_buf_cmnreg_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_register_buf);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_register_buf);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_register_shared()
 *    Context: Can be called from interrupt or base context.
 *
 * Create a new memory region that shares the MTT entries (the page
 * translations) of an existing region "mrhdl".  A new MPT entry is
 * allocated and handed to the hardware; the MTT reference count is
 * bumped so the shared translations are not freed prematurely.
 */
int
tavor_mr_register_shared(tavor_state_t *state, tavor_mrhdl_t mrhdl,
    tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mpt, *mtt, *rsrc;
	tavor_umap_db_entry_t	*umapdb;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mrhdl_t		mr;
	tavor_bind_info_t	*bind;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, mtt_ddrbaseaddr, pgsize_msk;
	uint_t			sleep, mr_is_umem;
	int			status, umem_flags;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_register_shared);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP :
	    TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrshared_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the shared memory region.
	 * Specifically, it will be made to reference the currently existing
	 * MTT entries and ownership of the MPT will be passed to the hardware
	 * in the last step below.  If we fail here, we must undo the
	 * protection domain reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto mrshared_fail1;
	}

	/*
	 * Allocate the software structure for tracking the shared memory
	 * region (i.e. the Tavor Memory Region handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto mrshared_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/* Grab the MR lock for the current memory region */
	mutex_enter(&mrhdl->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a tavor_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
		mutex_exit(&mrhdl->mr_lock);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
		goto mrshared_fail3;
	}

	/*
	 * Determine if the original memory was from userland and, if so, pin
	 * the pages (again) with umem_lockmemory().  This will guarantee a
	 * separate callback for each of this shared region's MR handles.
	 * If this is userland memory, then allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo all the above setup.
	 */
	mr_is_umem = mrhdl->mr_is_umem;
	if (mr_is_umem) {
		umem_len   = ptob(btopr(mrhdl->mr_bindinfo.bi_len +
		    ((uintptr_t)mrhdl->mr_bindinfo.bi_addr & PAGEOFFSET)));
		umem_addr  = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
		    ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &tavor_umem_cbops, NULL);
		if (status != 0) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
			goto mrshared_fail3;
		}

		umapdb = tavor_umap_db_alloc(state->ts_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
			goto mrshared_fail4;
		}
	}

	/*
	 * Copy the MTT resource pointer (and additional parameters) from
	 * the original Tavor Memory Region handle.  Note: this is normally
	 * where the tavor_mr_mem_bind() routine would be called, but because
	 * we already have bound and filled-in MTT entries it is simply a
	 * matter here of managing the MTT reference count and grabbing the
	 * address of the MTT table entries (for filling in the shared region's
	 * MPT entry).
	 */
	mr->mr_mttrsrcp   = mrhdl->mr_mttrsrcp;
	mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
	mr->mr_bindinfo   = mrhdl->mr_bindinfo;
	mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
	mutex_exit(&mrhdl->mr_lock);
	bind = &mr->mr_bindinfo;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	mtt = mr->mr_mttrsrcp;

	/*
	 * Increment the MTT reference count (to reflect the fact that
	 * the MTT is now shared)
	 */
	(void) tavor_mtt_refcnt_inc(mr->mr_mttrefcntp);

	/*
	 * Update the new "bind" virtual address.  Do some extra work here
	 * to ensure proper alignment.  That is, make sure that the page
	 * offset for the beginning of the old range is the same as the
	 * offset for this new mapping
	 */
	pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
	bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
	    (mr->mr_bindinfo.bi_addr & pgsize_msk));

	/*
	 * Get the base address for the MTT table.  This will be necessary
	 * in the next step when we are setting up the MPT entry.
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.m_io	  = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
	mpt_entry.page_sz	= mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.start_addr	= bind->bi_addr;
	mpt_entry.reg_win_len	= bind->bi_len;
	mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND;
	mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
	mpt_entry.mttseg_addr_h = mtt_addr >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr >> 6;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mr_register_shared_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto mrshared_fail5;
	}

	/*
	 * Fill in the rest of the Tavor Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp	  = mpt;
	mr->mr_mttrsrcp	  = mtt;
	mr->mr_pdhdl	  = pd;
	mr->mr_rsrcp	  = rsrc;
	mr->mr_is_umem	  = mr_is_umem;
	mr->mr_is_fmr	  = 0;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the tavor_umap_umemlock_cb()
	 * callback and tavor_mr_deregister().
	 */
	if (mr_is_umem) {
		tavor_umap_db_add(umapdb);
	}

	*mrhdl_new = mr;

	TAVOR_TNF_EXIT(tavor_mr_register_shared);
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrshared_fail5:
	(void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
	if (mr_is_umem) {
		tavor_umap_db_free(umapdb);
	}
mrshared_fail4:
	if (mr_is_umem) {
		ddi_umem_unlock(umem_cookie);
	}
mrshared_fail3:
	tavor_rsrc_free(state, &rsrc);
mrshared_fail2:
	tavor_rsrc_free(state, &mpt);
mrshared_fail1:
	tavor_pd_refcnt_dec(pd);
mrshared_fail:
	TNF_PROBE_1(tavor_mr_register_shared_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_register_shared);
	return (status);
}

/*
 * tavor_mr_alloc_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 * Pre-allocate the MPT/MTT resources for a "fast" memory region (FMR)
 * belonging to the given FMR pool.  The actual mapping is filled in
 * later by tavor_mr_register_physical_fmr().
 */
int
tavor_mr_alloc_fmr(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_fmrhdl_t fmr_pool, tavor_mrhdl_t *mrhdl)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mpt, *mtt, *rsrc;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mrhdl_t		mr;
	tavor_bind_info_t	bind;
	uint64_t		mtt_addr, mtt_ddrbaseaddr;
	uint64_t		nummtt;
	uint_t			sleep, mtt_pgsize_bits;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_alloc_fmr);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ?
TAVOR_SLEEP : 500 TAVOR_NOSLEEP; 501 if ((sleep == TAVOR_SLEEP) && 502 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 503 TNF_PROBE_0(tavor_mr_alloc_fmr, TAVOR_TNF_ERROR, ""); 504 TAVOR_TNF_EXIT(tavor_mr_alloc_fmr); 505 return (IBT_INVALID_PARAM); 506 } 507 508 /* Increment the reference count on the protection domain (PD) */ 509 tavor_pd_refcnt_inc(pd); 510 511 /* 512 * Allocate an MPT entry. This will be filled in with all the 513 * necessary parameters to define the FMR. Specifically, it will be 514 * made to reference the currently existing MTT entries and ownership 515 * of the MPT will be passed to the hardware in the last step below. 516 * If we fail here, we must undo the protection domain reference count. 517 */ 518 519 status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt); 520 if (status != DDI_SUCCESS) { 521 /* Set "status" and "errormsg" and goto failure */ 522 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT"); 523 goto fmralloc_fail1; 524 } 525 526 /* 527 * Allocate the software structure for tracking the fmr memory 528 * region (i.e. the Tavor Memory Region handle). If we fail here, we 529 * must undo the protection domain reference count and the previous 530 * resource allocation. 531 */ 532 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc); 533 if (status != DDI_SUCCESS) { 534 /* Set "status" and "errormsg" and goto failure */ 535 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle"); 536 goto fmralloc_fail2; 537 } 538 mr = (tavor_mrhdl_t)rsrc->tr_addr; 539 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 540 541 /* 542 * Setup and validate the memory region access flags. This means 543 * translating the IBTF's enable flags into the access flags that 544 * will be used in later operations. 
545 */ 546 mr->mr_accflag = 0; 547 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE) 548 mr->mr_accflag |= IBT_MR_LOCAL_WRITE; 549 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ) 550 mr->mr_accflag |= IBT_MR_REMOTE_READ; 551 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE) 552 mr->mr_accflag |= IBT_MR_REMOTE_WRITE; 553 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 554 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; 555 556 /* 557 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 558 * from a certain number of "constrained" bits (the least significant 559 * bits) and some number of "unconstrained" bits. The constrained 560 * bits must be set to the index of the entry in the MPT table, but 561 * the unconstrained bits can be set to any value we wish. Note: 562 * if no remote access is required, then the RKey value is not filled 563 * in. Otherwise both Rkey and LKey are given the same value. 564 */ 565 tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey); 566 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 567 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 568 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 569 mr->mr_rkey = mr->mr_lkey; 570 } 571 572 /* 573 * Determine number of pages spanned. This routine uses the 574 * information in the "bind" struct to determine the required 575 * number of MTT entries needed (and returns the suggested page size - 576 * as a "power-of-2" - for each MTT entry). 577 */ 578 /* Assume address will be page aligned later */ 579 bind.bi_addr = 0; 580 /* Calculate size based on given max pages */ 581 bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT; 582 nummtt = tavor_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits); 583 584 /* 585 * Allocate the MTT entries. Use the calculations performed above to 586 * allocate the required number of MTT entries. Note: MTT entries are 587 * allocated in "MTT segments" which consist of complete cachelines 588 * (i.e. 8 entries, 16 entries, etc.) 
So the TAVOR_NUMMTT_TO_MTTSEG() 589 * macro is used to do the proper conversion. If we fail here, we 590 * must not only undo all the previous resource allocation (and PD 591 * reference count), but we must also unbind the memory. 592 */ 593 status = tavor_rsrc_alloc(state, TAVOR_MTT, 594 TAVOR_NUMMTT_TO_MTTSEG(nummtt), sleep, &mtt); 595 if (status != DDI_SUCCESS) { 596 /* Set "status" and "errormsg" and goto failure */ 597 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT"); 598 goto fmralloc_fail3; 599 } 600 mr->mr_logmttpgsz = mtt_pgsize_bits; 601 602 /* 603 * Get the base address for the MTT table. This will be necessary 604 * in the next step when we are setting up the MPT entry. 605 */ 606 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT]; 607 mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset; 608 609 /* 610 * Fill in the MPT entry. This is the final step before passing 611 * ownership of the MPT entry to the Tavor hardware. We use all of 612 * the information collected/calculated above to fill in the 613 * requisite portions of the MPT. 614 */ 615 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t)); 616 mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE; 617 mpt_entry.en_bind = 0; 618 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 619 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; 620 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0; 621 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0; 622 mpt_entry.lr = 1; 623 mpt_entry.reg_win = TAVOR_MPT_IS_REGION; 624 mpt_entry.pd = pd->pd_pdnum; 625 mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC; 626 mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND; 627 mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT); 628 mpt_entry.mttseg_addr_h = mtt_addr >> 32; 629 mpt_entry.mttseg_addr_l = mtt_addr >> 6; 630 mpt_entry.mem_key = mr->mr_lkey; 631 632 /* 633 * FMR sets these to 0 for now. Later during actual fmr registration 634 * these values are filled in. 
635 */ 636 mpt_entry.start_addr = 0; 637 mpt_entry.reg_win_len = 0; 638 639 /* 640 * Write the MPT entry to hardware. Lastly, we pass ownership of 641 * the entry to the hardware. Note: in general, this operation 642 * shouldn't fail. But if it does, we have to undo everything we've 643 * done above before returning error. 644 */ 645 status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 646 sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep); 647 if (status != TAVOR_CMD_SUCCESS) { 648 cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n", 649 status); 650 TNF_PROBE_1(tavor_mr_register_shared_sw2hw_mpt_cmd_fail, 651 TAVOR_TNF_ERROR, "", tnf_uint, status, status); 652 /* Set "status" and "errormsg" and goto failure */ 653 TAVOR_TNF_FAIL(ibc_get_ci_failure(0), 654 "tavor SW2HW_MPT command"); 655 goto fmralloc_fail4; 656 } 657 658 /* 659 * Fill in the rest of the Tavor Memory Region handle. Having 660 * successfully transferred ownership of the MPT, we can update the 661 * following fields for use in further operations on the MR. Also, set 662 * that this is an FMR region. 663 */ 664 mr->mr_mptrsrcp = mpt; 665 mr->mr_mttrsrcp = mtt; 666 mr->mr_pdhdl = pd; 667 mr->mr_rsrcp = rsrc; 668 mr->mr_is_fmr = 1; 669 (void) memcpy(&mr->mr_bindinfo, &bind, sizeof (tavor_bind_info_t)); 670 671 *mrhdl = mr; 672 673 TAVOR_TNF_EXIT(tavor_mr_alloc_fmr); 674 return (DDI_SUCCESS); 675 676 /* 677 * The following is cleanup for all possible failure cases in this routine 678 */ 679 fmralloc_fail4: 680 tavor_rsrc_free(state, &mtt); 681 fmralloc_fail3: 682 tavor_rsrc_free(state, &rsrc); 683 fmralloc_fail2: 684 tavor_rsrc_free(state, &mpt); 685 fmralloc_fail1: 686 tavor_pd_refcnt_dec(pd); 687 fmralloc_fail: 688 TNF_PROBE_1(tavor_mr_alloc_fmr, TAVOR_TNF_ERROR, "", 689 tnf_string, msg, errormsg); 690 TAVOR_TNF_EXIT(tavor_mr_alloc_fmr); 691 return (status); 692 } 693 694 /* 695 * tavor_mr_register_physical_fmr() 696 * Context: Can be called from interrupt or base context. 
 */
/*
 * Map a set of physical pages into a previously allocated FMR (see
 * tavor_mr_alloc_fmr()).  The MPT entry is updated in place via
 * ddi_put*() accesses: the status byte is first flipped to software
 * ownership (0xF), the MTTs and key/length/address fields are written,
 * and the status byte is finally flipped back to hardware ownership
 * (0x0).  Returns the new keys and IOVA through "mem_desc_p".
 */
int
tavor_mr_register_physical_fmr(tavor_state_t *state,
    ibt_pmr_attr_t *mem_pattr_p, tavor_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p)
{
	tavor_rsrc_t	*mpt;
	uint64_t	*mpt_table;
	int		status;
	char		*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_register_physical_fmr);

	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mpt_table = (uint64_t *)mpt->tr_addr;

	/* Write MPT status to SW bit */
	ddi_put8(mpt->tr_acchdl, (uint8_t *)&mpt_table[0], 0xF);

	/*
	 * Write the mapped addresses into the MTT entries.  FMR needs to do
	 * this a little differently, so we call the fmr specific fast mtt
	 * write here.
	 */
	status = tavor_mr_fast_mtt_write_fmr(mr->mr_mttrsrcp, mem_pattr_p,
	    mr->mr_logmttpgsz);
	if (status != DDI_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed write mtt");
		goto fmr_reg_fail1;
	}

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/* write mem key value */
	ddi_put32(mpt->tr_acchdl, (uint32_t *)&mpt_table[1], mr->mr_lkey);

	/* write length value */
	ddi_put64(mpt->tr_acchdl, &mpt_table[3], mem_pattr_p->pmr_len);

	/* write start addr value */
	ddi_put64(mpt->tr_acchdl, &mpt_table[2], mem_pattr_p->pmr_iova);

	/* write lkey value */
	ddi_put32(mpt->tr_acchdl, (uint32_t *)&mpt_table[4], mr->mr_lkey);

	/* Write MPT status to HW bit */
	ddi_put8(mpt->tr_acchdl, (uint8_t *)&mpt_table[0], 0x0);

	/* Fill in return parameters */
	mem_desc_p->pmd_lkey = mr->mr_lkey;
	mem_desc_p->pmd_rkey = mr->mr_rkey;
	mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova;
	/*
	 * NOTE(review): pmd_phys_buf_list_sz is assigned the region length
	 * (pmr_len), not a buffer-list element count -- looks intentional
	 * for this driver but worth confirming against the IBTF contract.
	 */
	mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len;

	/* Fill in MR bindinfo struct for later sync or query operations */
	mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova;
	mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT;

	mutex_exit(&mr->mr_lock);

	TAVOR_TNF_EXIT(tavor_mr_register_physical_fmr);
	return (DDI_SUCCESS);

fmr_reg_fail1:
	/*
	 * Note, we fail here, and purposely leave the memory ownership in
	 * software.  The memory tables may be corrupt, so we leave the region
	 * unregistered.
	 */
	TNF_PROBE_1(tavor_mr_register_physical_fmr_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_register_physical_fmr);
	return (DDI_FAILURE);
}


/*
 * tavor_mr_deregister()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_deregister(tavor_state_t *state, tavor_mrhdl_t *mrhdl, uint_t level,
    uint_t sleep)
{
	tavor_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
	tavor_umap_db_entry_t	*umapdb;
	tavor_pdhdl_t		pd;
	tavor_mrhdl_t		mr;
	tavor_bind_info_t	*bind;
	uint64_t		value;
	int			status, shared_mtt;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_deregister);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
		TNF_PROBE_1(tavor_mr_deregister_fail, TAVOR_TNF_ERROR, "",
		    tnf_string, msg, errormsg);
		TAVOR_TNF_EXIT(tavor_mr_deregister);
		return (status);
	}

	/*
	 * Pull all the necessary information from the Tavor Memory Region
	 * handle.  This is necessary here because the resource for the
	 * MR handle is going to be freed up as part of the this
	 * deregistration
	 */
	mr = *mrhdl;
	mutex_enter(&mr->mr_lock);
	mpt	   = mr->mr_mptrsrcp;
	mtt	   = mr->mr_mttrsrcp;
	mtt_refcnt = mr->mr_mttrefcntp;
	rsrc	   = mr->mr_rsrcp;
	pd	   = mr->mr_pdhdl;
	bind	   = &mr->mr_bindinfo;

	/*
	 * Check here if the memory region is really an FMR.  If so, this is a
	 * bad thing and we shouldn't be here.  Return failure.
	 */
	if (mr->mr_is_fmr) {
		mutex_exit(&mr->mr_lock);
		TNF_PROBE_0(tavor_mr_deregister_is_fmr, TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_deregister);
		return (IBT_INVALID_PARAM);
	}

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of the tavor_umap_umemlock_cb() callback.
	 * If so, then jump to the end and free the remaining resources.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		goto mrdereg_finish_cleanup;
	}

	/*
	 * We must drop the "mr_lock" here to ensure that both SLEEP and
	 * NOSLEEP calls into the firmware work as expected.  Also, if two
	 * threads are attemping to access this MR (via de-register,
	 * re-register, or otherwise), then we allow the firmware to enforce
	 * the checking, that only one deregister is valid.
	 */
	mutex_exit(&mr->mr_lock);

	/*
	 * Reclaim MPT entry from hardware (if necessary).  Since the
	 * tavor_mr_deregister() routine is used in the memory region
	 * reregistration process as well, it is possible that we will
	 * not always wish to reclaim ownership of the MPT.  Check the
	 * "level" arg and, if necessary, attempt to reclaim it.  If
	 * the ownership transfer fails for any reason, we check to see
	 * what command status was returned from the hardware.  The only
	 * "expected" error status is the one that indicates an attempt to
	 * deregister a memory region that has memory windows bound to it
	 */
	if (level >= TAVOR_MR_DEREG_ALL) {
		status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT,
		    NULL, 0, mpt->tr_indx, sleep);
		if (status != TAVOR_CMD_SUCCESS) {
			if (status == TAVOR_CMD_REG_BOUND) {
				TAVOR_TNF_EXIT(tavor_mr_deregister);
				return (IBT_MR_IN_USE);
			} else {
				cmn_err(CE_CONT, "Tavor: HW2SW_MPT command "
				    "failed: %08x\n", status);
				TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail,
				    TAVOR_TNF_ERROR, "", tnf_uint, status,
				    status);
				TAVOR_TNF_EXIT(tavor_mr_deregister);
				return (IBT_INVALID_PARAM);
			}
		}
	}

	/*
	 * Re-grab the mr_lock here.  Since further access to the protected
	 * 'mr' structure is needed, and we would have returned previously for
	 * the multiple deregistration case, we can safely grab the lock here.
	 */
	mutex_enter(&mr->mr_lock);

	/*
	 * If the memory had come from userland, then we do a lookup in the
	 * "userland resources database".  On success, we free the entry, call
	 * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
	 * an indication that the umem_lockmemory() callback has called
	 * tavor_mr_deregister()), we call ddi_umem_unlock() and invalidate
	 * the "mr_umemcookie" field in the MR handle (this will be used
	 * later to detect that only partial cleaup still remains to be done
	 * on the MR handle).
	 */
	if (mr->mr_is_umem) {
		status = tavor_umap_db_find(state->ts_instance,
		    (uint64_t)(uintptr_t)mr->mr_umemcookie,
		    MLNX_UMAP_MRMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
		    &umapdb);
		if (status == DDI_SUCCESS) {
			tavor_umap_db_free(umapdb);
			ddi_umem_unlock(mr->mr_umemcookie);
		} else {
			ddi_umem_unlock(mr->mr_umemcookie);
			mr->mr_umemcookie = NULL;
		}
	}

	/*
	 * Decrement the MTT reference count.  Since the MTT resource
	 * may be shared between multiple memory regions (as a result
	 * of a "RegisterSharedMR" verb) it is important that we not
	 * free up or unbind resources prematurely.  If it's not shared (as
	 * indicated by the return status), then free the resource.
	 */
	shared_mtt = tavor_mtt_refcnt_dec(mtt_refcnt);
	if (!shared_mtt) {
		tavor_rsrc_free(state, &mtt_refcnt);
	}

	/*
	 * Free up the MTT entries and unbind the memory.  Here, as above, we
	 * attempt to free these resources only if it is appropriate to do so.
	 */
	if (!shared_mtt) {
		if (level >= TAVOR_MR_DEREG_NO_HW2SW_MPT) {
			tavor_mr_mem_unbind(state, bind);
		}
		tavor_rsrc_free(state, &mtt);
	}

	/*
	 * If the MR handle has been invalidated, then drop the
	 * lock and return success.  Note: This only happens because
	 * the umem_lockmemory() callback has been triggered.  The
	 * cleanup here is partial, and further cleanup (in a
	 * subsequent tavor_mr_deregister() call) will be necessary.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		TAVOR_TNF_EXIT(tavor_mr_deregister);
		return (DDI_SUCCESS);
	}

mrdereg_finish_cleanup:
	mutex_exit(&mr->mr_lock);

	/* Free the Tavor Memory Region handle */
	tavor_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	tavor_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	tavor_pd_refcnt_dec(pd);

	/* Set the mrhdl pointer to NULL and return success */
	*mrhdl = NULL;

	TAVOR_TNF_EXIT(tavor_mr_deregister);
	return (DDI_SUCCESS);
}

/*
 * tavor_mr_dealloc_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_dealloc_fmr(tavor_state_t *state, tavor_mrhdl_t *mrhdl)
{
	tavor_rsrc_t		*mpt, *mtt, *rsrc;
	tavor_pdhdl_t		pd;
	tavor_mrhdl_t		mr;

	TAVOR_TNF_ENTER(tavor_mr_dealloc_fmr);

	/*
	 * Pull all the necessary information from the Tavor Memory Region
	 * handle.
This is necessary here because the resource for the 1001 * MR handle is going to be freed up as part of the this 1002 * deregistration 1003 */ 1004 mr = *mrhdl; 1005 mutex_enter(&mr->mr_lock); 1006 mpt = mr->mr_mptrsrcp; 1007 mtt = mr->mr_mttrsrcp; 1008 rsrc = mr->mr_rsrcp; 1009 pd = mr->mr_pdhdl; 1010 mutex_exit(&mr->mr_lock); 1011 1012 /* Free the MTT entries */ 1013 tavor_rsrc_free(state, &mtt); 1014 1015 /* Free the Tavor Memory Region handle */ 1016 tavor_rsrc_free(state, &rsrc); 1017 1018 /* Free up the MPT entry resource */ 1019 tavor_rsrc_free(state, &mpt); 1020 1021 /* Decrement the reference count on the protection domain (PD) */ 1022 tavor_pd_refcnt_dec(pd); 1023 1024 /* Set the mrhdl pointer to NULL and return success */ 1025 *mrhdl = NULL; 1026 1027 TAVOR_TNF_EXIT(tavor_mr_dealloc_fmr); 1028 return (DDI_SUCCESS); 1029 } 1030 1031 /* 1032 * tavor_mr_invalidate_fmr() 1033 * Context: Can be called from interrupt or base context. 1034 */ 1035 /* ARGSUSED */ 1036 int 1037 tavor_mr_invalidate_fmr(tavor_state_t *state, tavor_mrhdl_t mr) 1038 { 1039 tavor_rsrc_t *mpt; 1040 uint64_t *mpt_table; 1041 1042 TAVOR_TNF_ENTER(tavor_mr_invalidate_fmr); 1043 1044 mutex_enter(&mr->mr_lock); 1045 mpt = mr->mr_mptrsrcp; 1046 mpt_table = (uint64_t *)mpt->tr_addr; 1047 1048 /* Write MPT status to SW bit */ 1049 ddi_put8(mpt->tr_acchdl, (uint8_t *)&mpt_table[0], 0xF); 1050 1051 /* invalidate mem key value */ 1052 ddi_put32(mpt->tr_acchdl, (uint32_t *)&mpt_table[1], 0); 1053 1054 /* invalidate lkey value */ 1055 ddi_put32(mpt->tr_acchdl, (uint32_t *)&mpt_table[4], 0); 1056 1057 /* Write MPT status to HW bit */ 1058 ddi_put8(mpt->tr_acchdl, (uint8_t *)&mpt_table[0], 0x0); 1059 1060 mutex_exit(&mr->mr_lock); 1061 1062 TAVOR_TNF_EXIT(tavor_mr_invalidate_fmr); 1063 return (DDI_SUCCESS); 1064 } 1065 1066 /* 1067 * tavor_mr_deregister_fmr() 1068 * Context: Can be called from interrupt or base context. 
 */
/* ARGSUSED */
int
tavor_mr_deregister_fmr(tavor_state_t *state, tavor_mrhdl_t mr)
{
	tavor_rsrc_t		*mpt;
	uint64_t		*mpt_table;

	TAVOR_TNF_ENTER(tavor_mr_deregister_fmr);

	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mpt_table = (uint64_t *)mpt->tr_addr;

	/*
	 * Write MPT status to SW bit.  This returns the MPT entry to
	 * software ownership; the entry is left in that state (resource
	 * teardown is done separately in tavor_mr_dealloc_fmr()).
	 */
	ddi_put8(mpt->tr_acchdl, (uint8_t *)&mpt_table[0], 0xF);
	mutex_exit(&mr->mr_lock);

	TAVOR_TNF_EXIT(tavor_mr_deregister_fmr);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_query()
 *    Context: Can be called from interrupt or base context.
 *
 * Fills in "attr" from the MR handle's cached software state (no hardware
 * access): access flags, PD, LKey and local bounds, and - only when one of
 * the remote access flags is set - the RKey and remote bounds.
 */
/* ARGSUSED */
int
tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mr,
    ibt_mr_query_attr_t *attr)
{
	TAVOR_TNF_ENTER(tavor_mr_query);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))

	mutex_enter(&mr->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a tavor_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		TNF_PROBE_0(tavor_mr_query_inv_mrhdl_fail, TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_query);
		return (IBT_MR_HDL_INVALID);
	}

	/* Fill in the queried attributes */
	attr->mr_attr_flags = mr->mr_accflag;
	attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;

	/* Fill in the "local" attributes */
	attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
	attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
	attr->mr_lbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;

	/*
	 * Fill in the "remote" attributes (if necessary).  Note: the
	 * remote attributes are only valid if the memory region has one
	 * or more of the remote access flags set.
	 */
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
		attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
		attr->mr_rbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;
	}

	/*
	 * If region is mapped for streaming (i.e. noncoherent), then set sync
	 * is required
	 */
	attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
	    IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

	mutex_exit(&mr->mr_lock);
	TAVOR_TNF_EXIT(tavor_mr_query);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_reregister()
 *    Context: Can be called from interrupt or base context.
 *
 * Thin wrapper around tavor_mr_common_rereg() for the "vaddr" flavor of
 * reregistration: packs the caller's ibt_mr_attr_t into a bind struct of
 * type TAVOR_BINDHDL_VADDR.
 */
int
tavor_mr_reregister(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op)
{
	tavor_bind_info_t	bind;
	int			status;

	TAVOR_TNF_ENTER(tavor_mr_reregister);

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (as is the case here) and a "buf" binding (see
	 * below).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration (and reregistration) routines.
	 */
	bind.bi_type  = TAVOR_BINDHDL_VADDR;
	bind.bi_addr  = mr_attr->mr_vaddr;
	bind.bi_len   = mr_attr->mr_len;
	bind.bi_as    = mr_attr->mr_as;
	bind.bi_flags = mr_attr->mr_flags;
	status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_mr_reregister_cmnreg_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_reregister);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_reregister);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_reregister_buf()
 *    Context: Can be called from interrupt or base context.
 *
 * "buf" flavor of reregistration: packs the caller's ibt_smr_attr_t and
 * struct buf into a TAVOR_BINDHDL_BUF bind struct for
 * tavor_mr_common_rereg().
 */
int
tavor_mr_reregister_buf(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
    tavor_mrhdl_t *mrhdl_new, tavor_mr_options_t *op)
{
	tavor_bind_info_t	bind;
	int			status;

	TAVOR_TNF_ENTER(tavor_mr_reregister_buf);

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (see above) and a "buf" binding (as is the case
	 * here).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration routines.  Note: We have chosen to provide
	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
	 * not set).  It is not critical what value we choose here as it need
	 * only be unique for the given RKey (which will happen by default),
	 * so the choice here is somewhat arbitrary.
	 */
	bind.bi_type = TAVOR_BINDHDL_BUF;
	bind.bi_buf  = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	bind.bi_len   = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	bind.bi_as    = NULL;
	status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_mr_reregister_buf_cmnreg_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_reregister_buf);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_reregister_buf);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_sync()
 *    Context: Can be called from interrupt or base context.
 *
 * Validates each ibt_mr_sync_t segment (handle, bounds within the region,
 * direction flag) and performs a ddi_dma_sync() on the region's DMA handle
 * for the requested sub-range.  IBT_SYNC_READ maps to FORDEV, IBT_SYNC_WRITE
 * to FORCPU.
 */
/* ARGSUSED */
int
tavor_mr_sync(tavor_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
{
	tavor_mrhdl_t		mrhdl;
	uint64_t		seg_vaddr, seg_len, seg_end;
	uint64_t		mr_start, mr_end;
	uint_t			type;
	int			status, i;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_sync);

	/* Process each of the ibt_mr_sync_t's */
	for (i = 0; i < num_segs; i++) {
		mrhdl = (tavor_mrhdl_t)mr_segs[i].ms_handle;

		/* Check for valid memory region handle */
		if (mrhdl == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
			goto mrsync_fail;
		}

		mutex_enter(&mrhdl->mr_lock);

		/*
		 * Check here to see if the memory region has already been
		 * partially deregistered as a result of a
		 * tavor_umap_umemlock_cb() callback.  If so, this is an
		 * error, return failure.
		 */
		if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl2");
			goto mrsync_fail;
		}

		/* Check for valid bounds on sync request */
		seg_vaddr = mr_segs[i].ms_vaddr;
		seg_len	  = mr_segs[i].ms_len;
		seg_end	  = seg_vaddr + seg_len - 1;
		mr_start  = mrhdl->mr_bindinfo.bi_addr;
		mr_end	  = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
		if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_VA_INVALID, "invalid vaddr");
			goto mrsync_fail;
		}
		if ((seg_end < mr_start) || (seg_end > mr_end)) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
			goto mrsync_fail;
		}

		/* Determine what type (i.e. direction) for sync */
		if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
			type = DDI_DMA_SYNC_FORDEV;
		} else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
			type = DDI_DMA_SYNC_FORCPU;
		} else {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sync type");
			goto mrsync_fail;
		}

		(void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
		    (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);
		mutex_exit(&mrhdl->mr_lock);
	}

	TAVOR_TNF_EXIT(tavor_mr_sync);
	return (DDI_SUCCESS);

mrsync_fail:
	TNF_PROBE_1(tavor_mr_sync_fail, TAVOR_TNF_ERROR, "", tnf_string, msg,
	    errormsg);
	TAVOR_TNF_EXIT(tavor_mr_sync);
	return (status);
}


/*
 * tavor_mw_alloc()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mw_alloc(tavor_state_t *state, tavor_pdhdl_t pd, ibt_mw_flags_t flags,
    tavor_mwhdl_t *mwhdl)
{
	tavor_rsrc_t		*mpt, *rsrc;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mwhdl_t		mw;
	uint_t			sleep;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mw_alloc);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MW_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mwalloc_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry (for use as a memory window).  Since the
	 * Tavor hardware uses the MPT entry for memory regions and for
	 * memory windows, we will fill in this MPT with all the necessary
	 * parameters for the memory window.  And then (just as we do for
	 * memory regions) ownership will be passed to the hardware in the
	 * final step below.  If we fail here, we must undo the protection
	 * domain reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto mwalloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory window (i.e.
	 * the Tavor Memory Window handle).  Note: This is actually the same
	 * software structure used for tracking memory regions, but since many
	 * of the same properties are needed, only a single structure is
	 * necessary.  If we fail here, we must undo the protection domain
	 * reference count and the previous resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto mwalloc_fail2;
	}
	mw = (tavor_mwhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Calculate an "unbound" RKey from MPT index.  In much the same way
	 * as we do for memory regions (above), this key is constructed from
	 * a "constrained" (which depends on the MPT index) and an
	 * "unconstrained" portion (which may be arbitrarily chosen).
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mw->mr_rkey);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.  Note: fewer entries in the MPT
	 * entry are necessary to allocate a memory window.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.reg_win = TAVOR_MPT_IS_WINDOW;
	mpt_entry.mem_key = mw->mr_rkey;
	mpt_entry.pd	  = pd->pd_pdnum;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mw_alloc_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto mwalloc_fail3;
	}

	/*
	 * Fill in the rest of the Tavor Memory Window handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MW.
	 */
	mw->mr_mptrsrcp = mpt;
	mw->mr_pdhdl	= pd;
	mw->mr_rsrcp	= rsrc;
	*mwhdl = mw;

	TAVOR_TNF_EXIT(tavor_mw_alloc);
	return (DDI_SUCCESS);

/*
 * Cleanup labels: unwind in the reverse of allocation order.
 */
mwalloc_fail3:
	tavor_rsrc_free(state, &rsrc);
mwalloc_fail2:
	tavor_rsrc_free(state, &mpt);
mwalloc_fail1:
	tavor_pd_refcnt_dec(pd);
mwalloc_fail:
	TNF_PROBE_1(tavor_mw_alloc_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mw_alloc);
	return (status);
}


/*
 * tavor_mw_free()
 *    Context: Can be called from interrupt or base context.
 *
 * Reclaims the MW's MPT entry from hardware (HW2SW_MPT), then frees the MW
 * handle and MPT resources and drops the PD reference.  On success the
 * caller's handle pointer is NULLed out.
 */
int
tavor_mw_free(tavor_state_t *state, tavor_mwhdl_t *mwhdl, uint_t sleep)
{
	tavor_rsrc_t		*mpt, *rsrc;
	tavor_mwhdl_t		mw;
	int			status;
	char			*errormsg;
	tavor_pdhdl_t		pd;

	TAVOR_TNF_ENTER(tavor_mw_free);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
		TNF_PROBE_1(tavor_mw_free_fail, TAVOR_TNF_ERROR, "",
		    tnf_string, msg, errormsg);
		TAVOR_TNF_EXIT(tavor_mw_free);
		return (status);
	}

	/*
	 * Pull all the necessary information from the Tavor Memory Window
	 * handle.  This is necessary here because the resource for the
	 * MW handle is going to be freed up as part of the this operation.
	 */
	mw = *mwhdl;
	mutex_enter(&mw->mr_lock);
	mpt  = mw->mr_mptrsrcp;
	rsrc = mw->mr_rsrcp;
	pd   = mw->mr_pdhdl;
	mutex_exit(&mw->mr_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Reclaim the MPT entry from hardware.  Note: in general, it is
	 * unexpected for this operation to return an error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
	    0, mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail, TAVOR_TNF_ERROR, "",
		    tnf_uint, status, status);
		TAVOR_TNF_EXIT(tavor_mw_free);
		return (IBT_INVALID_PARAM);
	}

	/* Free the Tavor Memory Window handle */
	tavor_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	tavor_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	tavor_pd_refcnt_dec(pd);

	/* Set the mwhdl pointer to NULL and return success */
	*mwhdl = NULL;

	TAVOR_TNF_EXIT(tavor_mw_free);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_keycalc()
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key)
{
	uint32_t	tmp, log_num_mpt;

	/*
	 * Generate a simple key from counter.  Note:  We increment this
	 * static variable _intentionally_ without any kind of mutex around
	 * it.  First, single-threading all operations through a single lock
	 * would be a bad idea (from a performance point-of-view).  Second,
	 * the upper "unconstrained" bits don't really have to be unique
	 * because the lower bits are guaranteed to be (although we do make a
	 * best effort to ensure that they are).  Third, the window for the
	 * race (where both threads read and update the counter at the same
	 * time) is incredibly small.
	 * And, lastly, we'd like to make this into a "random" key XXX
	 *
	 * The result is: low cp_log_num_mpt bits = MPT index ("constrained"),
	 * remaining high bits = counter value ("unconstrained").
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(tavor_debug_memkey_cnt))
	log_num_mpt = state->ts_cfg_profile->cp_log_num_mpt;
	tmp = (tavor_debug_memkey_cnt++) << log_num_mpt;
	*key = tmp | indx;
}


/*
 * tavor_mr_common_reg()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
	tavor_umap_db_entry_t	*umapdb;
	tavor_sw_refcnt_t	*swrc_tmp;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mrhdl_t		mr;
	ibt_mr_flags_t		flags;
	tavor_bind_info_t	*bh;
	ddi_dma_handle_t	bind_dmahdl;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, mtt_ddrbaseaddr, max_sz;
	uint_t			sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
	int			status, umem_flags, bind_override_addr;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_common_reg);

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU), whether it should be
	 * registered to bypass the IOMMU, and whether or not the resulting
	 * address should be "zero-based" (to aid the alignment restrictions
	 * for QPs).
	 */
	if (op == NULL) {
		bind_type	   = TAVOR_BINDMEM_NORMAL;
		bind_dmahdl	   = NULL;
		bind_override_addr = 0;
	} else {
		bind_type	   = op->mro_bind_type;
		bind_dmahdl	   = op->mro_bind_dmahdl;
		bind_override_addr = op->mro_bind_override_addr;
	}

	/* Extract the flags field from the tavor_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check for invalid length.  Check is the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
		goto mrcommon_fail;
	}

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrcommon_fail;
	}

	/*
	 * Get the base address for the MTT table.  This will be necessary
	 * below when we are setting up the MPT entry.
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the memory region.  And then
	 * ownership will be passed to the hardware in the final step
	 * below.  If we fail here, we must undo the protection domain
	 * reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto mrcommon_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory region (i.e.
	 * the Tavor Memory Region handle).  If we fail here, we must undo
	 * the protection domain reference count and the previous resource
	 * allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto mrcommon_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Determine if the memory is from userland and pin the pages
	 * with umem_lockmemory() if necessary.
	 * Then, if this is userland memory, allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo the reference counts
	 * and the previous resource allocations.
	 */
	mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ?
	    1 : 0);
	if (mr_is_umem) {
		/* Round to whole pages covering [bi_addr, bi_addr + bi_len) */
		umem_len   = ptob(btopr(bind->bi_len +
		    ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
		umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &tavor_umem_cbops, NULL);
		if (status != 0) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
			goto mrcommon_fail3;
		}

		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))

		bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
		    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
		if (bind->bi_buf == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed iosetup");
			goto mrcommon_fail3;
		}
		bind->bi_type = TAVOR_BINDHDL_UBUF;
		bind->bi_buf->b_flags |= B_READ;

		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

		umapdb = tavor_umap_db_alloc(state->ts_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
			goto mrcommon_fail4;
		}
	}

	/*
	 * Setup the bindinfo for the mtt bind call
	 */
	bh = &mr->mr_bindinfo;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
	bcopy(bind, bh, sizeof (tavor_bind_info_t));
	bh->bi_bypass = bind_type;
	status = tavor_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
	    &mtt_pgsize_bits);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(status, "failed mtt bind");
		goto mrcommon_fail5;
	}
	mr->mr_logmttpgsz = mtt_pgsize_bits;

	/*
	 * Allocate MTT reference count (to track shared memory regions).
	 * This reference count resource may never be used on the given
	 * memory region, but if it is ever later registered as "shared"
	 * memory region then this resource will be necessary.  If we fail
	 * here, we do pretty much the same as above to clean up.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1, sleep,
	    &mtt_refcnt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed refence count");
		goto mrcommon_fail6;
	}
	mr->mr_mttrefcntp = mtt_refcnt;
	swrc_tmp = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
	TAVOR_MTT_REFCNT_INIT(swrc_tmp);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.m_io	  = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
	/* page_sz is encoded relative to a 4KB (2^12 = 0xC) base page */
	mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key = mr->mr_lkey;
	mpt_entry.pd	  = pd->pd_pdnum;
	if (bind_override_addr == 0) {
		mpt_entry.start_addr = bh->bi_addr;
	} else {
		/* "zero-based": keep only the in-page offset of the addr */
		bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
		mpt_entry.start_addr = bh->bi_addr;
	}
	mpt_entry.reg_win_len	= bh->bi_len;
	mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND;
	mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
	mpt_entry.mttseg_addr_h = mtt_addr >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr >> 6;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mr_common_reg_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto mrcommon_fail7;
	}

	/*
	 * Fill in the rest of the Tavor Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp	  = mpt;
	mr->mr_mttrsrcp	  = mtt;
	mr->mr_pdhdl	  = pd;
	mr->mr_rsrcp	  = rsrc;
	mr->mr_is_umem	  = mr_is_umem;
	mr->mr_is_fmr	  = 0;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the tavor_umap_umemlock_cb()
	 * callback and tavor_mr_deregister().
	 */
	if (mr_is_umem) {
		tavor_umap_db_add(umapdb);
	}

	*mrhdl = mr;

	TAVOR_TNF_EXIT(tavor_mr_common_reg);
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrcommon_fail7:
	tavor_rsrc_free(state, &mtt_refcnt);
mrcommon_fail6:
	tavor_rsrc_free(state, &mtt);
	tavor_mr_mem_unbind(state, bh);
mrcommon_fail5:
	if (mr_is_umem) {
		tavor_umap_db_free(umapdb);
	}
mrcommon_fail4:
	if (mr_is_umem) {
		/*
		 * Free up the memory ddi_umem_iosetup() allocates
		 * internally.
		 */
		if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
			freerbuf(bind->bi_buf);
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
			bind->bi_type = TAVOR_BINDHDL_NONE;
			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
		}
		ddi_umem_unlock(umem_cookie);
	}
mrcommon_fail3:
	tavor_rsrc_free(state, &rsrc);
mrcommon_fail2:
	tavor_rsrc_free(state, &mpt);
mrcommon_fail1:
	tavor_pd_refcnt_dec(pd);
mrcommon_fail:
	TNF_PROBE_1(tavor_mr_common_reg_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_common_reg);
	return (status);
}

/*
 * tavor_mr_mtt_bind()
 *    Context: Can be called from interrupt or base context.
1920 */ 1921 int 1922 tavor_mr_mtt_bind(tavor_state_t *state, tavor_bind_info_t *bind, 1923 ddi_dma_handle_t bind_dmahdl, tavor_rsrc_t **mtt, uint_t *mtt_pgsize_bits) 1924 { 1925 uint64_t nummtt; 1926 uint_t sleep; 1927 int status; 1928 char *errormsg; 1929 1930 TAVOR_TNF_ENTER(tavor_mr_common_reg); 1931 1932 /* 1933 * Check the sleep flag. Ensure that it is consistent with the 1934 * current thread context (i.e. if we are currently in the interrupt 1935 * context, then we shouldn't be attempting to sleep). 1936 */ 1937 sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP; 1938 if ((sleep == TAVOR_SLEEP) && 1939 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 1940 /* Set "status" and "errormsg" and goto failure */ 1941 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags"); 1942 goto mrmttbind_fail; 1943 } 1944 1945 /* 1946 * Bind the memory and determine the mapped addresses. This is 1947 * the first of two routines that do all the "heavy lifting" for 1948 * the Tavor memory registration routines. The tavor_mr_mem_bind() 1949 * routine takes the "bind" struct with all its fields filled 1950 * in and returns a list of DMA cookies (for the PCI mapped addresses 1951 * corresponding to the specified address region) which are used by 1952 * the tavor_mr_fast_mtt_write() routine below. If we fail here, we 1953 * must undo all the previous resource allocation (and PD reference 1954 * count). 1955 */ 1956 status = tavor_mr_mem_bind(state, bind, bind_dmahdl, sleep); 1957 if (status != DDI_SUCCESS) { 1958 /* Set "status" and "errormsg" and goto failure */ 1959 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind"); 1960 goto mrmttbind_fail; 1961 } 1962 1963 /* 1964 * Determine number of pages spanned. This routine uses the 1965 * information in the "bind" struct to determine the required 1966 * number of MTT entries needed (and returns the suggested page size - 1967 * as a "power-of-2" - for each MTT entry). 
1968 */ 1969 nummtt = tavor_mr_nummtt_needed(state, bind, mtt_pgsize_bits); 1970 1971 /* 1972 * Allocate the MTT entries. Use the calculations performed above to 1973 * allocate the required number of MTT entries. Note: MTT entries are 1974 * allocated in "MTT segments" which consist of complete cachelines 1975 * (i.e. 8 entries, 16 entries, etc.) So the TAVOR_NUMMTT_TO_MTTSEG() 1976 * macro is used to do the proper conversion. If we fail here, we 1977 * must not only undo all the previous resource allocation (and PD 1978 * reference count), but we must also unbind the memory. 1979 */ 1980 status = tavor_rsrc_alloc(state, TAVOR_MTT, 1981 TAVOR_NUMMTT_TO_MTTSEG(nummtt), sleep, mtt); 1982 if (status != DDI_SUCCESS) { 1983 /* Set "status" and "errormsg" and goto failure */ 1984 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT"); 1985 goto mrmttbind_fail2; 1986 } 1987 1988 /* 1989 * Write the mapped addresses into the MTT entries. This is part two 1990 * of the "heavy lifting" routines that we talked about above. Note: 1991 * we pass the suggested page size from the earlier operation here. 1992 * And if we fail here, we again do pretty much the same huge clean up. 
1993 */ 1994 status = tavor_mr_fast_mtt_write(*mtt, bind, *mtt_pgsize_bits); 1995 if (status != DDI_SUCCESS) { 1996 /* Set "status" and "errormsg" and goto failure */ 1997 TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed write mtt"); 1998 goto mrmttbind_fail3; 1999 } 2000 TAVOR_TNF_EXIT(tavor_mr_mtt_bind); 2001 return (DDI_SUCCESS); 2002 2003 /* 2004 * The following is cleanup for all possible failure cases in this routine 2005 */ 2006 mrmttbind_fail3: 2007 tavor_rsrc_free(state, mtt); 2008 mrmttbind_fail2: 2009 tavor_mr_mem_unbind(state, bind); 2010 mrmttbind_fail: 2011 TNF_PROBE_1(tavor_mr_mtt_bind_fail, TAVOR_TNF_ERROR, "", 2012 tnf_string, msg, errormsg); 2013 TAVOR_TNF_EXIT(tavor_mr_mtt_bind); 2014 return (status); 2015 } 2016 2017 2018 /* 2019 * tavor_mr_mtt_unbind() 2020 * Context: Can be called from interrupt or base context. 2021 */ 2022 int 2023 tavor_mr_mtt_unbind(tavor_state_t *state, tavor_bind_info_t *bind, 2024 tavor_rsrc_t *mtt) 2025 { 2026 TAVOR_TNF_ENTER(tavor_mr_mtt_unbind); 2027 2028 /* 2029 * Free up the MTT entries and unbind the memory. Here, as above, we 2030 * attempt to free these resources only if it is appropriate to do so. 2031 */ 2032 tavor_mr_mem_unbind(state, bind); 2033 tavor_rsrc_free(state, &mtt); 2034 2035 TAVOR_TNF_EXIT(tavor_mr_mtt_unbind); 2036 return (DDI_SUCCESS); 2037 } 2038 2039 2040 /* 2041 * tavor_mr_common_rereg() 2042 * Context: Can be called from interrupt or base context. 
2043 */ 2044 static int 2045 tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr, 2046 tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new, 2047 tavor_mr_options_t *op) 2048 { 2049 tavor_rsrc_t *mpt; 2050 ibt_mr_attr_flags_t acc_flags_to_use; 2051 ibt_mr_flags_t flags; 2052 tavor_pdhdl_t pd_to_use; 2053 tavor_hw_mpt_t mpt_entry; 2054 uint64_t mtt_addr_to_use, vaddr_to_use, len_to_use; 2055 uint_t sleep, dereg_level; 2056 int status; 2057 char *errormsg; 2058 2059 TAVOR_TNF_ENTER(tavor_mr_common_rereg); 2060 2061 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 2062 2063 /* 2064 * Check here to see if the memory region corresponds to a userland 2065 * mapping. Reregistration of userland memory regions is not 2066 * currently supported. Return failure. XXX 2067 */ 2068 if (mr->mr_is_umem) { 2069 /* Set "status" and "errormsg" and goto failure */ 2070 TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl"); 2071 goto mrrereg_fail; 2072 } 2073 2074 mutex_enter(&mr->mr_lock); 2075 2076 /* Pull MPT resource pointer from the Tavor Memory Region handle */ 2077 mpt = mr->mr_mptrsrcp; 2078 2079 /* Extract the flags field from the tavor_bind_info_t */ 2080 flags = bind->bi_flags; 2081 2082 /* 2083 * Check the sleep flag. Ensure that it is consistent with the 2084 * current thread context (i.e. if we are currently in the interrupt 2085 * context, then we shouldn't be attempting to sleep). 2086 */ 2087 sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP; 2088 if ((sleep == TAVOR_SLEEP) && 2089 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) { 2090 mutex_exit(&mr->mr_lock); 2091 /* Set "status" and "errormsg" and goto failure */ 2092 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags"); 2093 goto mrrereg_fail; 2094 } 2095 2096 /* 2097 * First step is to temporarily invalidate the MPT entry. This 2098 * regains ownership from the hardware, and gives us the opportunity 2099 * to modify the entry. 
Note: The HW2SW_MPT command returns the 2100 * current MPT entry contents. These are saved away here because 2101 * they will be reused in a later step below. If the region has 2102 * bound memory windows that we fail returning an "in use" error code. 2103 * Otherwise, this is an unexpected error and we deregister the 2104 * memory region and return error. 2105 * 2106 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect 2107 * against holding the lock around this rereg call in all contexts. 2108 */ 2109 status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry, 2110 sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN); 2111 if (status != TAVOR_CMD_SUCCESS) { 2112 mutex_exit(&mr->mr_lock); 2113 if (status == TAVOR_CMD_REG_BOUND) { 2114 TAVOR_TNF_EXIT(tavor_mr_common_rereg); 2115 return (IBT_MR_IN_USE); 2116 } else { 2117 cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: " 2118 "%08x\n", status); 2119 2120 /* 2121 * Call deregister and ensure that all current 2122 * resources get freed up 2123 */ 2124 if (tavor_mr_deregister(state, &mr, 2125 TAVOR_MR_DEREG_ALL, sleep) != DDI_SUCCESS) { 2126 TAVOR_WARNING(state, "failed to deregister " 2127 "memory region"); 2128 } 2129 TNF_PROBE_1(tavor_mr_common_rereg_hw2sw_mpt_cmd_fail, 2130 TAVOR_TNF_ERROR, "", tnf_uint, status, status); 2131 TAVOR_TNF_EXIT(tavor_mr_common_rereg); 2132 return (ibc_get_ci_failure(0)); 2133 } 2134 } 2135 2136 /* 2137 * If we're changing the protection domain, then validate the new one 2138 */ 2139 if (flags & IBT_MR_CHANGE_PD) { 2140 2141 /* Check for valid PD handle pointer */ 2142 if (pd == NULL) { 2143 mutex_exit(&mr->mr_lock); 2144 /* 2145 * Call deregister and ensure that all current 2146 * resources get properly freed up. Unnecessary 2147 * here to attempt to regain software ownership 2148 * of the MPT entry as that has already been 2149 * done above. 
2150 */ 2151 if (tavor_mr_deregister(state, &mr, 2152 TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) != 2153 DDI_SUCCESS) { 2154 TAVOR_WARNING(state, "failed to deregister " 2155 "memory region"); 2156 } 2157 /* Set "status" and "errormsg" and goto failure */ 2158 TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle"); 2159 goto mrrereg_fail; 2160 } 2161 2162 /* Use the new PD handle in all operations below */ 2163 pd_to_use = pd; 2164 2165 } else { 2166 /* Use the current PD handle in all operations below */ 2167 pd_to_use = mr->mr_pdhdl; 2168 } 2169 2170 /* 2171 * If we're changing access permissions, then validate the new ones 2172 */ 2173 if (flags & IBT_MR_CHANGE_ACCESS) { 2174 /* 2175 * Validate the access flags. Both remote write and remote 2176 * atomic require the local write flag to be set 2177 */ 2178 if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) || 2179 (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) && 2180 !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) { 2181 mutex_exit(&mr->mr_lock); 2182 /* 2183 * Call deregister and ensure that all current 2184 * resources get properly freed up. Unnecessary 2185 * here to attempt to regain software ownership 2186 * of the MPT entry as that has already been 2187 * done above. 2188 */ 2189 if (tavor_mr_deregister(state, &mr, 2190 TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) != 2191 DDI_SUCCESS) { 2192 TAVOR_WARNING(state, "failed to deregister " 2193 "memory region"); 2194 } 2195 /* Set "status" and "errormsg" and goto failure */ 2196 TAVOR_TNF_FAIL(IBT_MR_ACCESS_REQ_INVALID, 2197 "invalid access flags"); 2198 goto mrrereg_fail; 2199 } 2200 2201 /* 2202 * Setup and validate the memory region access flags. This 2203 * means translating the IBTF's enable flags into the access 2204 * flags that will be used in later operations. 
2205 */ 2206 acc_flags_to_use = 0; 2207 if (flags & IBT_MR_ENABLE_WINDOW_BIND) 2208 acc_flags_to_use |= IBT_MR_WINDOW_BIND; 2209 if (flags & IBT_MR_ENABLE_LOCAL_WRITE) 2210 acc_flags_to_use |= IBT_MR_LOCAL_WRITE; 2211 if (flags & IBT_MR_ENABLE_REMOTE_READ) 2212 acc_flags_to_use |= IBT_MR_REMOTE_READ; 2213 if (flags & IBT_MR_ENABLE_REMOTE_WRITE) 2214 acc_flags_to_use |= IBT_MR_REMOTE_WRITE; 2215 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 2216 acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC; 2217 2218 } else { 2219 acc_flags_to_use = mr->mr_accflag; 2220 } 2221 2222 /* 2223 * If we're modifying the translation, then figure out whether 2224 * we can reuse the current MTT resources. This means calling 2225 * tavor_mr_rereg_xlat_helper() which does most of the heavy lifting 2226 * for the reregistration. If the current memory region contains 2227 * sufficient MTT entries for the new regions, then it will be 2228 * reused and filled in. Otherwise, new entries will be allocated, 2229 * the old ones will be freed, and the new entries will be filled 2230 * in. Note: If we're not modifying the translation, then we 2231 * should already have all the information we need to update the MPT. 2232 * Also note: If tavor_mr_rereg_xlat_helper() fails, it will return 2233 * a "dereg_level" which is the level of cleanup that needs to be 2234 * passed to tavor_mr_deregister() to finish the cleanup. 2235 */ 2236 if (flags & IBT_MR_CHANGE_TRANSLATION) { 2237 status = tavor_mr_rereg_xlat_helper(state, mr, bind, op, 2238 &mtt_addr_to_use, sleep, &dereg_level); 2239 if (status != DDI_SUCCESS) { 2240 mutex_exit(&mr->mr_lock); 2241 /* 2242 * Call deregister and ensure that all resources get 2243 * properly freed up. 
2244 */ 2245 if (tavor_mr_deregister(state, &mr, dereg_level, 2246 sleep) != DDI_SUCCESS) { 2247 TAVOR_WARNING(state, "failed to deregister " 2248 "memory region"); 2249 } 2250 2251 /* Set "status" and "errormsg" and goto failure */ 2252 TAVOR_TNF_FAIL(status, "failed rereg helper"); 2253 goto mrrereg_fail; 2254 } 2255 vaddr_to_use = mr->mr_bindinfo.bi_addr; 2256 len_to_use = mr->mr_bindinfo.bi_len; 2257 } else { 2258 mtt_addr_to_use = (((uint64_t)mpt_entry.mttseg_addr_h << 32) | 2259 ((uint64_t)mpt_entry.mttseg_addr_l << 6)); 2260 vaddr_to_use = mr->mr_bindinfo.bi_addr; 2261 len_to_use = mr->mr_bindinfo.bi_len; 2262 } 2263 2264 /* 2265 * Calculate new keys (Lkey, Rkey) from MPT index. Just like they were 2266 * when the region was first registered, each key is formed from 2267 * "constrained" bits and "unconstrained" bits. Note: If no remote 2268 * access is required, then the RKey value is not filled in. Otherwise 2269 * both Rkey and LKey are given the same value. 2270 */ 2271 tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey); 2272 if ((acc_flags_to_use & IBT_MR_REMOTE_READ) || 2273 (acc_flags_to_use & IBT_MR_REMOTE_WRITE) || 2274 (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) { 2275 mr->mr_rkey = mr->mr_lkey; 2276 } 2277 2278 /* 2279 * Update the MPT entry with the new information. Some of this 2280 * information is retained from the previous operation, some of 2281 * it is new based on request. 2282 */ 2283 mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND) ? 1 : 0; 2284 mpt_entry.atomic = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 2285 mpt_entry.rw = (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ? 1 : 0; 2286 mpt_entry.rr = (acc_flags_to_use & IBT_MR_REMOTE_READ) ? 1 : 0; 2287 mpt_entry.lw = (acc_flags_to_use & IBT_MR_LOCAL_WRITE) ? 
1 : 0; 2288 mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC; 2289 mpt_entry.mem_key = mr->mr_lkey; 2290 mpt_entry.pd = pd_to_use->pd_pdnum; 2291 mpt_entry.start_addr = vaddr_to_use; 2292 mpt_entry.reg_win_len = len_to_use; 2293 mpt_entry.mttseg_addr_h = mtt_addr_to_use >> 32; 2294 mpt_entry.mttseg_addr_l = mtt_addr_to_use >> 6; 2295 2296 /* 2297 * Write the updated MPT entry to hardware 2298 * 2299 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect 2300 * against holding the lock around this rereg call in all contexts. 2301 */ 2302 status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 2303 sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN); 2304 if (status != TAVOR_CMD_SUCCESS) { 2305 mutex_exit(&mr->mr_lock); 2306 cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n", 2307 status); 2308 /* 2309 * Call deregister and ensure that all current resources get 2310 * properly freed up. Unnecessary here to attempt to regain 2311 * software ownership of the MPT entry as that has already 2312 * been done above. 2313 */ 2314 if (tavor_mr_deregister(state, &mr, 2315 TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) { 2316 TAVOR_WARNING(state, "failed to deregister memory " 2317 "region"); 2318 } 2319 TNF_PROBE_1(tavor_mr_common_rereg_sw2hw_mpt_cmd_fail, 2320 TAVOR_TNF_ERROR, "", tnf_uint, status, status); 2321 TAVOR_TNF_EXIT(tavor_mr_common_rereg); 2322 return (ibc_get_ci_failure(0)); 2323 } 2324 2325 /* 2326 * If we're changing PD, then update their reference counts now. 2327 * This means decrementing the reference count on the old PD and 2328 * incrementing the reference count on the new PD. 2329 */ 2330 if (flags & IBT_MR_CHANGE_PD) { 2331 tavor_pd_refcnt_dec(mr->mr_pdhdl); 2332 tavor_pd_refcnt_inc(pd); 2333 } 2334 2335 /* 2336 * Update the contents of the Tavor Memory Region handle to reflect 2337 * what has been changed. 
2338 */ 2339 mr->mr_pdhdl = pd_to_use; 2340 mr->mr_accflag = acc_flags_to_use; 2341 mr->mr_is_umem = 0; 2342 mr->mr_is_fmr = 0; 2343 mr->mr_umemcookie = NULL; 2344 2345 /* New MR handle is same as the old */ 2346 *mrhdl_new = mr; 2347 mutex_exit(&mr->mr_lock); 2348 2349 TAVOR_TNF_EXIT(tavor_mr_common_rereg); 2350 return (DDI_SUCCESS); 2351 2352 mrrereg_fail: 2353 TNF_PROBE_1(tavor_mr_common_rereg_fail, TAVOR_TNF_ERROR, "", 2354 tnf_string, msg, errormsg); 2355 TAVOR_TNF_EXIT(tavor_mr_common_rereg); 2356 return (status); 2357 } 2358 2359 2360 /* 2361 * tavor_mr_rereg_xlat_helper 2362 * Context: Can be called from interrupt or base context. 2363 * Note: This routine expects the "mr_lock" to be held when it 2364 * is called. Upon returning failure, this routine passes information 2365 * about what "dereg_level" should be passed to tavor_mr_deregister(). 2366 */ 2367 static int 2368 tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr, 2369 tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr, 2370 uint_t sleep, uint_t *dereg_level) 2371 { 2372 tavor_rsrc_pool_info_t *rsrc_pool; 2373 tavor_rsrc_t *mtt, *mtt_refcnt; 2374 tavor_sw_refcnt_t *swrc_old, *swrc_new; 2375 ddi_dma_handle_t dmahdl; 2376 uint64_t nummtt_needed, nummtt_in_currrsrc, max_sz; 2377 uint64_t mtt_ddrbaseaddr; 2378 uint_t mtt_pgsize_bits, bind_type, reuse_dmahdl; 2379 int status; 2380 char *errormsg; 2381 2382 TAVOR_TNF_ENTER(tavor_mr_rereg_xlat_helper); 2383 2384 ASSERT(MUTEX_HELD(&mr->mr_lock)); 2385 2386 /* 2387 * Check the "options" flag. Currently this flag tells the driver 2388 * whether or not the region should be bound normally (i.e. with 2389 * entries written into the PCI IOMMU) or whether it should be 2390 * registered to bypass the IOMMU. 2391 */ 2392 if (op == NULL) { 2393 bind_type = TAVOR_BINDMEM_NORMAL; 2394 } else { 2395 bind_type = op->mro_bind_type; 2396 } 2397 2398 /* 2399 * Check for invalid length. 
Check is the length is zero or if the 2400 * length is larger than the maximum configured value. Return error 2401 * if it is. 2402 */ 2403 max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz); 2404 if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) { 2405 /* 2406 * Deregister will be called upon returning failure from this 2407 * routine. This will ensure that all current resources get 2408 * properly freed up. Unnecessary to attempt to regain 2409 * software ownership of the MPT entry as that has already 2410 * been done above (in tavor_mr_reregister()) 2411 */ 2412 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT; 2413 2414 /* Set "status" and "errormsg" and goto failure */ 2415 TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length"); 2416 goto mrrereghelp_fail; 2417 } 2418 2419 /* 2420 * Determine the number of pages necessary for new region and the 2421 * number of pages supported by the current MTT resources 2422 */ 2423 nummtt_needed = tavor_mr_nummtt_needed(state, bind, &mtt_pgsize_bits); 2424 nummtt_in_currrsrc = mr->mr_mttrsrcp->tr_len >> TAVOR_MTT_SIZE_SHIFT; 2425 2426 /* 2427 * Depending on whether we have enough pages or not, the next step is 2428 * to fill in a set of MTT entries that reflect the new mapping. In 2429 * the first case below, we already have enough entries. This means 2430 * we need to unbind the memory from the previous mapping, bind the 2431 * memory for the new mapping, write the new MTT entries, and update 2432 * the mr to reflect the changes. 2433 * In the second case below, we do not have enough entries in the 2434 * current mapping. So, in this case, we need not only to unbind the 2435 * current mapping, but we need to free up the MTT resources associated 2436 * with that mapping. After we've successfully done that, we continue 2437 * by binding the new memory, allocating new MTT entries, writing the 2438 * new MTT entries, and updating the mr to reflect the changes. 
2439 */ 2440 2441 /* 2442 * If this region is being shared (i.e. MTT refcount != 1), then we 2443 * can't reuse the current MTT resources regardless of their size. 2444 * Instead we'll need to alloc new ones (below) just as if there 2445 * hadn't been enough room in the current entries. 2446 */ 2447 swrc_old = (tavor_sw_refcnt_t *)mr->mr_mttrefcntp->tr_addr; 2448 if (TAVOR_MTT_IS_NOT_SHARED(swrc_old) && 2449 (nummtt_needed <= nummtt_in_currrsrc)) { 2450 2451 /* 2452 * Unbind the old mapping for this memory region, but retain 2453 * the ddi_dma_handle_t (if possible) for reuse in the bind 2454 * operation below. Note: If original memory region was 2455 * bound for IOMMU bypass and the new region can not use 2456 * bypass, then a new DMA handle will be necessary. 2457 */ 2458 if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) { 2459 mr->mr_bindinfo.bi_free_dmahdl = 0; 2460 tavor_mr_mem_unbind(state, &mr->mr_bindinfo); 2461 dmahdl = mr->mr_bindinfo.bi_dmahdl; 2462 reuse_dmahdl = 1; 2463 } else { 2464 tavor_mr_mem_unbind(state, &mr->mr_bindinfo); 2465 dmahdl = NULL; 2466 reuse_dmahdl = 0; 2467 } 2468 2469 /* 2470 * Bind the new memory and determine the mapped addresses. 2471 * As described, this routine and tavor_mr_fast_mtt_write() 2472 * do the majority of the work for the memory registration 2473 * operations. Note: When we successfully finish the binding, 2474 * we will set the "bi_free_dmahdl" flag to indicate that 2475 * even though we may have reused the ddi_dma_handle_t we do 2476 * wish it to be freed up at some later time. Note also that 2477 * if we fail, we may need to cleanup the ddi_dma_handle_t. 2478 */ 2479 bind->bi_bypass = bind_type; 2480 status = tavor_mr_mem_bind(state, bind, dmahdl, sleep); 2481 if (status != DDI_SUCCESS) { 2482 if (reuse_dmahdl) { 2483 ddi_dma_free_handle(&dmahdl); 2484 } 2485 2486 /* 2487 * Deregister will be called upon returning failure 2488 * from this routine. 
This will ensure that all 2489 * current resources get properly freed up. 2490 * Unnecessary to attempt to regain software ownership 2491 * of the MPT entry as that has already been done 2492 * above (in tavor_mr_reregister()). Also unnecessary 2493 * to attempt to unbind the memory. 2494 */ 2495 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; 2496 2497 /* Set "status" and "errormsg" and goto failure */ 2498 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind"); 2499 goto mrrereghelp_fail; 2500 } 2501 if (reuse_dmahdl) { 2502 bind->bi_free_dmahdl = 1; 2503 } 2504 2505 /* 2506 * Using the new mapping, but reusing the current MTT 2507 * resources, write the updated entries to MTT 2508 */ 2509 mtt = mr->mr_mttrsrcp; 2510 status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits); 2511 if (status != DDI_SUCCESS) { 2512 /* 2513 * Deregister will be called upon returning failure 2514 * from this routine. This will ensure that all 2515 * current resources get properly freed up. 2516 * Unnecessary to attempt to regain software ownership 2517 * of the MPT entry as that has already been done 2518 * above (in tavor_mr_reregister()). Also unnecessary 2519 * to attempt to unbind the memory. 2520 * 2521 * But we do need to unbind the newly bound memory 2522 * before returning. 2523 */ 2524 tavor_mr_mem_unbind(state, bind); 2525 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; 2526 2527 /* Set "status" and "errormsg" and goto failure */ 2528 TAVOR_TNF_FAIL(ibc_get_ci_failure(0), 2529 "failed write mtt"); 2530 goto mrrereghelp_fail; 2531 } 2532 2533 /* Put the updated information into the Mem Region handle */ 2534 mr->mr_bindinfo = *bind; 2535 mr->mr_logmttpgsz = mtt_pgsize_bits; 2536 2537 } else { 2538 /* 2539 * Check if the memory region MTT is shared by any other MRs. 2540 * Since the resource may be shared between multiple memory 2541 * regions (as a result of a "RegisterSharedMR()" verb) it is 2542 * important that we not unbind any resources prematurely. 
2543 */ 2544 if (!TAVOR_MTT_IS_SHARED(swrc_old)) { 2545 /* 2546 * Unbind the old mapping for this memory region, but 2547 * retain the ddi_dma_handle_t for reuse in the bind 2548 * operation below. Note: This can only be done here 2549 * because the region being reregistered is not 2550 * currently shared. Also if original memory region 2551 * was bound for IOMMU bypass and the new region can 2552 * not use bypass, then a new DMA handle will be 2553 * necessary. 2554 */ 2555 if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) { 2556 mr->mr_bindinfo.bi_free_dmahdl = 0; 2557 tavor_mr_mem_unbind(state, &mr->mr_bindinfo); 2558 dmahdl = mr->mr_bindinfo.bi_dmahdl; 2559 reuse_dmahdl = 1; 2560 } else { 2561 tavor_mr_mem_unbind(state, &mr->mr_bindinfo); 2562 dmahdl = NULL; 2563 reuse_dmahdl = 0; 2564 } 2565 } else { 2566 dmahdl = NULL; 2567 reuse_dmahdl = 0; 2568 } 2569 2570 /* 2571 * Bind the new memory and determine the mapped addresses. 2572 * As described, this routine and tavor_mr_fast_mtt_write() 2573 * do the majority of the work for the memory registration 2574 * operations. Note: When we successfully finish the binding, 2575 * we will set the "bi_free_dmahdl" flag to indicate that 2576 * even though we may have reused the ddi_dma_handle_t we do 2577 * wish it to be freed up at some later time. Note also that 2578 * if we fail, we may need to cleanup the ddi_dma_handle_t. 2579 */ 2580 bind->bi_bypass = bind_type; 2581 status = tavor_mr_mem_bind(state, bind, dmahdl, sleep); 2582 if (status != DDI_SUCCESS) { 2583 if (reuse_dmahdl) { 2584 ddi_dma_free_handle(&dmahdl); 2585 } 2586 2587 /* 2588 * Deregister will be called upon returning failure 2589 * from this routine. This will ensure that all 2590 * current resources get properly freed up. 2591 * Unnecessary to attempt to regain software ownership 2592 * of the MPT entry as that has already been done 2593 * above (in tavor_mr_reregister()). Also unnecessary 2594 * to attempt to unbind the memory. 
2595 */ 2596 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; 2597 2598 /* Set "status" and "errormsg" and goto failure */ 2599 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind"); 2600 goto mrrereghelp_fail; 2601 } 2602 if (reuse_dmahdl) { 2603 bind->bi_free_dmahdl = 1; 2604 } 2605 2606 /* 2607 * Allocate the new MTT entries resource 2608 */ 2609 status = tavor_rsrc_alloc(state, TAVOR_MTT, 2610 TAVOR_NUMMTT_TO_MTTSEG(nummtt_needed), sleep, &mtt); 2611 if (status != DDI_SUCCESS) { 2612 /* 2613 * Deregister will be called upon returning failure 2614 * from this routine. This will ensure that all 2615 * current resources get properly freed up. 2616 * Unnecessary to attempt to regain software ownership 2617 * of the MPT entry as that has already been done 2618 * above (in tavor_mr_reregister()). Also unnecessary 2619 * to attempt to unbind the memory. 2620 * 2621 * But we do need to unbind the newly bound memory 2622 * before returning. 2623 */ 2624 tavor_mr_mem_unbind(state, bind); 2625 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; 2626 2627 /* Set "status" and "errormsg" and goto failure */ 2628 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT"); 2629 goto mrrereghelp_fail; 2630 } 2631 2632 /* 2633 * Allocate MTT reference count (to track shared memory 2634 * regions). As mentioned elsewhere above, this reference 2635 * count resource may never be used on the given memory region, 2636 * but if it is ever later registered as a "shared" memory 2637 * region then this resource will be necessary. Note: This 2638 * is only necessary here if the existing memory region is 2639 * already being shared (because otherwise we already have 2640 * a useable reference count resource). 2641 */ 2642 if (TAVOR_MTT_IS_SHARED(swrc_old)) { 2643 status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1, 2644 sleep, &mtt_refcnt); 2645 if (status != DDI_SUCCESS) { 2646 /* 2647 * Deregister will be called upon returning 2648 * failure from this routine. 
This will ensure 2649 * that all current resources get properly 2650 * freed up. Unnecessary to attempt to regain 2651 * software ownership of the MPT entry as that 2652 * has already been done above (in 2653 * tavor_mr_reregister()). Also unnecessary 2654 * to attempt to unbind the memory. 2655 * 2656 * But we need to unbind the newly bound 2657 * memory and free up the newly allocated MTT 2658 * entries before returning. 2659 */ 2660 tavor_mr_mem_unbind(state, bind); 2661 tavor_rsrc_free(state, &mtt); 2662 *dereg_level = 2663 TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; 2664 2665 /* Set "status"/"errormsg", goto failure */ 2666 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, 2667 "failed reference count"); 2668 goto mrrereghelp_fail; 2669 } 2670 swrc_new = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr; 2671 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new)) 2672 TAVOR_MTT_REFCNT_INIT(swrc_new); 2673 } else { 2674 mtt_refcnt = mr->mr_mttrefcntp; 2675 } 2676 2677 /* 2678 * Using the new mapping and the new MTT resources, write the 2679 * updated entries to MTT 2680 */ 2681 status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits); 2682 if (status != DDI_SUCCESS) { 2683 /* 2684 * Deregister will be called upon returning failure 2685 * from this routine. This will ensure that all 2686 * current resources get properly freed up. 2687 * Unnecessary to attempt to regain software ownership 2688 * of the MPT entry as that has already been done 2689 * above (in tavor_mr_reregister()). Also unnecessary 2690 * to attempt to unbind the memory. 2691 * 2692 * But we need to unbind the newly bound memory, 2693 * free up the newly allocated MTT entries, and 2694 * (possibly) free the new MTT reference count 2695 * resource before returning. 
2696 */ 2697 if (TAVOR_MTT_IS_SHARED(swrc_old)) { 2698 tavor_rsrc_free(state, &mtt_refcnt); 2699 } 2700 tavor_mr_mem_unbind(state, bind); 2701 tavor_rsrc_free(state, &mtt); 2702 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; 2703 2704 /* Set "status" and "errormsg" and goto failure */ 2705 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed write mtt"); 2706 goto mrrereghelp_fail; 2707 } 2708 2709 /* 2710 * Check if the memory region MTT is shared by any other MRs. 2711 * Since the resource may be shared between multiple memory 2712 * regions (as a result of a "RegisterSharedMR()" verb) it is 2713 * important that we not free up any resources prematurely. 2714 */ 2715 if (TAVOR_MTT_IS_SHARED(swrc_old)) { 2716 /* Decrement MTT reference count for "old" region */ 2717 (void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp); 2718 } else { 2719 /* Free up the old MTT entries resource */ 2720 tavor_rsrc_free(state, &mr->mr_mttrsrcp); 2721 } 2722 2723 /* Put the updated information into the mrhdl */ 2724 mr->mr_bindinfo = *bind; 2725 mr->mr_logmttpgsz = mtt_pgsize_bits; 2726 mr->mr_mttrsrcp = mtt; 2727 mr->mr_mttrefcntp = mtt_refcnt; 2728 } 2729 2730 /* 2731 * Calculate and return the updated MTT address (in the DDR address 2732 * space). This will be used by the caller (tavor_mr_reregister) in 2733 * the updated MPT entry 2734 */ 2735 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT]; 2736 mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset; 2737 *mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << 2738 TAVOR_MTT_SIZE_SHIFT); 2739 2740 TAVOR_TNF_EXIT(tavor_mr_rereg_xlat_helper); 2741 return (DDI_SUCCESS); 2742 2743 mrrereghelp_fail: 2744 TNF_PROBE_1(tavor_mr_rereg_xlat_helper_fail, TAVOR_TNF_ERROR, "", 2745 tnf_string, msg, errormsg); 2746 TAVOR_TNF_EXIT(tavor_mr_rereg_xlat_helper); 2747 return (status); 2748 } 2749 2750 2751 /* 2752 * tavor_mr_nummtt_needed() 2753 * Context: Can be called from interrupt or base context. 
 */
/* ARGSUSED */
static uint64_t
tavor_mr_nummtt_needed(tavor_state_t *state, tavor_bind_info_t *bind,
    uint_t *mtt_pgsize_bits)
{
	uint64_t	pg_offset_mask;
	uint64_t	pg_offset, tmp_length;

	/*
	 * For now we specify the page size as 8Kb (the default page size for
	 * the sun4u architecture), or 4Kb for x86.  Figure out optimal page
	 * size by examining the dmacookies XXX
	 */
	*mtt_pgsize_bits = PAGESHIFT;

	/*
	 * The number of MTT entries required is the number of native pages
	 * spanned by the region: fold the region's starting in-page offset
	 * into the length, then round up to whole pages.
	 */
	pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
	pg_offset = bind->bi_addr & pg_offset_mask;
	tmp_length = pg_offset + (bind->bi_len - 1);
	return ((tmp_length >> *mtt_pgsize_bits) + 1);
}


/*
 * tavor_mr_mem_bind()
 *    Context: Can be called from interrupt or base context.
 *
 *    DMA-binds the memory described by "bind", filling in the bind info's
 *    DMA handle, first cookie, and cookie count.  If "dmahdl" is NULL, a
 *    handle is allocated here and bi_free_dmahdl is set so that
 *    tavor_mr_mem_unbind() knows to free it; otherwise the caller's handle
 *    is used and is the caller's responsibility to free.
 */
static int
tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep)
{
	ddi_dma_attr_t	dma_attr;
	int		(*callback)(caddr_t);
	uint_t		dma_xfer_mode;
	int		status;

	/* bi_type must be set to a meaningful value to get a bind handle */
	ASSERT(bind->bi_type == TAVOR_BINDHDL_VADDR ||
	    bind->bi_type == TAVOR_BINDHDL_BUF ||
	    bind->bi_type == TAVOR_BINDHDL_UBUF);

	TAVOR_TNF_ENTER(tavor_mr_mem_bind);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/* Set the callback flag appropriately (sleep vs. no-sleep alloc) */
	callback = (sleep == TAVOR_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;

	/* Determine whether to map STREAMING or CONSISTENT */
	dma_xfer_mode = (bind->bi_flags & IBT_MR_NONCOHERENT) ?
	    DDI_DMA_STREAMING : DDI_DMA_CONSISTENT;

	/*
	 * Initialize many of the default DMA attributes.  Then, if we're
	 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
	 */
	if (dmahdl == NULL) {
		tavor_dma_attr_init(&dma_attr);
#ifdef	__sparc
		/*
		 * First, disable streaming and switch to consistent if
		 * configured to do so and IOMMU BYPASS is enabled.
		 */
		if (state->ts_cfg_profile->cp_disable_streaming_on_bypass &&
		    dma_xfer_mode == DDI_DMA_STREAMING &&
		    bind->bi_bypass == TAVOR_BINDMEM_BYPASS) {
			dma_xfer_mode = DDI_DMA_CONSISTENT;
		}

		/*
		 * Then, request IOMMU bypass (DDI_DMA_FORCE_PHYSICAL) only
		 * when the mapping ended up CONSISTENT.  In other words, if
		 * streaming is still specified at this point, then "bypass"
		 * is not allowed.
		 */
		if ((dma_xfer_mode == DDI_DMA_CONSISTENT) &&
		    (bind->bi_bypass == TAVOR_BINDMEM_BYPASS)) {
			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
		}
#endif
		/* Allocate a DMA handle for the binding */
		status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr,
		    callback, NULL, &bind->bi_dmahdl);
		if (status != DDI_SUCCESS) {
			TNF_PROBE_0(tavor_mr_mem_bind_dmahdl_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_mr_mem_bind);
			return (status);
		}
		bind->bi_free_dmahdl = 1;

	} else {
		bind->bi_dmahdl = dmahdl;
		bind->bi_free_dmahdl = 0;
	}

	/*
	 * Bind the memory to get the PCI mapped addresses.  The decision
	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
	 * is determined by the "bi_type" flag.  Note: if the bind operation
	 * fails then we have to free up the DMA handle and return error.
	 */
	if (bind->bi_type == TAVOR_BINDHDL_VADDR) {
		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
		    (DDI_DMA_RDWR | dma_xfer_mode), callback, NULL,
		    &bind->bi_dmacookie, &bind->bi_cookiecnt);
	} else { /* TAVOR_BINDHDL_BUF || TAVOR_BINDHDL_UBUF */
		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
		    bind->bi_buf, (DDI_DMA_RDWR | dma_xfer_mode), callback,
		    NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
	}

	if (status != DDI_DMA_MAPPED) {
		/* Only free the DMA handle if it was allocated above */
		if (bind->bi_free_dmahdl != 0) {
			ddi_dma_free_handle(&bind->bi_dmahdl);
		}
		TNF_PROBE_0(tavor_mr_mem_bind_dmabind_fail, TAVOR_TNF_ERROR,
		    "");
		TAVOR_TNF_EXIT(tavor_mr_mem_bind);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_mem_bind);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_mem_unbind()
 *    Context: Can be called from interrupt or base context.
 *
 *    Undoes tavor_mr_mem_bind(): releases the UBUF buf (if any), unbinds
 *    the DMA mapping, and frees the DMA handle if it was allocated by the
 *    bind (bi_free_dmahdl != 0).
 */
static void
tavor_mr_mem_unbind(tavor_state_t *state, tavor_bind_info_t *bind)
{
	int	status;

	TAVOR_TNF_ENTER(tavor_mr_mem_unbind);

	/*
	 * In case of TAVOR_BINDHDL_UBUF, the memory bi_buf points to
	 * is actually allocated by ddi_umem_iosetup() internally, then
	 * it's required to free it here. Reset bi_type to TAVOR_BINDHDL_NONE
	 * not to free it again later.
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
		freerbuf(bind->bi_buf);
		bind->bi_type = TAVOR_BINDHDL_NONE;
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Unbind the DMA memory for the region
	 *
	 * Note: The only way ddi_dma_unbind_handle() currently
	 * can return an error is if the handle passed in is invalid.
	 * Since this should never happen, we choose to return void
	 * from this function!  If this does return an error, however,
	 * then we print a warning message to the console.
	 */
	status = ddi_dma_unbind_handle(bind->bi_dmahdl);
	if (status != DDI_SUCCESS) {
		TAVOR_WARNING(state, "failed to unbind DMA mapping");
		TNF_PROBE_0(tavor_mr_mem_unbind_dmaunbind_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
		return;
	}

	/* Free up the DMA handle (only if allocated by the bind itself) */
	if (bind->bi_free_dmahdl != 0) {
		ddi_dma_free_handle(&bind->bi_dmahdl);
	}

	TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
}


/*
 * tavor_mr_fast_mtt_write()
 *    Context: Can be called from interrupt or base context.
 *
 *    Walks the DMA cookie list in "bind" and writes one MTT entry (via
 *    ddi_put64()) for every page of every cookie, marking each entry
 *    with TAVOR_MTT_ENTRY_PRESET.
 */
static int
tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind,
    uint32_t mtt_pgsize_bits)
{
	ddi_dma_cookie_t	dmacookie;
	uint_t			cookie_cnt;
	uint64_t		*mtt_table;
	uint64_t		mtt_entry;
	uint64_t		addr, endaddr;
	uint64_t		pagesize;
	int			i;

	TAVOR_TNF_ENTER(tavor_mr_fast_mtt_write);

	/* Calculate page size from the suggested value passed in */
	pagesize = ((uint64_t)1 << mtt_pgsize_bits);

	/*
	 * Walk the "cookie list" and fill in the MTT table entries
	 */
	i = 0;
	mtt_table = (uint64_t *)mtt->tr_addr;
	dmacookie = bind->bi_dmacookie;
	cookie_cnt = bind->bi_cookiecnt;
	while (cookie_cnt-- > 0) {
		addr = dmacookie.dmac_laddress;
		endaddr = addr + (dmacookie.dmac_size - 1);
		/* Round the cookie's start down to a page boundary */
		addr = addr & ~((uint64_t)pagesize - 1);
		while (addr <= endaddr) {
			/*
			 * Fill in the mapped addresses (calculated above) and
			 * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry.
			 */
			mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET;
			ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry);
			addr += pagesize;
			i++;

			/*
			 * A wrap to zero means the cookie's address range
			 * overflowed 64 bits -- a bogus cookie.  Warn once
			 * and bail out of this cookie.
			 */
			if (addr == 0) {
				static int do_once = 1;
				_NOTE(SCHEME_PROTECTS_DATA("safe sharing",
				    do_once))
				if (do_once) {
					do_once = 0;
					cmn_err(CE_NOTE, "probable error in "
					    "dma_cookie address from caller\n");
				}
				break;
			}
		}

		/*
		 * When we've reached the end of the current DMA cookie,
		 * jump to the next cookie (if there are more)
		 */
		if (cookie_cnt != 0) {
			ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
		}
	}

	TAVOR_TNF_EXIT(tavor_mr_fast_mtt_write);
	return (DDI_SUCCESS);
}

/*
 * tavor_mr_fast_mtt_write_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 *    Fills in MTT entries for an FMR registration from the caller-supplied
 *    physical buffer list in "mem_pattr".
 */
static int
tavor_mr_fast_mtt_write_fmr(tavor_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr,
    uint32_t mtt_pgsize_bits)
{
	uint64_t		*mtt_table;
	ibt_phys_addr_t		*buf;
	uint64_t		mtt_entry;
	uint64_t		addr, first_addr, endaddr;
	uint64_t		pagesize;
	int			i;

	TAVOR_TNF_ENTER(tavor_mr_fast_mtt_write_fmr);

	/* Calculate page size from the suggested value passed in */
	pagesize = ((uint64_t)1 << mtt_pgsize_bits);

	/*
	 * Walk the "buf list" and fill in the MTT table entries
	 */
	mtt_table = (uint64_t *)mtt->tr_addr;
	for (i = 0; i < mem_pattr->pmr_num_buf; i++) {
		buf = &mem_pattr->pmr_addr_list[i];

		/*
		 * For first cookie, use the offset field to determine where
		 * the buffer starts.  The end addr is then calculated with
		 * the offset in mind.
		 */
		if (i == 0) {
			first_addr = addr = buf->p_laddr +
			    mem_pattr->pmr_offset;
			endaddr = addr + (mem_pattr->pmr_buf_sz - 1) -
			    mem_pattr->pmr_offset;
		/*
		 * For last cookie, determine end addr based on starting
		 * address and size of the total buffer.
		 *
		 * NOTE(review): "&" binds more loosely than "+", so the
		 * expression below computes (first_addr + pmr_len) &
		 * (pmr_buf_sz - 1) -- presumably the residual length within
		 * the final buffer (pmr_buf_sz assumed a power of two).
		 * Confirm this precedence is intentional.
		 */
		} else if (i == mem_pattr->pmr_num_buf - 1) {
			addr = buf->p_laddr;
			endaddr = addr + (first_addr + mem_pattr->pmr_len &
			    (mem_pattr->pmr_buf_sz - 1));
		/*
		 * For the middle cookies case, start and end addr are
		 * straightforward.  Just use the laddr, and the size, as all
		 * middle cookies are a set size.
		 */
		} else {
			addr = buf->p_laddr;
			endaddr = addr + (mem_pattr->pmr_buf_sz - 1);
		}

		/* Round the buffer's start down to a page boundary */
		addr = addr & ~((uint64_t)pagesize - 1);
		while (addr <= endaddr) {
			/*
			 * Fill in the mapped addresses (calculated above) and
			 * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry.
			 *
			 * NOTE(review): the destination index "i" advances
			 * once per buffer (outer loop), not once per page, so
			 * a buffer spanning multiple pages would overwrite
			 * the same MTT slot.  This appears to assume each
			 * FMR buffer covers at most one page -- confirm
			 * against the FMR allocation path.
			 */
			mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET;
			ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry);
			addr += pagesize;
		}
	}

	TAVOR_TNF_EXIT(tavor_mr_fast_mtt_write_fmr);
	return (DDI_SUCCESS);
}


/*
 * tavor_mtt_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 *
 *    Note: returns the *pre*-increment reference count (in contrast to
 *    tavor_mtt_refcnt_dec(), which returns the post-decrement count).
 */
static int
tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc)
{
	tavor_sw_refcnt_t	*rc;
	uint32_t		cnt;

	rc = (tavor_sw_refcnt_t *)rsrc->tr_addr;

	/* Increment the MTT's reference count (under the refcount lock) */
	mutex_enter(&rc->swrc_lock);
	TNF_PROBE_1_DEBUG(tavor_mtt_refcnt_inc, TAVOR_TNF_TRACE, "",
	    tnf_uint, refcnt, rc->swrc_refcnt);
	cnt = rc->swrc_refcnt++;
	mutex_exit(&rc->swrc_lock);

	return (cnt);
}


/*
 * tavor_mtt_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
3097 */ 3098 static int 3099 tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc) 3100 { 3101 tavor_sw_refcnt_t *rc; 3102 uint32_t cnt; 3103 3104 rc = (tavor_sw_refcnt_t *)rsrc->tr_addr; 3105 3106 /* Decrement the MTT's reference count */ 3107 mutex_enter(&rc->swrc_lock); 3108 cnt = --rc->swrc_refcnt; 3109 TNF_PROBE_1_DEBUG(tavor_mtt_refcnt_dec, TAVOR_TNF_TRACE, "", 3110 tnf_uint, refcnt, rc->swrc_refcnt); 3111 mutex_exit(&rc->swrc_lock); 3112 3113 return (cnt); 3114 } 3115