/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * tavor_mr.c
 *    Tavor Memory Region/Window Routines
 *
 *    Implements all the routines necessary to provide the requisite memory
 *    registration verbs.  These include operations like RegisterMemRegion(),
 *    DeregisterMemRegion(), ReregisterMemRegion(), RegisterSharedMemRegion(),
 *    etc., that affect Memory Regions.  It also includes the verbs that
 *    affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
 *    and QueryMemWindow().
 */
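
/*
 * Illustrative call sequence (sketch only; setup of "state", "pd", and
 * "mr_attr" is elided):
 *
 *	status = tavor_mr_register(state, pd, &mr_attr, &mrhdl, NULL);
 *	...
 *	status = tavor_mr_deregister(state, &mrhdl, TAVOR_MR_DEREG_ALL,
 *	    TAVOR_SLEEP);
 */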

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/esunddi.h>

#include <sys/ib/adapters/tavor/tavor.h>


/*
 * Used by tavor_mr_keycalc() below to fill in the "unconstrained" portion
 * of Tavor memory keys (LKeys and RKeys)
 */
static uint_t tavor_debug_memkey_cnt = 0x00000000;

static int tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op);
static int tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op);
static int tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level);
static uint64_t tavor_mr_nummtt_needed(tavor_state_t *state,
    tavor_bind_info_t *bind, uint_t *mtt_pgsize);
static int tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep);
static void tavor_mr_mem_unbind(tavor_state_t *state,
    tavor_bind_info_t *bind);
static int tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind,
    uint32_t mtt_pgsize_bits);
static int tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc);
static int tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc);

/*
 * The Tavor umem_lockmemory() callback ops.  When userland memory is
 * registered, these callback ops are specified.  The tavor_umap_umemlock_cb()
 * callback will be called whenever the memory for the corresponding
 * ddi_umem_cookie_t is being freed.
 */
static struct umem_callback_ops tavor_umem_cbops = {
	UMEM_CALLBACK_VERSION,
	tavor_umap_umemlock_cb,
};


/*
 * tavor_mr_register()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
{
	tavor_bind_info_t bind;
	int status;

	TAVOR_TNF_ENTER(tavor_mr_register);

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (as is the case here) and a "buf" binding (see
	 * below).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration routines.
	 */
	bind.bi_type = TAVOR_BINDHDL_VADDR;
	bind.bi_addr = mr_attr->mr_vaddr;
	bind.bi_len = mr_attr->mr_len;
	bind.bi_as = mr_attr->mr_as;
	bind.bi_flags = mr_attr->mr_flags;
	status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_mr_register_cmnreg_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_register);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_register);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_register_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_register_buf(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_smr_attr_t *mr_attr, struct buf *buf, tavor_mrhdl_t *mrhdl,
    tavor_mr_options_t *op)
{
	tavor_bind_info_t bind;
	int status;

	TAVOR_TNF_ENTER(tavor_mr_register_buf);

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (see above) and a "buf" binding (as is the case
	 * here).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration routines.  Note: We have chosen to provide
	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
	 * not set).  It is not critical what value we choose here as it need
	 * only be unique for the given RKey (which will happen by default),
	 * so the choice here is somewhat arbitrary.
	 */
	bind.bi_type = TAVOR_BINDHDL_BUF;
	bind.bi_buf = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	bind.bi_as = NULL;
	bind.bi_len = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_mr_register_buf_cmnreg_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_register_buf);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_register_buf);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_register_shared()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_register_shared(tavor_state_t *state, tavor_mrhdl_t mrhdl,
    tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new)
{
	tavor_rsrc_pool_info_t *rsrc_pool;
	tavor_rsrc_t *mpt, *mtt, *rsrc;
	tavor_umap_db_entry_t *umapdb;
	tavor_hw_mpt_t mpt_entry;
	tavor_mrhdl_t mr;
	tavor_bind_info_t *bind;
	ddi_umem_cookie_t umem_cookie;
	size_t umem_len;
	caddr_t umem_addr;
	uint64_t mtt_addr, mtt_ddrbaseaddr, pgsize_msk;
	uint_t sleep, mr_is_umem;
	int status, umem_flags;
	char *errormsg;

	TAVOR_TNF_ENTER(tavor_mr_register_shared);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP :
	    TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrshared_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the shared memory region.
	 * Specifically, it will be made to reference the currently existing
	 * MTT entries and ownership of the MPT will be passed to the hardware
	 * in the last step below.  If we fail here, we must undo the
	 * protection domain reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto mrshared_fail1;
	}

	/*
	 * Allocate the software structure for tracking the shared memory
	 * region (i.e. the Tavor Memory Region handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto mrshared_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/* Grab the MR lock for the current memory region */
	mutex_enter(&mrhdl->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a tavor_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
		mutex_exit(&mrhdl->mr_lock);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
		goto mrshared_fail3;
	}

	/*
	 * Determine if the original memory was from userland and, if so, pin
	 * the pages (again) with umem_lockmemory().  This will guarantee a
	 * separate callback for each of this shared region's MR handles.
	 * If this is userland memory, then allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo all the above setup.
	 */
	mr_is_umem = mrhdl->mr_is_umem;
	if (mr_is_umem) {
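		/*
		 * Round the pinned range out to whole pages: pad the length
		 * by the offset into the first page, round up, and truncate
		 * the start address down to a page boundary.
		 */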
		umem_len = ptob(btopr(mrhdl->mr_bindinfo.bi_len +
		    ((uintptr_t)mrhdl->mr_bindinfo.bi_addr & PAGEOFFSET)));
		umem_addr = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
		    ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &tavor_umem_cbops, NULL);
		if (status != 0) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
			goto mrshared_fail3;
		}

		umapdb = tavor_umap_db_alloc(state->ts_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
			goto mrshared_fail4;
		}
	}

	/*
	 * Copy the MTT resource pointer (and additional parameters) from
	 * the original Tavor Memory Region handle.  Note: this is normally
	 * where the tavor_mr_mem_bind() routine would be called, but because
	 * we already have bound and filled-in MTT entries it is simply a
	 * matter here of managing the MTT reference count and grabbing the
	 * address of the MTT table entries (for filling in the shared region's
	 * MPT entry).
	 */
	mr->mr_mttrsrcp = mrhdl->mr_mttrsrcp;
	mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
	mr->mr_bindinfo = mrhdl->mr_bindinfo;
	mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
	mutex_exit(&mrhdl->mr_lock);
	bind = &mr->mr_bindinfo;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	mtt = mr->mr_mttrsrcp;

	/*
	 * Increment the MTT reference count (to reflect the fact that
	 * the MTT is now shared)
	 */
	(void) tavor_mtt_refcnt_inc(mr->mr_mttrefcntp);

	/*
	 * Update the new "bind" virtual address.  Do some extra work here
	 * to ensure proper alignment.  That is, make sure that the page
	 * offset for the beginning of the old range is the same as the
	 * offset for this new mapping.
	 */
	pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
	bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
	    (mr->mr_bindinfo.bi_addr & pgsize_msk));
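	/*
	 * Illustrative example (values assumed, not taken from the code
	 * above): with a 4KB MTT page size (mr_logmttpgsz == 12), an
	 * original binding at 0x10234 and a new mr_vaddr of 0x20000, this
	 * yields (0x20000 & ~0xFFF) | (0x10234 & 0xFFF) == 0x20234,
	 * preserving the original in-page offset.
	 */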

	/*
	 * Get the base address for the MTT table.  This will be necessary
	 * in the next step when we are setting up the MPT entry.
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
	mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
	mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
	mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
	mpt_entry.lr = 1;
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
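	/*
	 * Note: the hardware encodes the page size as log2(bytes) - 12
	 * (0xC), i.e. relative to the minimum 4KB page.
	 */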
	mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key = mr->mr_lkey;
	mpt_entry.pd = pd->pd_pdnum;
	mpt_entry.start_addr = bind->bi_addr;
	mpt_entry.reg_win_len = bind->bi_len;
	mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND;
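	/*
	 * The MTT segment address is split across two MPT fields: the upper
	 * 32 bits, and the low bits shifted right by 6 (MTT segments are
	 * 64-byte aligned, so the bottom 6 bits are implied zero).
	 */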
	mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
	mpt_entry.mttseg_addr_h = mtt_addr >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr >> 6;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mr_register_shared_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto mrshared_fail5;
	}

	/*
	 * Fill in the rest of the Tavor Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp = mpt;
	mr->mr_mttrsrcp = mtt;
	mr->mr_pdhdl = pd;
	mr->mr_rsrcp = rsrc;
	mr->mr_is_umem = mr_is_umem;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the tavor_umap_umemlock_cb()
	 * callback and tavor_mr_deregister().
	 */
	if (mr_is_umem) {
		tavor_umap_db_add(umapdb);
	}

	*mrhdl_new = mr;

	TAVOR_TNF_EXIT(tavor_mr_register_shared);
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrshared_fail5:
	(void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
	if (mr_is_umem) {
		tavor_umap_db_free(umapdb);
	}
mrshared_fail4:
	if (mr_is_umem) {
		ddi_umem_unlock(umem_cookie);
	}
mrshared_fail3:
	tavor_rsrc_free(state, &rsrc);
mrshared_fail2:
	tavor_rsrc_free(state, &mpt);
mrshared_fail1:
	tavor_pd_refcnt_dec(pd);
mrshared_fail:
	TNF_PROBE_1(tavor_mr_register_shared_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_register_shared);
	return (status);
}


/*
 * tavor_mr_deregister()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_deregister(tavor_state_t *state, tavor_mrhdl_t *mrhdl, uint_t level,
    uint_t sleep)
{
	tavor_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
	tavor_umap_db_entry_t *umapdb;
	tavor_pdhdl_t pd;
	tavor_mrhdl_t mr;
	tavor_bind_info_t *bind;
	uint64_t value;
	int status, shared_mtt;
	char *errormsg;

	TAVOR_TNF_ENTER(tavor_mr_deregister);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
		TNF_PROBE_1(tavor_mr_deregister_fail, TAVOR_TNF_ERROR, "",
		    tnf_string, msg, errormsg);
		TAVOR_TNF_EXIT(tavor_mr_deregister);
		return (status);
	}

	/*
	 * Pull all the necessary information from the Tavor Memory Region
	 * handle.  This is necessary here because the resource for the
	 * MR handle is going to be freed up as part of this
	 * deregistration.
	 */
	mr = *mrhdl;
	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mtt = mr->mr_mttrsrcp;
	mtt_refcnt = mr->mr_mttrefcntp;
	rsrc = mr->mr_rsrcp;
	pd = mr->mr_pdhdl;
	bind = &mr->mr_bindinfo;

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of the tavor_umap_umemlock_cb() callback.
	 * If so, then jump to the end and free the remaining resources.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		goto mrdereg_finish_cleanup;
	}

	/*
	 * We must drop the "mr_lock" here to ensure that both SLEEP and
	 * NOSLEEP calls into the firmware work as expected.  Also, if two
	 * threads are attempting to access this MR (via de-register,
	 * re-register, or otherwise), then we allow the firmware to enforce
	 * the check that only one deregister is valid.
	 */
	mutex_exit(&mr->mr_lock);

	/*
	 * Reclaim MPT entry from hardware (if necessary).  Since the
	 * tavor_mr_deregister() routine is used in the memory region
	 * reregistration process as well, it is possible that we will
	 * not always wish to reclaim ownership of the MPT.  Check the
	 * "level" arg and, if necessary, attempt to reclaim it.  If
	 * the ownership transfer fails for any reason, we check to see
	 * what command status was returned from the hardware.  The only
	 * "expected" error status is the one that indicates an attempt to
	 * deregister a memory region that has memory windows bound to it.
	 */
	if (level >= TAVOR_MR_DEREG_ALL) {
		status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT,
		    NULL, 0, mpt->tr_indx, sleep);
		if (status != TAVOR_CMD_SUCCESS) {
			if (status == TAVOR_CMD_REG_BOUND) {
				TAVOR_TNF_EXIT(tavor_mr_deregister);
				return (IBT_MR_IN_USE);
			} else {
				cmn_err(CE_CONT, "Tavor: HW2SW_MPT command "
				    "failed: %08x\n", status);
				TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail,
				    TAVOR_TNF_ERROR, "", tnf_uint, status,
				    status);
				TAVOR_TNF_EXIT(tavor_mr_deregister);
				return (IBT_INVALID_PARAM);
			}
		}
	}

	/*
	 * Re-grab the mr_lock here.  Since further access to the protected
	 * 'mr' structure is needed, and we would have returned previously for
	 * the multiple deregistration case, we can safely grab the lock here.
	 */
	mutex_enter(&mr->mr_lock);

	/*
	 * If the memory had come from userland, then we do a lookup in the
	 * "userland resources database".  On success, we free the entry, call
	 * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
	 * an indication that the umem_lockmemory() callback has called
	 * tavor_mr_deregister()), we call ddi_umem_unlock() and invalidate
	 * the "mr_umemcookie" field in the MR handle (this will be used
	 * later to detect that only partial cleanup still remains to be done
	 * on the MR handle).
	 */
	if (mr->mr_is_umem) {
		status = tavor_umap_db_find(state->ts_instance,
		    (uint64_t)(uintptr_t)mr->mr_umemcookie,
		    MLNX_UMAP_MRMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
		    &umapdb);
		if (status == DDI_SUCCESS) {
			tavor_umap_db_free(umapdb);
			ddi_umem_unlock(mr->mr_umemcookie);
		} else {
			ddi_umem_unlock(mr->mr_umemcookie);
			mr->mr_umemcookie = NULL;
		}
	}

	/* mtt_refcnt is NULL in the case of tavor_dma_mr_register() */
	if (mtt_refcnt != NULL) {
		/*
		 * Decrement the MTT reference count.  Since the MTT resource
		 * may be shared between multiple memory regions (as a result
		 * of a "RegisterSharedMR" verb) it is important that we not
		 * free up or unbind resources prematurely.  If it's not shared
		 * (as indicated by the return status), then free the resource.
		 */
		shared_mtt = tavor_mtt_refcnt_dec(mtt_refcnt);
		if (!shared_mtt) {
			tavor_rsrc_free(state, &mtt_refcnt);
		}

		/*
		 * Free up the MTT entries and unbind the memory.  Here,
		 * as above, we attempt to free these resources only if
		 * it is appropriate to do so.
		 */
		if (!shared_mtt) {
			if (level >= TAVOR_MR_DEREG_NO_HW2SW_MPT) {
				tavor_mr_mem_unbind(state, bind);
			}
			tavor_rsrc_free(state, &mtt);
		}
	}

	/*
	 * If the MR handle has been invalidated, then drop the
	 * lock and return success.  Note: This only happens because
	 * the umem_lockmemory() callback has been triggered.  The
	 * cleanup here is partial, and further cleanup (in a
	 * subsequent tavor_mr_deregister() call) will be necessary.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		TAVOR_TNF_EXIT(tavor_mr_deregister);
		return (DDI_SUCCESS);
	}

mrdereg_finish_cleanup:
	mutex_exit(&mr->mr_lock);

	/* Free the Tavor Memory Region handle */
	tavor_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	tavor_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	tavor_pd_refcnt_dec(pd);

	/* Set the mrhdl pointer to NULL and return success */
	*mrhdl = NULL;

	TAVOR_TNF_EXIT(tavor_mr_deregister);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_query()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mr,
    ibt_mr_query_attr_t *attr)
{
	TAVOR_TNF_ENTER(tavor_mr_query);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))

	mutex_enter(&mr->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a tavor_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		TNF_PROBE_0(tavor_mr_query_inv_mrhdl_fail, TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_query);
		return (IBT_MR_HDL_INVALID);
	}

	/* Fill in the queried attributes */
	attr->mr_attr_flags = mr->mr_accflag;
	attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;

	/* Fill in the "local" attributes */
	attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
	attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
	attr->mr_lbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;

	/*
	 * Fill in the "remote" attributes (if necessary).  Note: the
	 * remote attributes are only valid if the memory region has one
	 * or more of the remote access flags set.
	 */
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
		attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
		attr->mr_rbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;
	}

	/*
	 * If the region is mapped for streaming (i.e. noncoherent), then
	 * indicate that a sync is required.
	 */
	attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
	    IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

	mutex_exit(&mr->mr_lock);
	TAVOR_TNF_EXIT(tavor_mr_query);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_reregister()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_reregister(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op)
{
	tavor_bind_info_t bind;
	int status;

	TAVOR_TNF_ENTER(tavor_mr_reregister);

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (as is the case here) and a "buf" binding (see
	 * below).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration (and reregistration) routines.
	 */
	bind.bi_type = TAVOR_BINDHDL_VADDR;
	bind.bi_addr = mr_attr->mr_vaddr;
	bind.bi_len = mr_attr->mr_len;
	bind.bi_as = mr_attr->mr_as;
	bind.bi_flags = mr_attr->mr_flags;
	status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_mr_reregister_cmnreg_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_reregister);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_reregister);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_reregister_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_reregister_buf(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
    tavor_mrhdl_t *mrhdl_new, tavor_mr_options_t *op)
{
	tavor_bind_info_t bind;
	int status;

	TAVOR_TNF_ENTER(tavor_mr_reregister_buf);

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (see above) and a "buf" binding (as is the case
	 * here).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration routines.  Note: We have chosen to provide
	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
	 * not set).  It is not critical what value we choose here as it need
	 * only be unique for the given RKey (which will happen by default),
	 * so the choice here is somewhat arbitrary.
	 */
	bind.bi_type = TAVOR_BINDHDL_BUF;
	bind.bi_buf = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	bind.bi_len = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	bind.bi_as = NULL;
	status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_mr_reregister_buf_cmnreg_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_reregister_buf);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_reregister_buf);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_sync()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_sync(tavor_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
{
	tavor_mrhdl_t mrhdl;
	uint64_t seg_vaddr, seg_len, seg_end;
	uint64_t mr_start, mr_end;
	uint_t type;
	int status, i;
	char *errormsg;

	TAVOR_TNF_ENTER(tavor_mr_sync);

	/* Process each of the ibt_mr_sync_t's */
	for (i = 0; i < num_segs; i++) {
		mrhdl = (tavor_mrhdl_t)mr_segs[i].ms_handle;

		/* Check for valid memory region handle */
		if (mrhdl == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
			goto mrsync_fail;
		}

		mutex_enter(&mrhdl->mr_lock);

		/*
		 * Check here to see if the memory region has already been
		 * partially deregistered as a result of a
		 * tavor_umap_umemlock_cb() callback.  If so, this is an
		 * error, return failure.
		 */
		if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl2");
			goto mrsync_fail;
		}

		/* Check for valid bounds on sync request */
		seg_vaddr = mr_segs[i].ms_vaddr;
		seg_len = mr_segs[i].ms_len;
		seg_end = seg_vaddr + seg_len - 1;
		mr_start = mrhdl->mr_bindinfo.bi_addr;
		mr_end = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
		if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_VA_INVALID, "invalid vaddr");
			goto mrsync_fail;
		}
		if ((seg_end < mr_start) || (seg_end > mr_end)) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
			goto mrsync_fail;
		}

		/* Determine what type (i.e. direction) for sync */
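		/*
		 * Note: IBT_SYNC_READ makes local changes visible to an
		 * incoming RDMA read, hence a "for device" sync; conversely,
		 * IBT_SYNC_WRITE makes data deposited by an incoming RDMA
		 * write visible to the CPU, hence a "for CPU" sync.
		 */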
		if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
			type = DDI_DMA_SYNC_FORDEV;
		} else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
			type = DDI_DMA_SYNC_FORCPU;
		} else {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sync type");
			goto mrsync_fail;
		}

		(void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
		    (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);
		mutex_exit(&mrhdl->mr_lock);
	}

	TAVOR_TNF_EXIT(tavor_mr_sync);
	return (DDI_SUCCESS);

mrsync_fail:
	TNF_PROBE_1(tavor_mr_sync_fail, TAVOR_TNF_ERROR, "", tnf_string, msg,
	    errormsg);
	TAVOR_TNF_EXIT(tavor_mr_sync);
	return (status);
}


/*
 * tavor_mw_alloc()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mw_alloc(tavor_state_t *state, tavor_pdhdl_t pd, ibt_mw_flags_t flags,
    tavor_mwhdl_t *mwhdl)
{
	tavor_rsrc_t *mpt, *rsrc;
	tavor_hw_mpt_t mpt_entry;
	tavor_mwhdl_t mw;
	uint_t sleep;
	int status;
	char *errormsg;

	TAVOR_TNF_ENTER(tavor_mw_alloc);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MW_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mwalloc_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry (for use as a memory window).  Since the
	 * Tavor hardware uses the MPT entry for memory regions and for
	 * memory windows, we will fill in this MPT with all the necessary
	 * parameters for the memory window.  And then (just as we do for
	 * memory regions) ownership will be passed to the hardware in the
	 * final step below.  If we fail here, we must undo the protection
	 * domain reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto mwalloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory window (i.e.
	 * the Tavor Memory Window handle).  Note: This is actually the same
	 * software structure used for tracking memory regions, but since many
	 * of the same properties are needed, only a single structure is
	 * necessary.  If we fail here, we must undo the protection domain
	 * reference count and the previous resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto mwalloc_fail2;
	}
	mw = (tavor_mwhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Calculate an "unbound" RKey from MPT index.  In much the same way
	 * as we do for memory regions (above), this key is constructed from
	 * a "constrained" portion (which depends on the MPT index) and an
	 * "unconstrained" portion (which may be arbitrarily chosen).
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mw->mr_rkey);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.  Note: fewer entries in the MPT
	 * entry are necessary to allocate a memory window.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.reg_win = TAVOR_MPT_IS_WINDOW;
	mpt_entry.mem_key = mw->mr_rkey;
	mpt_entry.pd = pd->pd_pdnum;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mw_alloc_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto mwalloc_fail3;
	}

	/*
	 * Fill in the rest of the Tavor Memory Window handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MW.
	 */
	mw->mr_mptrsrcp = mpt;
	mw->mr_pdhdl = pd;
	mw->mr_rsrcp = rsrc;
	*mwhdl = mw;

	TAVOR_TNF_EXIT(tavor_mw_alloc);
	return (DDI_SUCCESS);

mwalloc_fail3:
	tavor_rsrc_free(state, &rsrc);
mwalloc_fail2:
	tavor_rsrc_free(state, &mpt);
mwalloc_fail1:
	tavor_pd_refcnt_dec(pd);
mwalloc_fail:
	TNF_PROBE_1(tavor_mw_alloc_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mw_alloc);
	return (status);
}


/*
 * tavor_mw_free()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mw_free(tavor_state_t *state, tavor_mwhdl_t *mwhdl, uint_t sleep)
{
	tavor_rsrc_t *mpt, *rsrc;
	tavor_mwhdl_t mw;
	int status;
	char *errormsg;
	tavor_pdhdl_t pd;

	TAVOR_TNF_ENTER(tavor_mw_free);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
		TNF_PROBE_1(tavor_mw_free_fail, TAVOR_TNF_ERROR, "",
		    tnf_string, msg, errormsg);
		TAVOR_TNF_EXIT(tavor_mw_free);
		return (status);
	}

	/*
	 * Pull all the necessary information from the Tavor Memory Window
	 * handle.  This is necessary here because the resource for the
	 * MW handle is going to be freed up as part of this operation.
	 */
	mw = *mwhdl;
	mutex_enter(&mw->mr_lock);
	mpt = mw->mr_mptrsrcp;
	rsrc = mw->mr_rsrcp;
	pd = mw->mr_pdhdl;
	mutex_exit(&mw->mr_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Reclaim the MPT entry from hardware.  Note: in general, it is
	 * unexpected for this operation to return an error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
	    0, mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail, TAVOR_TNF_ERROR, "",
		    tnf_uint, status, status);
		TAVOR_TNF_EXIT(tavor_mw_free);
		return (IBT_INVALID_PARAM);
	}

	/* Free the Tavor Memory Window handle */
	tavor_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	tavor_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	tavor_pd_refcnt_dec(pd);

	/* Set the mwhdl pointer to NULL and return success */
	*mwhdl = NULL;

	TAVOR_TNF_EXIT(tavor_mw_free);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_keycalc()
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key)
{
	uint32_t tmp, log_num_mpt;

	/*
	 * Generate a simple key from counter.  Note:  We increment this
	 * static variable _intentionally_ without any kind of mutex around
	 * it.  First, single-threading all operations through a single lock
	 * would be a bad idea (from a performance point-of-view).  Second,
	 * the upper "unconstrained" bits don't really have to be unique
	 * because the lower bits are guaranteed to be (although we do make a
	 * best effort to ensure that they are).  Third, the window for the
	 * race (where both threads read and update the counter at the same
	 * time) is incredibly small.
	 * And, lastly, we'd like to make this into a "random" key XXX
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(tavor_debug_memkey_cnt))
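	/*
	 * Key layout (illustrative; assumes cp_log_num_mpt == 17, though
	 * the actual value comes from the configuration profile):
	 *
	 *	key = (counter << 17) | indx
	 *
	 * i.e. the low bits are the "constrained" MPT index and the
	 * remaining high bits come from the incrementing counter.
	 */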
	log_num_mpt = state->ts_cfg_profile->cp_log_num_mpt;
	tmp = (tavor_debug_memkey_cnt++) << log_num_mpt;
	*key = tmp | indx;
}


/*
 * tavor_mr_common_reg()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
{
	tavor_rsrc_pool_info_t *rsrc_pool;
	tavor_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
	tavor_umap_db_entry_t *umapdb;
	tavor_sw_refcnt_t *swrc_tmp;
	tavor_hw_mpt_t mpt_entry;
	tavor_mrhdl_t mr;
	ibt_mr_flags_t flags;
	tavor_bind_info_t *bh;
	ddi_dma_handle_t bind_dmahdl;
	ddi_umem_cookie_t umem_cookie;
	size_t umem_len;
	caddr_t umem_addr;
	uint64_t mtt_addr, mtt_ddrbaseaddr, max_sz;
	uint_t sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
	int status, umem_flags, bind_override_addr;
	char *errormsg;

	TAVOR_TNF_ENTER(tavor_mr_common_reg);

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU), whether it should be
	 * registered to bypass the IOMMU, and whether or not the resulting
	 * address should be "zero-based" (to aid the alignment restrictions
	 * for QPs).
	 */
	if (op == NULL) {
		bind_type = TAVOR_BINDMEM_NORMAL;
		bind_dmahdl = NULL;
		bind_override_addr = 0;
	} else {
		bind_type = op->mro_bind_type;
		bind_dmahdl = op->mro_bind_dmahdl;
		bind_override_addr = op->mro_bind_override_addr;
	}

	/* Extract the flags field from the tavor_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check for invalid length.  Check if the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
		goto mrcommon_fail;
	}

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrcommon_fail;
	}

	/*
	 * Get the base address for the MTT table.  This will be necessary
	 * below when we are setting up the MPT entry.
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the memory region.  And then
	 * ownership will be passed to the hardware in the final step
	 * below.  If we fail here, we must undo the protection domain
	 * reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto mrcommon_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory region (i.e.
	 * the Tavor Memory Region handle).  If we fail here, we must undo
	 * the protection domain reference count and the previous resource
	 * allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto mrcommon_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Determine if the memory is from userland and pin the pages
	 * with umem_lockmemory() if necessary.
	 * Then, if this is userland memory, allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo the reference counts
	 * and the previous resource allocations.
	 */
	mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
	if (mr_is_umem) {
		umem_len = ptob(btopr(bind->bi_len +
		    ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
		umem_addr = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &tavor_umem_cbops, NULL);
		if (status != 0) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
			goto mrcommon_fail3;
		}

		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))

		bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
		    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
		if (bind->bi_buf == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed iosetup");
			goto mrcommon_fail3;
		}
		bind->bi_type = TAVOR_BINDHDL_UBUF;
		bind->bi_buf->b_flags |= B_READ;

		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

		umapdb = tavor_umap_db_alloc(state->ts_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
			goto mrcommon_fail4;
		}
	}

	/*
	 * Setup the bindinfo for the mtt bind call
	 */
	bh = &mr->mr_bindinfo;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
	bcopy(bind, bh, sizeof (tavor_bind_info_t));
	bh->bi_bypass = bind_type;
	status = tavor_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
	    &mtt_pgsize_bits);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(status, "failed mtt bind");
		/*
		 * When mtt_bind fails, freerbuf has already been done,
		 * so make sure not to call it again.
		 */
		bind->bi_type = bh->bi_type;
		goto mrcommon_fail5;
	}
	mr->mr_logmttpgsz = mtt_pgsize_bits;

	/*
	 * Allocate MTT reference count (to track shared memory regions).
	 * This reference count resource may never be used on the given
	 * memory region, but if it is ever later registered as a "shared"
	 * memory region then this resource will be necessary.  If we fail
	 * here, we do pretty much the same as above to clean up.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1, sleep,
	    &mtt_refcnt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed reference count");
		goto mrcommon_fail6;
	}
	mr->mr_mttrefcntp = mtt_refcnt;
	swrc_tmp = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
	TAVOR_MTT_REFCNT_INIT(swrc_tmp);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
	mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
	mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
	mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
	mpt_entry.lr = 1;
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
	mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key = mr->mr_lkey;
	mpt_entry.pd = pd->pd_pdnum;
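	/*
	 * With "bind_override_addr" set, only the offset into the first
	 * MTT page is kept below, which produces the "zero-based" start
	 * address described in the options handling above.
	 */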
	if (bind_override_addr == 0) {
		mpt_entry.start_addr = bh->bi_addr;
	} else {
		bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
		mpt_entry.start_addr = bh->bi_addr;
	}
	mpt_entry.reg_win_len = bh->bi_len;
	mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND;
	mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
	mpt_entry.mttseg_addr_h = mtt_addr >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr >> 6;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mr_common_reg_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto mrcommon_fail7;
	}

	/*
	 * Fill in the rest of the Tavor Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp = mpt;
	mr->mr_mttrsrcp = mtt;
	mr->mr_pdhdl = pd;
	mr->mr_rsrcp = rsrc;
	mr->mr_is_umem = mr_is_umem;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the tavor_umap_umemlock_cb()
	 * callback and tavor_mr_deregister().
	 */
	if (mr_is_umem) {
		tavor_umap_db_add(umapdb);
	}

	*mrhdl = mr;

	TAVOR_TNF_EXIT(tavor_mr_common_reg);
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrcommon_fail7:
	tavor_rsrc_free(state, &mtt_refcnt);
mrcommon_fail6:
	tavor_rsrc_free(state, &mtt);
	tavor_mr_mem_unbind(state, bh);
	bind->bi_type = bh->bi_type;
mrcommon_fail5:
	if (mr_is_umem) {
		tavor_umap_db_free(umapdb);
	}
mrcommon_fail4:
	if (mr_is_umem) {
		/*
		 * Free up the memory ddi_umem_iosetup() allocates
		 * internally.
		 */
		if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
			freerbuf(bind->bi_buf);
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
			bind->bi_type = TAVOR_BINDHDL_NONE;
			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
		}
		ddi_umem_unlock(umem_cookie);
	}
mrcommon_fail3:
	tavor_rsrc_free(state, &rsrc);
mrcommon_fail2:
	tavor_rsrc_free(state, &mpt);
mrcommon_fail1:
	tavor_pd_refcnt_dec(pd);
mrcommon_fail:
	TNF_PROBE_1(tavor_mr_common_reg_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_common_reg);
	return (status);
}

int
tavor_dma_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_dmr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl)
{
	tavor_rsrc_t *mpt, *rsrc;
	tavor_hw_mpt_t mpt_entry;
	tavor_mrhdl_t mr;
	ibt_mr_flags_t flags;
	uint_t sleep;
	int status;

	/* Extract the flags field */
	flags = mr_attr->dmr_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mrcommon_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the memory region.  And then
	 * ownership will be passed to the hardware in the final step
	 * below.  If we fail here, we must undo the protection domain
	 * reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrcommon_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory region (i.e.
	 * the Tavor Memory Region handle).  If we fail here, we must undo
	 * the protection domain reference count and the previous resource
	 * allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrcommon_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
	bzero(mr, sizeof (*mr));
1545
1546 /*
1547 * Setup and validate the memory region access flags. This means
1548 * translating the IBTF's enable flags into the access flags that
1549 * will be used in later operations.
1550 */
1551 mr->mr_accflag = 0;
1552 if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1553 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1554 if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1555 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1556 if (flags & IBT_MR_ENABLE_REMOTE_READ)
1557 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1558 if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1559 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1560 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1561 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1562
1563 /*
1564 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
1565 * from a certain number of "constrained" bits (the least significant
1566 * bits) and some number of "unconstrained" bits. The constrained
1567 * bits must be set to the index of the entry in the MPT table, but
1568 * the unconstrained bits can be set to any value we wish. Note:
1569 * if no remote access is required, then the RKey value is not filled
1570 * in. Otherwise both Rkey and LKey are given the same value.
1571 */
1572 tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
1573 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
1574 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
1575 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
1576 mr->mr_rkey = mr->mr_lkey;
1577 }
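	/*
	 * Illustrative sketch of the key layout (the exact split depends
	 * on the configured MPT table size): with a 2^20-entry MPT table,
	 * the low 20 bits of the key are constrained to equal the MPT
	 * index, while the upper bits come from a rolling counter, i.e.
	 *
	 *	key = (counter << log_num_mpt) | mpt_index;
	 */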
1578
1579 /*
1580 * Fill in the MPT entry. This is the final step before passing
1581 * ownership of the MPT entry to the Tavor hardware. We use all of
1582 * the information collected/calculated above to fill in the
1583 * requisite portions of the MPT.
1584 */
1585 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
1586
1587 mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE;
1588 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
1589 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1590 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
1591 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
1592 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
1593 mpt_entry.lr = 1;
1594 	mpt_entry.phys_addr = 1; /* critical bit for a DMA (physical) MR */
1595 mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
1596
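	/*
	 * Note: the hardware encodes page_sz as log2 of the page size in
	 * 4KB units, hence the "- 0xC"; e.g. a 4KB page (log2 == 12)
	 * encodes as 0, an 8KB page as 1.
	 */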
1597 mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
1598 mpt_entry.mem_key = mr->mr_lkey;
1599 mpt_entry.pd = pd->pd_pdnum;
1600 mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND;
1601
1602 mpt_entry.start_addr = mr_attr->dmr_paddr;
1603 mpt_entry.reg_win_len = mr_attr->dmr_len;
1604
1605 mpt_entry.mttseg_addr_h = 0;
1606 mpt_entry.mttseg_addr_l = 0;
1607
1608 /*
1609 * Write the MPT entry to hardware. Lastly, we pass ownership of
1610 * the entry to the hardware if needed. Note: in general, this
1611 * operation shouldn't fail. But if it does, we have to undo
1612 * everything we've done above before returning error.
1613 *
1614 * For Tavor, this routine (which is common to the contexts) will only
1615 * set the ownership if needed - the process of passing the context
1616 * itself to HW will take care of setting up the MPT (based on type
1617 * and index).
1618 */
1619
1620 status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1621 sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
1622 if (status != TAVOR_CMD_SUCCESS) {
1623 cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
1624 status);
1625 status = ibc_get_ci_failure(0);
1626 goto mrcommon_fail7;
1627 }
1628
1629 /*
1630 * Fill in the rest of the Tavor Memory Region handle. Having
1631 * successfully transferred ownership of the MPT, we can update the
1632 * following fields for use in further operations on the MR.
1633 */
1634 mr->mr_mptrsrcp = mpt;
1635 mr->mr_mttrsrcp = NULL;
1636 mr->mr_pdhdl = pd;
1637 mr->mr_rsrcp = rsrc;
1638 mr->mr_is_umem = 0;
1639 mr->mr_umemcookie = NULL;
1640 mr->mr_umem_cbfunc = NULL;
1641 mr->mr_umem_cbarg1 = NULL;
1642 mr->mr_umem_cbarg2 = NULL;
1643
1644 *mrhdl = mr;
1645
1646 return (DDI_SUCCESS);
1647
1648 /*
1649 * The following is cleanup for all possible failure cases in this routine
1650 */
1651 mrcommon_fail7:
1652 tavor_rsrc_free(state, &rsrc);
1653 mrcommon_fail2:
1654 tavor_rsrc_free(state, &mpt);
1655 mrcommon_fail1:
1656 tavor_pd_refcnt_dec(pd);
1657 mrcommon_fail:
1658 return (status);
1659 }
1660
1661 /*
1662 * tavor_mr_mtt_bind()
1663 * Context: Can be called from interrupt or base context.
1664 */
1665 int
1666 tavor_mr_mtt_bind(tavor_state_t *state, tavor_bind_info_t *bind,
1667 ddi_dma_handle_t bind_dmahdl, tavor_rsrc_t **mtt, uint_t *mtt_pgsize_bits)
1668 {
1669 uint64_t nummtt;
1670 uint_t sleep;
1671 int status;
1672 char *errormsg;
1673
1674 	TAVOR_TNF_ENTER(tavor_mr_mtt_bind);
1675
1676 /*
1677 * Check the sleep flag. Ensure that it is consistent with the
1678 * current thread context (i.e. if we are currently in the interrupt
1679 * context, then we shouldn't be attempting to sleep).
1680 */
1681 	sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP;
1682 if ((sleep == TAVOR_SLEEP) &&
1683 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
1684 /* Set "status" and "errormsg" and goto failure */
1685 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
1686 goto mrmttbind_fail;
1687 }
1688
1689 /*
1690 * Bind the memory and determine the mapped addresses. This is
1691 * the first of two routines that do all the "heavy lifting" for
1692 * the Tavor memory registration routines. The tavor_mr_mem_bind()
1693 * routine takes the "bind" struct with all its fields filled
1694 * in and returns a list of DMA cookies (for the PCI mapped addresses
1695 * corresponding to the specified address region) which are used by
1696 * the tavor_mr_fast_mtt_write() routine below. If we fail here, we
1697 * must undo all the previous resource allocation (and PD reference
1698 * count).
1699 */
1700 status = tavor_mr_mem_bind(state, bind, bind_dmahdl, sleep);
1701 if (status != DDI_SUCCESS) {
1702 /* Set "status" and "errormsg" and goto failure */
1703 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
1704 goto mrmttbind_fail;
1705 }
1706
1707 /*
1708 * Determine number of pages spanned. This routine uses the
1709 * information in the "bind" struct to determine the required
1710 * number of MTT entries needed (and returns the suggested page size -
1711 * as a "power-of-2" - for each MTT entry).
1712 */
1713 nummtt = tavor_mr_nummtt_needed(state, bind, mtt_pgsize_bits);
1714
1715 /*
1716 * Allocate the MTT entries. Use the calculations performed above to
1717 * allocate the required number of MTT entries. Note: MTT entries are
1718 * allocated in "MTT segments" which consist of complete cachelines
1719 * (i.e. 8 entries, 16 entries, etc.) So the TAVOR_NUMMTT_TO_MTTSEG()
1720 * macro is used to do the proper conversion. If we fail here, we
1721 * must not only undo all the previous resource allocation (and PD
1722 * reference count), but we must also unbind the memory.
1723 */
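	/*
	 * For example (illustrative; the segment size is configurable):
	 * with 8 MTT entries per segment, a request for 10 entries rounds
	 * up to 2 segments (16 entries).
	 */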
1724 status = tavor_rsrc_alloc(state, TAVOR_MTT,
1725 TAVOR_NUMMTT_TO_MTTSEG(nummtt), sleep, mtt);
1726 if (status != DDI_SUCCESS) {
1727 /* Set "status" and "errormsg" and goto failure */
1728 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT");
1729 goto mrmttbind_fail2;
1730 }
1731
1732 /*
1733 * Write the mapped addresses into the MTT entries. This is part two
1734 * of the "heavy lifting" routines that we talked about above. Note:
1735 * we pass the suggested page size from the earlier operation here.
1736 * And if we fail here, we again do pretty much the same huge clean up.
1737 */
1738 status = tavor_mr_fast_mtt_write(*mtt, bind, *mtt_pgsize_bits);
1739 if (status != DDI_SUCCESS) {
1740 /* Set "status" and "errormsg" and goto failure */
1741 TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed write mtt");
1742 goto mrmttbind_fail3;
1743 }
1744 TAVOR_TNF_EXIT(tavor_mr_mtt_bind);
1745 return (DDI_SUCCESS);
1746
1747 /*
1748 * The following is cleanup for all possible failure cases in this routine
1749 */
1750 mrmttbind_fail3:
1751 tavor_rsrc_free(state, mtt);
1752 mrmttbind_fail2:
1753 tavor_mr_mem_unbind(state, bind);
1754 mrmttbind_fail:
1755 TNF_PROBE_1(tavor_mr_mtt_bind_fail, TAVOR_TNF_ERROR, "",
1756 tnf_string, msg, errormsg);
1757 TAVOR_TNF_EXIT(tavor_mr_mtt_bind);
1758 return (status);
1759 }
1760
1761
1762 /*
1763 * tavor_mr_mtt_unbind()
1764 * Context: Can be called from interrupt or base context.
1765 */
1766 int
1767 tavor_mr_mtt_unbind(tavor_state_t *state, tavor_bind_info_t *bind,
1768 tavor_rsrc_t *mtt)
1769 {
1770 TAVOR_TNF_ENTER(tavor_mr_mtt_unbind);
1771
1772 /*
1773 * Free up the MTT entries and unbind the memory. Here, as above, we
1774 * attempt to free these resources only if it is appropriate to do so.
1775 */
1776 tavor_mr_mem_unbind(state, bind);
1777 tavor_rsrc_free(state, &mtt);
1778
1779 TAVOR_TNF_EXIT(tavor_mr_mtt_unbind);
1780 return (DDI_SUCCESS);
1781 }
1782
1783
1784 /*
1785 * tavor_mr_common_rereg()
1786 * Context: Can be called from interrupt or base context.
1787 */
1788 static int
1789 tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
1790 tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
1791 tavor_mr_options_t *op)
1792 {
1793 tavor_rsrc_t *mpt;
1794 ibt_mr_attr_flags_t acc_flags_to_use;
1795 ibt_mr_flags_t flags;
1796 tavor_pdhdl_t pd_to_use;
1797 tavor_hw_mpt_t mpt_entry;
1798 uint64_t mtt_addr_to_use, vaddr_to_use, len_to_use;
1799 uint_t sleep, dereg_level;
1800 int status;
1801 char *errormsg;
1802
1803 TAVOR_TNF_ENTER(tavor_mr_common_rereg);
1804
1805 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1806
1807 /*
1808 * Check here to see if the memory region corresponds to a userland
1809 * mapping. Reregistration of userland memory regions is not
1810 * currently supported. Return failure. XXX
1811 */
1812 if (mr->mr_is_umem) {
1813 /* Set "status" and "errormsg" and goto failure */
1814 TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
1815 goto mrrereg_fail;
1816 }
1817
1818 mutex_enter(&mr->mr_lock);
1819
1820 /* Pull MPT resource pointer from the Tavor Memory Region handle */
1821 mpt = mr->mr_mptrsrcp;
1822
1823 /* Extract the flags field from the tavor_bind_info_t */
1824 flags = bind->bi_flags;
1825
1826 /*
1827 * Check the sleep flag. Ensure that it is consistent with the
1828 * current thread context (i.e. if we are currently in the interrupt
1829 * context, then we shouldn't be attempting to sleep).
1830 */
1831 	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP;
1832 if ((sleep == TAVOR_SLEEP) &&
1833 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
1834 mutex_exit(&mr->mr_lock);
1835 /* Set "status" and "errormsg" and goto failure */
1836 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
1837 goto mrrereg_fail;
1838 }
1839
1840 /*
1841 * First step is to temporarily invalidate the MPT entry. This
1842 * regains ownership from the hardware, and gives us the opportunity
1843 * to modify the entry. Note: The HW2SW_MPT command returns the
1844 * current MPT entry contents. These are saved away here because
1845 	 * they will be reused in a later step below.  If the region has
1846 	 * memory windows bound to it, we fail and return an "in use" error
1847 	 * code.  Otherwise, the error is unexpected, so we deregister the
1848 	 * memory region and return an error.
1849 *
1850 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
1851 * against holding the lock around this rereg call in all contexts.
1852 */
1853 status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
1854 sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN);
1855 if (status != TAVOR_CMD_SUCCESS) {
1856 mutex_exit(&mr->mr_lock);
1857 if (status == TAVOR_CMD_REG_BOUND) {
1858 TAVOR_TNF_EXIT(tavor_mr_common_rereg);
1859 return (IBT_MR_IN_USE);
1860 } else {
1861 cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: "
1862 "%08x\n", status);
1863
1864 /*
1865 * Call deregister and ensure that all current
1866 * resources get freed up
1867 */
1868 if (tavor_mr_deregister(state, &mr,
1869 TAVOR_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
1870 TAVOR_WARNING(state, "failed to deregister "
1871 "memory region");
1872 }
1873 TNF_PROBE_1(tavor_mr_common_rereg_hw2sw_mpt_cmd_fail,
1874 TAVOR_TNF_ERROR, "", tnf_uint, status, status);
1875 TAVOR_TNF_EXIT(tavor_mr_common_rereg);
1876 return (ibc_get_ci_failure(0));
1877 }
1878 }
1879
1880 /*
1881 * If we're changing the protection domain, then validate the new one
1882 */
1883 if (flags & IBT_MR_CHANGE_PD) {
1884
1885 /* Check for valid PD handle pointer */
1886 if (pd == NULL) {
1887 mutex_exit(&mr->mr_lock);
1888 /*
1889 * Call deregister and ensure that all current
1890 * resources get properly freed up. Unnecessary
1891 * here to attempt to regain software ownership
1892 * of the MPT entry as that has already been
1893 * done above.
1894 */
1895 if (tavor_mr_deregister(state, &mr,
1896 TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) !=
1897 DDI_SUCCESS) {
1898 TAVOR_WARNING(state, "failed to deregister "
1899 "memory region");
1900 }
1901 /* Set "status" and "errormsg" and goto failure */
1902 TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle");
1903 goto mrrereg_fail;
1904 }
1905
1906 /* Use the new PD handle in all operations below */
1907 pd_to_use = pd;
1908
1909 } else {
1910 /* Use the current PD handle in all operations below */
1911 pd_to_use = mr->mr_pdhdl;
1912 }
1913
1914 /*
1915 * If we're changing access permissions, then validate the new ones
1916 */
1917 if (flags & IBT_MR_CHANGE_ACCESS) {
1918 /*
1919 * Validate the access flags. Both remote write and remote
1920 * atomic require the local write flag to be set
1921 */
1922 if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
1923 (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
1924 !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
1925 mutex_exit(&mr->mr_lock);
1926 /*
1927 * Call deregister and ensure that all current
1928 * resources get properly freed up. Unnecessary
1929 * here to attempt to regain software ownership
1930 * of the MPT entry as that has already been
1931 * done above.
1932 */
1933 if (tavor_mr_deregister(state, &mr,
1934 TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) !=
1935 DDI_SUCCESS) {
1936 TAVOR_WARNING(state, "failed to deregister "
1937 "memory region");
1938 }
1939 /* Set "status" and "errormsg" and goto failure */
1940 TAVOR_TNF_FAIL(IBT_MR_ACCESS_REQ_INVALID,
1941 "invalid access flags");
1942 goto mrrereg_fail;
1943 }
1944
1945 /*
1946 * Setup and validate the memory region access flags. This
1947 * means translating the IBTF's enable flags into the access
1948 * flags that will be used in later operations.
1949 */
1950 acc_flags_to_use = 0;
1951 if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1952 acc_flags_to_use |= IBT_MR_WINDOW_BIND;
1953 if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1954 acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
1955 if (flags & IBT_MR_ENABLE_REMOTE_READ)
1956 acc_flags_to_use |= IBT_MR_REMOTE_READ;
1957 if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1958 acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
1959 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1960 acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;
1961
1962 } else {
1963 acc_flags_to_use = mr->mr_accflag;
1964 }
1965
1966 /*
1967 * If we're modifying the translation, then figure out whether
1968 * we can reuse the current MTT resources. This means calling
1969 * tavor_mr_rereg_xlat_helper() which does most of the heavy lifting
1970 * for the reregistration. If the current memory region contains
1971 * sufficient MTT entries for the new regions, then it will be
1972 * reused and filled in. Otherwise, new entries will be allocated,
1973 * the old ones will be freed, and the new entries will be filled
1974 * in. Note: If we're not modifying the translation, then we
1975 * should already have all the information we need to update the MPT.
1976 * Also note: If tavor_mr_rereg_xlat_helper() fails, it will return
1977 * a "dereg_level" which is the level of cleanup that needs to be
1978 * passed to tavor_mr_deregister() to finish the cleanup.
1979 */
1980 if (flags & IBT_MR_CHANGE_TRANSLATION) {
1981 status = tavor_mr_rereg_xlat_helper(state, mr, bind, op,
1982 &mtt_addr_to_use, sleep, &dereg_level);
1983 if (status != DDI_SUCCESS) {
1984 mutex_exit(&mr->mr_lock);
1985 /*
1986 * Call deregister and ensure that all resources get
1987 * properly freed up.
1988 */
1989 if (tavor_mr_deregister(state, &mr, dereg_level,
1990 sleep) != DDI_SUCCESS) {
1991 TAVOR_WARNING(state, "failed to deregister "
1992 "memory region");
1993 }
1994
1995 /* Set "status" and "errormsg" and goto failure */
1996 TAVOR_TNF_FAIL(status, "failed rereg helper");
1997 goto mrrereg_fail;
1998 }
1999 vaddr_to_use = mr->mr_bindinfo.bi_addr;
2000 len_to_use = mr->mr_bindinfo.bi_len;
2001 } else {
2002 mtt_addr_to_use = (((uint64_t)mpt_entry.mttseg_addr_h << 32) |
2003 ((uint64_t)mpt_entry.mttseg_addr_l << 6));
2004 vaddr_to_use = mr->mr_bindinfo.bi_addr;
2005 len_to_use = mr->mr_bindinfo.bi_len;
2006 }
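	/*
	 * Note: the MPT stores the MTT segment address split across two
	 * fields: mttseg_addr_h holds bits 63:32 and mttseg_addr_l holds
	 * bits 31:6 (the low 6 bits are implied zero, i.e. MTT segments
	 * are 64-byte aligned).
	 */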
2007
2008 /*
2009 * Calculate new keys (Lkey, Rkey) from MPT index. Just like they were
2010 * when the region was first registered, each key is formed from
2011 * "constrained" bits and "unconstrained" bits. Note: If no remote
2012 * access is required, then the RKey value is not filled in. Otherwise
2013 * both Rkey and LKey are given the same value.
2014 */
2015 tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
2016 if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
2017 (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
2018 (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
2019 mr->mr_rkey = mr->mr_lkey;
2020 }
2021
2022 /*
2023 * Update the MPT entry with the new information. Some of this
2024 * information is retained from the previous operation, some of
2025 * it is new based on request.
2026 */
2027 mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND) ? 1 : 0;
2028 mpt_entry.atomic = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
2029 mpt_entry.rw = (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ? 1 : 0;
2030 mpt_entry.rr = (acc_flags_to_use & IBT_MR_REMOTE_READ) ? 1 : 0;
2031 mpt_entry.lw = (acc_flags_to_use & IBT_MR_LOCAL_WRITE) ? 1 : 0;
2032 mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
2033 mpt_entry.mem_key = mr->mr_lkey;
2034 mpt_entry.pd = pd_to_use->pd_pdnum;
2035 mpt_entry.start_addr = vaddr_to_use;
2036 mpt_entry.reg_win_len = len_to_use;
2037 mpt_entry.mttseg_addr_h = mtt_addr_to_use >> 32;
2038 mpt_entry.mttseg_addr_l = mtt_addr_to_use >> 6;
2039
2040 /*
2041 * Write the updated MPT entry to hardware
2042 *
2043 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
2044 * against holding the lock around this rereg call in all contexts.
2045 */
2046 status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2047 sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN);
2048 if (status != TAVOR_CMD_SUCCESS) {
2049 mutex_exit(&mr->mr_lock);
2050 cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
2051 status);
2052 /*
2053 * Call deregister and ensure that all current resources get
2054 * properly freed up. Unnecessary here to attempt to regain
2055 * software ownership of the MPT entry as that has already
2056 * been done above.
2057 */
2058 if (tavor_mr_deregister(state, &mr,
2059 TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
2060 TAVOR_WARNING(state, "failed to deregister memory "
2061 "region");
2062 }
2063 TNF_PROBE_1(tavor_mr_common_rereg_sw2hw_mpt_cmd_fail,
2064 TAVOR_TNF_ERROR, "", tnf_uint, status, status);
2065 TAVOR_TNF_EXIT(tavor_mr_common_rereg);
2066 return (ibc_get_ci_failure(0));
2067 }
2068
2069 /*
2070 * If we're changing PD, then update their reference counts now.
2071 * This means decrementing the reference count on the old PD and
2072 * incrementing the reference count on the new PD.
2073 */
2074 if (flags & IBT_MR_CHANGE_PD) {
2075 tavor_pd_refcnt_dec(mr->mr_pdhdl);
2076 tavor_pd_refcnt_inc(pd);
2077 }
2078
2079 /*
2080 * Update the contents of the Tavor Memory Region handle to reflect
2081 * what has been changed.
2082 */
2083 mr->mr_pdhdl = pd_to_use;
2084 mr->mr_accflag = acc_flags_to_use;
2085 mr->mr_is_umem = 0;
2086 mr->mr_umemcookie = NULL;
2087
2088 /* New MR handle is same as the old */
2089 *mrhdl_new = mr;
2090 mutex_exit(&mr->mr_lock);
2091
2092 TAVOR_TNF_EXIT(tavor_mr_common_rereg);
2093 return (DDI_SUCCESS);
2094
2095 mrrereg_fail:
2096 TNF_PROBE_1(tavor_mr_common_rereg_fail, TAVOR_TNF_ERROR, "",
2097 tnf_string, msg, errormsg);
2098 TAVOR_TNF_EXIT(tavor_mr_common_rereg);
2099 return (status);
2100 }
2101
2102
2103 /*
2104 * tavor_mr_rereg_xlat_helper
2105 * Context: Can be called from interrupt or base context.
2106 * Note: This routine expects the "mr_lock" to be held when it
2107 * is called. Upon returning failure, this routine passes information
2108 * about what "dereg_level" should be passed to tavor_mr_deregister().
2109 */
2110 static int
2111 tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr,
2112 tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr,
2113 uint_t sleep, uint_t *dereg_level)
2114 {
2115 tavor_rsrc_pool_info_t *rsrc_pool;
2116 tavor_rsrc_t *mtt, *mtt_refcnt;
2117 tavor_sw_refcnt_t *swrc_old, *swrc_new;
2118 ddi_dma_handle_t dmahdl;
2119 uint64_t nummtt_needed, nummtt_in_currrsrc, max_sz;
2120 uint64_t mtt_ddrbaseaddr;
2121 uint_t mtt_pgsize_bits, bind_type, reuse_dmahdl;
2122 int status;
2123 char *errormsg;
2124
2125 TAVOR_TNF_ENTER(tavor_mr_rereg_xlat_helper);
2126
2127 ASSERT(MUTEX_HELD(&mr->mr_lock));
2128
2129 /*
2130 * Check the "options" flag. Currently this flag tells the driver
2131 * whether or not the region should be bound normally (i.e. with
2132 * entries written into the PCI IOMMU) or whether it should be
2133 * registered to bypass the IOMMU.
2134 */
2135 if (op == NULL) {
2136 bind_type = TAVOR_BINDMEM_NORMAL;
2137 } else {
2138 bind_type = op->mro_bind_type;
2139 }
2140
2141 /*
2142 	 * Check for an invalid length.  Check whether the length is zero or
2143 	 * larger than the maximum configured value.  Return an error if it
2144 	 * is.
2145 */
2146 max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz);
2147 if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
2148 /*
2149 * Deregister will be called upon returning failure from this
2150 * routine. This will ensure that all current resources get
2151 * properly freed up. Unnecessary to attempt to regain
2152 * software ownership of the MPT entry as that has already
2153 * been done above (in tavor_mr_reregister())
2154 */
2155 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT;
2156
2157 /* Set "status" and "errormsg" and goto failure */
2158 TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
2159 goto mrrereghelp_fail;
2160 }
2161
2162 /*
2163 * Determine the number of pages necessary for new region and the
2164 * number of pages supported by the current MTT resources
2165 */
2166 nummtt_needed = tavor_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
2167 nummtt_in_currrsrc = mr->mr_mttrsrcp->tr_len >> TAVOR_MTT_SIZE_SHIFT;
2168
2169 /*
2170 * Depending on whether we have enough pages or not, the next step is
2171 * to fill in a set of MTT entries that reflect the new mapping. In
2172 * the first case below, we already have enough entries. This means
2173 * we need to unbind the memory from the previous mapping, bind the
2174 * memory for the new mapping, write the new MTT entries, and update
2175 * the mr to reflect the changes.
2176 * In the second case below, we do not have enough entries in the
2177 * current mapping. So, in this case, we need not only to unbind the
2178 * current mapping, but we need to free up the MTT resources associated
2179 * with that mapping. After we've successfully done that, we continue
2180 * by binding the new memory, allocating new MTT entries, writing the
2181 * new MTT entries, and updating the mr to reflect the changes.
2182 */
2183
2184 /*
2185 * If this region is being shared (i.e. MTT refcount != 1), then we
2186 * can't reuse the current MTT resources regardless of their size.
2187 * Instead we'll need to alloc new ones (below) just as if there
2188 * hadn't been enough room in the current entries.
2189 */
2190 swrc_old = (tavor_sw_refcnt_t *)mr->mr_mttrefcntp->tr_addr;
2191 if (TAVOR_MTT_IS_NOT_SHARED(swrc_old) &&
2192 (nummtt_needed <= nummtt_in_currrsrc)) {
2193
2194 /*
2195 * Unbind the old mapping for this memory region, but retain
2196 * the ddi_dma_handle_t (if possible) for reuse in the bind
2197 * operation below. Note: If original memory region was
2198 * bound for IOMMU bypass and the new region can not use
2199 * bypass, then a new DMA handle will be necessary.
2200 */
2201 if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2202 mr->mr_bindinfo.bi_free_dmahdl = 0;
2203 tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
2204 dmahdl = mr->mr_bindinfo.bi_dmahdl;
2205 reuse_dmahdl = 1;
2206 } else {
2207 tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
2208 dmahdl = NULL;
2209 reuse_dmahdl = 0;
2210 }
2211
2212 /*
2213 * Bind the new memory and determine the mapped addresses.
2214 * As described, this routine and tavor_mr_fast_mtt_write()
2215 * do the majority of the work for the memory registration
2216 * operations. Note: When we successfully finish the binding,
2217 * we will set the "bi_free_dmahdl" flag to indicate that
2218 * even though we may have reused the ddi_dma_handle_t we do
2219 * wish it to be freed up at some later time. Note also that
2220 * if we fail, we may need to cleanup the ddi_dma_handle_t.
2221 */
2222 bind->bi_bypass = bind_type;
2223 status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
2224 if (status != DDI_SUCCESS) {
2225 if (reuse_dmahdl) {
2226 ddi_dma_free_handle(&dmahdl);
2227 }
2228
2229 /*
2230 * Deregister will be called upon returning failure
2231 * from this routine. This will ensure that all
2232 * current resources get properly freed up.
2233 * Unnecessary to attempt to regain software ownership
2234 * of the MPT entry as that has already been done
2235 * above (in tavor_mr_reregister()). Also unnecessary
2236 * to attempt to unbind the memory.
2237 */
2238 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2239
2240 /* Set "status" and "errormsg" and goto failure */
2241 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
2242 goto mrrereghelp_fail;
2243 }
2244 if (reuse_dmahdl) {
2245 bind->bi_free_dmahdl = 1;
2246 }
2247
2248 /*
2249 * Using the new mapping, but reusing the current MTT
2250 * resources, write the updated entries to MTT
2251 */
2252 mtt = mr->mr_mttrsrcp;
2253 status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
2254 if (status != DDI_SUCCESS) {
2255 /*
2256 * Deregister will be called upon returning failure
2257 * from this routine. This will ensure that all
2258 * current resources get properly freed up.
2259 * Unnecessary to attempt to regain software ownership
2260 * of the MPT entry as that has already been done
2261 * above (in tavor_mr_reregister()). Also unnecessary
2262 * to attempt to unbind the memory.
2263 *
2264 * But we do need to unbind the newly bound memory
2265 * before returning.
2266 */
2267 tavor_mr_mem_unbind(state, bind);
2268 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2269
2270 /* Set "status" and "errormsg" and goto failure */
2271 TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
2272 "failed write mtt");
2273 goto mrrereghelp_fail;
2274 }
2275
2276 /* Put the updated information into the Mem Region handle */
2277 mr->mr_bindinfo = *bind;
2278 mr->mr_logmttpgsz = mtt_pgsize_bits;
2279
2280 } else {
2281 /*
2282 * Check if the memory region MTT is shared by any other MRs.
2283 * Since the resource may be shared between multiple memory
2284 * regions (as a result of a "RegisterSharedMR()" verb) it is
2285 * important that we not unbind any resources prematurely.
2286 */
2287 if (!TAVOR_MTT_IS_SHARED(swrc_old)) {
2288 /*
2289 * Unbind the old mapping for this memory region, but
2290 * retain the ddi_dma_handle_t for reuse in the bind
2291 * operation below. Note: This can only be done here
2292 * because the region being reregistered is not
2293 * currently shared. Also if original memory region
2294 * was bound for IOMMU bypass and the new region can
2295 * not use bypass, then a new DMA handle will be
2296 * necessary.
2297 */
2298 if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2299 mr->mr_bindinfo.bi_free_dmahdl = 0;
2300 tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
2301 dmahdl = mr->mr_bindinfo.bi_dmahdl;
2302 reuse_dmahdl = 1;
2303 } else {
2304 tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
2305 dmahdl = NULL;
2306 reuse_dmahdl = 0;
2307 }
2308 } else {
2309 dmahdl = NULL;
2310 reuse_dmahdl = 0;
2311 }
2312
2313 /*
2314 * Bind the new memory and determine the mapped addresses.
2315 * As described, this routine and tavor_mr_fast_mtt_write()
2316 * do the majority of the work for the memory registration
2317 * operations. Note: When we successfully finish the binding,
2318 * we will set the "bi_free_dmahdl" flag to indicate that
2319 * even though we may have reused the ddi_dma_handle_t we do
2320 * wish it to be freed up at some later time. Note also that
2321 * if we fail, we may need to cleanup the ddi_dma_handle_t.
2322 */
2323 bind->bi_bypass = bind_type;
2324 status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
2325 if (status != DDI_SUCCESS) {
2326 if (reuse_dmahdl) {
2327 ddi_dma_free_handle(&dmahdl);
2328 }
2329
2330 /*
2331 * Deregister will be called upon returning failure
2332 * from this routine. This will ensure that all
2333 * current resources get properly freed up.
2334 * Unnecessary to attempt to regain software ownership
2335 * of the MPT entry as that has already been done
2336 * above (in tavor_mr_reregister()). Also unnecessary
2337 * to attempt to unbind the memory.
2338 */
2339 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2340
2341 /* Set "status" and "errormsg" and goto failure */
2342 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
2343 goto mrrereghelp_fail;
2344 }
2345 if (reuse_dmahdl) {
2346 bind->bi_free_dmahdl = 1;
2347 }
2348
2349 /*
2350 * Allocate the new MTT entries resource
2351 */
2352 status = tavor_rsrc_alloc(state, TAVOR_MTT,
2353 TAVOR_NUMMTT_TO_MTTSEG(nummtt_needed), sleep, &mtt);
2354 if (status != DDI_SUCCESS) {
2355 /*
2356 * Deregister will be called upon returning failure
2357 * from this routine. This will ensure that all
2358 * current resources get properly freed up.
2359 * Unnecessary to attempt to regain software ownership
2360 * of the MPT entry as that has already been done
2361 * above (in tavor_mr_reregister()). Also unnecessary
2362 * to attempt to unbind the memory.
2363 *
2364 * But we do need to unbind the newly bound memory
2365 * before returning.
2366 */
2367 tavor_mr_mem_unbind(state, bind);
2368 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2369
2370 /* Set "status" and "errormsg" and goto failure */
2371 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT");
2372 goto mrrereghelp_fail;
2373 }
2374
2375 /*
2376 * Allocate MTT reference count (to track shared memory
2377 * regions). As mentioned elsewhere above, this reference
2378 * count resource may never be used on the given memory region,
2379 * but if it is ever later registered as a "shared" memory
2380 * region then this resource will be necessary. Note: This
2381 * is only necessary here if the existing memory region is
2382 * already being shared (because otherwise we already have
2383 		 * a usable reference count resource).
2384 */
2385 if (TAVOR_MTT_IS_SHARED(swrc_old)) {
2386 status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1,
2387 sleep, &mtt_refcnt);
2388 if (status != DDI_SUCCESS) {
2389 /*
2390 * Deregister will be called upon returning
2391 * failure from this routine. This will ensure
2392 * that all current resources get properly
2393 * freed up. Unnecessary to attempt to regain
2394 * software ownership of the MPT entry as that
2395 * has already been done above (in
2396 * tavor_mr_reregister()). Also unnecessary
2397 * to attempt to unbind the memory.
2398 *
2399 * But we need to unbind the newly bound
2400 * memory and free up the newly allocated MTT
2401 * entries before returning.
2402 */
2403 tavor_mr_mem_unbind(state, bind);
2404 tavor_rsrc_free(state, &mtt);
2405 *dereg_level =
2406 TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2407
2408 /* Set "status"/"errormsg", goto failure */
2409 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
2410 "failed reference count");
2411 goto mrrereghelp_fail;
2412 }
2413 swrc_new = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
2414 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
2415 TAVOR_MTT_REFCNT_INIT(swrc_new);
2416 } else {
2417 mtt_refcnt = mr->mr_mttrefcntp;
2418 }
2419
2420 /*
2421 * Using the new mapping and the new MTT resources, write the
2422 * updated entries to MTT
2423 */
2424 status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
2425 if (status != DDI_SUCCESS) {
2426 /*
2427 * Deregister will be called upon returning failure
2428 * from this routine. This will ensure that all
2429 * current resources get properly freed up.
2430 * Unnecessary to attempt to regain software ownership
2431 * of the MPT entry as that has already been done
2432 * above (in tavor_mr_reregister()). Also unnecessary
2433 * to attempt to unbind the memory.
2434 *
2435 * But we need to unbind the newly bound memory,
2436 * free up the newly allocated MTT entries, and
2437 * (possibly) free the new MTT reference count
2438 * resource before returning.
2439 */
2440 if (TAVOR_MTT_IS_SHARED(swrc_old)) {
2441 tavor_rsrc_free(state, &mtt_refcnt);
2442 }
2443 tavor_mr_mem_unbind(state, bind);
2444 tavor_rsrc_free(state, &mtt);
2445 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2446
2447 /* Set "status" and "errormsg" and goto failure */
2448 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed write mtt");
2449 goto mrrereghelp_fail;
2450 }
2451
2452 /*
2453 * Check if the memory region MTT is shared by any other MRs.
2454 * Since the resource may be shared between multiple memory
2455 * regions (as a result of a "RegisterSharedMR()" verb) it is
2456 * important that we not free up any resources prematurely.
2457 */
2458 if (TAVOR_MTT_IS_SHARED(swrc_old)) {
2459 /* Decrement MTT reference count for "old" region */
2460 (void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
2461 } else {
2462 /* Free up the old MTT entries resource */
2463 tavor_rsrc_free(state, &mr->mr_mttrsrcp);
2464 }
2465
2466 /* Put the updated information into the mrhdl */
2467 mr->mr_bindinfo = *bind;
2468 mr->mr_logmttpgsz = mtt_pgsize_bits;
2469 mr->mr_mttrsrcp = mtt;
2470 mr->mr_mttrefcntp = mtt_refcnt;
2471 }
2472
2473 /*
2474 * Calculate and return the updated MTT address (in the DDR address
2475 * space). This will be used by the caller (tavor_mr_reregister) in
2476 * the updated MPT entry
2477 */
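	/*
	 * Illustrative sketch (assuming 8-byte MTT entries, i.e. a
	 * TAVOR_MTT_SIZE_SHIFT of 3): an MTT index of 0x100 yields an
	 * address of mtt_ddrbaseaddr + 0x800.
	 */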
2478 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
2479 mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;
2480 *mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx <<
2481 TAVOR_MTT_SIZE_SHIFT);
2482
2483 TAVOR_TNF_EXIT(tavor_mr_rereg_xlat_helper);
2484 return (DDI_SUCCESS);
2485
2486 mrrereghelp_fail:
2487 TNF_PROBE_1(tavor_mr_rereg_xlat_helper_fail, TAVOR_TNF_ERROR, "",
2488 tnf_string, msg, errormsg);
2489 TAVOR_TNF_EXIT(tavor_mr_rereg_xlat_helper);
2490 return (status);
2491 }
2492
2493
2494 /*
2495 * tavor_mr_nummtt_needed()
2496 * Context: Can be called from interrupt or base context.
2497 */
2498 /* ARGSUSED */
2499 static uint64_t
2500 tavor_mr_nummtt_needed(tavor_state_t *state, tavor_bind_info_t *bind,
2501 uint_t *mtt_pgsize_bits)
2502 {
2503 uint64_t pg_offset_mask;
2504 uint64_t pg_offset, tmp_length;
2505
2506 /*
2507 	 * For now we specify the page size as 8KB (the default page size for
2508 	 * the sun4u architecture), or 4KB for x86.  Figuring out the optimal
2509 	 * page size by examining the dmacookies remains to be done.  XXX
2510 */
2511 *mtt_pgsize_bits = PAGESHIFT;
2512
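	/*
	 * Worked example (illustrative, 4KB pages): for bi_addr == 0x1003
	 * and bi_len == 0x2000, pg_offset is 3 and tmp_length is 0x2002,
	 * so the region spans (0x2002 >> 12) + 1 == 3 pages.
	 */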
2513 pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
2514 pg_offset = bind->bi_addr & pg_offset_mask;
2515 tmp_length = pg_offset + (bind->bi_len - 1);
2516 return ((tmp_length >> *mtt_pgsize_bits) + 1);
2517 }
2518
2519
2520 /*
2521 * tavor_mr_mem_bind()
2522 * Context: Can be called from interrupt or base context.
2523 */
2524 static int
2525 tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
2526 ddi_dma_handle_t dmahdl, uint_t sleep)
2527 {
2528 ddi_dma_attr_t dma_attr;
2529 int (*callback)(caddr_t);
2530 uint_t dma_xfer_mode;
2531 int status;
2532
2533 /* bi_type must be set to a meaningful value to get a bind handle */
2534 ASSERT(bind->bi_type == TAVOR_BINDHDL_VADDR ||
2535 bind->bi_type == TAVOR_BINDHDL_BUF ||
2536 bind->bi_type == TAVOR_BINDHDL_UBUF);
2537
2538 TAVOR_TNF_ENTER(tavor_mr_mem_bind);
2539
2540 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2541
2542 /* Set the callback flag appropriately */
2543 callback = (sleep == TAVOR_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
2544
2545 /* Determine whether to map STREAMING or CONSISTENT */
2546 dma_xfer_mode = (bind->bi_flags & IBT_MR_NONCOHERENT) ?
2547 DDI_DMA_STREAMING : DDI_DMA_CONSISTENT;
2548
2549 /*
2550 * Initialize many of the default DMA attributes. Then, if we're
2551 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
2552 */
2553 if (dmahdl == NULL) {
2554 tavor_dma_attr_init(&dma_attr);
2555 #ifdef __sparc
2556 /*
2557 * First, disable streaming and switch to consistent if
2558 * configured to do so and IOMMU BYPASS is enabled.
2559 */
2560 if (state->ts_cfg_profile->cp_disable_streaming_on_bypass &&
2561 dma_xfer_mode == DDI_DMA_STREAMING &&
2562 bind->bi_bypass == TAVOR_BINDMEM_BYPASS) {
2563 dma_xfer_mode = DDI_DMA_CONSISTENT;
2564 }
2565
2566 /*
2567 		 * Then, if streaming is still specified, "bypass" is not
2568 		 * allowed (FORCE_PHYSICAL is set only for consistent mode).
2569 */
2570 if ((dma_xfer_mode == DDI_DMA_CONSISTENT) &&
2571 (bind->bi_bypass == TAVOR_BINDMEM_BYPASS)) {
2572 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
2573 }
2574 #endif
2575 /* Allocate a DMA handle for the binding */
2576 status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr,
2577 callback, NULL, &bind->bi_dmahdl);
2578 if (status != DDI_SUCCESS) {
2579 TNF_PROBE_0(tavor_mr_mem_bind_dmahdl_fail,
2580 TAVOR_TNF_ERROR, "");
2581 TAVOR_TNF_EXIT(tavor_mr_mem_bind);
2582 return (status);
2583 }
2584 bind->bi_free_dmahdl = 1;
2585
2586 } else {
2587 bind->bi_dmahdl = dmahdl;
2588 bind->bi_free_dmahdl = 0;
2589 }
2590
2591 /*
2592 * Bind the memory to get the PCI mapped addresses. The decision
2593 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
2594 * is determined by the "bi_type" flag. Note: if the bind operation
2595 * fails then we have to free up the DMA handle and return error.
2596 */
2597 if (bind->bi_type == TAVOR_BINDHDL_VADDR) {
2598 status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
2599 (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
2600 (DDI_DMA_RDWR | dma_xfer_mode), callback, NULL,
2601 &bind->bi_dmacookie, &bind->bi_cookiecnt);
2602 } else { /* TAVOR_BINDHDL_BUF || TAVOR_BINDHDL_UBUF */
2603 status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
2604 bind->bi_buf, (DDI_DMA_RDWR | dma_xfer_mode), callback,
2605 NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
2606 }
2607
2608 if (status != DDI_DMA_MAPPED) {
2609 if (bind->bi_free_dmahdl != 0) {
2610 ddi_dma_free_handle(&bind->bi_dmahdl);
2611 }
2612 TNF_PROBE_0(tavor_mr_mem_bind_dmabind_fail, TAVOR_TNF_ERROR,
2613 "");
2614 TAVOR_TNF_EXIT(tavor_mr_mem_bind);
2615 return (status);
2616 }
2617
2618 TAVOR_TNF_EXIT(tavor_mr_mem_bind);
2619 return (DDI_SUCCESS);
2620 }
2621
2622
2623 /*
2624 * tavor_mr_mem_unbind()
2625 * Context: Can be called from interrupt or base context.
2626 */
2627 static void
2628 tavor_mr_mem_unbind(tavor_state_t *state, tavor_bind_info_t *bind)
2629 {
2630 int status;
2631
2632 TAVOR_TNF_ENTER(tavor_mr_mem_unbind);
2633
2634 /*
2635 	 * In the case of TAVOR_BINDHDL_UBUF, the memory that bi_buf points
2636 	 * to was allocated internally by ddi_umem_iosetup(), so it must be
2637 	 * freed here.  Reset bi_type to TAVOR_BINDHDL_NONE so that it is not
2638 	 * freed again later.
2639 */
2640 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2641 if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
2642 freerbuf(bind->bi_buf);
2643 bind->bi_type = TAVOR_BINDHDL_NONE;
2644 }
2645 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
2646
2647 /*
2648 * Unbind the DMA memory for the region
2649 *
2650 * Note: The only way ddi_dma_unbind_handle() currently
2651 * can return an error is if the handle passed in is invalid.
2652 * Since this should never happen, we choose to return void
2653 	 * from this function.  If an error is returned nonetheless,
2654 	 * we print a warning message to the console.
2655 */
2656 status = ddi_dma_unbind_handle(bind->bi_dmahdl);
2657 if (status != DDI_SUCCESS) {
2658 TAVOR_WARNING(state, "failed to unbind DMA mapping");
2659 TNF_PROBE_0(tavor_mr_mem_unbind_dmaunbind_fail,
2660 TAVOR_TNF_ERROR, "");
2661 TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
2662 return;
2663 }
2664
2665 /* Free up the DMA handle */
2666 if (bind->bi_free_dmahdl != 0) {
2667 ddi_dma_free_handle(&bind->bi_dmahdl);
2668 }
2669
2670 TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
2671 }
2672
2673
2674 /*
2675 * tavor_mr_fast_mtt_write()
2676 * Context: Can be called from interrupt or base context.
2677 */
2678 static int
2679 tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind,
2680 uint32_t mtt_pgsize_bits)
2681 {
2682 ddi_dma_cookie_t dmacookie;
2683 uint_t cookie_cnt;
2684 uint64_t *mtt_table;
2685 uint64_t mtt_entry;
2686 uint64_t addr, endaddr;
2687 uint64_t pagesize;
2688 int i;
2689
2690 TAVOR_TNF_ENTER(tavor_mr_fast_mtt_write);
2691
2692 /* Calculate page size from the suggested value passed in */
2693 pagesize = ((uint64_t)1 << mtt_pgsize_bits);
2694
2695 /*
2696 * Walk the "cookie list" and fill in the MTT table entries
2697 */
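	/*
	 * For example (illustrative, 4KB pages): a cookie with
	 * dmac_laddress == 0x10000800 and dmac_size == 0x1000 spans
	 * 0x10000800-0x100017FF; after aligning down, MTT entries are
	 * written for pages 0x10000000 and 0x10001000.
	 */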
2698 i = 0;
2699 mtt_table = (uint64_t *)mtt->tr_addr;
2700 dmacookie = bind->bi_dmacookie;
2701 cookie_cnt = bind->bi_cookiecnt;
2702 while (cookie_cnt-- > 0) {
2703 addr = dmacookie.dmac_laddress;
2704 endaddr = addr + (dmacookie.dmac_size - 1);
2705 addr = addr & ~((uint64_t)pagesize - 1);
2706 while (addr <= endaddr) {
2707 /*
2708 * Fill in the mapped addresses (calculated above) and
2709 * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry.
2710 */
2711 mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET;
2712 ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry);
2713 addr += pagesize;
2714 i++;
2715
2716 if (addr == 0) {
2717 static int do_once = 1;
2718 _NOTE(SCHEME_PROTECTS_DATA("safe sharing",
2719 do_once))
2720 if (do_once) {
2721 do_once = 0;
2722 cmn_err(CE_NOTE, "probable error in "
2723 "dma_cookie address from caller\n");
2724 }
2725 break;
2726 }
2727 }
2728
2729 /*
2730 * When we've reached the end of the current DMA cookie,
2731 * jump to the next cookie (if there are more)
2732 */
2733 if (cookie_cnt != 0) {
2734 ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
2735 }
2736 }
2737
2738 TAVOR_TNF_EXIT(tavor_mr_fast_mtt_write);
2739 return (DDI_SUCCESS);
2740 }
2741
2742 /*
2743 * tavor_mtt_refcnt_inc()
2744 * Context: Can be called from interrupt or base context.
2745 */
2746 static int
2747 tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc)
2748 {
2749 tavor_sw_refcnt_t *rc;
2750 uint32_t cnt;
2751
2752 rc = (tavor_sw_refcnt_t *)rsrc->tr_addr;
2753
2754 /* Increment the MTT's reference count */
2755 mutex_enter(&rc->swrc_lock);
2756 TNF_PROBE_1_DEBUG(tavor_mtt_refcnt_inc, TAVOR_TNF_TRACE, "",
2757 tnf_uint, refcnt, rc->swrc_refcnt);
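	/* Post-increment: the returned value is the count before the increment */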
2758 cnt = rc->swrc_refcnt++;
2759 mutex_exit(&rc->swrc_lock);
2760
2761 return (cnt);
2762 }
2763
2764
2765 /*
2766 * tavor_mtt_refcnt_dec()
2767 * Context: Can be called from interrupt or base context.
2768 */
2769 static int
2770 tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc)
2771 {
2772 tavor_sw_refcnt_t *rc;
2773 uint32_t cnt;
2774
2775 rc = (tavor_sw_refcnt_t *)rsrc->tr_addr;
2776
2777 /* Decrement the MTT's reference count */
2778 mutex_enter(&rc->swrc_lock);
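	/* Pre-decrement: the returned value is the count after the decrement */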
2779 cnt = --rc->swrc_refcnt;
2780 TNF_PROBE_1_DEBUG(tavor_mtt_refcnt_dec, TAVOR_TNF_TRACE, "",
2781 tnf_uint, refcnt, rc->swrc_refcnt);
2782 mutex_exit(&rc->swrc_lock);
2783
2784 return (cnt);
2785 }
2786