/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * hermon_mr.c
 *    Hermon Memory Region/Window Routines
 *
 *    Implements all the routines necessary to provide the requisite memory
 *    registration verbs. These include operations like RegisterMemRegion(),
 *    DeregisterMemRegion(), ReregisterMemRegion(), RegisterSharedMemRegion(),
 *    etc., that affect Memory Regions. It also includes the verbs that
 *    affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
 *    and QueryMemWindow().
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/esunddi.h>

#include <sys/ib/adapters/hermon/hermon.h>

extern uint32_t hermon_kernel_data_ro;
extern uint32_t hermon_user_data_ro;
extern int hermon_rdma_debug;

/*
 * Used by hermon_mr_keycalc() below to fill in the "unconstrained" portion
 * of Hermon memory keys (LKeys and RKeys)
 */
static uint_t hermon_memkey_cnt = 0x00;
#define HERMON_MEMKEY_SHIFT 24

/* initial state of an MPT */
#define HERMON_MPT_SW_OWNERSHIP 0xF /* memory regions */
#define HERMON_MPT_FREE         0x3 /* allocate lkey */

static int hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type);
static int hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op);
static int hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level);
static uint64_t hermon_mr_nummtt_needed(hermon_state_t *state,
    hermon_bind_info_t *bind, uint_t *mtt_pgsize);
static int hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer);
static void hermon_mr_mem_unbind(hermon_state_t *state,
    hermon_bind_info_t *bind);
static int hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
    hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits);
static int hermon_mr_fast_mtt_write_fmr(hermon_state_t *state,
    hermon_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits);
static uint_t hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc);
static uint_t hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc);


/*
 * The Hermon umem_lockmemory() callback ops. When userland memory is
 * registered, these callback ops are specified. The hermon_umap_umemlock_cb()
 * callback will be called whenever the memory for the corresponding
 * ddi_umem_cookie_t is being freed.
 */
static struct umem_callback_ops hermon_umem_cbops = {
    UMEM_CALLBACK_VERSION,
    hermon_umap_umemlock_cb,
};


/*
 * hermon_mr_register()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type)
{
    hermon_bind_info_t bind;
    int status;

    /*
     * Fill in the "bind" struct. This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (as is the case here) and a "buf" binding (see
     * below). The "bind" struct is later passed to hermon_mr_mem_bind()
     * which does most of the "heavy lifting" for the Hermon memory
     * registration routines.
     */
    bind.bi_type = HERMON_BINDHDL_VADDR;
    bind.bi_addr = mr_attr->mr_vaddr;
    bind.bi_len = mr_attr->mr_len;
    bind.bi_as = mr_attr->mr_as;
    bind.bi_flags = mr_attr->mr_flags;
    status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op,
        mpt_type);
    return (status);
}


/*
 * hermon_mr_register_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_register_buf(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_smr_attr_t *mr_attr, struct buf *buf, hermon_mrhdl_t *mrhdl,
    hermon_mr_options_t *op, hermon_mpt_rsrc_type_t mpt_type)
{
    hermon_bind_info_t bind;
    int status;

    /*
     * Fill in the "bind" struct. This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (see above) and a "buf" binding (as is the case
     * here). The "bind" struct is later passed to hermon_mr_mem_bind()
     * which does most of the "heavy lifting" for the Hermon memory
     * registration routines. Note: We have chosen to provide
     * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
     * not set). It is not critical what value we choose here as it need
     * only be unique for the given RKey (which will happen by default),
     * so the choice here is somewhat arbitrary.
     */
    bind.bi_type = HERMON_BINDHDL_BUF;
    bind.bi_buf = buf;
    if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
        bind.bi_addr = mr_attr->mr_vaddr;
    } else {
        bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
    }
    bind.bi_as = NULL;
    bind.bi_len = (uint64_t)buf->b_bcount;
    bind.bi_flags = mr_attr->mr_flags;
    status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, mpt_type);
    return (status);
}


/*
 * hermon_mr_register_shared()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_register_shared(hermon_state_t *state, hermon_mrhdl_t mrhdl,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new)
{
    hermon_rsrc_t *mpt, *mtt, *rsrc;
    hermon_umap_db_entry_t *umapdb;
    hermon_hw_dmpt_t mpt_entry;
    hermon_mrhdl_t mr;
    hermon_bind_info_t *bind;
    ddi_umem_cookie_t umem_cookie;
    size_t umem_len;
    caddr_t umem_addr;
    uint64_t mtt_addr, pgsize_msk;
    uint_t sleep, mr_is_umem;
    int status, umem_flags;

    /*
     * Check the sleep flag. Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP :
        HERMON_SLEEP;
    if ((sleep == HERMON_SLEEP) &&
        (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
        status = IBT_INVALID_PARAM;
        goto mrshared_fail;
    }

    /* Increment the reference count on the protection domain (PD) */
    hermon_pd_refcnt_inc(pd);

    /*
     * Allocate an MPT entry. This will be filled in with all the
     * necessary parameters to define the shared memory region.
     * Specifically, it will be made to reference the currently existing
     * MTT entries and ownership of the MPT will be passed to the hardware
     * in the last step below. If we fail here, we must undo the
     * protection domain reference count.
     */
    status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mrshared_fail1;
    }

    /*
     * Allocate the software structure for tracking the shared memory
     * region (i.e. the Hermon Memory Region handle). If we fail here, we
     * must undo the protection domain reference count and the previous
     * resource allocation.
     */
    status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mrshared_fail2;
    }
    mr = (hermon_mrhdl_t)rsrc->hr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

    /*
     * Setup and validate the memory region access flags. This means
     * translating the IBTF's enable flags into the access flags that
     * will be used in later operations.
     */
    mr->mr_accflag = 0;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
        mr->mr_accflag |= IBT_MR_WINDOW_BIND;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
        mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
        mr->mr_accflag |= IBT_MR_REMOTE_READ;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
        mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
    if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
        mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

    /*
     * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
     * from a certain number of "constrained" bits (the least significant
     * bits) and some number of "unconstrained" bits. The constrained
     * bits must be set to the index of the entry in the MPT table, but
     * the unconstrained bits can be set to any value we wish. Note:
     * if no remote access is required, then the RKey value is not filled
     * in. Otherwise both RKey and LKey are given the same value.
     */
    mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);

    /* Grab the MR lock for the current memory region */
    mutex_enter(&mrhdl->mr_lock);

    /*
     * Check here to see if the memory region has already been partially
     * deregistered as a result of a hermon_umap_umemlock_cb() callback.
     * If so, this is an error, return failure.
     */
    if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
        mutex_exit(&mrhdl->mr_lock);
        status = IBT_MR_HDL_INVALID;
        goto mrshared_fail3;
    }

    /*
     * Determine if the original memory was from userland and, if so, pin
     * the pages (again) with umem_lockmemory(). This will guarantee a
     * separate callback for each of this shared region's MR handles.
     * If this is userland memory, then allocate an entry in the
     * "userland resources database". This will later be added to
     * the database (after all further memory registration operations are
     * successful). If we fail here, we must undo all the above setup.
     */
    mr_is_umem = mrhdl->mr_is_umem;
    if (mr_is_umem) {
        umem_len = ptob(btopr(mrhdl->mr_bindinfo.bi_len));
        umem_addr = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
            ~PAGEOFFSET);
        umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
            DDI_UMEMLOCK_LONGTERM);
        status = umem_lockmemory(umem_addr, umem_len, umem_flags,
            &umem_cookie, &hermon_umem_cbops, NULL);
        if (status != 0) {
            mutex_exit(&mrhdl->mr_lock);
            status = IBT_INSUFF_RESOURCE;
            goto mrshared_fail3;
        }

        umapdb = hermon_umap_db_alloc(state->hs_instance,
            (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
            (uint64_t)(uintptr_t)rsrc);
        if (umapdb == NULL) {
            mutex_exit(&mrhdl->mr_lock);
            status = IBT_INSUFF_RESOURCE;
            goto mrshared_fail4;
        }
    }

    /*
     * Copy the MTT resource pointer (and additional parameters) from
     * the original Hermon Memory Region handle. Note: this is normally
     * where the hermon_mr_mem_bind() routine would be called, but because
     * we already have bound and filled-in MTT entries it is simply a
     * matter here of managing the MTT reference count and grabbing the
     * address of the MTT table entries (for filling in the shared region's
     * MPT entry).
     */
    mr->mr_mttrsrcp = mrhdl->mr_mttrsrcp;
    mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
    mr->mr_bindinfo = mrhdl->mr_bindinfo;
    mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
    mutex_exit(&mrhdl->mr_lock);
    bind = &mr->mr_bindinfo;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
    mtt = mr->mr_mttrsrcp;

    /*
     * Increment the MTT reference count (to reflect the fact that
     * the MTT is now shared)
     */
    (void) hermon_mtt_refcnt_inc(mr->mr_mttrefcntp);

    /*
     * Update the new "bind" virtual address. Do some extra work here
     * to ensure proper alignment. That is, make sure that the page
     * offset for the beginning of the old range is the same as the
     * offset for this new mapping.
     */
    pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
    bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
        (mr->mr_bindinfo.bi_addr & pgsize_msk));
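
    /*
     * Illustrative note (not in the original source): assuming a 4KB MTT
     * page size (mr_logmttpgsz == 12, so pgsize_msk == 0xFFF), a new
     * mr_vaddr of 0x20003000 combined with an old bi_addr of 0x10000234
     * yields 0x20003234 -- the new page-aligned base with the old range's
     * page offset (0x234) preserved, so the already-written MTT entries
     * still translate correctly for the shared region.
     */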

    /*
     * Fill in the MPT entry. This is the final step before passing
     * ownership of the MPT entry to the Hermon hardware. We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the MPT.
     */
    bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
    mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
    mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
    mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
    mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
    mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
    mpt_entry.lr = 1;
    mpt_entry.reg_win = HERMON_MPT_IS_REGION;
    mpt_entry.entity_sz = mr->mr_logmttpgsz;
    mpt_entry.mem_key = mr->mr_lkey;
    mpt_entry.pd = pd->pd_pdnum;
    mpt_entry.start_addr = bind->bi_addr;
    mpt_entry.reg_win_len = bind->bi_len;
    mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
    mpt_entry.mtt_addr_h = mtt_addr >> 32;
    mpt_entry.mtt_addr_l = mtt_addr >> 3;
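
    /*
     * Illustrative note (not in the original source): mtt->hr_indx scaled
     * by the MTT entry size (1 << HERMON_MTT_SIZE_SHIFT bytes) gives the
     * byte offset of this region's first MTT entry. The upper 32 bits of
     * that offset land in mtt_addr_h; since the offset is 8-byte aligned,
     * its low three bits are always zero, and mtt_addr_l stores the low
     * word pre-shifted right by 3 to fit the hardware field.
     */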

    /*
     * Write the MPT entry to hardware. Lastly, we pass ownership of
     * the entry to the hardware. Note: in general, this operation
     * shouldn't fail. But if it does, we have to undo everything we've
     * done above before returning error.
     */
    status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
        sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        status = ibc_get_ci_failure(0);
        goto mrshared_fail5;
    }

    /*
     * Fill in the rest of the Hermon Memory Region handle. Having
     * successfully transferred ownership of the MPT, we can update the
     * following fields for use in further operations on the MR.
     */
    mr->mr_mptrsrcp = mpt;
    mr->mr_mttrsrcp = mtt;
    mr->mr_mpt_type = HERMON_MPT_DMPT;
    mr->mr_pdhdl = pd;
    mr->mr_rsrcp = rsrc;
    mr->mr_is_umem = mr_is_umem;
    mr->mr_is_fmr = 0;
    mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
    mr->mr_umem_cbfunc = NULL;
    mr->mr_umem_cbarg1 = NULL;
    mr->mr_umem_cbarg2 = NULL;
    mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
    mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);

    /*
     * If this is userland memory, then we need to insert the previously
     * allocated entry into the "userland resources database". This will
     * allow for later coordination between the hermon_umap_umemlock_cb()
     * callback and hermon_mr_deregister().
     */
    if (mr_is_umem) {
        hermon_umap_db_add(umapdb);
    }

    *mrhdl_new = mr;

    return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrshared_fail5:
    (void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
    if (mr_is_umem) {
        hermon_umap_db_free(umapdb);
    }
mrshared_fail4:
    if (mr_is_umem) {
        ddi_umem_unlock(umem_cookie);
    }
mrshared_fail3:
    hermon_rsrc_free(state, &rsrc);
mrshared_fail2:
    hermon_rsrc_free(state, &mpt);
mrshared_fail1:
    hermon_pd_refcnt_dec(pd);
mrshared_fail:
    return (status);
}

/*
 * hermon_mr_alloc_fmr()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_alloc_fmr(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_fmrhdl_t fmr_pool, hermon_mrhdl_t *mrhdl)
{
    hermon_rsrc_t *mpt, *mtt, *rsrc;
    hermon_hw_dmpt_t mpt_entry;
    hermon_mrhdl_t mr;
    hermon_bind_info_t bind;
    uint64_t mtt_addr;
    uint64_t nummtt;
    uint_t sleep, mtt_pgsize_bits;
    int status;
    offset_t i;
    hermon_icm_table_t *icm_table;
    hermon_dma_info_t *dma_info;
    uint32_t index1, index2, rindx;

    /*
     * Check the sleep flag. Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
        HERMON_NOSLEEP;
    if ((sleep == HERMON_SLEEP) &&
        (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
        return (IBT_INVALID_PARAM);
    }

    /* Increment the reference count on the protection domain (PD) */
    hermon_pd_refcnt_inc(pd);

    /*
     * Allocate an MPT entry. This will be filled in with all the
     * necessary parameters to define the FMR. Specifically, it will be
     * made to reference the currently existing MTT entries and ownership
     * of the MPT will be passed to the hardware in the last step below.
     * If we fail here, we must undo the protection domain reference count.
     */
    status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto fmralloc_fail1;
    }

    /*
     * Allocate the software structure for tracking the FMR memory
     * region (i.e. the Hermon Memory Region handle). If we fail here, we
     * must undo the protection domain reference count and the previous
     * resource allocation.
     */
    status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto fmralloc_fail2;
    }
    mr = (hermon_mrhdl_t)rsrc->hr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

    /*
     * Setup and validate the memory region access flags. This means
     * translating the IBTF's enable flags into the access flags that
     * will be used in later operations.
     */
    mr->mr_accflag = 0;
    if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
        mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
    if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ)
        mr->mr_accflag |= IBT_MR_REMOTE_READ;
    if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
        mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
    if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
        mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

    /*
     * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
     * from a certain number of "constrained" bits (the least significant
     * bits) and some number of "unconstrained" bits. The constrained
     * bits must be set to the index of the entry in the MPT table, but
     * the unconstrained bits can be set to any value we wish. Note:
     * if no remote access is required, then the RKey value is not filled
     * in. Otherwise both RKey and LKey are given the same value.
     */
    mr->mr_fmr_key = 1; /* ready for the next reload */
    mr->mr_rkey = mr->mr_lkey = mpt->hr_indx;

    /*
     * Determine number of pages spanned. This routine uses the
     * information in the "bind" struct to determine the required number
     * of MTT entries (and returns the suggested page size - as a
     * "power-of-2" - for each MTT entry).
     */
    /* Assume address will be page aligned later */
    bind.bi_addr = 0;
    /* Calculate size based on given max pages */
    bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT;
    nummtt = hermon_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits);
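
    /*
     * Illustrative note (not in the original source): with a hypothetical
     * fmr_max_pages of 256 and 4KB pages (PAGESHIFT == 12), bi_len works
     * out to 1MB and -- given the zero, page-aligned bi_addr above -- the
     * worst-case MTT count for a 4KB MTT page size would be 256 entries,
     * one per page in the region.
     */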

    /*
     * Allocate the MTT entries. Use the calculations performed above to
     * allocate the required number of MTT entries. If we fail here, we
     * must not only undo all the previous resource allocation (and PD
     * reference count), but we must also unbind the memory.
     */
    status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
    if (status != DDI_SUCCESS) {
        IBTF_DPRINTF_L2("FMR", "FATAL: too few MTTs");
        status = IBT_INSUFF_RESOURCE;
        goto fmralloc_fail3;
    }
    mr->mr_logmttpgsz = mtt_pgsize_bits;

    /*
     * Fill in the MPT entry. This is the final step before passing
     * ownership of the MPT entry to the Hermon hardware. We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the MPT.
     */
    bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
    mpt_entry.en_bind = 0;
    mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
    mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
    mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
    mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
    mpt_entry.lr = 1;
    mpt_entry.reg_win = HERMON_MPT_IS_REGION;
    mpt_entry.pd = pd->pd_pdnum;

    mpt_entry.entity_sz = mr->mr_logmttpgsz;
    mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
    mpt_entry.fast_reg_en = 1;
    mpt_entry.mtt_size = (uint_t)nummtt;
    mpt_entry.mtt_addr_h = mtt_addr >> 32;
    mpt_entry.mtt_addr_l = mtt_addr >> 3;
    mpt_entry.mem_key = mr->mr_lkey;

    /*
     * FMR sets these to 0 for now. Later during actual fmr registration
     * these values are filled in.
     */
    mpt_entry.start_addr = 0;
    mpt_entry.reg_win_len = 0;

    /*
     * Write the MPT entry to hardware. Lastly, we pass ownership of
     * the entry to the hardware. Note: in general, this operation
     * shouldn't fail. But if it does, we have to undo everything we've
     * done above before returning error.
     */
    status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
        sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        status = ibc_get_ci_failure(0);
        goto fmralloc_fail4;
    }

    /*
     * Fill in the rest of the Hermon Memory Region handle. Having
     * successfully transferred ownership of the MPT, we can update the
     * following fields for use in further operations on the MR. Also, set
     * that this is an FMR region.
     */
    mr->mr_mptrsrcp = mpt;
    mr->mr_mttrsrcp = mtt;

    mr->mr_mpt_type = HERMON_MPT_DMPT;
    mr->mr_pdhdl = pd;
    mr->mr_rsrcp = rsrc;
    mr->mr_is_fmr = 1;
    mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
    mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);
    mr->mr_mttaddr = mtt_addr;
    (void) memcpy(&mr->mr_bindinfo, &bind, sizeof (hermon_bind_info_t));

    /* initialize hr_addr for use during register/deregister/invalidate */
    icm_table = &state->hs_icm[HERMON_DMPT];
    rindx = mpt->hr_indx;
    hermon_index(index1, index2, rindx, icm_table, i);
    dma_info = icm_table->icm_dma[index1] + index2;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mpt))
    mpt->hr_addr = (void *)((uintptr_t)(dma_info->vaddr + i * mpt->hr_len));

    *mrhdl = mr;

    return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
fmralloc_fail4:
    kmem_free(mtt, sizeof (hermon_rsrc_t) * nummtt);
fmralloc_fail3:
    hermon_rsrc_free(state, &rsrc);
fmralloc_fail2:
    hermon_rsrc_free(state, &mpt);
fmralloc_fail1:
    hermon_pd_refcnt_dec(pd);
fmralloc_fail:
    return (status);
}


/*
 * hermon_mr_register_physical_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/*ARGSUSED*/
int
hermon_mr_register_physical_fmr(hermon_state_t *state,
    ibt_pmr_attr_t *mem_pattr_p, hermon_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p)
{
    hermon_rsrc_t *mpt;
    uint64_t *mpt_table;
    int status;
    uint32_t key;

    mutex_enter(&mr->mr_lock);
    mpt = mr->mr_mptrsrcp;
    mpt_table = (uint64_t *)mpt->hr_addr;

    /* Write MPT status to SW bit */
    *(uint8_t *)mpt_table = 0xF0;

    membar_producer();

    /*
     * Write the mapped addresses into the MTT entries. FMR needs to do
     * this a little differently, so we call the FMR-specific fast MTT
     * write here.
     */
    status = hermon_mr_fast_mtt_write_fmr(state, mr->mr_mttrsrcp,
        mem_pattr_p, mr->mr_logmttpgsz);
    if (status != DDI_SUCCESS) {
        mutex_exit(&mr->mr_lock);
        status = ibc_get_ci_failure(0);
        goto fmr_reg_fail1;
    }

    /*
     * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
     * from a certain number of "constrained" bits (the least significant
     * bits) and some number of "unconstrained" bits. The constrained
     * bits must be set to the index of the entry in the MPT table, but
     * the unconstrained bits can be set to any value we wish. Note:
     * if no remote access is required, then the RKey value is not filled
     * in. Otherwise both RKey and LKey are given the same value.
     */
    key = mpt->hr_indx | (mr->mr_fmr_key++ << HERMON_MEMKEY_SHIFT);
    mr->mr_lkey = mr->mr_rkey = hermon_mr_key_swap(key);
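
    /*
     * Illustrative note (not in the original source): because mr_fmr_key
     * increments on every remap, successive registrations of the same FMR
     * yield distinct keys over the same MPT index. For a hypothetical
     * hr_indx of 0x000123, the first remap (mr_fmr_key == 1) produces key
     * 0x01000123 and the next produces 0x02000123, so a stale key from a
     * previous mapping no longer matches the current MPT contents.
     */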

    /* write mem key value */
    *(uint32_t *)&mpt_table[1] = htonl(key);

    /* write length value */
    mpt_table[3] = htonll(mem_pattr_p->pmr_len);

    /* write start addr value */
    mpt_table[2] = htonll(mem_pattr_p->pmr_iova);

    /* write lkey value */
    *(uint32_t *)&mpt_table[4] = htonl(key);

    membar_producer();

    /* Write MPT status to HW bit */
    *(uint8_t *)mpt_table = 0x00;

    /* Fill in return parameters */
    mem_desc_p->pmd_lkey = mr->mr_lkey;
    mem_desc_p->pmd_rkey = mr->mr_rkey;
    mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova;
    mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len;

    /* Fill in MR bindinfo struct for later sync or query operations */
    mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova;
    mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT;

    mutex_exit(&mr->mr_lock);

    return (DDI_SUCCESS);

fmr_reg_fail1:
    /*
     * Note, we fail here, and purposely leave the memory ownership in
     * software. The memory tables may be corrupt, so we leave the region
     * unregistered.
     */
    return (status);
}


/*
 * hermon_mr_deregister()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_deregister(hermon_state_t *state, hermon_mrhdl_t *mrhdl, uint_t level,
    uint_t sleep)
{
    hermon_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
    hermon_umap_db_entry_t *umapdb;
    hermon_pdhdl_t pd;
    hermon_mrhdl_t mr;
    hermon_bind_info_t *bind;
    uint64_t value;
    int status;
    uint_t shared_mtt;

    /*
     * Check the sleep flag. Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    if ((sleep == HERMON_SLEEP) &&
        (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
        status = IBT_INVALID_PARAM;
        return (status);
    }

    /*
     * Pull all the necessary information from the Hermon Memory Region
     * handle. This is necessary here because the resource for the
     * MR handle is going to be freed up as part of this deregistration.
     */
    mr = *mrhdl;
    mutex_enter(&mr->mr_lock);
    mpt = mr->mr_mptrsrcp;
    mtt = mr->mr_mttrsrcp;
    mtt_refcnt = mr->mr_mttrefcntp;
    rsrc = mr->mr_rsrcp;
    pd = mr->mr_pdhdl;
    bind = &mr->mr_bindinfo;

    /*
     * Check here if the memory region is really an FMR. If so, this is a
     * bad thing and we shouldn't be here. Return failure.
     */
    if (mr->mr_is_fmr) {
        mutex_exit(&mr->mr_lock);
        return (IBT_INVALID_PARAM);
    }

    /*
     * Check here to see if the memory region has already been partially
     * deregistered as a result of the hermon_umap_umemlock_cb() callback.
     * If so, then jump to the end and free the remaining resources.
     */
    if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
        goto mrdereg_finish_cleanup;
    }
    if (hermon_rdma_debug & 0x4)
        IBTF_DPRINTF_L2("mr", "dereg: mr %p key %x",
            mr, mr->mr_rkey);

    /*
     * We must drop the "mr_lock" here to ensure that both SLEEP and
     * NOSLEEP calls into the firmware work as expected. Also, if two
     * threads are attempting to access this MR (via de-register,
     * re-register, or otherwise), then we allow the firmware to enforce
     * the check that only one deregister is valid.
     */
    mutex_exit(&mr->mr_lock);

    /*
     * Reclaim MPT entry from hardware (if necessary). Since the
     * hermon_mr_deregister() routine is used in the memory region
     * reregistration process as well, it is possible that we will
     * not always wish to reclaim ownership of the MPT. Check the
     * "level" arg and, if necessary, attempt to reclaim it. If
     * the ownership transfer fails for any reason, we check to see
     * what command status was returned from the hardware. The only
     * "expected" error status is the one that indicates an attempt to
     * deregister a memory region that has memory windows bound to it.
     */
    if (level >= HERMON_MR_DEREG_ALL) {
        if (mr->mr_mpt_type >= HERMON_MPT_DMPT) {
            status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
                NULL, 0, mpt->hr_indx, sleep);
            if (status != HERMON_CMD_SUCCESS) {
                if (status == HERMON_CMD_REG_BOUND) {
                    return (IBT_MR_IN_USE);
                } else {
                    cmn_err(CE_CONT, "Hermon: HW2SW_MPT "
                        "command failed: %08x\n", status);
                    if (status == HERMON_CMD_INVALID_STATUS) {
                        hermon_fm_ereport(state, HCA_SYS_ERR,
                            HCA_ERR_SRV_LOST);
                    }
                    return (IBT_INVALID_PARAM);
                }
            }
        }
    }

    /*
     * Re-grab the mr_lock here. Since further access to the protected
     * 'mr' structure is needed, and we would have returned previously for
     * the multiple deregistration case, we can safely grab the lock here.
     */
    mutex_enter(&mr->mr_lock);

    /*
     * If the memory had come from userland, then we do a lookup in the
     * "userland resources database". On success, we free the entry, call
     * ddi_umem_unlock(), and continue the cleanup. On failure (which is
     * an indication that the umem_lockmemory() callback has called
     * hermon_mr_deregister()), we call ddi_umem_unlock() and invalidate
     * the "mr_umemcookie" field in the MR handle (this will be used
     * later to detect that only partial cleanup still remains to be done
     * on the MR handle).
     */
    if (mr->mr_is_umem) {
        status = hermon_umap_db_find(state->hs_instance,
            (uint64_t)(uintptr_t)mr->mr_umemcookie,
            MLNX_UMAP_MRMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
            &umapdb);
        if (status == DDI_SUCCESS) {
            hermon_umap_db_free(umapdb);
            ddi_umem_unlock(mr->mr_umemcookie);
        } else {
            ddi_umem_unlock(mr->mr_umemcookie);
            mr->mr_umemcookie = NULL;
        }
    }

    /* mtt_refcnt is NULL in the case of hermon_dma_mr_register() */
    if (mtt_refcnt != NULL) {
        /*
         * Decrement the MTT reference count. Since the MTT resource
         * may be shared between multiple memory regions (as a result
         * of a "RegisterSharedMR" verb) it is important that we not
         * free up or unbind resources prematurely. If it's not shared
         * (as indicated by the return status), then free the resource.
         */
        shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt);
        if (!shared_mtt) {
            hermon_rsrc_free(state, &mtt_refcnt);
        }

        /*
         * Free up the MTT entries and unbind the memory. Here,
         * as above, we attempt to free these resources only if
         * it is appropriate to do so.
         * Note, 'bind' is NULL in the alloc_lkey case.
         */
        if (!shared_mtt) {
            if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) {
                hermon_mr_mem_unbind(state, bind);
            }
            hermon_rsrc_free(state, &mtt);
        }
    }

    /*
     * If the MR handle has been invalidated, then drop the
     * lock and return success. Note: This only happens because
     * the umem_lockmemory() callback has been triggered. The
     * cleanup here is partial, and further cleanup (in a
     * subsequent hermon_mr_deregister() call) will be necessary.
     */
    if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
        mutex_exit(&mr->mr_lock);
        return (DDI_SUCCESS);
    }

mrdereg_finish_cleanup:
    mutex_exit(&mr->mr_lock);

    /* Free the Hermon Memory Region handle */
    hermon_rsrc_free(state, &rsrc);

    /* Free up the MPT entry resource */
    if (mpt != NULL)
        hermon_rsrc_free(state, &mpt);

    /* Decrement the reference count on the protection domain (PD) */
    hermon_pd_refcnt_dec(pd);

    /* Set the mrhdl pointer to NULL and return success */
    *mrhdl = NULL;

    return (DDI_SUCCESS);
}

/*
 * hermon_mr_dealloc_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_dealloc_fmr(hermon_state_t *state, hermon_mrhdl_t *mrhdl)
{
    hermon_rsrc_t *mpt, *mtt, *rsrc;
    hermon_pdhdl_t pd;
    hermon_mrhdl_t mr;

    /*
     * Pull all the necessary information from the Hermon Memory Region
     * handle. This is necessary here because the resource for the
     * MR handle is going to be freed up as part of this deregistration.
     */
    mr = *mrhdl;
    mutex_enter(&mr->mr_lock);
    mpt = mr->mr_mptrsrcp;
    mtt = mr->mr_mttrsrcp;
    rsrc = mr->mr_rsrcp;
    pd = mr->mr_pdhdl;
    mutex_exit(&mr->mr_lock);

    /* Free the MTT entries */
    hermon_rsrc_free(state, &mtt);

    /* Free the Hermon Memory Region handle */
    hermon_rsrc_free(state, &rsrc);

    /* Free up the MPT entry resource */
    hermon_rsrc_free(state, &mpt);

    /* Decrement the reference count on the protection domain (PD) */
    hermon_pd_refcnt_dec(pd);

    /* Set the mrhdl pointer to NULL and return success */
    *mrhdl = NULL;

    return (DDI_SUCCESS);
}


/*
 * hermon_mr_query()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_query(hermon_state_t *state, hermon_mrhdl_t mr,
    ibt_mr_query_attr_t *attr)
{
    int status;
    hermon_hw_dmpt_t mpt_entry;
    uint32_t lkey;

    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))

    mutex_enter(&mr->mr_lock);

    /*
     * Check here to see if the memory region has already been partially
     * deregistered as a result of a hermon_umap_umemlock_cb() callback.
     * If so, this is an error, return failure.
     */
    if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
        mutex_exit(&mr->mr_lock);
        return (IBT_MR_HDL_INVALID);
    }

    status = hermon_cmn_query_cmd_post(state, QUERY_MPT, 0,
        mr->mr_lkey >> 8, &mpt_entry, sizeof (hermon_hw_dmpt_t),
        HERMON_NOSLEEP);
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: QUERY_MPT failed: status %x", status);
        mutex_exit(&mr->mr_lock);
        return (ibc_get_ci_failure(0));
    }

    /* Update the mr sw struct from the hw struct. */
    lkey = mpt_entry.mem_key;
    mr->mr_lkey = mr->mr_rkey = (lkey >> 8) | (lkey << 24);
    mr->mr_bindinfo.bi_addr = mpt_entry.start_addr;
    mr->mr_bindinfo.bi_len = mpt_entry.reg_win_len;
    mr->mr_accflag = (mr->mr_accflag & IBT_MR_RO_DISABLED) |
        (mpt_entry.lw ? IBT_MR_LOCAL_WRITE : 0) |
        (mpt_entry.rr ? IBT_MR_REMOTE_READ : 0) |
        (mpt_entry.rw ? IBT_MR_REMOTE_WRITE : 0) |
        (mpt_entry.atomic ? IBT_MR_REMOTE_ATOMIC : 0) |
        (mpt_entry.en_bind ? IBT_MR_WINDOW_BIND : 0);
    mr->mr_mttaddr = ((uint64_t)mpt_entry.mtt_addr_h << 32) |
        (mpt_entry.mtt_addr_l << 3);
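
    /*
     * Note (added for clarity): the assignment above reassembles the
     * 64-bit MTT offset from the split mtt_addr_h/mtt_addr_l fields,
     * undoing the ">> 32" / ">> 3" split performed when the MPT entry
     * was originally written to hardware.
     */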
    mr->mr_logmttpgsz = mpt_entry.entity_sz;

    /* Fill in the queried attributes */
    attr->mr_lkey_state =
        (mpt_entry.status == HERMON_MPT_FREE) ? IBT_KEY_FREE :
        (mpt_entry.status == HERMON_MPT_SW_OWNERSHIP) ? IBT_KEY_INVALID :
        IBT_KEY_VALID;
    attr->mr_phys_buf_list_sz = mpt_entry.mtt_size;
    attr->mr_attr_flags = mr->mr_accflag;
    attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;

    /* Fill in the "local" attributes */
    attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
    attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
    attr->mr_lbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;

    /*
     * Fill in the "remote" attributes (if necessary). Note: the
     * remote attributes are only valid if the memory region has one
     * or more of the remote access flags set.
     */
    if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
        (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
        (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
        attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
        attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
        attr->mr_rbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;
    }

    /*
     * If the region is mapped for streaming (i.e. noncoherent), then
     * indicate that a sync is required.
     */
    attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
        IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

    mutex_exit(&mr->mr_lock);
    return (DDI_SUCCESS);
}


/*
 * hermon_mr_reregister()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_reregister(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op)
{
    hermon_bind_info_t bind;
    int status;

    /*
     * Fill in the "bind" struct. This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (as is the case here) and a "buf" binding (see
     * below). The "bind" struct is later passed to hermon_mr_mem_bind()
     * which does most of the "heavy lifting" for the Hermon memory
     * registration (and reregistration) routines.
     */
    bind.bi_type = HERMON_BINDHDL_VADDR;
    bind.bi_addr = mr_attr->mr_vaddr;
    bind.bi_len = mr_attr->mr_len;
    bind.bi_as = mr_attr->mr_as;
    bind.bi_flags = mr_attr->mr_flags;
    status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
    return (status);
}


/*
 * hermon_mr_reregister_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_reregister_buf(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
    hermon_mrhdl_t *mrhdl_new, hermon_mr_options_t *op)
{
    hermon_bind_info_t bind;
    int status;

    /*
     * Fill in the "bind" struct. This struct provides the majority
     * of the information that will be used to distinguish between an
     * "addr" binding (see above) and a "buf" binding (as is the case
     * here). The "bind" struct is later passed to hermon_mr_mem_bind()
     * which does most of the "heavy lifting" for the Hermon memory
     * registration routines. Note: We have chosen to provide
     * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
     * not set). It is not critical what value we choose here as it need
     * only be unique for the given RKey (which will happen by default),
     * so the choice here is somewhat arbitrary.
     */
    bind.bi_type = HERMON_BINDHDL_BUF;
    bind.bi_buf = buf;
    if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
        bind.bi_addr = mr_attr->mr_vaddr;
    } else {
        bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
    }
    bind.bi_len = (uint64_t)buf->b_bcount;
    bind.bi_flags = mr_attr->mr_flags;
    bind.bi_as = NULL;
    status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
    return (status);
}


/*
 * hermon_mr_sync()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_sync(hermon_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
{
    hermon_mrhdl_t mrhdl;
    uint64_t seg_vaddr, seg_len, seg_end;
    uint64_t mr_start, mr_end;
    uint_t type;
    int status, i;

    /* Process each of the ibt_mr_sync_t's */
    for (i = 0; i < num_segs; i++) {
        mrhdl = (hermon_mrhdl_t)mr_segs[i].ms_handle;

        /* Check for valid memory region handle */
        if (mrhdl == NULL) {
            status = IBT_MR_HDL_INVALID;
            goto mrsync_fail;
        }

        mutex_enter(&mrhdl->mr_lock);

        /*
         * Check here to see if the memory region has already been
         * partially deregistered as a result of a
         * hermon_umap_umemlock_cb() callback. If so, this is an
         * error, return failure.
         */
        if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
            mutex_exit(&mrhdl->mr_lock);
            status = IBT_MR_HDL_INVALID;
            goto mrsync_fail;
        }

        /* Check for valid bounds on sync request */
        seg_vaddr = mr_segs[i].ms_vaddr;
        seg_len = mr_segs[i].ms_len;
        seg_end = seg_vaddr + seg_len - 1;
        mr_start = mrhdl->mr_bindinfo.bi_addr;
        mr_end = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
        if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
            mutex_exit(&mrhdl->mr_lock);
            status = IBT_MR_VA_INVALID;
            goto mrsync_fail;
        }
        if ((seg_end < mr_start) || (seg_end > mr_end)) {
            mutex_exit(&mrhdl->mr_lock);
            status = IBT_MR_LEN_INVALID;
            goto mrsync_fail;
        }

        /* Determine what type (i.e. direction) for sync */
        if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
            type = DDI_DMA_SYNC_FORDEV;
        } else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
            type = DDI_DMA_SYNC_FORCPU;
        } else {
            mutex_exit(&mrhdl->mr_lock);
            status = IBT_INVALID_PARAM;
            goto mrsync_fail;
        }

        (void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
            (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);

        mutex_exit(&mrhdl->mr_lock);
    }

    return (DDI_SUCCESS);

mrsync_fail:
    return (status);
}


/*
 * hermon_mw_alloc()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mw_alloc(hermon_state_t *state, hermon_pdhdl_t pd, ibt_mw_flags_t flags,
    hermon_mwhdl_t *mwhdl)
{
    hermon_rsrc_t *mpt, *rsrc;
    hermon_hw_dmpt_t mpt_entry;
    hermon_mwhdl_t mw;
    uint_t sleep;
    int status;

    if (state != NULL) /* XXX - bogus test that is always TRUE */
        return (IBT_INSUFF_RESOURCE);

    /*
     * Check the sleep flag. Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    sleep = (flags & IBT_MW_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
    if ((sleep == HERMON_SLEEP) &&
        (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
        status = IBT_INVALID_PARAM;
        goto mwalloc_fail;
    }

    /* Increment the reference count on the protection domain (PD) */
    hermon_pd_refcnt_inc(pd);

    /*
     * Allocate an MPT entry (for use as a memory window). Since the
     * Hermon hardware uses the MPT entry for memory regions and for
     * memory windows, we will fill in this MPT with all the necessary
     * parameters for the memory window. And then (just as we do for
     * memory regions) ownership will be passed to the hardware in the
     * final step below. If we fail here, we must undo the protection
     * domain reference count.
     */
    status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mwalloc_fail1;
    }

    /*
     * Allocate the software structure for tracking the memory window (i.e.
     * the Hermon Memory Window handle). Note: This is actually the same
     * software structure used for tracking memory regions, but since many
     * of the same properties are needed, only a single structure is
     * necessary. If we fail here, we must undo the protection domain
     * reference count and the previous resource allocation.
     */
    status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto mwalloc_fail2;
    }
    mw = (hermon_mwhdl_t)rsrc->hr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

    /*
     * Calculate an "unbound" RKey from MPT index. In much the same way
     * as we do for memory regions (above), this key is constructed from
     * a "constrained" portion (which depends on the MPT index) and an
     * "unconstrained" portion (which may be arbitrarily chosen).
     */
    mw->mr_rkey = hermon_mr_keycalc(mpt->hr_indx);

    /*
     * Fill in the MPT entry. This is the final step before passing
     * ownership of the MPT entry to the Hermon hardware. We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the MPT. Note: fewer fields in the MPT
     * entry need to be filled in to allocate a memory window.
     */
    bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
    mpt_entry.reg_win = HERMON_MPT_IS_WINDOW;
    mpt_entry.mem_key = mw->mr_rkey;
    mpt_entry.pd = pd->pd_pdnum;
    mpt_entry.lr = 1;

    /*
     * Write the MPT entry to hardware. Lastly, we pass ownership of
     * the entry to the hardware. Note: in general, this operation
     * shouldn't fail. But if it does, we have to undo everything we've
     * done above before returning error.
     */
    status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
        sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        status = ibc_get_ci_failure(0);
        goto mwalloc_fail3;
    }

    /*
     * Fill in the rest of the Hermon Memory Window handle. Having
     * successfully transferred ownership of the MPT, we can update the
     * following fields for use in further operations on the MW.
     */
    mw->mr_mptrsrcp = mpt;
    mw->mr_pdhdl = pd;
    mw->mr_rsrcp = rsrc;
    mw->mr_rkey = hermon_mr_key_swap(mw->mr_rkey);
    *mwhdl = mw;

    return (DDI_SUCCESS);

mwalloc_fail3:
    hermon_rsrc_free(state, &rsrc);
mwalloc_fail2:
    hermon_rsrc_free(state, &mpt);
mwalloc_fail1:
    hermon_pd_refcnt_dec(pd);
mwalloc_fail:
    return (status);
}


/*
 * hermon_mw_free()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mw_free(hermon_state_t *state, hermon_mwhdl_t *mwhdl, uint_t sleep)
{
    hermon_rsrc_t *mpt, *rsrc;
    hermon_mwhdl_t mw;
    int status;
    hermon_pdhdl_t pd;

    /*
     * Check the sleep flag. Ensure that it is consistent with the
     * current thread context (i.e. if we are currently in the interrupt
     * context, then we shouldn't be attempting to sleep).
     */
    if ((sleep == HERMON_SLEEP) &&
        (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
        status = IBT_INVALID_PARAM;
        return (status);
    }

    /*
     * Pull all the necessary information from the Hermon Memory Window
     * handle. This is necessary here because the resource for the
     * MW handle is going to be freed up as part of this operation.
     */
    mw = *mwhdl;
    mutex_enter(&mw->mr_lock);
    mpt = mw->mr_mptrsrcp;
    rsrc = mw->mr_rsrcp;
    pd = mw->mr_pdhdl;
    mutex_exit(&mw->mr_lock);
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

    /*
     * Reclaim the MPT entry from hardware. Note: in general, it is
     * unexpected for this operation to return an error.
     */
    status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
        0, mpt->hr_indx, sleep);
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    /* Free the Hermon Memory Window handle */
    hermon_rsrc_free(state, &rsrc);

    /* Free up the MPT entry resource */
    hermon_rsrc_free(state, &mpt);

    /* Decrement the reference count on the protection domain (PD) */
    hermon_pd_refcnt_dec(pd);

    /* Set the mwhdl pointer to NULL and return success */
    *mwhdl = NULL;

    return (DDI_SUCCESS);
}


/*
 * hermon_mr_keycalc()
 *    Context: Can be called from interrupt or base context.
 *    NOTE: Produces a key in the form of
 *        KKKKKKKK IIIIIIII IIIIIIII IIIIIIII
 *    where K == the arbitrary bits and I == the index
 */
uint32_t
hermon_mr_keycalc(uint32_t indx)
{
    uint32_t tmp_key, tmp_indx;

    /*
     * Generate a simple key from counter. Note: We increment this
     * static variable _intentionally_ without any kind of mutex around
     * it. First, single-threading all operations through a single lock
     * would be a bad idea (from a performance point-of-view). Second,
     * the upper "unconstrained" bits don't really have to be unique
     * because the lower bits are guaranteed to be (although we do make a
     * best effort to ensure that they are). Third, the window for the
     * race (where both threads read and update the counter at the same
     * time) is incredibly small.
     * And, lastly, we'd like to make this into a "random" key.
     */
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hermon_memkey_cnt))
    tmp_key = (hermon_memkey_cnt++) << HERMON_MEMKEY_SHIFT;
    tmp_indx = indx & 0xffffff;
    return (tmp_key | tmp_indx);
}


/*
 * hermon_mr_key_swap()
 *    Context: Can be called from interrupt or base context.
 *    NOTE: Produces a key in the form of
 *        IIIIIIII IIIIIIII IIIIIIII KKKKKKKK
 *    where K == the arbitrary bits and I == the index
 */
uint32_t
hermon_mr_key_swap(uint32_t indx)
{
    /*
     * The memory key format to pass down to the hardware is
     * (key[7:0],index[23:0]), which defines the index to the
     * hardware resource. When the driver passes this as a memory
     * key, (i.e. to retrieve a resource) the format is
     * (index[23:0],key[7:0]).
     */
    return (((indx >> 24) & 0x000000ff) | ((indx << 8) & 0xffffff00));
}
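
/*
 * Illustrative note (not in the original source): taken together,
 * hermon_mr_keycalc() and hermon_mr_key_swap() produce the driver-visible
 * key. For a hypothetical MPT index of 0x000123 and a counter value of
 * 0xAB, hermon_mr_keycalc() returns 0xAB000123 (arbitrary bits above the
 * index), and hermon_mr_key_swap() converts that to 0x000123AB -- the
 * (index[23:0],key[7:0]) form that is handed back to consumers and later
 * used to locate the hardware resource.
 */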
1462
1463 /*
1464 * hermon_mr_common_reg()
1465 * Context: Can be called from interrupt or base context.
1466 */
1467 static int
hermon_mr_common_reg(hermon_state_t * state,hermon_pdhdl_t pd,hermon_bind_info_t * bind,hermon_mrhdl_t * mrhdl,hermon_mr_options_t * op,hermon_mpt_rsrc_type_t mpt_type)1468 hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
1469 hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
1470 hermon_mpt_rsrc_type_t mpt_type)
1471 {
1472 hermon_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
1473 hermon_umap_db_entry_t *umapdb;
1474 hermon_sw_refcnt_t *swrc_tmp;
1475 hermon_hw_dmpt_t mpt_entry;
1476 hermon_mrhdl_t mr;
1477 ibt_mr_flags_t flags;
1478 hermon_bind_info_t *bh;
1479 ddi_dma_handle_t bind_dmahdl;
1480 ddi_umem_cookie_t umem_cookie;
1481 size_t umem_len;
1482 caddr_t umem_addr;
1483 uint64_t mtt_addr, max_sz;
1484 uint_t sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
1485 int status, umem_flags, bind_override_addr;
1486
1487 /*
1488 * Check the "options" flag. Currently this flag tells the driver
1489 * whether or not the region should be bound normally (i.e. with
1490 * entries written into the PCI IOMMU), whether it should be
1491 * registered to bypass the IOMMU, and whether or not the resulting
1492 * address should be "zero-based" (to aid the alignment restrictions
1493 * for QPs).
1494 */
1495 if (op == NULL) {
1496 bind_type = HERMON_BINDMEM_NORMAL;
1497 bind_dmahdl = NULL;
1498 bind_override_addr = 0;
1499 } else {
1500 bind_type = op->mro_bind_type;
1501 bind_dmahdl = op->mro_bind_dmahdl;
1502 bind_override_addr = op->mro_bind_override_addr;
1503 }
1504
1505 /* check what kind of mpt to use */
1506
1507 /* Extract the flags field from the hermon_bind_info_t */
1508 flags = bind->bi_flags;
1509
1510 /*
1511 * Check for invalid length. Check is the length is zero or if the
1512 * length is larger than the maximum configured value. Return error
1513 * if it is.
1514 */
1515 max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
1516 if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
1517 status = IBT_MR_LEN_INVALID;
1518 goto mrcommon_fail;
1519 }
1520
1521 /*
1522 * Check the sleep flag. Ensure that it is consistent with the
1523 * current thread context (i.e. if we are currently in the interrupt
1524 * context, then we shouldn't be attempting to sleep).
1525 */
1526 sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
1527 if ((sleep == HERMON_SLEEP) &&
1528 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1529 status = IBT_INVALID_PARAM;
1530 goto mrcommon_fail;
1531 }
1532
1533 /* Increment the reference count on the protection domain (PD) */
1534 hermon_pd_refcnt_inc(pd);
1535
1536 /*
1537 * Allocate an MPT entry. This will be filled in with all the
1538 * necessary parameters to define the memory region. And then
1539 * ownership will be passed to the hardware in the final step
1540 * below. If we fail here, we must undo the protection domain
1541 * reference count.
1542 */
1543 if (mpt_type == HERMON_MPT_DMPT) {
1544 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1545 if (status != DDI_SUCCESS) {
1546 status = IBT_INSUFF_RESOURCE;
1547 goto mrcommon_fail1;
1548 }
1549 } else {
1550 mpt = NULL;
1551 }
1552
1553 /*
1554 * Allocate the software structure for tracking the memory region (i.e.
1555 * the Hermon Memory Region handle). If we fail here, we must undo
1556 * the protection domain reference count and the previous resource
1557 * allocation.
1558 */
1559 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1560 if (status != DDI_SUCCESS) {
1561 status = IBT_INSUFF_RESOURCE;
1562 goto mrcommon_fail2;
1563 }
1564 mr = (hermon_mrhdl_t)rsrc->hr_addr;
1565 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1566
1567 /*
1568 * Setup and validate the memory region access flags. This means
1569 * translating the IBTF's enable flags into the access flags that
1570 * will be used in later operations.
1571 */
1572 mr->mr_accflag = 0;
1573 if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1574 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1575 if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1576 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1577 if (flags & IBT_MR_ENABLE_REMOTE_READ)
1578 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1579 if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1580 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1581 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1582 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1583
1584 /*
1585 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
1586 * from a certain number of "constrained" bits (the least significant
1587 * bits) and some number of "unconstrained" bits. The constrained
1588 * bits must be set to the index of the entry in the MPT table, but
1589 * the unconstrained bits can be set to any value we wish. Note:
1590 * if no remote access is required, then the RKey value is not filled
1591 * in. Otherwise both Rkey and LKey are given the same value.
1592 */
1593 if (mpt)
1594 mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
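/*
 * Illustrative sketch of the key layout described above. This is an
 * assumption about how hermon_mr_keycalc() behaves (it is defined
 * elsewhere), not a quote of it: with a 24-bit constrained field, a
 * key could be built as
 *
 *	key = (memkey_cnt++ << 24) | (mpt_indx & 0x00FFFFFF);
 *
 * so the low bits always select the MPT entry while the high byte is
 * free to vary between registrations.
 */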
1595
1596 /*
1597 * Determine if the memory is from userland and pin the pages
1598 * with umem_lockmemory() if necessary.
1599 * Then, if this is userland memory, allocate an entry in the
1600 * "userland resources database". This will later be added to
1601 * the database (after all further memory registration operations are
1602 * successful). If we fail here, we must undo the reference counts
1603 * and the previous resource allocations.
1604 */
1605 mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
1606 if (mr_is_umem) {
1607 umem_len = ptob(btopr(bind->bi_len +
1608 ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
1609 umem_addr = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
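/*
 * Worked example (assuming 4KB pages): bi_addr 0x12345678 with
 * bi_len 0x1000 gives umem_addr 0x12345000 and
 * umem_len = ptob(btopr(0x1000 + 0x678)) = 0x2000, so the lock
 * covers both pages the range touches.
 */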
1610 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
1611 DDI_UMEMLOCK_LONGTERM);
1612 status = umem_lockmemory(umem_addr, umem_len, umem_flags,
1613 &umem_cookie, &hermon_umem_cbops, NULL);
1614 if (status != 0) {
1615 status = IBT_INSUFF_RESOURCE;
1616 goto mrcommon_fail3;
1617 }
1618
1619 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1620 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1621
1622 bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
1623 B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
1624 if (bind->bi_buf == NULL) {
1625 status = IBT_INSUFF_RESOURCE;
1626 goto mrcommon_fail3;
1627 }
1628 bind->bi_type = HERMON_BINDHDL_UBUF;
1629 bind->bi_buf->b_flags |= B_READ;
1630
1631 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1632 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1633
1634 umapdb = hermon_umap_db_alloc(state->hs_instance,
1635 (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
1636 (uint64_t)(uintptr_t)rsrc);
1637 if (umapdb == NULL) {
1638 status = IBT_INSUFF_RESOURCE;
1639 goto mrcommon_fail4;
1640 }
1641 }
1642
1643 /*
1644 * Setup the bindinfo for the mtt bind call
1645 */
1646 bh = &mr->mr_bindinfo;
1647 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
1648 bcopy(bind, bh, sizeof (hermon_bind_info_t));
1649 bh->bi_bypass = bind_type;
1650 status = hermon_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
1651 &mtt_pgsize_bits, mpt != NULL);
1652 if (status != DDI_SUCCESS) {
1653 /*
1654 * When mtt_bind fails, freerbuf has already been done,
1655 * so make sure not to call it again.
1656 */
1657 bind->bi_type = bh->bi_type;
1658 goto mrcommon_fail5;
1659 }
1660 mr->mr_logmttpgsz = mtt_pgsize_bits;
1661
1662 /*
1663 * Allocate MTT reference count (to track shared memory regions).
1664 * This reference count resource may never be used on the given
1665 * memory region, but if it is ever later registered as "shared"
1666 * memory region then this resource will be necessary. If we fail
1667 * here, we do pretty much the same as above to clean up.
1668 */
1669 status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
1670 &mtt_refcnt);
1671 if (status != DDI_SUCCESS) {
1672 status = IBT_INSUFF_RESOURCE;
1673 goto mrcommon_fail6;
1674 }
1675 mr->mr_mttrefcntp = mtt_refcnt;
1676 swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
1677 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
1678 HERMON_MTT_REFCNT_INIT(swrc_tmp);
1679
1680 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
1681
1682 /*
1683 * Fill in the MPT entry. This is the final step before passing
1684 * ownership of the MPT entry to the Hermon hardware. We use all of
1685 * the information collected/calculated above to fill in the
1686 * requisite portions of the MPT. Do this ONLY for DMPTs.
1687 */
1688 if (mpt == NULL)
1689 goto no_passown;
1690
1691 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1692
1693 mpt_entry.status = HERMON_MPT_SW_OWNERSHIP;
1694 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
1695 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1696 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
1697 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
1698 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
1699 mpt_entry.lr = 1;
1700 mpt_entry.phys_addr = 0;
1701 mpt_entry.reg_win = HERMON_MPT_IS_REGION;
1702
1703 mpt_entry.entity_sz = mr->mr_logmttpgsz;
1704 mpt_entry.mem_key = mr->mr_lkey;
1705 mpt_entry.pd = pd->pd_pdnum;
1706 mpt_entry.rem_acc_en = 0;
1707 mpt_entry.fast_reg_en = 0;
1708 mpt_entry.en_inval = 0;
1709 mpt_entry.lkey = 0;
1710 mpt_entry.win_cnt = 0;
1711
1712 if (bind_override_addr == 0) {
1713 mpt_entry.start_addr = bh->bi_addr;
1714 } else {
1715 bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
1716 mpt_entry.start_addr = bh->bi_addr;
1717 }
1718 mpt_entry.reg_win_len = bh->bi_len;
1719
1720 mpt_entry.mtt_addr_h = mtt_addr >> 32; /* only 8 more bits */
1721 mpt_entry.mtt_addr_l = mtt_addr >> 3; /* only 29 bits */
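/*
 * Layout note with a worked example: the hardware keeps bits 63:32 of
 * the MTT address in mtt_addr_h (only 8 of which are meaningful) and
 * bits 31:3 in mtt_addr_l, so the MTT base is always 8-byte aligned.
 * E.g. mtt_addr 0x123456788 yields mtt_addr_h 0x1 and
 * mtt_addr_l 0x23456788 >> 3 = 0x468ACF1.
 */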
1722
1723 /*
1724 * Write the MPT entry to hardware. Lastly, we pass ownership of
1725 * the entry to the hardware if needed. Note: in general, this
1726 * operation shouldn't fail. But if it does, we have to undo
1727 * everything we've done above before returning error.
1728 *
1729 * For Hermon, this routine (which is common to the contexts) will only
1730 * set the ownership if needed - the process of passing the context
1731 * itself to HW will take care of setting up the MPT (based on type
1732 * and index).
1733 */
1734
1735 mpt_entry.bnd_qp = 0; /* dMPT for a qp, check for window */
1736 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1737 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1738 if (status != HERMON_CMD_SUCCESS) {
1739 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1740 status);
1741 if (status == HERMON_CMD_INVALID_STATUS) {
1742 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1743 }
1744 status = ibc_get_ci_failure(0);
1745 goto mrcommon_fail7;
1746 }
1747 if (hermon_rdma_debug & 0x4)
1748 IBTF_DPRINTF_L2("mr", " reg: mr %p key %x",
1749 mr, hermon_mr_key_swap(mr->mr_rkey));
1750 no_passown:
1751
1752 /*
1753 * Fill in the rest of the Hermon Memory Region handle. Having
1754 * successfully transferred ownership of the MPT, we can update the
1755 * following fields for use in further operations on the MR.
1756 */
1757 mr->mr_mttaddr = mtt_addr;
1758
1759 mr->mr_log2_pgsz = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
1760 mr->mr_mptrsrcp = mpt;
1761 mr->mr_mttrsrcp = mtt;
1762 mr->mr_pdhdl = pd;
1763 mr->mr_rsrcp = rsrc;
1764 mr->mr_is_umem = mr_is_umem;
1765 mr->mr_is_fmr = 0;
1766 mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
1767 mr->mr_umem_cbfunc = NULL;
1768 mr->mr_umem_cbarg1 = NULL;
1769 mr->mr_umem_cbarg2 = NULL;
1770 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
1771 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);
1772 mr->mr_mpt_type = mpt_type;
1773
1774 /*
1775 * If this is userland memory, then we need to insert the previously
1776 * allocated entry into the "userland resources database". This will
1777 * allow for later coordination between the hermon_umap_umemlock_cb()
1778 * callback and hermon_mr_deregister().
1779 */
1780 if (mr_is_umem) {
1781 hermon_umap_db_add(umapdb);
1782 }
1783
1784 *mrhdl = mr;
1785
1786 return (DDI_SUCCESS);
1787
1788 /*
1789 * The following is cleanup for all possible failure cases in this routine
1790 */
1791 mrcommon_fail7:
1792 hermon_rsrc_free(state, &mtt_refcnt);
1793 mrcommon_fail6:
1794 hermon_mr_mem_unbind(state, bh);
1795 bind->bi_type = bh->bi_type;
1796 mrcommon_fail5:
1797 if (mr_is_umem) {
1798 hermon_umap_db_free(umapdb);
1799 }
1800 mrcommon_fail4:
1801 if (mr_is_umem) {
1802 /*
1803 * Free up the memory ddi_umem_iosetup() allocates
1804 * internally.
1805 */
1806 if (bind->bi_type == HERMON_BINDHDL_UBUF) {
1807 freerbuf(bind->bi_buf);
1808 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1809 bind->bi_type = HERMON_BINDHDL_NONE;
1810 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1811 }
1812 ddi_umem_unlock(umem_cookie);
1813 }
1814 mrcommon_fail3:
1815 hermon_rsrc_free(state, &rsrc);
1816 mrcommon_fail2:
1817 if (mpt != NULL)
1818 hermon_rsrc_free(state, &mpt);
1819 mrcommon_fail1:
1820 hermon_pd_refcnt_dec(pd);
1821 mrcommon_fail:
1822 return (status);
1823 }
1824
1825 /*
1826 * hermon_dma_mr_register()
1827 * Context: Can be called from base context.
1828 */
1829 int
1830 hermon_dma_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
1831 ibt_dmr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl)
1832 {
1833 hermon_rsrc_t *mpt, *rsrc;
1834 hermon_hw_dmpt_t mpt_entry;
1835 hermon_mrhdl_t mr;
1836 ibt_mr_flags_t flags;
1837 uint_t sleep;
1838 int status;
1839
1840 /* Extract the flags field */
1841 flags = mr_attr->dmr_flags;
1842
1843 /*
1844 * Check the sleep flag. Ensure that it is consistent with the
1845 * current thread context (i.e. if we are currently in the interrupt
1846 * context, then we shouldn't be attempting to sleep).
1847 */
1848 sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
1849 if ((sleep == HERMON_SLEEP) &&
1850 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1851 status = IBT_INVALID_PARAM;
1852 goto mrcommon_fail;
1853 }
1854
1855 /* Increment the reference count on the protection domain (PD) */
1856 hermon_pd_refcnt_inc(pd);
1857
1858 /*
1859 * Allocate an MPT entry. This will be filled in with all the
1860 * necessary parameters to define the memory region. And then
1861 * ownership will be passed to the hardware in the final step
1862 * below. If we fail here, we must undo the protection domain
1863 * reference count.
1864 */
1865 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1866 if (status != DDI_SUCCESS) {
1867 status = IBT_INSUFF_RESOURCE;
1868 goto mrcommon_fail1;
1869 }
1870
1871 /*
1872 * Allocate the software structure for tracking the memory region (i.e.
1873 * the Hermon Memory Region handle). If we fail here, we must undo
1874 * the protection domain reference count and the previous resource
1875 * allocation.
1876 */
1877 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1878 if (status != DDI_SUCCESS) {
1879 status = IBT_INSUFF_RESOURCE;
1880 goto mrcommon_fail2;
1881 }
1882 mr = (hermon_mrhdl_t)rsrc->hr_addr;
1883 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1884 bzero(mr, sizeof (*mr));
1885
1886 /*
1887 * Setup and validate the memory region access flags. This means
1888 * translating the IBTF's enable flags into the access flags that
1889 * will be used in later operations.
1890 */
1891 mr->mr_accflag = 0;
1892 if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1893 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1894 if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1895 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1896 if (flags & IBT_MR_ENABLE_REMOTE_READ)
1897 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1898 if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1899 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1900 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1901 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1902
1903 /*
1904 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
1905 * from a certain number of "constrained" bits (the least significant
1906 * bits) and some number of "unconstrained" bits. The constrained
1907 * bits must be set to the index of the entry in the MPT table, but
1908 * the unconstrained bits can be set to any value we wish. Note:
1909 * if no remote access is required, then the RKey value is not filled
1910 * in. Otherwise both Rkey and LKey are given the same value.
1911 */
1912 if (mpt)
1913 mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
1914
1915 /*
1916 * Fill in the MPT entry. This is the final step before passing
1917 * ownership of the MPT entry to the Hermon hardware. We use all of
1918 * the information collected/calculated above to fill in the
1919 * requisite portions of the MPT. Do this ONLY for DMPTs.
1920 */
1921 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1922
1923 mpt_entry.status = HERMON_MPT_SW_OWNERSHIP;
1924 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
1925 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1926 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
1927 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
1928 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
1929 mpt_entry.lr = 1;
1930 mpt_entry.phys_addr = 1; /* critical bit for this */
1931 mpt_entry.reg_win = HERMON_MPT_IS_REGION;
1932
1933 mpt_entry.entity_sz = mr->mr_logmttpgsz;
1934 mpt_entry.mem_key = mr->mr_lkey;
1935 mpt_entry.pd = pd->pd_pdnum;
1936 mpt_entry.rem_acc_en = 0;
1937 mpt_entry.fast_reg_en = 0;
1938 mpt_entry.en_inval = 0;
1939 mpt_entry.lkey = 0;
1940 mpt_entry.win_cnt = 0;
1941
1942 mpt_entry.start_addr = mr_attr->dmr_paddr;
1943 mpt_entry.reg_win_len = mr_attr->dmr_len;
1944 if (mr_attr->dmr_len == 0)
1945 mpt_entry.len_b64 = 1; /* needed for 2^64 length */
1946
1947 mpt_entry.mtt_addr_h = 0;
1948 mpt_entry.mtt_addr_l = 0;
1949
1950 /*
1951 * Write the MPT entry to hardware. Lastly, we pass ownership of
1952 * the entry to the hardware if needed. Note: in general, this
1953 * operation shouldn't fail. But if it does, we have to undo
1954 * everything we've done above before returning error.
1955 *
1956 * For Hermon, this routine (which is common to the contexts) will only
1957 * set the ownership if needed - the process of passing the context
1958 * itself to HW will take care of setting up the MPT (based on type
1959 * and index).
1960 */
1961
1962 mpt_entry.bnd_qp = 0; /* dMPT for a qp, check for window */
1963 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1964 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1965 if (status != HERMON_CMD_SUCCESS) {
1966 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1967 status);
1968 if (status == HERMON_CMD_INVALID_STATUS) {
1969 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1970 }
1971 status = ibc_get_ci_failure(0);
1972 goto mrcommon_fail7;
1973 }
1974
1975 /*
1976 * Fill in the rest of the Hermon Memory Region handle. Having
1977 * successfully transferred ownership of the MPT, we can update the
1978 * following fields for use in further operations on the MR.
1979 */
1980 mr->mr_mttaddr = 0;
1981
1982 mr->mr_log2_pgsz = 0;
1983 mr->mr_mptrsrcp = mpt;
1984 mr->mr_mttrsrcp = NULL;
1985 mr->mr_pdhdl = pd;
1986 mr->mr_rsrcp = rsrc;
1987 mr->mr_is_umem = 0;
1988 mr->mr_is_fmr = 0;
1989 mr->mr_umemcookie = NULL;
1990 mr->mr_umem_cbfunc = NULL;
1991 mr->mr_umem_cbarg1 = NULL;
1992 mr->mr_umem_cbarg2 = NULL;
1993 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
1994 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);
1995 mr->mr_mpt_type = HERMON_MPT_DMPT;
1996
1997 *mrhdl = mr;
1998
1999 return (DDI_SUCCESS);
2000
2001 /*
2002 * The following is cleanup for all possible failure cases in this routine
2003 */
2004 mrcommon_fail7:
2005 hermon_rsrc_free(state, &rsrc);
2006 mrcommon_fail2:
2007 hermon_rsrc_free(state, &mpt);
2008 mrcommon_fail1:
2009 hermon_pd_refcnt_dec(pd);
2010 mrcommon_fail:
2011 return (status);
2012 }
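/*
 * Hypothetical caller sketch (not compiled): "state" and "pd" are
 * assumed to be a valid attached HCA state and protection domain, and
 * the physical address and length are made up for illustration.
 */
#if 0
	ibt_dmr_attr_t	dmr_attr;
	hermon_mrhdl_t	mrhdl;
	int		status;

	bzero(&dmr_attr, sizeof (dmr_attr));
	dmr_attr.dmr_paddr = 0x100000000ULL;	/* example physical base */
	dmr_attr.dmr_len   = 0x10000;		/* 64KB region */
	dmr_attr.dmr_flags = IBT_MR_ENABLE_LOCAL_WRITE | IBT_MR_NOSLEEP;
	status = hermon_dma_mr_register(state, pd, &dmr_attr, &mrhdl);
	if (status != DDI_SUCCESS) {
		/* status holds an IBT_* error code */
	}
#endif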
2013
2014 /*
2015 * hermon_mr_alloc_lkey()
2016 * Context: Can be called from base context.
2017 */
2018 int
2019 hermon_mr_alloc_lkey(hermon_state_t *state, hermon_pdhdl_t pd,
2020 ibt_lkey_flags_t flags, uint_t nummtt, hermon_mrhdl_t *mrhdl)
2021 {
2022 hermon_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
2023 hermon_sw_refcnt_t *swrc_tmp;
2024 hermon_hw_dmpt_t mpt_entry;
2025 hermon_mrhdl_t mr;
2026 uint64_t mtt_addr;
2027 uint_t sleep;
2028 int status;
2029
2030 /* Increment the reference count on the protection domain (PD) */
2031 hermon_pd_refcnt_inc(pd);
2032
2033 sleep = (flags & IBT_KEY_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
2034
2035 /*
2036 * Allocate an MPT entry. This will be filled in with "some" of the
2037 * necessary parameters to define the memory region. And then
2038 * ownership will be passed to the hardware in the final step
2039 * below. If we fail here, we must undo the protection domain
2040 * reference count.
2041 *
2042 * The MTTs will get filled in when the FRWR is processed.
2043 */
2044 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
2045 if (status != DDI_SUCCESS) {
2046 status = IBT_INSUFF_RESOURCE;
2047 goto alloclkey_fail1;
2048 }
2049
2050 /*
2051 * Allocate the software structure for tracking the memory region (i.e.
2052 * the Hermon Memory Region handle). If we fail here, we must undo
2053 * the protection domain reference count and the previous resource
2054 * allocation.
2055 */
2056 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
2057 if (status != DDI_SUCCESS) {
2058 status = IBT_INSUFF_RESOURCE;
2059 goto alloclkey_fail2;
2060 }
2061 mr = (hermon_mrhdl_t)rsrc->hr_addr;
2062 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
2063 bzero(mr, sizeof (*mr));
2064 mr->mr_bindinfo.bi_type = HERMON_BINDHDL_LKEY;
2065
2066 mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
2067
2068 status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
2069 if (status != DDI_SUCCESS) {
2070 status = IBT_INSUFF_RESOURCE;
2071 goto alloclkey_fail3;
2072 }
2073 mr->mr_logmttpgsz = PAGESHIFT;
2074
2075 /*
2076 * Allocate MTT reference count (to track shared memory regions).
2077 * This reference count resource may never be used on the given
2078 * memory region, but if it is ever later registered as "shared"
2079 * memory region then this resource will be necessary. If we fail
2080 * here, we do pretty much the same as above to clean up.
2081 */
2082 status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
2083 &mtt_refcnt);
2084 if (status != DDI_SUCCESS) {
2085 status = IBT_INSUFF_RESOURCE;
2086 goto alloclkey_fail4;
2087 }
2088 mr->mr_mttrefcntp = mtt_refcnt;
2089 swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
2090 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
2091 HERMON_MTT_REFCNT_INIT(swrc_tmp);
2092
2093 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
2094
2095 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2096 mpt_entry.status = HERMON_MPT_FREE;
2097 mpt_entry.lw = 1;
2098 mpt_entry.lr = 1;
2099 mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2100 mpt_entry.entity_sz = mr->mr_logmttpgsz;
2101 mpt_entry.mem_key = mr->mr_lkey;
2102 mpt_entry.pd = pd->pd_pdnum;
2103 mpt_entry.fast_reg_en = 1;
2104 mpt_entry.rem_acc_en = 1;
2105 mpt_entry.en_inval = 1;
2106 if (flags & IBT_KEY_REMOTE) {
2107 mpt_entry.ren_inval = 1;
2108 }
2109 mpt_entry.mtt_size = nummtt;
2110 mpt_entry.mtt_addr_h = mtt_addr >> 32; /* only 8 more bits */
2111 mpt_entry.mtt_addr_l = mtt_addr >> 3; /* only 29 bits */
2112
2113 /*
2114 * Write the MPT entry to hardware. Lastly, we pass ownership of
2115 * the entry to the hardware if needed. Note: in general, this
2116 * operation shouldn't fail. But if it does, we have to undo
2117 * everything we've done above before returning error.
2118 *
2119 * For Hermon, this routine (which is common to the contexts) will only
2120 * set the ownership if needed - the process of passing the context
2121 * itself to HW will take care of setting up the MPT (based on type
2122 * and index).
2123 */
2124 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2125 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
2126 if (status != HERMON_CMD_SUCCESS) {
2127 cmn_err(CE_CONT, "Hermon: alloc_lkey: SW2HW_MPT command "
2128 "failed: %08x\n", status);
2129 if (status == HERMON_CMD_INVALID_STATUS) {
2130 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2131 }
2132 status = ibc_get_ci_failure(0);
2133 goto alloclkey_fail5;
2134 }
2135
2136 /*
2137 * Fill in the rest of the Hermon Memory Region handle. Having
2138 * successfully transferred ownership of the MPT, we can update the
2139 * following fields for use in further operations on the MR.
2140 */
2141 mr->mr_accflag = IBT_MR_LOCAL_WRITE;
2142 mr->mr_mttaddr = mtt_addr;
2143 mr->mr_log2_pgsz = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
2144 mr->mr_mptrsrcp = mpt;
2145 mr->mr_mttrsrcp = mtt;
2146 mr->mr_pdhdl = pd;
2147 mr->mr_rsrcp = rsrc;
2148 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
2149 mr->mr_rkey = mr->mr_lkey;
2150 mr->mr_mpt_type = HERMON_MPT_DMPT;
2151
2152 *mrhdl = mr;
2153 return (DDI_SUCCESS);
2154
2155 alloclkey_fail5:
2156 hermon_rsrc_free(state, &mtt_refcnt);
2157 alloclkey_fail4:
2158 hermon_rsrc_free(state, &mtt);
2159 alloclkey_fail3:
2160 hermon_rsrc_free(state, &rsrc);
2161 alloclkey_fail2:
2162 hermon_rsrc_free(state, &mpt);
2163 alloclkey_fail1:
2164 hermon_pd_refcnt_dec(pd);
2165 return (status);
2166 }
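/*
 * Hypothetical caller sketch (not compiled): allocates an lkey backed
 * by 16 MTT entries for later fast-register (FRWR) use. "state" and
 * "pd" are assumed valid; the flag choice is illustrative.
 */
#if 0
	hermon_mrhdl_t	mrhdl;
	int		status;

	status = hermon_mr_alloc_lkey(state, pd,
	    IBT_KEY_REMOTE | IBT_KEY_NOSLEEP, 16, &mrhdl);
	if (status != DDI_SUCCESS) {
		/* IBT_INSUFF_RESOURCE or a CI failure code */
	}
#endif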
2167
2168 /*
2169 * hermon_mr_fexch_mpt_init()
2170 * Context: Can be called from base context.
2171 *
2172 * This is the same as alloc_lkey, but not returning an mrhdl.
2173 */
2174 int
2175 hermon_mr_fexch_mpt_init(hermon_state_t *state, hermon_pdhdl_t pd,
2176 uint32_t mpt_indx, uint_t nummtt, uint64_t mtt_addr, uint_t sleep)
2177 {
2178 hermon_hw_dmpt_t mpt_entry;
2179 int status;
2180
2181 /*
2182 * The MTTs will get filled in when the FRWR is processed.
2183 */
2184
2185 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2186 mpt_entry.status = HERMON_MPT_FREE;
2187 mpt_entry.lw = 1;
2188 mpt_entry.lr = 1;
2189 mpt_entry.rw = 1;
2190 mpt_entry.rr = 1;
2191 mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2192 mpt_entry.entity_sz = PAGESHIFT;
2193 mpt_entry.mem_key = mpt_indx;
2194 mpt_entry.pd = pd->pd_pdnum;
2195 mpt_entry.fast_reg_en = 1;
2196 mpt_entry.rem_acc_en = 1;
2197 mpt_entry.en_inval = 1;
2198 mpt_entry.ren_inval = 1;
2199 mpt_entry.mtt_size = nummtt;
2200 mpt_entry.mtt_addr_h = mtt_addr >> 32; /* only 8 more bits */
2201 mpt_entry.mtt_addr_l = mtt_addr >> 3; /* only 29 bits */
2202
2203 /*
2204 * Write the MPT entry to hardware. Lastly, we pass ownership of
2205 * the entry to the hardware if needed. Note: in general, this
2206 * operation shouldn't fail. But if it does, we have to undo
2207 * everything we've done above before returning error.
2208 *
2209 * For Hermon, this routine (which is common to the contexts) will only
2210 * set the ownership if needed - the process of passing the context
2211 * itself to HW will take care of setting up the MPT (based on type
2212 * and index).
2213 */
2214 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2215 sizeof (hermon_hw_dmpt_t), mpt_indx, sleep);
2216 if (status != HERMON_CMD_SUCCESS) {
2217 cmn_err(CE_CONT, "Hermon: fexch_mpt_init: SW2HW_MPT command "
2218 "failed: %08x\n", status);
2219 if (status == HERMON_CMD_INVALID_STATUS) {
2220 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2221 }
2222 status = ibc_get_ci_failure(0);
2223 return (status);
2224 }
2225 /* Increment the reference count on the protection domain (PD) */
2226 hermon_pd_refcnt_inc(pd);
2227
2228 return (DDI_SUCCESS);
2229 }
2230
2231 /*
2232 * hermon_mr_fexch_mpt_fini()
2233 * Context: Can be called from base context.
2234 *
2235 * This is the same as deregister_mr, without an mrhdl.
2236 */
2237 int
2238 hermon_mr_fexch_mpt_fini(hermon_state_t *state, hermon_pdhdl_t pd,
2239 uint32_t mpt_indx, uint_t sleep)
2240 {
2241 int status;
2242
2243 status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
2244 NULL, 0, mpt_indx, sleep);
2245 if (status != HERMON_CMD_SUCCESS) {
2246 cmn_err(CE_CONT, "Hermon: fexch_mpt_fini: HW2SW_MPT command "
2247 "failed: %08x\n", status);
2248 if (status == HERMON_CMD_INVALID_STATUS) {
2249 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2250 }
2251 status = ibc_get_ci_failure(0);
2252 return (status);
2253 }
2254
2255 /* Decrement the reference count on the protection domain (PD) */
2256 hermon_pd_refcnt_dec(pd);
2257
2258 return (DDI_SUCCESS);
2259 }
2260
2261 /*
2262 * hermon_mr_mtt_bind()
2263 * Context: Can be called from interrupt or base context.
2264 */
2265 int
2266 hermon_mr_mtt_bind(hermon_state_t *state, hermon_bind_info_t *bind,
2267 ddi_dma_handle_t bind_dmahdl, hermon_rsrc_t **mtt, uint_t *mtt_pgsize_bits,
2268 uint_t is_buffer)
2269 {
2270 uint64_t nummtt;
2271 uint_t sleep;
2272 int status;
2273
2274 /*
2275 * Check the sleep flag. Ensure that it is consistent with the
2276 * current thread context (i.e. if we are currently in the interrupt
2277 * context, then we shouldn't be attempting to sleep).
2278 */
2279 sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ?
2280 HERMON_NOSLEEP : HERMON_SLEEP;
2281 if ((sleep == HERMON_SLEEP) &&
2282 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2283 status = IBT_INVALID_PARAM;
2284 goto mrmttbind_fail;
2285 }
2286
2287 /*
2288 * Bind the memory and determine the mapped addresses. This is
2289 * the first of two routines that do all the "heavy lifting" for
2290 * the Hermon memory registration routines. The hermon_mr_mem_bind()
2291 * routine takes the "bind" struct with all its fields filled
2292 * in and returns a list of DMA cookies (for the PCI mapped addresses
2293 * corresponding to the specified address region) which are used by
2294 * the hermon_mr_fast_mtt_write() routine below. If we fail here, we
2295 * must undo all the previous resource allocation (and PD reference
2296 * count).
2297 */
2298 status = hermon_mr_mem_bind(state, bind, bind_dmahdl, sleep, is_buffer);
2299 if (status != DDI_SUCCESS) {
2300 status = IBT_INSUFF_RESOURCE;
2301 goto mrmttbind_fail;
2302 }
2303
2304 /*
2305 * Determine number of pages spanned. This routine uses the
2306 * information in the "bind" struct to determine the required
2307 * number of MTT entries needed (and returns the suggested page size -
2308 * as a "power-of-2" - for each MTT entry).
2309 */
2310 nummtt = hermon_mr_nummtt_needed(state, bind, mtt_pgsize_bits);
2311
2312 /*
2313 * Allocate the MTT entries. Use the calculations performed above to
2314 * allocate the required number of MTT entries. If we fail here, we
2315 * must not only undo all the previous resource allocation (and PD
2316 * reference count), but we must also unbind the memory.
2317 */
2318 status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, mtt);
2319 if (status != DDI_SUCCESS) {
2320 status = IBT_INSUFF_RESOURCE;
2321 goto mrmttbind_fail2;
2322 }
2323
2324 /*
2325 * Write the mapped addresses into the MTT entries. This is part two
2326 * of the "heavy lifting" routines that we talked about above. Note:
2327 * we pass the suggested page size from the earlier operation here.
2328 * And if we fail here, we again do pretty much the same huge clean up.
2329 */
2330 status = hermon_mr_fast_mtt_write(state, *mtt, bind, *mtt_pgsize_bits);
2331 if (status != DDI_SUCCESS) {
2332 /*
2333 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
2334 * only if it detects a HW error during DMA.
2335 */
2336 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2337 status = ibc_get_ci_failure(0);
2338 goto mrmttbind_fail3;
2339 }
2340 return (DDI_SUCCESS);
2341
2342 /*
2343 * The following is cleanup for all possible failure cases in this routine
2344 */
2345 mrmttbind_fail3:
2346 hermon_rsrc_free(state, mtt);
2347 mrmttbind_fail2:
2348 hermon_mr_mem_unbind(state, bind);
2349 mrmttbind_fail:
2350 return (status);
2351 }
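/*
 * Sizing example for the flow above (assuming 4KB pages): binding a
 * page-aligned 1MB kernel buffer produces DMA cookies covering 256
 * pages, so hermon_mr_nummtt_needed() returns 256 and that many MTT
 * entries are allocated and then filled by hermon_mr_fast_mtt_write().
 */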
2352
2353
2354 /*
2355 * hermon_mr_mtt_unbind()
2356 * Context: Can be called from interrupt or base context.
2357 */
2358 int
2359 hermon_mr_mtt_unbind(hermon_state_t *state, hermon_bind_info_t *bind,
2360 hermon_rsrc_t *mtt)
2361 {
2362 /*
2363 * Free up the MTT entries and unbind the memory. Here, as above, we
2364 * attempt to free these resources only if it is appropriate to do so.
2365 */
2366 hermon_mr_mem_unbind(state, bind);
2367 hermon_rsrc_free(state, &mtt);
2368
2369 return (DDI_SUCCESS);
2370 }
2371
2372
2373 /*
2374 * hermon_mr_common_rereg()
2375 * Context: Can be called from interrupt or base context.
2376 */
2377 static int
2378 hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
2379 hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
2380 hermon_mr_options_t *op)
2381 {
2382 hermon_rsrc_t *mpt;
2383 ibt_mr_attr_flags_t acc_flags_to_use;
2384 ibt_mr_flags_t flags;
2385 hermon_pdhdl_t pd_to_use;
2386 hermon_hw_dmpt_t mpt_entry;
2387 uint64_t mtt_addr_to_use, vaddr_to_use, len_to_use;
2388 uint_t sleep, dereg_level;
2389 int status;
2390
2391 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2392
2393 /*
2394 * Check here to see if the memory region corresponds to a userland
2395 * mapping. Reregistration of userland memory regions is not
2396 * currently supported. Return failure.
2397 */
2398 if (mr->mr_is_umem) {
2399 status = IBT_MR_HDL_INVALID;
2400 goto mrrereg_fail;
2401 }
2402
2403 mutex_enter(&mr->mr_lock);
2404
2405 /* Pull MPT resource pointer from the Hermon Memory Region handle */
2406 mpt = mr->mr_mptrsrcp;
2407
2408 /* Extract the flags field from the hermon_bind_info_t */
2409 flags = bind->bi_flags;
2410
2411 /*
2412 * Check the sleep flag. Ensure that it is consistent with the
2413 * current thread context (i.e. if we are currently in the interrupt
2414 * context, then we shouldn't be attempting to sleep).
2415 */
2416 sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
2417 if ((sleep == HERMON_SLEEP) &&
2418 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2419 mutex_exit(&mr->mr_lock);
2420 status = IBT_INVALID_PARAM;
2421 goto mrrereg_fail;
2422 }
2423
2424 /*
2425 * First step is to temporarily invalidate the MPT entry. This
2426 * regains ownership from the hardware, and gives us the opportunity
2427 * to modify the entry. Note: The HW2SW_MPT command returns the
2428 * current MPT entry contents. These are saved away here because
2429 * they will be reused in a later step below. If the region has
2430 * bound memory windows, we fail and return an "in use" error code.
2431 * Otherwise, this is an unexpected error and we deregister the
2432 * memory region and return error.
2433 *
2434 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
2435 * against holding the lock around this rereg call in all contexts.
2436 */
2437 status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
2438 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
2439 if (status != HERMON_CMD_SUCCESS) {
2440 mutex_exit(&mr->mr_lock);
2441 if (status == HERMON_CMD_REG_BOUND) {
2442 return (IBT_MR_IN_USE);
2443 } else {
2444 cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: "
2445 "%08x\n", status);
2446 if (status == HERMON_CMD_INVALID_STATUS) {
2447 hermon_fm_ereport(state, HCA_SYS_ERR,
2448 HCA_ERR_SRV_LOST);
2449 }
2450 /*
2451 * Call deregister and ensure that all current
2452 * resources get freed up
2453 */
2454 if (hermon_mr_deregister(state, &mr,
2455 HERMON_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
2456 HERMON_WARNING(state, "failed to deregister "
2457 "memory region");
2458 }
2459 return (ibc_get_ci_failure(0));
2460 }
2461 }
2462
2463 /*
2464 * If we're changing the protection domain, then validate the new one
2465 */
2466 if (flags & IBT_MR_CHANGE_PD) {
2467
2468 /* Check for valid PD handle pointer */
2469 if (pd == NULL) {
2470 mutex_exit(&mr->mr_lock);
2471 /*
2472 * Call deregister and ensure that all current
2473 * resources get properly freed up. Unnecessary
2474 * here to attempt to regain software ownership
2475 * of the MPT entry as that has already been
2476 * done above.
2477 */
2478 if (hermon_mr_deregister(state, &mr,
2479 HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
2480 DDI_SUCCESS) {
2481 HERMON_WARNING(state, "failed to deregister "
2482 "memory region");
2483 }
2484 status = IBT_PD_HDL_INVALID;
2485 goto mrrereg_fail;
2486 }
2487
2488 /* Use the new PD handle in all operations below */
2489 pd_to_use = pd;
2490
2491 } else {
2492 /* Use the current PD handle in all operations below */
2493 pd_to_use = mr->mr_pdhdl;
2494 }
2495
2496 /*
2497 * If we're changing access permissions, then validate the new ones
2498 */
2499 if (flags & IBT_MR_CHANGE_ACCESS) {
2500 /*
2501 * Validate the access flags. Both remote write and remote
2502 * atomic require the local write flag to be set
2503 */
2504 if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
2505 (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
2506 !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
2507 mutex_exit(&mr->mr_lock);
2508 /*
2509 * Call deregister and ensure that all current
2510 * resources get properly freed up. Unnecessary
2511 * here to attempt to regain software ownership
2512 * of the MPT entry as that has already been
2513 * done above.
2514 */
2515 if (hermon_mr_deregister(state, &mr,
2516 HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
2517 DDI_SUCCESS) {
2518 HERMON_WARNING(state, "failed to deregister "
2519 "memory region");
2520 }
2521 status = IBT_MR_ACCESS_REQ_INVALID;
2522 goto mrrereg_fail;
2523 }
2524
2525 /*
2526 * Setup and validate the memory region access flags. This
2527 * means translating the IBTF's enable flags into the access
2528 * flags that will be used in later operations.
2529 */
2530 acc_flags_to_use = 0;
2531 if (flags & IBT_MR_ENABLE_WINDOW_BIND)
2532 acc_flags_to_use |= IBT_MR_WINDOW_BIND;
2533 if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
2534 acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
2535 if (flags & IBT_MR_ENABLE_REMOTE_READ)
2536 acc_flags_to_use |= IBT_MR_REMOTE_READ;
2537 if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
2538 acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
2539 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
2540 acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;
2541
2542 } else {
2543 acc_flags_to_use = mr->mr_accflag;
2544 }
2545
2546 /*
2547 * If we're modifying the translation, then figure out whether
2548 * we can reuse the current MTT resources. This means calling
2549 * hermon_mr_rereg_xlat_helper() which does most of the heavy lifting
2550 * for the reregistration. If the current memory region contains
2551 * sufficient MTT entries for the new regions, then it will be
2552 * reused and filled in. Otherwise, new entries will be allocated,
2553 * the old ones will be freed, and the new entries will be filled
2554 * in. Note: If we're not modifying the translation, then we
2555 * should already have all the information we need to update the MPT.
2556 * Also note: If hermon_mr_rereg_xlat_helper() fails, it will return
2557 * a "dereg_level" which is the level of cleanup that needs to be
2558 * passed to hermon_mr_deregister() to finish the cleanup.
2559 */
2560 if (flags & IBT_MR_CHANGE_TRANSLATION) {
2561 status = hermon_mr_rereg_xlat_helper(state, mr, bind, op,
2562 &mtt_addr_to_use, sleep, &dereg_level);
2563 if (status != DDI_SUCCESS) {
2564 mutex_exit(&mr->mr_lock);
2565 /*
2566 * Call deregister and ensure that all resources get
2567 * properly freed up.
2568 */
2569 if (hermon_mr_deregister(state, &mr, dereg_level,
2570 sleep) != DDI_SUCCESS) {
2571 HERMON_WARNING(state, "failed to deregister "
2572 "memory region");
2573 }
2574 goto mrrereg_fail;
2575 }
2576 vaddr_to_use = mr->mr_bindinfo.bi_addr;
2577 len_to_use = mr->mr_bindinfo.bi_len;
2578 } else {
2579 mtt_addr_to_use = mr->mr_mttaddr;
2580 vaddr_to_use = mr->mr_bindinfo.bi_addr;
2581 len_to_use = mr->mr_bindinfo.bi_len;
2582 }
2583
2584 /*
2585 * Calculate new keys (Lkey, Rkey) from MPT index. Just like they were
2586 * when the region was first registered, each key is formed from
2587 * "constrained" bits and "unconstrained" bits. Note: If no remote
2588 * access is required, then the RKey value is not filled in. Otherwise
2589 * both Rkey and LKey are given the same value.
2590 */
2591 mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
2592 if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
2593 (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
2594 (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
2595 mr->mr_rkey = mr->mr_lkey;
2596 } else
2597 mr->mr_rkey = 0;
2598
2599 /*
2600 * Fill in the MPT entry. This is the final step before passing
2601 * ownership of the MPT entry to the Hermon hardware. We use all of
2602 * the information collected/calculated above to fill in the
2603 * requisite portions of the MPT.
2604 */
2605 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2606
2607 mpt_entry.status = HERMON_MPT_SW_OWNERSHIP;
2608 mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND) ? 1 : 0;
2609 mpt_entry.atomic = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
2610 mpt_entry.rw = (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ? 1 : 0;
2611 mpt_entry.rr = (acc_flags_to_use & IBT_MR_REMOTE_READ) ? 1 : 0;
2612 mpt_entry.lw = (acc_flags_to_use & IBT_MR_LOCAL_WRITE) ? 1 : 0;
2613 mpt_entry.lr = 1;
2614 mpt_entry.phys_addr = 0;
2615 mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2616
2617 mpt_entry.entity_sz = mr->mr_logmttpgsz;
2618 mpt_entry.mem_key = mr->mr_lkey;
2619 mpt_entry.pd = pd_to_use->pd_pdnum;
2620
2621 mpt_entry.start_addr = vaddr_to_use;
2622 mpt_entry.reg_win_len = len_to_use;
2623 mpt_entry.mtt_addr_h = mtt_addr_to_use >> 32;
2624 mpt_entry.mtt_addr_l = mtt_addr_to_use >> 3;
2625
2626 /*
2627 * Write the updated MPT entry to hardware
2628 *
2629 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
2630 * against holding the lock around this rereg call in all contexts.
2631 */
2632 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2633 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
2634 if (status != HERMON_CMD_SUCCESS) {
2635 mutex_exit(&mr->mr_lock);
2636 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
2637 status);
2638 if (status == HERMON_CMD_INVALID_STATUS) {
2639 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2640 }
2641 /*
2642 * Call deregister and ensure that all current resources get
2643 * properly freed up. Unnecessary here to attempt to regain
2644 * software ownership of the MPT entry as that has already
2645 * been done above.
2646 */
2647 if (hermon_mr_deregister(state, &mr,
2648 HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
2649 HERMON_WARNING(state, "failed to deregister memory "
2650 "region");
2651 }
2652 return (ibc_get_ci_failure(0));
2653 }
2654
2655 /*
2656 * If we're changing PD, then update their reference counts now.
2657 * This means decrementing the reference count on the old PD and
2658 * incrementing the reference count on the new PD.
2659 */
2660 if (flags & IBT_MR_CHANGE_PD) {
2661 hermon_pd_refcnt_dec(mr->mr_pdhdl);
2662 hermon_pd_refcnt_inc(pd);
2663 }
2664
2665 /*
2666 * Update the contents of the Hermon Memory Region handle to reflect
2667 * what has been changed.
2668 */
2669 mr->mr_pdhdl = pd_to_use;
2670 mr->mr_accflag = acc_flags_to_use;
2671 mr->mr_is_umem = 0;
2672 mr->mr_is_fmr = 0;
2673 mr->mr_umemcookie = NULL;
2674 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
2675 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey);
2676
2677 /* New MR handle is same as the old */
2678 *mrhdl_new = mr;
2679 mutex_exit(&mr->mr_lock);
2680
2681 return (DDI_SUCCESS);
2682
2683 mrrereg_fail:
2684 return (status);
2685 }
2686
2687
2688 /*
2689 * hermon_mr_rereg_xlat_helper
2690 * Context: Can be called from interrupt or base context.
2691 * Note: This routine expects the "mr_lock" to be held when it
2692 * is called. Upon returning failure, this routine passes information
2693 * about what "dereg_level" should be passed to hermon_mr_deregister().
2694 */
2695 static int
2696 hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
2697 hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
2698 uint_t sleep, uint_t *dereg_level)
2699 {
2700 hermon_rsrc_t *mtt, *mtt_refcnt;
2701 hermon_sw_refcnt_t *swrc_old, *swrc_new;
2702 ddi_dma_handle_t dmahdl;
2703 uint64_t nummtt_needed, nummtt_in_currrsrc, max_sz;
2704 uint_t mtt_pgsize_bits, bind_type, reuse_dmahdl;
2705 int status;
2706
2707 ASSERT(MUTEX_HELD(&mr->mr_lock));
2708
2709 /*
2710 * Check the "options" flag. Currently this flag tells the driver
2711 * whether or not the region should be bound normally (i.e. with
2712 * entries written into the PCI IOMMU) or whether it should be
2713 * registered to bypass the IOMMU.
2714 */
2715 if (op == NULL) {
2716 bind_type = HERMON_BINDMEM_NORMAL;
2717 } else {
2718 bind_type = op->mro_bind_type;
2719 }
2720
2721 /*
2722 * Check for invalid length. Check if the length is zero or if the
2723 * length is larger than the maximum configured value. Return error
2724 * if it is.
2725 */
2726 max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
2727 if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
2728 /*
2729 * Deregister will be called upon returning failure from this
2730 * routine. This will ensure that all current resources get
2731 * properly freed up. Unnecessary to attempt to regain
2732 * software ownership of the MPT entry as that has already
2733 * been done above (in hermon_mr_reregister())
2734 */
2735 *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT;
2736
2737 status = IBT_MR_LEN_INVALID;
2738 goto mrrereghelp_fail;
2739 }
2740
2741 /*
2742 * Determine the number of pages necessary for new region and the
2743 * number of pages supported by the current MTT resources
2744 */
2745 nummtt_needed = hermon_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
2746 nummtt_in_currrsrc = mr->mr_mttrsrcp->hr_len >> HERMON_MTT_SIZE_SHIFT;
2747
2748 /*
2749 * Depending on whether we have enough pages or not, the next step is
2750 * to fill in a set of MTT entries that reflect the new mapping. In
2751 * the first case below, we already have enough entries. This means
2752 * we need to unbind the memory from the previous mapping, bind the
2753 * memory for the new mapping, write the new MTT entries, and update
2754 * the mr to reflect the changes.
2755 * In the second case below, we do not have enough entries in the
2756 * current mapping. So, in this case, we need not only to unbind the
2757 * current mapping, but we need to free up the MTT resources associated
2758 * with that mapping. After we've successfully done that, we continue
2759 * by binding the new memory, allocating new MTT entries, writing the
2760 * new MTT entries, and updating the mr to reflect the changes.
2761 */
2762
2763 /*
2764 * If this region is being shared (i.e. MTT refcount != 1), then we
2765 * can't reuse the current MTT resources regardless of their size.
2766 * Instead we'll need to alloc new ones (below) just as if there
2767 * hadn't been enough room in the current entries.
2768 */
2769 swrc_old = (hermon_sw_refcnt_t *)mr->mr_mttrefcntp->hr_addr;
2770 if (HERMON_MTT_IS_NOT_SHARED(swrc_old) &&
2771 (nummtt_needed <= nummtt_in_currrsrc)) {
2772
2773 /*
2774 * Unbind the old mapping for this memory region, but retain
2775 * the ddi_dma_handle_t (if possible) for reuse in the bind
2776 * operation below. Note: If original memory region was
2777 * bound for IOMMU bypass and the new region can not use
2778 * bypass, then a new DMA handle will be necessary.
2779 */
2780 if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2781 mr->mr_bindinfo.bi_free_dmahdl = 0;
2782 hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2783 dmahdl = mr->mr_bindinfo.bi_dmahdl;
2784 reuse_dmahdl = 1;
2785 } else {
2786 hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2787 dmahdl = NULL;
2788 reuse_dmahdl = 0;
2789 }
2790
2791 /*
2792 * Bind the new memory and determine the mapped addresses.
2793 * As described, this routine and hermon_mr_fast_mtt_write()
2794 * do the majority of the work for the memory registration
2795 * operations. Note: When we successfully finish the binding,
2796 * we will set the "bi_free_dmahdl" flag to indicate that
2797 * even though we may have reused the ddi_dma_handle_t we do
2798 * wish it to be freed up at some later time. Note also that
2799 * if we fail, we may need to cleanup the ddi_dma_handle_t.
2800 */
2801 bind->bi_bypass = bind_type;
2802 status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
2803 if (status != DDI_SUCCESS) {
2804 if (reuse_dmahdl) {
2805 ddi_dma_free_handle(&dmahdl);
2806 }
2807
2808 /*
2809 * Deregister will be called upon returning failure
2810 * from this routine. This will ensure that all
2811 * current resources get properly freed up.
2812 * Unnecessary to attempt to regain software ownership
2813 * of the MPT entry as that has already been done
2814 * above (in hermon_mr_reregister()). Also unnecessary
2815 * to attempt to unbind the memory.
2816 */
2817 *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2818
2819 status = IBT_INSUFF_RESOURCE;
2820 goto mrrereghelp_fail;
2821 }
2822 if (reuse_dmahdl) {
2823 bind->bi_free_dmahdl = 1;
2824 }
2825
2826 /*
2827 * Using the new mapping, but reusing the current MTT
2828 * resources, write the updated entries to MTT
2829 */
2830 mtt = mr->mr_mttrsrcp;
2831 status = hermon_mr_fast_mtt_write(state, mtt, bind,
2832 mtt_pgsize_bits);
2833 if (status != DDI_SUCCESS) {
2834 /*
2835 * Deregister will be called upon returning failure
2836 * from this routine. This will ensure that all
2837 * current resources get properly freed up.
2838 * Unnecessary to attempt to regain software ownership
2839 * of the MPT entry as that has already been done
2840 * above (in hermon_mr_reregister()). Also unnecessary
2841 * to attempt to unbind the memory.
2842 *
2843 * But we do need to unbind the newly bound memory
2844 * before returning.
2845 */
2846 hermon_mr_mem_unbind(state, bind);
2847 *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2848
2849 /*
2850 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
2851 * only if it detects a HW error during DMA.
2852 */
2853 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2854 status = ibc_get_ci_failure(0);
2855 goto mrrereghelp_fail;
2856 }
2857
2858 /* Put the updated information into the Mem Region handle */
2859 mr->mr_bindinfo = *bind;
2860 mr->mr_logmttpgsz = mtt_pgsize_bits;
2861
2862 } else {
2863 /*
2864 * Check if the memory region MTT is shared by any other MRs.
2865 * Since the resource may be shared between multiple memory
2866 * regions (as a result of a "RegisterSharedMR()" verb) it is
2867 * important that we not unbind any resources prematurely.
2868 */
2869 if (!HERMON_MTT_IS_SHARED(swrc_old)) {
2870 /*
2871 * Unbind the old mapping for this memory region, but
2872 * retain the ddi_dma_handle_t for reuse in the bind
2873 * operation below. Note: This can only be done here
2874 * because the region being reregistered is not
2875 * currently shared. Also if original memory region
2876 * was bound for IOMMU bypass and the new region can
2877 * not use bypass, then a new DMA handle will be
2878 * necessary.
2879 */
2880 if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2881 mr->mr_bindinfo.bi_free_dmahdl = 0;
2882 hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2883 dmahdl = mr->mr_bindinfo.bi_dmahdl;
2884 reuse_dmahdl = 1;
2885 } else {
2886 hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2887 dmahdl = NULL;
2888 reuse_dmahdl = 0;
2889 }
2890 } else {
2891 dmahdl = NULL;
2892 reuse_dmahdl = 0;
2893 }
2894
2895 /*
2896 * Bind the new memory and determine the mapped addresses.
2897 * As described, this routine and hermon_mr_fast_mtt_write()
2898 * do the majority of the work for the memory registration
2899 * operations. Note: When we successfully finish the binding,
2900 * we will set the "bi_free_dmahdl" flag to indicate that
2901 * even though we may have reused the ddi_dma_handle_t we do
2902 * wish it to be freed up at some later time. Note also that
2903 * if we fail, we may need to cleanup the ddi_dma_handle_t.
2904 */
2905 bind->bi_bypass = bind_type;
2906 status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
2907 if (status != DDI_SUCCESS) {
2908 if (reuse_dmahdl) {
2909 ddi_dma_free_handle(&dmahdl);
2910 }
2911
2912 /*
2913 * Deregister will be called upon returning failure
2914 * from this routine. This will ensure that all
2915 * current resources get properly freed up.
2916 * Unnecessary to attempt to regain software ownership
2917 * of the MPT entry as that has already been done
2918 * above (in hermon_mr_reregister()). Also unnecessary
2919 * to attempt to unbind the memory.
2920 */
2921 *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2922
2923 status = IBT_INSUFF_RESOURCE;
2924 goto mrrereghelp_fail;
2925 }
2926 if (reuse_dmahdl) {
2927 bind->bi_free_dmahdl = 1;
2928 }
2929
2930 /*
2931 * Allocate the new MTT entries resource
2932 */
2933 status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt_needed,
2934 sleep, &mtt);
2935 if (status != DDI_SUCCESS) {
2936 /*
2937 * Deregister will be called upon returning failure
2938 * from this routine. This will ensure that all
2939 * current resources get properly freed up.
2940 * Unnecessary to attempt to regain software ownership
2941 * of the MPT entry as that has already been done
2942 * above (in hermon_mr_reregister()). Also unnecessary
2943 * to attempt to unbind the memory.
2944 *
2945 * But we do need to unbind the newly bound memory
2946 * before returning.
2947 */
2948 hermon_mr_mem_unbind(state, bind);
2949 *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2950
2951 status = IBT_INSUFF_RESOURCE;
2952 goto mrrereghelp_fail;
2953 }
2954
2955 /*
2956 * Allocate MTT reference count (to track shared memory
2957 * regions). As mentioned elsewhere above, this reference
2958 * count resource may never be used on the given memory region,
2959 * but if it is ever later registered as a "shared" memory
2960 * region then this resource will be necessary. Note: This
2961 * is only necessary here if the existing memory region is
2962 * already being shared (because otherwise we already have
2963 * a useable reference count resource).
2964 */
2965 if (HERMON_MTT_IS_SHARED(swrc_old)) {
2966 status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1,
2967 sleep, &mtt_refcnt);
2968 if (status != DDI_SUCCESS) {
2969 /*
2970 * Deregister will be called upon returning
2971 * failure from this routine. This will ensure
2972 * that all current resources get properly
2973 * freed up. Unnecessary to attempt to regain
2974 * software ownership of the MPT entry as that
2975 * has already been done above (in
2976 * hermon_mr_reregister()). Also unnecessary
2977 * to attempt to unbind the memory.
2978 *
2979 * But we need to unbind the newly bound
2980 * memory and free up the newly allocated MTT
2981 * entries before returning.
2982 */
2983 hermon_mr_mem_unbind(state, bind);
2984 hermon_rsrc_free(state, &mtt);
2985 *dereg_level =
2986 HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2987
2988 status = IBT_INSUFF_RESOURCE;
2989 goto mrrereghelp_fail;
2990 }
2991 swrc_new = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
2992 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
2993 HERMON_MTT_REFCNT_INIT(swrc_new);
2994 } else {
2995 mtt_refcnt = mr->mr_mttrefcntp;
2996 }
2997
2998 /*
2999 * Using the new mapping and the new MTT resources, write the
3000 * updated entries to MTT
3001 */
3002 status = hermon_mr_fast_mtt_write(state, mtt, bind,
3003 mtt_pgsize_bits);
3004 if (status != DDI_SUCCESS) {
3005 /*
3006 * Deregister will be called upon returning failure
3007 * from this routine. This will ensure that all
3008 * current resources get properly freed up.
3009 * Unnecessary to attempt to regain software ownership
3010 * of the MPT entry as that has already been done
3011 * above (in hermon_mr_reregister()). Also unnecessary
3012 * to attempt to unbind the memory.
3013 *
3014 * But we need to unbind the newly bound memory,
3015 * free up the newly allocated MTT entries, and
3016 * (possibly) free the new MTT reference count
3017 * resource before returning.
3018 */
3019 if (HERMON_MTT_IS_SHARED(swrc_old)) {
3020 hermon_rsrc_free(state, &mtt_refcnt);
3021 }
3022 hermon_mr_mem_unbind(state, bind);
3023 hermon_rsrc_free(state, &mtt);
3024 *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
3025
3026 status = IBT_INSUFF_RESOURCE;
3027 goto mrrereghelp_fail;
3028 }
3029
3030 /*
3031 * Check if the memory region MTT is shared by any other MRs.
3032 * Since the resource may be shared between multiple memory
3033 * regions (as a result of a "RegisterSharedMR()" verb) it is
3034 * important that we not free up any resources prematurely.
3035 */
3036 if (HERMON_MTT_IS_SHARED(swrc_old)) {
3037 /* Decrement MTT reference count for "old" region */
3038 (void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
3039 } else {
3040 /* Free up the old MTT entries resource */
3041 hermon_rsrc_free(state, &mr->mr_mttrsrcp);
3042 }
3043
3044 /* Put the updated information into the mrhdl */
3045 mr->mr_bindinfo = *bind;
3046 mr->mr_logmttpgsz = mtt_pgsize_bits;
3047 mr->mr_mttrsrcp = mtt;
3048 mr->mr_mttrefcntp = mtt_refcnt;
3049 }
3050
3051 /*
3052 * Calculate and return the updated MTT address (in the DDR address
3053 * space). This will be used by the caller (hermon_mr_reregister) in
3054 * the updated MPT entry
3055 */
3056 *mtt_addr = mtt->hr_indx << HERMON_MTT_SIZE_SHIFT;
3057
3058 return (DDI_SUCCESS);
3059
3060 mrrereghelp_fail:
3061 return (status);
3062 }
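/*
 * Decision example for the two branches above: reregistering an
 * unshared region from 64 pages down to 16 reuses the existing
 * 64-entry MTT allocation (only 16 entries are rewritten), while
 * growing from 16 pages to 64, or reregistering a shared region of
 * any size, takes the second branch and allocates a fresh MTT chunk.
 */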
3063
3064
3065 /*
3066 * hermon_mr_nummtt_needed()
3067 * Context: Can be called from interrupt or base context.
3068 */
3069 /* ARGSUSED */
3070 static uint64_t
3071 hermon_mr_nummtt_needed(hermon_state_t *state, hermon_bind_info_t *bind,
3072 uint_t *mtt_pgsize_bits)
3073 {
3074 uint64_t pg_offset_mask;
3075 uint64_t pg_offset, tmp_length;
3076
3077 /*
3078 * For now we specify the page size as 8Kb (the default page size for
3079 * the sun4u architecture), or 4Kb for x86. A future enhancement would
3080 * be to determine the optimal page size by examining the dmacookies.
3081 */
3082 *mtt_pgsize_bits = PAGESHIFT;
3083
3084 pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
3085 pg_offset = bind->bi_addr & pg_offset_mask;
3086 tmp_length = pg_offset + (bind->bi_len - 1);
3087 return ((tmp_length >> *mtt_pgsize_bits) + 1);
3088 }
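/*
 * Worked example (assuming PAGESHIFT == 12, i.e. 4KB pages): a region
 * with bi_addr 0x10FF0 and bi_len 0x2020 ends at 0x1300F, touching
 * pages 0x10000 through 0x13FFF:
 *
 *	pg_offset  = 0x10FF0 & 0xFFF        = 0xFF0
 *	tmp_length = 0xFF0 + (0x2020 - 1)   = 0x300F
 *	nummtt     = (0x300F >> 12) + 1     = 4
 */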
3089
3090
3091 /*
3092 * hermon_mr_mem_bind()
3093 * Context: Can be called from interrupt or base context.
3094 */
3095 static int
3096 hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
3097 ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer)
3098 {
3099 ddi_dma_attr_t dma_attr;
3100 int (*callback)(caddr_t);
3101 int status;
3102
3103 /* bi_type must be set to a meaningful value to get a bind handle */
3104 ASSERT(bind->bi_type == HERMON_BINDHDL_VADDR ||
3105 bind->bi_type == HERMON_BINDHDL_BUF ||
3106 bind->bi_type == HERMON_BINDHDL_UBUF);
3107
3108 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
3109
3110 /* Set the callback flag appropriately */
3111 callback = (sleep == HERMON_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
3112
3113 /*
3114 * Initialize many of the default DMA attributes. Then, if we're
3115 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
3116 */
3117 if (dmahdl == NULL) {
3118 hermon_dma_attr_init(state, &dma_attr);
3119 #ifdef __sparc
3120 if (bind->bi_bypass == HERMON_BINDMEM_BYPASS) {
3121 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
3122 }
3123 #endif
3124
3125 /* set RO if needed - tunable set and 'is_buffer' is non-0 */
3126 if (is_buffer) {
3127 if (! (bind->bi_flags & IBT_MR_DISABLE_RO)) {
3128 if ((bind->bi_type != HERMON_BINDHDL_UBUF) &&
3129 (hermon_kernel_data_ro ==
3130 HERMON_RO_ENABLED)) {
3131 dma_attr.dma_attr_flags |=
3132 DDI_DMA_RELAXED_ORDERING;
3133 }
3134 if (((bind->bi_type == HERMON_BINDHDL_UBUF) &&
3135 (hermon_user_data_ro ==
3136 HERMON_RO_ENABLED))) {
3137 dma_attr.dma_attr_flags |=
3138 DDI_DMA_RELAXED_ORDERING;
3139 }
3140 }
3141 }
3142
3143 /* Allocate a DMA handle for the binding */
3144 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
3145 callback, NULL, &bind->bi_dmahdl);
3146 if (status != DDI_SUCCESS) {
3147 return (status);
3148 }
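		/*
		 * Remember that we allocated this handle ourselves, so
		 * that hermon_mr_mem_unbind() knows to free it when the
		 * binding is torn down.
		 */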
		bind->bi_free_dmahdl = 1;

	} else {
		bind->bi_dmahdl = dmahdl;
		bind->bi_free_dmahdl = 0;
	}


	/*
	 * Bind the memory to get the PCI mapped addresses.  The decision
	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
	 * is determined by the "bi_type" flag.  Note: if the bind operation
	 * fails then we have to free up the DMA handle and return an error.
	 */
	if (bind->bi_type == HERMON_BINDHDL_VADDR) {
		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
		    (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback, NULL,
		    &bind->bi_dmacookie, &bind->bi_cookiecnt);

	} else {  /* HERMON_BINDHDL_BUF or HERMON_BINDHDL_UBUF */

		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
		    bind->bi_buf, (DDI_DMA_RDWR | DDI_DMA_CONSISTENT),
		    callback, NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
	}
	if (status != DDI_DMA_MAPPED) {
		if (bind->bi_free_dmahdl != 0) {
			ddi_dma_free_handle(&bind->bi_dmahdl);
		}
		return (status);
	}

	return (DDI_SUCCESS);
}
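
/*
 * Illustrative usage (a sketch with hypothetical values, not code taken
 * from elsewhere in this driver): a caller binding a kernel virtual
 * address range might fill in the bind structure and let this routine
 * allocate the DMA handle itself:
 *
 *	bind->bi_type = HERMON_BINDHDL_VADDR;
 *	bind->bi_addr = (uint64_t)(uintptr_t)vaddr;
 *	bind->bi_len  = len;
 *	status = hermon_mr_mem_bind(state, bind, NULL, HERMON_SLEEP, 0);
 *
 * On success, bind->bi_dmacookie and bind->bi_cookiecnt describe the
 * PCI-mapped ranges that hermon_mr_fast_mtt_write() later walks.
 */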


/*
 * hermon_mr_mem_unbind()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_mr_mem_unbind(hermon_state_t *state, hermon_bind_info_t *bind)
{
	int	status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	/* There is nothing to unbind for alloc_lkey */
	if (bind->bi_type == HERMON_BINDHDL_LKEY)
		return;

	/*
	 * In the HERMON_BINDHDL_UBUF case, the buffer that bi_buf points
	 * to was allocated internally by ddi_umem_iosetup(), so it must
	 * be freed here.  Reset bi_type to HERMON_BINDHDL_NONE so that
	 * the buffer is not freed a second time later.
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	if (bind->bi_type == HERMON_BINDHDL_UBUF) {
		freerbuf(bind->bi_buf);
		bind->bi_type = HERMON_BINDHDL_NONE;
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Unbind the DMA memory for the region.
	 *
	 * Note: The only way ddi_dma_unbind_handle() currently can return
	 * an error is if the handle passed in is invalid.  Since this
	 * should never happen, we choose to return void from this
	 * function.  If it does return an error, however, we print a
	 * warning message to the console.
	 */
	status = ddi_dma_unbind_handle(bind->bi_dmahdl);
	if (status != DDI_SUCCESS) {
		HERMON_WARNING(state, "failed to unbind DMA mapping");
		return;
	}

	/* Free up the DMA handle */
	if (bind->bi_free_dmahdl != 0) {
		ddi_dma_free_handle(&bind->bi_dmahdl);
	}
}


/*
 * hermon_mr_fast_mtt_write()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
    hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits)
{
	hermon_icm_table_t	*icm_table;
	hermon_dma_info_t	*dma_info;
	uint32_t		index1, index2, rindx;
	ddi_dma_cookie_t	dmacookie;
	uint_t			cookie_cnt;
	uint64_t		*mtt_table;
	uint64_t		mtt_entry;
	uint64_t		addr, endaddr;
	uint64_t		pagesize;
	offset_t		i, start;
	uint_t			per_span;
	int			sync_needed;

	/*
	 * XXX According to the PRM, we are to use the WRITE_MTT
	 * command to write out MTTs.  Tavor does not do this,
	 * instead taking advantage of direct access to the MTTs,
	 * and knowledge that Mellanox FMR relies on our ability
	 * to write directly to the MTTs without any further
	 * notification to the firmware.  Likewise, we will choose
	 * not to use the WRITE_MTT command, but to simply write
	 * out the MTTs.
	 */

	/* Calculate page size from the suggested value passed in */
	pagesize = ((uint64_t)1 << mtt_pgsize_bits);

	/* Walk the "cookie list" and fill in the MTT table entries */
	dmacookie  = bind->bi_dmacookie;
	cookie_cnt = bind->bi_cookiecnt;

	icm_table = &state->hs_icm[HERMON_MTT];
	rindx = mtt->hr_indx;
	hermon_index(index1, index2, rindx, icm_table, i);
	start = i;

	per_span   = icm_table->span;
	dma_info   = icm_table->icm_dma[index1] + index2;
	mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;

	sync_needed = 0;
	while (cookie_cnt-- > 0) {
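		/*
		 * endaddr is the address of the last byte covered by
		 * this cookie; the cookie's start address is rounded
		 * down to an MTT page boundary before entries are
		 * filled in.
		 */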
		addr = dmacookie.dmac_laddress;
		endaddr = addr + (dmacookie.dmac_size - 1);
		addr = addr & ~((uint64_t)pagesize - 1);

		while (addr <= endaddr) {

			/*
			 * Fill in the mapped addresses (calculated above)
			 * and set the HERMON_MTT_ENTRY_PRESENT flag in
			 * each MTT entry.
			 */
			mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT;
			mtt_table[i] = htonll(mtt_entry);
			i++;
			rindx++;

			if (i == per_span) {
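				/*
				 * We have filled the current ICM span:
				 * sync the entries written so far out to
				 * the device and, unless this was the
				 * final entry, locate the DMA mapping of
				 * the next span.
				 */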
				(void) ddi_dma_sync(dma_info->dma_hdl,
				    start * sizeof (hermon_hw_mtt_t),
				    (i - start) * sizeof (hermon_hw_mtt_t),
				    DDI_DMA_SYNC_FORDEV);

				if ((addr + pagesize > endaddr) &&
				    (cookie_cnt == 0))
					return (DDI_SUCCESS);

				hermon_index(index1, index2, rindx, icm_table,
				    i);
				start = i;
				dma_info = icm_table->icm_dma[index1] + index2;
				mtt_table =
				    (uint64_t *)(uintptr_t)dma_info->vaddr;

				sync_needed = 0;
			} else {
				sync_needed = 1;
			}

			addr += pagesize;
			if (addr == 0) {
				static int do_once = 1;
				_NOTE(SCHEME_PROTECTS_DATA("safe sharing",
				    do_once))
				if (do_once) {
					do_once = 0;
					cmn_err(CE_NOTE, "probable error in "
					    "dma_cookie address from caller\n");
				}
				break;
			}
		}

		/*
		 * When we've reached the end of the current DMA cookie,
		 * jump to the next cookie (if there are more)
		 */
		if (cookie_cnt != 0) {
			ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
		}
	}

	/* Done with all the cookies; now sync the memory for the device */
	if (sync_needed)
		(void) ddi_dma_sync(dma_info->dma_hdl,
		    start * sizeof (hermon_hw_mtt_t),
		    (i - start) * sizeof (hermon_hw_mtt_t),
		    DDI_DMA_SYNC_FORDEV);

	return (DDI_SUCCESS);
}
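
/*
 * Illustrative example (assumed values): with 4KB pages
 * (mtt_pgsize_bits == 12) and a DMA cookie starting at physical
 * address 0x12345000, the first entry written above is
 * htonll(0x12345000 | HERMON_MTT_ENTRY_PRESENT), i.e. the big-endian
 * page address with the "present" flag OR'ed in.  This assumes, as
 * the usage above suggests, that HERMON_MTT_ENTRY_PRESENT occupies
 * low-order bits left unused by the page-aligned address.
 */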

/*
 * hermon_mr_fast_mtt_write_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_mr_fast_mtt_write_fmr(hermon_state_t *state, hermon_rsrc_t *mtt,
    ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits)
{
	hermon_icm_table_t	*icm_table;
	hermon_dma_info_t	*dma_info;
	uint32_t		index1, index2, rindx;
	uint64_t		*mtt_table;
	offset_t		i, j;
	uint_t			per_span;

	icm_table = &state->hs_icm[HERMON_MTT];
	rindx = mtt->hr_indx;
	hermon_index(index1, index2, rindx, icm_table, i);
	per_span   = icm_table->span;
	dma_info   = icm_table->icm_dma[index1] + index2;
	mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;

	/*
	 * Fill in the MTT table entries
	 */
	for (j = 0; j < mem_pattr->pmr_num_buf; j++) {
		mtt_table[i] = mem_pattr->pmr_addr_list[j].p_laddr;
		i++;
		rindx++;
		if (i == per_span) {
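			/*
			 * Crossed into the next ICM span; remap the MTT
			 * table pointer to that span's DMA mapping.
			 */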
			hermon_index(index1, index2, rindx, icm_table, i);
			dma_info = icm_table->icm_dma[index1] + index2;
			mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr;
		}
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_mtt_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
static uint_t
hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc)
{
	hermon_sw_refcnt_t	*rc;

	rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
	return (atomic_inc_uint_nv(&rc->swrc_refcnt));
}


/*
 * hermon_mtt_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
static uint_t
hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc)
{
	hermon_sw_refcnt_t	*rc;

	rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
	return (atomic_dec_uint_nv(&rc->swrc_refcnt));
}