xref: /titanic_51/usr/src/uts/common/io/ib/adapters/hermon/hermon_mr.c (revision 17a2b317610f531d565bf4e940433aab2d9e6985)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * hermon_mr.c
28  *    Hermon Memory Region/Window Routines
29  *
30  *    Implements all the routines necessary to provide the requisite memory
31  *    registration verbs.  These include operations like RegisterMemRegion(),
32  *    DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion,
33  *    etc., that affect Memory Regions.  It also includes the verbs that
34  *    affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
35  *    and QueryMemWindow().
36  */
37 
38 #include <sys/types.h>
39 #include <sys/conf.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/modctl.h>
43 #include <sys/esunddi.h>
44 
45 #include <sys/ib/adapters/hermon/hermon.h>
46 
/*
 * Driver-wide variables defined outside this file.  hermon_rdma_debug is
 * a bitmask of debug options; hermon_mr_deregister() tests bit 0x4 to
 * decide whether to trace each deregistration.
 * NOTE(review): the two "*_data_ro" variables are not referenced in the
 * nearby routines; their meaning should be confirmed at their definition.
 */
extern uint32_t hermon_kernel_data_ro;
extern uint32_t hermon_user_data_ro;
extern int hermon_rdma_debug;

/*
 * Used by hermon_mr_keycalc() below to fill in the "unconstrained" portion
 * of Hermon memory keys (LKeys and RKeys).
 *
 * HERMON_MEMKEY_SHIFT is the bit position at which that unconstrained
 * portion starts: hermon_mr_register_physical_fmr() builds a key as
 * (MPT index | (per-FMR key << HERMON_MEMKEY_SHIFT)).
 */
static	uint_t hermon_memkey_cnt = 0x00;
#define	HERMON_MEMKEY_SHIFT	24

/* initial state of an MPT */
#define	HERMON_MPT_SW_OWNERSHIP	0xF	/* memory regions */
#define	HERMON_MPT_FREE		0x3	/* allocate lkey */
61 
/*
 * Forward declarations of the static helper routines of this file.
 */

/* Common registration and re-registration paths */
static int hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type);
static int hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op);
static int hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level);

/* Sizing, binding and unbinding of the memory described by a "bind" struct */
static uint64_t hermon_mr_nummtt_needed(hermon_state_t *state,
    hermon_bind_info_t *bind, uint_t *mtt_pgsize);
static int hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer);
static void hermon_mr_mem_unbind(hermon_state_t *state,
    hermon_bind_info_t *bind);

/* Writing of MTT entries (regular and FMR variants) */
static int hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
    hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits);
static int hermon_mr_fast_mtt_write_fmr(hermon_state_t *state,
    hermon_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits);

/* Reference counting of MTT resources shared between memory regions */
static uint_t hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc);
static uint_t hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc);
83 
84 
/*
 * The Hermon umem_lockmemory() callback ops.  When userland memory is
 * registered, these callback ops are specified.  The hermon_umap_umemlock_cb()
 * callback will be called whenever the memory for the corresponding
 * ddi_umem_cookie_t is being freed.
 *
 * hermon_mr_register_shared() passes this structure to umem_lockmemory()
 * when it re-pins userland memory for a shared region.
 */
static struct umem_callback_ops hermon_umem_cbops = {
	UMEM_CALLBACK_VERSION,		/* version of the callback ops */
	hermon_umap_umemlock_cb,	/* called when the memory is freed */
};
95 
96 
97 
98 /*
99  * hermon_mr_register()
100  *    Context: Can be called from interrupt or base context.
101  */
102 int
103 hermon_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
104     ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
105     hermon_mpt_rsrc_type_t mpt_type)
106 {
107 	hermon_bind_info_t	bind;
108 	int			status;
109 
110 	/*
111 	 * Fill in the "bind" struct.  This struct provides the majority
112 	 * of the information that will be used to distinguish between an
113 	 * "addr" binding (as is the case here) and a "buf" binding (see
114 	 * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
115 	 * which does most of the "heavy lifting" for the Hermon memory
116 	 * registration routines.
117 	 */
118 	bind.bi_type  = HERMON_BINDHDL_VADDR;
119 	bind.bi_addr  = mr_attr->mr_vaddr;
120 	bind.bi_len   = mr_attr->mr_len;
121 	bind.bi_as    = mr_attr->mr_as;
122 	bind.bi_flags = mr_attr->mr_flags;
123 	status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op,
124 	    mpt_type);
125 	return (status);
126 }
127 
128 
129 /*
130  * hermon_mr_register_buf()
131  *    Context: Can be called from interrupt or base context.
132  */
133 int
134 hermon_mr_register_buf(hermon_state_t *state, hermon_pdhdl_t pd,
135     ibt_smr_attr_t *mr_attr, struct buf *buf, hermon_mrhdl_t *mrhdl,
136     hermon_mr_options_t *op, hermon_mpt_rsrc_type_t mpt_type)
137 {
138 	hermon_bind_info_t	bind;
139 	int			status;
140 
141 	/*
142 	 * Fill in the "bind" struct.  This struct provides the majority
143 	 * of the information that will be used to distinguish between an
144 	 * "addr" binding (see above) and a "buf" binding (as is the case
145 	 * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
146 	 * which does most of the "heavy lifting" for the Hermon memory
147 	 * registration routines.  Note: We have chosen to provide
148 	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
149 	 * not set).  It is not critical what value we choose here as it need
150 	 * only be unique for the given RKey (which will happen by default),
151 	 * so the choice here is somewhat arbitrary.
152 	 */
153 	bind.bi_type  = HERMON_BINDHDL_BUF;
154 	bind.bi_buf   = buf;
155 	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
156 		bind.bi_addr  = mr_attr->mr_vaddr;
157 	} else {
158 		bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
159 	}
160 	bind.bi_as    = NULL;
161 	bind.bi_len   = (uint64_t)buf->b_bcount;
162 	bind.bi_flags = mr_attr->mr_flags;
163 	status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, mpt_type);
164 	return (status);
165 }
166 
167 
168 /*
169  * hermon_mr_register_shared()
170  *    Context: Can be called from interrupt or base context.
171  */
172 int
173 hermon_mr_register_shared(hermon_state_t *state, hermon_mrhdl_t mrhdl,
174     hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new)
175 {
176 	hermon_rsrc_t		*mpt, *mtt, *rsrc;
177 	hermon_umap_db_entry_t	*umapdb;
178 	hermon_hw_dmpt_t	mpt_entry;
179 	hermon_mrhdl_t		mr;
180 	hermon_bind_info_t	*bind;
181 	ddi_umem_cookie_t	umem_cookie;
182 	size_t			umem_len;
183 	caddr_t			umem_addr;
184 	uint64_t		mtt_addr, pgsize_msk;
185 	uint_t			sleep, mr_is_umem;
186 	int			status, umem_flags;
187 
188 	/*
189 	 * Check the sleep flag.  Ensure that it is consistent with the
190 	 * current thread context (i.e. if we are currently in the interrupt
191 	 * context, then we shouldn't be attempting to sleep).
192 	 */
193 	sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP :
194 	    HERMON_SLEEP;
195 	if ((sleep == HERMON_SLEEP) &&
196 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
197 		status = IBT_INVALID_PARAM;
198 		goto mrshared_fail;
199 	}
200 
201 	/* Increment the reference count on the protection domain (PD) */
202 	hermon_pd_refcnt_inc(pd);
203 
204 	/*
205 	 * Allocate an MPT entry.  This will be filled in with all the
206 	 * necessary parameters to define the shared memory region.
207 	 * Specifically, it will be made to reference the currently existing
208 	 * MTT entries and ownership of the MPT will be passed to the hardware
209 	 * in the last step below.  If we fail here, we must undo the
210 	 * protection domain reference count.
211 	 */
212 	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
213 	if (status != DDI_SUCCESS) {
214 		status = IBT_INSUFF_RESOURCE;
215 		goto mrshared_fail1;
216 	}
217 
218 	/*
219 	 * Allocate the software structure for tracking the shared memory
220 	 * region (i.e. the Hermon Memory Region handle).  If we fail here, we
221 	 * must undo the protection domain reference count and the previous
222 	 * resource allocation.
223 	 */
224 	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
225 	if (status != DDI_SUCCESS) {
226 		status = IBT_INSUFF_RESOURCE;
227 		goto mrshared_fail2;
228 	}
229 	mr = (hermon_mrhdl_t)rsrc->hr_addr;
230 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
231 
232 	/*
233 	 * Setup and validate the memory region access flags.  This means
234 	 * translating the IBTF's enable flags into the access flags that
235 	 * will be used in later operations.
236 	 */
237 	mr->mr_accflag = 0;
238 	if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
239 		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
240 	if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
241 		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
242 	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
243 		mr->mr_accflag |= IBT_MR_REMOTE_READ;
244 	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
245 		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
246 	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
247 		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
248 
249 	/*
250 	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
251 	 * from a certain number of "constrained" bits (the least significant
252 	 * bits) and some number of "unconstrained" bits.  The constrained
253 	 * bits must be set to the index of the entry in the MPT table, but
254 	 * the unconstrained bits can be set to any value we wish.  Note:
255 	 * if no remote access is required, then the RKey value is not filled
256 	 * in.  Otherwise both Rkey and LKey are given the same value.
257 	 */
258 	mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
259 
260 	/* Grab the MR lock for the current memory region */
261 	mutex_enter(&mrhdl->mr_lock);
262 
263 	/*
264 	 * Check here to see if the memory region has already been partially
265 	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
266 	 * If so, this is an error, return failure.
267 	 */
268 	if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
269 		mutex_exit(&mrhdl->mr_lock);
270 		status = IBT_MR_HDL_INVALID;
271 		goto mrshared_fail3;
272 	}
273 
274 	/*
275 	 * Determine if the original memory was from userland and, if so, pin
276 	 * the pages (again) with umem_lockmemory().  This will guarantee a
277 	 * separate callback for each of this shared region's MR handles.
278 	 * If this is userland memory, then allocate an entry in the
279 	 * "userland resources database".  This will later be added to
280 	 * the database (after all further memory registration operations are
281 	 * successful).  If we fail here, we must undo all the above setup.
282 	 */
283 	mr_is_umem = mrhdl->mr_is_umem;
284 	if (mr_is_umem) {
285 		umem_len   = ptob(btopr(mrhdl->mr_bindinfo.bi_len));
286 		umem_addr  = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
287 		    ~PAGEOFFSET);
288 		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
289 		    DDI_UMEMLOCK_LONGTERM);
290 		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
291 		    &umem_cookie, &hermon_umem_cbops, NULL);
292 		if (status != 0) {
293 			mutex_exit(&mrhdl->mr_lock);
294 			status = IBT_INSUFF_RESOURCE;
295 			goto mrshared_fail3;
296 		}
297 
298 		umapdb = hermon_umap_db_alloc(state->hs_instance,
299 		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
300 		    (uint64_t)(uintptr_t)rsrc);
301 		if (umapdb == NULL) {
302 			mutex_exit(&mrhdl->mr_lock);
303 			status = IBT_INSUFF_RESOURCE;
304 			goto mrshared_fail4;
305 		}
306 	}
307 
308 	/*
309 	 * Copy the MTT resource pointer (and additional parameters) from
310 	 * the original Hermon Memory Region handle.  Note: this is normally
311 	 * where the hermon_mr_mem_bind() routine would be called, but because
312 	 * we already have bound and filled-in MTT entries it is simply a
313 	 * matter here of managing the MTT reference count and grabbing the
314 	 * address of the MTT table entries (for filling in the shared region's
315 	 * MPT entry).
316 	 */
317 	mr->mr_mttrsrcp	  = mrhdl->mr_mttrsrcp;
318 	mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
319 	mr->mr_bindinfo	  = mrhdl->mr_bindinfo;
320 	mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
321 	mutex_exit(&mrhdl->mr_lock);
322 	bind = &mr->mr_bindinfo;
323 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
324 	mtt = mr->mr_mttrsrcp;
325 
326 	/*
327 	 * Increment the MTT reference count (to reflect the fact that
328 	 * the MTT is now shared)
329 	 */
330 	(void) hermon_mtt_refcnt_inc(mr->mr_mttrefcntp);
331 
332 	/*
333 	 * Update the new "bind" virtual address.  Do some extra work here
334 	 * to ensure proper alignment.  That is, make sure that the page
335 	 * offset for the beginning of the old range is the same as the
336 	 * offset for this new mapping
337 	 */
338 	pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
339 	bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
340 	    (mr->mr_bindinfo.bi_addr & pgsize_msk));
341 
342 	/*
343 	 * Fill in the MPT entry.  This is the final step before passing
344 	 * ownership of the MPT entry to the Hermon hardware.  We use all of
345 	 * the information collected/calculated above to fill in the
346 	 * requisite portions of the MPT.
347 	 */
348 	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
349 	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
350 	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
351 	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
352 	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
353 	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
354 	mpt_entry.lr	  = 1;
355 	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
356 	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
357 	mpt_entry.mem_key	= mr->mr_lkey;
358 	mpt_entry.pd		= pd->pd_pdnum;
359 	mpt_entry.start_addr	= bind->bi_addr;
360 	mpt_entry.reg_win_len	= bind->bi_len;
361 	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
362 	mpt_entry.mtt_addr_h = mtt_addr >> 32;
363 	mpt_entry.mtt_addr_l = mtt_addr >> 3;
364 
365 	/*
366 	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
367 	 * the entry to the hardware.  Note: in general, this operation
368 	 * shouldn't fail.  But if it does, we have to undo everything we've
369 	 * done above before returning error.
370 	 */
371 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
372 	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
373 	if (status != HERMON_CMD_SUCCESS) {
374 		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
375 		    status);
376 		if (status == HERMON_CMD_INVALID_STATUS) {
377 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
378 		}
379 		status = ibc_get_ci_failure(0);
380 		goto mrshared_fail5;
381 	}
382 
383 	/*
384 	 * Fill in the rest of the Hermon Memory Region handle.  Having
385 	 * successfully transferred ownership of the MPT, we can update the
386 	 * following fields for use in further operations on the MR.
387 	 */
388 	mr->mr_mptrsrcp	  = mpt;
389 	mr->mr_mttrsrcp	  = mtt;
390 	mr->mr_mpt_type	  = HERMON_MPT_DMPT;
391 	mr->mr_pdhdl	  = pd;
392 	mr->mr_rsrcp	  = rsrc;
393 	mr->mr_is_umem	  = mr_is_umem;
394 	mr->mr_is_fmr	  = 0;
395 	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
396 	mr->mr_umem_cbfunc = NULL;
397 	mr->mr_umem_cbarg1 = NULL;
398 	mr->mr_umem_cbarg2 = NULL;
399 	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
400 	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);
401 
402 	/*
403 	 * If this is userland memory, then we need to insert the previously
404 	 * allocated entry into the "userland resources database".  This will
405 	 * allow for later coordination between the hermon_umap_umemlock_cb()
406 	 * callback and hermon_mr_deregister().
407 	 */
408 	if (mr_is_umem) {
409 		hermon_umap_db_add(umapdb);
410 	}
411 
412 	*mrhdl_new = mr;
413 
414 	return (DDI_SUCCESS);
415 
416 /*
417  * The following is cleanup for all possible failure cases in this routine
418  */
419 mrshared_fail5:
420 	(void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
421 	if (mr_is_umem) {
422 		hermon_umap_db_free(umapdb);
423 	}
424 mrshared_fail4:
425 	if (mr_is_umem) {
426 		ddi_umem_unlock(umem_cookie);
427 	}
428 mrshared_fail3:
429 	hermon_rsrc_free(state, &rsrc);
430 mrshared_fail2:
431 	hermon_rsrc_free(state, &mpt);
432 mrshared_fail1:
433 	hermon_pd_refcnt_dec(pd);
434 mrshared_fail:
435 	return (status);
436 }
437 
438 /*
439  * hermon_mr_alloc_fmr()
440  *    Context: Can be called from interrupt or base context.
441  */
442 int
443 hermon_mr_alloc_fmr(hermon_state_t *state, hermon_pdhdl_t pd,
444     hermon_fmrhdl_t fmr_pool, hermon_mrhdl_t *mrhdl)
445 {
446 	hermon_rsrc_t		*mpt, *mtt, *rsrc;
447 	hermon_hw_dmpt_t	mpt_entry;
448 	hermon_mrhdl_t		mr;
449 	hermon_bind_info_t	bind;
450 	uint64_t		mtt_addr;
451 	uint64_t		nummtt;
452 	uint_t			sleep, mtt_pgsize_bits;
453 	int			status;
454 	offset_t		i;
455 	hermon_icm_table_t	*icm_table;
456 	hermon_dma_info_t	*dma_info;
457 	uint32_t		index1, index2, rindx;
458 
459 	/*
460 	 * Check the sleep flag.  Ensure that it is consistent with the
461 	 * current thread context (i.e. if we are currently in the interrupt
462 	 * context, then we shouldn't be attempting to sleep).
463 	 */
464 	sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
465 	    HERMON_NOSLEEP;
466 	if ((sleep == HERMON_SLEEP) &&
467 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
468 		return (IBT_INVALID_PARAM);
469 	}
470 
471 	/* Increment the reference count on the protection domain (PD) */
472 	hermon_pd_refcnt_inc(pd);
473 
474 	/*
475 	 * Allocate an MPT entry.  This will be filled in with all the
476 	 * necessary parameters to define the FMR.  Specifically, it will be
477 	 * made to reference the currently existing MTT entries and ownership
478 	 * of the MPT will be passed to the hardware in the last step below.
479 	 * If we fail here, we must undo the protection domain reference count.
480 	 */
481 
482 	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
483 	if (status != DDI_SUCCESS) {
484 		status = IBT_INSUFF_RESOURCE;
485 		goto fmralloc_fail1;
486 	}
487 
488 	/*
489 	 * Allocate the software structure for tracking the fmr memory
490 	 * region (i.e. the Hermon Memory Region handle).  If we fail here, we
491 	 * must undo the protection domain reference count and the previous
492 	 * resource allocation.
493 	 */
494 	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
495 	if (status != DDI_SUCCESS) {
496 		status = IBT_INSUFF_RESOURCE;
497 		goto fmralloc_fail2;
498 	}
499 	mr = (hermon_mrhdl_t)rsrc->hr_addr;
500 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
501 
502 	/*
503 	 * Setup and validate the memory region access flags.  This means
504 	 * translating the IBTF's enable flags into the access flags that
505 	 * will be used in later operations.
506 	 */
507 	mr->mr_accflag = 0;
508 	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
509 		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
510 	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ)
511 		mr->mr_accflag |= IBT_MR_REMOTE_READ;
512 	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
513 		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
514 	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
515 		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
516 
517 	/*
518 	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
519 	 * from a certain number of "constrained" bits (the least significant
520 	 * bits) and some number of "unconstrained" bits.  The constrained
521 	 * bits must be set to the index of the entry in the MPT table, but
522 	 * the unconstrained bits can be set to any value we wish.  Note:
523 	 * if no remote access is required, then the RKey value is not filled
524 	 * in.  Otherwise both Rkey and LKey are given the same value.
525 	 */
526 	mr->mr_fmr_key = 1;	/* ready for the next reload */
527 	mr->mr_rkey = mr->mr_lkey = mpt->hr_indx;
528 
529 	/*
530 	 * Determine number of pages spanned.  This routine uses the
531 	 * information in the "bind" struct to determine the required
532 	 * number of MTT entries needed (and returns the suggested page size -
533 	 * as a "power-of-2" - for each MTT entry).
534 	 */
535 	/* Assume address will be page aligned later */
536 	bind.bi_addr = 0;
537 	/* Calculate size based on given max pages */
538 	bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT;
539 	nummtt = hermon_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits);
540 
541 	/*
542 	 * Allocate the MTT entries.  Use the calculations performed above to
543 	 * allocate the required number of MTT entries.  If we fail here, we
544 	 * must not only undo all the previous resource allocation (and PD
545 	 * reference count), but we must also unbind the memory.
546 	 */
547 	status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
548 	if (status != DDI_SUCCESS) {
549 		IBTF_DPRINTF_L2("FMR", "FATAL: too few MTTs");
550 		status = IBT_INSUFF_RESOURCE;
551 		goto fmralloc_fail3;
552 	}
553 	mr->mr_logmttpgsz = mtt_pgsize_bits;
554 
555 	/*
556 	 * Fill in the MPT entry.  This is the final step before passing
557 	 * ownership of the MPT entry to the Hermon hardware.  We use all of
558 	 * the information collected/calculated above to fill in the
559 	 * requisite portions of the MPT.
560 	 */
561 	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
562 	mpt_entry.en_bind = 0;
563 	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
564 	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
565 	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
566 	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
567 	mpt_entry.lr	  = 1;
568 	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
569 	mpt_entry.pd		= pd->pd_pdnum;
570 
571 	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
572 	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
573 	mpt_entry.fast_reg_en = 1;
574 	mpt_entry.mtt_size = (uint_t)nummtt;
575 	mpt_entry.mtt_addr_h = mtt_addr >> 32;
576 	mpt_entry.mtt_addr_l = mtt_addr >> 3;
577 	mpt_entry.mem_key = mr->mr_lkey;
578 
579 	/*
580 	 * FMR sets these to 0 for now.  Later during actual fmr registration
581 	 * these values are filled in.
582 	 */
583 	mpt_entry.start_addr	= 0;
584 	mpt_entry.reg_win_len	= 0;
585 
586 	/*
587 	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
588 	 * the entry to the hardware.  Note: in general, this operation
589 	 * shouldn't fail.  But if it does, we have to undo everything we've
590 	 * done above before returning error.
591 	 */
592 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
593 	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
594 	if (status != HERMON_CMD_SUCCESS) {
595 		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
596 		    status);
597 		if (status == HERMON_CMD_INVALID_STATUS) {
598 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
599 		}
600 		status = ibc_get_ci_failure(0);
601 		goto fmralloc_fail4;
602 	}
603 
604 	/*
605 	 * Fill in the rest of the Hermon Memory Region handle.  Having
606 	 * successfully transferred ownership of the MPT, we can update the
607 	 * following fields for use in further operations on the MR.  Also, set
608 	 * that this is an FMR region.
609 	 */
610 	mr->mr_mptrsrcp	  = mpt;
611 	mr->mr_mttrsrcp	  = mtt;
612 
613 	mr->mr_mpt_type   = HERMON_MPT_DMPT;
614 	mr->mr_pdhdl	  = pd;
615 	mr->mr_rsrcp	  = rsrc;
616 	mr->mr_is_fmr	  = 1;
617 	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
618 	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);
619 	mr->mr_mttaddr	   = mtt_addr;
620 	(void) memcpy(&mr->mr_bindinfo, &bind, sizeof (hermon_bind_info_t));
621 
622 	/* initialize hr_addr for use during register/deregister/invalidate */
623 	icm_table = &state->hs_icm[HERMON_DMPT];
624 	rindx = mpt->hr_indx;
625 	hermon_index(index1, index2, rindx, icm_table, i);
626 	dma_info = icm_table->icm_dma[index1] + index2;
627 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mpt))
628 	mpt->hr_addr = (void *)((uintptr_t)(dma_info->vaddr + i * mpt->hr_len));
629 
630 	*mrhdl = mr;
631 
632 	return (DDI_SUCCESS);
633 
634 /*
635  * The following is cleanup for all possible failure cases in this routine
636  */
637 fmralloc_fail4:
638 	kmem_free(mtt, sizeof (hermon_rsrc_t) * nummtt);
639 fmralloc_fail3:
640 	hermon_rsrc_free(state, &rsrc);
641 fmralloc_fail2:
642 	hermon_rsrc_free(state, &mpt);
643 fmralloc_fail1:
644 	hermon_pd_refcnt_dec(pd);
645 fmralloc_fail:
646 	return (status);
647 }
648 
649 
/*
 * hermon_mr_register_physical_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 *    (Re)loads an already allocated FMR "mr" with the physical buffers
 *    described by "mem_pattr_p".  The MPT entry is updated in place through
 *    "hr_addr" rather than with a firmware command: the entry is first
 *    marked as software owned, the MTT entries and the key, length and
 *    start address are rewritten, and the entry is then marked as hardware
 *    owned again.  The whole sequence runs under "mr_lock".
 *
 *    On success the new keys, IOVA and length are returned in "mem_desc_p"
 *    and DDI_SUCCESS is returned.  If the MTT write fails,
 *    ibc_get_ci_failure(0) is returned and the entry is left software
 *    owned.
 */
/*ARGSUSED*/
int
hermon_mr_register_physical_fmr(hermon_state_t *state,
    ibt_pmr_attr_t *mem_pattr_p, hermon_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p)
{
	hermon_rsrc_t		*mpt;
	uint64_t		*mpt_table;
	int			status;
	uint32_t		key;

	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	/* The MPT entry is accessed below as an array of 64-bit words */
	mpt_table = (uint64_t *)mpt->hr_addr;

	/* Write MPT status to SW bit (first byte of the entry) */
	*(uint8_t *)mpt_table = 0xF0;

	/* Order the status store ahead of the updates that follow */
	membar_producer();

	/*
	 * Write the mapped addresses into the MTT entries.  FMR needs to do
	 * this a little differently, so we call the fmr specific fast mtt
	 * write here.
	 */
	status = hermon_mr_fast_mtt_write_fmr(state, mr->mr_mttrsrcp,
	    mem_pattr_p, mr->mr_logmttpgsz);
	if (status != DDI_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		status = ibc_get_ci_failure(0);
		goto fmr_reg_fail1;
	}

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Here the
	 * unconstrained bits come from "mr_fmr_key", which is incremented on
	 * every reload, and both Rkey and LKey are given the same value.
	 */
	key = mpt->hr_indx | (mr->mr_fmr_key++ << HERMON_MEMKEY_SHIFT);
	mr->mr_lkey = mr->mr_rkey = hermon_mr_key_swap(key);

	/* write mem key value (first 4 bytes of 64-bit word 1) */
	*(uint32_t *)&mpt_table[1] = htonl(key);

	/* write length value (64-bit word 3) */
	mpt_table[3] = htonll(mem_pattr_p->pmr_len);

	/* write start addr value (64-bit word 2) */
	mpt_table[2] = htonll(mem_pattr_p->pmr_iova);

	/* write lkey value (first 4 bytes of 64-bit word 4) */
	*(uint32_t *)&mpt_table[4] = htonl(key);

	/* Order all of the updates above ahead of the ownership change */
	membar_producer();

	/* Write MPT status to HW bit */
	*(uint8_t *)mpt_table = 0x00;

	/* Fill in return parameters */
	mem_desc_p->pmd_lkey = mr->mr_lkey;
	mem_desc_p->pmd_rkey = mr->mr_rkey;
	mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova;
	mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len;

	/*
	 * Fill in MR bindinfo struct for later sync or query operations.
	 * Of the caller's flags only IBT_MR_NONCOHERENT is retained.
	 */
	mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova;
	mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT;

	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);

fmr_reg_fail1:
	/*
	 * Note, we fail here, and purposely leave the memory ownership in
	 * software.  The memory tables may be corrupt, so we leave the region
	 * unregistered.  "mr_lock" has already been dropped at this point.
	 */
	return (status);
}
737 
738 
739 /*
740  * hermon_mr_deregister()
741  *    Context: Can be called from interrupt or base context.
742  */
743 /* ARGSUSED */
744 int
745 hermon_mr_deregister(hermon_state_t *state, hermon_mrhdl_t *mrhdl, uint_t level,
746     uint_t sleep)
747 {
748 	hermon_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
749 	hermon_umap_db_entry_t	*umapdb;
750 	hermon_pdhdl_t		pd;
751 	hermon_mrhdl_t		mr;
752 	hermon_bind_info_t	*bind;
753 	uint64_t		value;
754 	int			status;
755 	uint_t			shared_mtt;
756 
757 	/*
758 	 * Check the sleep flag.  Ensure that it is consistent with the
759 	 * current thread context (i.e. if we are currently in the interrupt
760 	 * context, then we shouldn't be attempting to sleep).
761 	 */
762 	if ((sleep == HERMON_SLEEP) &&
763 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
764 		status = IBT_INVALID_PARAM;
765 		return (status);
766 	}
767 
768 	/*
769 	 * Pull all the necessary information from the Hermon Memory Region
770 	 * handle.  This is necessary here because the resource for the
771 	 * MR handle is going to be freed up as part of the this
772 	 * deregistration
773 	 */
774 	mr	= *mrhdl;
775 	mutex_enter(&mr->mr_lock);
776 	mpt	= mr->mr_mptrsrcp;
777 	mtt	= mr->mr_mttrsrcp;
778 	mtt_refcnt = mr->mr_mttrefcntp;
779 	rsrc	= mr->mr_rsrcp;
780 	pd	= mr->mr_pdhdl;
781 	bind	= &mr->mr_bindinfo;
782 
783 	/*
784 	 * Check here if the memory region is really an FMR.  If so, this is a
785 	 * bad thing and we shouldn't be here.  Return failure.
786 	 */
787 	if (mr->mr_is_fmr) {
788 		mutex_exit(&mr->mr_lock);
789 		return (IBT_INVALID_PARAM);
790 	}
791 
792 	/*
793 	 * Check here to see if the memory region has already been partially
794 	 * deregistered as a result of the hermon_umap_umemlock_cb() callback.
795 	 * If so, then jump to the end and free the remaining resources.
796 	 */
797 	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
798 		goto mrdereg_finish_cleanup;
799 	}
800 	if (hermon_rdma_debug & 0x4)
801 		IBTF_DPRINTF_L2("mr", "dereg: mr %p  key %x",
802 		    mr, mr->mr_rkey);
803 
804 	/*
805 	 * We must drop the "mr_lock" here to ensure that both SLEEP and
806 	 * NOSLEEP calls into the firmware work as expected.  Also, if two
807 	 * threads are attemping to access this MR (via de-register,
808 	 * re-register, or otherwise), then we allow the firmware to enforce
809 	 * the checking, that only one deregister is valid.
810 	 */
811 	mutex_exit(&mr->mr_lock);
812 
813 	/*
814 	 * Reclaim MPT entry from hardware (if necessary).  Since the
815 	 * hermon_mr_deregister() routine is used in the memory region
816 	 * reregistration process as well, it is possible that we will
817 	 * not always wish to reclaim ownership of the MPT.  Check the
818 	 * "level" arg and, if necessary, attempt to reclaim it.  If
819 	 * the ownership transfer fails for any reason, we check to see
820 	 * what command status was returned from the hardware.  The only
821 	 * "expected" error status is the one that indicates an attempt to
822 	 * deregister a memory region that has memory windows bound to it
823 	 */
824 	if (level >= HERMON_MR_DEREG_ALL) {
825 		if (mr->mr_mpt_type >= HERMON_MPT_DMPT) {
826 			status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
827 			    NULL, 0, mpt->hr_indx, sleep);
828 			if (status != HERMON_CMD_SUCCESS) {
829 				if (status == HERMON_CMD_REG_BOUND) {
830 					return (IBT_MR_IN_USE);
831 				} else {
832 					cmn_err(CE_CONT, "Hermon: HW2SW_MPT "
833 					    "command failed: %08x\n", status);
834 					if (status ==
835 					    HERMON_CMD_INVALID_STATUS) {
836 						hermon_fm_ereport(state,
837 						    HCA_SYS_ERR,
838 						    DDI_SERVICE_LOST);
839 					}
840 					return (IBT_INVALID_PARAM);
841 				}
842 			}
843 		}
844 	}
845 
846 	/*
847 	 * Re-grab the mr_lock here.  Since further access to the protected
848 	 * 'mr' structure is needed, and we would have returned previously for
849 	 * the multiple deregistration case, we can safely grab the lock here.
850 	 */
851 	mutex_enter(&mr->mr_lock);
852 
853 	/*
854 	 * If the memory had come from userland, then we do a lookup in the
855 	 * "userland resources database".  On success, we free the entry, call
856 	 * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
857 	 * an indication that the umem_lockmemory() callback has called
858 	 * hermon_mr_deregister()), we call ddi_umem_unlock() and invalidate
859 	 * the "mr_umemcookie" field in the MR handle (this will be used
860 	 * later to detect that only partial cleaup still remains to be done
861 	 * on the MR handle).
862 	 */
863 	if (mr->mr_is_umem) {
864 		status = hermon_umap_db_find(state->hs_instance,
865 		    (uint64_t)(uintptr_t)mr->mr_umemcookie,
866 		    MLNX_UMAP_MRMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
867 		    &umapdb);
868 		if (status == DDI_SUCCESS) {
869 			hermon_umap_db_free(umapdb);
870 			ddi_umem_unlock(mr->mr_umemcookie);
871 		} else {
872 			ddi_umem_unlock(mr->mr_umemcookie);
873 			mr->mr_umemcookie = NULL;
874 		}
875 	}
876 
877 	/* mtt_refcnt is NULL in the case of hermon_dma_mr_register() */
878 	if (mtt_refcnt != NULL) {
879 		/*
880 		 * Decrement the MTT reference count.  Since the MTT resource
881 		 * may be shared between multiple memory regions (as a result
882 		 * of a "RegisterSharedMR" verb) it is important that we not
883 		 * free up or unbind resources prematurely.  If it's not shared
884 		 * (as indicated by the return status), then free the resource.
885 		 */
886 		shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt);
887 		if (!shared_mtt) {
888 			hermon_rsrc_free(state, &mtt_refcnt);
889 		}
890 
891 		/*
892 		 * Free up the MTT entries and unbind the memory.  Here,
893 		 * as above, we attempt to free these resources only if
894 		 * it is appropriate to do so.
895 		 * Note, 'bind' is NULL in the alloc_lkey case.
896 		 */
897 		if (!shared_mtt) {
898 			if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) {
899 				hermon_mr_mem_unbind(state, bind);
900 			}
901 			hermon_rsrc_free(state, &mtt);
902 		}
903 	}
904 
905 	/*
906 	 * If the MR handle has been invalidated, then drop the
907 	 * lock and return success.  Note: This only happens because
908 	 * the umem_lockmemory() callback has been triggered.  The
909 	 * cleanup here is partial, and further cleanup (in a
910 	 * subsequent hermon_mr_deregister() call) will be necessary.
911 	 */
912 	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
913 		mutex_exit(&mr->mr_lock);
914 		return (DDI_SUCCESS);
915 	}
916 
917 mrdereg_finish_cleanup:
918 	mutex_exit(&mr->mr_lock);
919 
920 	/* Free the Hermon Memory Region handle */
921 	hermon_rsrc_free(state, &rsrc);
922 
923 	/* Free up the MPT entry resource */
924 	if (mpt != NULL)
925 		hermon_rsrc_free(state, &mpt);
926 
927 	/* Decrement the reference count on the protection domain (PD) */
928 	hermon_pd_refcnt_dec(pd);
929 
930 	/* Set the mrhdl pointer to NULL and return success */
931 	*mrhdl = NULL;
932 
933 	return (DDI_SUCCESS);
934 }
935 
936 /*
937  * hermon_mr_dealloc_fmr()
938  *    Context: Can be called from interrupt or base context.
939  */
940 /* ARGSUSED */
941 int
942 hermon_mr_dealloc_fmr(hermon_state_t *state, hermon_mrhdl_t *mrhdl)
943 {
944 	hermon_rsrc_t		*mpt, *mtt, *rsrc;
945 	hermon_pdhdl_t		pd;
946 	hermon_mrhdl_t		mr;
947 
948 	/*
949 	 * Pull all the necessary information from the Hermon Memory Region
950 	 * handle.  This is necessary here because the resource for the
951 	 * MR handle is going to be freed up as part of the this
952 	 * deregistration
953 	 */
954 	mr	= *mrhdl;
955 	mutex_enter(&mr->mr_lock);
956 	mpt	= mr->mr_mptrsrcp;
957 	mtt	= mr->mr_mttrsrcp;
958 	rsrc	= mr->mr_rsrcp;
959 	pd	= mr->mr_pdhdl;
960 	mutex_exit(&mr->mr_lock);
961 
962 	/* Free the MTT entries */
963 	hermon_rsrc_free(state, &mtt);
964 
965 	/* Free the Hermon Memory Region handle */
966 	hermon_rsrc_free(state, &rsrc);
967 
968 	/* Free up the MPT entry resource */
969 	hermon_rsrc_free(state, &mpt);
970 
971 	/* Decrement the reference count on the protection domain (PD) */
972 	hermon_pd_refcnt_dec(pd);
973 
974 	/* Set the mrhdl pointer to NULL and return success */
975 	*mrhdl = NULL;
976 
977 	return (DDI_SUCCESS);
978 }
979 
980 
981 /*
982  * hermon_mr_query()
983  *    Context: Can be called from interrupt or base context.
984  */
985 /* ARGSUSED */
986 int
987 hermon_mr_query(hermon_state_t *state, hermon_mrhdl_t mr,
988     ibt_mr_query_attr_t *attr)
989 {
990 	int			status;
991 	hermon_hw_dmpt_t	mpt_entry;
992 	uint32_t		lkey;
993 
994 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))
995 
996 	mutex_enter(&mr->mr_lock);
997 
998 	/*
999 	 * Check here to see if the memory region has already been partially
1000 	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
1001 	 * If so, this is an error, return failure.
1002 	 */
1003 	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
1004 		mutex_exit(&mr->mr_lock);
1005 		return (IBT_MR_HDL_INVALID);
1006 	}
1007 
1008 	status = hermon_cmn_query_cmd_post(state, QUERY_MPT, 0,
1009 	    mr->mr_lkey >> 8, &mpt_entry, sizeof (hermon_hw_dmpt_t),
1010 	    HERMON_NOSLEEP);
1011 	if (status != HERMON_CMD_SUCCESS) {
1012 		cmn_err(CE_CONT, "Hermon: QUERY_MPT failed: status %x", status);
1013 		mutex_exit(&mr->mr_lock);
1014 		return (ibc_get_ci_failure(0));
1015 	}
1016 
1017 	/* Update the mr sw struct from the hw struct. */
1018 	lkey = mpt_entry.mem_key;
1019 	mr->mr_lkey = mr->mr_rkey = (lkey >> 8) | (lkey << 24);
1020 	mr->mr_bindinfo.bi_addr = mpt_entry.start_addr;
1021 	mr->mr_bindinfo.bi_len = mpt_entry.reg_win_len;
1022 	mr->mr_accflag = (mr->mr_accflag & IBT_MR_RO_DISABLED) |
1023 	    (mpt_entry.lw ? IBT_MR_LOCAL_WRITE : 0) |
1024 	    (mpt_entry.rr ? IBT_MR_REMOTE_READ : 0) |
1025 	    (mpt_entry.rw ? IBT_MR_REMOTE_WRITE : 0) |
1026 	    (mpt_entry.atomic ? IBT_MR_REMOTE_ATOMIC : 0) |
1027 	    (mpt_entry.en_bind ? IBT_MR_WINDOW_BIND : 0);
1028 	mr->mr_mttaddr = ((uint64_t)mpt_entry.mtt_addr_h << 32) |
1029 	    (mpt_entry.mtt_addr_l << 3);
1030 	mr->mr_logmttpgsz = mpt_entry.entity_sz;
1031 
1032 	/* Fill in the queried attributes */
1033 	attr->mr_lkey_state =
1034 	    (mpt_entry.status == HERMON_MPT_FREE) ? IBT_KEY_FREE :
1035 	    (mpt_entry.status == HERMON_MPT_SW_OWNERSHIP) ? IBT_KEY_INVALID :
1036 	    IBT_KEY_VALID;
1037 	attr->mr_phys_buf_list_sz = mpt_entry.mtt_size;
1038 	attr->mr_attr_flags = mr->mr_accflag;
1039 	attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;
1040 
1041 	/* Fill in the "local" attributes */
1042 	attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
1043 	attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
1044 	attr->mr_lbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;
1045 
1046 	/*
1047 	 * Fill in the "remote" attributes (if necessary).  Note: the
1048 	 * remote attributes are only valid if the memory region has one
1049 	 * or more of the remote access flags set.
1050 	 */
1051 	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
1052 	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
1053 	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
1054 		attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
1055 		attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
1056 		attr->mr_rbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;
1057 	}
1058 
1059 	/*
1060 	 * If region is mapped for streaming (i.e. noncoherent), then set sync
1061 	 * is required
1062 	 */
1063 	attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
1064 	    IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;
1065 
1066 	mutex_exit(&mr->mr_lock);
1067 	return (DDI_SUCCESS);
1068 }
1069 
1070 
1071 /*
1072  * hermon_mr_reregister()
1073  *    Context: Can be called from interrupt or base context.
1074  */
1075 int
1076 hermon_mr_reregister(hermon_state_t *state, hermon_mrhdl_t mr,
1077     hermon_pdhdl_t pd, ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new,
1078     hermon_mr_options_t *op)
1079 {
1080 	hermon_bind_info_t	bind;
1081 	int			status;
1082 
1083 	/*
1084 	 * Fill in the "bind" struct.  This struct provides the majority
1085 	 * of the information that will be used to distinguish between an
1086 	 * "addr" binding (as is the case here) and a "buf" binding (see
1087 	 * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
1088 	 * which does most of the "heavy lifting" for the Hermon memory
1089 	 * registration (and reregistration) routines.
1090 	 */
1091 	bind.bi_type  = HERMON_BINDHDL_VADDR;
1092 	bind.bi_addr  = mr_attr->mr_vaddr;
1093 	bind.bi_len   = mr_attr->mr_len;
1094 	bind.bi_as    = mr_attr->mr_as;
1095 	bind.bi_flags = mr_attr->mr_flags;
1096 	status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
1097 	return (status);
1098 }
1099 
1100 
1101 /*
1102  * hermon_mr_reregister_buf()
1103  *    Context: Can be called from interrupt or base context.
1104  */
1105 int
1106 hermon_mr_reregister_buf(hermon_state_t *state, hermon_mrhdl_t mr,
1107     hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
1108     hermon_mrhdl_t *mrhdl_new, hermon_mr_options_t *op)
1109 {
1110 	hermon_bind_info_t	bind;
1111 	int			status;
1112 
1113 	/*
1114 	 * Fill in the "bind" struct.  This struct provides the majority
1115 	 * of the information that will be used to distinguish between an
1116 	 * "addr" binding (see above) and a "buf" binding (as is the case
1117 	 * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
1118 	 * which does most of the "heavy lifting" for the Hermon memory
1119 	 * registration routines.  Note: We have chosen to provide
1120 	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
1121 	 * not set).  It is not critical what value we choose here as it need
1122 	 * only be unique for the given RKey (which will happen by default),
1123 	 * so the choice here is somewhat arbitrary.
1124 	 */
1125 	bind.bi_type  = HERMON_BINDHDL_BUF;
1126 	bind.bi_buf   = buf;
1127 	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
1128 		bind.bi_addr  = mr_attr->mr_vaddr;
1129 	} else {
1130 		bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
1131 	}
1132 	bind.bi_len   = (uint64_t)buf->b_bcount;
1133 	bind.bi_flags = mr_attr->mr_flags;
1134 	bind.bi_as    = NULL;
1135 	status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
1136 	return (status);
1137 }
1138 
1139 
1140 /*
1141  * hermon_mr_sync()
1142  *    Context: Can be called from interrupt or base context.
1143  */
1144 /* ARGSUSED */
1145 int
1146 hermon_mr_sync(hermon_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
1147 {
1148 	hermon_mrhdl_t		mrhdl;
1149 	uint64_t		seg_vaddr, seg_len, seg_end;
1150 	uint64_t		mr_start, mr_end;
1151 	uint_t			type;
1152 	int			status, i;
1153 
1154 	/* Process each of the ibt_mr_sync_t's */
1155 	for (i = 0; i < num_segs; i++) {
1156 		mrhdl = (hermon_mrhdl_t)mr_segs[i].ms_handle;
1157 
1158 		/* Check for valid memory region handle */
1159 		if (mrhdl == NULL) {
1160 			status = IBT_MR_HDL_INVALID;
1161 			goto mrsync_fail;
1162 		}
1163 
1164 		mutex_enter(&mrhdl->mr_lock);
1165 
1166 		/*
1167 		 * Check here to see if the memory region has already been
1168 		 * partially deregistered as a result of a
1169 		 * hermon_umap_umemlock_cb() callback.  If so, this is an
1170 		 * error, return failure.
1171 		 */
1172 		if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
1173 			mutex_exit(&mrhdl->mr_lock);
1174 			status = IBT_MR_HDL_INVALID;
1175 			goto mrsync_fail;
1176 		}
1177 
1178 		/* Check for valid bounds on sync request */
1179 		seg_vaddr = mr_segs[i].ms_vaddr;
1180 		seg_len	  = mr_segs[i].ms_len;
1181 		seg_end	  = seg_vaddr + seg_len - 1;
1182 		mr_start  = mrhdl->mr_bindinfo.bi_addr;
1183 		mr_end	  = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
1184 		if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
1185 			mutex_exit(&mrhdl->mr_lock);
1186 			status = IBT_MR_VA_INVALID;
1187 			goto mrsync_fail;
1188 		}
1189 		if ((seg_end < mr_start) || (seg_end > mr_end)) {
1190 			mutex_exit(&mrhdl->mr_lock);
1191 			status = IBT_MR_LEN_INVALID;
1192 			goto mrsync_fail;
1193 		}
1194 
1195 		/* Determine what type (i.e. direction) for sync */
1196 		if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
1197 			type = DDI_DMA_SYNC_FORDEV;
1198 		} else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
1199 			type = DDI_DMA_SYNC_FORCPU;
1200 		} else {
1201 			mutex_exit(&mrhdl->mr_lock);
1202 			status = IBT_INVALID_PARAM;
1203 			goto mrsync_fail;
1204 		}
1205 
1206 		(void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
1207 		    (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);
1208 
1209 		mutex_exit(&mrhdl->mr_lock);
1210 	}
1211 
1212 	return (DDI_SUCCESS);
1213 
1214 mrsync_fail:
1215 	return (status);
1216 }
1217 
1218 
1219 /*
1220  * hermon_mw_alloc()
1221  *    Context: Can be called from interrupt or base context.
1222  */
1223 int
1224 hermon_mw_alloc(hermon_state_t *state, hermon_pdhdl_t pd, ibt_mw_flags_t flags,
1225     hermon_mwhdl_t *mwhdl)
1226 {
1227 	hermon_rsrc_t		*mpt, *rsrc;
1228 	hermon_hw_dmpt_t		mpt_entry;
1229 	hermon_mwhdl_t		mw;
1230 	uint_t			sleep;
1231 	int			status;
1232 
1233 	if (state != NULL)	/* XXX - bogus test that is always TRUE */
1234 		return (IBT_INSUFF_RESOURCE);
1235 
1236 	/*
1237 	 * Check the sleep flag.  Ensure that it is consistent with the
1238 	 * current thread context (i.e. if we are currently in the interrupt
1239 	 * context, then we shouldn't be attempting to sleep).
1240 	 */
1241 	sleep = (flags & IBT_MW_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
1242 	if ((sleep == HERMON_SLEEP) &&
1243 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1244 		status = IBT_INVALID_PARAM;
1245 		goto mwalloc_fail;
1246 	}
1247 
1248 	/* Increment the reference count on the protection domain (PD) */
1249 	hermon_pd_refcnt_inc(pd);
1250 
1251 	/*
1252 	 * Allocate an MPT entry (for use as a memory window).  Since the
1253 	 * Hermon hardware uses the MPT entry for memory regions and for
1254 	 * memory windows, we will fill in this MPT with all the necessary
1255 	 * parameters for the memory window.  And then (just as we do for
1256 	 * memory regions) ownership will be passed to the hardware in the
1257 	 * final step below.  If we fail here, we must undo the protection
1258 	 * domain reference count.
1259 	 */
1260 	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1261 	if (status != DDI_SUCCESS) {
1262 		status = IBT_INSUFF_RESOURCE;
1263 		goto mwalloc_fail1;
1264 	}
1265 
1266 	/*
1267 	 * Allocate the software structure for tracking the memory window (i.e.
1268 	 * the Hermon Memory Window handle).  Note: This is actually the same
1269 	 * software structure used for tracking memory regions, but since many
1270 	 * of the same properties are needed, only a single structure is
1271 	 * necessary.  If we fail here, we must undo the protection domain
1272 	 * reference count and the previous resource allocation.
1273 	 */
1274 	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1275 	if (status != DDI_SUCCESS) {
1276 		status = IBT_INSUFF_RESOURCE;
1277 		goto mwalloc_fail2;
1278 	}
1279 	mw = (hermon_mwhdl_t)rsrc->hr_addr;
1280 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))
1281 
1282 	/*
1283 	 * Calculate an "unbound" RKey from MPT index.  In much the same way
1284 	 * as we do for memory regions (above), this key is constructed from
1285 	 * a "constrained" (which depends on the MPT index) and an
1286 	 * "unconstrained" portion (which may be arbitrarily chosen).
1287 	 */
1288 	mw->mr_rkey = hermon_mr_keycalc(mpt->hr_indx);
1289 
1290 	/*
1291 	 * Fill in the MPT entry.  This is the final step before passing
1292 	 * ownership of the MPT entry to the Hermon hardware.  We use all of
1293 	 * the information collected/calculated above to fill in the
1294 	 * requisite portions of the MPT.  Note: fewer entries in the MPT
1295 	 * entry are necessary to allocate a memory window.
1296 	 */
1297 	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1298 	mpt_entry.reg_win	= HERMON_MPT_IS_WINDOW;
1299 	mpt_entry.mem_key	= mw->mr_rkey;
1300 	mpt_entry.pd		= pd->pd_pdnum;
1301 	mpt_entry.lr		= 1;
1302 
1303 	/*
1304 	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
1305 	 * the entry to the hardware.  Note: in general, this operation
1306 	 * shouldn't fail.  But if it does, we have to undo everything we've
1307 	 * done above before returning error.
1308 	 */
1309 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1310 	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1311 	if (status != HERMON_CMD_SUCCESS) {
1312 		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1313 		    status);
1314 		if (status == HERMON_CMD_INVALID_STATUS) {
1315 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1316 		}
1317 		status = ibc_get_ci_failure(0);
1318 		goto mwalloc_fail3;
1319 	}
1320 
1321 	/*
1322 	 * Fill in the rest of the Hermon Memory Window handle.  Having
1323 	 * successfully transferred ownership of the MPT, we can update the
1324 	 * following fields for use in further operations on the MW.
1325 	 */
1326 	mw->mr_mptrsrcp	= mpt;
1327 	mw->mr_pdhdl	= pd;
1328 	mw->mr_rsrcp	= rsrc;
1329 	mw->mr_rkey	= hermon_mr_key_swap(mw->mr_rkey);
1330 	*mwhdl = mw;
1331 
1332 	return (DDI_SUCCESS);
1333 
1334 mwalloc_fail3:
1335 	hermon_rsrc_free(state, &rsrc);
1336 mwalloc_fail2:
1337 	hermon_rsrc_free(state, &mpt);
1338 mwalloc_fail1:
1339 	hermon_pd_refcnt_dec(pd);
1340 mwalloc_fail:
1341 	return (status);
1342 }
1343 
1344 
1345 /*
1346  * hermon_mw_free()
1347  *    Context: Can be called from interrupt or base context.
1348  */
1349 int
1350 hermon_mw_free(hermon_state_t *state, hermon_mwhdl_t *mwhdl, uint_t sleep)
1351 {
1352 	hermon_rsrc_t		*mpt, *rsrc;
1353 	hermon_mwhdl_t		mw;
1354 	int			status;
1355 	hermon_pdhdl_t		pd;
1356 
1357 	/*
1358 	 * Check the sleep flag.  Ensure that it is consistent with the
1359 	 * current thread context (i.e. if we are currently in the interrupt
1360 	 * context, then we shouldn't be attempting to sleep).
1361 	 */
1362 	if ((sleep == HERMON_SLEEP) &&
1363 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1364 		status = IBT_INVALID_PARAM;
1365 		return (status);
1366 	}
1367 
1368 	/*
1369 	 * Pull all the necessary information from the Hermon Memory Window
1370 	 * handle.  This is necessary here because the resource for the
1371 	 * MW handle is going to be freed up as part of the this operation.
1372 	 */
1373 	mw	= *mwhdl;
1374 	mutex_enter(&mw->mr_lock);
1375 	mpt	= mw->mr_mptrsrcp;
1376 	rsrc	= mw->mr_rsrcp;
1377 	pd	= mw->mr_pdhdl;
1378 	mutex_exit(&mw->mr_lock);
1379 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))
1380 
1381 	/*
1382 	 * Reclaim the MPT entry from hardware.  Note: in general, it is
1383 	 * unexpected for this operation to return an error.
1384 	 */
1385 	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
1386 	    0, mpt->hr_indx, sleep);
1387 	if (status != HERMON_CMD_SUCCESS) {
1388 		cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: %08x\n",
1389 		    status);
1390 		if (status == HERMON_CMD_INVALID_STATUS) {
1391 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1392 		}
1393 		return (ibc_get_ci_failure(0));
1394 	}
1395 
1396 	/* Free the Hermon Memory Window handle */
1397 	hermon_rsrc_free(state, &rsrc);
1398 
1399 	/* Free up the MPT entry resource */
1400 	hermon_rsrc_free(state, &mpt);
1401 
1402 	/* Decrement the reference count on the protection domain (PD) */
1403 	hermon_pd_refcnt_dec(pd);
1404 
1405 	/* Set the mwhdl pointer to NULL and return success */
1406 	*mwhdl = NULL;
1407 
1408 	return (DDI_SUCCESS);
1409 }
1410 
1411 
1412 /*
1413  * hermon_mr_keycalc()
1414  *    Context: Can be called from interrupt or base context.
1415  *    NOTE:  Produces a key in the form of
1416  *		KKKKKKKK IIIIIIII IIIIIIII IIIIIIIII
1417  *    where K == the arbitrary bits and I == the index
1418  */
1419 uint32_t
1420 hermon_mr_keycalc(uint32_t indx)
1421 {
1422 	uint32_t tmp_key, tmp_indx;
1423 
1424 	/*
1425 	 * Generate a simple key from counter.  Note:  We increment this
1426 	 * static variable _intentionally_ without any kind of mutex around
1427 	 * it.  First, single-threading all operations through a single lock
1428 	 * would be a bad idea (from a performance point-of-view).  Second,
1429 	 * the upper "unconstrained" bits don't really have to be unique
1430 	 * because the lower bits are guaranteed to be (although we do make a
1431 	 * best effort to ensure that they are).  Third, the window for the
1432 	 * race (where both threads read and update the counter at the same
1433 	 * time) is incredibly small.
1434 	 * And, lastly, we'd like to make this into a "random" key
1435 	 */
1436 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hermon_memkey_cnt))
1437 	tmp_key = (hermon_memkey_cnt++) << HERMON_MEMKEY_SHIFT;
1438 	tmp_indx = indx & 0xffffff;
1439 	return (tmp_key | tmp_indx);
1440 }
1441 
1442 
/*
 * hermon_mr_key_swap()
 *    Context: Can be called from interrupt or base context.
 *    NOTE:  Produces a key in the form of
 *		IIIIIIII IIIIIIII IIIIIIII KKKKKKKK
 *    where K == the arbitrary bits and I == the index
 */
uint32_t
hermon_mr_key_swap(uint32_t indx)
{
	/*
	 * The hardware is given memory keys laid out as
	 * (key[7:0],index[23:0]), where the index selects the hardware
	 * resource.  When the driver hands the value out as a memory key
	 * (i.e. to retrieve a resource) the layout is
	 * (index[23:0],key[7:0]).  Converting between the two is a rotate
	 * left by one byte: the top byte drops to the bottom and the low
	 * 24 bits move up.
	 */
	uint32_t	key_byte   = (indx >> 24) & 0x000000ff;
	uint32_t	index_bits = (indx << 8) & 0xffffff00;

	return (index_bits | key_byte);
}
1462 
1463 /*
1464  * hermon_mr_common_reg()
1465  *    Context: Can be called from interrupt or base context.
1466  */
1467 static int
1468 hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
1469     hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
1470     hermon_mpt_rsrc_type_t mpt_type)
1471 {
1472 	hermon_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
1473 	hermon_umap_db_entry_t	*umapdb;
1474 	hermon_sw_refcnt_t	*swrc_tmp;
1475 	hermon_hw_dmpt_t	mpt_entry;
1476 	hermon_mrhdl_t		mr;
1477 	ibt_mr_flags_t		flags;
1478 	hermon_bind_info_t	*bh;
1479 	ddi_dma_handle_t	bind_dmahdl;
1480 	ddi_umem_cookie_t	umem_cookie;
1481 	size_t			umem_len;
1482 	caddr_t			umem_addr;
1483 	uint64_t		mtt_addr, max_sz;
1484 	uint_t			sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
1485 	int			status, umem_flags, bind_override_addr;
1486 
1487 	/*
1488 	 * Check the "options" flag.  Currently this flag tells the driver
1489 	 * whether or not the region should be bound normally (i.e. with
1490 	 * entries written into the PCI IOMMU), whether it should be
1491 	 * registered to bypass the IOMMU, and whether or not the resulting
1492 	 * address should be "zero-based" (to aid the alignment restrictions
1493 	 * for QPs).
1494 	 */
1495 	if (op == NULL) {
1496 		bind_type   = HERMON_BINDMEM_NORMAL;
1497 		bind_dmahdl = NULL;
1498 		bind_override_addr = 0;
1499 	} else {
1500 		bind_type	   = op->mro_bind_type;
1501 		bind_dmahdl	   = op->mro_bind_dmahdl;
1502 		bind_override_addr = op->mro_bind_override_addr;
1503 	}
1504 
1505 	/* check what kind of mpt to use */
1506 
1507 	/* Extract the flags field from the hermon_bind_info_t */
1508 	flags = bind->bi_flags;
1509 
1510 	/*
1511 	 * Check for invalid length.  Check is the length is zero or if the
1512 	 * length is larger than the maximum configured value.  Return error
1513 	 * if it is.
1514 	 */
1515 	max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
1516 	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
1517 		status = IBT_MR_LEN_INVALID;
1518 		goto mrcommon_fail;
1519 	}
1520 
1521 	/*
1522 	 * Check the sleep flag.  Ensure that it is consistent with the
1523 	 * current thread context (i.e. if we are currently in the interrupt
1524 	 * context, then we shouldn't be attempting to sleep).
1525 	 */
1526 	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
1527 	if ((sleep == HERMON_SLEEP) &&
1528 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1529 		status = IBT_INVALID_PARAM;
1530 		goto mrcommon_fail;
1531 	}
1532 
1533 	/* Increment the reference count on the protection domain (PD) */
1534 	hermon_pd_refcnt_inc(pd);
1535 
1536 	/*
1537 	 * Allocate an MPT entry.  This will be filled in with all the
1538 	 * necessary parameters to define the memory region.  And then
1539 	 * ownership will be passed to the hardware in the final step
1540 	 * below.  If we fail here, we must undo the protection domain
1541 	 * reference count.
1542 	 */
1543 	if (mpt_type == HERMON_MPT_DMPT) {
1544 		status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1545 		if (status != DDI_SUCCESS) {
1546 			status = IBT_INSUFF_RESOURCE;
1547 			goto mrcommon_fail1;
1548 		}
1549 	} else {
1550 		mpt = NULL;
1551 	}
1552 
1553 	/*
1554 	 * Allocate the software structure for tracking the memory region (i.e.
1555 	 * the Hermon Memory Region handle).  If we fail here, we must undo
1556 	 * the protection domain reference count and the previous resource
1557 	 * allocation.
1558 	 */
1559 	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1560 	if (status != DDI_SUCCESS) {
1561 		status = IBT_INSUFF_RESOURCE;
1562 		goto mrcommon_fail2;
1563 	}
1564 	mr = (hermon_mrhdl_t)rsrc->hr_addr;
1565 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1566 
1567 	/*
1568 	 * Setup and validate the memory region access flags.  This means
1569 	 * translating the IBTF's enable flags into the access flags that
1570 	 * will be used in later operations.
1571 	 */
1572 	mr->mr_accflag = 0;
1573 	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1574 		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1575 	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1576 		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1577 	if (flags & IBT_MR_ENABLE_REMOTE_READ)
1578 		mr->mr_accflag |= IBT_MR_REMOTE_READ;
1579 	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1580 		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1581 	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1582 		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1583 
1584 	/*
1585 	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
1586 	 * from a certain number of "constrained" bits (the least significant
1587 	 * bits) and some number of "unconstrained" bits.  The constrained
1588 	 * bits must be set to the index of the entry in the MPT table, but
1589 	 * the unconstrained bits can be set to any value we wish.  Note:
1590 	 * if no remote access is required, then the RKey value is not filled
1591 	 * in.  Otherwise both Rkey and LKey are given the same value.
1592 	 */
1593 	if (mpt)
1594 		mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
1595 
1596 	/*
1597 	 * Determine if the memory is from userland and pin the pages
1598 	 * with umem_lockmemory() if necessary.
1599 	 * Then, if this is userland memory, allocate an entry in the
1600 	 * "userland resources database".  This will later be added to
1601 	 * the database (after all further memory registration operations are
1602 	 * successful).  If we fail here, we must undo the reference counts
1603 	 * and the previous resource allocations.
1604 	 */
1605 	mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
1606 	if (mr_is_umem) {
1607 		umem_len   = ptob(btopr(bind->bi_len +
1608 		    ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
1609 		umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
1610 		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
1611 		    DDI_UMEMLOCK_LONGTERM);
1612 		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
1613 		    &umem_cookie, &hermon_umem_cbops, NULL);
1614 		if (status != 0) {
1615 			status = IBT_INSUFF_RESOURCE;
1616 			goto mrcommon_fail3;
1617 		}
1618 
1619 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1620 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1621 
1622 		bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
1623 		    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
1624 		if (bind->bi_buf == NULL) {
1625 			status = IBT_INSUFF_RESOURCE;
1626 			goto mrcommon_fail3;
1627 		}
1628 		bind->bi_type = HERMON_BINDHDL_UBUF;
1629 		bind->bi_buf->b_flags |= B_READ;
1630 
1631 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1632 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1633 
1634 		umapdb = hermon_umap_db_alloc(state->hs_instance,
1635 		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
1636 		    (uint64_t)(uintptr_t)rsrc);
1637 		if (umapdb == NULL) {
1638 			status = IBT_INSUFF_RESOURCE;
1639 			goto mrcommon_fail4;
1640 		}
1641 	}
1642 
1643 	/*
1644 	 * Setup the bindinfo for the mtt bind call
1645 	 */
1646 	bh = &mr->mr_bindinfo;
1647 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
1648 	bcopy(bind, bh, sizeof (hermon_bind_info_t));
1649 	bh->bi_bypass = bind_type;
1650 	status = hermon_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
1651 	    &mtt_pgsize_bits, mpt != NULL);
1652 	if (status != DDI_SUCCESS) {
1653 		/*
1654 		 * When mtt_bind fails, freerbuf has already been done,
1655 		 * so make sure not to call it again.
1656 		 */
1657 		bind->bi_type = bh->bi_type;
1658 		goto mrcommon_fail5;
1659 	}
1660 	mr->mr_logmttpgsz = mtt_pgsize_bits;
1661 
1662 	/*
1663 	 * Allocate MTT reference count (to track shared memory regions).
1664 	 * This reference count resource may never be used on the given
1665 	 * memory region, but if it is ever later registered as "shared"
1666 	 * memory region then this resource will be necessary.  If we fail
1667 	 * here, we do pretty much the same as above to clean up.
1668 	 */
1669 	status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
1670 	    &mtt_refcnt);
1671 	if (status != DDI_SUCCESS) {
1672 		status = IBT_INSUFF_RESOURCE;
1673 		goto mrcommon_fail6;
1674 	}
1675 	mr->mr_mttrefcntp = mtt_refcnt;
1676 	swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
1677 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
1678 	HERMON_MTT_REFCNT_INIT(swrc_tmp);
1679 
1680 	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
1681 
1682 	/*
1683 	 * Fill in the MPT entry.  This is the final step before passing
1684 	 * ownership of the MPT entry to the Hermon hardware.  We use all of
1685 	 * the information collected/calculated above to fill in the
1686 	 * requisite portions of the MPT.  Do this ONLY for DMPTs.
1687 	 */
1688 	if (mpt == NULL)
1689 		goto no_passown;
1690 
1691 	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1692 
1693 	mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
1694 	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
1695 	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1696 	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
1697 	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
1698 	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
1699 	mpt_entry.lr	  = 1;
1700 	mpt_entry.phys_addr = 0;
1701 	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
1702 
1703 	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
1704 	mpt_entry.mem_key	= mr->mr_lkey;
1705 	mpt_entry.pd		= pd->pd_pdnum;
1706 	mpt_entry.rem_acc_en = 0;
1707 	mpt_entry.fast_reg_en = 0;
1708 	mpt_entry.en_inval = 0;
1709 	mpt_entry.lkey = 0;
1710 	mpt_entry.win_cnt = 0;
1711 
1712 	if (bind_override_addr == 0) {
1713 		mpt_entry.start_addr = bh->bi_addr;
1714 	} else {
1715 		bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
1716 		mpt_entry.start_addr = bh->bi_addr;
1717 	}
1718 	mpt_entry.reg_win_len	= bh->bi_len;
1719 
1720 	mpt_entry.mtt_addr_h = mtt_addr >> 32;  /* only 8 more bits */
1721 	mpt_entry.mtt_addr_l = mtt_addr >> 3;	/* only 29 bits */
1722 
1723 	/*
1724 	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
1725 	 * the entry to the hardware if needed.  Note: in general, this
1726 	 * operation shouldn't fail.  But if it does, we have to undo
1727 	 * everything we've done above before returning error.
1728 	 *
1729 	 * For Hermon, this routine (which is common to the contexts) will only
1730 	 * set the ownership if needed - the process of passing the context
1731 	 * itself to HW will take care of setting up the MPT (based on type
1732 	 * and index).
1733 	 */
1734 
1735 	mpt_entry.bnd_qp = 0;	/* dMPT for a qp, check for window */
1736 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1737 	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1738 	if (status != HERMON_CMD_SUCCESS) {
1739 		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1740 		    status);
1741 		if (status == HERMON_CMD_INVALID_STATUS) {
1742 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1743 		}
1744 		status = ibc_get_ci_failure(0);
1745 		goto mrcommon_fail7;
1746 	}
1747 	if (hermon_rdma_debug & 0x4)
1748 		IBTF_DPRINTF_L2("mr", "  reg: mr %p  key %x",
1749 		    mr, hermon_mr_key_swap(mr->mr_rkey));
1750 no_passown:
1751 
1752 	/*
1753 	 * Fill in the rest of the Hermon Memory Region handle.  Having
1754 	 * successfully transferred ownership of the MPT, we can update the
1755 	 * following fields for use in further operations on the MR.
1756 	 */
1757 	mr->mr_mttaddr	   = mtt_addr;
1758 
1759 	mr->mr_log2_pgsz   = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
1760 	mr->mr_mptrsrcp	   = mpt;
1761 	mr->mr_mttrsrcp	   = mtt;
1762 	mr->mr_pdhdl	   = pd;
1763 	mr->mr_rsrcp	   = rsrc;
1764 	mr->mr_is_umem	   = mr_is_umem;
1765 	mr->mr_is_fmr	   = 0;
1766 	mr->mr_umemcookie  = (mr_is_umem != 0) ? umem_cookie : NULL;
1767 	mr->mr_umem_cbfunc = NULL;
1768 	mr->mr_umem_cbarg1 = NULL;
1769 	mr->mr_umem_cbarg2 = NULL;
1770 	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
1771 	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);
1772 	mr->mr_mpt_type	   = mpt_type;
1773 
1774 	/*
1775 	 * If this is userland memory, then we need to insert the previously
1776 	 * allocated entry into the "userland resources database".  This will
1777 	 * allow for later coordination between the hermon_umap_umemlock_cb()
1778 	 * callback and hermon_mr_deregister().
1779 	 */
1780 	if (mr_is_umem) {
1781 		hermon_umap_db_add(umapdb);
1782 	}
1783 
1784 	*mrhdl = mr;
1785 
1786 	return (DDI_SUCCESS);
1787 
1788 /*
1789  * The following is cleanup for all possible failure cases in this routine
1790  */
1791 mrcommon_fail7:
1792 	hermon_rsrc_free(state, &mtt_refcnt);
1793 mrcommon_fail6:
1794 	hermon_mr_mem_unbind(state, bh);
1795 	bind->bi_type = bh->bi_type;
1796 mrcommon_fail5:
1797 	if (mr_is_umem) {
1798 		hermon_umap_db_free(umapdb);
1799 	}
1800 mrcommon_fail4:
1801 	if (mr_is_umem) {
1802 		/*
1803 		 * Free up the memory ddi_umem_iosetup() allocates
1804 		 * internally.
1805 		 */
1806 		if (bind->bi_type == HERMON_BINDHDL_UBUF) {
1807 			freerbuf(bind->bi_buf);
1808 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1809 			bind->bi_type = HERMON_BINDHDL_NONE;
1810 			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1811 		}
1812 		ddi_umem_unlock(umem_cookie);
1813 	}
1814 mrcommon_fail3:
1815 	hermon_rsrc_free(state, &rsrc);
1816 mrcommon_fail2:
1817 	if (mpt != NULL)
1818 		hermon_rsrc_free(state, &mpt);
1819 mrcommon_fail1:
1820 	hermon_pd_refcnt_dec(pd);
1821 mrcommon_fail:
1822 	return (status);
1823 }
1824 
1825 /*
1826  * hermon_dma_mr_register()
1827  *    Context: Can be called from base context.
1828  */
1829 int
1830 hermon_dma_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
1831     ibt_dmr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl)
1832 {
1833 	hermon_rsrc_t		*mpt, *rsrc;
1834 	hermon_hw_dmpt_t	mpt_entry;
1835 	hermon_mrhdl_t		mr;
1836 	ibt_mr_flags_t		flags;
1837 	uint_t			sleep;
1838 	int			status;
1839 
1840 	/* Extract the flags field */
1841 	flags = mr_attr->dmr_flags;
1842 
1843 	/*
1844 	 * Check the sleep flag.  Ensure that it is consistent with the
1845 	 * current thread context (i.e. if we are currently in the interrupt
1846 	 * context, then we shouldn't be attempting to sleep).
1847 	 */
1848 	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
1849 	if ((sleep == HERMON_SLEEP) &&
1850 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1851 		status = IBT_INVALID_PARAM;
1852 		goto mrcommon_fail;
1853 	}
1854 
1855 	/* Increment the reference count on the protection domain (PD) */
1856 	hermon_pd_refcnt_inc(pd);
1857 
1858 	/*
1859 	 * Allocate an MPT entry.  This will be filled in with all the
1860 	 * necessary parameters to define the memory region.  And then
1861 	 * ownership will be passed to the hardware in the final step
1862 	 * below.  If we fail here, we must undo the protection domain
1863 	 * reference count.
1864 	 */
1865 	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1866 	if (status != DDI_SUCCESS) {
1867 		status = IBT_INSUFF_RESOURCE;
1868 		goto mrcommon_fail1;
1869 	}
1870 
1871 	/*
1872 	 * Allocate the software structure for tracking the memory region (i.e.
1873 	 * the Hermon Memory Region handle).  If we fail here, we must undo
1874 	 * the protection domain reference count and the previous resource
1875 	 * allocation.
1876 	 */
1877 	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1878 	if (status != DDI_SUCCESS) {
1879 		status = IBT_INSUFF_RESOURCE;
1880 		goto mrcommon_fail2;
1881 	}
1882 	mr = (hermon_mrhdl_t)rsrc->hr_addr;
1883 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1884 	bzero(mr, sizeof (*mr));
1885 
1886 	/*
1887 	 * Setup and validate the memory region access flags.  This means
1888 	 * translating the IBTF's enable flags into the access flags that
1889 	 * will be used in later operations.
1890 	 */
1891 	mr->mr_accflag = 0;
1892 	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1893 		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1894 	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1895 		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1896 	if (flags & IBT_MR_ENABLE_REMOTE_READ)
1897 		mr->mr_accflag |= IBT_MR_REMOTE_READ;
1898 	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1899 		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1900 	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1901 		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1902 
1903 	/*
1904 	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
1905 	 * from a certain number of "constrained" bits (the least significant
1906 	 * bits) and some number of "unconstrained" bits.  The constrained
1907 	 * bits must be set to the index of the entry in the MPT table, but
1908 	 * the unconstrained bits can be set to any value we wish.  Note:
1909 	 * if no remote access is required, then the RKey value is not filled
1910 	 * in.  Otherwise both Rkey and LKey are given the same value.
1911 	 */
1912 	if (mpt)
1913 		mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
1914 
1915 	/*
1916 	 * Fill in the MPT entry.  This is the final step before passing
1917 	 * ownership of the MPT entry to the Hermon hardware.  We use all of
1918 	 * the information collected/calculated above to fill in the
1919 	 * requisite portions of the MPT.  Do this ONLY for DMPTs.
1920 	 */
1921 	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1922 
1923 	mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
1924 	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
1925 	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1926 	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
1927 	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
1928 	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
1929 	mpt_entry.lr	  = 1;
1930 	mpt_entry.phys_addr = 1;	/* critical bit for this */
1931 	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
1932 
1933 	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
1934 	mpt_entry.mem_key	= mr->mr_lkey;
1935 	mpt_entry.pd		= pd->pd_pdnum;
1936 	mpt_entry.rem_acc_en = 0;
1937 	mpt_entry.fast_reg_en = 0;
1938 	mpt_entry.en_inval = 0;
1939 	mpt_entry.lkey = 0;
1940 	mpt_entry.win_cnt = 0;
1941 
1942 	mpt_entry.start_addr = mr_attr->dmr_paddr;
1943 	mpt_entry.reg_win_len = mr_attr->dmr_len;
1944 	if (mr_attr->dmr_len == 0)
1945 		mpt_entry.len_b64 = 1;	/* needed for 2^^64 length */
1946 
1947 	mpt_entry.mtt_addr_h = 0;
1948 	mpt_entry.mtt_addr_l = 0;
1949 
1950 	/*
1951 	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
1952 	 * the entry to the hardware if needed.  Note: in general, this
1953 	 * operation shouldn't fail.  But if it does, we have to undo
1954 	 * everything we've done above before returning error.
1955 	 *
1956 	 * For Hermon, this routine (which is common to the contexts) will only
1957 	 * set the ownership if needed - the process of passing the context
1958 	 * itself to HW will take care of setting up the MPT (based on type
1959 	 * and index).
1960 	 */
1961 
1962 	mpt_entry.bnd_qp = 0;	/* dMPT for a qp, check for window */
1963 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1964 	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1965 	if (status != HERMON_CMD_SUCCESS) {
1966 		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1967 		    status);
1968 		if (status == HERMON_CMD_INVALID_STATUS) {
1969 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1970 		}
1971 		status = ibc_get_ci_failure(0);
1972 		goto mrcommon_fail7;
1973 	}
1974 
1975 	/*
1976 	 * Fill in the rest of the Hermon Memory Region handle.  Having
1977 	 * successfully transferred ownership of the MPT, we can update the
1978 	 * following fields for use in further operations on the MR.
1979 	 */
1980 	mr->mr_mttaddr	   = 0;
1981 
1982 	mr->mr_log2_pgsz   = 0;
1983 	mr->mr_mptrsrcp	   = mpt;
1984 	mr->mr_mttrsrcp	   = NULL;
1985 	mr->mr_pdhdl	   = pd;
1986 	mr->mr_rsrcp	   = rsrc;
1987 	mr->mr_is_umem	   = 0;
1988 	mr->mr_is_fmr	   = 0;
1989 	mr->mr_umemcookie  = NULL;
1990 	mr->mr_umem_cbfunc = NULL;
1991 	mr->mr_umem_cbarg1 = NULL;
1992 	mr->mr_umem_cbarg2 = NULL;
1993 	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
1994 	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);
1995 	mr->mr_mpt_type	   = HERMON_MPT_DMPT;
1996 
1997 	*mrhdl = mr;
1998 
1999 	return (DDI_SUCCESS);
2000 
2001 /*
2002  * The following is cleanup for all possible failure cases in this routine
2003  */
2004 mrcommon_fail7:
2005 	hermon_rsrc_free(state, &rsrc);
2006 mrcommon_fail2:
2007 	hermon_rsrc_free(state, &mpt);
2008 mrcommon_fail1:
2009 	hermon_pd_refcnt_dec(pd);
2010 mrcommon_fail:
2011 	return (status);
2012 }
2013 
2014 /*
2015  * hermon_mr_alloc_lkey()
2016  *    Context: Can be called from base context.
2017  */
2018 int
2019 hermon_mr_alloc_lkey(hermon_state_t *state, hermon_pdhdl_t pd,
2020     ibt_lkey_flags_t flags, uint_t nummtt, hermon_mrhdl_t *mrhdl)
2021 {
2022 	hermon_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
2023 	hermon_sw_refcnt_t	*swrc_tmp;
2024 	hermon_hw_dmpt_t	mpt_entry;
2025 	hermon_mrhdl_t		mr;
2026 	uint64_t		mtt_addr;
2027 	uint_t			sleep;
2028 	int			status;
2029 
2030 	/* Increment the reference count on the protection domain (PD) */
2031 	hermon_pd_refcnt_inc(pd);
2032 
2033 	sleep = (flags & IBT_KEY_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
2034 
2035 	/*
2036 	 * Allocate an MPT entry.  This will be filled in with "some" of the
2037 	 * necessary parameters to define the memory region.  And then
2038 	 * ownership will be passed to the hardware in the final step
2039 	 * below.  If we fail here, we must undo the protection domain
2040 	 * reference count.
2041 	 *
2042 	 * The MTTs will get filled in when the FRWR is processed.
2043 	 */
2044 	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
2045 	if (status != DDI_SUCCESS) {
2046 		status = IBT_INSUFF_RESOURCE;
2047 		goto alloclkey_fail1;
2048 	}
2049 
2050 	/*
2051 	 * Allocate the software structure for tracking the memory region (i.e.
2052 	 * the Hermon Memory Region handle).  If we fail here, we must undo
2053 	 * the protection domain reference count and the previous resource
2054 	 * allocation.
2055 	 */
2056 	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
2057 	if (status != DDI_SUCCESS) {
2058 		status = IBT_INSUFF_RESOURCE;
2059 		goto alloclkey_fail2;
2060 	}
2061 	mr = (hermon_mrhdl_t)rsrc->hr_addr;
2062 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
2063 	bzero(mr, sizeof (*mr));
2064 	mr->mr_bindinfo.bi_type = HERMON_BINDHDL_LKEY;
2065 
2066 	mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
2067 
2068 	status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
2069 	if (status != DDI_SUCCESS) {
2070 		status = IBT_INSUFF_RESOURCE;
2071 		goto alloclkey_fail3;
2072 	}
2073 	mr->mr_logmttpgsz = PAGESHIFT;
2074 
2075 	/*
2076 	 * Allocate MTT reference count (to track shared memory regions).
2077 	 * This reference count resource may never be used on the given
2078 	 * memory region, but if it is ever later registered as "shared"
2079 	 * memory region then this resource will be necessary.  If we fail
2080 	 * here, we do pretty much the same as above to clean up.
2081 	 */
2082 	status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
2083 	    &mtt_refcnt);
2084 	if (status != DDI_SUCCESS) {
2085 		status = IBT_INSUFF_RESOURCE;
2086 		goto alloclkey_fail4;
2087 	}
2088 	mr->mr_mttrefcntp = mtt_refcnt;
2089 	swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
2090 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
2091 	HERMON_MTT_REFCNT_INIT(swrc_tmp);
2092 
2093 	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
2094 
2095 	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2096 	mpt_entry.status = HERMON_MPT_FREE;
2097 	mpt_entry.lw = 1;
2098 	mpt_entry.lr = 1;
2099 	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2100 	mpt_entry.entity_sz = mr->mr_logmttpgsz;
2101 	mpt_entry.mem_key = mr->mr_lkey;
2102 	mpt_entry.pd = pd->pd_pdnum;
2103 	mpt_entry.fast_reg_en = 1;
2104 	mpt_entry.rem_acc_en = 1;
2105 	mpt_entry.en_inval = 1;
2106 	if (flags & IBT_KEY_REMOTE) {
2107 		mpt_entry.ren_inval = 1;
2108 	}
2109 	mpt_entry.mtt_size = nummtt;
2110 	mpt_entry.mtt_addr_h = mtt_addr >> 32;	/* only 8 more bits */
2111 	mpt_entry.mtt_addr_l = mtt_addr >> 3;	/* only 29 bits */
2112 
2113 	/*
2114 	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
2115 	 * the entry to the hardware if needed.  Note: in general, this
2116 	 * operation shouldn't fail.  But if it does, we have to undo
2117 	 * everything we've done above before returning error.
2118 	 *
2119 	 * For Hermon, this routine (which is common to the contexts) will only
2120 	 * set the ownership if needed - the process of passing the context
2121 	 * itself to HW will take care of setting up the MPT (based on type
2122 	 * and index).
2123 	 */
2124 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2125 	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
2126 	if (status != HERMON_CMD_SUCCESS) {
2127 		cmn_err(CE_CONT, "Hermon: alloc_lkey: SW2HW_MPT command "
2128 		    "failed: %08x\n", status);
2129 		if (status == HERMON_CMD_INVALID_STATUS) {
2130 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2131 		}
2132 		status = ibc_get_ci_failure(0);
2133 		goto alloclkey_fail5;
2134 	}
2135 
2136 	/*
2137 	 * Fill in the rest of the Hermon Memory Region handle.  Having
2138 	 * successfully transferred ownership of the MPT, we can update the
2139 	 * following fields for use in further operations on the MR.
2140 	 */
2141 	mr->mr_accflag = IBT_MR_LOCAL_WRITE;
2142 	mr->mr_mttaddr = mtt_addr;
2143 	mr->mr_log2_pgsz = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
2144 	mr->mr_mptrsrcp = mpt;
2145 	mr->mr_mttrsrcp = mtt;
2146 	mr->mr_pdhdl = pd;
2147 	mr->mr_rsrcp = rsrc;
2148 	mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
2149 	mr->mr_rkey = mr->mr_lkey;
2150 	mr->mr_mpt_type = HERMON_MPT_DMPT;
2151 
2152 	*mrhdl = mr;
2153 	return (DDI_SUCCESS);
2154 
2155 alloclkey_fail5:
2156 	hermon_rsrc_free(state, &mtt_refcnt);
2157 alloclkey_fail4:
2158 	hermon_rsrc_free(state, &mtt);
2159 alloclkey_fail3:
2160 	hermon_rsrc_free(state, &rsrc);
2161 alloclkey_fail2:
2162 	hermon_rsrc_free(state, &mpt);
2163 alloclkey_fail1:
2164 	hermon_pd_refcnt_dec(pd);
2165 	return (status);
2166 }
2167 
2168 /*
2169  * hermon_mr_fexch_mpt_init()
2170  *    Context: Can be called from base context.
2171  *
2172  * This is the same as alloc_lkey, but not returning an mrhdl.
2173  */
2174 int
2175 hermon_mr_fexch_mpt_init(hermon_state_t *state, hermon_pdhdl_t pd,
2176     uint32_t mpt_indx, uint_t nummtt, uint64_t mtt_addr, uint_t sleep)
2177 {
2178 	hermon_hw_dmpt_t	mpt_entry;
2179 	int			status;
2180 
2181 	/*
2182 	 * The MTTs will get filled in when the FRWR is processed.
2183 	 */
2184 
2185 	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2186 	mpt_entry.status = HERMON_MPT_FREE;
2187 	mpt_entry.lw = 1;
2188 	mpt_entry.lr = 1;
2189 	mpt_entry.rw = 1;
2190 	mpt_entry.rr = 1;
2191 	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2192 	mpt_entry.entity_sz = PAGESHIFT;
2193 	mpt_entry.mem_key = mpt_indx;
2194 	mpt_entry.pd = pd->pd_pdnum;
2195 	mpt_entry.fast_reg_en = 1;
2196 	mpt_entry.rem_acc_en = 1;
2197 	mpt_entry.en_inval = 1;
2198 	mpt_entry.ren_inval = 1;
2199 	mpt_entry.mtt_size = nummtt;
2200 	mpt_entry.mtt_addr_h = mtt_addr >> 32;	/* only 8 more bits */
2201 	mpt_entry.mtt_addr_l = mtt_addr >> 3;	/* only 29 bits */
2202 
2203 	/*
2204 	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
2205 	 * the entry to the hardware if needed.  Note: in general, this
2206 	 * operation shouldn't fail.  But if it does, we have to undo
2207 	 * everything we've done above before returning error.
2208 	 *
2209 	 * For Hermon, this routine (which is common to the contexts) will only
2210 	 * set the ownership if needed - the process of passing the context
2211 	 * itself to HW will take care of setting up the MPT (based on type
2212 	 * and index).
2213 	 */
2214 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2215 	    sizeof (hermon_hw_dmpt_t), mpt_indx, sleep);
2216 	if (status != HERMON_CMD_SUCCESS) {
2217 		cmn_err(CE_CONT, "Hermon: fexch_mpt_init: SW2HW_MPT command "
2218 		    "failed: %08x\n", status);
2219 		if (status == HERMON_CMD_INVALID_STATUS) {
2220 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2221 		}
2222 		status = ibc_get_ci_failure(0);
2223 		return (status);
2224 	}
2225 	/* Increment the reference count on the protection domain (PD) */
2226 	hermon_pd_refcnt_inc(pd);
2227 
2228 	return (DDI_SUCCESS);
2229 }
2230 
2231 /*
2232  * hermon_mr_fexch_mpt_fini()
2233  *    Context: Can be called from base context.
2234  *
2235  * This is the same as deregister_mr, without an mrhdl.
2236  */
2237 int
2238 hermon_mr_fexch_mpt_fini(hermon_state_t *state, hermon_pdhdl_t pd,
2239     uint32_t mpt_indx, uint_t sleep)
2240 {
2241 	int			status;
2242 
2243 	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
2244 	    NULL, 0, mpt_indx, sleep);
2245 	if (status != DDI_SUCCESS) {
2246 		cmn_err(CE_CONT, "Hermon: fexch_mpt_fini: HW2SW_MPT command "
2247 		    "failed: %08x\n", status);
2248 		if (status == HERMON_CMD_INVALID_STATUS) {
2249 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2250 		}
2251 		status = ibc_get_ci_failure(0);
2252 		return (status);
2253 	}
2254 
2255 	/* Decrement the reference count on the protection domain (PD) */
2256 	hermon_pd_refcnt_dec(pd);
2257 
2258 	return (DDI_SUCCESS);
2259 }
2260 
2261 /*
2262  * hermon_mr_mtt_bind()
2263  *    Context: Can be called from interrupt or base context.
2264  */
2265 int
2266 hermon_mr_mtt_bind(hermon_state_t *state, hermon_bind_info_t *bind,
2267     ddi_dma_handle_t bind_dmahdl, hermon_rsrc_t **mtt, uint_t *mtt_pgsize_bits,
2268     uint_t is_buffer)
2269 {
2270 	uint64_t		nummtt;
2271 	uint_t			sleep;
2272 	int			status;
2273 
2274 	/*
2275 	 * Check the sleep flag.  Ensure that it is consistent with the
2276 	 * current thread context (i.e. if we are currently in the interrupt
2277 	 * context, then we shouldn't be attempting to sleep).
2278 	 */
2279 	sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ?
2280 	    HERMON_NOSLEEP : HERMON_SLEEP;
2281 	if ((sleep == HERMON_SLEEP) &&
2282 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2283 		status = IBT_INVALID_PARAM;
2284 		goto mrmttbind_fail;
2285 	}
2286 
2287 	/*
2288 	 * Bind the memory and determine the mapped addresses.  This is
2289 	 * the first of two routines that do all the "heavy lifting" for
2290 	 * the Hermon memory registration routines.  The hermon_mr_mem_bind()
2291 	 * routine takes the "bind" struct with all its fields filled
2292 	 * in and returns a list of DMA cookies (for the PCI mapped addresses
2293 	 * corresponding to the specified address region) which are used by
2294 	 * the hermon_mr_fast_mtt_write() routine below.  If we fail here, we
2295 	 * must undo all the previous resource allocation (and PD reference
2296 	 * count).
2297 	 */
2298 	status = hermon_mr_mem_bind(state, bind, bind_dmahdl, sleep, is_buffer);
2299 	if (status != DDI_SUCCESS) {
2300 		status = IBT_INSUFF_RESOURCE;
2301 		goto mrmttbind_fail;
2302 	}
2303 
2304 	/*
2305 	 * Determine number of pages spanned.  This routine uses the
2306 	 * information in the "bind" struct to determine the required
2307 	 * number of MTT entries needed (and returns the suggested page size -
2308 	 * as a "power-of-2" - for each MTT entry).
2309 	 */
2310 	nummtt = hermon_mr_nummtt_needed(state, bind, mtt_pgsize_bits);
2311 
2312 	/*
2313 	 * Allocate the MTT entries.  Use the calculations performed above to
2314 	 * allocate the required number of MTT entries. If we fail here, we
2315 	 * must not only undo all the previous resource allocation (and PD
2316 	 * reference count), but we must also unbind the memory.
2317 	 */
2318 	status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, mtt);
2319 	if (status != DDI_SUCCESS) {
2320 		status = IBT_INSUFF_RESOURCE;
2321 		goto mrmttbind_fail2;
2322 	}
2323 
2324 	/*
2325 	 * Write the mapped addresses into the MTT entries.  This is part two
2326 	 * of the "heavy lifting" routines that we talked about above.  Note:
2327 	 * we pass the suggested page size from the earlier operation here.
2328 	 * And if we fail here, we again do pretty much the same huge clean up.
2329 	 */
2330 	status = hermon_mr_fast_mtt_write(state, *mtt, bind, *mtt_pgsize_bits);
2331 	if (status != DDI_SUCCESS) {
2332 		/*
2333 		 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
2334 		 * only if it detects a HW error during DMA.
2335 		 */
2336 		hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2337 		status = ibc_get_ci_failure(0);
2338 		goto mrmttbind_fail3;
2339 	}
2340 	return (DDI_SUCCESS);
2341 
2342 /*
2343  * The following is cleanup for all possible failure cases in this routine
2344  */
2345 mrmttbind_fail3:
2346 	hermon_rsrc_free(state, mtt);
2347 mrmttbind_fail2:
2348 	hermon_mr_mem_unbind(state, bind);
2349 mrmttbind_fail:
2350 	return (status);
2351 }
2352 
2353 
2354 /*
2355  * hermon_mr_mtt_unbind()
2356  *    Context: Can be called from interrupt or base context.
2357  */
2358 int
2359 hermon_mr_mtt_unbind(hermon_state_t *state, hermon_bind_info_t *bind,
2360     hermon_rsrc_t *mtt)
2361 {
2362 	/*
2363 	 * Free up the MTT entries and unbind the memory.  Here, as above, we
2364 	 * attempt to free these resources only if it is appropriate to do so.
2365 	 */
2366 	hermon_mr_mem_unbind(state, bind);
2367 	hermon_rsrc_free(state, &mtt);
2368 
2369 	return (DDI_SUCCESS);
2370 }
2371 
2372 
2373 /*
2374  * hermon_mr_common_rereg()
2375  *    Context: Can be called from interrupt or base context.
2376  */
2377 static int
2378 hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
2379     hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
2380     hermon_mr_options_t *op)
2381 {
2382 	hermon_rsrc_t		*mpt;
2383 	ibt_mr_attr_flags_t	acc_flags_to_use;
2384 	ibt_mr_flags_t		flags;
2385 	hermon_pdhdl_t		pd_to_use;
2386 	hermon_hw_dmpt_t	mpt_entry;
2387 	uint64_t		mtt_addr_to_use, vaddr_to_use, len_to_use;
2388 	uint_t			sleep, dereg_level;
2389 	int			status;
2390 
2391 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2392 
2393 	/*
2394 	 * Check here to see if the memory region corresponds to a userland
2395 	 * mapping.  Reregistration of userland memory regions is not
2396 	 * currently supported.  Return failure.
2397 	 */
2398 	if (mr->mr_is_umem) {
2399 		status = IBT_MR_HDL_INVALID;
2400 		goto mrrereg_fail;
2401 	}
2402 
2403 	mutex_enter(&mr->mr_lock);
2404 
2405 	/* Pull MPT resource pointer from the Hermon Memory Region handle */
2406 	mpt = mr->mr_mptrsrcp;
2407 
2408 	/* Extract the flags field from the hermon_bind_info_t */
2409 	flags = bind->bi_flags;
2410 
2411 	/*
2412 	 * Check the sleep flag.  Ensure that it is consistent with the
2413 	 * current thread context (i.e. if we are currently in the interrupt
2414 	 * context, then we shouldn't be attempting to sleep).
2415 	 */
2416 	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
2417 	if ((sleep == HERMON_SLEEP) &&
2418 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2419 		mutex_exit(&mr->mr_lock);
2420 		status = IBT_INVALID_PARAM;
2421 		goto mrrereg_fail;
2422 	}
2423 
2424 	/*
2425 	 * First step is to temporarily invalidate the MPT entry.  This
2426 	 * regains ownership from the hardware, and gives us the opportunity
2427 	 * to modify the entry.  Note: The HW2SW_MPT command returns the
2428 	 * current MPT entry contents.  These are saved away here because
2429 	 * they will be reused in a later step below.  If the region has
2430 	 * bound memory windows then we fail, returning an "in use" error code.
2431 	 * Otherwise, this is an unexpected error and we deregister the
2432 	 * memory region and return error.
2433 	 *
2434 	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
2435 	 * against holding the lock around this rereg call in all contexts.
2436 	 */
2437 	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
2438 	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
2439 	if (status != HERMON_CMD_SUCCESS) {
2440 		mutex_exit(&mr->mr_lock);
2441 		if (status == HERMON_CMD_REG_BOUND) {
2442 			return (IBT_MR_IN_USE);
2443 		} else {
2444 			cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: "
2445 			    "%08x\n", status);
2446 			if (status == HERMON_CMD_INVALID_STATUS) {
2447 				hermon_fm_ereport(state, HCA_SYS_ERR,
2448 				    HCA_ERR_SRV_LOST);
2449 			}
2450 			/*
2451 			 * Call deregister and ensure that all current
2452 			 * resources get freed up
2453 			 */
2454 			if (hermon_mr_deregister(state, &mr,
2455 			    HERMON_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
2456 				HERMON_WARNING(state, "failed to deregister "
2457 				    "memory region");
2458 			}
2459 			return (ibc_get_ci_failure(0));
2460 		}
2461 	}
2462 
2463 	/*
2464 	 * If we're changing the protection domain, then validate the new one
2465 	 */
2466 	if (flags & IBT_MR_CHANGE_PD) {
2467 
2468 		/* Check for valid PD handle pointer */
2469 		if (pd == NULL) {
2470 			mutex_exit(&mr->mr_lock);
2471 			/*
2472 			 * Call deregister and ensure that all current
2473 			 * resources get properly freed up. Unnecessary
2474 			 * here to attempt to regain software ownership
2475 			 * of the MPT entry as that has already been
2476 			 * done above.
2477 			 */
2478 			if (hermon_mr_deregister(state, &mr,
2479 			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
2480 			    DDI_SUCCESS) {
2481 				HERMON_WARNING(state, "failed to deregister "
2482 				    "memory region");
2483 			}
2484 			status = IBT_PD_HDL_INVALID;
2485 			goto mrrereg_fail;
2486 		}
2487 
2488 		/* Use the new PD handle in all operations below */
2489 		pd_to_use = pd;
2490 
2491 	} else {
2492 		/* Use the current PD handle in all operations below */
2493 		pd_to_use = mr->mr_pdhdl;
2494 	}
2495 
2496 	/*
2497 	 * If we're changing access permissions, then validate the new ones
2498 	 */
2499 	if (flags & IBT_MR_CHANGE_ACCESS) {
2500 		/*
2501 		 * Validate the access flags.  Both remote write and remote
2502 		 * atomic require the local write flag to be set
2503 		 */
2504 		if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
2505 		    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
2506 		    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
2507 			mutex_exit(&mr->mr_lock);
2508 			/*
2509 			 * Call deregister and ensure that all current
2510 			 * resources get properly freed up. Unnecessary
2511 			 * here to attempt to regain software ownership
2512 			 * of the MPT entry as that has already been
2513 			 * done above.
2514 			 */
2515 			if (hermon_mr_deregister(state, &mr,
2516 			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
2517 			    DDI_SUCCESS) {
2518 				HERMON_WARNING(state, "failed to deregister "
2519 				    "memory region");
2520 			}
2521 			status = IBT_MR_ACCESS_REQ_INVALID;
2522 			goto mrrereg_fail;
2523 		}
2524 
2525 		/*
2526 		 * Setup and validate the memory region access flags.  This
2527 		 * means translating the IBTF's enable flags into the access
2528 		 * flags that will be used in later operations.
2529 		 */
2530 		acc_flags_to_use = 0;
2531 		if (flags & IBT_MR_ENABLE_WINDOW_BIND)
2532 			acc_flags_to_use |= IBT_MR_WINDOW_BIND;
2533 		if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
2534 			acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
2535 		if (flags & IBT_MR_ENABLE_REMOTE_READ)
2536 			acc_flags_to_use |= IBT_MR_REMOTE_READ;
2537 		if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
2538 			acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
2539 		if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
2540 			acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;
2541 
2542 	} else {
2543 		acc_flags_to_use = mr->mr_accflag;
2544 	}
2545 
2546 	/*
2547 	 * If we're modifying the translation, then figure out whether
2548 	 * we can reuse the current MTT resources.  This means calling
2549 	 * hermon_mr_rereg_xlat_helper() which does most of the heavy lifting
2550 	 * for the reregistration.  If the current memory region contains
2551 	 * sufficient MTT entries for the new regions, then it will be
2552 	 * reused and filled in.  Otherwise, new entries will be allocated,
2553 	 * the old ones will be freed, and the new entries will be filled
2554 	 * in.  Note:  If we're not modifying the translation, then we
2555 	 * should already have all the information we need to update the MPT.
2556 	 * Also note: If hermon_mr_rereg_xlat_helper() fails, it will return
2557 	 * a "dereg_level" which is the level of cleanup that needs to be
2558 	 * passed to hermon_mr_deregister() to finish the cleanup.
2559 	 */
2560 	if (flags & IBT_MR_CHANGE_TRANSLATION) {
2561 		status = hermon_mr_rereg_xlat_helper(state, mr, bind, op,
2562 		    &mtt_addr_to_use, sleep, &dereg_level);
2563 		if (status != DDI_SUCCESS) {
2564 			mutex_exit(&mr->mr_lock);
2565 			/*
2566 			 * Call deregister and ensure that all resources get
2567 			 * properly freed up.
2568 			 */
2569 			if (hermon_mr_deregister(state, &mr, dereg_level,
2570 			    sleep) != DDI_SUCCESS) {
2571 				HERMON_WARNING(state, "failed to deregister "
2572 				    "memory region");
2573 			}
2574 			goto mrrereg_fail;
2575 		}
2576 		vaddr_to_use = mr->mr_bindinfo.bi_addr;
2577 		len_to_use   = mr->mr_bindinfo.bi_len;
2578 	} else {
2579 		mtt_addr_to_use = mr->mr_mttaddr;
2580 		vaddr_to_use = mr->mr_bindinfo.bi_addr;
2581 		len_to_use   = mr->mr_bindinfo.bi_len;
2582 	}
2583 
2584 	/*
2585 	 * Calculate new keys (Lkey, Rkey) from MPT index.  Just like they were
2586 	 * when the region was first registered, each key is formed from
2587 	 * "constrained" bits and "unconstrained" bits.  Note:  If no remote
2588 	 * access is required, then the RKey value is not filled in.  Otherwise
2589 	 * both Rkey and LKey are given the same value.
2590 	 */
2591 	mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
2592 	if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
2593 	    (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
2594 	    (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
2595 		mr->mr_rkey = mr->mr_lkey;
2596 	} else
2597 		mr->mr_rkey = 0;
2598 
2599 	/*
2600 	 * Fill in the MPT entry.  This is the final step before passing
2601 	 * ownership of the MPT entry to the Hermon hardware.  We use all of
2602 	 * the information collected/calculated above to fill in the
2603 	 * requisite portions of the MPT.
2604 	 */
2605 	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2606 
2607 	mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
2608 	mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND)   ? 1 : 0;
2609 	mpt_entry.atomic  = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
2610 	mpt_entry.rw	  = (acc_flags_to_use & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
2611 	mpt_entry.rr	  = (acc_flags_to_use & IBT_MR_REMOTE_READ)   ? 1 : 0;
2612 	mpt_entry.lw	  = (acc_flags_to_use & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
2613 	mpt_entry.lr	  = 1;
2614 	mpt_entry.phys_addr = 0;
2615 	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2616 
2617 	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
2618 	mpt_entry.mem_key	= mr->mr_lkey;
2619 	mpt_entry.pd		= pd_to_use->pd_pdnum;
2620 
2621 	mpt_entry.start_addr	= vaddr_to_use;
2622 	mpt_entry.reg_win_len	= len_to_use;
2623 	mpt_entry.mtt_addr_h = mtt_addr_to_use >> 32;
2624 	mpt_entry.mtt_addr_l = mtt_addr_to_use >> 3;
2625 
2626 	/*
2627 	 * Write the updated MPT entry to hardware
2628 	 *
2629 	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
2630 	 * against holding the lock around this rereg call in all contexts.
2631 	 */
2632 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2633 	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
2634 	if (status != HERMON_CMD_SUCCESS) {
2635 		mutex_exit(&mr->mr_lock);
2636 		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
2637 		    status);
2638 		if (status == HERMON_CMD_INVALID_STATUS) {
2639 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2640 		}
2641 		/*
2642 		 * Call deregister and ensure that all current resources get
2643 		 * properly freed up. Unnecessary here to attempt to regain
2644 		 * software ownership of the MPT entry as that has already
2645 		 * been done above.
2646 		 */
2647 		if (hermon_mr_deregister(state, &mr,
2648 		    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
2649 			HERMON_WARNING(state, "failed to deregister memory "
2650 			    "region");
2651 		}
2652 		return (ibc_get_ci_failure(0));
2653 	}
2654 
2655 	/*
2656 	 * If we're changing PD, then update their reference counts now.
2657 	 * This means decrementing the reference count on the old PD and
2658 	 * incrementing the reference count on the new PD.
2659 	 */
2660 	if (flags & IBT_MR_CHANGE_PD) {
2661 		hermon_pd_refcnt_dec(mr->mr_pdhdl);
2662 		hermon_pd_refcnt_inc(pd);
2663 	}
2664 
2665 	/*
2666 	 * Update the contents of the Hermon Memory Region handle to reflect
2667 	 * what has been changed.
2668 	 */
2669 	mr->mr_pdhdl	  = pd_to_use;
2670 	mr->mr_accflag	  = acc_flags_to_use;
2671 	mr->mr_is_umem	  = 0;
2672 	mr->mr_is_fmr	  = 0;
2673 	mr->mr_umemcookie = NULL;
2674 	mr->mr_lkey	  = hermon_mr_key_swap(mr->mr_lkey);
2675 	mr->mr_rkey	  = hermon_mr_key_swap(mr->mr_rkey);
2676 
2677 	/* New MR handle is same as the old */
2678 	*mrhdl_new = mr;
2679 	mutex_exit(&mr->mr_lock);
2680 
2681 	return (DDI_SUCCESS);
2682 
2683 mrrereg_fail:
2684 	return (status);
2685 }
2686 
2687 
2688 /*
2689  * hermon_mr_rereg_xlat_helper
2690  *    Context: Can be called from interrupt or base context.
2691  *    Note: This routine expects the "mr_lock" to be held when it
2692  *    is called.  Upon returning failure, this routine passes information
2693  *    about what "dereg_level" should be passed to hermon_mr_deregister().
2694  */
2695 static int
2696 hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
2697     hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
2698     uint_t sleep, uint_t *dereg_level)
2699 {
2700 	hermon_rsrc_t		*mtt, *mtt_refcnt;
2701 	hermon_sw_refcnt_t	*swrc_old, *swrc_new;
2702 	ddi_dma_handle_t	dmahdl;
2703 	uint64_t		nummtt_needed, nummtt_in_currrsrc, max_sz;
2704 	uint_t			mtt_pgsize_bits, bind_type, reuse_dmahdl;
2705 	int			status;
2706 
2707 	ASSERT(MUTEX_HELD(&mr->mr_lock));
2708 
2709 	/*
2710 	 * Check the "options" flag.  Currently this flag tells the driver
2711 	 * whether or not the region should be bound normally (i.e. with
2712 	 * entries written into the PCI IOMMU) or whether it should be
2713 	 * registered to bypass the IOMMU.
2714 	 */
2715 	if (op == NULL) {
2716 		bind_type = HERMON_BINDMEM_NORMAL;
2717 	} else {
2718 		bind_type = op->mro_bind_type;
2719 	}
2720 
2721 	/*
2722 	 * Check for invalid length.  Check is the length is zero or if the
2723 	 * length is larger than the maximum configured value.  Return error
2724 	 * if it is.
2725 	 */
2726 	max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
2727 	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
2728 		/*
2729 		 * Deregister will be called upon returning failure from this
2730 		 * routine. This will ensure that all current resources get
2731 		 * properly freed up. Unnecessary to attempt to regain
2732 		 * software ownership of the MPT entry as that has already
2733 		 * been done above (in hermon_mr_reregister())
2734 		 */
2735 		*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT;
2736 
2737 		status = IBT_MR_LEN_INVALID;
2738 		goto mrrereghelp_fail;
2739 	}
2740 
2741 	/*
2742 	 * Determine the number of pages necessary for new region and the
2743 	 * number of pages supported by the current MTT resources
2744 	 */
2745 	nummtt_needed = hermon_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
2746 	nummtt_in_currrsrc = mr->mr_mttrsrcp->hr_len >> HERMON_MTT_SIZE_SHIFT;
2747 
2748 	/*
2749 	 * Depending on whether we have enough pages or not, the next step is
2750 	 * to fill in a set of MTT entries that reflect the new mapping.  In
2751 	 * the first case below, we already have enough entries.  This means
2752 	 * we need to unbind the memory from the previous mapping, bind the
2753 	 * memory for the new mapping, write the new MTT entries, and update
2754 	 * the mr to reflect the changes.
2755 	 * In the second case below, we do not have enough entries in the
2756 	 * current mapping.  So, in this case, we need not only to unbind the
2757 	 * current mapping, but we need to free up the MTT resources associated
2758 	 * with that mapping.  After we've successfully done that, we continue
2759 	 * by binding the new memory, allocating new MTT entries, writing the
2760 	 * new MTT entries, and updating the mr to reflect the changes.
2761 	 */
2762 
2763 	/*
2764 	 * If this region is being shared (i.e. MTT refcount != 1), then we
2765 	 * can't reuse the current MTT resources regardless of their size.
2766 	 * Instead we'll need to alloc new ones (below) just as if there
2767 	 * hadn't been enough room in the current entries.
2768 	 */
2769 	swrc_old = (hermon_sw_refcnt_t *)mr->mr_mttrefcntp->hr_addr;
2770 	if (HERMON_MTT_IS_NOT_SHARED(swrc_old) &&
2771 	    (nummtt_needed <= nummtt_in_currrsrc)) {
2772 
2773 		/*
2774 		 * Unbind the old mapping for this memory region, but retain
2775 		 * the ddi_dma_handle_t (if possible) for reuse in the bind
2776 		 * operation below.  Note:  If original memory region was
2777 		 * bound for IOMMU bypass and the new region can not use
2778 		 * bypass, then a new DMA handle will be necessary.
2779 		 */
2780 		if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2781 			mr->mr_bindinfo.bi_free_dmahdl = 0;
2782 			hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2783 			dmahdl = mr->mr_bindinfo.bi_dmahdl;
2784 			reuse_dmahdl = 1;
2785 		} else {
2786 			hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2787 			dmahdl = NULL;
2788 			reuse_dmahdl = 0;
2789 		}
2790 
2791 		/*
2792 		 * Bind the new memory and determine the mapped addresses.
2793 		 * As described, this routine and hermon_mr_fast_mtt_write()
2794 		 * do the majority of the work for the memory registration
2795 		 * operations.  Note:  When we successfully finish the binding,
2796 		 * we will set the "bi_free_dmahdl" flag to indicate that
2797 		 * even though we may have reused the ddi_dma_handle_t we do
2798 		 * wish it to be freed up at some later time.  Note also that
2799 		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
2800 		 */
2801 		bind->bi_bypass	= bind_type;
2802 		status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
2803 		if (status != DDI_SUCCESS) {
2804 			if (reuse_dmahdl) {
2805 				ddi_dma_free_handle(&dmahdl);
2806 			}
2807 
2808 			/*
2809 			 * Deregister will be called upon returning failure
2810 			 * from this routine. This will ensure that all
2811 			 * current resources get properly freed up.
2812 			 * Unnecessary to attempt to regain software ownership
2813 			 * of the MPT entry as that has already been done
2814 			 * above (in hermon_mr_reregister()).  Also unnecessary
2815 			 * to attempt to unbind the memory.
2816 			 */
2817 			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2818 
2819 			status = IBT_INSUFF_RESOURCE;
2820 			goto mrrereghelp_fail;
2821 		}
2822 		if (reuse_dmahdl) {
2823 			bind->bi_free_dmahdl = 1;
2824 		}
2825 
2826 		/*
2827 		 * Using the new mapping, but reusing the current MTT
2828 		 * resources, write the updated entries to MTT
2829 		 */
2830 		mtt    = mr->mr_mttrsrcp;
2831 		status = hermon_mr_fast_mtt_write(state, mtt, bind,
2832 		    mtt_pgsize_bits);
2833 		if (status != DDI_SUCCESS) {
2834 			/*
2835 			 * Deregister will be called upon returning failure
2836 			 * from this routine. This will ensure that all
2837 			 * current resources get properly freed up.
2838 			 * Unnecessary to attempt to regain software ownership
2839 			 * of the MPT entry as that has already been done
2840 			 * above (in hermon_mr_reregister()).  Also unnecessary
2841 			 * to attempt to unbind the memory.
2842 			 *
2843 			 * But we do need to unbind the newly bound memory
2844 			 * before returning.
2845 			 */
2846 			hermon_mr_mem_unbind(state, bind);
2847 			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2848 
2849 			/*
2850 			 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
2851 			 * only if it detects a HW error during DMA.
2852 			 */
2853 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2854 			status = ibc_get_ci_failure(0);
2855 			goto mrrereghelp_fail;
2856 		}
2857 
2858 		/* Put the updated information into the Mem Region handle */
2859 		mr->mr_bindinfo	  = *bind;
2860 		mr->mr_logmttpgsz = mtt_pgsize_bits;
2861 
2862 	} else {
2863 		/*
2864 		 * Check if the memory region MTT is shared by any other MRs.
2865 		 * Since the resource may be shared between multiple memory
2866 		 * regions (as a result of a "RegisterSharedMR()" verb) it is
2867 		 * important that we not unbind any resources prematurely.
2868 		 */
2869 		if (!HERMON_MTT_IS_SHARED(swrc_old)) {
2870 			/*
2871 			 * Unbind the old mapping for this memory region, but
2872 			 * retain the ddi_dma_handle_t for reuse in the bind
2873 			 * operation below. Note: This can only be done here
2874 			 * because the region being reregistered is not
2875 			 * currently shared.  Also if original memory region
2876 			 * was bound for IOMMU bypass and the new region can
2877 			 * not use bypass, then a new DMA handle will be
2878 			 * necessary.
2879 			 */
2880 			if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2881 				mr->mr_bindinfo.bi_free_dmahdl = 0;
2882 				hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2883 				dmahdl = mr->mr_bindinfo.bi_dmahdl;
2884 				reuse_dmahdl = 1;
2885 			} else {
2886 				hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2887 				dmahdl = NULL;
2888 				reuse_dmahdl = 0;
2889 			}
2890 		} else {
2891 			dmahdl = NULL;
2892 			reuse_dmahdl = 0;
2893 		}
2894 
2895 		/*
2896 		 * Bind the new memory and determine the mapped addresses.
2897 		 * As described, this routine and hermon_mr_fast_mtt_write()
2898 		 * do the majority of the work for the memory registration
2899 		 * operations.  Note:  When we successfully finish the binding,
2900 		 * we will set the "bi_free_dmahdl" flag to indicate that
2901 		 * even though we may have reused the ddi_dma_handle_t we do
2902 		 * wish it to be freed up at some later time.  Note also that
2903 		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
2904 		 */
2905 		bind->bi_bypass	= bind_type;
2906 		status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
2907 		if (status != DDI_SUCCESS) {
2908 			if (reuse_dmahdl) {
2909 				ddi_dma_free_handle(&dmahdl);
2910 			}
2911 
2912 			/*
2913 			 * Deregister will be called upon returning failure
2914 			 * from this routine. This will ensure that all
2915 			 * current resources get properly freed up.
2916 			 * Unnecessary to attempt to regain software ownership
2917 			 * of the MPT entry as that has already been done
2918 			 * above (in hermon_mr_reregister()).  Also unnecessary
2919 			 * to attempt to unbind the memory.
2920 			 */
2921 			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2922 
2923 			status = IBT_INSUFF_RESOURCE;
2924 			goto mrrereghelp_fail;
2925 		}
2926 		if (reuse_dmahdl) {
2927 			bind->bi_free_dmahdl = 1;
2928 		}
2929 
2930 		/*
2931 		 * Allocate the new MTT entries resource
2932 		 */
2933 		status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt_needed,
2934 		    sleep, &mtt);
2935 		if (status != DDI_SUCCESS) {
2936 			/*
2937 			 * Deregister will be called upon returning failure
2938 			 * from this routine. This will ensure that all
2939 			 * current resources get properly freed up.
2940 			 * Unnecessary to attempt to regain software ownership
2941 			 * of the MPT entry as that has already been done
2942 			 * above (in hermon_mr_reregister()).  Also unnecessary
2943 			 * to attempt to unbind the memory.
2944 			 *
2945 			 * But we do need to unbind the newly bound memory
2946 			 * before returning.
2947 			 */
2948 			hermon_mr_mem_unbind(state, bind);
2949 			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2950 
2951 			status = IBT_INSUFF_RESOURCE;
2952 			goto mrrereghelp_fail;
2953 		}
2954 
2955 		/*
2956 		 * Allocate MTT reference count (to track shared memory
2957 		 * regions).  As mentioned elsewhere above, this reference
2958 		 * count resource may never be used on the given memory region,
2959 		 * but if it is ever later registered as a "shared" memory
2960 		 * region then this resource will be necessary.  Note:  This
2961 		 * is only necessary here if the existing memory region is
2962 		 * already being shared (because otherwise we already have
2963 		 * a useable reference count resource).
2964 		 */
2965 		if (HERMON_MTT_IS_SHARED(swrc_old)) {
2966 			status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1,
2967 			    sleep, &mtt_refcnt);
2968 			if (status != DDI_SUCCESS) {
2969 				/*
2970 				 * Deregister will be called upon returning
2971 				 * failure from this routine. This will ensure
2972 				 * that all current resources get properly
2973 				 * freed up.  Unnecessary to attempt to regain
2974 				 * software ownership of the MPT entry as that
2975 				 * has already been done above (in
2976 				 * hermon_mr_reregister()).  Also unnecessary
2977 				 * to attempt to unbind the memory.
2978 				 *
2979 				 * But we need to unbind the newly bound
2980 				 * memory and free up the newly allocated MTT
2981 				 * entries before returning.
2982 				 */
2983 				hermon_mr_mem_unbind(state, bind);
2984 				hermon_rsrc_free(state, &mtt);
2985 				*dereg_level =
2986 				    HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2987 
2988 				status = IBT_INSUFF_RESOURCE;
2989 				goto mrrereghelp_fail;
2990 			}
2991 			swrc_new = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
2992 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
2993 			HERMON_MTT_REFCNT_INIT(swrc_new);
2994 		} else {
2995 			mtt_refcnt = mr->mr_mttrefcntp;
2996 		}
2997 
2998 		/*
2999 		 * Using the new mapping and the new MTT resources, write the
3000 		 * updated entries to MTT
3001 		 */
3002 		status = hermon_mr_fast_mtt_write(state, mtt, bind,
3003 		    mtt_pgsize_bits);
3004 		if (status != DDI_SUCCESS) {
3005 			/*
3006 			 * Deregister will be called upon returning failure
3007 			 * from this routine. This will ensure that all
3008 			 * current resources get properly freed up.
3009 			 * Unnecessary to attempt to regain software ownership
3010 			 * of the MPT entry as that has already been done
3011 			 * above (in hermon_mr_reregister()).  Also unnecessary
3012 			 * to attempt to unbind the memory.
3013 			 *
3014 			 * But we need to unbind the newly bound memory,
3015 			 * free up the newly allocated MTT entries, and
3016 			 * (possibly) free the new MTT reference count
3017 			 * resource before returning.
3018 			 */
3019 			if (HERMON_MTT_IS_SHARED(swrc_old)) {
3020 				hermon_rsrc_free(state, &mtt_refcnt);
3021 			}
3022 			hermon_mr_mem_unbind(state, bind);
3023 			hermon_rsrc_free(state, &mtt);
3024 			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
3025 
3026 			status = IBT_INSUFF_RESOURCE;
3027 			goto mrrereghelp_fail;
3028 		}
3029 
3030 		/*
3031 		 * Check if the memory region MTT is shared by any other MRs.
3032 		 * Since the resource may be shared between multiple memory
3033 		 * regions (as a result of a "RegisterSharedMR()" verb) it is
3034 		 * important that we not free up any resources prematurely.
3035 		 */
3036 		if (HERMON_MTT_IS_SHARED(swrc_old)) {
3037 			/* Decrement MTT reference count for "old" region */
3038 			(void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
3039 		} else {
3040 			/* Free up the old MTT entries resource */
3041 			hermon_rsrc_free(state, &mr->mr_mttrsrcp);
3042 		}
3043 
3044 		/* Put the updated information into the mrhdl */
3045 		mr->mr_bindinfo	  = *bind;
3046 		mr->mr_logmttpgsz = mtt_pgsize_bits;
3047 		mr->mr_mttrsrcp   = mtt;
3048 		mr->mr_mttrefcntp = mtt_refcnt;
3049 	}
3050 
3051 	/*
3052 	 * Calculate and return the updated MTT address (in the DDR address
3053 	 * space).  This will be used by the caller (hermon_mr_reregister) in
3054 	 * the updated MPT entry
3055 	 */
3056 	*mtt_addr = mtt->hr_indx << HERMON_MTT_SIZE_SHIFT;
3057 
3058 	return (DDI_SUCCESS);
3059 
3060 mrrereghelp_fail:
3061 	return (status);
3062 }
3063 
3064 
3065 /*
3066  * hermon_mr_nummtt_needed()
3067  *    Context: Can be called from interrupt or base context.
3068  */
3069 /* ARGSUSED */
3070 static uint64_t
3071 hermon_mr_nummtt_needed(hermon_state_t *state, hermon_bind_info_t *bind,
3072     uint_t *mtt_pgsize_bits)
3073 {
3074 	uint64_t	pg_offset_mask;
3075 	uint64_t	pg_offset, tmp_length;
3076 
3077 	/*
3078 	 * For now we specify the page size as 8Kb (the default page size for
3079 	 * the sun4u architecture), or 4Kb for x86.  Figure out optimal page
3080 	 * size by examining the dmacookies
3081 	 */
3082 	*mtt_pgsize_bits = PAGESHIFT;
3083 
3084 	pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
3085 	pg_offset = bind->bi_addr & pg_offset_mask;
3086 	tmp_length = pg_offset + (bind->bi_len - 1);
3087 	return ((tmp_length >> *mtt_pgsize_bits) + 1);
3088 }
3089 
3090 
3091 /*
3092  * hermon_mr_mem_bind()
3093  *    Context: Can be called from interrupt or base context.
3094  */
3095 static int
3096 hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
3097     ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer)
3098 {
3099 	ddi_dma_attr_t	dma_attr;
3100 	int		(*callback)(caddr_t);
3101 	int		status;
3102 
3103 	/* bi_type must be set to a meaningful value to get a bind handle */
3104 	ASSERT(bind->bi_type == HERMON_BINDHDL_VADDR ||
3105 	    bind->bi_type == HERMON_BINDHDL_BUF ||
3106 	    bind->bi_type == HERMON_BINDHDL_UBUF);
3107 
3108 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
3109 
3110 	/* Set the callback flag appropriately */
3111 	callback = (sleep == HERMON_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
3112 
3113 	/*
3114 	 * Initialize many of the default DMA attributes.  Then, if we're
3115 	 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
3116 	 */
3117 	if (dmahdl == NULL) {
3118 		hermon_dma_attr_init(state, &dma_attr);
3119 #ifdef	__sparc
3120 		if (bind->bi_bypass == HERMON_BINDMEM_BYPASS) {
3121 			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
3122 		}
3123 #endif
3124 
3125 		/* set RO if needed - tunable set and 'is_buffer' is non-0 */
3126 		if (is_buffer) {
3127 			if (! (bind->bi_flags & IBT_MR_DISABLE_RO)) {
3128 				if ((bind->bi_type != HERMON_BINDHDL_UBUF) &&
3129 				    (hermon_kernel_data_ro ==
3130 				    HERMON_RO_ENABLED)) {
3131 					dma_attr.dma_attr_flags |=
3132 					    DDI_DMA_RELAXED_ORDERING;
3133 				}
3134 				if (((bind->bi_type == HERMON_BINDHDL_UBUF) &&
3135 				    (hermon_user_data_ro ==
3136 				    HERMON_RO_ENABLED))) {
3137 					dma_attr.dma_attr_flags |=
3138 					    DDI_DMA_RELAXED_ORDERING;
3139 				}
3140 			}
3141 		}
3142 
3143 		/* Allocate a DMA handle for the binding */
3144 		status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
3145 		    callback, NULL, &bind->bi_dmahdl);
3146 		if (status != DDI_SUCCESS) {
3147 			return (status);
3148 		}
3149 		bind->bi_free_dmahdl = 1;
3150 
3151 	} else  {
3152 		bind->bi_dmahdl = dmahdl;
3153 		bind->bi_free_dmahdl = 0;
3154 	}
3155 
3156 
3157 	/*
3158 	 * Bind the memory to get the PCI mapped addresses.  The decision
3159 	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
3160 	 * is determined by the "bi_type" flag.  Note: if the bind operation
3161 	 * fails then we have to free up the DMA handle and return error.
3162 	 */
3163 	if (bind->bi_type == HERMON_BINDHDL_VADDR) {
3164 		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
3165 		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
3166 		    (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback, NULL,
3167 		    &bind->bi_dmacookie, &bind->bi_cookiecnt);
3168 
3169 	} else {  /* HERMON_BINDHDL_BUF or HERMON_BINDHDL_UBUF */
3170 
3171 		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
3172 		    bind->bi_buf, (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback,
3173 		    NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
3174 	}
3175 	if (status != DDI_DMA_MAPPED) {
3176 		if (bind->bi_free_dmahdl != 0) {
3177 			ddi_dma_free_handle(&bind->bi_dmahdl);
3178 		}
3179 		return (status);
3180 	}
3181 
3182 	return (DDI_SUCCESS);
3183 }
3184 
3185 
3186 /*
3187  * hermon_mr_mem_unbind()
3188  *    Context: Can be called from interrupt or base context.
3189  */
3190 static void
3191 hermon_mr_mem_unbind(hermon_state_t *state, hermon_bind_info_t *bind)
3192 {
3193 	int	status;
3194 
3195 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
3196 	/* there is nothing to unbind for alloc_lkey */
3197 	if (bind->bi_type == HERMON_BINDHDL_LKEY)
3198 		return;
3199 
3200 	/*
3201 	 * In case of HERMON_BINDHDL_UBUF, the memory bi_buf points to
3202 	 * is actually allocated by ddi_umem_iosetup() internally, then
3203 	 * it's required to free it here. Reset bi_type to HERMON_BINDHDL_NONE
3204 	 * not to free it again later.
3205 	 */
3206 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
3207 	if (bind->bi_type == HERMON_BINDHDL_UBUF) {
3208 		freerbuf(bind->bi_buf);
3209 		bind->bi_type = HERMON_BINDHDL_NONE;
3210 	}
3211 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
3212 
3213 	/*
3214 	 * Unbind the DMA memory for the region
3215 	 *
3216 	 * Note: The only way ddi_dma_unbind_handle() currently
3217 	 * can return an error is if the handle passed in is invalid.
3218 	 * Since this should never happen, we choose to return void
3219 	 * from this function!  If this does return an error, however,
3220 	 * then we print a warning message to the console.
3221 	 */
3222 	status = ddi_dma_unbind_handle(bind->bi_dmahdl);
3223 	if (status != DDI_SUCCESS) {
3224 		HERMON_WARNING(state, "failed to unbind DMA mapping");
3225 		return;
3226 	}
3227 
3228 	/* Free up the DMA handle */
3229 	if (bind->bi_free_dmahdl != 0) {
3230 		ddi_dma_free_handle(&bind->bi_dmahdl);
3231 	}
3232 }
3233 
3234 
3235 /*
3236  * hermon_mr_fast_mtt_write()
3237  *    Context: Can be called from interrupt or base context.
3238  */
3239 static int
3240 hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
3241     hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits)
3242 {
3243 	hermon_icm_table_t	*icm_table;
3244 	hermon_dma_info_t	*dma_info;
3245 	uint32_t		index1, index2, rindx;
3246 	ddi_dma_cookie_t	dmacookie;
3247 	uint_t			cookie_cnt;
3248 	uint64_t		*mtt_table;
3249 	uint64_t		mtt_entry;
3250 	uint64_t		addr, endaddr;
3251 	uint64_t		pagesize;
3252 	offset_t		i, start;
3253 	uint_t			per_span;
3254 	int			sync_needed;
3255 
3256 	/*
3257 	 * XXX According to the PRM, we are to use the WRITE_MTT
3258 	 * command to write out MTTs. Tavor does not do this,
3259 	 * instead taking advantage of direct access to the MTTs,
3260 	 * and knowledge that Mellanox FMR relies on our ability
3261 	 * to write directly to the MTTs without any further
3262 	 * notification to the firmware. Likewise, we will choose
3263 	 * to not use the WRITE_MTT command, but to simply write
3264 	 * out the MTTs.
3265 	 */
3266 
3267 	/* Calculate page size from the suggested value passed in */
3268 	pagesize = ((uint64_t)1 << mtt_pgsize_bits);
3269 
3270 	/* Walk the "cookie list" and fill in the MTT table entries */
3271 	dmacookie  = bind->bi_dmacookie;
3272 	cookie_cnt = bind->bi_cookiecnt;
3273 
3274 	icm_table = &state->hs_icm[HERMON_MTT];
3275 	rindx = mtt->hr_indx;
3276 	hermon_index(index1, index2, rindx, icm_table, i);
3277 	start = i;
3278 
3279 	per_span   = icm_table->span;
3280 	dma_info   = icm_table->icm_dma[index1] + index2;
3281 	mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;
3282 
3283 	sync_needed = 0;
3284 	while (cookie_cnt-- > 0) {
3285 		addr    = dmacookie.dmac_laddress;
3286 		endaddr = addr + (dmacookie.dmac_size - 1);
3287 		addr    = addr & ~((uint64_t)pagesize - 1);
3288 
3289 		while (addr <= endaddr) {
3290 
3291 			/*
3292 			 * Fill in the mapped addresses (calculated above) and
3293 			 * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry.
3294 			 */
3295 			mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT;
3296 			mtt_table[i] = htonll(mtt_entry);
3297 			i++;
3298 			rindx++;
3299 
3300 			if (i == per_span) {
3301 
3302 				(void) ddi_dma_sync(dma_info->dma_hdl,
3303 				    start * sizeof (hermon_hw_mtt_t),
3304 				    (i - start) * sizeof (hermon_hw_mtt_t),
3305 				    DDI_DMA_SYNC_FORDEV);
3306 
3307 				if ((addr + pagesize > endaddr) &&
3308 				    (cookie_cnt == 0))
3309 					return (DDI_SUCCESS);
3310 
3311 				hermon_index(index1, index2, rindx, icm_table,
3312 				    i);
3313 				start = i * sizeof (hermon_hw_mtt_t);
3314 				dma_info = icm_table->icm_dma[index1] + index2;
3315 				mtt_table =
3316 				    (uint64_t *)(uintptr_t)dma_info->vaddr;
3317 
3318 				sync_needed = 0;
3319 			} else {
3320 				sync_needed = 1;
3321 			}
3322 
3323 			addr += pagesize;
3324 			if (addr == 0) {
3325 				static int do_once = 1;
3326 				_NOTE(SCHEME_PROTECTS_DATA("safe sharing",
3327 				    do_once))
3328 				if (do_once) {
3329 					do_once = 0;
3330 					cmn_err(CE_NOTE, "probable error in "
3331 					    "dma_cookie address from caller\n");
3332 				}
3333 				break;
3334 			}
3335 		}
3336 
3337 		/*
3338 		 * When we've reached the end of the current DMA cookie,
3339 		 * jump to the next cookie (if there are more)
3340 		 */
3341 		if (cookie_cnt != 0) {
3342 			ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
3343 		}
3344 	}
3345 
3346 	/* done all the cookies, now sync the memory for the device */
3347 	if (sync_needed)
3348 		(void) ddi_dma_sync(dma_info->dma_hdl,
3349 		    start * sizeof (hermon_hw_mtt_t),
3350 		    (i - start) * sizeof (hermon_hw_mtt_t),
3351 		    DDI_DMA_SYNC_FORDEV);
3352 
3353 	return (DDI_SUCCESS);
3354 }
3355 
3356 /*
3357  * hermon_mr_fast_mtt_write_fmr()
3358  *    Context: Can be called from interrupt or base context.
3359  */
3360 /* ARGSUSED */
3361 static int
3362 hermon_mr_fast_mtt_write_fmr(hermon_state_t *state, hermon_rsrc_t *mtt,
3363     ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits)
3364 {
3365 	hermon_icm_table_t	*icm_table;
3366 	hermon_dma_info_t	*dma_info;
3367 	uint32_t		index1, index2, rindx;
3368 	uint64_t		*mtt_table;
3369 	offset_t		i, j;
3370 	uint_t			per_span;
3371 
3372 	icm_table = &state->hs_icm[HERMON_MTT];
3373 	rindx = mtt->hr_indx;
3374 	hermon_index(index1, index2, rindx, icm_table, i);
3375 	per_span   = icm_table->span;
3376 	dma_info   = icm_table->icm_dma[index1] + index2;
3377 	mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;
3378 
3379 	/*
3380 	 * Fill in the MTT table entries
3381 	 */
3382 	for (j = 0; j < mem_pattr->pmr_num_buf; j++) {
3383 		mtt_table[i] = mem_pattr->pmr_addr_list[j].p_laddr;
3384 		i++;
3385 		rindx++;
3386 		if (i == per_span) {
3387 			hermon_index(index1, index2, rindx, icm_table, i);
3388 			dma_info = icm_table->icm_dma[index1] + index2;
3389 			mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr;
3390 		}
3391 	}
3392 
3393 	return (DDI_SUCCESS);
3394 }
3395 
3396 
3397 /*
3398  * hermon_mtt_refcnt_inc()
3399  *    Context: Can be called from interrupt or base context.
3400  */
3401 static uint_t
3402 hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc)
3403 {
3404 	hermon_sw_refcnt_t *rc;
3405 
3406 	rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
3407 	return (atomic_inc_uint_nv(&rc->swrc_refcnt));
3408 }
3409 
3410 
3411 /*
3412  * hermon_mtt_refcnt_dec()
3413  *    Context: Can be called from interrupt or base context.
3414  */
3415 static uint_t
3416 hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc)
3417 {
3418 	hermon_sw_refcnt_t *rc;
3419 
3420 	rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
3421 	return (atomic_dec_uint_nv(&rc->swrc_refcnt));
3422 }
3423