xref: /illumos-gate/usr/src/uts/common/io/ib/adapters/hermon/hermon_mr.c (revision 33efde4275d24731ef87927237b0ffb0630b6b2d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * hermon_mr.c
28  *    Hermon Memory Region/Window Routines
29  *
30  *    Implements all the routines necessary to provide the requisite memory
31  *    registration verbs.  These include operations like RegisterMemRegion(),
32  *    DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion,
33  *    etc., that affect Memory Regions.  It also includes the verbs that
34  *    affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
35  *    and QueryMemWindow().
36  */
37 
38 #include <sys/types.h>
39 #include <sys/conf.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/modctl.h>
43 #include <sys/esunddi.h>
44 
45 #include <sys/ib/adapters/hermon/hermon.h>
46 
47 extern uint32_t hermon_kernel_data_ro;
48 extern uint32_t hermon_user_data_ro;
49 extern int hermon_rdma_debug;
50 
51 /*
52  * Used by hermon_mr_keycalc() below to fill in the "unconstrained" portion
53  * of Hermon memory keys (LKeys and RKeys)
54  */
55 static	uint_t hermon_memkey_cnt = 0x00;
56 #define	HERMON_MEMKEY_SHIFT	24
57 
58 /* initial state of an MPT */
59 #define	HERMON_MPT_SW_OWNERSHIP	0xF	/* memory regions */
60 #define	HERMON_MPT_FREE		0x3	/* allocate lkey */
61 
62 static int hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
63     hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
64     hermon_mpt_rsrc_type_t mpt_type);
65 static int hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
66     hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
67     hermon_mr_options_t *op);
68 static int hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
69     hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
70     uint_t sleep, uint_t *dereg_level);
71 static uint64_t hermon_mr_nummtt_needed(hermon_state_t *state,
72     hermon_bind_info_t *bind, uint_t *mtt_pgsize);
73 static int hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
74     ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer);
75 static void hermon_mr_mem_unbind(hermon_state_t *state,
76     hermon_bind_info_t *bind);
77 static int hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
78     hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits);
79 static int hermon_mr_fast_mtt_write_fmr(hermon_state_t *state,
80     hermon_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits);
81 static uint_t hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc);
82 static uint_t hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc);
83 
84 
/*
 * The Hermon umem_lockmemory() callback ops.  When userland memory is
 * registered, these callback ops are specified.  The hermon_umap_umemlock_cb()
 * callback will be called whenever the memory for the corresponding
 * ddi_umem_cookie_t is being freed (e.g. when the owning process exits),
 * allowing the driver to partially deregister the affected memory region.
 */
static struct umem_callback_ops hermon_umem_cbops = {
	UMEM_CALLBACK_VERSION,		/* callback interface version */
	hermon_umap_umemlock_cb,	/* invoked when cookie memory is freed */
};
95 
96 
97 
98 /*
99  * hermon_mr_register()
100  *    Context: Can be called from interrupt or base context.
101  */
102 int
hermon_mr_register(hermon_state_t * state,hermon_pdhdl_t pd,ibt_mr_attr_t * mr_attr,hermon_mrhdl_t * mrhdl,hermon_mr_options_t * op,hermon_mpt_rsrc_type_t mpt_type)103 hermon_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
104     ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
105     hermon_mpt_rsrc_type_t mpt_type)
106 {
107 	hermon_bind_info_t	bind;
108 	int			status;
109 
110 	/*
111 	 * Fill in the "bind" struct.  This struct provides the majority
112 	 * of the information that will be used to distinguish between an
113 	 * "addr" binding (as is the case here) and a "buf" binding (see
114 	 * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
115 	 * which does most of the "heavy lifting" for the Hermon memory
116 	 * registration routines.
117 	 */
118 	bind.bi_type  = HERMON_BINDHDL_VADDR;
119 	bind.bi_addr  = mr_attr->mr_vaddr;
120 	bind.bi_len   = mr_attr->mr_len;
121 	bind.bi_as    = mr_attr->mr_as;
122 	bind.bi_flags = mr_attr->mr_flags;
123 	status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op,
124 	    mpt_type);
125 	return (status);
126 }
127 
128 
129 /*
130  * hermon_mr_register_buf()
131  *    Context: Can be called from interrupt or base context.
132  */
133 int
hermon_mr_register_buf(hermon_state_t * state,hermon_pdhdl_t pd,ibt_smr_attr_t * mr_attr,struct buf * buf,hermon_mrhdl_t * mrhdl,hermon_mr_options_t * op,hermon_mpt_rsrc_type_t mpt_type)134 hermon_mr_register_buf(hermon_state_t *state, hermon_pdhdl_t pd,
135     ibt_smr_attr_t *mr_attr, struct buf *buf, hermon_mrhdl_t *mrhdl,
136     hermon_mr_options_t *op, hermon_mpt_rsrc_type_t mpt_type)
137 {
138 	hermon_bind_info_t	bind;
139 	int			status;
140 
141 	/*
142 	 * Fill in the "bind" struct.  This struct provides the majority
143 	 * of the information that will be used to distinguish between an
144 	 * "addr" binding (see above) and a "buf" binding (as is the case
145 	 * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
146 	 * which does most of the "heavy lifting" for the Hermon memory
147 	 * registration routines.  Note: We have chosen to provide
148 	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
149 	 * not set).  It is not critical what value we choose here as it need
150 	 * only be unique for the given RKey (which will happen by default),
151 	 * so the choice here is somewhat arbitrary.
152 	 */
153 	bind.bi_type  = HERMON_BINDHDL_BUF;
154 	bind.bi_buf   = buf;
155 	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
156 		bind.bi_addr  = mr_attr->mr_vaddr;
157 	} else {
158 		bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
159 	}
160 	bind.bi_as    = NULL;
161 	bind.bi_len   = (uint64_t)buf->b_bcount;
162 	bind.bi_flags = mr_attr->mr_flags;
163 	status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, mpt_type);
164 	return (status);
165 }
166 
167 
/*
 * hermon_mr_register_shared()
 *    Context: Can be called from interrupt or base context.
 *
 *    Register a new memory region ("mrhdl_new") that shares the already
 *    bound and filled-in MTT entries of an existing region ("mrhdl") under
 *    protection domain "pd".  A new MPT entry is allocated, filled in, and
 *    handed to the hardware; the MTT reference count of the original region
 *    is incremented to reflect the sharing.  On any failure, all partially
 *    acquired resources are released via the goto-based cleanup chain at
 *    the bottom and an IBT status code is returned.
 */
int
hermon_mr_register_shared(hermon_state_t *state, hermon_mrhdl_t mrhdl,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc;
	hermon_umap_db_entry_t	*umapdb;
	hermon_hw_dmpt_t	mpt_entry;
	hermon_mrhdl_t		mr;
	hermon_bind_info_t	*bind;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, pgsize_msk;
	uint_t			sleep, mr_is_umem;
	int			status, umem_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP :
	    HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mrshared_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the shared memory region.
	 * Specifically, it will be made to reference the currently existing
	 * MTT entries and ownership of the MPT will be passed to the hardware
	 * in the last step below.  If we fail here, we must undo the
	 * protection domain reference count.
	 */
	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrshared_fail1;
	}

	/*
	 * Allocate the software structure for tracking the shared memory
	 * region (i.e. the Hermon Memory Region handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrshared_fail2;
	}
	mr = (hermon_mrhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);

	/* Grab the MR lock for the current memory region */
	mutex_enter(&mrhdl->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
		mutex_exit(&mrhdl->mr_lock);
		status = IBT_MR_HDL_INVALID;
		goto mrshared_fail3;
	}

	/*
	 * Determine if the original memory was from userland and, if so, pin
	 * the pages (again) with umem_lockmemory().  This will guarantee a
	 * separate callback for each of this shared region's MR handles.
	 * If this is userland memory, then allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo all the above setup.
	 */
	mr_is_umem = mrhdl->mr_is_umem;
	if (mr_is_umem) {
		/* Round the original binding up to whole pages */
		umem_len   = ptob(btopr(mrhdl->mr_bindinfo.bi_len));
		umem_addr  = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
		    ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &hermon_umem_cbops, NULL);
		if (status != 0) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INSUFF_RESOURCE;
			goto mrshared_fail3;
		}

		umapdb = hermon_umap_db_alloc(state->hs_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INSUFF_RESOURCE;
			goto mrshared_fail4;
		}
	}

	/*
	 * Copy the MTT resource pointer (and additional parameters) from
	 * the original Hermon Memory Region handle.  Note: this is normally
	 * where the hermon_mr_mem_bind() routine would be called, but because
	 * we already have bound and filled-in MTT entries it is simply a
	 * matter here of managing the MTT reference count and grabbing the
	 * address of the MTT table entries (for filling in the shared region's
	 * MPT entry).
	 */
	mr->mr_mttrsrcp	  = mrhdl->mr_mttrsrcp;
	mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
	mr->mr_bindinfo	  = mrhdl->mr_bindinfo;
	mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
	mutex_exit(&mrhdl->mr_lock);
	bind = &mr->mr_bindinfo;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	mtt = mr->mr_mttrsrcp;

	/*
	 * Increment the MTT reference count (to reflect the fact that
	 * the MTT is now shared)
	 */
	(void) hermon_mtt_refcnt_inc(mr->mr_mttrefcntp);

	/*
	 * Update the new "bind" virtual address.  Do some extra work here
	 * to ensure proper alignment.  That is, make sure that the page
	 * offset for the beginning of the old range is the same as the
	 * offset for this new mapping
	 */
	pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
	bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
	    (mr->mr_bindinfo.bi_addr & pgsize_msk));

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.start_addr	= bind->bi_addr;
	mpt_entry.reg_win_len	= bind->bi_len;
	/* Split the 64-bit MTT address across the high/low MPT fields */
	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
	mpt_entry.mtt_addr_h = mtt_addr >> 32;
	mpt_entry.mtt_addr_l = mtt_addr >> 3;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto mrshared_fail5;
	}

	/*
	 * Fill in the rest of the Hermon Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp	  = mpt;
	mr->mr_mttrsrcp	  = mtt;
	mr->mr_mpt_type	  = HERMON_MPT_DMPT;
	mr->mr_pdhdl	  = pd;
	mr->mr_rsrcp	  = rsrc;
	mr->mr_is_umem	  = mr_is_umem;
	mr->mr_is_fmr	  = 0;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;
	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the hermon_umap_umemlock_cb()
	 * callback and hermon_mr_deregister().
	 */
	if (mr_is_umem) {
		hermon_umap_db_add(umapdb);
	}

	*mrhdl_new = mr;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrshared_fail5:
	(void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
	if (mr_is_umem) {
		hermon_umap_db_free(umapdb);
	}
mrshared_fail4:
	if (mr_is_umem) {
		ddi_umem_unlock(umem_cookie);
	}
mrshared_fail3:
	hermon_rsrc_free(state, &rsrc);
mrshared_fail2:
	hermon_rsrc_free(state, &mpt);
mrshared_fail1:
	hermon_pd_refcnt_dec(pd);
mrshared_fail:
	return (status);
}
437 
/*
 * hermon_mr_alloc_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 *    Pre-allocate the MPT and MTT resources for a Fast Memory Region
 *    (FMR) belonging to pool "fmr_pool" under protection domain "pd".
 *    The MPT is written with fast_reg_en set and a zero start/length;
 *    the actual mapping is filled in later by
 *    hermon_mr_register_physical_fmr().  On success the new MR handle
 *    is returned through "mrhdl".
 */
int
hermon_mr_alloc_fmr(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_fmrhdl_t fmr_pool, hermon_mrhdl_t *mrhdl)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc;
	hermon_hw_dmpt_t	mpt_entry;
	hermon_mrhdl_t		mr;
	hermon_bind_info_t	bind;
	uint64_t		mtt_addr;
	uint64_t		nummtt;
	uint_t			sleep, mtt_pgsize_bits;
	int			status;
	offset_t		i;
	hermon_icm_table_t	*icm_table;
	hermon_dma_info_t	*dma_info;
	uint32_t		index1, index2, rindx;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
	    HERMON_NOSLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		return (IBT_INVALID_PARAM);
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the FMR.  Specifically, it will be
	 * made to reference the currently existing MTT entries and ownership
	 * of the MPT will be passed to the hardware in the last step below.
	 * If we fail here, we must undo the protection domain reference count.
	 */

	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto fmralloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the fmr memory
	 * region (i.e. the Hermon Memory Region handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto fmralloc_fail2;
	}
	mr = (hermon_mrhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	mr->mr_fmr_key = 1;	/* ready for the next reload */
	mr->mr_rkey = mr->mr_lkey = mpt->hr_indx;

	/*
	 * Determine number of pages spanned.  This routine uses the
	 * information in the "bind" struct to determine the required
	 * number of MTT entries needed (and returns the suggested page size -
	 * as a "power-of-2" - for each MTT entry).
	 */
	/* Assume address will be page aligned later */
	bind.bi_addr = 0;
	/* Calculate size based on given max pages */
	bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT;
	nummtt = hermon_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits);

	/*
	 * Allocate the MTT entries.  Use the calculations performed above to
	 * allocate the required number of MTT entries.  If we fail here, we
	 * must not only undo all the previous resource allocation (and PD
	 * reference count), but we must also unbind the memory.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
	if (status != DDI_SUCCESS) {
		IBTF_DPRINTF_L2("FMR", "FATAL: too few MTTs");
		status = IBT_INSUFF_RESOURCE;
		goto fmralloc_fail3;
	}
	mr->mr_logmttpgsz = mtt_pgsize_bits;

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
	mpt_entry.en_bind = 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
	mpt_entry.pd		= pd->pd_pdnum;

	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
	/* Split the 64-bit MTT address across the high/low MPT fields */
	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
	mpt_entry.fast_reg_en = 1;
	mpt_entry.mtt_size = (uint_t)nummtt;
	mpt_entry.mtt_addr_h = mtt_addr >> 32;
	mpt_entry.mtt_addr_l = mtt_addr >> 3;
	mpt_entry.mem_key = mr->mr_lkey;

	/*
	 * FMR sets these to 0 for now.  Later during actual fmr registration
	 * these values are filled in.
	 */
	mpt_entry.start_addr	= 0;
	mpt_entry.reg_win_len	= 0;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto fmralloc_fail4;
	}

	/*
	 * Fill in the rest of the Hermon Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.  Also, set
	 * that this is an FMR region.
	 */
	mr->mr_mptrsrcp	  = mpt;
	mr->mr_mttrsrcp	  = mtt;

	mr->mr_mpt_type   = HERMON_MPT_DMPT;
	mr->mr_pdhdl	  = pd;
	mr->mr_rsrcp	  = rsrc;
	mr->mr_is_fmr	  = 1;
	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);
	mr->mr_mttaddr	   = mtt_addr;
	(void) memcpy(&mr->mr_bindinfo, &bind, sizeof (hermon_bind_info_t));

	/* initialize hr_addr for use during register/deregister/invalidate */
	icm_table = &state->hs_icm[HERMON_DMPT];
	rindx = mpt->hr_indx;
	hermon_index(index1, index2, rindx, icm_table, i);
	dma_info = icm_table->icm_dma[index1] + index2;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mpt))
	mpt->hr_addr = (void *)((uintptr_t)(dma_info->vaddr + i * mpt->hr_len));

	*mrhdl = mr;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
fmralloc_fail4:
	/*
	 * NOTE(review): "mtt" was obtained with hermon_rsrc_alloc() above,
	 * but is released here with kmem_free() instead of the
	 * hermon_rsrc_free() used by the other cleanup paths.  Verify this
	 * is intentional; it looks inconsistent and may leak the underlying
	 * MTT resource accounting.
	 */
	kmem_free(mtt, sizeof (hermon_rsrc_t) * nummtt);
fmralloc_fail3:
	hermon_rsrc_free(state, &rsrc);
fmralloc_fail2:
	hermon_rsrc_free(state, &mpt);
fmralloc_fail1:
	hermon_pd_refcnt_dec(pd);
	return (status);
}
647 
648 
/*
 * hermon_mr_register_physical_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 *    Map a previously allocated FMR (see hermon_mr_alloc_fmr()) onto the
 *    physical buffer described by "mem_pattr_p".  The MPT entry is updated
 *    in place: it is first marked software-owned, the MTTs are rewritten,
 *    the key/address/length words are filled in, and ownership is then
 *    returned to the hardware.  The new lkey/rkey and iova are returned
 *    through "mem_desc_p".
 */
/*ARGSUSED*/
int
hermon_mr_register_physical_fmr(hermon_state_t *state,
    ibt_pmr_attr_t *mem_pattr_p, hermon_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p)
{
	hermon_rsrc_t		*mpt;
	uint64_t		*mpt_table;
	int			status;
	uint32_t		key;

	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	/* Direct (byte-swapped) view of the MPT entry in ICM memory */
	mpt_table = (uint64_t *)mpt->hr_addr;

	/* Write MPT status to SW bit */
	/*
	 * 0xF0 presumably places HERMON_MPT_SW_OWNERSHIP in the status
	 * nibble, taking the entry away from hardware while we rewrite it.
	 */
	*(uint8_t *)mpt_table = 0xF0;

	/* Ensure the ownership change is visible before touching the MTTs */
	membar_producer();

	/*
	 * Write the mapped addresses into the MTT entries.  FMR needs to do
	 * this a little differently, so we call the fmr specific fast mtt
	 * write here.
	 */
	status = hermon_mr_fast_mtt_write_fmr(state, mr->mr_mttrsrcp,
	    mem_pattr_p, mr->mr_logmttpgsz);
	if (status != DDI_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		status = ibc_get_ci_failure(0);
		goto fmr_reg_fail1;
	}

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	/* mr_fmr_key increments per remap so stale keys are detectable */
	key = mpt->hr_indx | (mr->mr_fmr_key++ << HERMON_MEMKEY_SHIFT);
	mr->mr_lkey = mr->mr_rkey = hermon_mr_key_swap(key);

	/* write mem key value */
	*(uint32_t *)&mpt_table[1] = htonl(key);

	/* write length value */
	mpt_table[3] = htonll(mem_pattr_p->pmr_len);

	/* write start addr value */
	mpt_table[2] = htonll(mem_pattr_p->pmr_iova);

	/* write lkey value */
	*(uint32_t *)&mpt_table[4] = htonl(key);

	/* Ensure all MPT updates land before handing the entry back to HW */
	membar_producer();

	/* Write MPT status to HW bit */
	*(uint8_t *)mpt_table = 0x00;

	/* Fill in return parameters */
	mem_desc_p->pmd_lkey = mr->mr_lkey;
	mem_desc_p->pmd_rkey = mr->mr_rkey;
	mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova;
	mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len;

	/* Fill in MR bindinfo struct for later sync or query operations */
	mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova;
	mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT;

	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);

fmr_reg_fail1:
	/*
	 * Note, we fail here, and purposely leave the memory ownership in
	 * software.  The memory tables may be corrupt, so we leave the region
	 * unregistered.
	 */
	return (status);
}
736 
737 
738 /*
739  * hermon_mr_deregister()
740  *    Context: Can be called from interrupt or base context.
741  */
742 /* ARGSUSED */
743 int
hermon_mr_deregister(hermon_state_t * state,hermon_mrhdl_t * mrhdl,uint_t level,uint_t sleep)744 hermon_mr_deregister(hermon_state_t *state, hermon_mrhdl_t *mrhdl, uint_t level,
745     uint_t sleep)
746 {
747 	hermon_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
748 	hermon_umap_db_entry_t	*umapdb;
749 	hermon_pdhdl_t		pd;
750 	hermon_mrhdl_t		mr;
751 	hermon_bind_info_t	*bind;
752 	uint64_t		value;
753 	int			status;
754 	uint_t			shared_mtt;
755 
756 	/*
757 	 * Check the sleep flag.  Ensure that it is consistent with the
758 	 * current thread context (i.e. if we are currently in the interrupt
759 	 * context, then we shouldn't be attempting to sleep).
760 	 */
761 	if ((sleep == HERMON_SLEEP) &&
762 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
763 		status = IBT_INVALID_PARAM;
764 		return (status);
765 	}
766 
767 	/*
768 	 * Pull all the necessary information from the Hermon Memory Region
769 	 * handle.  This is necessary here because the resource for the
770 	 * MR handle is going to be freed up as part of the this
771 	 * deregistration
772 	 */
773 	mr	= *mrhdl;
774 	mutex_enter(&mr->mr_lock);
775 	mpt	= mr->mr_mptrsrcp;
776 	mtt	= mr->mr_mttrsrcp;
777 	mtt_refcnt = mr->mr_mttrefcntp;
778 	rsrc	= mr->mr_rsrcp;
779 	pd	= mr->mr_pdhdl;
780 	bind	= &mr->mr_bindinfo;
781 
782 	/*
783 	 * Check here if the memory region is really an FMR.  If so, this is a
784 	 * bad thing and we shouldn't be here.  Return failure.
785 	 */
786 	if (mr->mr_is_fmr) {
787 		mutex_exit(&mr->mr_lock);
788 		return (IBT_INVALID_PARAM);
789 	}
790 
791 	/*
792 	 * Check here to see if the memory region has already been partially
793 	 * deregistered as a result of the hermon_umap_umemlock_cb() callback.
794 	 * If so, then jump to the end and free the remaining resources.
795 	 */
796 	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
797 		goto mrdereg_finish_cleanup;
798 	}
799 	if (hermon_rdma_debug & 0x4)
800 		IBTF_DPRINTF_L2("mr", "dereg: mr %p  key %x",
801 		    mr, mr->mr_rkey);
802 
803 	/*
804 	 * We must drop the "mr_lock" here to ensure that both SLEEP and
805 	 * NOSLEEP calls into the firmware work as expected.  Also, if two
806 	 * threads are attempting to access this MR (via de-register,
807 	 * re-register, or otherwise), then we allow the firmware to enforce
808 	 * the checking, that only one deregister is valid.
809 	 */
810 	mutex_exit(&mr->mr_lock);
811 
812 	/*
813 	 * Reclaim MPT entry from hardware (if necessary).  Since the
814 	 * hermon_mr_deregister() routine is used in the memory region
815 	 * reregistration process as well, it is possible that we will
816 	 * not always wish to reclaim ownership of the MPT.  Check the
817 	 * "level" arg and, if necessary, attempt to reclaim it.  If
818 	 * the ownership transfer fails for any reason, we check to see
819 	 * what command status was returned from the hardware.  The only
820 	 * "expected" error status is the one that indicates an attempt to
821 	 * deregister a memory region that has memory windows bound to it
822 	 */
823 	if (level >= HERMON_MR_DEREG_ALL) {
824 		if (mr->mr_mpt_type >= HERMON_MPT_DMPT) {
825 			status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
826 			    NULL, 0, mpt->hr_indx, sleep);
827 			if (status != HERMON_CMD_SUCCESS) {
828 				if (status == HERMON_CMD_REG_BOUND) {
829 					return (IBT_MR_IN_USE);
830 				} else {
831 					cmn_err(CE_CONT, "Hermon: HW2SW_MPT "
832 					    "command failed: %08x\n", status);
833 					if (status ==
834 					    HERMON_CMD_INVALID_STATUS) {
835 						hermon_fm_ereport(state,
836 						    HCA_SYS_ERR,
837 						    DDI_SERVICE_LOST);
838 					}
839 					return (IBT_INVALID_PARAM);
840 				}
841 			}
842 		}
843 	}
844 
845 	/*
846 	 * Re-grab the mr_lock here.  Since further access to the protected
847 	 * 'mr' structure is needed, and we would have returned previously for
848 	 * the multiple deregistration case, we can safely grab the lock here.
849 	 */
850 	mutex_enter(&mr->mr_lock);
851 
852 	/*
853 	 * If the memory had come from userland, then we do a lookup in the
854 	 * "userland resources database".  On success, we free the entry, call
855 	 * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
856 	 * an indication that the umem_lockmemory() callback has called
857 	 * hermon_mr_deregister()), we call ddi_umem_unlock() and invalidate
858 	 * the "mr_umemcookie" field in the MR handle (this will be used
859 	 * later to detect that only partial cleaup still remains to be done
860 	 * on the MR handle).
861 	 */
862 	if (mr->mr_is_umem) {
863 		status = hermon_umap_db_find(state->hs_instance,
864 		    (uint64_t)(uintptr_t)mr->mr_umemcookie,
865 		    MLNX_UMAP_MRMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
866 		    &umapdb);
867 		if (status == DDI_SUCCESS) {
868 			hermon_umap_db_free(umapdb);
869 			ddi_umem_unlock(mr->mr_umemcookie);
870 		} else {
871 			ddi_umem_unlock(mr->mr_umemcookie);
872 			mr->mr_umemcookie = NULL;
873 		}
874 	}
875 
876 	/* mtt_refcnt is NULL in the case of hermon_dma_mr_register() */
877 	if (mtt_refcnt != NULL) {
878 		/*
879 		 * Decrement the MTT reference count.  Since the MTT resource
880 		 * may be shared between multiple memory regions (as a result
881 		 * of a "RegisterSharedMR" verb) it is important that we not
882 		 * free up or unbind resources prematurely.  If it's not shared
883 		 * (as indicated by the return status), then free the resource.
884 		 */
885 		shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt);
886 		if (!shared_mtt) {
887 			hermon_rsrc_free(state, &mtt_refcnt);
888 		}
889 
890 		/*
891 		 * Free up the MTT entries and unbind the memory.  Here,
892 		 * as above, we attempt to free these resources only if
893 		 * it is appropriate to do so.
894 		 * Note, 'bind' is NULL in the alloc_lkey case.
895 		 */
896 		if (!shared_mtt) {
897 			if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) {
898 				hermon_mr_mem_unbind(state, bind);
899 			}
900 			hermon_rsrc_free(state, &mtt);
901 		}
902 	}
903 
904 	/*
905 	 * If the MR handle has been invalidated, then drop the
906 	 * lock and return success.  Note: This only happens because
907 	 * the umem_lockmemory() callback has been triggered.  The
908 	 * cleanup here is partial, and further cleanup (in a
909 	 * subsequent hermon_mr_deregister() call) will be necessary.
910 	 */
911 	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
912 		mutex_exit(&mr->mr_lock);
913 		return (DDI_SUCCESS);
914 	}
915 
916 mrdereg_finish_cleanup:
917 	mutex_exit(&mr->mr_lock);
918 
919 	/* Free the Hermon Memory Region handle */
920 	hermon_rsrc_free(state, &rsrc);
921 
922 	/* Free up the MPT entry resource */
923 	if (mpt != NULL)
924 		hermon_rsrc_free(state, &mpt);
925 
926 	/* Decrement the reference count on the protection domain (PD) */
927 	hermon_pd_refcnt_dec(pd);
928 
929 	/* Set the mrhdl pointer to NULL and return success */
930 	*mrhdl = NULL;
931 
932 	return (DDI_SUCCESS);
933 }
934 
935 /*
936  * hermon_mr_dealloc_fmr()
937  *    Context: Can be called from interrupt or base context.
938  */
939 /* ARGSUSED */
940 int
hermon_mr_dealloc_fmr(hermon_state_t * state,hermon_mrhdl_t * mrhdl)941 hermon_mr_dealloc_fmr(hermon_state_t *state, hermon_mrhdl_t *mrhdl)
942 {
943 	hermon_rsrc_t		*mpt, *mtt, *rsrc;
944 	hermon_pdhdl_t		pd;
945 	hermon_mrhdl_t		mr;
946 
947 	/*
948 	 * Pull all the necessary information from the Hermon Memory Region
949 	 * handle.  This is necessary here because the resource for the
950 	 * MR handle is going to be freed up as part of the this
951 	 * deregistration
952 	 */
953 	mr	= *mrhdl;
954 	mutex_enter(&mr->mr_lock);
955 	mpt	= mr->mr_mptrsrcp;
956 	mtt	= mr->mr_mttrsrcp;
957 	rsrc	= mr->mr_rsrcp;
958 	pd	= mr->mr_pdhdl;
959 	mutex_exit(&mr->mr_lock);
960 
961 	/* Free the MTT entries */
962 	hermon_rsrc_free(state, &mtt);
963 
964 	/* Free the Hermon Memory Region handle */
965 	hermon_rsrc_free(state, &rsrc);
966 
967 	/* Free up the MPT entry resource */
968 	hermon_rsrc_free(state, &mpt);
969 
970 	/* Decrement the reference count on the protection domain (PD) */
971 	hermon_pd_refcnt_dec(pd);
972 
973 	/* Set the mrhdl pointer to NULL and return success */
974 	*mrhdl = NULL;
975 
976 	return (DDI_SUCCESS);
977 }
978 
979 
980 /*
981  * hermon_mr_query()
982  *    Context: Can be called from interrupt or base context.
983  */
984 /* ARGSUSED */
int
hermon_mr_query(hermon_state_t *state, hermon_mrhdl_t mr,
    ibt_mr_query_attr_t *attr)
{
	int			status;
	hermon_hw_dmpt_t	mpt_entry;
	uint32_t		lkey;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))

	mutex_enter(&mr->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/*
	 * Fetch the current dMPT entry from the hardware.  The driver-side
	 * lkey is in (index[23:0], key[7:0]) form (see hermon_mr_key_swap()),
	 * so ">> 8" recovers the MPT table index for the command.  NOSLEEP
	 * is used since this routine may run in interrupt context.
	 */
	status = hermon_cmn_query_cmd_post(state, QUERY_MPT, 0,
	    mr->mr_lkey >> 8, &mpt_entry, sizeof (hermon_hw_dmpt_t),
	    HERMON_NOSLEEP);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: QUERY_MPT failed: status %x", status);
		mutex_exit(&mr->mr_lock);
		return (ibc_get_ci_failure(0));
	}

	/* Update the mr sw struct from the hw struct. */
	/*
	 * mem_key comes back in hardware (key[7:0], index[23:0]) form;
	 * rotating right by 8 restores the driver's (index, key) form.
	 */
	lkey = mpt_entry.mem_key;
	mr->mr_lkey = mr->mr_rkey = (lkey >> 8) | (lkey << 24);
	mr->mr_bindinfo.bi_addr = mpt_entry.start_addr;
	mr->mr_bindinfo.bi_len = mpt_entry.reg_win_len;
	/*
	 * Rebuild the access flags from the hardware enable bits, carrying
	 * over only the software-tracked IBT_MR_RO_DISABLED bit from the
	 * previous value.
	 */
	mr->mr_accflag = (mr->mr_accflag & IBT_MR_RO_DISABLED) |
	    (mpt_entry.lw ? IBT_MR_LOCAL_WRITE : 0) |
	    (mpt_entry.rr ? IBT_MR_REMOTE_READ : 0) |
	    (mpt_entry.rw ? IBT_MR_REMOTE_WRITE : 0) |
	    (mpt_entry.atomic ? IBT_MR_REMOTE_ATOMIC : 0) |
	    (mpt_entry.en_bind ? IBT_MR_WINDOW_BIND : 0);
	/*
	 * The MTT address is split across two dMPT fields; the low part is
	 * stored shifted down by 3 (8-byte units), so shift it back up.
	 */
	mr->mr_mttaddr = ((uint64_t)mpt_entry.mtt_addr_h << 32) |
	    (mpt_entry.mtt_addr_l << 3);
	mr->mr_logmttpgsz = mpt_entry.entity_sz;

	/* Fill in the queried attributes */
	attr->mr_lkey_state =
	    (mpt_entry.status == HERMON_MPT_FREE) ? IBT_KEY_FREE :
	    (mpt_entry.status == HERMON_MPT_SW_OWNERSHIP) ? IBT_KEY_INVALID :
	    IBT_KEY_VALID;
	attr->mr_phys_buf_list_sz = mpt_entry.mtt_size;
	attr->mr_attr_flags = mr->mr_accflag;
	attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;

	/* Fill in the "local" attributes */
	attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
	attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
	attr->mr_lbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;

	/*
	 * Fill in the "remote" attributes (if necessary).  Note: the
	 * remote attributes are only valid if the memory region has one
	 * or more of the remote access flags set.
	 */
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
		attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
		attr->mr_rbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;
	}

	/*
	 * If region is mapped for streaming (i.e. noncoherent), then set sync
	 * is required
	 */
	attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
	    IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

	mutex_exit(&mr->mr_lock);
	return (DDI_SUCCESS);
}
1068 
1069 
1070 /*
1071  * hermon_mr_reregister()
1072  *    Context: Can be called from interrupt or base context.
1073  */
1074 int
hermon_mr_reregister(hermon_state_t * state,hermon_mrhdl_t mr,hermon_pdhdl_t pd,ibt_mr_attr_t * mr_attr,hermon_mrhdl_t * mrhdl_new,hermon_mr_options_t * op)1075 hermon_mr_reregister(hermon_state_t *state, hermon_mrhdl_t mr,
1076     hermon_pdhdl_t pd, ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new,
1077     hermon_mr_options_t *op)
1078 {
1079 	hermon_bind_info_t	bind;
1080 	int			status;
1081 
1082 	/*
1083 	 * Fill in the "bind" struct.  This struct provides the majority
1084 	 * of the information that will be used to distinguish between an
1085 	 * "addr" binding (as is the case here) and a "buf" binding (see
1086 	 * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
1087 	 * which does most of the "heavy lifting" for the Hermon memory
1088 	 * registration (and reregistration) routines.
1089 	 */
1090 	bind.bi_type  = HERMON_BINDHDL_VADDR;
1091 	bind.bi_addr  = mr_attr->mr_vaddr;
1092 	bind.bi_len   = mr_attr->mr_len;
1093 	bind.bi_as    = mr_attr->mr_as;
1094 	bind.bi_flags = mr_attr->mr_flags;
1095 	status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
1096 	return (status);
1097 }
1098 
1099 
1100 /*
1101  * hermon_mr_reregister_buf()
1102  *    Context: Can be called from interrupt or base context.
1103  */
1104 int
hermon_mr_reregister_buf(hermon_state_t * state,hermon_mrhdl_t mr,hermon_pdhdl_t pd,ibt_smr_attr_t * mr_attr,struct buf * buf,hermon_mrhdl_t * mrhdl_new,hermon_mr_options_t * op)1105 hermon_mr_reregister_buf(hermon_state_t *state, hermon_mrhdl_t mr,
1106     hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
1107     hermon_mrhdl_t *mrhdl_new, hermon_mr_options_t *op)
1108 {
1109 	hermon_bind_info_t	bind;
1110 	int			status;
1111 
1112 	/*
1113 	 * Fill in the "bind" struct.  This struct provides the majority
1114 	 * of the information that will be used to distinguish between an
1115 	 * "addr" binding (see above) and a "buf" binding (as is the case
1116 	 * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
1117 	 * which does most of the "heavy lifting" for the Hermon memory
1118 	 * registration routines.  Note: We have chosen to provide
1119 	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
1120 	 * not set).  It is not critical what value we choose here as it need
1121 	 * only be unique for the given RKey (which will happen by default),
1122 	 * so the choice here is somewhat arbitrary.
1123 	 */
1124 	bind.bi_type  = HERMON_BINDHDL_BUF;
1125 	bind.bi_buf   = buf;
1126 	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
1127 		bind.bi_addr  = mr_attr->mr_vaddr;
1128 	} else {
1129 		bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
1130 	}
1131 	bind.bi_len   = (uint64_t)buf->b_bcount;
1132 	bind.bi_flags = mr_attr->mr_flags;
1133 	bind.bi_as    = NULL;
1134 	status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
1135 	return (status);
1136 }
1137 
1138 
1139 /*
1140  * hermon_mr_sync()
1141  *    Context: Can be called from interrupt or base context.
1142  */
1143 /* ARGSUSED */
1144 int
hermon_mr_sync(hermon_state_t * state,ibt_mr_sync_t * mr_segs,size_t num_segs)1145 hermon_mr_sync(hermon_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
1146 {
1147 	hermon_mrhdl_t		mrhdl;
1148 	uint64_t		seg_vaddr, seg_len, seg_end;
1149 	uint64_t		mr_start, mr_end;
1150 	uint_t			type;
1151 	int			status, i;
1152 
1153 	/* Process each of the ibt_mr_sync_t's */
1154 	for (i = 0; i < num_segs; i++) {
1155 		mrhdl = (hermon_mrhdl_t)mr_segs[i].ms_handle;
1156 
1157 		/* Check for valid memory region handle */
1158 		if (mrhdl == NULL) {
1159 			status = IBT_MR_HDL_INVALID;
1160 			goto mrsync_fail;
1161 		}
1162 
1163 		mutex_enter(&mrhdl->mr_lock);
1164 
1165 		/*
1166 		 * Check here to see if the memory region has already been
1167 		 * partially deregistered as a result of a
1168 		 * hermon_umap_umemlock_cb() callback.  If so, this is an
1169 		 * error, return failure.
1170 		 */
1171 		if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
1172 			mutex_exit(&mrhdl->mr_lock);
1173 			status = IBT_MR_HDL_INVALID;
1174 			goto mrsync_fail;
1175 		}
1176 
1177 		/* Check for valid bounds on sync request */
1178 		seg_vaddr = mr_segs[i].ms_vaddr;
1179 		seg_len	  = mr_segs[i].ms_len;
1180 		seg_end	  = seg_vaddr + seg_len - 1;
1181 		mr_start  = mrhdl->mr_bindinfo.bi_addr;
1182 		mr_end	  = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
1183 		if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
1184 			mutex_exit(&mrhdl->mr_lock);
1185 			status = IBT_MR_VA_INVALID;
1186 			goto mrsync_fail;
1187 		}
1188 		if ((seg_end < mr_start) || (seg_end > mr_end)) {
1189 			mutex_exit(&mrhdl->mr_lock);
1190 			status = IBT_MR_LEN_INVALID;
1191 			goto mrsync_fail;
1192 		}
1193 
1194 		/* Determine what type (i.e. direction) for sync */
1195 		if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
1196 			type = DDI_DMA_SYNC_FORDEV;
1197 		} else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
1198 			type = DDI_DMA_SYNC_FORCPU;
1199 		} else {
1200 			mutex_exit(&mrhdl->mr_lock);
1201 			status = IBT_INVALID_PARAM;
1202 			goto mrsync_fail;
1203 		}
1204 
1205 		(void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
1206 		    (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);
1207 
1208 		mutex_exit(&mrhdl->mr_lock);
1209 	}
1210 
1211 	return (DDI_SUCCESS);
1212 
1213 mrsync_fail:
1214 	return (status);
1215 }
1216 
1217 
1218 /*
1219  * hermon_mw_alloc()
1220  *    Context: Can be called from interrupt or base context.
1221  */
int
hermon_mw_alloc(hermon_state_t *state, hermon_pdhdl_t pd, ibt_mw_flags_t flags,
    hermon_mwhdl_t *mwhdl)
{
	hermon_rsrc_t		*mpt, *rsrc;
	hermon_hw_dmpt_t		mpt_entry;
	hermon_mwhdl_t		mw;
	uint_t			sleep;
	int			status;

	/*
	 * Memory window allocation is deliberately disabled: "state" is
	 * never NULL here, so every call returns IBT_INSUFF_RESOURCE and
	 * the remainder of this function is effectively unreachable.
	 * NOTE(review): presumably MWs are unsupported/untested on this
	 * HCA generation -- confirm before ever removing this guard.
	 */
	if (state != NULL)	/* XXX - bogus test that is always TRUE */
		return (IBT_INSUFF_RESOURCE);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MW_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mwalloc_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry (for use as a memory window).  Since the
	 * Hermon hardware uses the MPT entry for memory regions and for
	 * memory windows, we will fill in this MPT with all the necessary
	 * parameters for the memory window.  And then (just as we do for
	 * memory regions) ownership will be passed to the hardware in the
	 * final step below.  If we fail here, we must undo the protection
	 * domain reference count.
	 */
	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mwalloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory window (i.e.
	 * the Hermon Memory Window handle).  Note: This is actually the same
	 * software structure used for tracking memory regions, but since many
	 * of the same properties are needed, only a single structure is
	 * necessary.  If we fail here, we must undo the protection domain
	 * reference count and the previous resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mwalloc_fail2;
	}
	mw = (hermon_mwhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Calculate an "unbound" RKey from MPT index.  In much the same way
	 * as we do for memory regions (above), this key is constructed from
	 * a "constrained" (which depends on the MPT index) and an
	 * "unconstrained" portion (which may be arbitrarily chosen).
	 */
	mw->mr_rkey = hermon_mr_keycalc(mpt->hr_indx);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.  Note: fewer entries in the MPT
	 * entry are necessary to allocate a memory window.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
	mpt_entry.reg_win	= HERMON_MPT_IS_WINDOW;
	mpt_entry.mem_key	= mw->mr_rkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.lr		= 1;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto mwalloc_fail3;
	}

	/*
	 * Fill in the rest of the Hermon Memory Window handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MW.
	 * The rkey is converted to the hardware's (key, index) form here
	 * via hermon_mr_key_swap().
	 */
	mw->mr_mptrsrcp	= mpt;
	mw->mr_pdhdl	= pd;
	mw->mr_rsrcp	= rsrc;
	mw->mr_rkey	= hermon_mr_key_swap(mw->mr_rkey);
	*mwhdl = mw;

	return (DDI_SUCCESS);

mwalloc_fail3:
	hermon_rsrc_free(state, &rsrc);
mwalloc_fail2:
	hermon_rsrc_free(state, &mpt);
mwalloc_fail1:
	hermon_pd_refcnt_dec(pd);
mwalloc_fail:
	return (status);
}
1342 
1343 
1344 /*
1345  * hermon_mw_free()
1346  *    Context: Can be called from interrupt or base context.
1347  */
1348 int
hermon_mw_free(hermon_state_t * state,hermon_mwhdl_t * mwhdl,uint_t sleep)1349 hermon_mw_free(hermon_state_t *state, hermon_mwhdl_t *mwhdl, uint_t sleep)
1350 {
1351 	hermon_rsrc_t		*mpt, *rsrc;
1352 	hermon_mwhdl_t		mw;
1353 	int			status;
1354 	hermon_pdhdl_t		pd;
1355 
1356 	/*
1357 	 * Check the sleep flag.  Ensure that it is consistent with the
1358 	 * current thread context (i.e. if we are currently in the interrupt
1359 	 * context, then we shouldn't be attempting to sleep).
1360 	 */
1361 	if ((sleep == HERMON_SLEEP) &&
1362 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1363 		status = IBT_INVALID_PARAM;
1364 		return (status);
1365 	}
1366 
1367 	/*
1368 	 * Pull all the necessary information from the Hermon Memory Window
1369 	 * handle.  This is necessary here because the resource for the
1370 	 * MW handle is going to be freed up as part of the this operation.
1371 	 */
1372 	mw	= *mwhdl;
1373 	mutex_enter(&mw->mr_lock);
1374 	mpt	= mw->mr_mptrsrcp;
1375 	rsrc	= mw->mr_rsrcp;
1376 	pd	= mw->mr_pdhdl;
1377 	mutex_exit(&mw->mr_lock);
1378 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))
1379 
1380 	/*
1381 	 * Reclaim the MPT entry from hardware.  Note: in general, it is
1382 	 * unexpected for this operation to return an error.
1383 	 */
1384 	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
1385 	    0, mpt->hr_indx, sleep);
1386 	if (status != HERMON_CMD_SUCCESS) {
1387 		cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: %08x\n",
1388 		    status);
1389 		if (status == HERMON_CMD_INVALID_STATUS) {
1390 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1391 		}
1392 		return (ibc_get_ci_failure(0));
1393 	}
1394 
1395 	/* Free the Hermon Memory Window handle */
1396 	hermon_rsrc_free(state, &rsrc);
1397 
1398 	/* Free up the MPT entry resource */
1399 	hermon_rsrc_free(state, &mpt);
1400 
1401 	/* Decrement the reference count on the protection domain (PD) */
1402 	hermon_pd_refcnt_dec(pd);
1403 
1404 	/* Set the mwhdl pointer to NULL and return success */
1405 	*mwhdl = NULL;
1406 
1407 	return (DDI_SUCCESS);
1408 }
1409 
1410 
1411 /*
1412  * hermon_mr_keycalc()
1413  *    Context: Can be called from interrupt or base context.
1414  *    NOTE:  Produces a key in the form of
1415  *		KKKKKKKK IIIIIIII IIIIIIII IIIIIIIII
1416  *    where K == the arbitrary bits and I == the index
1417  */
1418 uint32_t
hermon_mr_keycalc(uint32_t indx)1419 hermon_mr_keycalc(uint32_t indx)
1420 {
1421 	uint32_t tmp_key, tmp_indx;
1422 
1423 	/*
1424 	 * Generate a simple key from counter.  Note:  We increment this
1425 	 * static variable _intentionally_ without any kind of mutex around
1426 	 * it.  First, single-threading all operations through a single lock
1427 	 * would be a bad idea (from a performance point-of-view).  Second,
1428 	 * the upper "unconstrained" bits don't really have to be unique
1429 	 * because the lower bits are guaranteed to be (although we do make a
1430 	 * best effort to ensure that they are).  Third, the window for the
1431 	 * race (where both threads read and update the counter at the same
1432 	 * time) is incredibly small.
1433 	 * And, lastly, we'd like to make this into a "random" key
1434 	 */
1435 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hermon_memkey_cnt))
1436 	tmp_key = (hermon_memkey_cnt++) << HERMON_MEMKEY_SHIFT;
1437 	tmp_indx = indx & 0xffffff;
1438 	return (tmp_key | tmp_indx);
1439 }
1440 
1441 
1442 /*
1443  * hermon_mr_key_swap()
1444  *    Context: Can be called from interrupt or base context.
1445  *    NOTE:  Produces a key in the form of
1446  *		IIIIIIII IIIIIIII IIIIIIIII KKKKKKKK
1447  *    where K == the arbitrary bits and I == the index
1448  */
uint32_t
hermon_mr_key_swap(uint32_t indx)
{
	/*
	 * The hardware consumes memory keys as (key[7:0], index[23:0]),
	 * while the driver passes keys around as (index[23:0], key[7:0]).
	 * Converting between the two forms is simply a rotate of the
	 * 32-bit value left by 8: the top byte moves to the bottom and
	 * the low 24 bits shift up.
	 */
	return ((indx << 8) | (indx >> 24));
}
1461 
1462 /*
1463  * hermon_mr_common_reg()
1464  *    Context: Can be called from interrupt or base context.
1465  */
1466 static int
hermon_mr_common_reg(hermon_state_t * state,hermon_pdhdl_t pd,hermon_bind_info_t * bind,hermon_mrhdl_t * mrhdl,hermon_mr_options_t * op,hermon_mpt_rsrc_type_t mpt_type)1467 hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
1468     hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
1469     hermon_mpt_rsrc_type_t mpt_type)
1470 {
1471 	hermon_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
1472 	hermon_umap_db_entry_t	*umapdb;
1473 	hermon_sw_refcnt_t	*swrc_tmp;
1474 	hermon_hw_dmpt_t	mpt_entry;
1475 	hermon_mrhdl_t		mr;
1476 	ibt_mr_flags_t		flags;
1477 	hermon_bind_info_t	*bh;
1478 	ddi_dma_handle_t	bind_dmahdl;
1479 	ddi_umem_cookie_t	umem_cookie;
1480 	size_t			umem_len;
1481 	caddr_t			umem_addr;
1482 	uint64_t		mtt_addr, max_sz;
1483 	uint_t			sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
1484 	int			status, umem_flags, bind_override_addr;
1485 
1486 	/*
1487 	 * Check the "options" flag.  Currently this flag tells the driver
1488 	 * whether or not the region should be bound normally (i.e. with
1489 	 * entries written into the PCI IOMMU), whether it should be
1490 	 * registered to bypass the IOMMU, and whether or not the resulting
1491 	 * address should be "zero-based" (to aid the alignment restrictions
1492 	 * for QPs).
1493 	 */
1494 	if (op == NULL) {
1495 		bind_type   = HERMON_BINDMEM_NORMAL;
1496 		bind_dmahdl = NULL;
1497 		bind_override_addr = 0;
1498 	} else {
1499 		bind_type	   = op->mro_bind_type;
1500 		bind_dmahdl	   = op->mro_bind_dmahdl;
1501 		bind_override_addr = op->mro_bind_override_addr;
1502 	}
1503 
1504 	/* check what kind of mpt to use */
1505 
1506 	/* Extract the flags field from the hermon_bind_info_t */
1507 	flags = bind->bi_flags;
1508 
1509 	/*
1510 	 * Check for invalid length.  Check is the length is zero or if the
1511 	 * length is larger than the maximum configured value.  Return error
1512 	 * if it is.
1513 	 */
1514 	max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
1515 	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
1516 		status = IBT_MR_LEN_INVALID;
1517 		goto mrcommon_fail;
1518 	}
1519 
1520 	/*
1521 	 * Check the sleep flag.  Ensure that it is consistent with the
1522 	 * current thread context (i.e. if we are currently in the interrupt
1523 	 * context, then we shouldn't be attempting to sleep).
1524 	 */
1525 	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
1526 	if ((sleep == HERMON_SLEEP) &&
1527 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1528 		status = IBT_INVALID_PARAM;
1529 		goto mrcommon_fail;
1530 	}
1531 
1532 	/* Increment the reference count on the protection domain (PD) */
1533 	hermon_pd_refcnt_inc(pd);
1534 
1535 	/*
1536 	 * Allocate an MPT entry.  This will be filled in with all the
1537 	 * necessary parameters to define the memory region.  And then
1538 	 * ownership will be passed to the hardware in the final step
1539 	 * below.  If we fail here, we must undo the protection domain
1540 	 * reference count.
1541 	 */
1542 	if (mpt_type == HERMON_MPT_DMPT) {
1543 		status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1544 		if (status != DDI_SUCCESS) {
1545 			status = IBT_INSUFF_RESOURCE;
1546 			goto mrcommon_fail1;
1547 		}
1548 	} else {
1549 		mpt = NULL;
1550 	}
1551 
1552 	/*
1553 	 * Allocate the software structure for tracking the memory region (i.e.
1554 	 * the Hermon Memory Region handle).  If we fail here, we must undo
1555 	 * the protection domain reference count and the previous resource
1556 	 * allocation.
1557 	 */
1558 	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1559 	if (status != DDI_SUCCESS) {
1560 		status = IBT_INSUFF_RESOURCE;
1561 		goto mrcommon_fail2;
1562 	}
1563 	mr = (hermon_mrhdl_t)rsrc->hr_addr;
1564 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1565 
1566 	/*
1567 	 * Setup and validate the memory region access flags.  This means
1568 	 * translating the IBTF's enable flags into the access flags that
1569 	 * will be used in later operations.
1570 	 */
1571 	mr->mr_accflag = 0;
1572 	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1573 		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1574 	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1575 		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1576 	if (flags & IBT_MR_ENABLE_REMOTE_READ)
1577 		mr->mr_accflag |= IBT_MR_REMOTE_READ;
1578 	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1579 		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1580 	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1581 		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1582 
1583 	/*
1584 	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
1585 	 * from a certain number of "constrained" bits (the least significant
1586 	 * bits) and some number of "unconstrained" bits.  The constrained
1587 	 * bits must be set to the index of the entry in the MPT table, but
1588 	 * the unconstrained bits can be set to any value we wish.  Note:
1589 	 * if no remote access is required, then the RKey value is not filled
1590 	 * in.  Otherwise both Rkey and LKey are given the same value.
1591 	 */
1592 	if (mpt)
1593 		mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
1594 
1595 	/*
1596 	 * Determine if the memory is from userland and pin the pages
1597 	 * with umem_lockmemory() if necessary.
1598 	 * Then, if this is userland memory, allocate an entry in the
1599 	 * "userland resources database".  This will later be added to
1600 	 * the database (after all further memory registration operations are
1601 	 * successful).  If we fail here, we must undo the reference counts
1602 	 * and the previous resource allocations.
1603 	 */
1604 	mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
1605 	if (mr_is_umem) {
1606 		umem_len   = ptob(btopr(bind->bi_len +
1607 		    ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
1608 		umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
1609 		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
1610 		    DDI_UMEMLOCK_LONGTERM);
1611 		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
1612 		    &umem_cookie, &hermon_umem_cbops, NULL);
1613 		if (status != 0) {
1614 			status = IBT_INSUFF_RESOURCE;
1615 			goto mrcommon_fail3;
1616 		}
1617 
1618 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1619 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1620 
1621 		bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
1622 		    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
1623 		if (bind->bi_buf == NULL) {
1624 			status = IBT_INSUFF_RESOURCE;
1625 			goto mrcommon_fail3;
1626 		}
1627 		bind->bi_type = HERMON_BINDHDL_UBUF;
1628 		bind->bi_buf->b_flags |= B_READ;
1629 
1630 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1631 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1632 
1633 		umapdb = hermon_umap_db_alloc(state->hs_instance,
1634 		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
1635 		    (uint64_t)(uintptr_t)rsrc);
1636 		if (umapdb == NULL) {
1637 			status = IBT_INSUFF_RESOURCE;
1638 			goto mrcommon_fail4;
1639 		}
1640 	}
1641 
1642 	/*
1643 	 * Setup the bindinfo for the mtt bind call
1644 	 */
1645 	bh = &mr->mr_bindinfo;
1646 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
1647 	bcopy(bind, bh, sizeof (hermon_bind_info_t));
1648 	bh->bi_bypass = bind_type;
1649 	status = hermon_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
1650 	    &mtt_pgsize_bits, mpt != NULL);
1651 	if (status != DDI_SUCCESS) {
1652 		/*
1653 		 * When mtt_bind fails, freerbuf has already been done,
1654 		 * so make sure not to call it again.
1655 		 */
1656 		bind->bi_type = bh->bi_type;
1657 		goto mrcommon_fail5;
1658 	}
1659 	mr->mr_logmttpgsz = mtt_pgsize_bits;
1660 
1661 	/*
1662 	 * Allocate MTT reference count (to track shared memory regions).
1663 	 * This reference count resource may never be used on the given
1664 	 * memory region, but if it is ever later registered as "shared"
1665 	 * memory region then this resource will be necessary.  If we fail
1666 	 * here, we do pretty much the same as above to clean up.
1667 	 */
1668 	status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
1669 	    &mtt_refcnt);
1670 	if (status != DDI_SUCCESS) {
1671 		status = IBT_INSUFF_RESOURCE;
1672 		goto mrcommon_fail6;
1673 	}
1674 	mr->mr_mttrefcntp = mtt_refcnt;
1675 	swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
1676 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
1677 	HERMON_MTT_REFCNT_INIT(swrc_tmp);
1678 
1679 	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
1680 
1681 	/*
1682 	 * Fill in the MPT entry.  This is the final step before passing
1683 	 * ownership of the MPT entry to the Hermon hardware.  We use all of
1684 	 * the information collected/calculated above to fill in the
1685 	 * requisite portions of the MPT.  Do this ONLY for DMPTs.
1686 	 */
1687 	if (mpt == NULL)
1688 		goto no_passown;
1689 
1690 	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1691 
1692 	mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
1693 	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
1694 	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1695 	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
1696 	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
1697 	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
1698 	mpt_entry.lr	  = 1;
1699 	mpt_entry.phys_addr = 0;
1700 	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
1701 
1702 	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
1703 	mpt_entry.mem_key	= mr->mr_lkey;
1704 	mpt_entry.pd		= pd->pd_pdnum;
1705 	mpt_entry.rem_acc_en = 0;
1706 	mpt_entry.fast_reg_en = 0;
1707 	mpt_entry.en_inval = 0;
1708 	mpt_entry.lkey = 0;
1709 	mpt_entry.win_cnt = 0;
1710 
1711 	if (bind_override_addr == 0) {
1712 		mpt_entry.start_addr = bh->bi_addr;
1713 	} else {
1714 		bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
1715 		mpt_entry.start_addr = bh->bi_addr;
1716 	}
1717 	mpt_entry.reg_win_len	= bh->bi_len;
1718 
1719 	mpt_entry.mtt_addr_h = mtt_addr >> 32;  /* only 8 more bits */
1720 	mpt_entry.mtt_addr_l = mtt_addr >> 3;	/* only 29 bits */
1721 
1722 	/*
1723 	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
1724 	 * the entry to the hardware if needed.  Note: in general, this
1725 	 * operation shouldn't fail.  But if it does, we have to undo
1726 	 * everything we've done above before returning error.
1727 	 *
1728 	 * For Hermon, this routine (which is common to the contexts) will only
1729 	 * set the ownership if needed - the process of passing the context
1730 	 * itself to HW will take care of setting up the MPT (based on type
1731 	 * and index).
1732 	 */
1733 
1734 	mpt_entry.bnd_qp = 0;	/* dMPT for a qp, check for window */
1735 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1736 	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1737 	if (status != HERMON_CMD_SUCCESS) {
1738 		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1739 		    status);
1740 		if (status == HERMON_CMD_INVALID_STATUS) {
1741 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1742 		}
1743 		status = ibc_get_ci_failure(0);
1744 		goto mrcommon_fail7;
1745 	}
1746 	if (hermon_rdma_debug & 0x4)
1747 		IBTF_DPRINTF_L2("mr", "  reg: mr %p  key %x",
1748 		    mr, hermon_mr_key_swap(mr->mr_rkey));
1749 no_passown:
1750 
1751 	/*
1752 	 * Fill in the rest of the Hermon Memory Region handle.  Having
1753 	 * successfully transferred ownership of the MPT, we can update the
1754 	 * following fields for use in further operations on the MR.
1755 	 */
1756 	mr->mr_mttaddr	   = mtt_addr;
1757 
1758 	mr->mr_log2_pgsz   = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
1759 	mr->mr_mptrsrcp	   = mpt;
1760 	mr->mr_mttrsrcp	   = mtt;
1761 	mr->mr_pdhdl	   = pd;
1762 	mr->mr_rsrcp	   = rsrc;
1763 	mr->mr_is_umem	   = mr_is_umem;
1764 	mr->mr_is_fmr	   = 0;
1765 	mr->mr_umemcookie  = (mr_is_umem != 0) ? umem_cookie : NULL;
1766 	mr->mr_umem_cbfunc = NULL;
1767 	mr->mr_umem_cbarg1 = NULL;
1768 	mr->mr_umem_cbarg2 = NULL;
1769 	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
1770 	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);
1771 	mr->mr_mpt_type	   = mpt_type;
1772 
1773 	/*
1774 	 * If this is userland memory, then we need to insert the previously
1775 	 * allocated entry into the "userland resources database".  This will
1776 	 * allow for later coordination between the hermon_umap_umemlock_cb()
1777 	 * callback and hermon_mr_deregister().
1778 	 */
1779 	if (mr_is_umem) {
1780 		hermon_umap_db_add(umapdb);
1781 	}
1782 
1783 	*mrhdl = mr;
1784 
1785 	return (DDI_SUCCESS);
1786 
1787 /*
1788  * The following is cleanup for all possible failure cases in this routine
1789  */
1790 mrcommon_fail7:
1791 	hermon_rsrc_free(state, &mtt_refcnt);
1792 mrcommon_fail6:
1793 	hermon_mr_mem_unbind(state, bh);
1794 	bind->bi_type = bh->bi_type;
1795 mrcommon_fail5:
1796 	if (mr_is_umem) {
1797 		hermon_umap_db_free(umapdb);
1798 	}
1799 mrcommon_fail4:
1800 	if (mr_is_umem) {
1801 		/*
1802 		 * Free up the memory ddi_umem_iosetup() allocates
1803 		 * internally.
1804 		 */
1805 		if (bind->bi_type == HERMON_BINDHDL_UBUF) {
1806 			freerbuf(bind->bi_buf);
1807 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1808 			bind->bi_type = HERMON_BINDHDL_NONE;
1809 			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1810 		}
1811 		ddi_umem_unlock(umem_cookie);
1812 	}
1813 mrcommon_fail3:
1814 	hermon_rsrc_free(state, &rsrc);
1815 mrcommon_fail2:
1816 	if (mpt != NULL)
1817 		hermon_rsrc_free(state, &mpt);
1818 mrcommon_fail1:
1819 	hermon_pd_refcnt_dec(pd);
1820 mrcommon_fail:
1821 	return (status);
1822 }
1823 
1824 /*
1825  * hermon_dma_mr_register()
1826  *    Context: Can be called from base context.
1827  */
1828 int
hermon_dma_mr_register(hermon_state_t * state,hermon_pdhdl_t pd,ibt_dmr_attr_t * mr_attr,hermon_mrhdl_t * mrhdl)1829 hermon_dma_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
1830     ibt_dmr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl)
1831 {
1832 	hermon_rsrc_t		*mpt, *rsrc;
1833 	hermon_hw_dmpt_t	mpt_entry;
1834 	hermon_mrhdl_t		mr;
1835 	ibt_mr_flags_t		flags;
1836 	uint_t			sleep;
1837 	int			status;
1838 
1839 	/* Extract the flags field */
1840 	flags = mr_attr->dmr_flags;
1841 
1842 	/*
1843 	 * Check the sleep flag.  Ensure that it is consistent with the
1844 	 * current thread context (i.e. if we are currently in the interrupt
1845 	 * context, then we shouldn't be attempting to sleep).
1846 	 */
1847 	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
1848 	if ((sleep == HERMON_SLEEP) &&
1849 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1850 		status = IBT_INVALID_PARAM;
1851 		goto mrcommon_fail;
1852 	}
1853 
1854 	/* Increment the reference count on the protection domain (PD) */
1855 	hermon_pd_refcnt_inc(pd);
1856 
1857 	/*
1858 	 * Allocate an MPT entry.  This will be filled in with all the
1859 	 * necessary parameters to define the memory region.  And then
1860 	 * ownership will be passed to the hardware in the final step
1861 	 * below.  If we fail here, we must undo the protection domain
1862 	 * reference count.
1863 	 */
1864 	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1865 	if (status != DDI_SUCCESS) {
1866 		status = IBT_INSUFF_RESOURCE;
1867 		goto mrcommon_fail1;
1868 	}
1869 
1870 	/*
1871 	 * Allocate the software structure for tracking the memory region (i.e.
1872 	 * the Hermon Memory Region handle).  If we fail here, we must undo
1873 	 * the protection domain reference count and the previous resource
1874 	 * allocation.
1875 	 */
1876 	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1877 	if (status != DDI_SUCCESS) {
1878 		status = IBT_INSUFF_RESOURCE;
1879 		goto mrcommon_fail2;
1880 	}
1881 	mr = (hermon_mrhdl_t)rsrc->hr_addr;
1882 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1883 	bzero(mr, sizeof (*mr));
1884 
1885 	/*
1886 	 * Setup and validate the memory region access flags.  This means
1887 	 * translating the IBTF's enable flags into the access flags that
1888 	 * will be used in later operations.
1889 	 */
1890 	mr->mr_accflag = 0;
1891 	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1892 		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1893 	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1894 		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1895 	if (flags & IBT_MR_ENABLE_REMOTE_READ)
1896 		mr->mr_accflag |= IBT_MR_REMOTE_READ;
1897 	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1898 		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1899 	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1900 		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1901 
1902 	/*
1903 	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
1904 	 * from a certain number of "constrained" bits (the least significant
1905 	 * bits) and some number of "unconstrained" bits.  The constrained
1906 	 * bits must be set to the index of the entry in the MPT table, but
1907 	 * the unconstrained bits can be set to any value we wish.  Note:
1908 	 * if no remote access is required, then the RKey value is not filled
1909 	 * in.  Otherwise both Rkey and LKey are given the same value.
1910 	 */
1911 	if (mpt)
1912 		mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
1913 
1914 	/*
1915 	 * Fill in the MPT entry.  This is the final step before passing
1916 	 * ownership of the MPT entry to the Hermon hardware.  We use all of
1917 	 * the information collected/calculated above to fill in the
1918 	 * requisite portions of the MPT.  Do this ONLY for DMPTs.
1919 	 */
1920 	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1921 
1922 	mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
1923 	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
1924 	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1925 	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
1926 	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
1927 	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
1928 	mpt_entry.lr	  = 1;
1929 	mpt_entry.phys_addr = 1;	/* critical bit for this */
1930 	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
1931 
1932 	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
1933 	mpt_entry.mem_key	= mr->mr_lkey;
1934 	mpt_entry.pd		= pd->pd_pdnum;
1935 	mpt_entry.rem_acc_en = 0;
1936 	mpt_entry.fast_reg_en = 0;
1937 	mpt_entry.en_inval = 0;
1938 	mpt_entry.lkey = 0;
1939 	mpt_entry.win_cnt = 0;
1940 
1941 	mpt_entry.start_addr = mr_attr->dmr_paddr;
1942 	mpt_entry.reg_win_len = mr_attr->dmr_len;
1943 	if (mr_attr->dmr_len == 0)
1944 		mpt_entry.len_b64 = 1;	/* needed for 2^^64 length */
1945 
1946 	mpt_entry.mtt_addr_h = 0;
1947 	mpt_entry.mtt_addr_l = 0;
1948 
1949 	/*
1950 	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
1951 	 * the entry to the hardware if needed.  Note: in general, this
1952 	 * operation shouldn't fail.  But if it does, we have to undo
1953 	 * everything we've done above before returning error.
1954 	 *
1955 	 * For Hermon, this routine (which is common to the contexts) will only
1956 	 * set the ownership if needed - the process of passing the context
1957 	 * itself to HW will take care of setting up the MPT (based on type
1958 	 * and index).
1959 	 */
1960 
1961 	mpt_entry.bnd_qp = 0;	/* dMPT for a qp, check for window */
1962 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1963 	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1964 	if (status != HERMON_CMD_SUCCESS) {
1965 		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1966 		    status);
1967 		if (status == HERMON_CMD_INVALID_STATUS) {
1968 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1969 		}
1970 		status = ibc_get_ci_failure(0);
1971 		goto mrcommon_fail7;
1972 	}
1973 
1974 	/*
1975 	 * Fill in the rest of the Hermon Memory Region handle.  Having
1976 	 * successfully transferred ownership of the MPT, we can update the
1977 	 * following fields for use in further operations on the MR.
1978 	 */
1979 	mr->mr_mttaddr	   = 0;
1980 
1981 	mr->mr_log2_pgsz   = 0;
1982 	mr->mr_mptrsrcp	   = mpt;
1983 	mr->mr_mttrsrcp	   = NULL;
1984 	mr->mr_pdhdl	   = pd;
1985 	mr->mr_rsrcp	   = rsrc;
1986 	mr->mr_is_umem	   = 0;
1987 	mr->mr_is_fmr	   = 0;
1988 	mr->mr_umemcookie  = NULL;
1989 	mr->mr_umem_cbfunc = NULL;
1990 	mr->mr_umem_cbarg1 = NULL;
1991 	mr->mr_umem_cbarg2 = NULL;
1992 	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
1993 	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);
1994 	mr->mr_mpt_type	   = HERMON_MPT_DMPT;
1995 
1996 	*mrhdl = mr;
1997 
1998 	return (DDI_SUCCESS);
1999 
2000 /*
2001  * The following is cleanup for all possible failure cases in this routine
2002  */
2003 mrcommon_fail7:
2004 	hermon_rsrc_free(state, &rsrc);
2005 mrcommon_fail2:
2006 	hermon_rsrc_free(state, &mpt);
2007 mrcommon_fail1:
2008 	hermon_pd_refcnt_dec(pd);
2009 mrcommon_fail:
2010 	return (status);
2011 }
2012 
2013 /*
2014  * hermon_mr_alloc_lkey()
2015  *    Context: Can be called from base context.
2016  */
2017 int
hermon_mr_alloc_lkey(hermon_state_t * state,hermon_pdhdl_t pd,ibt_lkey_flags_t flags,uint_t nummtt,hermon_mrhdl_t * mrhdl)2018 hermon_mr_alloc_lkey(hermon_state_t *state, hermon_pdhdl_t pd,
2019     ibt_lkey_flags_t flags, uint_t nummtt, hermon_mrhdl_t *mrhdl)
2020 {
2021 	hermon_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
2022 	hermon_sw_refcnt_t	*swrc_tmp;
2023 	hermon_hw_dmpt_t	mpt_entry;
2024 	hermon_mrhdl_t		mr;
2025 	uint64_t		mtt_addr;
2026 	uint_t			sleep;
2027 	int			status;
2028 
2029 	/* Increment the reference count on the protection domain (PD) */
2030 	hermon_pd_refcnt_inc(pd);
2031 
2032 	sleep = (flags & IBT_KEY_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
2033 
2034 	/*
2035 	 * Allocate an MPT entry.  This will be filled in with "some" of the
2036 	 * necessary parameters to define the memory region.  And then
2037 	 * ownership will be passed to the hardware in the final step
2038 	 * below.  If we fail here, we must undo the protection domain
2039 	 * reference count.
2040 	 *
2041 	 * The MTTs will get filled in when the FRWR is processed.
2042 	 */
2043 	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
2044 	if (status != DDI_SUCCESS) {
2045 		status = IBT_INSUFF_RESOURCE;
2046 		goto alloclkey_fail1;
2047 	}
2048 
2049 	/*
2050 	 * Allocate the software structure for tracking the memory region (i.e.
2051 	 * the Hermon Memory Region handle).  If we fail here, we must undo
2052 	 * the protection domain reference count and the previous resource
2053 	 * allocation.
2054 	 */
2055 	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
2056 	if (status != DDI_SUCCESS) {
2057 		status = IBT_INSUFF_RESOURCE;
2058 		goto alloclkey_fail2;
2059 	}
2060 	mr = (hermon_mrhdl_t)rsrc->hr_addr;
2061 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
2062 	bzero(mr, sizeof (*mr));
2063 	mr->mr_bindinfo.bi_type = HERMON_BINDHDL_LKEY;
2064 
2065 	mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
2066 
2067 	status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
2068 	if (status != DDI_SUCCESS) {
2069 		status = IBT_INSUFF_RESOURCE;
2070 		goto alloclkey_fail3;
2071 	}
2072 	mr->mr_logmttpgsz = PAGESHIFT;
2073 
2074 	/*
2075 	 * Allocate MTT reference count (to track shared memory regions).
2076 	 * This reference count resource may never be used on the given
2077 	 * memory region, but if it is ever later registered as "shared"
2078 	 * memory region then this resource will be necessary.  If we fail
2079 	 * here, we do pretty much the same as above to clean up.
2080 	 */
2081 	status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
2082 	    &mtt_refcnt);
2083 	if (status != DDI_SUCCESS) {
2084 		status = IBT_INSUFF_RESOURCE;
2085 		goto alloclkey_fail4;
2086 	}
2087 	mr->mr_mttrefcntp = mtt_refcnt;
2088 	swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
2089 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
2090 	HERMON_MTT_REFCNT_INIT(swrc_tmp);
2091 
2092 	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
2093 
2094 	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2095 	mpt_entry.status = HERMON_MPT_FREE;
2096 	mpt_entry.lw = 1;
2097 	mpt_entry.lr = 1;
2098 	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2099 	mpt_entry.entity_sz = mr->mr_logmttpgsz;
2100 	mpt_entry.mem_key = mr->mr_lkey;
2101 	mpt_entry.pd = pd->pd_pdnum;
2102 	mpt_entry.fast_reg_en = 1;
2103 	mpt_entry.rem_acc_en = 1;
2104 	mpt_entry.en_inval = 1;
2105 	if (flags & IBT_KEY_REMOTE) {
2106 		mpt_entry.ren_inval = 1;
2107 	}
2108 	mpt_entry.mtt_size = nummtt;
2109 	mpt_entry.mtt_addr_h = mtt_addr >> 32;	/* only 8 more bits */
2110 	mpt_entry.mtt_addr_l = mtt_addr >> 3;	/* only 29 bits */
2111 
2112 	/*
2113 	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
2114 	 * the entry to the hardware if needed.  Note: in general, this
2115 	 * operation shouldn't fail.  But if it does, we have to undo
2116 	 * everything we've done above before returning error.
2117 	 *
2118 	 * For Hermon, this routine (which is common to the contexts) will only
2119 	 * set the ownership if needed - the process of passing the context
2120 	 * itself to HW will take care of setting up the MPT (based on type
2121 	 * and index).
2122 	 */
2123 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2124 	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
2125 	if (status != HERMON_CMD_SUCCESS) {
2126 		cmn_err(CE_CONT, "Hermon: alloc_lkey: SW2HW_MPT command "
2127 		    "failed: %08x\n", status);
2128 		if (status == HERMON_CMD_INVALID_STATUS) {
2129 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2130 		}
2131 		status = ibc_get_ci_failure(0);
2132 		goto alloclkey_fail5;
2133 	}
2134 
2135 	/*
2136 	 * Fill in the rest of the Hermon Memory Region handle.  Having
2137 	 * successfully transferred ownership of the MPT, we can update the
2138 	 * following fields for use in further operations on the MR.
2139 	 */
2140 	mr->mr_accflag = IBT_MR_LOCAL_WRITE;
2141 	mr->mr_mttaddr = mtt_addr;
2142 	mr->mr_log2_pgsz = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
2143 	mr->mr_mptrsrcp = mpt;
2144 	mr->mr_mttrsrcp = mtt;
2145 	mr->mr_pdhdl = pd;
2146 	mr->mr_rsrcp = rsrc;
2147 	mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
2148 	mr->mr_rkey = mr->mr_lkey;
2149 	mr->mr_mpt_type = HERMON_MPT_DMPT;
2150 
2151 	*mrhdl = mr;
2152 	return (DDI_SUCCESS);
2153 
2154 alloclkey_fail5:
2155 	hermon_rsrc_free(state, &mtt_refcnt);
2156 alloclkey_fail4:
2157 	hermon_rsrc_free(state, &mtt);
2158 alloclkey_fail3:
2159 	hermon_rsrc_free(state, &rsrc);
2160 alloclkey_fail2:
2161 	hermon_rsrc_free(state, &mpt);
2162 alloclkey_fail1:
2163 	hermon_pd_refcnt_dec(pd);
2164 	return (status);
2165 }
2166 
2167 /*
2168  * hermon_mr_fexch_mpt_init()
2169  *    Context: Can be called from base context.
2170  *
2171  * This is the same as alloc_lkey, but not returning an mrhdl.
2172  */
2173 int
hermon_mr_fexch_mpt_init(hermon_state_t * state,hermon_pdhdl_t pd,uint32_t mpt_indx,uint_t nummtt,uint64_t mtt_addr,uint_t sleep)2174 hermon_mr_fexch_mpt_init(hermon_state_t *state, hermon_pdhdl_t pd,
2175     uint32_t mpt_indx, uint_t nummtt, uint64_t mtt_addr, uint_t sleep)
2176 {
2177 	hermon_hw_dmpt_t	mpt_entry;
2178 	int			status;
2179 
2180 	/*
2181 	 * The MTTs will get filled in when the FRWR is processed.
2182 	 */
2183 
2184 	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2185 	mpt_entry.status = HERMON_MPT_FREE;
2186 	mpt_entry.lw = 1;
2187 	mpt_entry.lr = 1;
2188 	mpt_entry.rw = 1;
2189 	mpt_entry.rr = 1;
2190 	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2191 	mpt_entry.entity_sz = PAGESHIFT;
2192 	mpt_entry.mem_key = mpt_indx;
2193 	mpt_entry.pd = pd->pd_pdnum;
2194 	mpt_entry.fast_reg_en = 1;
2195 	mpt_entry.rem_acc_en = 1;
2196 	mpt_entry.en_inval = 1;
2197 	mpt_entry.ren_inval = 1;
2198 	mpt_entry.mtt_size = nummtt;
2199 	mpt_entry.mtt_addr_h = mtt_addr >> 32;	/* only 8 more bits */
2200 	mpt_entry.mtt_addr_l = mtt_addr >> 3;	/* only 29 bits */
2201 
2202 	/*
2203 	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
2204 	 * the entry to the hardware if needed.  Note: in general, this
2205 	 * operation shouldn't fail.  But if it does, we have to undo
2206 	 * everything we've done above before returning error.
2207 	 *
2208 	 * For Hermon, this routine (which is common to the contexts) will only
2209 	 * set the ownership if needed - the process of passing the context
2210 	 * itself to HW will take care of setting up the MPT (based on type
2211 	 * and index).
2212 	 */
2213 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2214 	    sizeof (hermon_hw_dmpt_t), mpt_indx, sleep);
2215 	if (status != HERMON_CMD_SUCCESS) {
2216 		cmn_err(CE_CONT, "Hermon: fexch_mpt_init: SW2HW_MPT command "
2217 		    "failed: %08x\n", status);
2218 		if (status == HERMON_CMD_INVALID_STATUS) {
2219 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2220 		}
2221 		status = ibc_get_ci_failure(0);
2222 		return (status);
2223 	}
2224 	/* Increment the reference count on the protection domain (PD) */
2225 	hermon_pd_refcnt_inc(pd);
2226 
2227 	return (DDI_SUCCESS);
2228 }
2229 
2230 /*
2231  * hermon_mr_fexch_mpt_fini()
2232  *    Context: Can be called from base context.
2233  *
2234  * This is the same as deregister_mr, without an mrhdl.
2235  */
2236 int
hermon_mr_fexch_mpt_fini(hermon_state_t * state,hermon_pdhdl_t pd,uint32_t mpt_indx,uint_t sleep)2237 hermon_mr_fexch_mpt_fini(hermon_state_t *state, hermon_pdhdl_t pd,
2238     uint32_t mpt_indx, uint_t sleep)
2239 {
2240 	int			status;
2241 
2242 	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
2243 	    NULL, 0, mpt_indx, sleep);
2244 	if (status != DDI_SUCCESS) {
2245 		cmn_err(CE_CONT, "Hermon: fexch_mpt_fini: HW2SW_MPT command "
2246 		    "failed: %08x\n", status);
2247 		if (status == HERMON_CMD_INVALID_STATUS) {
2248 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2249 		}
2250 		status = ibc_get_ci_failure(0);
2251 		return (status);
2252 	}
2253 
2254 	/* Decrement the reference count on the protection domain (PD) */
2255 	hermon_pd_refcnt_dec(pd);
2256 
2257 	return (DDI_SUCCESS);
2258 }
2259 
2260 /*
2261  * hermon_mr_mtt_bind()
2262  *    Context: Can be called from interrupt or base context.
2263  */
2264 int
hermon_mr_mtt_bind(hermon_state_t * state,hermon_bind_info_t * bind,ddi_dma_handle_t bind_dmahdl,hermon_rsrc_t ** mtt,uint_t * mtt_pgsize_bits,uint_t is_buffer)2265 hermon_mr_mtt_bind(hermon_state_t *state, hermon_bind_info_t *bind,
2266     ddi_dma_handle_t bind_dmahdl, hermon_rsrc_t **mtt, uint_t *mtt_pgsize_bits,
2267     uint_t is_buffer)
2268 {
2269 	uint64_t		nummtt;
2270 	uint_t			sleep;
2271 	int			status;
2272 
2273 	/*
2274 	 * Check the sleep flag.  Ensure that it is consistent with the
2275 	 * current thread context (i.e. if we are currently in the interrupt
2276 	 * context, then we shouldn't be attempting to sleep).
2277 	 */
2278 	sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ?
2279 	    HERMON_NOSLEEP : HERMON_SLEEP;
2280 	if ((sleep == HERMON_SLEEP) &&
2281 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2282 		status = IBT_INVALID_PARAM;
2283 		goto mrmttbind_fail;
2284 	}
2285 
2286 	/*
2287 	 * Bind the memory and determine the mapped addresses.  This is
2288 	 * the first of two routines that do all the "heavy lifting" for
2289 	 * the Hermon memory registration routines.  The hermon_mr_mem_bind()
2290 	 * routine takes the "bind" struct with all its fields filled
2291 	 * in and returns a list of DMA cookies (for the PCI mapped addresses
2292 	 * corresponding to the specified address region) which are used by
2293 	 * the hermon_mr_fast_mtt_write() routine below.  If we fail here, we
2294 	 * must undo all the previous resource allocation (and PD reference
2295 	 * count).
2296 	 */
2297 	status = hermon_mr_mem_bind(state, bind, bind_dmahdl, sleep, is_buffer);
2298 	if (status != DDI_SUCCESS) {
2299 		status = IBT_INSUFF_RESOURCE;
2300 		goto mrmttbind_fail;
2301 	}
2302 
2303 	/*
2304 	 * Determine number of pages spanned.  This routine uses the
2305 	 * information in the "bind" struct to determine the required
2306 	 * number of MTT entries needed (and returns the suggested page size -
2307 	 * as a "power-of-2" - for each MTT entry).
2308 	 */
2309 	nummtt = hermon_mr_nummtt_needed(state, bind, mtt_pgsize_bits);
2310 
2311 	/*
2312 	 * Allocate the MTT entries.  Use the calculations performed above to
2313 	 * allocate the required number of MTT entries. If we fail here, we
2314 	 * must not only undo all the previous resource allocation (and PD
2315 	 * reference count), but we must also unbind the memory.
2316 	 */
2317 	status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, mtt);
2318 	if (status != DDI_SUCCESS) {
2319 		status = IBT_INSUFF_RESOURCE;
2320 		goto mrmttbind_fail2;
2321 	}
2322 
2323 	/*
2324 	 * Write the mapped addresses into the MTT entries.  This is part two
2325 	 * of the "heavy lifting" routines that we talked about above.  Note:
2326 	 * we pass the suggested page size from the earlier operation here.
2327 	 * And if we fail here, we again do pretty much the same huge clean up.
2328 	 */
2329 	status = hermon_mr_fast_mtt_write(state, *mtt, bind, *mtt_pgsize_bits);
2330 	if (status != DDI_SUCCESS) {
2331 		/*
2332 		 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
2333 		 * only if it detects a HW error during DMA.
2334 		 */
2335 		hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2336 		status = ibc_get_ci_failure(0);
2337 		goto mrmttbind_fail3;
2338 	}
2339 	return (DDI_SUCCESS);
2340 
2341 /*
2342  * The following is cleanup for all possible failure cases in this routine
2343  */
2344 mrmttbind_fail3:
2345 	hermon_rsrc_free(state, mtt);
2346 mrmttbind_fail2:
2347 	hermon_mr_mem_unbind(state, bind);
2348 mrmttbind_fail:
2349 	return (status);
2350 }
2351 
2352 
2353 /*
2354  * hermon_mr_mtt_unbind()
2355  *    Context: Can be called from interrupt or base context.
2356  */
2357 int
hermon_mr_mtt_unbind(hermon_state_t * state,hermon_bind_info_t * bind,hermon_rsrc_t * mtt)2358 hermon_mr_mtt_unbind(hermon_state_t *state, hermon_bind_info_t *bind,
2359     hermon_rsrc_t *mtt)
2360 {
2361 	/*
2362 	 * Free up the MTT entries and unbind the memory.  Here, as above, we
2363 	 * attempt to free these resources only if it is appropriate to do so.
2364 	 */
2365 	hermon_mr_mem_unbind(state, bind);
2366 	hermon_rsrc_free(state, &mtt);
2367 
2368 	return (DDI_SUCCESS);
2369 }
2370 
2371 
2372 /*
2373  * hermon_mr_common_rereg()
2374  *    Context: Can be called from interrupt or base context.
2375  */
2376 static int
hermon_mr_common_rereg(hermon_state_t * state,hermon_mrhdl_t mr,hermon_pdhdl_t pd,hermon_bind_info_t * bind,hermon_mrhdl_t * mrhdl_new,hermon_mr_options_t * op)2377 hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
2378     hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
2379     hermon_mr_options_t *op)
2380 {
2381 	hermon_rsrc_t		*mpt;
2382 	ibt_mr_attr_flags_t	acc_flags_to_use;
2383 	ibt_mr_flags_t		flags;
2384 	hermon_pdhdl_t		pd_to_use;
2385 	hermon_hw_dmpt_t	mpt_entry;
2386 	uint64_t		mtt_addr_to_use, vaddr_to_use, len_to_use;
2387 	uint_t			sleep, dereg_level;
2388 	int			status;
2389 
2390 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2391 
2392 	/*
2393 	 * Check here to see if the memory region corresponds to a userland
2394 	 * mapping.  Reregistration of userland memory regions is not
2395 	 * currently supported.  Return failure.
2396 	 */
2397 	if (mr->mr_is_umem) {
2398 		status = IBT_MR_HDL_INVALID;
2399 		goto mrrereg_fail;
2400 	}
2401 
2402 	mutex_enter(&mr->mr_lock);
2403 
2404 	/* Pull MPT resource pointer from the Hermon Memory Region handle */
2405 	mpt = mr->mr_mptrsrcp;
2406 
2407 	/* Extract the flags field from the hermon_bind_info_t */
2408 	flags = bind->bi_flags;
2409 
2410 	/*
2411 	 * Check the sleep flag.  Ensure that it is consistent with the
2412 	 * current thread context (i.e. if we are currently in the interrupt
2413 	 * context, then we shouldn't be attempting to sleep).
2414 	 */
2415 	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
2416 	if ((sleep == HERMON_SLEEP) &&
2417 	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2418 		mutex_exit(&mr->mr_lock);
2419 		status = IBT_INVALID_PARAM;
2420 		goto mrrereg_fail;
2421 	}
2422 
2423 	/*
2424 	 * First step is to temporarily invalidate the MPT entry.  This
2425 	 * regains ownership from the hardware, and gives us the opportunity
2426 	 * to modify the entry.  Note: The HW2SW_MPT command returns the
2427 	 * current MPT entry contents.  These are saved away here because
2428 	 * they will be reused in a later step below.  If the region has
2429 	 * bound memory windows that we fail returning an "in use" error code.
2430 	 * Otherwise, this is an unexpected error and we deregister the
2431 	 * memory region and return error.
2432 	 *
2433 	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
2434 	 * against holding the lock around this rereg call in all contexts.
2435 	 */
2436 	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
2437 	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
2438 	if (status != HERMON_CMD_SUCCESS) {
2439 		mutex_exit(&mr->mr_lock);
2440 		if (status == HERMON_CMD_REG_BOUND) {
2441 			return (IBT_MR_IN_USE);
2442 		} else {
2443 			cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: "
2444 			    "%08x\n", status);
2445 			if (status == HERMON_CMD_INVALID_STATUS) {
2446 				hermon_fm_ereport(state, HCA_SYS_ERR,
2447 				    HCA_ERR_SRV_LOST);
2448 			}
2449 			/*
2450 			 * Call deregister and ensure that all current
2451 			 * resources get freed up
2452 			 */
2453 			if (hermon_mr_deregister(state, &mr,
2454 			    HERMON_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
2455 				HERMON_WARNING(state, "failed to deregister "
2456 				    "memory region");
2457 			}
2458 			return (ibc_get_ci_failure(0));
2459 		}
2460 	}
2461 
2462 	/*
2463 	 * If we're changing the protection domain, then validate the new one
2464 	 */
2465 	if (flags & IBT_MR_CHANGE_PD) {
2466 
2467 		/* Check for valid PD handle pointer */
2468 		if (pd == NULL) {
2469 			mutex_exit(&mr->mr_lock);
2470 			/*
2471 			 * Call deregister and ensure that all current
2472 			 * resources get properly freed up. Unnecessary
2473 			 * here to attempt to regain software ownership
2474 			 * of the MPT entry as that has already been
2475 			 * done above.
2476 			 */
2477 			if (hermon_mr_deregister(state, &mr,
2478 			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
2479 			    DDI_SUCCESS) {
2480 				HERMON_WARNING(state, "failed to deregister "
2481 				    "memory region");
2482 			}
2483 			status = IBT_PD_HDL_INVALID;
2484 			goto mrrereg_fail;
2485 		}
2486 
2487 		/* Use the new PD handle in all operations below */
2488 		pd_to_use = pd;
2489 
2490 	} else {
2491 		/* Use the current PD handle in all operations below */
2492 		pd_to_use = mr->mr_pdhdl;
2493 	}
2494 
2495 	/*
2496 	 * If we're changing access permissions, then validate the new ones
2497 	 */
2498 	if (flags & IBT_MR_CHANGE_ACCESS) {
2499 		/*
2500 		 * Validate the access flags.  Both remote write and remote
2501 		 * atomic require the local write flag to be set
2502 		 */
2503 		if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
2504 		    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
2505 		    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
2506 			mutex_exit(&mr->mr_lock);
2507 			/*
2508 			 * Call deregister and ensure that all current
2509 			 * resources get properly freed up. Unnecessary
2510 			 * here to attempt to regain software ownership
2511 			 * of the MPT entry as that has already been
2512 			 * done above.
2513 			 */
2514 			if (hermon_mr_deregister(state, &mr,
2515 			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
2516 			    DDI_SUCCESS) {
2517 				HERMON_WARNING(state, "failed to deregister "
2518 				    "memory region");
2519 			}
2520 			status = IBT_MR_ACCESS_REQ_INVALID;
2521 			goto mrrereg_fail;
2522 		}
2523 
2524 		/*
2525 		 * Setup and validate the memory region access flags.  This
2526 		 * means translating the IBTF's enable flags into the access
2527 		 * flags that will be used in later operations.
2528 		 */
2529 		acc_flags_to_use = 0;
2530 		if (flags & IBT_MR_ENABLE_WINDOW_BIND)
2531 			acc_flags_to_use |= IBT_MR_WINDOW_BIND;
2532 		if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
2533 			acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
2534 		if (flags & IBT_MR_ENABLE_REMOTE_READ)
2535 			acc_flags_to_use |= IBT_MR_REMOTE_READ;
2536 		if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
2537 			acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
2538 		if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
2539 			acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;
2540 
2541 	} else {
2542 		acc_flags_to_use = mr->mr_accflag;
2543 	}
2544 
2545 	/*
2546 	 * If we're modifying the translation, then figure out whether
2547 	 * we can reuse the current MTT resources.  This means calling
2548 	 * hermon_mr_rereg_xlat_helper() which does most of the heavy lifting
2549 	 * for the reregistration.  If the current memory region contains
2550 	 * sufficient MTT entries for the new regions, then it will be
2551 	 * reused and filled in.  Otherwise, new entries will be allocated,
2552 	 * the old ones will be freed, and the new entries will be filled
2553 	 * in.  Note:  If we're not modifying the translation, then we
2554 	 * should already have all the information we need to update the MPT.
2555 	 * Also note: If hermon_mr_rereg_xlat_helper() fails, it will return
2556 	 * a "dereg_level" which is the level of cleanup that needs to be
2557 	 * passed to hermon_mr_deregister() to finish the cleanup.
2558 	 */
2559 	if (flags & IBT_MR_CHANGE_TRANSLATION) {
2560 		status = hermon_mr_rereg_xlat_helper(state, mr, bind, op,
2561 		    &mtt_addr_to_use, sleep, &dereg_level);
2562 		if (status != DDI_SUCCESS) {
2563 			mutex_exit(&mr->mr_lock);
2564 			/*
2565 			 * Call deregister and ensure that all resources get
2566 			 * properly freed up.
2567 			 */
2568 			if (hermon_mr_deregister(state, &mr, dereg_level,
2569 			    sleep) != DDI_SUCCESS) {
2570 				HERMON_WARNING(state, "failed to deregister "
2571 				    "memory region");
2572 			}
2573 			goto mrrereg_fail;
2574 		}
2575 		vaddr_to_use = mr->mr_bindinfo.bi_addr;
2576 		len_to_use   = mr->mr_bindinfo.bi_len;
2577 	} else {
2578 		mtt_addr_to_use = mr->mr_mttaddr;
2579 		vaddr_to_use = mr->mr_bindinfo.bi_addr;
2580 		len_to_use   = mr->mr_bindinfo.bi_len;
2581 	}
2582 
2583 	/*
2584 	 * Calculate new keys (Lkey, Rkey) from MPT index.  Just like they were
2585 	 * when the region was first registered, each key is formed from
2586 	 * "constrained" bits and "unconstrained" bits.  Note:  If no remote
2587 	 * access is required, then the RKey value is not filled in.  Otherwise
2588 	 * both Rkey and LKey are given the same value.
2589 	 */
2590 	mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
2591 	if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
2592 	    (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
2593 	    (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
2594 		mr->mr_rkey = mr->mr_lkey;
2595 	} else
2596 		mr->mr_rkey = 0;
2597 
2598 	/*
2599 	 * Fill in the MPT entry.  This is the final step before passing
2600 	 * ownership of the MPT entry to the Hermon hardware.  We use all of
2601 	 * the information collected/calculated above to fill in the
2602 	 * requisite portions of the MPT.
2603 	 */
2604 	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2605 
2606 	mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
2607 	mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND)   ? 1 : 0;
2608 	mpt_entry.atomic  = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
2609 	mpt_entry.rw	  = (acc_flags_to_use & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
2610 	mpt_entry.rr	  = (acc_flags_to_use & IBT_MR_REMOTE_READ)   ? 1 : 0;
2611 	mpt_entry.lw	  = (acc_flags_to_use & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
2612 	mpt_entry.lr	  = 1;
2613 	mpt_entry.phys_addr = 0;
2614 	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2615 
2616 	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
2617 	mpt_entry.mem_key	= mr->mr_lkey;
2618 	mpt_entry.pd		= pd_to_use->pd_pdnum;
2619 
2620 	mpt_entry.start_addr	= vaddr_to_use;
2621 	mpt_entry.reg_win_len	= len_to_use;
2622 	mpt_entry.mtt_addr_h = mtt_addr_to_use >> 32;
2623 	mpt_entry.mtt_addr_l = mtt_addr_to_use >> 3;
2624 
2625 	/*
2626 	 * Write the updated MPT entry to hardware
2627 	 *
2628 	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
2629 	 * against holding the lock around this rereg call in all contexts.
2630 	 */
2631 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2632 	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
2633 	if (status != HERMON_CMD_SUCCESS) {
2634 		mutex_exit(&mr->mr_lock);
2635 		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
2636 		    status);
2637 		if (status == HERMON_CMD_INVALID_STATUS) {
2638 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2639 		}
2640 		/*
2641 		 * Call deregister and ensure that all current resources get
2642 		 * properly freed up. Unnecessary here to attempt to regain
2643 		 * software ownership of the MPT entry as that has already
2644 		 * been done above.
2645 		 */
2646 		if (hermon_mr_deregister(state, &mr,
2647 		    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
2648 			HERMON_WARNING(state, "failed to deregister memory "
2649 			    "region");
2650 		}
2651 		return (ibc_get_ci_failure(0));
2652 	}
2653 
2654 	/*
2655 	 * If we're changing PD, then update their reference counts now.
2656 	 * This means decrementing the reference count on the old PD and
2657 	 * incrementing the reference count on the new PD.
2658 	 */
2659 	if (flags & IBT_MR_CHANGE_PD) {
2660 		hermon_pd_refcnt_dec(mr->mr_pdhdl);
2661 		hermon_pd_refcnt_inc(pd);
2662 	}
2663 
2664 	/*
2665 	 * Update the contents of the Hermon Memory Region handle to reflect
2666 	 * what has been changed.
2667 	 */
2668 	mr->mr_pdhdl	  = pd_to_use;
2669 	mr->mr_accflag	  = acc_flags_to_use;
2670 	mr->mr_is_umem	  = 0;
2671 	mr->mr_is_fmr	  = 0;
2672 	mr->mr_umemcookie = NULL;
2673 	mr->mr_lkey	  = hermon_mr_key_swap(mr->mr_lkey);
2674 	mr->mr_rkey	  = hermon_mr_key_swap(mr->mr_rkey);
2675 
2676 	/* New MR handle is same as the old */
2677 	*mrhdl_new = mr;
2678 	mutex_exit(&mr->mr_lock);
2679 
2680 	return (DDI_SUCCESS);
2681 
2682 mrrereg_fail:
2683 	return (status);
2684 }
2685 
2686 
2687 /*
2688  * hermon_mr_rereg_xlat_helper
2689  *    Context: Can be called from interrupt or base context.
2690  *    Note: This routine expects the "mr_lock" to be held when it
2691  *    is called.  Upon returning failure, this routine passes information
2692  *    about what "dereg_level" should be passed to hermon_mr_deregister().
2693  */
static int
hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level)
{
	hermon_rsrc_t		*mtt, *mtt_refcnt;
	hermon_sw_refcnt_t	*swrc_old, *swrc_new;
	ddi_dma_handle_t	dmahdl;
	uint64_t		nummtt_needed, nummtt_in_currrsrc, max_sz;
	uint_t			mtt_pgsize_bits, bind_type, reuse_dmahdl;
	int			status;

	/* Caller (hermon_mr_reregister) must hold "mr_lock" throughout */
	ASSERT(MUTEX_HELD(&mr->mr_lock));

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU) or whether it should be
	 * registered to bypass the IOMMU.
	 */
	if (op == NULL) {
		bind_type = HERMON_BINDMEM_NORMAL;
	} else {
		bind_type = op->mro_bind_type;
	}

	/*
	 * Check for invalid length.  Check is the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		/*
		 * Deregister will be called upon returning failure from this
		 * routine. This will ensure that all current resources get
		 * properly freed up. Unnecessary to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above (in hermon_mr_reregister())
		 */
		*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT;

		status = IBT_MR_LEN_INVALID;
		goto mrrereghelp_fail;
	}

	/*
	 * Determine the number of pages necessary for new region and the
	 * number of pages supported by the current MTT resources
	 */
	nummtt_needed = hermon_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
	nummtt_in_currrsrc = mr->mr_mttrsrcp->hr_len >> HERMON_MTT_SIZE_SHIFT;

	/*
	 * Depending on whether we have enough pages or not, the next step is
	 * to fill in a set of MTT entries that reflect the new mapping.  In
	 * the first case below, we already have enough entries.  This means
	 * we need to unbind the memory from the previous mapping, bind the
	 * memory for the new mapping, write the new MTT entries, and update
	 * the mr to reflect the changes.
	 * In the second case below, we do not have enough entries in the
	 * current mapping.  So, in this case, we need not only to unbind the
	 * current mapping, but we need to free up the MTT resources associated
	 * with that mapping.  After we've successfully done that, we continue
	 * by binding the new memory, allocating new MTT entries, writing the
	 * new MTT entries, and updating the mr to reflect the changes.
	 */

	/*
	 * If this region is being shared (i.e. MTT refcount != 1), then we
	 * can't reuse the current MTT resources regardless of their size.
	 * Instead we'll need to alloc new ones (below) just as if there
	 * hadn't been enough room in the current entries.
	 */
	swrc_old = (hermon_sw_refcnt_t *)mr->mr_mttrefcntp->hr_addr;
	if (HERMON_MTT_IS_NOT_SHARED(swrc_old) &&
	    (nummtt_needed <= nummtt_in_currrsrc)) {

		/*
		 * Unbind the old mapping for this memory region, but retain
		 * the ddi_dma_handle_t (if possible) for reuse in the bind
		 * operation below.  Note:  If original memory region was
		 * bound for IOMMU bypass and the new region can not use
		 * bypass, then a new DMA handle will be necessary.
		 */
		if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
			/* Clear bi_free_dmahdl so unbind keeps the handle */
			mr->mr_bindinfo.bi_free_dmahdl = 0;
			hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = mr->mr_bindinfo.bi_dmahdl;
			reuse_dmahdl = 1;
		} else {
			hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and hermon_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note:  When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass	= bind_type;
		status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine. This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Using the new mapping, but reusing the current MTT
		 * resources, write the updated entries to MTT
		 */
		mtt    = mr->mr_mttrsrcp;
		status = hermon_mr_fast_mtt_write(state, mtt, bind,
		    mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine. This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we do need to unbind the newly bound memory
			 * before returning.
			 */
			hermon_mr_mem_unbind(state, bind);
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/*
			 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
			 * only if it detects a HW error during DMA.
			 */
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
			status = ibc_get_ci_failure(0);
			goto mrrereghelp_fail;
		}

		/* Put the updated information into the Mem Region handle */
		mr->mr_bindinfo	  = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;

	} else {
		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not unbind any resources prematurely.
		 */
		if (!HERMON_MTT_IS_SHARED(swrc_old)) {
			/*
			 * Unbind the old mapping for this memory region, but
			 * retain the ddi_dma_handle_t for reuse in the bind
			 * operation below. Note: This can only be done here
			 * because the region being reregistered is not
			 * currently shared.  Also if original memory region
			 * was bound for IOMMU bypass and the new region can
			 * not use bypass, then a new DMA handle will be
			 * necessary.
			 */
			if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
				mr->mr_bindinfo.bi_free_dmahdl = 0;
				hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = mr->mr_bindinfo.bi_dmahdl;
				reuse_dmahdl = 1;
			} else {
				hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = NULL;
				reuse_dmahdl = 0;
			}
		} else {
			/* Shared: leave the old binding untouched */
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and hermon_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note:  When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass	= bind_type;
		status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine. This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Allocate the new MTT entries resource
		 */
		status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt_needed,
		    sleep, &mtt);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine. This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we do need to unbind the newly bound memory
			 * before returning.
			 */
			hermon_mr_mem_unbind(state, bind);
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}

		/*
		 * Allocate MTT reference count (to track shared memory
		 * regions).  As mentioned elsewhere above, this reference
		 * count resource may never be used on the given memory region,
		 * but if it is ever later registered as a "shared" memory
		 * region then this resource will be necessary.  Note:  This
		 * is only necessary here if the existing memory region is
		 * already being shared (because otherwise we already have
		 * a useable reference count resource).
		 */
		if (HERMON_MTT_IS_SHARED(swrc_old)) {
			status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1,
			    sleep, &mtt_refcnt);
			if (status != DDI_SUCCESS) {
				/*
				 * Deregister will be called upon returning
				 * failure from this routine. This will ensure
				 * that all current resources get properly
				 * freed up.  Unnecessary to attempt to regain
				 * software ownership of the MPT entry as that
				 * has already been done above (in
				 * hermon_mr_reregister()).  Also unnecessary
				 * to attempt to unbind the memory.
				 *
				 * But we need to unbind the newly bound
				 * memory and free up the newly allocated MTT
				 * entries before returning.
				 */
				hermon_mr_mem_unbind(state, bind);
				hermon_rsrc_free(state, &mtt);
				*dereg_level =
				    HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

				status = IBT_INSUFF_RESOURCE;
				goto mrrereghelp_fail;
			}
			swrc_new = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
			HERMON_MTT_REFCNT_INIT(swrc_new);
		} else {
			mtt_refcnt = mr->mr_mttrefcntp;
		}

		/*
		 * Using the new mapping and the new MTT resources, write the
		 * updated entries to MTT
		 */
		status = hermon_mr_fast_mtt_write(state, mtt, bind,
		    mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine. This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in hermon_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we need to unbind the newly bound memory,
			 * free up the newly allocated MTT entries, and
			 * (possibly) free the new MTT reference count
			 * resource before returning.
			 */
			if (HERMON_MTT_IS_SHARED(swrc_old)) {
				hermon_rsrc_free(state, &mtt_refcnt);
			}
			hermon_mr_mem_unbind(state, bind);
			hermon_rsrc_free(state, &mtt);
			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			status = IBT_INSUFF_RESOURCE;
			goto mrrereghelp_fail;
		}

		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not free up any resources prematurely.
		 */
		if (HERMON_MTT_IS_SHARED(swrc_old)) {
			/* Decrement MTT reference count for "old" region */
			(void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
		} else {
			/* Free up the old MTT entries resource */
			hermon_rsrc_free(state, &mr->mr_mttrsrcp);
		}

		/* Put the updated information into the mrhdl */
		mr->mr_bindinfo	  = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;
		mr->mr_mttrsrcp   = mtt;
		mr->mr_mttrefcntp = mtt_refcnt;
	}

	/*
	 * Calculate and return the updated MTT address (in the DDR address
	 * space).  This will be used by the caller (hermon_mr_reregister) in
	 * the updated MPT entry
	 */
	*mtt_addr = mtt->hr_indx << HERMON_MTT_SIZE_SHIFT;

	return (DDI_SUCCESS);

mrrereghelp_fail:
	return (status);
}
3062 
3063 
3064 /*
3065  * hermon_mr_nummtt_needed()
3066  *    Context: Can be called from interrupt or base context.
3067  */
3068 /* ARGSUSED */
3069 static uint64_t
hermon_mr_nummtt_needed(hermon_state_t * state,hermon_bind_info_t * bind,uint_t * mtt_pgsize_bits)3070 hermon_mr_nummtt_needed(hermon_state_t *state, hermon_bind_info_t *bind,
3071     uint_t *mtt_pgsize_bits)
3072 {
3073 	uint64_t	pg_offset_mask;
3074 	uint64_t	pg_offset, tmp_length;
3075 
3076 	/*
3077 	 * For now we specify the page size as 8Kb (the default page size for
3078 	 * the sun4u architecture), or 4Kb for x86.  Figure out optimal page
3079 	 * size by examining the dmacookies
3080 	 */
3081 	*mtt_pgsize_bits = PAGESHIFT;
3082 
3083 	pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
3084 	pg_offset = bind->bi_addr & pg_offset_mask;
3085 	tmp_length = pg_offset + (bind->bi_len - 1);
3086 	return ((tmp_length >> *mtt_pgsize_bits) + 1);
3087 }
3088 
3089 
3090 /*
3091  * hermon_mr_mem_bind()
3092  *    Context: Can be called from interrupt or base context.
3093  */
3094 static int
hermon_mr_mem_bind(hermon_state_t * state,hermon_bind_info_t * bind,ddi_dma_handle_t dmahdl,uint_t sleep,uint_t is_buffer)3095 hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
3096     ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer)
3097 {
3098 	ddi_dma_attr_t	dma_attr;
3099 	int		(*callback)(caddr_t);
3100 	int		status;
3101 
3102 	/* bi_type must be set to a meaningful value to get a bind handle */
3103 	ASSERT(bind->bi_type == HERMON_BINDHDL_VADDR ||
3104 	    bind->bi_type == HERMON_BINDHDL_BUF ||
3105 	    bind->bi_type == HERMON_BINDHDL_UBUF);
3106 
3107 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
3108 
3109 	/* Set the callback flag appropriately */
3110 	callback = (sleep == HERMON_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
3111 
3112 	/*
3113 	 * Initialize many of the default DMA attributes.  Then, if we're
3114 	 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
3115 	 */
3116 	if (dmahdl == NULL) {
3117 		hermon_dma_attr_init(state, &dma_attr);
3118 #ifdef	__sparc
3119 		if (bind->bi_bypass == HERMON_BINDMEM_BYPASS) {
3120 			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
3121 		}
3122 #endif
3123 
3124 		/* set RO if needed - tunable set and 'is_buffer' is non-0 */
3125 		if (is_buffer) {
3126 			if (! (bind->bi_flags & IBT_MR_DISABLE_RO)) {
3127 				if ((bind->bi_type != HERMON_BINDHDL_UBUF) &&
3128 				    (hermon_kernel_data_ro ==
3129 				    HERMON_RO_ENABLED)) {
3130 					dma_attr.dma_attr_flags |=
3131 					    DDI_DMA_RELAXED_ORDERING;
3132 				}
3133 				if (((bind->bi_type == HERMON_BINDHDL_UBUF) &&
3134 				    (hermon_user_data_ro ==
3135 				    HERMON_RO_ENABLED))) {
3136 					dma_attr.dma_attr_flags |=
3137 					    DDI_DMA_RELAXED_ORDERING;
3138 				}
3139 			}
3140 		}
3141 
3142 		/* Allocate a DMA handle for the binding */
3143 		status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
3144 		    callback, NULL, &bind->bi_dmahdl);
3145 		if (status != DDI_SUCCESS) {
3146 			return (status);
3147 		}
3148 		bind->bi_free_dmahdl = 1;
3149 
3150 	} else  {
3151 		bind->bi_dmahdl = dmahdl;
3152 		bind->bi_free_dmahdl = 0;
3153 	}
3154 
3155 
3156 	/*
3157 	 * Bind the memory to get the PCI mapped addresses.  The decision
3158 	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
3159 	 * is determined by the "bi_type" flag.  Note: if the bind operation
3160 	 * fails then we have to free up the DMA handle and return error.
3161 	 */
3162 	if (bind->bi_type == HERMON_BINDHDL_VADDR) {
3163 		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
3164 		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
3165 		    (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback, NULL,
3166 		    &bind->bi_dmacookie, &bind->bi_cookiecnt);
3167 
3168 	} else {  /* HERMON_BINDHDL_BUF or HERMON_BINDHDL_UBUF */
3169 
3170 		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
3171 		    bind->bi_buf, (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback,
3172 		    NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
3173 	}
3174 	if (status != DDI_DMA_MAPPED) {
3175 		if (bind->bi_free_dmahdl != 0) {
3176 			ddi_dma_free_handle(&bind->bi_dmahdl);
3177 		}
3178 		return (status);
3179 	}
3180 
3181 	return (DDI_SUCCESS);
3182 }
3183 
3184 
3185 /*
3186  * hermon_mr_mem_unbind()
3187  *    Context: Can be called from interrupt or base context.
3188  */
3189 static void
hermon_mr_mem_unbind(hermon_state_t * state,hermon_bind_info_t * bind)3190 hermon_mr_mem_unbind(hermon_state_t *state, hermon_bind_info_t *bind)
3191 {
3192 	int	status;
3193 
3194 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
3195 	/* there is nothing to unbind for alloc_lkey */
3196 	if (bind->bi_type == HERMON_BINDHDL_LKEY)
3197 		return;
3198 
3199 	/*
3200 	 * In case of HERMON_BINDHDL_UBUF, the memory bi_buf points to
3201 	 * is actually allocated by ddi_umem_iosetup() internally, then
3202 	 * it's required to free it here. Reset bi_type to HERMON_BINDHDL_NONE
3203 	 * not to free it again later.
3204 	 */
3205 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
3206 	if (bind->bi_type == HERMON_BINDHDL_UBUF) {
3207 		freerbuf(bind->bi_buf);
3208 		bind->bi_type = HERMON_BINDHDL_NONE;
3209 	}
3210 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
3211 
3212 	/*
3213 	 * Unbind the DMA memory for the region
3214 	 *
3215 	 * Note: The only way ddi_dma_unbind_handle() currently
3216 	 * can return an error is if the handle passed in is invalid.
3217 	 * Since this should never happen, we choose to return void
3218 	 * from this function!  If this does return an error, however,
3219 	 * then we print a warning message to the console.
3220 	 */
3221 	status = ddi_dma_unbind_handle(bind->bi_dmahdl);
3222 	if (status != DDI_SUCCESS) {
3223 		HERMON_WARNING(state, "failed to unbind DMA mapping");
3224 		return;
3225 	}
3226 
3227 	/* Free up the DMA handle */
3228 	if (bind->bi_free_dmahdl != 0) {
3229 		ddi_dma_free_handle(&bind->bi_dmahdl);
3230 	}
3231 }
3232 
3233 
3234 /*
3235  * hermon_mr_fast_mtt_write()
3236  *    Context: Can be called from interrupt or base context.
3237  */
3238 static int
hermon_mr_fast_mtt_write(hermon_state_t * state,hermon_rsrc_t * mtt,hermon_bind_info_t * bind,uint32_t mtt_pgsize_bits)3239 hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
3240     hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits)
3241 {
3242 	hermon_icm_table_t	*icm_table;
3243 	hermon_dma_info_t	*dma_info;
3244 	uint32_t		index1, index2, rindx;
3245 	ddi_dma_cookie_t	dmacookie;
3246 	uint_t			cookie_cnt;
3247 	uint64_t		*mtt_table;
3248 	uint64_t		mtt_entry;
3249 	uint64_t		addr, endaddr;
3250 	uint64_t		pagesize;
3251 	offset_t		i, start;
3252 	uint_t			per_span;
3253 	int			sync_needed;
3254 
3255 	/*
3256 	 * XXX According to the PRM, we are to use the WRITE_MTT
3257 	 * command to write out MTTs. Tavor does not do this,
3258 	 * instead taking advantage of direct access to the MTTs,
3259 	 * and knowledge that Mellanox FMR relies on our ability
3260 	 * to write directly to the MTTs without any further
3261 	 * notification to the firmware. Likewise, we will choose
3262 	 * to not use the WRITE_MTT command, but to simply write
3263 	 * out the MTTs.
3264 	 */
3265 
3266 	/* Calculate page size from the suggested value passed in */
3267 	pagesize = ((uint64_t)1 << mtt_pgsize_bits);
3268 
3269 	/* Walk the "cookie list" and fill in the MTT table entries */
3270 	dmacookie  = bind->bi_dmacookie;
3271 	cookie_cnt = bind->bi_cookiecnt;
3272 
3273 	icm_table = &state->hs_icm[HERMON_MTT];
3274 	rindx = mtt->hr_indx;
3275 	hermon_index(index1, index2, rindx, icm_table, i);
3276 	start = i;
3277 
3278 	per_span   = icm_table->span;
3279 	dma_info   = icm_table->icm_dma[index1] + index2;
3280 	mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;
3281 
3282 	sync_needed = 0;
3283 	while (cookie_cnt-- > 0) {
3284 		addr    = dmacookie.dmac_laddress;
3285 		endaddr = addr + (dmacookie.dmac_size - 1);
3286 		addr    = addr & ~((uint64_t)pagesize - 1);
3287 
3288 		while (addr <= endaddr) {
3289 
3290 			/*
3291 			 * Fill in the mapped addresses (calculated above) and
3292 			 * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry.
3293 			 */
3294 			mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT;
3295 			mtt_table[i] = htonll(mtt_entry);
3296 			i++;
3297 			rindx++;
3298 
3299 			if (i == per_span) {
3300 
3301 				(void) ddi_dma_sync(dma_info->dma_hdl,
3302 				    start * sizeof (hermon_hw_mtt_t),
3303 				    (i - start) * sizeof (hermon_hw_mtt_t),
3304 				    DDI_DMA_SYNC_FORDEV);
3305 
3306 				if ((addr + pagesize > endaddr) &&
3307 				    (cookie_cnt == 0))
3308 					return (DDI_SUCCESS);
3309 
3310 				hermon_index(index1, index2, rindx, icm_table,
3311 				    i);
3312 				start = i * sizeof (hermon_hw_mtt_t);
3313 				dma_info = icm_table->icm_dma[index1] + index2;
3314 				mtt_table =
3315 				    (uint64_t *)(uintptr_t)dma_info->vaddr;
3316 
3317 				sync_needed = 0;
3318 			} else {
3319 				sync_needed = 1;
3320 			}
3321 
3322 			addr += pagesize;
3323 			if (addr == 0) {
3324 				static int do_once = 1;
3325 				_NOTE(SCHEME_PROTECTS_DATA("safe sharing",
3326 				    do_once))
3327 				if (do_once) {
3328 					do_once = 0;
3329 					cmn_err(CE_NOTE, "probable error in "
3330 					    "dma_cookie address from caller\n");
3331 				}
3332 				break;
3333 			}
3334 		}
3335 
3336 		/*
3337 		 * When we've reached the end of the current DMA cookie,
3338 		 * jump to the next cookie (if there are more)
3339 		 */
3340 		if (cookie_cnt != 0) {
3341 			ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
3342 		}
3343 	}
3344 
3345 	/* done all the cookies, now sync the memory for the device */
3346 	if (sync_needed)
3347 		(void) ddi_dma_sync(dma_info->dma_hdl,
3348 		    start * sizeof (hermon_hw_mtt_t),
3349 		    (i - start) * sizeof (hermon_hw_mtt_t),
3350 		    DDI_DMA_SYNC_FORDEV);
3351 
3352 	return (DDI_SUCCESS);
3353 }
3354 
3355 /*
3356  * hermon_mr_fast_mtt_write_fmr()
3357  *    Context: Can be called from interrupt or base context.
3358  */
3359 /* ARGSUSED */
3360 static int
hermon_mr_fast_mtt_write_fmr(hermon_state_t * state,hermon_rsrc_t * mtt,ibt_pmr_attr_t * mem_pattr,uint32_t mtt_pgsize_bits)3361 hermon_mr_fast_mtt_write_fmr(hermon_state_t *state, hermon_rsrc_t *mtt,
3362     ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits)
3363 {
3364 	hermon_icm_table_t	*icm_table;
3365 	hermon_dma_info_t	*dma_info;
3366 	uint32_t		index1, index2, rindx;
3367 	uint64_t		*mtt_table;
3368 	offset_t		i, j;
3369 	uint_t			per_span;
3370 
3371 	icm_table = &state->hs_icm[HERMON_MTT];
3372 	rindx = mtt->hr_indx;
3373 	hermon_index(index1, index2, rindx, icm_table, i);
3374 	per_span   = icm_table->span;
3375 	dma_info   = icm_table->icm_dma[index1] + index2;
3376 	mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;
3377 
3378 	/*
3379 	 * Fill in the MTT table entries
3380 	 */
3381 	for (j = 0; j < mem_pattr->pmr_num_buf; j++) {
3382 		mtt_table[i] = mem_pattr->pmr_addr_list[j].p_laddr;
3383 		i++;
3384 		rindx++;
3385 		if (i == per_span) {
3386 			hermon_index(index1, index2, rindx, icm_table, i);
3387 			dma_info = icm_table->icm_dma[index1] + index2;
3388 			mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr;
3389 		}
3390 	}
3391 
3392 	return (DDI_SUCCESS);
3393 }
3394 
3395 
3396 /*
3397  * hermon_mtt_refcnt_inc()
3398  *    Context: Can be called from interrupt or base context.
3399  */
3400 static uint_t
hermon_mtt_refcnt_inc(hermon_rsrc_t * rsrc)3401 hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc)
3402 {
3403 	hermon_sw_refcnt_t *rc;
3404 
3405 	rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
3406 	return (atomic_inc_uint_nv(&rc->swrc_refcnt));
3407 }
3408 
3409 
3410 /*
3411  * hermon_mtt_refcnt_dec()
3412  *    Context: Can be called from interrupt or base context.
3413  */
3414 static uint_t
hermon_mtt_refcnt_dec(hermon_rsrc_t * rsrc)3415 hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc)
3416 {
3417 	hermon_sw_refcnt_t *rc;
3418 
3419 	rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
3420 	return (atomic_dec_uint_nv(&rc->swrc_refcnt));
3421 }
3422