xref: /titanic_52/usr/src/uts/common/io/ib/adapters/tavor/tavor_umap.c (revision 9e39c5ba00a55fa05777cc94b148296af305e135)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * tavor_umap.c
29  *    Tavor Userland Mapping Routines
30  *
31  *    Implements all the routines necessary for enabling direct userland
32  *    access to the Tavor hardware.  This includes all routines necessary for
33  *    maintaining the "userland resources database" and all the support routines
34  *    for the devmap calls.
35  */
36 
37 #include <sys/types.h>
38 #include <sys/conf.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/modctl.h>
42 #include <sys/file.h>
43 #include <sys/avl.h>
44 #include <sys/sysmacros.h>
45 
46 #include <sys/ib/adapters/tavor/tavor.h>
47 
48 /* Tavor HCA state pointer (extern) */
49 extern void *tavor_statep;
50 
51 /* Tavor HCA Userland Resource Database (extern) */
52 extern tavor_umap_db_t tavor_userland_rsrc_db;
53 
54 static int tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
55     tavor_rsrc_t *rsrcp, size_t *maplen, int *err);
56 static int tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
57     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
58 static int tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
59     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
60 static int tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
61     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
62 static int tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
63     offset_t off, size_t len, void **pvtp);
64 static int tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp,
65     devmap_cookie_t new_dhp, void **new_pvtp);
66 static void tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp,
67     offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
68     devmap_cookie_t new_dhp2, void **pvtp2);
69 static int tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
70     offset_t off, size_t len, void **pvtp);
71 static int tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
72     devmap_cookie_t new_dhp, void **new_pvtp);
73 static void tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp,
74     offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
75     devmap_cookie_t new_dhp2, void **pvtp2);
76 static ibt_status_t tavor_umap_mr_data_in(tavor_mrhdl_t mr,
77     ibt_mr_data_in_t *data, size_t data_sz);
78 static ibt_status_t tavor_umap_cq_data_out(tavor_cqhdl_t cq,
79     mlnx_umap_cq_data_out_t *data, size_t data_sz);
80 static ibt_status_t tavor_umap_qp_data_out(tavor_qphdl_t qp,
81     mlnx_umap_qp_data_out_t *data, size_t data_sz);
82 static ibt_status_t tavor_umap_srq_data_out(tavor_srqhdl_t srq,
83     mlnx_umap_srq_data_out_t *data, size_t data_sz);
84 static int tavor_umap_db_compare(const void *query, const void *entry);
85 static ibt_status_t tavor_umap_pd_data_out(tavor_pdhdl_t pd,
86     mlnx_umap_pd_data_out_t *data, size_t data_sz);
87 
88 
89 /*
90  * These callbacks are passed to devmap_umem_setup() and devmap_devmem_setup(),
91  * respectively.  They are used to handle (among other things) partial
92  * unmappings and to provide a method for invalidating mappings inherited
93  * as a result of a fork(2) system call.
94  */
95 static struct devmap_callback_ctl tavor_devmap_umem_cbops = {
96 	DEVMAP_OPS_REV,
97 	tavor_devmap_umem_map,
98 	NULL,
99 	tavor_devmap_umem_dup,
100 	tavor_devmap_umem_unmap
101 };
102 static struct devmap_callback_ctl tavor_devmap_devmem_cbops = {
103 	DEVMAP_OPS_REV,
104 	tavor_devmap_devmem_map,
105 	NULL,
106 	tavor_devmap_devmem_dup,
107 	tavor_devmap_devmem_unmap
108 };
109 
110 /*
111  * tavor_devmap()
112  *    Context: Can be called from user context.
113  */
114 /* ARGSUSED */
115 int
116 tavor_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
117     size_t *maplen, uint_t model)
118 {
119 	tavor_state_t	*state;
120 	tavor_rsrc_t 	*rsrcp;
121 	minor_t		instance;
122 	uint64_t	key, value;
123 	uint_t		type;
124 	int		err, status;
125 
126 	TAVOR_TNF_ENTER(tavor_devmap);
127 
128 	/* Get Tavor softstate structure from instance */
129 	instance = TAVOR_DEV_INSTANCE(dev);
130 	state = ddi_get_soft_state(tavor_statep, instance);
131 	if (state == NULL) {
132 		TNF_PROBE_0(tavor_devmap_gss_fail, TAVOR_TNF_ERROR, "");
133 		TAVOR_TNF_EXIT(tavor_devmap);
134 		return (ENXIO);
135 	}
136 
137 	/*
138 	 * Access to Tavor devmap interface is not allowed in
139 	 * "maintenance mode".
140 	 */
141 	if (state->ts_operational_mode == TAVOR_MAINTENANCE_MODE) {
142 		TNF_PROBE_0(tavor_devmap_maintenance_mode_fail,
143 		    TAVOR_TNF_ERROR, "");
144 		TAVOR_TNF_EXIT(tavor_devmap);
145 		return (EFAULT);
146 	}
147 
148 	/*
149 	 * The bottom bits of "offset" are undefined (number depends on
150 	 * system PAGESIZE).  Shifting these off leaves us with a "key".
151 	 * The "key" is actually a combination of both a real key value
152 	 * (for the purpose of database lookup) and a "type" value.  We
153 	 * extract this information before doing the database lookup.
154 	 */
155 	key  = off >> PAGESHIFT;
156 	type = key & MLNX_UMAP_RSRC_TYPE_MASK;
157 	key  = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
158 	status = tavor_umap_db_find(instance, key, type, &value, 0, NULL);
159 	if (status == DDI_SUCCESS) {
160 		rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
161 
162 		switch (type) {
163 		case MLNX_UMAP_UARPG_RSRC:
164 			/*
165 			 * Double check that process who open()'d Tavor is
166 			 * same process attempting to mmap() UAR page.
167 			 */
168 			if (key != ddi_get_pid()) {
169 				TNF_PROBE_0(tavor_devmap_uarpg_invpid_fail,
170 				    TAVOR_TNF_ERROR, "");
171 				TAVOR_TNF_EXIT(tavor_devmap);
172 				return (EINVAL);
173 			}
174 
175 			/* Map the UAR page out for userland access */
176 			status = tavor_umap_uarpg(state, dhp, rsrcp, maplen,
177 			    &err);
178 			if (status != DDI_SUCCESS) {
179 				TNF_PROBE_0(tavor_devmap_uarpg_map_fail,
180 				    TAVOR_TNF_ERROR, "");
181 				TAVOR_TNF_EXIT(tavor_devmap);
182 				return (err);
183 			}
184 			break;
185 
186 		case MLNX_UMAP_CQMEM_RSRC:
187 			/* Map the CQ memory out for userland access */
188 			status = tavor_umap_cqmem(state, dhp, rsrcp, off,
189 			    maplen, &err);
190 			if (status != DDI_SUCCESS) {
191 				TNF_PROBE_0(tavor_devmap_cqmem_map_fail,
192 				    TAVOR_TNF_ERROR, "");
193 				TAVOR_TNF_EXIT(tavor_devmap);
194 				return (err);
195 			}
196 			break;
197 
198 		case MLNX_UMAP_QPMEM_RSRC:
199 			/* Map the QP memory out for userland access */
200 			status = tavor_umap_qpmem(state, dhp, rsrcp, off,
201 			    maplen, &err);
202 			if (status != DDI_SUCCESS) {
203 				TNF_PROBE_0(tavor_devmap_qpmem_map_fail,
204 				    TAVOR_TNF_ERROR, "");
205 				TAVOR_TNF_EXIT(tavor_devmap);
206 				return (err);
207 			}
208 			break;
209 
210 		case MLNX_UMAP_SRQMEM_RSRC:
211 			/* Map the SRQ memory out for userland access */
212 			status = tavor_umap_srqmem(state, dhp, rsrcp, off,
213 			    maplen, &err);
214 			if (status != DDI_SUCCESS) {
215 				TNF_PROBE_0(tavor_devmap_srqmem_map_fail,
216 				    TAVOR_TNF_ERROR, "");
217 				TAVOR_TNF_EXIT(tavor_devmap);
218 				return (err);
219 			}
220 			break;
221 
222 		default:
223 			TAVOR_WARNING(state, "unexpected rsrc type in devmap");
224 			TNF_PROBE_0(tavor_devmap_invrsrc_fail,
225 			    TAVOR_TNF_ERROR, "");
226 			TAVOR_TNF_EXIT(tavor_devmap);
227 			return (EINVAL);
228 		}
229 	} else {
230 		TNF_PROBE_0(tavor_devmap_umap_lookup_fail, TAVOR_TNF_ERROR, "");
231 		TAVOR_TNF_EXIT(tavor_devmap);
232 		return (EINVAL);
233 	}
234 
235 	TAVOR_TNF_EXIT(tavor_devmap);
236 	return (0);
237 }
238 
239 
240 /*
241  * tavor_umap_uarpg()
242  *    Context: Can be called from user context.
243  */
244 static int
245 tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
246     tavor_rsrc_t *rsrcp, size_t *maplen, int *err)
247 {
248 	int		status;
249 	uint_t		maxprot;
250 
251 	TAVOR_TNF_ENTER(tavor_umap_uarpg);
252 
253 	/* Map out the UAR page (doorbell page) */
254 	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
255 	status = devmap_devmem_setup(dhp, state->ts_dip,
256 	    &tavor_devmap_devmem_cbops, TAVOR_UAR_BAR, (rsrcp->tr_indx <<
257 	    PAGESHIFT), PAGESIZE, maxprot, DEVMAP_ALLOW_REMAP,
258 	    &state->ts_reg_accattr);
259 	if (status < 0) {
260 		*err = status;
261 		TNF_PROBE_0(tavor_umap_uarpg_devmap_fail, TAVOR_TNF_ERROR, "");
262 		TAVOR_TNF_EXIT(tavor_umap_uarpg);
263 		return (DDI_FAILURE);
264 	}
265 
266 	*maplen = PAGESIZE;
267 	TAVOR_TNF_EXIT(tavor_umap_uarpg);
268 	return (DDI_SUCCESS);
269 }
270 
271 
272 /*
273  * tavor_umap_cqmem()
274  *    Context: Can be called from user context.
275  */
276 /* ARGSUSED */
277 static int
278 tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
279     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
280 {
281 	tavor_cqhdl_t	cq;
282 	size_t		size;
283 	uint_t		maxprot;
284 	int		status;
285 
286 	TAVOR_TNF_ENTER(tavor_umap_cqmem);
287 
288 	/* Extract the Tavor CQ handle pointer from the tavor_rsrc_t */
289 	cq = (tavor_cqhdl_t)rsrcp->tr_addr;
290 
291 	/* Round-up the CQ size to system page size */
292 	size = ptob(btopr(cq->cq_cqinfo.qa_size));
293 
294 	/* Map out the CQ memory */
295 	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
296 	status = devmap_umem_setup(dhp, state->ts_dip,
297 	    &tavor_devmap_umem_cbops, cq->cq_cqinfo.qa_umemcookie, 0, size,
298 	    maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
299 	if (status < 0) {
300 		*err = status;
301 		TNF_PROBE_0(tavor_umap_cqmem_devmap_fail, TAVOR_TNF_ERROR, "");
302 		TAVOR_TNF_EXIT(tavor_umap_cqmem);
303 		return (DDI_FAILURE);
304 	}
305 	*maplen = size;
306 
307 	TAVOR_TNF_EXIT(tavor_umap_cqmem);
308 	return (DDI_SUCCESS);
309 }
310 
311 
312 /*
313  * tavor_umap_qpmem()
314  *    Context: Can be called from user context.
315  */
316 /* ARGSUSED */
317 static int
318 tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
319     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
320 {
321 	tavor_qphdl_t	qp;
322 	offset_t	offset;
323 	size_t		size;
324 	uint_t		maxprot;
325 	int		status;
326 
327 	TAVOR_TNF_ENTER(tavor_umap_qpmem);
328 
329 	/* Extract the Tavor QP handle pointer from the tavor_rsrc_t */
330 	qp = (tavor_qphdl_t)rsrcp->tr_addr;
331 
332 	/*
333 	 * Calculate the offset of the first work queue (send or recv) into
334 	 * the memory (ddi_umem_alloc()) allocated previously for the QP.
335 	 */
336 	offset = (offset_t)((uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
337 	    (uintptr_t)qp->qp_wqinfo.qa_buf_real);
338 
339 	/* Round-up the QP work queue sizes to system page size */
340 	size = ptob(btopr(qp->qp_wqinfo.qa_size));
341 
342 	/* Map out the QP memory */
343 	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
344 	status = devmap_umem_setup(dhp, state->ts_dip,
345 	    &tavor_devmap_umem_cbops, qp->qp_wqinfo.qa_umemcookie, offset,
346 	    size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
347 	if (status < 0) {
348 		*err = status;
349 		TNF_PROBE_0(tavor_umap_qpmem_devmap_fail, TAVOR_TNF_ERROR, "");
350 		TAVOR_TNF_EXIT(tavor_umap_qpmem);
351 		return (DDI_FAILURE);
352 	}
353 	*maplen = size;
354 
355 	TAVOR_TNF_EXIT(tavor_umap_qpmem);
356 	return (DDI_SUCCESS);
357 }
358 
359 
360 /*
361  * tavor_umap_srqmem()
362  *    Context: Can be called from user context.
363  */
364 /* ARGSUSED */
365 static int
366 tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
367     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
368 {
369 	tavor_srqhdl_t	srq;
370 	offset_t	offset;
371 	size_t		size;
372 	uint_t		maxprot;
373 	int		status;
374 
375 	TAVOR_TNF_ENTER(tavor_umap_srqmem);
376 
377 	/* Extract the Tavor SRQ handle pointer from the tavor_rsrc_t */
378 	srq = (tavor_srqhdl_t)rsrcp->tr_addr;
379 
380 	/*
381 	 * Calculate the offset of the first shared recv queue into the memory
382 	 * (ddi_umem_alloc()) allocated previously for the SRQ.
383 	 */
384 	offset = (offset_t)((uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
385 	    (uintptr_t)srq->srq_wqinfo.qa_buf_real);
386 
387 	/* Round-up the SRQ work queue sizes to system page size */
388 	size = ptob(btopr(srq->srq_wqinfo.qa_size));
389 
390 	/* Map out the QP memory */
391 	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
392 	status = devmap_umem_setup(dhp, state->ts_dip,
393 	    &tavor_devmap_umem_cbops, srq->srq_wqinfo.qa_umemcookie, offset,
394 	    size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
395 	if (status < 0) {
396 		*err = status;
397 		TNF_PROBE_0(tavor_umap_srqmem_devmap_fail, TAVOR_TNF_ERROR, "");
398 		TAVOR_TNF_EXIT(tavor_umap_srqmem);
399 		return (DDI_FAILURE);
400 	}
401 	*maplen = size;
402 
403 	TAVOR_TNF_EXIT(tavor_umap_srqmem);
404 	return (DDI_SUCCESS);
405 }
406 
407 
408 /*
409  * tavor_devmap_umem_map()
410  *    Context: Can be called from kernel context.
411  */
412 /* ARGSUSED */
413 static int
414 tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
415     offset_t off, size_t len, void **pvtp)
416 {
417 	tavor_state_t		*state;
418 	tavor_devmap_track_t	*dvm_track;
419 	tavor_cqhdl_t		cq;
420 	tavor_qphdl_t		qp;
421 	tavor_srqhdl_t		srq;
422 	minor_t			instance;
423 	uint64_t		key;
424 	uint_t			type;
425 
426 	TAVOR_TNF_ENTER(tavor_devmap_umem_map);
427 
428 	/* Get Tavor softstate structure from instance */
429 	instance = TAVOR_DEV_INSTANCE(dev);
430 	state = ddi_get_soft_state(tavor_statep, instance);
431 	if (state == NULL) {
432 		TNF_PROBE_0(tavor_devmap_umem_map_gss_fail, TAVOR_TNF_ERROR,
433 		    "");
434 		TAVOR_TNF_EXIT(tavor_devmap_umem_map);
435 		return (ENXIO);
436 	}
437 
438 	/*
439 	 * The bottom bits of "offset" are undefined (number depends on
440 	 * system PAGESIZE).  Shifting these off leaves us with a "key".
441 	 * The "key" is actually a combination of both a real key value
442 	 * (for the purpose of database lookup) and a "type" value.  Although
443 	 * we are not going to do any database lookup per se, we do want
444 	 * to extract the "key" and the "type" (to enable faster lookup of
445 	 * the appropriate CQ or QP handle).
446 	 */
447 	key  = off >> PAGESHIFT;
448 	type = key & MLNX_UMAP_RSRC_TYPE_MASK;
449 	key  = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
450 
451 	/*
452 	 * Allocate an entry to track the mapping and unmapping (specifically,
453 	 * partial unmapping) of this resource.
454 	 */
455 	dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
456 	    sizeof (tavor_devmap_track_t), KM_SLEEP);
457 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
458 	dvm_track->tdt_offset = off;
459 	dvm_track->tdt_state  = state;
460 	dvm_track->tdt_refcnt = 1;
461 	mutex_init(&dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
462 	    DDI_INTR_PRI(state->ts_intrmsi_pri));
463 
464 	/*
465 	 * Depending of the type of resource that has been mapped out, we
466 	 * need to update the QP or CQ handle to reflect that it has, in
467 	 * fact, been mapped.  This allows the driver code which frees a QP
468 	 * or a CQ to know whether it is appropriate to do a
469 	 * devmap_devmem_remap() to invalidate the userland mapping for the
470 	 * corresponding queue's memory.
471 	 */
472 	if (type == MLNX_UMAP_CQMEM_RSRC) {
473 
474 		/* Use "key" (CQ number) to do fast lookup of CQ handle */
475 		cq = tavor_cqhdl_from_cqnum(state, key);
476 
477 		/*
478 		 * Update the handle to the userland mapping.  Note:  If
479 		 * the CQ already has a valid userland mapping, then stop
480 		 * and return failure.
481 		 */
482 		mutex_enter(&cq->cq_lock);
483 		if (cq->cq_umap_dhp == NULL) {
484 			cq->cq_umap_dhp = dhp;
485 			dvm_track->tdt_size = cq->cq_cqinfo.qa_size;
486 			mutex_exit(&cq->cq_lock);
487 		} else {
488 			mutex_exit(&cq->cq_lock);
489 			goto umem_map_fail;
490 		}
491 
492 	} else if (type == MLNX_UMAP_QPMEM_RSRC) {
493 
494 		/* Use "key" (QP number) to do fast lookup of QP handle */
495 		qp = tavor_qphdl_from_qpnum(state, key);
496 
497 		/*
498 		 * Update the handle to the userland mapping.  Note:  If
499 		 * the CQ already has a valid userland mapping, then stop
500 		 * and return failure.
501 		 */
502 		mutex_enter(&qp->qp_lock);
503 		if (qp->qp_umap_dhp == NULL) {
504 			qp->qp_umap_dhp = dhp;
505 			dvm_track->tdt_size = qp->qp_wqinfo.qa_size;
506 			mutex_exit(&qp->qp_lock);
507 		} else {
508 			mutex_exit(&qp->qp_lock);
509 			goto umem_map_fail;
510 		}
511 
512 	} else if (type == MLNX_UMAP_SRQMEM_RSRC) {
513 
514 		/* Use "key" (SRQ number) to do fast lookup on SRQ handle */
515 		srq = tavor_srqhdl_from_srqnum(state, key);
516 
517 		/*
518 		 * Update the handle to the userland mapping.  Note:  If the
519 		 * SRQ already has a valid userland mapping, then stop and
520 		 * return failure.
521 		 */
522 		mutex_enter(&srq->srq_lock);
523 		if (srq->srq_umap_dhp == NULL) {
524 			srq->srq_umap_dhp = dhp;
525 			dvm_track->tdt_size = srq->srq_wqinfo.qa_size;
526 			mutex_exit(&srq->srq_lock);
527 		} else {
528 			mutex_exit(&srq->srq_lock);
529 			goto umem_map_fail;
530 		}
531 	}
532 
533 	/*
534 	 * Pass the private "Tavor devmap tracking structure" back.  This
535 	 * pointer will be returned in subsequent "unmap" callbacks.
536 	 */
537 	*pvtp = dvm_track;
538 
539 	TAVOR_TNF_EXIT(tavor_devmap_umem_map);
540 	return (DDI_SUCCESS);
541 
542 umem_map_fail:
543 	mutex_destroy(&dvm_track->tdt_lock);
544 	kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
545 	TAVOR_TNF_EXIT(tavor_devmap_umem_map);
546 	return (DDI_FAILURE);
547 }
548 
549 
550 /*
551  * tavor_devmap_umem_dup()
552  *    Context: Can be called from kernel context.
553  */
554 /* ARGSUSED */
555 static int
556 tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp, devmap_cookie_t new_dhp,
557     void **new_pvtp)
558 {
559 	tavor_state_t		*state;
560 	tavor_devmap_track_t	*dvm_track, *new_dvm_track;
561 	uint_t			maxprot;
562 	int			status;
563 
564 	TAVOR_TNF_ENTER(tavor_devmap_umem_dup);
565 
566 	/*
567 	 * Extract the Tavor softstate pointer from "Tavor devmap tracking
568 	 * structure" (in "pvtp").
569 	 */
570 	dvm_track = (tavor_devmap_track_t *)pvtp;
571 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
572 	state = dvm_track->tdt_state;
573 
574 	/*
575 	 * Since this devmap_dup() entry point is generally called
576 	 * when a process does fork(2), it is incumbent upon the driver
577 	 * to insure that the child does not inherit a valid copy of
578 	 * the parent's QP or CQ resource.  This is accomplished by using
579 	 * devmap_devmem_remap() to invalidate the child's mapping to the
580 	 * kernel memory.
581 	 */
582 	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
583 	status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
584 	    dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
585 	if (status != DDI_SUCCESS) {
586 		TAVOR_WARNING(state, "failed in tavor_devmap_umem_dup()");
587 		TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
588 		return (status);
589 	}
590 
591 	/*
592 	 * Allocate a new entry to track the subsequent unmapping
593 	 * (specifically, all partial unmappings) of the child's newly
594 	 * invalidated resource.  Note: Setting the "tdt_size" field to
595 	 * zero here is an indication to the devmap_unmap() entry point
596 	 * that this mapping is invalid, and that its subsequent unmapping
597 	 * should not affect any of the parent's CQ or QP resources.
598 	 */
599 	new_dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
600 	    sizeof (tavor_devmap_track_t), KM_SLEEP);
601 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*new_dvm_track))
602 	new_dvm_track->tdt_offset = 0;
603 	new_dvm_track->tdt_state  = state;
604 	new_dvm_track->tdt_refcnt = 1;
605 	new_dvm_track->tdt_size	  = 0;
606 	mutex_init(&new_dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
607 	    DDI_INTR_PRI(state->ts_intrmsi_pri));
608 	*new_pvtp = new_dvm_track;
609 
610 	TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
611 	return (DDI_SUCCESS);
612 }
613 
614 
615 /*
616  * tavor_devmap_umem_unmap()
617  *    Context: Can be called from kernel context.
618  */
619 /* ARGSUSED */
620 static void
621 tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
622     size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
623     devmap_cookie_t new_dhp2, void **pvtp2)
624 {
625 	tavor_state_t 		*state;
626 	tavor_rsrc_t 		*rsrcp;
627 	tavor_devmap_track_t	*dvm_track;
628 	tavor_cqhdl_t		cq;
629 	tavor_qphdl_t		qp;
630 	tavor_srqhdl_t		srq;
631 	uint64_t		key, value;
632 	uint_t			type;
633 	uint_t			size;
634 	int			status;
635 
636 	TAVOR_TNF_ENTER(tavor_devmap_umem_unmap);
637 
638 	/*
639 	 * Extract the Tavor softstate pointer from "Tavor devmap tracking
640 	 * structure" (in "pvtp").
641 	 */
642 	dvm_track = (tavor_devmap_track_t *)pvtp;
643 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
644 	state	  = dvm_track->tdt_state;
645 
646 	/*
647 	 * Extract the "offset" from the "Tavor devmap tracking structure".
648 	 * Note: The input argument "off" is ignored here because the
649 	 * Tavor mapping interfaces define a very specific meaning to
650 	 * each "logical offset".  Also extract the "key" and "type" encoded
651 	 * in the logical offset.
652 	 */
653 	key  = dvm_track->tdt_offset >> PAGESHIFT;
654 	type = key & MLNX_UMAP_RSRC_TYPE_MASK;
655 	key  = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
656 
657 	/*
658 	 * Extract the "size" of the mapping.  If this size is determined
659 	 * to be zero, then it is an indication of a previously invalidated
660 	 * mapping, and no CQ or QP resources should be affected.
661 	 */
662 	size = dvm_track->tdt_size;
663 
664 	/*
665 	 * If only the "middle portion of a given mapping is being unmapped,
666 	 * then we are effectively creating one new piece of mapped memory.
667 	 * (Original region is divided into three pieces of which the middle
668 	 * piece is being removed.  This leaves two pieces.  Since we started
669 	 * with one piece and now have two pieces, we need to increment the
670 	 * counter in the "Tavor devmap tracking structure".
671 	 *
672 	 * If, however, the whole mapped region is being unmapped, then we
673 	 * have started with one region which we are completely removing.
674 	 * In this case, we need to decrement the counter in the "Tavor
675 	 * devmap tracking structure".
676 	 *
677 	 * In each of the remaining cases, we will have started with one
678 	 * mapped region and ended with one (different) region.  So no counter
679 	 * modification is necessary.
680 	 */
681 	mutex_enter(&dvm_track->tdt_lock);
682 	if ((new_dhp1 == NULL) && (new_dhp2 == NULL)) {
683 		dvm_track->tdt_refcnt--;
684 	} else if ((new_dhp1 != NULL) && (new_dhp2 != NULL)) {
685 		dvm_track->tdt_refcnt++;
686 	}
687 	mutex_exit(&dvm_track->tdt_lock);
688 
689 	/*
690 	 * For each of the cases where the region is being divided, then we
691 	 * need to pass back the "Tavor devmap tracking structure".  This way
692 	 * we get it back when each of the remaining pieces is subsequently
693 	 * unmapped.
694 	 */
695 	if (new_dhp1 != NULL) {
696 		*pvtp1 = pvtp;
697 	}
698 	if (new_dhp2 != NULL) {
699 		*pvtp2 = pvtp;
700 	}
701 
702 	/*
703 	 * If the "Tavor devmap tracking structure" is no longer being
704 	 * referenced, then free it up.  Otherwise, return.
705 	 */
706 	if (dvm_track->tdt_refcnt == 0) {
707 		mutex_destroy(&dvm_track->tdt_lock);
708 		kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
709 
710 		/*
711 		 * If the mapping was invalid (see explanation above), then
712 		 * no further processing is necessary.
713 		 */
714 		if (size == 0) {
715 			TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
716 			return;
717 		}
718 	} else {
719 		TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
720 		return;
721 	}
722 
723 	/*
724 	 * Now that we can guarantee that the user memory is fully unmapped,
725 	 * we can use the "key" and "type" values to try to find the entry
726 	 * in the "userland resources database".  If it's found, then it
727 	 * indicates that the queue memory (CQ or QP) has not yet been freed.
728 	 * In this case, we update the corresponding CQ or QP handle to
729 	 * indicate that the "devmap_devmem_remap()" call will be unnecessary.
730 	 * If it's _not_ found, then it indicates that the CQ or QP memory
731 	 * was, in fact, freed before it was unmapped (thus requiring a
732 	 * previous invalidation by remapping - which will already have
733 	 * been done in the free routine).
734 	 */
735 	status = tavor_umap_db_find(state->ts_instance, key, type, &value,
736 	    0, NULL);
737 	if (status == DDI_SUCCESS) {
738 		/*
739 		 * Depending on the type of the mapped resource (CQ or QP),
740 		 * update handle to indicate that no invalidation remapping
741 		 * will be necessary.
742 		 */
743 		if (type == MLNX_UMAP_CQMEM_RSRC) {
744 
745 			/* Use "value" to convert to CQ handle */
746 			rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
747 			cq = (tavor_cqhdl_t)rsrcp->tr_addr;
748 
749 			/*
750 			 * Invalidate the handle to the userland mapping.
751 			 * Note: We must ensure that the mapping being
752 			 * unmapped here is the current one for the CQ.  It
753 			 * is possible that it might not be if this CQ has
754 			 * been resized and the previous CQ memory has not
755 			 * yet been unmapped.  But in that case, because of
756 			 * the devmap_devmem_remap(), there is no longer any
757 			 * association between the mapping and the real CQ
758 			 * kernel memory.
759 			 */
760 			mutex_enter(&cq->cq_lock);
761 			if (cq->cq_umap_dhp == dhp) {
762 				cq->cq_umap_dhp = (devmap_cookie_t)NULL;
763 			}
764 			mutex_exit(&cq->cq_lock);
765 
766 		} else if (type == MLNX_UMAP_QPMEM_RSRC) {
767 
768 			/* Use "value" to convert to QP handle */
769 			rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
770 			qp = (tavor_qphdl_t)rsrcp->tr_addr;
771 
772 			/*
773 			 * Invalidate the handle to the userland mapping.
774 			 * Note: we ensure that the mapping being unmapped
775 			 * here is the current one for the QP.  This is
776 			 * more of a sanity check here since, unlike CQs
777 			 * (above) we do not support resize of QPs.
778 			 */
779 			mutex_enter(&qp->qp_lock);
780 			if (qp->qp_umap_dhp == dhp) {
781 				qp->qp_umap_dhp = (devmap_cookie_t)NULL;
782 			}
783 			mutex_exit(&qp->qp_lock);
784 
785 		} else if (type == MLNX_UMAP_SRQMEM_RSRC) {
786 
787 			/* Use "value" to convert to SRQ handle */
788 			rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
789 			srq = (tavor_srqhdl_t)rsrcp->tr_addr;
790 
791 			/*
792 			 * Invalidate the handle to the userland mapping.
793 			 * Note: we ensure that the mapping being unmapped
794 			 * here is the current one for the QP.  This is
795 			 * more of a sanity check here since, unlike CQs
796 			 * (above) we do not support resize of QPs.
797 			 */
798 			mutex_enter(&srq->srq_lock);
799 			if (srq->srq_umap_dhp == dhp) {
800 				srq->srq_umap_dhp = (devmap_cookie_t)NULL;
801 			}
802 			mutex_exit(&srq->srq_lock);
803 		}
804 	}
805 
806 	TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
807 }
808 
809 
810 /*
811  * tavor_devmap_devmem_map()
812  *    Context: Can be called from kernel context.
813  */
814 /* ARGSUSED */
815 static int
816 tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
817     offset_t off, size_t len, void **pvtp)
818 {
819 	tavor_state_t		*state;
820 	tavor_devmap_track_t	*dvm_track;
821 	minor_t			instance;
822 
823 	TAVOR_TNF_ENTER(tavor_devmap_devmem_map);
824 
825 	/* Get Tavor softstate structure from instance */
826 	instance = TAVOR_DEV_INSTANCE(dev);
827 	state = ddi_get_soft_state(tavor_statep, instance);
828 	if (state == NULL) {
829 		TNF_PROBE_0(tavor_devmap_devmem_map_gss_fail, TAVOR_TNF_ERROR,
830 		    "");
831 		TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
832 		return (ENXIO);
833 	}
834 
835 	/*
836 	 * Allocate an entry to track the mapping and unmapping of this
837 	 * resource.  Note:  We don't need to initialize the "refcnt" or
838 	 * "offset" fields here, nor do we need to initialize the mutex
839 	 * used with the "refcnt".  Since UAR pages are single pages, they
840 	 * are not subject to "partial" unmappings.  This makes these other
841 	 * fields unnecessary.
842 	 */
843 	dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
844 	    sizeof (tavor_devmap_track_t), KM_SLEEP);
845 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
846 	dvm_track->tdt_state  = state;
847 	dvm_track->tdt_size   = PAGESIZE;
848 
849 	/*
850 	 * Pass the private "Tavor devmap tracking structure" back.  This
851 	 * pointer will be returned in a subsequent "unmap" callback.
852 	 */
853 	*pvtp = dvm_track;
854 
855 	TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
856 	return (DDI_SUCCESS);
857 }
858 
859 
860 /*
861  * tavor_devmap_devmem_dup()
862  *    Context: Can be called from kernel context.
863  */
864 /* ARGSUSED */
865 static int
866 tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
867     devmap_cookie_t new_dhp, void **new_pvtp)
868 {
869 	tavor_state_t		*state;
870 	tavor_devmap_track_t	*dvm_track;
871 	uint_t			maxprot;
872 	int			status;
873 
874 	TAVOR_TNF_ENTER(tavor_devmap_devmem_dup);
875 
876 	/*
877 	 * Extract the Tavor softstate pointer from "Tavor devmap tracking
878 	 * structure" (in "pvtp").  Note: If the tracking structure is NULL
879 	 * here, it means that the mapping corresponds to an invalid mapping.
880 	 * In this case, it can be safely ignored ("new_pvtp" set to NULL).
881 	 */
882 	dvm_track = (tavor_devmap_track_t *)pvtp;
883 	if (dvm_track == NULL) {
884 		*new_pvtp = NULL;
885 		TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
886 		return (DDI_SUCCESS);
887 	}
888 
889 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
890 	state = dvm_track->tdt_state;
891 
892 	/*
893 	 * Since this devmap_dup() entry point is generally called
894 	 * when a process does fork(2), it is incumbent upon the driver
895 	 * to insure that the child does not inherit a valid copy of
896 	 * the parent's resource.  This is accomplished by using
897 	 * devmap_devmem_remap() to invalidate the child's mapping to the
898 	 * kernel memory.
899 	 */
900 	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
901 	status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
902 	    dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
903 	if (status != DDI_SUCCESS) {
904 		TAVOR_WARNING(state, "failed in tavor_devmap_devmem_dup()");
905 		TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
906 		return (status);
907 	}
908 
909 	/*
910 	 * Since the region is invalid, there is no need for us to
911 	 * allocate and continue to track an additional "Tavor devmap
912 	 * tracking structure".  Instead we return NULL here, which is an
913 	 * indication to the devmap_unmap() entry point that this entry
914 	 * can be safely ignored.
915 	 */
916 	*new_pvtp = NULL;
917 
918 	TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
919 	return (DDI_SUCCESS);
920 }
921 
922 
923 /*
924  * tavor_devmap_devmem_unmap()
925  *    Context: Can be called from kernel context.
926  */
927 /* ARGSUSED */
928 static void
929 tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
930     size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
931     devmap_cookie_t new_dhp2, void **pvtp2)
932 {
933 	tavor_devmap_track_t	*dvm_track;
934 
935 	TAVOR_TNF_ENTER(tavor_devmap_devmem_unmap);
936 
937 	/*
938 	 * Free up the "Tavor devmap tracking structure" (in "pvtp").
939 	 * There cannot be "partial" unmappings here because all UAR pages
940 	 * are single pages.  Note: If the tracking structure is NULL here,
941 	 * it means that the mapping corresponds to an invalid mapping.  In
942 	 * this case, it can be safely ignored.
943 	 */
944 	dvm_track = (tavor_devmap_track_t *)pvtp;
945 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
946 	if (dvm_track == NULL) {
947 		TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
948 		return;
949 	}
950 
951 	kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
952 	TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
953 }
954 
955 
956 /*
957  * tavor_umap_ci_data_in()
958  *    Context: Can be called from user or kernel context.
959  */
960 /* ARGSUSED */
961 ibt_status_t
962 tavor_umap_ci_data_in(tavor_state_t *state, ibt_ci_data_flags_t flags,
963     ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
964 {
965 	int	status;
966 
967 	TAVOR_TNF_ENTER(tavor_umap_ci_data_in);
968 
969 	/*
970 	 * Depending on the type of object about which additional information
971 	 * is being provided (currently only MR is supported), we call the
972 	 * appropriate resource-specific function.
973 	 */
974 	switch (object) {
975 	case IBT_HDL_MR:
976 		status = tavor_umap_mr_data_in((tavor_mrhdl_t)hdl,
977 		    (ibt_mr_data_in_t *)data_p, data_sz);
978 		if (status != DDI_SUCCESS) {
979 			TNF_PROBE_0(tavor_umap_mr_data_in_fail,
980 			    TAVOR_TNF_ERROR, "");
981 			TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
982 			return (status);
983 		}
984 		break;
985 
986 	/*
987 	 * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
988 	 * since the Tavor driver does not support these.
989 	 */
990 	case IBT_HDL_HCA:
991 	case IBT_HDL_QP:
992 	case IBT_HDL_CQ:
993 	case IBT_HDL_PD:
994 	case IBT_HDL_MW:
995 	case IBT_HDL_AH:
996 	case IBT_HDL_SCHED:
997 	case IBT_HDL_EEC:
998 	case IBT_HDL_RDD:
999 	case IBT_HDL_SRQ:
1000 		TNF_PROBE_0(tavor_umap_ci_data_in_unsupp_type,
1001 		    TAVOR_TNF_ERROR, "");
1002 		TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1003 		return (IBT_NOT_SUPPORTED);
1004 
1005 	/*
1006 	 * Any other types are invalid.
1007 	 */
1008 	default:
1009 		TNF_PROBE_0(tavor_umap_ci_data_in_invtype_fail,
1010 		    TAVOR_TNF_ERROR, "");
1011 		TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1012 		return (IBT_INVALID_PARAM);
1013 	}
1014 
1015 	TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1016 	return (DDI_SUCCESS);
1017 }
1018 
1019 
1020 /*
1021  * tavor_umap_mr_data_in()
1022  *    Context: Can be called from user or kernel context.
1023  */
1024 static ibt_status_t
1025 tavor_umap_mr_data_in(tavor_mrhdl_t mr, ibt_mr_data_in_t *data,
1026     size_t data_sz)
1027 {
1028 	TAVOR_TNF_ENTER(tavor_umap_mr_data_in);
1029 
1030 	if (data->mr_rev != IBT_MR_DATA_IN_IF_VERSION) {
1031 		TNF_PROBE_0(tavor_umap_mr_data_in_ver_fail,
1032 		    TAVOR_TNF_ERROR, "");
1033 		TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1034 		return (IBT_NOT_SUPPORTED);
1035 	}
1036 
1037 	/* Check for valid MR handle pointer */
1038 	if (mr == NULL) {
1039 		TNF_PROBE_0(tavor_umap_mr_data_in_invmrhdl_fail,
1040 		    TAVOR_TNF_ERROR, "");
1041 		TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1042 		return (IBT_MR_HDL_INVALID);
1043 	}
1044 
1045 	/* Check for valid MR input structure size */
1046 	if (data_sz < sizeof (ibt_mr_data_in_t)) {
1047 		TNF_PROBE_0(tavor_umap_mr_data_in_invdatasz_fail,
1048 		    TAVOR_TNF_ERROR, "");
1049 		TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1050 		return (IBT_INSUFF_RESOURCE);
1051 	}
1052 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1053 
1054 	/*
1055 	 * Ensure that the MR corresponds to userland memory and that it is
1056 	 * a currently valid memory region as well.
1057 	 */
1058 	mutex_enter(&mr->mr_lock);
1059 	if ((mr->mr_is_umem == 0) || (mr->mr_umemcookie == NULL)) {
1060 		mutex_exit(&mr->mr_lock);
1061 		TNF_PROBE_0(tavor_umap_mr_data_in_invumem_fail,
1062 		    TAVOR_TNF_ERROR, "");
1063 		TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1064 		return (IBT_MR_HDL_INVALID);
1065 	}
1066 
1067 	/*
1068 	 * If it has passed all the above checks, then extract the callback
1069 	 * function and argument from the input structure.  Copy them into
1070 	 * the MR handle.  This function will be called only if the memory
1071 	 * corresponding to the MR handle gets a umem_lockmemory() callback.
1072 	 */
1073 	mr->mr_umem_cbfunc = data->mr_func;
1074 	mr->mr_umem_cbarg1 = data->mr_arg1;
1075 	mr->mr_umem_cbarg2 = data->mr_arg2;
1076 	mutex_exit(&mr->mr_lock);
1077 
1078 	TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1079 	return (DDI_SUCCESS);
1080 }
1081 
1082 
1083 /*
1084  * tavor_umap_ci_data_out()
1085  *    Context: Can be called from user or kernel context.
1086  */
1087 /* ARGSUSED */
1088 ibt_status_t
1089 tavor_umap_ci_data_out(tavor_state_t *state, ibt_ci_data_flags_t flags,
1090     ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
1091 {
1092 	int	status;
1093 
1094 	TAVOR_TNF_ENTER(tavor_umap_ci_data_out);
1095 
1096 	/*
1097 	 * Depending on the type of object about which additional information
1098 	 * is being requested (CQ or QP), we call the appropriate resource-
1099 	 * specific mapping function.
1100 	 */
1101 	switch (object) {
1102 	case IBT_HDL_CQ:
1103 		status = tavor_umap_cq_data_out((tavor_cqhdl_t)hdl,
1104 		    (mlnx_umap_cq_data_out_t *)data_p, data_sz);
1105 		if (status != DDI_SUCCESS) {
1106 			TNF_PROBE_0(tavor_umap_cq_data_out_fail,
1107 			    TAVOR_TNF_ERROR, "");
1108 			TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1109 			return (status);
1110 		}
1111 		break;
1112 
1113 	case IBT_HDL_QP:
1114 		status = tavor_umap_qp_data_out((tavor_qphdl_t)hdl,
1115 		    (mlnx_umap_qp_data_out_t *)data_p, data_sz);
1116 		if (status != DDI_SUCCESS) {
1117 			TNF_PROBE_0(tavor_umap_qp_data_out_fail,
1118 			    TAVOR_TNF_ERROR, "");
1119 			TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1120 			return (status);
1121 		}
1122 		break;
1123 
1124 	case IBT_HDL_SRQ:
1125 		status = tavor_umap_srq_data_out((tavor_srqhdl_t)hdl,
1126 		    (mlnx_umap_srq_data_out_t *)data_p, data_sz);
1127 		if (status != DDI_SUCCESS) {
1128 			TNF_PROBE_0(tavor_umap_srq_data_out_fail,
1129 			    TAVOR_TNF_ERROR, "");
1130 			TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1131 			return (status);
1132 		}
1133 		break;
1134 
1135 	/*
1136 	 * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
1137 	 * since the Tavor driver does not support these.
1138 	 */
1139 	case IBT_HDL_PD:
1140 		status = tavor_umap_pd_data_out((tavor_pdhdl_t)hdl,
1141 		    (mlnx_umap_pd_data_out_t *)data_p, data_sz);
1142 		if (status != DDI_SUCCESS) {
1143 			TNF_PROBE_0(tavor_umap_pd_data_out_fail,
1144 			    TAVOR_TNF_ERROR, "");
1145 			TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1146 			return (status);
1147 		}
1148 		break;
1149 
1150 	case IBT_HDL_HCA:
1151 	case IBT_HDL_MR:
1152 	case IBT_HDL_MW:
1153 	case IBT_HDL_AH:
1154 	case IBT_HDL_SCHED:
1155 	case IBT_HDL_EEC:
1156 	case IBT_HDL_RDD:
1157 		TNF_PROBE_0(tavor_umap_ci_data_out_unsupp_type,
1158 		    TAVOR_TNF_ERROR, "");
1159 		TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1160 		return (IBT_NOT_SUPPORTED);
1161 
1162 	/*
1163 	 * Any other types are invalid.
1164 	 */
1165 	default:
1166 		TNF_PROBE_0(tavor_umap_ci_data_out_invtype_fail,
1167 		    TAVOR_TNF_ERROR, "");
1168 		TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1169 		return (IBT_INVALID_PARAM);
1170 	}
1171 
1172 	TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1173 	return (DDI_SUCCESS);
1174 }
1175 
1176 
1177 /*
1178  * tavor_umap_cq_data_out()
1179  *    Context: Can be called from user or kernel context.
1180  */
1181 static ibt_status_t
1182 tavor_umap_cq_data_out(tavor_cqhdl_t cq, mlnx_umap_cq_data_out_t *data,
1183     size_t data_sz)
1184 {
1185 	TAVOR_TNF_ENTER(tavor_umap_cq_data_out);
1186 
1187 	/* Check for valid CQ handle pointer */
1188 	if (cq == NULL) {
1189 		TNF_PROBE_0(tavor_umap_cq_data_out_invcqhdl_fail,
1190 		    TAVOR_TNF_ERROR, "");
1191 		TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1192 		return (IBT_CQ_HDL_INVALID);
1193 	}
1194 
1195 	/* Check for valid CQ mapping structure size */
1196 	if (data_sz < sizeof (mlnx_umap_cq_data_out_t)) {
1197 		TNF_PROBE_0(tavor_umap_cq_data_out_invdatasz_fail,
1198 		    TAVOR_TNF_ERROR, "");
1199 		TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1200 		return (IBT_INSUFF_RESOURCE);
1201 	}
1202 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1203 
1204 	/*
1205 	 * If it has passed all the above checks, then fill in all the useful
1206 	 * mapping information (including the mapping offset that will be
1207 	 * passed back to the devmap() interface during a subsequent mmap()
1208 	 * call.
1209 	 *
1210 	 * The "offset" for CQ mmap()'s looks like this:
1211 	 * +----------------------------------------+--------+--------------+
1212 	 * |		   CQ Number		    |  0x33  | Reserved (0) |
1213 	 * +----------------------------------------+--------+--------------+
1214 	 *	   (64 - 8 - PAGESHIFT) bits	    8 bits	PAGESHIFT bits
1215 	 *
1216 	 * This returns information about the mapping offset, the length of
1217 	 * the CQ memory, the CQ number (for use in later CQ doorbells), the
1218 	 * number of CQEs the CQ memory can hold, and the size of each CQE.
1219 	 */
1220 	data->mcq_rev		= MLNX_UMAP_IF_VERSION;
1221 	data->mcq_mapoffset	= ((((uint64_t)cq->cq_cqnum <<
1222 	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_CQMEM_RSRC) << PAGESHIFT);
1223 	data->mcq_maplen	= cq->cq_cqinfo.qa_size;
1224 	data->mcq_cqnum		= cq->cq_cqnum;
1225 	data->mcq_numcqe	= cq->cq_bufsz;
1226 	data->mcq_cqesz		= sizeof (tavor_hw_cqe_t);
1227 
1228 	TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1229 	return (DDI_SUCCESS);
1230 }
1231 
1232 
1233 /*
1234  * tavor_umap_qp_data_out()
1235  *    Context: Can be called from user or kernel context.
1236  */
1237 static ibt_status_t
1238 tavor_umap_qp_data_out(tavor_qphdl_t qp, mlnx_umap_qp_data_out_t *data,
1239     size_t data_sz)
1240 {
1241 	TAVOR_TNF_ENTER(tavor_umap_qp_data_out);
1242 
1243 	/* Check for valid QP handle pointer */
1244 	if (qp == NULL) {
1245 		TNF_PROBE_0(tavor_umap_qp_data_out_invqphdl_fail,
1246 		    TAVOR_TNF_ERROR, "");
1247 		TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1248 		return (IBT_QP_HDL_INVALID);
1249 	}
1250 
1251 	/* Check for valid QP mapping structure size */
1252 	if (data_sz < sizeof (mlnx_umap_qp_data_out_t)) {
1253 		TNF_PROBE_0(tavor_umap_qp_data_out_invdatasz_fail,
1254 		    TAVOR_TNF_ERROR, "");
1255 		TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1256 		return (IBT_INSUFF_RESOURCE);
1257 	}
1258 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1259 
1260 	/*
1261 	 * If it has passed all the checks, then fill in all the useful
1262 	 * mapping information (including the mapping offset that will be
1263 	 * passed back to the devmap() interface during a subsequent mmap()
1264 	 * call.
1265 	 *
1266 	 * The "offset" for QP mmap()'s looks like this:
1267 	 * +----------------------------------------+--------+--------------+
1268 	 * |		   QP Number		    |  0x44  | Reserved (0) |
1269 	 * +----------------------------------------+--------+--------------+
1270 	 *	   (64 - 8 - PAGESHIFT) bits	    8 bits	PAGESHIFT bits
1271 	 *
1272 	 * This returns information about the mapping offset, the length of
1273 	 * the QP memory, and the QP number (for use in later send and recv
1274 	 * doorbells).  It also returns the following information for both
1275 	 * the receive work queue and the send work queue, respectively:  the
1276 	 * offset (from the base mapped address) of the start of the given
1277 	 * work queue, the 64-bit IB virtual address that corresponds to
1278 	 * the base mapped address (needed for posting WQEs though the
1279 	 * QP doorbells), the number of WQEs the given work queue can hold,
1280 	 * and the size of each WQE for the given work queue.
1281 	 */
1282 	data->mqp_rev		= MLNX_UMAP_IF_VERSION;
1283 	data->mqp_mapoffset	= ((((uint64_t)qp->qp_qpnum <<
1284 	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_QPMEM_RSRC) << PAGESHIFT);
1285 	data->mqp_maplen	= qp->qp_wqinfo.qa_size;
1286 	data->mqp_qpnum		= qp->qp_qpnum;
1287 
1288 	/*
1289 	 * If this QP is associated with a shared receive queue (SRQ),
1290 	 * then return invalid RecvQ parameters.  Otherwise, return
1291 	 * the proper parameter values.
1292 	 */
1293 	if (qp->qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
1294 		data->mqp_rq_off	= (uint32_t)qp->qp_wqinfo.qa_size;
1295 		data->mqp_rq_desc_addr	= (uint32_t)qp->qp_wqinfo.qa_size;
1296 		data->mqp_rq_numwqe	= 0;
1297 		data->mqp_rq_wqesz	= 0;
1298 	} else {
1299 		data->mqp_rq_off	= (uintptr_t)qp->qp_rq_buf -
1300 		    (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1301 		data->mqp_rq_desc_addr	= (uint32_t)((uintptr_t)qp->qp_rq_buf -
1302 		    qp->qp_desc_off);
1303 		data->mqp_rq_numwqe	= qp->qp_rq_bufsz;
1304 		data->mqp_rq_wqesz	= (1 << qp->qp_rq_log_wqesz);
1305 	}
1306 	data->mqp_sq_off	= (uintptr_t)qp->qp_sq_buf -
1307 	    (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1308 	data->mqp_sq_desc_addr	= (uint32_t)((uintptr_t)qp->qp_sq_buf -
1309 	    qp->qp_desc_off);
1310 	data->mqp_sq_numwqe	= qp->qp_sq_bufsz;
1311 	data->mqp_sq_wqesz	= (1 << qp->qp_sq_log_wqesz);
1312 
1313 	TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1314 	return (DDI_SUCCESS);
1315 }
1316 
1317 
1318 /*
1319  * tavor_umap_srq_data_out()
1320  *    Context: Can be called from user or kernel context.
1321  */
1322 static ibt_status_t
1323 tavor_umap_srq_data_out(tavor_srqhdl_t srq, mlnx_umap_srq_data_out_t *data,
1324     size_t data_sz)
1325 {
1326 	TAVOR_TNF_ENTER(tavor_umap_srq_data_out);
1327 
1328 	/* Check for valid SRQ handle pointer */
1329 	if (srq == NULL) {
1330 		TNF_PROBE_0(tavor_umap_srq_data_out_invsrqhdl_fail,
1331 		    TAVOR_TNF_ERROR, "");
1332 		TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1333 		return (IBT_SRQ_HDL_INVALID);
1334 	}
1335 
1336 	/* Check for valid SRQ mapping structure size */
1337 	if (data_sz < sizeof (mlnx_umap_srq_data_out_t)) {
1338 		TNF_PROBE_0(tavor_umap_srq_data_out_invdatasz_fail,
1339 		    TAVOR_TNF_ERROR, "");
1340 		TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1341 		return (IBT_INSUFF_RESOURCE);
1342 	}
1343 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1344 
1345 	/*
1346 	 * If it has passed all the checks, then fill in all the useful
1347 	 * mapping information (including the mapping offset that will be
1348 	 * passed back to the devmap() interface during a subsequent mmap()
1349 	 * call.
1350 	 *
1351 	 * The "offset" for SRQ mmap()'s looks like this:
1352 	 * +----------------------------------------+--------+--------------+
1353 	 * |		   SRQ Number		    |  0x66  | Reserved (0) |
1354 	 * +----------------------------------------+--------+--------------+
1355 	 *	   (64 - 8 - PAGESHIFT) bits	    8 bits	PAGESHIFT bits
1356 	 *
1357 	 * This returns information about the mapping offset, the length of the
1358 	 * SRQ memory, and the SRQ number (for use in later send and recv
1359 	 * doorbells).  It also returns the following information for the
1360 	 * shared receive queue: the offset (from the base mapped address) of
1361 	 * the start of the given work queue, the 64-bit IB virtual address
1362 	 * that corresponds to the base mapped address (needed for posting WQEs
1363 	 * though the QP doorbells), the number of WQEs the given work queue
1364 	 * can hold, and the size of each WQE for the given work queue.
1365 	 */
1366 	data->msrq_rev		= MLNX_UMAP_IF_VERSION;
1367 	data->msrq_mapoffset	= ((((uint64_t)srq->srq_srqnum <<
1368 	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_SRQMEM_RSRC) << PAGESHIFT);
1369 	data->msrq_maplen	= srq->srq_wqinfo.qa_size;
1370 	data->msrq_srqnum	= srq->srq_srqnum;
1371 
1372 	data->msrq_desc_addr	= (uint32_t)((uintptr_t)srq->srq_wq_buf -
1373 	    srq->srq_desc_off);
1374 	data->msrq_numwqe	= srq->srq_wq_bufsz;
1375 	data->msrq_wqesz	= (1 << srq->srq_wq_log_wqesz);
1376 
1377 	TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1378 	return (DDI_SUCCESS);
1379 }
1380 
1381 /*
1382  * tavor_umap_pd_data_out()
1383  *    Context: Can be called from user or kernel context.
1384  */
1385 static ibt_status_t
1386 tavor_umap_pd_data_out(tavor_pdhdl_t pd, mlnx_umap_pd_data_out_t *data,
1387     size_t data_sz)
1388 {
1389 	TAVOR_TNF_ENTER(tavor_umap_pd_data_out);
1390 
1391 	/* Check for valid PD handle pointer */
1392 	if (pd == NULL) {
1393 		TNF_PROBE_0(tavor_umap_pd_data_out_invpdhdl_fail,
1394 		    TAVOR_TNF_ERROR, "");
1395 		TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1396 		return (IBT_PD_HDL_INVALID);
1397 	}
1398 
1399 	/* Check for valid PD mapping structure size */
1400 	if (data_sz < sizeof (mlnx_umap_pd_data_out_t)) {
1401 		TNF_PROBE_0(tavor_umap_pd_data_out_invdatasz_fail,
1402 		    TAVOR_TNF_ERROR, "");
1403 		TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1404 		return (IBT_INSUFF_RESOURCE);
1405 	}
1406 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1407 
1408 	/*
1409 	 * If it has passed all the checks, then fill the PD table index
1410 	 * (the PD table allocated index for the PD pd_pdnum)
1411 	 */
1412 	data->mpd_rev	= MLNX_UMAP_IF_VERSION;
1413 	data->mpd_pdnum	= pd->pd_pdnum;
1414 
1415 	TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1416 	return (DDI_SUCCESS);
1417 }
1418 
1419 /*
1420  * tavor_umap_db_init()
1421  *    Context: Only called from attach() path context
1422  */
1423 void
1424 tavor_umap_db_init(void)
1425 {
1426 	TAVOR_TNF_ENTER(tavor_umap_db_init);
1427 
1428 	/*
1429 	 * Initialize the lock used by the Tavor "userland resources database"
1430 	 * This is used to ensure atomic access to add, remove, and find
1431 	 * entries in the database.
1432 	 */
1433 	mutex_init(&tavor_userland_rsrc_db.tdl_umapdb_lock, NULL,
1434 	    MUTEX_DRIVER, NULL);
1435 
1436 	/*
1437 	 * Initialize the AVL tree used for the "userland resources
1438 	 * database".  Using an AVL tree here provides the ability to
1439 	 * scale the database size to large numbers of resources.  The
1440 	 * entries in the tree are "tavor_umap_db_entry_t".
1441 	 * The tree is searched with the help of the
1442 	 * tavor_umap_db_compare() routine.
1443 	 */
1444 	avl_create(&tavor_userland_rsrc_db.tdl_umapdb_avl,
1445 	    tavor_umap_db_compare, sizeof (tavor_umap_db_entry_t),
1446 	    offsetof(tavor_umap_db_entry_t, tdbe_avlnode));
1447 
1448 	TAVOR_TNF_EXIT(tavor_umap_db_init);
1449 }
1450 
1451 
1452 /*
1453  * tavor_umap_db_fini()
1454  *    Context: Only called from attach() and/or detach() path contexts
1455  */
1456 void
1457 tavor_umap_db_fini(void)
1458 {
1459 	TAVOR_TNF_ENTER(tavor_umap_db_fini);
1460 
1461 	/* Destroy the AVL tree for the "userland resources database" */
1462 	avl_destroy(&tavor_userland_rsrc_db.tdl_umapdb_avl);
1463 
1464 	/* Destroy the lock for the "userland resources database" */
1465 	mutex_destroy(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1466 
1467 	TAVOR_TNF_EXIT(tavor_umap_db_fini);
1468 }
1469 
1470 
1471 /*
1472  * tavor_umap_db_alloc()
1473  *    Context: Can be called from user or kernel context.
1474  */
1475 tavor_umap_db_entry_t *
1476 tavor_umap_db_alloc(uint_t instance, uint64_t key, uint_t type, uint64_t value)
1477 {
1478 	tavor_umap_db_entry_t	*umapdb;
1479 
1480 	TAVOR_TNF_ENTER(tavor_umap_db_alloc);
1481 
1482 	/* Allocate an entry to add to the "userland resources database" */
1483 	umapdb = kmem_zalloc(sizeof (tavor_umap_db_entry_t), KM_NOSLEEP);
1484 	if (umapdb == NULL) {
1485 		TNF_PROBE_0(tavor_umap_db_alloc_kmz_fail, TAVOR_TNF_ERROR, "");
1486 		TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1487 		return (NULL);
1488 	}
1489 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1490 
1491 	/* Fill in the fields in the database entry */
1492 	umapdb->tdbe_common.tdb_instance  = instance;
1493 	umapdb->tdbe_common.tdb_type	  = type;
1494 	umapdb->tdbe_common.tdb_key	  = key;
1495 	umapdb->tdbe_common.tdb_value	  = value;
1496 
1497 	TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1498 	return (umapdb);
1499 }
1500 
1501 
1502 /*
1503  * tavor_umap_db_free()
1504  *    Context: Can be called from user or kernel context.
1505  */
1506 void
1507 tavor_umap_db_free(tavor_umap_db_entry_t *umapdb)
1508 {
1509 	TAVOR_TNF_ENTER(tavor_umap_db_free);
1510 
1511 	/* Free the database entry */
1512 	kmem_free(umapdb, sizeof (tavor_umap_db_entry_t));
1513 
1514 	TAVOR_TNF_EXIT(tavor_umap_db_free);
1515 }
1516 
1517 
1518 /*
1519  * tavor_umap_db_add()
1520  *    Context: Can be called from user or kernel context.
1521  */
1522 void
1523 tavor_umap_db_add(tavor_umap_db_entry_t *umapdb)
1524 {
1525 	TAVOR_TNF_ENTER(tavor_umap_db_add);
1526 
1527 	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1528 	tavor_umap_db_add_nolock(umapdb);
1529 	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1530 
1531 	TAVOR_TNF_EXIT(tavor_umap_db_add);
1532 }
1533 
1534 
1535 /*
1536  * tavor_umap_db_add_nolock()
1537  *    Context: Can be called from user or kernel context.
1538  */
1539 void
1540 tavor_umap_db_add_nolock(tavor_umap_db_entry_t *umapdb)
1541 {
1542 	tavor_umap_db_query_t	query;
1543 	avl_index_t		where;
1544 
1545 	TAVOR_TNF_ENTER(tavor_umap_db_add_nolock);
1546 
1547 	ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1548 
1549 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1550 
1551 	/*
1552 	 * Copy the common portion of the "to-be-added" database entry
1553 	 * into the "tavor_umap_db_query_t" structure.  We use this structure
1554 	 * (with no flags set) to find the appropriate location in the
1555 	 * "userland resources database" for the new entry to be added.
1556 	 *
1557 	 * Note: we expect that this entry should not be found in the
1558 	 * database (unless something bad has happened).
1559 	 */
1560 	query.tqdb_common = umapdb->tdbe_common;
1561 	query.tqdb_flags  = 0;
1562 	(void) avl_find(&tavor_userland_rsrc_db.tdl_umapdb_avl, &query,
1563 	    &where);
1564 
1565 	/*
1566 	 * Now, using the "where" field from the avl_find() operation
1567 	 * above, we will insert the new database entry ("umapdb").
1568 	 */
1569 	avl_insert(&tavor_userland_rsrc_db.tdl_umapdb_avl, umapdb,
1570 	    where);
1571 
1572 	TAVOR_TNF_EXIT(tavor_umap_db_add_nolock);
1573 }
1574 
1575 
1576 /*
1577  * tavor_umap_db_find()
1578  *    Context: Can be called from user or kernel context.
1579  */
1580 int
1581 tavor_umap_db_find(uint_t instance, uint64_t key, uint_t type,
1582     uint64_t *value, uint_t flag, tavor_umap_db_entry_t	**umapdb)
1583 {
1584 	int	status;
1585 
1586 	TAVOR_TNF_ENTER(tavor_umap_db_find);
1587 
1588 	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1589 	status = tavor_umap_db_find_nolock(instance, key, type, value, flag,
1590 	    umapdb);
1591 	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1592 
1593 	TAVOR_TNF_EXIT(tavor_umap_db_find);
1594 	return (status);
1595 }
1596 
1597 
1598 /*
1599  * tavor_umap_db_find_nolock()
1600  *    Context: Can be called from user or kernel context.
1601  */
1602 int
1603 tavor_umap_db_find_nolock(uint_t instance, uint64_t key, uint_t type,
1604     uint64_t *value, uint_t flags, tavor_umap_db_entry_t **umapdb)
1605 {
1606 	tavor_umap_db_query_t	query;
1607 	tavor_umap_db_entry_t	*entry;
1608 	avl_index_t		where;
1609 
1610 	TAVOR_TNF_ENTER(tavor_umap_db_find_nolock);
1611 
1612 	ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1613 
1614 	/*
1615 	 * Fill in key, type, instance, and flags values of the
1616 	 * tavor_umap_db_query_t in preparation for the database
1617 	 * lookup.
1618 	 */
1619 	query.tqdb_flags		= flags;
1620 	query.tqdb_common.tdb_key	= key;
1621 	query.tqdb_common.tdb_type	= type;
1622 	query.tqdb_common.tdb_instance	= instance;
1623 
1624 	/*
1625 	 * Perform the database query.  If no entry is found, then
1626 	 * return failure, else continue.
1627 	 */
1628 	entry = (tavor_umap_db_entry_t *)avl_find(
1629 	    &tavor_userland_rsrc_db.tdl_umapdb_avl, &query, &where);
1630 	if (entry == NULL) {
1631 		TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
1632 		return (DDI_FAILURE);
1633 	}
1634 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))
1635 
1636 	/*
1637 	 * If the flags argument specifies that the entry should
1638 	 * be removed if found, then call avl_remove() to remove
1639 	 * the entry from the database.
1640 	 */
1641 	if (flags & TAVOR_UMAP_DB_REMOVE) {
1642 
1643 		avl_remove(&tavor_userland_rsrc_db.tdl_umapdb_avl, entry);
1644 
1645 		/*
1646 		 * The database entry is returned with the expectation
1647 		 * that the caller will use tavor_umap_db_free() to
1648 		 * free the entry's memory.  ASSERT that this is non-NULL.
1649 		 * NULL pointer should never be passed for the
1650 		 * TAVOR_UMAP_DB_REMOVE case.
1651 		 */
1652 		ASSERT(umapdb != NULL);
1653 	}
1654 
1655 	/*
1656 	 * If the caller would like visibility to the database entry
1657 	 * (indicated through the use of a non-NULL "umapdb" argument),
1658 	 * then fill it in.
1659 	 */
1660 	if (umapdb != NULL) {
1661 		*umapdb = entry;
1662 	}
1663 
1664 	/* Extract value field from database entry and return success */
1665 	*value = entry->tdbe_common.tdb_value;
1666 
1667 	TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
1668 	return (DDI_SUCCESS);
1669 }
1670 
1671 
1672 /*
1673  * tavor_umap_umemlock_cb()
1674  *    Context: Can be called from callback context.
1675  */
1676 void
1677 tavor_umap_umemlock_cb(ddi_umem_cookie_t *umem_cookie)
1678 {
1679 	tavor_umap_db_entry_t	*umapdb;
1680 	tavor_state_t		*state;
1681 	tavor_rsrc_t 		*rsrcp;
1682 	tavor_mrhdl_t		mr;
1683 	uint64_t		value;
1684 	uint_t			instance;
1685 	int			status;
1686 	void			(*mr_callback)(void *, void *);
1687 	void			*mr_cbarg1, *mr_cbarg2;
1688 
1689 	TAVOR_TNF_ENTER(tavor_umap_umemlock_cb);
1690 
1691 	/*
1692 	 * If this was userland memory, then we need to remove its entry
1693 	 * from the "userland resources database".  Note:  We use the
1694 	 * TAVOR_UMAP_DB_IGNORE_INSTANCE flag here because we don't know
1695 	 * which instance was used when the entry was added (but we want
1696 	 * to know after the entry is found using the other search criteria).
1697 	 */
1698 	status = tavor_umap_db_find(0, (uint64_t)(uintptr_t)umem_cookie,
1699 	    MLNX_UMAP_MRMEM_RSRC, &value, (TAVOR_UMAP_DB_REMOVE |
1700 	    TAVOR_UMAP_DB_IGNORE_INSTANCE), &umapdb);
1701 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1702 	if (status == DDI_SUCCESS) {
1703 		instance = umapdb->tdbe_common.tdb_instance;
1704 		state = ddi_get_soft_state(tavor_statep, instance);
1705 		if (state == NULL) {
1706 			cmn_err(CE_WARN, "Unable to match Tavor instance\n");
1707 			TNF_PROBE_0(tavor_umap_umemlock_cb_gss_fail,
1708 			    TAVOR_TNF_ERROR, "");
1709 			TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1710 			return;
1711 		}
1712 
1713 		/* Free the database entry */
1714 		tavor_umap_db_free(umapdb);
1715 
1716 		/* Use "value" to convert to an MR handle */
1717 		rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
1718 		mr = (tavor_mrhdl_t)rsrcp->tr_addr;
1719 
1720 		/*
1721 		 * If a callback has been provided, call it first.  This
1722 		 * callback is expected to do any cleanup necessary to
1723 		 * guarantee that the subsequent MR deregister (below)
1724 		 * will succeed.  Specifically, this means freeing up memory
1725 		 * windows which might have been associated with the MR.
1726 		 */
1727 		mutex_enter(&mr->mr_lock);
1728 		mr_callback = mr->mr_umem_cbfunc;
1729 		mr_cbarg1   = mr->mr_umem_cbarg1;
1730 		mr_cbarg2   = mr->mr_umem_cbarg2;
1731 		mutex_exit(&mr->mr_lock);
1732 		if (mr_callback != NULL) {
1733 			mr_callback(mr_cbarg1, mr_cbarg2);
1734 		}
1735 
1736 		/*
1737 		 * Then call tavor_mr_deregister() to release the resources
1738 		 * associated with the MR handle.  Note: Because this routine
1739 		 * will also check for whether the ddi_umem_cookie_t is in the
1740 		 * database, it will take responsibility for disabling the
1741 		 * memory region and calling ddi_umem_unlock().
1742 		 */
1743 		status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
1744 		    TAVOR_SLEEP);
1745 		if (status != DDI_SUCCESS) {
1746 			TAVOR_WARNING(state, "Unexpected failure in "
1747 			    "deregister from callback\n");
1748 			TNF_PROBE_0(tavor_umap_umemlock_cb_dereg_fail,
1749 			    TAVOR_TNF_ERROR, "");
1750 			TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1751 		}
1752 	}
1753 
1754 	TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1755 }
1756 
1757 
1758 /*
1759  * tavor_umap_db_compare()
1760  *    Context: Can be called from user or kernel context.
1761  */
1762 static int
1763 tavor_umap_db_compare(const void *q, const void *e)
1764 {
1765 	tavor_umap_db_common_t	*entry_common, *query_common;
1766 	uint_t			query_flags;
1767 
1768 	TAVOR_TNF_ENTER(tavor_umap_db_compare);
1769 
1770 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*((tavor_umap_db_query_t *)q)))
1771 
1772 	entry_common = &((tavor_umap_db_entry_t *)e)->tdbe_common;
1773 	query_common = &((tavor_umap_db_query_t *)q)->tqdb_common;
1774 	query_flags  = ((tavor_umap_db_query_t *)q)->tqdb_flags;
1775 
1776 	/*
1777 	 * The first comparison is done on the "key" value in "query"
1778 	 * and "entry".  If they are not equal, then the appropriate
1779 	 * search direction is returned.  Else, we continue by
1780 	 * comparing "type".
1781 	 */
1782 	if (query_common->tdb_key < entry_common->tdb_key) {
1783 		TAVOR_TNF_EXIT(tavor_umap_db_compare);
1784 		return (-1);
1785 	} else if (query_common->tdb_key > entry_common->tdb_key) {
1786 		TAVOR_TNF_EXIT(tavor_umap_db_compare);
1787 		return (+1);
1788 	}
1789 
1790 	/*
1791 	 * If the search reaches this point, then "query" and "entry"
1792 	 * have equal key values.  So we continue be comparing their
1793 	 * "type" values.  Again, if they are not equal, then the
1794 	 * appropriate search direction is returned.  Else, we continue
1795 	 * by comparing "instance".
1796 	 */
1797 	if (query_common->tdb_type < entry_common->tdb_type) {
1798 		TAVOR_TNF_EXIT(tavor_umap_db_compare);
1799 		return (-1);
1800 	} else if (query_common->tdb_type > entry_common->tdb_type) {
1801 		TAVOR_TNF_EXIT(tavor_umap_db_compare);
1802 		return (+1);
1803 	}
1804 
1805 	/*
1806 	 * If the search reaches this point, then "query" and "entry"
1807 	 * have exactly the same key and type values.  Now we consult
1808 	 * the "flags" field in the query to determine whether the
1809 	 * "instance" is relevant to the search.  If the
1810 	 * TAVOR_UMAP_DB_IGNORE_INSTANCE flags is set, then return
1811 	 * success (0) here.  Otherwise, continue the search by comparing
1812 	 * instance values and returning the appropriate search direction.
1813 	 */
1814 	if (query_flags & TAVOR_UMAP_DB_IGNORE_INSTANCE) {
1815 		TAVOR_TNF_EXIT(tavor_umap_db_compare);
1816 		return (0);
1817 	}
1818 
1819 	/*
1820 	 * If the search has reached this point, then "query" and "entry"
1821 	 * can only be differentiated by their instance values.  If these
1822 	 * are not equal, then return the appropriate search direction.
1823 	 * Else, we return success (0).
1824 	 */
1825 	if (query_common->tdb_instance < entry_common->tdb_instance) {
1826 		TAVOR_TNF_EXIT(tavor_umap_db_compare);
1827 		return (-1);
1828 	} else if (query_common->tdb_instance > entry_common->tdb_instance) {
1829 		TAVOR_TNF_EXIT(tavor_umap_db_compare);
1830 		return (+1);
1831 	}
1832 
1833 	/* Everything matches... so return success */
1834 	TAVOR_TNF_EXIT(tavor_umap_db_compare);
1835 	return (0);
1836 }
1837 
1838 
1839 /*
1840  * tavor_umap_db_set_onclose_cb()
1841  *    Context: Can be called from user or kernel context.
1842  */
1843 int
1844 tavor_umap_db_set_onclose_cb(dev_t dev, uint64_t flag,
1845     void (*callback)(void *), void *arg)
1846 {
1847 	tavor_umap_db_priv_t	*priv;
1848 	tavor_umap_db_entry_t	*umapdb;
1849 	minor_t			instance;
1850 	uint64_t		value;
1851 	int			status;
1852 
1853 	TAVOR_TNF_ENTER(tavor_umap_db_set_onclose_cb);
1854 
1855 	instance = TAVOR_DEV_INSTANCE(dev);
1856 	if (instance == -1) {
1857 		TNF_PROBE_0(tavor_umap_db_set_onclose_cb_inst_fail,
1858 		    TAVOR_TNF_ERROR, "");
1859 		TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1860 		return (DDI_FAILURE);
1861 	}
1862 
1863 	if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1864 		TNF_PROBE_0(tavor_umap_db_set_onclose_cb_invflag_fail,
1865 		    TAVOR_TNF_ERROR, "");
1866 		TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1867 		return (DDI_FAILURE);
1868 	}
1869 
1870 	/*
1871 	 * Grab the lock for the "userland resources database" and find
1872 	 * the entry corresponding to this minor number.  Once it's found,
1873 	 * allocate (if necessary) and add an entry (in the "tdb_priv"
1874 	 * field) to indicate that further processing may be needed during
1875 	 * Tavor's close() handling.
1876 	 */
1877 	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1878 	status = tavor_umap_db_find_nolock(instance, dev,
1879 	    MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1880 	if (status != DDI_SUCCESS) {
1881 		TNF_PROBE_0(tavor_umap_db_set_onclose_cb_find_fail,
1882 		    TAVOR_TNF_ERROR, "");
1883 		mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1884 		TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1885 		return (DDI_FAILURE);
1886 	}
1887 
1888 	priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1889 	if (priv == NULL) {
1890 		priv = (tavor_umap_db_priv_t *)kmem_zalloc(
1891 		    sizeof (tavor_umap_db_priv_t), KM_NOSLEEP);
1892 		if (priv == NULL) {
1893 			TNF_PROBE_0(tavor_umap_db_set_onclose_cb_kmz_fail,
1894 			    TAVOR_TNF_ERROR, "");
1895 			mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1896 			TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1897 			return (DDI_FAILURE);
1898 		}
1899 	}
1900 
1901 	/*
1902 	 * Save away the callback and argument to be used during Tavor's
1903 	 * close() processing.
1904 	 */
1905 	priv->tdp_cb	= callback;
1906 	priv->tdp_arg	= arg;
1907 
1908 	umapdb->tdbe_common.tdb_priv = (void *)priv;
1909 	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1910 
1911 	TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1912 	return (DDI_SUCCESS);
1913 }
1914 
1915 
1916 /*
1917  * tavor_umap_db_clear_onclose_cb()
1918  *    Context: Can be called from user or kernel context.
1919  */
1920 int
1921 tavor_umap_db_clear_onclose_cb(dev_t dev, uint64_t flag)
1922 {
1923 	tavor_umap_db_priv_t	*priv;
1924 	tavor_umap_db_entry_t	*umapdb;
1925 	minor_t			instance;
1926 	uint64_t		value;
1927 	int			status;
1928 
1929 	TAVOR_TNF_ENTER(tavor_umap_db_set_onclose_cb);
1930 
1931 	instance = TAVOR_DEV_INSTANCE(dev);
1932 	if (instance == -1) {
1933 		TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_inst_fail,
1934 		    TAVOR_TNF_ERROR, "");
1935 		TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1936 		return (DDI_FAILURE);
1937 	}
1938 
1939 	if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1940 		TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_invflag_fail,
1941 		    TAVOR_TNF_ERROR, "");
1942 		TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1943 		return (DDI_FAILURE);
1944 	}
1945 
1946 	/*
1947 	 * Grab the lock for the "userland resources database" and find
1948 	 * the entry corresponding to this minor number.  Once it's found,
1949 	 * remove the entry (in the "tdb_priv" field) that indicated the
1950 	 * need for further processing during Tavor's close().  Free the
1951 	 * entry, if appropriate.
1952 	 */
1953 	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1954 	status = tavor_umap_db_find_nolock(instance, dev,
1955 	    MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1956 	if (status != DDI_SUCCESS) {
1957 		TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_find_fail,
1958 		    TAVOR_TNF_ERROR, "");
1959 		mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1960 		TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1961 		return (DDI_FAILURE);
1962 	}
1963 
1964 	priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1965 	if (priv != NULL) {
1966 		kmem_free(priv, sizeof (tavor_umap_db_priv_t));
1967 		priv = NULL;
1968 	}
1969 
1970 	umapdb->tdbe_common.tdb_priv = (void *)priv;
1971 	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1972 	return (DDI_SUCCESS);
1973 }
1974 
1975 
1976 /*
1977  * tavor_umap_db_clear_onclose_cb()
1978  *    Context: Can be called from user or kernel context.
1979  */
1980 void
1981 tavor_umap_db_handle_onclose_cb(tavor_umap_db_priv_t *priv)
1982 {
1983 	void	(*callback)(void *);
1984 
1985 	ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1986 
1987 	/*
1988 	 * Call the callback.
1989 	 *    Note: Currently there is only one callback (in "tdp_cb"), but
1990 	 *    in the future there may be more, depending on what other types
1991 	 *    of interaction there are between userland processes and the
1992 	 *    driver.
1993 	 */
1994 	callback = priv->tdp_cb;
1995 	callback(priv->tdp_arg);
1996 }
1997