1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * tavor_umap.c
29 * Tavor Userland Mapping Routines
30 *
31 * Implements all the routines necessary for enabling direct userland
32 * access to the Tavor hardware. This includes all routines necessary for
33 * maintaining the "userland resources database" and all the support routines
34 * for the devmap calls.
35 */
36
37 #include <sys/types.h>
38 #include <sys/conf.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/modctl.h>
42 #include <sys/file.h>
43 #include <sys/avl.h>
44 #include <sys/sysmacros.h>
45
46 #include <sys/ib/adapters/tavor/tavor.h>
47
48 /* Tavor HCA state pointer (extern) */
49 extern void *tavor_statep;
50
51 /* Tavor HCA Userland Resource Database (extern) */
52 extern tavor_umap_db_t tavor_userland_rsrc_db;
53
54 static int tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
55 tavor_rsrc_t *rsrcp, size_t *maplen, int *err);
56 static int tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
57 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
58 static int tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
59 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
60 static int tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
61 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
62 static int tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
63 offset_t off, size_t len, void **pvtp);
64 static int tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp,
65 devmap_cookie_t new_dhp, void **new_pvtp);
66 static void tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp,
67 offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
68 devmap_cookie_t new_dhp2, void **pvtp2);
69 static int tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
70 offset_t off, size_t len, void **pvtp);
71 static int tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
72 devmap_cookie_t new_dhp, void **new_pvtp);
73 static void tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp,
74 offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
75 devmap_cookie_t new_dhp2, void **pvtp2);
76 static ibt_status_t tavor_umap_mr_data_in(tavor_mrhdl_t mr,
77 ibt_mr_data_in_t *data, size_t data_sz);
78 static ibt_status_t tavor_umap_cq_data_out(tavor_cqhdl_t cq,
79 mlnx_umap_cq_data_out_t *data, size_t data_sz);
80 static ibt_status_t tavor_umap_qp_data_out(tavor_qphdl_t qp,
81 mlnx_umap_qp_data_out_t *data, size_t data_sz);
82 static ibt_status_t tavor_umap_srq_data_out(tavor_srqhdl_t srq,
83 mlnx_umap_srq_data_out_t *data, size_t data_sz);
84 static int tavor_umap_db_compare(const void *query, const void *entry);
85 static ibt_status_t tavor_umap_pd_data_out(tavor_pdhdl_t pd,
86 mlnx_umap_pd_data_out_t *data, size_t data_sz);
87
88
89 /*
90 * These callbacks are passed to devmap_umem_setup() and devmap_devmem_setup(),
91 * respectively. They are used to handle (among other things) partial
92 * unmappings and to provide a method for invalidating mappings inherited
93 * as a result of a fork(2) system call.
94 */
95 static struct devmap_callback_ctl tavor_devmap_umem_cbops = {
96 DEVMAP_OPS_REV,
97 tavor_devmap_umem_map,
98 NULL,
99 tavor_devmap_umem_dup,
100 tavor_devmap_umem_unmap
101 };
102 static struct devmap_callback_ctl tavor_devmap_devmem_cbops = {
103 DEVMAP_OPS_REV,
104 tavor_devmap_devmem_map,
105 NULL,
106 tavor_devmap_devmem_dup,
107 tavor_devmap_devmem_unmap
108 };
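
/*
 * Note: in both callback tables above, the NULL entry is the
 * devmap_access() callback; leaving it NULL selects the framework's
 * default access/fault handling for these mappings.
 */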
109
110 /*
111 * tavor_devmap()
112 * Context: Can be called from user context.
113 */
114 /* ARGSUSED */
115 int
116 tavor_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
117 size_t *maplen, uint_t model)
118 {
119 tavor_state_t *state;
120 tavor_rsrc_t *rsrcp;
121 minor_t instance;
122 uint64_t key, value;
123 uint_t type;
124 int err, status;
125
126 TAVOR_TNF_ENTER(tavor_devmap);
127
128 /* Get Tavor softstate structure from instance */
129 instance = TAVOR_DEV_INSTANCE(dev);
130 state = ddi_get_soft_state(tavor_statep, instance);
131 if (state == NULL) {
132 TNF_PROBE_0(tavor_devmap_gss_fail, TAVOR_TNF_ERROR, "");
133 TAVOR_TNF_EXIT(tavor_devmap);
134 return (ENXIO);
135 }
136
137 /*
138 * Access to Tavor devmap interface is not allowed in
139 * "maintenance mode".
140 */
141 if (state->ts_operational_mode == TAVOR_MAINTENANCE_MODE) {
142 TNF_PROBE_0(tavor_devmap_maintenance_mode_fail,
143 TAVOR_TNF_ERROR, "");
144 TAVOR_TNF_EXIT(tavor_devmap);
145 return (EFAULT);
146 }
147
148 /*
149 * The bottom bits of "offset" are undefined (number depends on
150 * system PAGESIZE). Shifting these off leaves us with a "key".
151 * The "key" is actually a combination of both a real key value
152 * (for the purpose of database lookup) and a "type" value. We
153 * extract this information before doing the database lookup.
154 */
155 key = off >> PAGESHIFT;
156 type = key & MLNX_UMAP_RSRC_TYPE_MASK;
157 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
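
	/*
	 * For example, a CQ numbered "n" is exported by
	 * tavor_umap_cq_data_out() below with a mapping offset of
	 *	((n << MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_CQMEM_RSRC)
	 *	    << PAGESHIFT
	 * so the shift/mask above simply recovers "n" and the resource
	 * type code.
	 */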
158 status = tavor_umap_db_find(instance, key, type, &value, 0, NULL);
159 if (status == DDI_SUCCESS) {
160 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
161
162 switch (type) {
163 case MLNX_UMAP_UARPG_RSRC:
164 /*
165 			 * Double-check that the process that open()'d Tavor is
166 			 * the same process attempting to mmap() the UAR page.
167 */
168 if (key != ddi_get_pid()) {
169 TNF_PROBE_0(tavor_devmap_uarpg_invpid_fail,
170 TAVOR_TNF_ERROR, "");
171 TAVOR_TNF_EXIT(tavor_devmap);
172 return (EINVAL);
173 }
174
175 /* Map the UAR page out for userland access */
176 status = tavor_umap_uarpg(state, dhp, rsrcp, maplen,
177 &err);
178 if (status != DDI_SUCCESS) {
179 TNF_PROBE_0(tavor_devmap_uarpg_map_fail,
180 TAVOR_TNF_ERROR, "");
181 TAVOR_TNF_EXIT(tavor_devmap);
182 return (err);
183 }
184 break;
185
186 case MLNX_UMAP_CQMEM_RSRC:
187 /* Map the CQ memory out for userland access */
188 status = tavor_umap_cqmem(state, dhp, rsrcp, off,
189 maplen, &err);
190 if (status != DDI_SUCCESS) {
191 TNF_PROBE_0(tavor_devmap_cqmem_map_fail,
192 TAVOR_TNF_ERROR, "");
193 TAVOR_TNF_EXIT(tavor_devmap);
194 return (err);
195 }
196 break;
197
198 case MLNX_UMAP_QPMEM_RSRC:
199 /* Map the QP memory out for userland access */
200 status = tavor_umap_qpmem(state, dhp, rsrcp, off,
201 maplen, &err);
202 if (status != DDI_SUCCESS) {
203 TNF_PROBE_0(tavor_devmap_qpmem_map_fail,
204 TAVOR_TNF_ERROR, "");
205 TAVOR_TNF_EXIT(tavor_devmap);
206 return (err);
207 }
208 break;
209
210 case MLNX_UMAP_SRQMEM_RSRC:
211 /* Map the SRQ memory out for userland access */
212 status = tavor_umap_srqmem(state, dhp, rsrcp, off,
213 maplen, &err);
214 if (status != DDI_SUCCESS) {
215 TNF_PROBE_0(tavor_devmap_srqmem_map_fail,
216 TAVOR_TNF_ERROR, "");
217 TAVOR_TNF_EXIT(tavor_devmap);
218 return (err);
219 }
220 break;
221
222 default:
223 TAVOR_WARNING(state, "unexpected rsrc type in devmap");
224 TNF_PROBE_0(tavor_devmap_invrsrc_fail,
225 TAVOR_TNF_ERROR, "");
226 TAVOR_TNF_EXIT(tavor_devmap);
227 return (EINVAL);
228 }
229 } else {
230 TNF_PROBE_0(tavor_devmap_umap_lookup_fail, TAVOR_TNF_ERROR, "");
231 TAVOR_TNF_EXIT(tavor_devmap);
232 return (EINVAL);
233 }
234
235 TAVOR_TNF_EXIT(tavor_devmap);
236 return (0);
237 }
238
239
240 /*
241 * tavor_umap_uarpg()
242 * Context: Can be called from user context.
243 */
244 static int
245 tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
246 tavor_rsrc_t *rsrcp, size_t *maplen, int *err)
247 {
248 int status;
249 uint_t maxprot;
250
251 TAVOR_TNF_ENTER(tavor_umap_uarpg);
252
253 /* Map out the UAR page (doorbell page) */
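	/*
	 * The register offset passed to devmap_devmem_setup() below is the
	 * resource index scaled by the page size, i.e. the byte offset of
	 * this context's doorbell page within the UAR BAR (TAVOR_UAR_BAR).
	 */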
254 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
255 status = devmap_devmem_setup(dhp, state->ts_dip,
256 &tavor_devmap_devmem_cbops, TAVOR_UAR_BAR, (rsrcp->tr_indx <<
257 PAGESHIFT), PAGESIZE, maxprot, DEVMAP_ALLOW_REMAP,
258 &state->ts_reg_accattr);
259 if (status < 0) {
260 *err = status;
261 TNF_PROBE_0(tavor_umap_uarpg_devmap_fail, TAVOR_TNF_ERROR, "");
262 TAVOR_TNF_EXIT(tavor_umap_uarpg);
263 return (DDI_FAILURE);
264 }
265
266 *maplen = PAGESIZE;
267 TAVOR_TNF_EXIT(tavor_umap_uarpg);
268 return (DDI_SUCCESS);
269 }
270
271
272 /*
273 * tavor_umap_cqmem()
274 * Context: Can be called from user context.
275 */
276 /* ARGSUSED */
277 static int
278 tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
279 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
280 {
281 tavor_cqhdl_t cq;
282 size_t size;
283 uint_t maxprot;
284 int status;
285
286 TAVOR_TNF_ENTER(tavor_umap_cqmem);
287
288 /* Extract the Tavor CQ handle pointer from the tavor_rsrc_t */
289 cq = (tavor_cqhdl_t)rsrcp->tr_addr;
290
291 /* Round-up the CQ size to system page size */
292 size = ptob(btopr(cq->cq_cqinfo.qa_size));
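	/*
	 * (btopr() rounds the byte count up to whole pages and ptob()
	 * converts back to bytes, so e.g. with 4KB pages a 7KB CQ would
	 * be mapped as 8KB.)
	 */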
293
294 /* Map out the CQ memory */
295 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
296 status = devmap_umem_setup(dhp, state->ts_dip,
297 &tavor_devmap_umem_cbops, cq->cq_cqinfo.qa_umemcookie, 0, size,
298 maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
299 if (status < 0) {
300 *err = status;
301 TNF_PROBE_0(tavor_umap_cqmem_devmap_fail, TAVOR_TNF_ERROR, "");
302 TAVOR_TNF_EXIT(tavor_umap_cqmem);
303 return (DDI_FAILURE);
304 }
305 *maplen = size;
306
307 TAVOR_TNF_EXIT(tavor_umap_cqmem);
308 return (DDI_SUCCESS);
309 }
310
311
312 /*
313 * tavor_umap_qpmem()
314 * Context: Can be called from user context.
315 */
316 /* ARGSUSED */
317 static int
318 tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
319 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
320 {
321 tavor_qphdl_t qp;
322 offset_t offset;
323 size_t size;
324 uint_t maxprot;
325 int status;
326
327 TAVOR_TNF_ENTER(tavor_umap_qpmem);
328
329 /* Extract the Tavor QP handle pointer from the tavor_rsrc_t */
330 qp = (tavor_qphdl_t)rsrcp->tr_addr;
331
332 /*
333 * Calculate the offset of the first work queue (send or recv) into
334 * the memory (ddi_umem_alloc()) allocated previously for the QP.
335 */
336 offset = (offset_t)((uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
337 (uintptr_t)qp->qp_wqinfo.qa_buf_real);
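
	/*
	 * ("qa_buf_real" is the start of the buffer returned by
	 * ddi_umem_alloc(), while "qa_buf_aligned" is the first suitably
	 * aligned address within it; the difference is the offset of the
	 * usable work queue space within the umem cookie.)
	 */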
338
339 /* Round-up the QP work queue sizes to system page size */
340 size = ptob(btopr(qp->qp_wqinfo.qa_size));
341
342 /* Map out the QP memory */
343 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
344 status = devmap_umem_setup(dhp, state->ts_dip,
345 &tavor_devmap_umem_cbops, qp->qp_wqinfo.qa_umemcookie, offset,
346 size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
347 if (status < 0) {
348 *err = status;
349 TNF_PROBE_0(tavor_umap_qpmem_devmap_fail, TAVOR_TNF_ERROR, "");
350 TAVOR_TNF_EXIT(tavor_umap_qpmem);
351 return (DDI_FAILURE);
352 }
353 *maplen = size;
354
355 TAVOR_TNF_EXIT(tavor_umap_qpmem);
356 return (DDI_SUCCESS);
357 }
358
359
360 /*
361 * tavor_umap_srqmem()
362 * Context: Can be called from user context.
363 */
364 /* ARGSUSED */
365 static int
366 tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
367 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
368 {
369 tavor_srqhdl_t srq;
370 offset_t offset;
371 size_t size;
372 uint_t maxprot;
373 int status;
374
375 TAVOR_TNF_ENTER(tavor_umap_srqmem);
376
377 /* Extract the Tavor SRQ handle pointer from the tavor_rsrc_t */
378 srq = (tavor_srqhdl_t)rsrcp->tr_addr;
379
380 /*
381 * Calculate the offset of the first shared recv queue into the memory
382 * (ddi_umem_alloc()) allocated previously for the SRQ.
383 */
384 offset = (offset_t)((uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
385 (uintptr_t)srq->srq_wqinfo.qa_buf_real);
386
387 /* Round-up the SRQ work queue sizes to system page size */
388 size = ptob(btopr(srq->srq_wqinfo.qa_size));
389
390 	/* Map out the SRQ memory */
391 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
392 status = devmap_umem_setup(dhp, state->ts_dip,
393 &tavor_devmap_umem_cbops, srq->srq_wqinfo.qa_umemcookie, offset,
394 size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
395 if (status < 0) {
396 *err = status;
397 TNF_PROBE_0(tavor_umap_srqmem_devmap_fail, TAVOR_TNF_ERROR, "");
398 TAVOR_TNF_EXIT(tavor_umap_srqmem);
399 return (DDI_FAILURE);
400 }
401 *maplen = size;
402
403 TAVOR_TNF_EXIT(tavor_umap_srqmem);
404 return (DDI_SUCCESS);
405 }
406
407
408 /*
409 * tavor_devmap_umem_map()
410 * Context: Can be called from kernel context.
411 */
412 /* ARGSUSED */
413 static int
414 tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
415 offset_t off, size_t len, void **pvtp)
416 {
417 tavor_state_t *state;
418 tavor_devmap_track_t *dvm_track;
419 tavor_cqhdl_t cq;
420 tavor_qphdl_t qp;
421 tavor_srqhdl_t srq;
422 minor_t instance;
423 uint64_t key;
424 uint_t type;
425
426 TAVOR_TNF_ENTER(tavor_devmap_umem_map);
427
428 /* Get Tavor softstate structure from instance */
429 instance = TAVOR_DEV_INSTANCE(dev);
430 state = ddi_get_soft_state(tavor_statep, instance);
431 if (state == NULL) {
432 TNF_PROBE_0(tavor_devmap_umem_map_gss_fail, TAVOR_TNF_ERROR,
433 "");
434 TAVOR_TNF_EXIT(tavor_devmap_umem_map);
435 return (ENXIO);
436 }
437
438 /*
439 * The bottom bits of "offset" are undefined (number depends on
440 * system PAGESIZE). Shifting these off leaves us with a "key".
441 * The "key" is actually a combination of both a real key value
442 * (for the purpose of database lookup) and a "type" value. Although
443 * we are not going to do any database lookup per se, we do want
444 * to extract the "key" and the "type" (to enable faster lookup of
445 * the appropriate CQ or QP handle).
446 */
447 key = off >> PAGESHIFT;
448 type = key & MLNX_UMAP_RSRC_TYPE_MASK;
449 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
450
451 /*
452 * Allocate an entry to track the mapping and unmapping (specifically,
453 * partial unmapping) of this resource.
454 */
455 dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
456 sizeof (tavor_devmap_track_t), KM_SLEEP);
457 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
458 dvm_track->tdt_offset = off;
459 dvm_track->tdt_state = state;
460 dvm_track->tdt_refcnt = 1;
461 mutex_init(&dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
462 DDI_INTR_PRI(state->ts_intrmsi_pri));
463
464 /*
465 * Depending of the type of resource that has been mapped out, we
466 * need to update the QP or CQ handle to reflect that it has, in
467 * fact, been mapped. This allows the driver code which frees a QP
468 * or a CQ to know whether it is appropriate to do a
469 * devmap_devmem_remap() to invalidate the userland mapping for the
470 * corresponding queue's memory.
471 */
472 if (type == MLNX_UMAP_CQMEM_RSRC) {
473
474 /* Use "key" (CQ number) to do fast lookup of CQ handle */
475 cq = tavor_cqhdl_from_cqnum(state, key);
476
477 /*
478 * Update the handle to the userland mapping. Note: If
479 * the CQ already has a valid userland mapping, then stop
480 * and return failure.
481 */
482 mutex_enter(&cq->cq_lock);
483 if (cq->cq_umap_dhp == NULL) {
484 cq->cq_umap_dhp = dhp;
485 dvm_track->tdt_size = cq->cq_cqinfo.qa_size;
486 mutex_exit(&cq->cq_lock);
487 } else {
488 mutex_exit(&cq->cq_lock);
489 goto umem_map_fail;
490 }
491
492 } else if (type == MLNX_UMAP_QPMEM_RSRC) {
493
494 /* Use "key" (QP number) to do fast lookup of QP handle */
495 qp = tavor_qphdl_from_qpnum(state, key);
496
497 /*
498 * Update the handle to the userland mapping. Note: If
499 		 * the QP already has a valid userland mapping, then stop
500 * and return failure.
501 */
502 mutex_enter(&qp->qp_lock);
503 if (qp->qp_umap_dhp == NULL) {
504 qp->qp_umap_dhp = dhp;
505 dvm_track->tdt_size = qp->qp_wqinfo.qa_size;
506 mutex_exit(&qp->qp_lock);
507 } else {
508 mutex_exit(&qp->qp_lock);
509 goto umem_map_fail;
510 }
511
512 } else if (type == MLNX_UMAP_SRQMEM_RSRC) {
513
514 /* Use "key" (SRQ number) to do fast lookup on SRQ handle */
515 srq = tavor_srqhdl_from_srqnum(state, key);
516
517 /*
518 * Update the handle to the userland mapping. Note: If the
519 * SRQ already has a valid userland mapping, then stop and
520 * return failure.
521 */
522 mutex_enter(&srq->srq_lock);
523 if (srq->srq_umap_dhp == NULL) {
524 srq->srq_umap_dhp = dhp;
525 dvm_track->tdt_size = srq->srq_wqinfo.qa_size;
526 mutex_exit(&srq->srq_lock);
527 } else {
528 mutex_exit(&srq->srq_lock);
529 goto umem_map_fail;
530 }
531 }
532
533 /*
534 * Pass the private "Tavor devmap tracking structure" back. This
535 * pointer will be returned in subsequent "unmap" callbacks.
536 */
537 *pvtp = dvm_track;
538
539 TAVOR_TNF_EXIT(tavor_devmap_umem_map);
540 return (DDI_SUCCESS);
541
542 umem_map_fail:
543 mutex_destroy(&dvm_track->tdt_lock);
544 kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
545 TAVOR_TNF_EXIT(tavor_devmap_umem_map);
546 return (DDI_FAILURE);
547 }
548
549
550 /*
551 * tavor_devmap_umem_dup()
552 * Context: Can be called from kernel context.
553 */
554 /* ARGSUSED */
555 static int
556 tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp, devmap_cookie_t new_dhp,
557 void **new_pvtp)
558 {
559 tavor_state_t *state;
560 tavor_devmap_track_t *dvm_track, *new_dvm_track;
561 uint_t maxprot;
562 int status;
563
564 TAVOR_TNF_ENTER(tavor_devmap_umem_dup);
565
566 /*
567 * Extract the Tavor softstate pointer from "Tavor devmap tracking
568 * structure" (in "pvtp").
569 */
570 dvm_track = (tavor_devmap_track_t *)pvtp;
571 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
572 state = dvm_track->tdt_state;
573
574 /*
575 * Since this devmap_dup() entry point is generally called
576 * when a process does fork(2), it is incumbent upon the driver
577 	 * to ensure that the child does not inherit a valid copy of
578 * the parent's QP or CQ resource. This is accomplished by using
579 * devmap_devmem_remap() to invalidate the child's mapping to the
580 * kernel memory.
581 */
582 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
583 status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
584 dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
585 if (status != DDI_SUCCESS) {
586 TAVOR_WARNING(state, "failed in tavor_devmap_umem_dup()");
587 TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
588 return (status);
589 }
590
591 /*
592 * Allocate a new entry to track the subsequent unmapping
593 * (specifically, all partial unmappings) of the child's newly
594 * invalidated resource. Note: Setting the "tdt_size" field to
595 * zero here is an indication to the devmap_unmap() entry point
596 * that this mapping is invalid, and that its subsequent unmapping
597 * should not affect any of the parent's CQ or QP resources.
598 */
599 new_dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
600 sizeof (tavor_devmap_track_t), KM_SLEEP);
601 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*new_dvm_track))
602 new_dvm_track->tdt_offset = 0;
603 new_dvm_track->tdt_state = state;
604 new_dvm_track->tdt_refcnt = 1;
605 new_dvm_track->tdt_size = 0;
606 mutex_init(&new_dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
607 DDI_INTR_PRI(state->ts_intrmsi_pri));
608 *new_pvtp = new_dvm_track;
609
610 TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
611 return (DDI_SUCCESS);
612 }
613
614
615 /*
616 * tavor_devmap_umem_unmap()
617 * Context: Can be called from kernel context.
618 */
619 /* ARGSUSED */
620 static void
621 tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
622 size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
623 devmap_cookie_t new_dhp2, void **pvtp2)
624 {
625 tavor_state_t *state;
626 tavor_rsrc_t *rsrcp;
627 tavor_devmap_track_t *dvm_track;
628 tavor_cqhdl_t cq;
629 tavor_qphdl_t qp;
630 tavor_srqhdl_t srq;
631 uint64_t key, value;
632 uint_t type;
633 uint_t size;
634 int status;
635
636 TAVOR_TNF_ENTER(tavor_devmap_umem_unmap);
637
638 /*
639 * Extract the Tavor softstate pointer from "Tavor devmap tracking
640 * structure" (in "pvtp").
641 */
642 dvm_track = (tavor_devmap_track_t *)pvtp;
643 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
644 state = dvm_track->tdt_state;
645
646 /*
647 * Extract the "offset" from the "Tavor devmap tracking structure".
648 * Note: The input argument "off" is ignored here because the
649 * Tavor mapping interfaces define a very specific meaning to
650 * each "logical offset". Also extract the "key" and "type" encoded
651 * in the logical offset.
652 */
653 key = dvm_track->tdt_offset >> PAGESHIFT;
654 type = key & MLNX_UMAP_RSRC_TYPE_MASK;
655 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
656
657 /*
658 * Extract the "size" of the mapping. If this size is determined
659 * to be zero, then it is an indication of a previously invalidated
660 * mapping, and no CQ or QP resources should be affected.
661 */
662 size = dvm_track->tdt_size;
663
664 /*
665 	 * If only the "middle" portion of a given mapping is being unmapped,
666 	 * then we are effectively creating one new piece of mapped memory.
667 	 * (Original region is divided into three pieces of which the middle
668 	 * piece is being removed.  This leaves two pieces.)  Since we started
669 * with one piece and now have two pieces, we need to increment the
670 * counter in the "Tavor devmap tracking structure".
671 *
672 * If, however, the whole mapped region is being unmapped, then we
673 * have started with one region which we are completely removing.
674 * In this case, we need to decrement the counter in the "Tavor
675 * devmap tracking structure".
676 *
677 * In each of the remaining cases, we will have started with one
678 * mapped region and ended with one (different) region. So no counter
679 * modification is necessary.
680 */
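	/*
	 * For example, if only the middle page of a three-page mapping is
	 * unmapped, both "new_dhp1" and "new_dhp2" are non-NULL and the
	 * count goes from one to two; unmapping each remaining piece later
	 * brings the count back toward zero.
	 */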
681 mutex_enter(&dvm_track->tdt_lock);
682 if ((new_dhp1 == NULL) && (new_dhp2 == NULL)) {
683 dvm_track->tdt_refcnt--;
684 } else if ((new_dhp1 != NULL) && (new_dhp2 != NULL)) {
685 dvm_track->tdt_refcnt++;
686 }
687 mutex_exit(&dvm_track->tdt_lock);
688
689 /*
690 * For each of the cases where the region is being divided, then we
691 * need to pass back the "Tavor devmap tracking structure". This way
692 * we get it back when each of the remaining pieces is subsequently
693 * unmapped.
694 */
695 if (new_dhp1 != NULL) {
696 *pvtp1 = pvtp;
697 }
698 if (new_dhp2 != NULL) {
699 *pvtp2 = pvtp;
700 }
701
702 /*
703 * If the "Tavor devmap tracking structure" is no longer being
704 * referenced, then free it up. Otherwise, return.
705 */
706 if (dvm_track->tdt_refcnt == 0) {
707 mutex_destroy(&dvm_track->tdt_lock);
708 kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
709
710 /*
711 * If the mapping was invalid (see explanation above), then
712 * no further processing is necessary.
713 */
714 if (size == 0) {
715 TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
716 return;
717 }
718 } else {
719 TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
720 return;
721 }
722
723 /*
724 * Now that we can guarantee that the user memory is fully unmapped,
725 * we can use the "key" and "type" values to try to find the entry
726 * in the "userland resources database". If it's found, then it
727 * indicates that the queue memory (CQ or QP) has not yet been freed.
728 * In this case, we update the corresponding CQ or QP handle to
729 * indicate that the "devmap_devmem_remap()" call will be unnecessary.
730 * If it's _not_ found, then it indicates that the CQ or QP memory
731 * was, in fact, freed before it was unmapped (thus requiring a
732 * previous invalidation by remapping - which will already have
733 * been done in the free routine).
734 */
735 status = tavor_umap_db_find(state->ts_instance, key, type, &value,
736 0, NULL);
737 if (status == DDI_SUCCESS) {
738 /*
739 * Depending on the type of the mapped resource (CQ or QP),
740 * update handle to indicate that no invalidation remapping
741 * will be necessary.
742 */
743 if (type == MLNX_UMAP_CQMEM_RSRC) {
744
745 /* Use "value" to convert to CQ handle */
746 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
747 cq = (tavor_cqhdl_t)rsrcp->tr_addr;
748
749 /*
750 * Invalidate the handle to the userland mapping.
751 * Note: We must ensure that the mapping being
752 * unmapped here is the current one for the CQ. It
753 * is possible that it might not be if this CQ has
754 * been resized and the previous CQ memory has not
755 * yet been unmapped. But in that case, because of
756 * the devmap_devmem_remap(), there is no longer any
757 * association between the mapping and the real CQ
758 * kernel memory.
759 */
760 mutex_enter(&cq->cq_lock);
761 if (cq->cq_umap_dhp == dhp) {
762 cq->cq_umap_dhp = (devmap_cookie_t)NULL;
763 }
764 mutex_exit(&cq->cq_lock);
765
766 } else if (type == MLNX_UMAP_QPMEM_RSRC) {
767
768 /* Use "value" to convert to QP handle */
769 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
770 qp = (tavor_qphdl_t)rsrcp->tr_addr;
771
772 /*
773 * Invalidate the handle to the userland mapping.
774 * Note: we ensure that the mapping being unmapped
775 * here is the current one for the QP. This is
776 * more of a sanity check here since, unlike CQs
777 * (above) we do not support resize of QPs.
778 */
779 mutex_enter(&qp->qp_lock);
780 if (qp->qp_umap_dhp == dhp) {
781 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
782 }
783 mutex_exit(&qp->qp_lock);
784
785 } else if (type == MLNX_UMAP_SRQMEM_RSRC) {
786
787 /* Use "value" to convert to SRQ handle */
788 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
789 srq = (tavor_srqhdl_t)rsrcp->tr_addr;
790
791 /*
792 * Invalidate the handle to the userland mapping.
793 * Note: we ensure that the mapping being unmapped
794 			 * here is the current one for the SRQ before
795 			 * clearing the handle (mirroring the CQ and QP
796 			 * cases above).
797 */
798 mutex_enter(&srq->srq_lock);
799 if (srq->srq_umap_dhp == dhp) {
800 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
801 }
802 mutex_exit(&srq->srq_lock);
803 }
804 }
805
806 TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
807 }
808
809
810 /*
811 * tavor_devmap_devmem_map()
812 * Context: Can be called from kernel context.
813 */
814 /* ARGSUSED */
815 static int
816 tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
817 offset_t off, size_t len, void **pvtp)
818 {
819 tavor_state_t *state;
820 tavor_devmap_track_t *dvm_track;
821 minor_t instance;
822
823 TAVOR_TNF_ENTER(tavor_devmap_devmem_map);
824
825 /* Get Tavor softstate structure from instance */
826 instance = TAVOR_DEV_INSTANCE(dev);
827 state = ddi_get_soft_state(tavor_statep, instance);
828 if (state == NULL) {
829 TNF_PROBE_0(tavor_devmap_devmem_map_gss_fail, TAVOR_TNF_ERROR,
830 "");
831 TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
832 return (ENXIO);
833 }
834
835 /*
836 * Allocate an entry to track the mapping and unmapping of this
837 * resource. Note: We don't need to initialize the "refcnt" or
838 * "offset" fields here, nor do we need to initialize the mutex
839 * used with the "refcnt". Since UAR pages are single pages, they
840 * are not subject to "partial" unmappings. This makes these other
841 * fields unnecessary.
842 */
843 dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
844 sizeof (tavor_devmap_track_t), KM_SLEEP);
845 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
846 dvm_track->tdt_state = state;
847 dvm_track->tdt_size = PAGESIZE;
848
849 /*
850 * Pass the private "Tavor devmap tracking structure" back. This
851 * pointer will be returned in a subsequent "unmap" callback.
852 */
853 *pvtp = dvm_track;
854
855 TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
856 return (DDI_SUCCESS);
857 }
858
859
860 /*
861 * tavor_devmap_devmem_dup()
862 * Context: Can be called from kernel context.
863 */
864 /* ARGSUSED */
865 static int
866 tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
867 devmap_cookie_t new_dhp, void **new_pvtp)
868 {
869 tavor_state_t *state;
870 tavor_devmap_track_t *dvm_track;
871 uint_t maxprot;
872 int status;
873
874 TAVOR_TNF_ENTER(tavor_devmap_devmem_dup);
875
876 /*
877 * Extract the Tavor softstate pointer from "Tavor devmap tracking
878 * structure" (in "pvtp"). Note: If the tracking structure is NULL
879 * here, it means that the mapping corresponds to an invalid mapping.
880 * In this case, it can be safely ignored ("new_pvtp" set to NULL).
881 */
882 dvm_track = (tavor_devmap_track_t *)pvtp;
883 if (dvm_track == NULL) {
884 *new_pvtp = NULL;
885 TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
886 return (DDI_SUCCESS);
887 }
888
889 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
890 state = dvm_track->tdt_state;
891
892 /*
893 * Since this devmap_dup() entry point is generally called
894 * when a process does fork(2), it is incumbent upon the driver
895 	 * to ensure that the child does not inherit a valid copy of
896 * the parent's resource. This is accomplished by using
897 * devmap_devmem_remap() to invalidate the child's mapping to the
898 * kernel memory.
899 */
900 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
901 status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
902 dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
903 if (status != DDI_SUCCESS) {
904 TAVOR_WARNING(state, "failed in tavor_devmap_devmem_dup()");
905 TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
906 return (status);
907 }
908
909 /*
910 * Since the region is invalid, there is no need for us to
911 * allocate and continue to track an additional "Tavor devmap
912 * tracking structure". Instead we return NULL here, which is an
913 * indication to the devmap_unmap() entry point that this entry
914 * can be safely ignored.
915 */
916 *new_pvtp = NULL;
917
918 TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
919 return (DDI_SUCCESS);
920 }
921
922
923 /*
924 * tavor_devmap_devmem_unmap()
925 * Context: Can be called from kernel context.
926 */
927 /* ARGSUSED */
928 static void
929 tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
930 size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
931 devmap_cookie_t new_dhp2, void **pvtp2)
932 {
933 tavor_devmap_track_t *dvm_track;
934
935 TAVOR_TNF_ENTER(tavor_devmap_devmem_unmap);
936
937 /*
938 * Free up the "Tavor devmap tracking structure" (in "pvtp").
939 * There cannot be "partial" unmappings here because all UAR pages
940 * are single pages. Note: If the tracking structure is NULL here,
941 * it means that the mapping corresponds to an invalid mapping. In
942 * this case, it can be safely ignored.
943 */
944 dvm_track = (tavor_devmap_track_t *)pvtp;
945 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
946 if (dvm_track == NULL) {
947 TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
948 return;
949 }
950
951 kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
952 TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
953 }
954
955
956 /*
957 * tavor_umap_ci_data_in()
958 * Context: Can be called from user or kernel context.
959 */
960 /* ARGSUSED */
961 ibt_status_t
962 tavor_umap_ci_data_in(tavor_state_t *state, ibt_ci_data_flags_t flags,
963 ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
964 {
965 int status;
966
967 TAVOR_TNF_ENTER(tavor_umap_ci_data_in);
968
969 /*
970 * Depending on the type of object about which additional information
971 * is being provided (currently only MR is supported), we call the
972 * appropriate resource-specific function.
973 */
974 switch (object) {
975 case IBT_HDL_MR:
976 status = tavor_umap_mr_data_in((tavor_mrhdl_t)hdl,
977 (ibt_mr_data_in_t *)data_p, data_sz);
978 if (status != DDI_SUCCESS) {
979 TNF_PROBE_0(tavor_umap_mr_data_in_fail,
980 TAVOR_TNF_ERROR, "");
981 TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
982 return (status);
983 }
984 break;
985
986 /*
987 * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
988 * since the Tavor driver does not support these.
989 */
990 case IBT_HDL_HCA:
991 case IBT_HDL_QP:
992 case IBT_HDL_CQ:
993 case IBT_HDL_PD:
994 case IBT_HDL_MW:
995 case IBT_HDL_AH:
996 case IBT_HDL_SCHED:
997 case IBT_HDL_EEC:
998 case IBT_HDL_RDD:
999 case IBT_HDL_SRQ:
1000 TNF_PROBE_0(tavor_umap_ci_data_in_unsupp_type,
1001 TAVOR_TNF_ERROR, "");
1002 TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1003 return (IBT_NOT_SUPPORTED);
1004
1005 /*
1006 * Any other types are invalid.
1007 */
1008 default:
1009 TNF_PROBE_0(tavor_umap_ci_data_in_invtype_fail,
1010 TAVOR_TNF_ERROR, "");
1011 TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1012 return (IBT_INVALID_PARAM);
1013 }
1014
1015 TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1016 return (DDI_SUCCESS);
1017 }
1018
1019
1020 /*
1021 * tavor_umap_mr_data_in()
1022 * Context: Can be called from user or kernel context.
1023 */
1024 static ibt_status_t
1025 tavor_umap_mr_data_in(tavor_mrhdl_t mr, ibt_mr_data_in_t *data,
1026 size_t data_sz)
1027 {
1028 TAVOR_TNF_ENTER(tavor_umap_mr_data_in);
1029
1030 if (data->mr_rev != IBT_MR_DATA_IN_IF_VERSION) {
1031 TNF_PROBE_0(tavor_umap_mr_data_in_ver_fail,
1032 TAVOR_TNF_ERROR, "");
1033 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1034 return (IBT_NOT_SUPPORTED);
1035 }
1036
1037 /* Check for valid MR handle pointer */
1038 if (mr == NULL) {
1039 TNF_PROBE_0(tavor_umap_mr_data_in_invmrhdl_fail,
1040 TAVOR_TNF_ERROR, "");
1041 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1042 return (IBT_MR_HDL_INVALID);
1043 }
1044
1045 /* Check for valid MR input structure size */
1046 if (data_sz < sizeof (ibt_mr_data_in_t)) {
1047 TNF_PROBE_0(tavor_umap_mr_data_in_invdatasz_fail,
1048 TAVOR_TNF_ERROR, "");
1049 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1050 return (IBT_INSUFF_RESOURCE);
1051 }
1052 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1053
1054 /*
1055 * Ensure that the MR corresponds to userland memory and that it is
1056 * a currently valid memory region as well.
1057 */
1058 mutex_enter(&mr->mr_lock);
1059 if ((mr->mr_is_umem == 0) || (mr->mr_umemcookie == NULL)) {
1060 mutex_exit(&mr->mr_lock);
1061 TNF_PROBE_0(tavor_umap_mr_data_in_invumem_fail,
1062 TAVOR_TNF_ERROR, "");
1063 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1064 return (IBT_MR_HDL_INVALID);
1065 }
1066
1067 /*
1068 * If it has passed all the above checks, then extract the callback
1069 * function and argument from the input structure. Copy them into
1070 * the MR handle. This function will be called only if the memory
1071 * corresponding to the MR handle gets a umem_lockmemory() callback.
1072 */
1073 mr->mr_umem_cbfunc = data->mr_func;
1074 mr->mr_umem_cbarg1 = data->mr_arg1;
1075 mr->mr_umem_cbarg2 = data->mr_arg2;
1076 mutex_exit(&mr->mr_lock);
1077
1078 	TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1079 return (DDI_SUCCESS);
1080 }
1081
1082
1083 /*
1084 * tavor_umap_ci_data_out()
1085 * Context: Can be called from user or kernel context.
1086 */
1087 /* ARGSUSED */
1088 ibt_status_t
1089 tavor_umap_ci_data_out(tavor_state_t *state, ibt_ci_data_flags_t flags,
1090 ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
1091 {
1092 int status;
1093
1094 TAVOR_TNF_ENTER(tavor_umap_ci_data_out);
1095
1096 /*
1097 * Depending on the type of object about which additional information
1098 * is being requested (CQ or QP), we call the appropriate resource-
1099 * specific mapping function.
1100 */
1101 switch (object) {
1102 case IBT_HDL_CQ:
1103 status = tavor_umap_cq_data_out((tavor_cqhdl_t)hdl,
1104 (mlnx_umap_cq_data_out_t *)data_p, data_sz);
1105 if (status != DDI_SUCCESS) {
1106 TNF_PROBE_0(tavor_umap_cq_data_out_fail,
1107 TAVOR_TNF_ERROR, "");
1108 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1109 return (status);
1110 }
1111 break;
1112
1113 case IBT_HDL_QP:
1114 status = tavor_umap_qp_data_out((tavor_qphdl_t)hdl,
1115 (mlnx_umap_qp_data_out_t *)data_p, data_sz);
1116 if (status != DDI_SUCCESS) {
1117 TNF_PROBE_0(tavor_umap_qp_data_out_fail,
1118 TAVOR_TNF_ERROR, "");
1119 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1120 return (status);
1121 }
1122 break;
1123
1124 case IBT_HDL_SRQ:
1125 status = tavor_umap_srq_data_out((tavor_srqhdl_t)hdl,
1126 (mlnx_umap_srq_data_out_t *)data_p, data_sz);
1127 if (status != DDI_SUCCESS) {
1128 TNF_PROBE_0(tavor_umap_srq_data_out_fail,
1129 TAVOR_TNF_ERROR, "");
1130 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1131 return (status);
1132 }
1133 break;
1134
1135 /*
1136 	 * PDs are handled below as well.  The remaining valid IBT types
1137 	 * return IBT_NOT_SUPPORTED, as the Tavor driver does not support them.
1138 */
1139 case IBT_HDL_PD:
1140 status = tavor_umap_pd_data_out((tavor_pdhdl_t)hdl,
1141 (mlnx_umap_pd_data_out_t *)data_p, data_sz);
1142 if (status != DDI_SUCCESS) {
1143 TNF_PROBE_0(tavor_umap_pd_data_out_fail,
1144 TAVOR_TNF_ERROR, "");
1145 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1146 return (status);
1147 }
1148 break;
1149
1150 case IBT_HDL_HCA:
1151 case IBT_HDL_MR:
1152 case IBT_HDL_MW:
1153 case IBT_HDL_AH:
1154 case IBT_HDL_SCHED:
1155 case IBT_HDL_EEC:
1156 case IBT_HDL_RDD:
1157 TNF_PROBE_0(tavor_umap_ci_data_out_unsupp_type,
1158 TAVOR_TNF_ERROR, "");
1159 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1160 return (IBT_NOT_SUPPORTED);
1161
1162 /*
1163 * Any other types are invalid.
1164 */
1165 default:
1166 TNF_PROBE_0(tavor_umap_ci_data_out_invtype_fail,
1167 TAVOR_TNF_ERROR, "");
1168 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1169 return (IBT_INVALID_PARAM);
1170 }
1171
1172 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1173 return (DDI_SUCCESS);
1174 }
1175
1176
1177 /*
1178 * tavor_umap_cq_data_out()
1179 * Context: Can be called from user or kernel context.
1180 */
1181 static ibt_status_t
1182 tavor_umap_cq_data_out(tavor_cqhdl_t cq, mlnx_umap_cq_data_out_t *data,
1183 size_t data_sz)
1184 {
1185 TAVOR_TNF_ENTER(tavor_umap_cq_data_out);
1186
1187 /* Check for valid CQ handle pointer */
1188 if (cq == NULL) {
1189 TNF_PROBE_0(tavor_umap_cq_data_out_invcqhdl_fail,
1190 TAVOR_TNF_ERROR, "");
1191 TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1192 return (IBT_CQ_HDL_INVALID);
1193 }
1194
1195 /* Check for valid CQ mapping structure size */
1196 if (data_sz < sizeof (mlnx_umap_cq_data_out_t)) {
1197 TNF_PROBE_0(tavor_umap_cq_data_out_invdatasz_fail,
1198 TAVOR_TNF_ERROR, "");
1199 TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1200 return (IBT_INSUFF_RESOURCE);
1201 }
1202 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1203
1204 /*
1205 * If it has passed all the above checks, then fill in all the useful
1206 * mapping information (including the mapping offset that will be
1207 * passed back to the devmap() interface during a subsequent mmap()
1208 	 * call).
1209 *
1210 * The "offset" for CQ mmap()'s looks like this:
1211 * +----------------------------------------+--------+--------------+
1212 * | CQ Number | 0x33 | Reserved (0) |
1213 * +----------------------------------------+--------+--------------+
1214 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits
1215 *
1216 * This returns information about the mapping offset, the length of
1217 * the CQ memory, the CQ number (for use in later CQ doorbells), the
1218 * number of CQEs the CQ memory can hold, and the size of each CQE.
1219 */
1220 data->mcq_rev = MLNX_UMAP_IF_VERSION;
1221 data->mcq_mapoffset = ((((uint64_t)cq->cq_cqnum <<
1222 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_CQMEM_RSRC) << PAGESHIFT);
1223 data->mcq_maplen = cq->cq_cqinfo.qa_size;
1224 data->mcq_cqnum = cq->cq_cqnum;
1225 data->mcq_numcqe = cq->cq_bufsz;
1226 data->mcq_cqesz = sizeof (tavor_hw_cqe_t);
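
	/*
	 * A userland consumer is expected to hand "mcq_mapoffset" (and
	 * "mcq_maplen") back to mmap(2) on the Tavor device node; that
	 * offset is what tavor_devmap() above decodes to locate this CQ's
	 * queue memory.
	 */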
1227
1228 TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1229 return (DDI_SUCCESS);
1230 }
1231
1232
1233 /*
1234 * tavor_umap_qp_data_out()
1235 * Context: Can be called from user or kernel context.
1236 */
1237 static ibt_status_t
1238 tavor_umap_qp_data_out(tavor_qphdl_t qp, mlnx_umap_qp_data_out_t *data,
1239 size_t data_sz)
1240 {
1241 TAVOR_TNF_ENTER(tavor_umap_qp_data_out);
1242
1243 /* Check for valid QP handle pointer */
1244 if (qp == NULL) {
1245 TNF_PROBE_0(tavor_umap_qp_data_out_invqphdl_fail,
1246 TAVOR_TNF_ERROR, "");
1247 TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1248 return (IBT_QP_HDL_INVALID);
1249 }
1250
1251 /* Check for valid QP mapping structure size */
1252 if (data_sz < sizeof (mlnx_umap_qp_data_out_t)) {
1253 TNF_PROBE_0(tavor_umap_qp_data_out_invdatasz_fail,
1254 TAVOR_TNF_ERROR, "");
1255 TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1256 return (IBT_INSUFF_RESOURCE);
1257 }
1258 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1259
1260 /*
1261 * If it has passed all the checks, then fill in all the useful
1262 * mapping information (including the mapping offset that will be
1263 * passed back to the devmap() interface during a subsequent mmap()
1264 	 * call).
1265 *
1266 * The "offset" for QP mmap()'s looks like this:
1267 * +----------------------------------------+--------+--------------+
1268 * | QP Number | 0x44 | Reserved (0) |
1269 * +----------------------------------------+--------+--------------+
1270 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits
1271 *
1272 * This returns information about the mapping offset, the length of
1273 * the QP memory, and the QP number (for use in later send and recv
1274 * doorbells). It also returns the following information for both
1275 * the receive work queue and the send work queue, respectively: the
1276 * offset (from the base mapped address) of the start of the given
1277 * work queue, the 64-bit IB virtual address that corresponds to
1278 	 * the base mapped address (needed for posting WQEs through the
1279 * QP doorbells), the number of WQEs the given work queue can hold,
1280 * and the size of each WQE for the given work queue.
1281 */
1282 data->mqp_rev = MLNX_UMAP_IF_VERSION;
1283 data->mqp_mapoffset = ((((uint64_t)qp->qp_qpnum <<
1284 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_QPMEM_RSRC) << PAGESHIFT);
1285 data->mqp_maplen = qp->qp_wqinfo.qa_size;
1286 data->mqp_qpnum = qp->qp_qpnum;
1287
1288 /*
1289 * If this QP is associated with a shared receive queue (SRQ),
1290 * then return invalid RecvQ parameters. Otherwise, return
1291 * the proper parameter values.
1292 */
1293 if (qp->qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
1294 data->mqp_rq_off = (uint32_t)qp->qp_wqinfo.qa_size;
1295 data->mqp_rq_desc_addr = (uint32_t)qp->qp_wqinfo.qa_size;
1296 data->mqp_rq_numwqe = 0;
1297 data->mqp_rq_wqesz = 0;
1298 } else {
1299 data->mqp_rq_off = (uintptr_t)qp->qp_rq_buf -
1300 (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1301 data->mqp_rq_desc_addr = (uint32_t)((uintptr_t)qp->qp_rq_buf -
1302 qp->qp_desc_off);
1303 data->mqp_rq_numwqe = qp->qp_rq_bufsz;
1304 data->mqp_rq_wqesz = (1 << qp->qp_rq_log_wqesz);
1305 }
1306 data->mqp_sq_off = (uintptr_t)qp->qp_sq_buf -
1307 (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1308 data->mqp_sq_desc_addr = (uint32_t)((uintptr_t)qp->qp_sq_buf -
1309 qp->qp_desc_off);
1310 data->mqp_sq_numwqe = qp->qp_sq_bufsz;
1311 data->mqp_sq_wqesz = (1 << qp->qp_sq_log_wqesz);
1312
1313 TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1314 return (DDI_SUCCESS);
1315 }
1316
1317
1318 /*
1319 * tavor_umap_srq_data_out()
1320 * Context: Can be called from user or kernel context.
1321 */
1322 static ibt_status_t
1323 tavor_umap_srq_data_out(tavor_srqhdl_t srq, mlnx_umap_srq_data_out_t *data,
1324 size_t data_sz)
1325 {
1326 TAVOR_TNF_ENTER(tavor_umap_srq_data_out);
1327
1328 /* Check for valid SRQ handle pointer */
1329 if (srq == NULL) {
1330 TNF_PROBE_0(tavor_umap_srq_data_out_invsrqhdl_fail,
1331 TAVOR_TNF_ERROR, "");
1332 TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1333 return (IBT_SRQ_HDL_INVALID);
1334 }
1335
1336 /* Check for valid SRQ mapping structure size */
1337 if (data_sz < sizeof (mlnx_umap_srq_data_out_t)) {
1338 TNF_PROBE_0(tavor_umap_srq_data_out_invdatasz_fail,
1339 TAVOR_TNF_ERROR, "");
1340 TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1341 return (IBT_INSUFF_RESOURCE);
1342 }
1343 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1344
1345 /*
1346 * If it has passed all the checks, then fill in all the useful
1347 * mapping information (including the mapping offset that will be
1348 * passed back to the devmap() interface during a subsequent mmap()
1349 	 * call).
1350 *
1351 * The "offset" for SRQ mmap()'s looks like this:
1352 * +----------------------------------------+--------+--------------+
1353 * | SRQ Number | 0x66 | Reserved (0) |
1354 * +----------------------------------------+--------+--------------+
1355 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits
1356 *
1357 * This returns information about the mapping offset, the length of the
1358 	 * SRQ memory, and the SRQ number (for use in later SRQ
1359 * doorbells). It also returns the following information for the
1360 * shared receive queue: the offset (from the base mapped address) of
1361 * the start of the given work queue, the 64-bit IB virtual address
1362 * that corresponds to the base mapped address (needed for posting WQEs
1363 * though the QP doorbells), the number of WQEs the given work queue
1364 	 * through the doorbells), the number of WQEs the given work queue
1365 */
1366 data->msrq_rev = MLNX_UMAP_IF_VERSION;
1367 data->msrq_mapoffset = ((((uint64_t)srq->srq_srqnum <<
1368 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_SRQMEM_RSRC) << PAGESHIFT);
1369 data->msrq_maplen = srq->srq_wqinfo.qa_size;
1370 data->msrq_srqnum = srq->srq_srqnum;
1371
1372 data->msrq_desc_addr = (uint32_t)((uintptr_t)srq->srq_wq_buf -
1373 srq->srq_desc_off);
1374 data->msrq_numwqe = srq->srq_wq_bufsz;
1375 data->msrq_wqesz = (1 << srq->srq_wq_log_wqesz);
1376
1377 TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1378 return (DDI_SUCCESS);
1379 }
1380
1381 /*
1382 * tavor_umap_pd_data_out()
1383 * Context: Can be called from user or kernel context.
1384 */
1385 static ibt_status_t
1386 tavor_umap_pd_data_out(tavor_pdhdl_t pd, mlnx_umap_pd_data_out_t *data,
1387 size_t data_sz)
1388 {
1389 TAVOR_TNF_ENTER(tavor_umap_pd_data_out);
1390
1391 /* Check for valid PD handle pointer */
1392 if (pd == NULL) {
1393 TNF_PROBE_0(tavor_umap_pd_data_out_invpdhdl_fail,
1394 TAVOR_TNF_ERROR, "");
1395 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1396 return (IBT_PD_HDL_INVALID);
1397 }
1398
1399 /* Check for valid PD mapping structure size */
1400 if (data_sz < sizeof (mlnx_umap_pd_data_out_t)) {
1401 TNF_PROBE_0(tavor_umap_pd_data_out_invdatasz_fail,
1402 TAVOR_TNF_ERROR, "");
1403 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1404 return (IBT_INSUFF_RESOURCE);
1405 }
1406 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1407
1408 /*
1409 	 * If it has passed all the checks, then fill in the PD number
1410 	 * (the index allocated for this PD in the PD table).
1411 */
1412 data->mpd_rev = MLNX_UMAP_IF_VERSION;
1413 data->mpd_pdnum = pd->pd_pdnum;
1414
1415 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1416 return (DDI_SUCCESS);
1417 }
1418
1419 /*
1420 * tavor_umap_db_init()
1421 * Context: Only called from attach() path context
1422 */
1423 void
1424 tavor_umap_db_init(void)
1425 {
1426 TAVOR_TNF_ENTER(tavor_umap_db_init);
1427
1428 /*
1429 * Initialize the lock used by the Tavor "userland resources database"
1430 * This is used to ensure atomic access to add, remove, and find
1431 * entries in the database.
1432 */
1433 mutex_init(&tavor_userland_rsrc_db.tdl_umapdb_lock, NULL,
1434 MUTEX_DRIVER, NULL);
1435
1436 /*
1437 * Initialize the AVL tree used for the "userland resources
1438 * database". Using an AVL tree here provides the ability to
1439 * scale the database size to large numbers of resources. The
1440 * entries in the tree are "tavor_umap_db_entry_t".
1441 * The tree is searched with the help of the
1442 * tavor_umap_db_compare() routine.
1443 */
1444 avl_create(&tavor_userland_rsrc_db.tdl_umapdb_avl,
1445 tavor_umap_db_compare, sizeof (tavor_umap_db_entry_t),
1446 offsetof(tavor_umap_db_entry_t, tdbe_avlnode));
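
	/*
	 * avl_create() only records the compare routine and the node
	 * offset/size here; no entries are allocated until
	 * tavor_umap_db_alloc()/tavor_umap_db_add() are called later.
	 */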
1447
1448 TAVOR_TNF_EXIT(tavor_umap_db_init);
1449 }
1450
1451
1452 /*
1453 * tavor_umap_db_fini()
1454 * Context: Only called from attach() and/or detach() path contexts
1455 */
1456 void
1457 tavor_umap_db_fini(void)
1458 {
1459 TAVOR_TNF_ENTER(tavor_umap_db_fini);
1460
1461 /* Destroy the AVL tree for the "userland resources database" */
1462 avl_destroy(&tavor_userland_rsrc_db.tdl_umapdb_avl);
1463
1464 /* Destroy the lock for the "userland resources database" */
1465 mutex_destroy(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1466
1467 TAVOR_TNF_EXIT(tavor_umap_db_fini);
1468 }
1469
1470
1471 /*
1472 * tavor_umap_db_alloc()
1473 * Context: Can be called from user or kernel context.
1474 */
1475 tavor_umap_db_entry_t *
1476 tavor_umap_db_alloc(uint_t instance, uint64_t key, uint_t type, uint64_t value)
1477 {
1478 tavor_umap_db_entry_t *umapdb;
1479
1480 TAVOR_TNF_ENTER(tavor_umap_db_alloc);
1481
1482 /* Allocate an entry to add to the "userland resources database" */
1483 umapdb = kmem_zalloc(sizeof (tavor_umap_db_entry_t), KM_NOSLEEP);
1484 if (umapdb == NULL) {
1485 TNF_PROBE_0(tavor_umap_db_alloc_kmz_fail, TAVOR_TNF_ERROR, "");
1486 TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1487 return (NULL);
1488 }
1489 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1490
1491 /* Fill in the fields in the database entry */
1492 umapdb->tdbe_common.tdb_instance = instance;
1493 umapdb->tdbe_common.tdb_type = type;
1494 umapdb->tdbe_common.tdb_key = key;
1495 umapdb->tdbe_common.tdb_value = value;
1496
1497 TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1498 return (umapdb);
1499 }
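
/*
 * Typical usage (a sketch only; the real call sites are in the CQ/QP/
 * SRQ/MR setup code, not in this file):
 *
 *	umapdb = tavor_umap_db_alloc(state->ts_instance, key,
 *	    MLNX_UMAP_CQMEM_RSRC, (uint64_t)(uintptr_t)rsrcp);
 *	if (umapdb != NULL)
 *		tavor_umap_db_add(umapdb);
 *
 * The matching teardown path removes the entry with
 * tavor_umap_db_find(..., TAVOR_UMAP_DB_REMOVE, &umapdb) and then
 * calls tavor_umap_db_free(umapdb).
 */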
1500
1501
1502 /*
1503 * tavor_umap_db_free()
1504 * Context: Can be called from user or kernel context.
1505 */
1506 void
1507 tavor_umap_db_free(tavor_umap_db_entry_t *umapdb)
1508 {
1509 TAVOR_TNF_ENTER(tavor_umap_db_free);
1510
1511 /* Free the database entry */
1512 kmem_free(umapdb, sizeof (tavor_umap_db_entry_t));
1513
1514 TAVOR_TNF_EXIT(tavor_umap_db_free);
1515 }
1516
1517
1518 /*
1519 * tavor_umap_db_add()
1520 * Context: Can be called from user or kernel context.
1521 */
1522 void
1523 tavor_umap_db_add(tavor_umap_db_entry_t *umapdb)
1524 {
1525 TAVOR_TNF_ENTER(tavor_umap_db_add);
1526
1527 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1528 tavor_umap_db_add_nolock(umapdb);
1529 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1530
1531 TAVOR_TNF_EXIT(tavor_umap_db_add);
1532 }
1533
1534
1535 /*
1536 * tavor_umap_db_add_nolock()
1537 * Context: Can be called from user or kernel context.
1538 */
1539 void
1540 tavor_umap_db_add_nolock(tavor_umap_db_entry_t *umapdb)
1541 {
1542 tavor_umap_db_query_t query;
1543 avl_index_t where;
1544
1545 TAVOR_TNF_ENTER(tavor_umap_db_add_nolock);
1546
1547 ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1548
1549 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1550
1551 /*
1552 * Copy the common portion of the "to-be-added" database entry
1553 * into the "tavor_umap_db_query_t" structure. We use this structure
1554 * (with no flags set) to find the appropriate location in the
1555 * "userland resources database" for the new entry to be added.
1556 *
1557 * Note: we expect that this entry should not be found in the
1558 * database (unless something bad has happened).
1559 */
1560 query.tqdb_common = umapdb->tdbe_common;
1561 query.tqdb_flags = 0;
1562 (void) avl_find(&tavor_userland_rsrc_db.tdl_umapdb_avl, &query,
1563 &where);
1564
1565 /*
1566 * Now, using the "where" field from the avl_find() operation
1567 * above, we will insert the new database entry ("umapdb").
1568 */
1569 avl_insert(&tavor_userland_rsrc_db.tdl_umapdb_avl, umapdb,
1570 where);
1571
1572 TAVOR_TNF_EXIT(tavor_umap_db_add_nolock);
1573 }
1574
1575
1576 /*
1577 * tavor_umap_db_find()
1578 * Context: Can be called from user or kernel context.
1579 */
1580 int
1581 tavor_umap_db_find(uint_t instance, uint64_t key, uint_t type,
1582 uint64_t *value, uint_t flag, tavor_umap_db_entry_t **umapdb)
1583 {
1584 int status;
1585
1586 TAVOR_TNF_ENTER(tavor_umap_db_find);
1587
1588 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1589 status = tavor_umap_db_find_nolock(instance, key, type, value, flag,
1590 umapdb);
1591 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1592
1593 TAVOR_TNF_EXIT(tavor_umap_db_find);
1594 return (status);
1595 }
1596
1597
1598 /*
1599 * tavor_umap_db_find_nolock()
1600 * Context: Can be called from user or kernel context.
1601 */
1602 int
1603 tavor_umap_db_find_nolock(uint_t instance, uint64_t key, uint_t type,
1604 uint64_t *value, uint_t flags, tavor_umap_db_entry_t **umapdb)
1605 {
1606 tavor_umap_db_query_t query;
1607 tavor_umap_db_entry_t *entry;
1608 avl_index_t where;
1609
1610 TAVOR_TNF_ENTER(tavor_umap_db_find_nolock);
1611
1612 ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1613
1614 /*
1615 * Fill in key, type, instance, and flags values of the
1616 * tavor_umap_db_query_t in preparation for the database
1617 * lookup.
1618 */
1619 query.tqdb_flags = flags;
1620 query.tqdb_common.tdb_key = key;
1621 query.tqdb_common.tdb_type = type;
1622 query.tqdb_common.tdb_instance = instance;
1623
1624 /*
1625 * Perform the database query. If no entry is found, then
1626 * return failure, else continue.
1627 */
1628 entry = (tavor_umap_db_entry_t *)avl_find(
1629 &tavor_userland_rsrc_db.tdl_umapdb_avl, &query, &where);
1630 if (entry == NULL) {
1631 TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
1632 return (DDI_FAILURE);
1633 }
1634 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))
1635
1636 /*
1637 * If the flags argument specifies that the entry should
1638 * be removed if found, then call avl_remove() to remove
1639 * the entry from the database.
1640 */
1641 if (flags & TAVOR_UMAP_DB_REMOVE) {
1642
1643 avl_remove(&tavor_userland_rsrc_db.tdl_umapdb_avl, entry);
1644
1645 /*
1646 * The database entry is returned with the expectation
1647 * that the caller will use tavor_umap_db_free() to
1648 		 * free the entry's memory.  ASSERT that "umapdb" is non-NULL;
1649 		 * a NULL pointer should never be passed for the
1650 * TAVOR_UMAP_DB_REMOVE case.
1651 */
1652 ASSERT(umapdb != NULL);
1653 }
1654
1655 /*
1656 * If the caller would like visibility to the database entry
1657 * (indicated through the use of a non-NULL "umapdb" argument),
1658 * then fill it in.
1659 */
1660 if (umapdb != NULL) {
1661 *umapdb = entry;
1662 }
1663
1664 /* Extract value field from database entry and return success */
1665 *value = entry->tdbe_common.tdb_value;
1666
1667 TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
1668 return (DDI_SUCCESS);
1669 }
1670
1671
1672 /*
1673 * tavor_umap_umemlock_cb()
1674 * Context: Can be called from callback context.
1675 */
1676 void
1677 tavor_umap_umemlock_cb(ddi_umem_cookie_t *umem_cookie)
1678 {
1679 tavor_umap_db_entry_t *umapdb;
1680 tavor_state_t *state;
1681 tavor_rsrc_t *rsrcp;
1682 tavor_mrhdl_t mr;
1683 uint64_t value;
1684 uint_t instance;
1685 int status;
1686 void (*mr_callback)(void *, void *);
1687 void *mr_cbarg1, *mr_cbarg2;
1688
1689 TAVOR_TNF_ENTER(tavor_umap_umemlock_cb);
1690
1691 /*
1692 * If this was userland memory, then we need to remove its entry
1693 * from the "userland resources database". Note: We use the
1694 * TAVOR_UMAP_DB_IGNORE_INSTANCE flag here because we don't know
1695 * which instance was used when the entry was added (but we want
1696 * to know after the entry is found using the other search criteria).
1697 */
1698 status = tavor_umap_db_find(0, (uint64_t)(uintptr_t)umem_cookie,
1699 MLNX_UMAP_MRMEM_RSRC, &value, (TAVOR_UMAP_DB_REMOVE |
1700 TAVOR_UMAP_DB_IGNORE_INSTANCE), &umapdb);
1701 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1702 if (status == DDI_SUCCESS) {
1703 instance = umapdb->tdbe_common.tdb_instance;
1704 state = ddi_get_soft_state(tavor_statep, instance);
1705 if (state == NULL) {
1706 cmn_err(CE_WARN, "Unable to match Tavor instance\n");
1707 TNF_PROBE_0(tavor_umap_umemlock_cb_gss_fail,
1708 TAVOR_TNF_ERROR, "");
1709 TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1710 return;
1711 }
1712
1713 /* Free the database entry */
1714 tavor_umap_db_free(umapdb);
1715
1716 /* Use "value" to convert to an MR handle */
1717 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
1718 mr = (tavor_mrhdl_t)rsrcp->tr_addr;
1719
1720 /*
1721 * If a callback has been provided, call it first. This
1722 * callback is expected to do any cleanup necessary to
1723 * guarantee that the subsequent MR deregister (below)
1724 * will succeed. Specifically, this means freeing up memory
1725 * windows which might have been associated with the MR.
1726 */
1727 mutex_enter(&mr->mr_lock);
1728 mr_callback = mr->mr_umem_cbfunc;
1729 mr_cbarg1 = mr->mr_umem_cbarg1;
1730 mr_cbarg2 = mr->mr_umem_cbarg2;
1731 mutex_exit(&mr->mr_lock);
1732 if (mr_callback != NULL) {
1733 mr_callback(mr_cbarg1, mr_cbarg2);
1734 }
1735
1736 /*
1737 * Then call tavor_mr_deregister() to release the resources
1738 * associated with the MR handle. Note: Because this routine
1739 		 * will also check whether the ddi_umem_cookie_t is in the
1740 * database, it will take responsibility for disabling the
1741 * memory region and calling ddi_umem_unlock().
1742 */
1743 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
1744 TAVOR_SLEEP);
1745 if (status != DDI_SUCCESS) {
1746 TAVOR_WARNING(state, "Unexpected failure in "
1747 "deregister from callback\n");
1748 TNF_PROBE_0(tavor_umap_umemlock_cb_dereg_fail,
1749 TAVOR_TNF_ERROR, "");
1750 TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1751 }
1752 }
1753
1754 TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1755 }
1756
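/*
 * Illustrative sketch (hypothetical, not compiled into the driver):
 * installing the cleanup callback consumed by tavor_umap_umemlock_cb()
 * above.  The callback is expected to release anything (e.g. memory
 * windows) that would otherwise cause the subsequent
 * tavor_mr_deregister() to fail.
 */
#ifdef	TAVOR_UMAP_EXAMPLES
static void
example_set_umem_callback(tavor_mrhdl_t mr, void (*cb)(void *, void *),
    void *arg1, void *arg2)
{
	/* Publish the callback fields under the MR lock, as read above */
	mutex_enter(&mr->mr_lock);
	mr->mr_umem_cbfunc = cb;
	mr->mr_umem_cbarg1 = arg1;
	mr->mr_umem_cbarg2 = arg2;
	mutex_exit(&mr->mr_lock);
}
#endif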
1757
1758 /*
1759 * tavor_umap_db_compare()
1760 * Context: Can be called from user or kernel context.
1761 */
1762 static int
1763 tavor_umap_db_compare(const void *q, const void *e)
1764 {
1765 tavor_umap_db_common_t *entry_common, *query_common;
1766 uint_t query_flags;
1767
1768 TAVOR_TNF_ENTER(tavor_umap_db_compare);
1769
1770 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*((tavor_umap_db_query_t *)q)))
1771
1772 entry_common = &((tavor_umap_db_entry_t *)e)->tdbe_common;
1773 query_common = &((tavor_umap_db_query_t *)q)->tqdb_common;
1774 query_flags = ((tavor_umap_db_query_t *)q)->tqdb_flags;
1775
1776 /*
1777 * The first comparison is done on the "key" value in "query"
1778 * and "entry". If they are not equal, then the appropriate
1779 * search direction is returned. Else, we continue by
1780 * comparing "type".
1781 */
1782 if (query_common->tdb_key < entry_common->tdb_key) {
1783 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1784 return (-1);
1785 } else if (query_common->tdb_key > entry_common->tdb_key) {
1786 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1787 return (+1);
1788 }
1789
1790 /*
1791 * If the search reaches this point, then "query" and "entry"
1792 	 * have equal key values.  So we continue by comparing their
1793 * "type" values. Again, if they are not equal, then the
1794 * appropriate search direction is returned. Else, we continue
1795 * by comparing "instance".
1796 */
1797 if (query_common->tdb_type < entry_common->tdb_type) {
1798 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1799 return (-1);
1800 } else if (query_common->tdb_type > entry_common->tdb_type) {
1801 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1802 return (+1);
1803 }
1804
1805 /*
1806 * If the search reaches this point, then "query" and "entry"
1807 * have exactly the same key and type values. Now we consult
1808 * the "flags" field in the query to determine whether the
1809 * "instance" is relevant to the search. If the
1810 	 * TAVOR_UMAP_DB_IGNORE_INSTANCE flag is set, then return
1811 * success (0) here. Otherwise, continue the search by comparing
1812 * instance values and returning the appropriate search direction.
1813 */
1814 if (query_flags & TAVOR_UMAP_DB_IGNORE_INSTANCE) {
1815 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1816 return (0);
1817 }
1818
1819 /*
1820 * If the search has reached this point, then "query" and "entry"
1821 * can only be differentiated by their instance values. If these
1822 * are not equal, then return the appropriate search direction.
1823 * Else, we return success (0).
1824 */
1825 if (query_common->tdb_instance < entry_common->tdb_instance) {
1826 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1827 return (-1);
1828 } else if (query_common->tdb_instance > entry_common->tdb_instance) {
1829 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1830 return (+1);
1831 }
1832
1833 /* Everything matches... so return success */
1834 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1835 return (0);
1836 }
1837
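/*
 * Illustrative sketch (hypothetical, not compiled into the driver):
 * how a comparator like tavor_umap_db_compare() above is registered
 * when the AVL tree is created.  The embedded avl_node_t field name
 * ("tdbe_avlnode") is an assumption made for illustration; the real
 * field is defined in the Tavor headers.
 */
#ifdef	TAVOR_UMAP_EXAMPLES
static void
example_create_umapdb_avl(avl_tree_t *tree)
{
	avl_create(tree, tavor_umap_db_compare,
	    sizeof (tavor_umap_db_entry_t),
	    offsetof(tavor_umap_db_entry_t, tdbe_avlnode));
}
#endif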
1838
1839 /*
1840 * tavor_umap_db_set_onclose_cb()
1841 * Context: Can be called from user or kernel context.
1842 */
1843 int
1844 tavor_umap_db_set_onclose_cb(dev_t dev, uint64_t flag,
1845 void (*callback)(void *), void *arg)
1846 {
1847 tavor_umap_db_priv_t *priv;
1848 tavor_umap_db_entry_t *umapdb;
1849 minor_t instance;
1850 uint64_t value;
1851 int status;
1852
1853 TAVOR_TNF_ENTER(tavor_umap_db_set_onclose_cb);
1854
1855 instance = TAVOR_DEV_INSTANCE(dev);
1856 if (instance == -1) {
1857 TNF_PROBE_0(tavor_umap_db_set_onclose_cb_inst_fail,
1858 TAVOR_TNF_ERROR, "");
1859 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1860 return (DDI_FAILURE);
1861 }
1862
1863 if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1864 TNF_PROBE_0(tavor_umap_db_set_onclose_cb_invflag_fail,
1865 TAVOR_TNF_ERROR, "");
1866 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1867 return (DDI_FAILURE);
1868 }
1869
1870 /*
1871 * Grab the lock for the "userland resources database" and find
1872 * the entry corresponding to this minor number. Once it's found,
1873 * allocate (if necessary) and add an entry (in the "tdb_priv"
1874 * field) to indicate that further processing may be needed during
1875 * Tavor's close() handling.
1876 */
1877 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1878 status = tavor_umap_db_find_nolock(instance, dev,
1879 MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1880 if (status != DDI_SUCCESS) {
1881 TNF_PROBE_0(tavor_umap_db_set_onclose_cb_find_fail,
1882 TAVOR_TNF_ERROR, "");
1883 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1884 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1885 return (DDI_FAILURE);
1886 }
1887
1888 priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1889 if (priv == NULL) {
1890 priv = (tavor_umap_db_priv_t *)kmem_zalloc(
1891 sizeof (tavor_umap_db_priv_t), KM_NOSLEEP);
1892 if (priv == NULL) {
1893 TNF_PROBE_0(tavor_umap_db_set_onclose_cb_kmz_fail,
1894 TAVOR_TNF_ERROR, "");
1895 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1896 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1897 return (DDI_FAILURE);
1898 }
1899 }
1900
1901 /*
1902 * Save away the callback and argument to be used during Tavor's
1903 * close() processing.
1904 */
1905 priv->tdp_cb = callback;
1906 priv->tdp_arg = arg;
1907
1908 umapdb->tdbe_common.tdb_priv = (void *)priv;
1909 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1910
1911 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1912 return (DDI_SUCCESS);
1913 }
1914
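/*
 * Illustrative sketch (hypothetical, not compiled into the driver): a
 * flash update path arming a cleanup callback so that, if the process
 * closes the device mid-operation, close() processing can undo the
 * work in progress.  The callback and its argument are placeholders.
 */
#ifdef	TAVOR_UMAP_EXAMPLES
static void
example_flash_cleanup(void *arg)
{
	/* Hypothetical cleanup of an interrupted flash operation */
	cmn_err(CE_NOTE, "cleaning up flash state at %p", arg);
}

static int
example_arm_flash_cleanup(dev_t dev, void *flash_state)
{
	return (tavor_umap_db_set_onclose_cb(dev,
	    TAVOR_ONCLOSE_FLASH_INPROGRESS, example_flash_cleanup,
	    flash_state));
}
#endif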
1915
1916 /*
1917 * tavor_umap_db_clear_onclose_cb()
1918 * Context: Can be called from user or kernel context.
1919 */
1920 int
1921 tavor_umap_db_clear_onclose_cb(dev_t dev, uint64_t flag)
1922 {
1923 tavor_umap_db_priv_t *priv;
1924 tavor_umap_db_entry_t *umapdb;
1925 minor_t instance;
1926 uint64_t value;
1927 int status;
1928
1929 	TAVOR_TNF_ENTER(tavor_umap_db_clear_onclose_cb);
1930
1931 instance = TAVOR_DEV_INSTANCE(dev);
1932 if (instance == -1) {
1933 TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_inst_fail,
1934 TAVOR_TNF_ERROR, "");
1935 TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1936 return (DDI_FAILURE);
1937 }
1938
1939 if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1940 TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_invflag_fail,
1941 TAVOR_TNF_ERROR, "");
1942 TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1943 return (DDI_FAILURE);
1944 }
1945
1946 /*
1947 * Grab the lock for the "userland resources database" and find
1948 * the entry corresponding to this minor number. Once it's found,
1949 * remove the entry (in the "tdb_priv" field) that indicated the
1950 * need for further processing during Tavor's close(). Free the
1951 * entry, if appropriate.
1952 */
1953 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1954 status = tavor_umap_db_find_nolock(instance, dev,
1955 MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1956 if (status != DDI_SUCCESS) {
1957 TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_find_fail,
1958 TAVOR_TNF_ERROR, "");
1959 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1960 TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1961 return (DDI_FAILURE);
1962 }
1963
1964 priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1965 if (priv != NULL) {
1966 kmem_free(priv, sizeof (tavor_umap_db_priv_t));
1967 priv = NULL;
1968 }
1969
1970 umapdb->tdbe_common.tdb_priv = (void *)priv;
1971 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1972 return (DDI_SUCCESS);
1973 }
1974
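/*
 * Illustrative sketch (hypothetical, not compiled into the driver):
 * once the flash operation completes normally, the path that armed
 * the callback disarms it again.
 */
#ifdef	TAVOR_UMAP_EXAMPLES
static void
example_disarm_flash_cleanup(dev_t dev)
{
	(void) tavor_umap_db_clear_onclose_cb(dev,
	    TAVOR_ONCLOSE_FLASH_INPROGRESS);
}
#endif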
1975
1976 /*
1977  * tavor_umap_db_handle_onclose_cb()
1978 * Context: Can be called from user or kernel context.
1979 */
1980 void
1981 tavor_umap_db_handle_onclose_cb(tavor_umap_db_priv_t *priv)
1982 {
1983 void (*callback)(void *);
1984
1985 ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1986
1987 /*
1988 * Call the callback.
1989 * Note: Currently there is only one callback (in "tdp_cb"), but
1990 * in the future there may be more, depending on what other types
1991 * of interaction there are between userland processes and the
1992 * driver.
1993 */
1994 callback = priv->tdp_cb;
1995 callback(priv->tdp_arg);
1996 }
1997
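/*
 * Illustrative sketch (hypothetical, not compiled into the driver):
 * the close() path is expected to hold tdl_umapdb_lock, pull the
 * private data off the entry for the closing minor node, and hand it
 * to tavor_umap_db_handle_onclose_cb() above.
 */
#ifdef	TAVOR_UMAP_EXAMPLES
static void
example_run_onclose(tavor_umap_db_entry_t *umapdb)
{
	tavor_umap_db_priv_t	*priv;

	ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));

	priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
	if (priv != NULL) {
		tavor_umap_db_handle_onclose_cb(priv);
	}
}
#endif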