1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * tavor_umap.c
29 * Tavor Userland Mapping Routines
30 *
31 * Implements all the routines necessary for enabling direct userland
32 * access to the Tavor hardware. This includes all routines necessary for
33 * maintaining the "userland resources database" and all the support routines
34 * for the devmap calls.
35 */
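
/*
 * For orientation, the rough userland flow that these routines serve is
 * sketched below.  This is an illustrative, hypothetical consumer (names
 * such as "fd" and "cq_out" are assumptions, not part of this driver); the
 * real consumer is the Mellanox userland library, which obtains the mapping
 * offset through the tavor_umap_ci_data_out() path and then mmap(2)'s it:
 *
 *	mlnx_umap_cq_data_out_t	cq_out;
 *
 *	(CI "data out" fills in cq_out.mcq_mapoffset and cq_out.mcq_maplen)
 *	buf = mmap(NULL, cq_out.mcq_maplen, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, cq_out.mcq_mapoffset);
 *
 * The resulting mmap(2) call arrives at tavor_devmap() below, which decodes
 * the "key" and "type" encoded in the offset and maps out the corresponding
 * UAR page, CQ, QP, or SRQ memory.
 */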
36
37 #include <sys/types.h>
38 #include <sys/conf.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/modctl.h>
42 #include <sys/file.h>
43 #include <sys/avl.h>
44 #include <sys/sysmacros.h>
45
46 #include <sys/ib/adapters/tavor/tavor.h>
47
48 /* Tavor HCA state pointer (extern) */
49 extern void *tavor_statep;
50
51 /* Tavor HCA Userland Resource Database (extern) */
52 extern tavor_umap_db_t tavor_userland_rsrc_db;
53
54 static int tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
55 tavor_rsrc_t *rsrcp, size_t *maplen, int *err);
56 static int tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
57 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
58 static int tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
59 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
60 static int tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
61 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
62 static int tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
63 offset_t off, size_t len, void **pvtp);
64 static int tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp,
65 devmap_cookie_t new_dhp, void **new_pvtp);
66 static void tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp,
67 offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
68 devmap_cookie_t new_dhp2, void **pvtp2);
69 static int tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
70 offset_t off, size_t len, void **pvtp);
71 static int tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
72 devmap_cookie_t new_dhp, void **new_pvtp);
73 static void tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp,
74 offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
75 devmap_cookie_t new_dhp2, void **pvtp2);
76 static ibt_status_t tavor_umap_mr_data_in(tavor_mrhdl_t mr,
77 ibt_mr_data_in_t *data, size_t data_sz);
78 static ibt_status_t tavor_umap_cq_data_out(tavor_cqhdl_t cq,
79 mlnx_umap_cq_data_out_t *data, size_t data_sz);
80 static ibt_status_t tavor_umap_qp_data_out(tavor_qphdl_t qp,
81 mlnx_umap_qp_data_out_t *data, size_t data_sz);
82 static ibt_status_t tavor_umap_srq_data_out(tavor_srqhdl_t srq,
83 mlnx_umap_srq_data_out_t *data, size_t data_sz);
84 static int tavor_umap_db_compare(const void *query, const void *entry);
85 static ibt_status_t tavor_umap_pd_data_out(tavor_pdhdl_t pd,
86 mlnx_umap_pd_data_out_t *data, size_t data_sz);
87
88
89 /*
90 * These callbacks are passed to devmap_umem_setup() and devmap_devmem_setup(),
91 * respectively. They are used to handle (among other things) partial
92 * unmappings and to provide a method for invalidating mappings inherited
93 * as a result of a fork(2) system call.
94 */
95 static struct devmap_callback_ctl tavor_devmap_umem_cbops = {
96 DEVMAP_OPS_REV,
97 tavor_devmap_umem_map,
98 NULL,
99 tavor_devmap_umem_dup,
100 tavor_devmap_umem_unmap
101 };
102 static struct devmap_callback_ctl tavor_devmap_devmem_cbops = {
103 DEVMAP_OPS_REV,
104 tavor_devmap_devmem_map,
105 NULL,
106 tavor_devmap_devmem_dup,
107 tavor_devmap_devmem_unmap
108 };
109
110 /*
111 * tavor_devmap()
112 * Context: Can be called from user context.
113 */
114 /* ARGSUSED */
115 int
116 tavor_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
117 size_t *maplen, uint_t model)
118 {
119 tavor_state_t *state;
120 tavor_rsrc_t *rsrcp;
121 minor_t instance;
122 uint64_t key, value;
123 uint_t type;
124 int err, status;
125
126 /* Get Tavor softstate structure from instance */
127 instance = TAVOR_DEV_INSTANCE(dev);
128 state = ddi_get_soft_state(tavor_statep, instance);
129 if (state == NULL) {
130 return (ENXIO);
131 }
132
133 /*
134 	 * Access to the Tavor devmap interface is not allowed in
135 * "maintenance mode".
136 */
137 if (state->ts_operational_mode == TAVOR_MAINTENANCE_MODE) {
138 return (EFAULT);
139 }
140
141 /*
142 * The bottom bits of "offset" are undefined (number depends on
143 * system PAGESIZE). Shifting these off leaves us with a "key".
144 * The "key" is actually a combination of both a real key value
145 * (for the purpose of database lookup) and a "type" value. We
146 * extract this information before doing the database lookup.
147 */
148 key = off >> PAGESHIFT;
149 type = key & MLNX_UMAP_RSRC_TYPE_MASK;
150 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
151 status = tavor_umap_db_find(instance, key, type, &value, 0, NULL);
152 if (status == DDI_SUCCESS) {
153 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
154
155 switch (type) {
156 case MLNX_UMAP_UARPG_RSRC:
157 /*
158 			 * Double check that the process that open()'d Tavor
159 			 * is the same process attempting to mmap() the UAR page.
160 */
161 if (key != ddi_get_pid()) {
162 return (EINVAL);
163 }
164
165 /* Map the UAR page out for userland access */
166 status = tavor_umap_uarpg(state, dhp, rsrcp, maplen,
167 &err);
168 if (status != DDI_SUCCESS) {
169 return (err);
170 }
171 break;
172
173 case MLNX_UMAP_CQMEM_RSRC:
174 /* Map the CQ memory out for userland access */
175 status = tavor_umap_cqmem(state, dhp, rsrcp, off,
176 maplen, &err);
177 if (status != DDI_SUCCESS) {
178 return (err);
179 }
180 break;
181
182 case MLNX_UMAP_QPMEM_RSRC:
183 /* Map the QP memory out for userland access */
184 status = tavor_umap_qpmem(state, dhp, rsrcp, off,
185 maplen, &err);
186 if (status != DDI_SUCCESS) {
187 return (err);
188 }
189 break;
190
191 case MLNX_UMAP_SRQMEM_RSRC:
192 /* Map the SRQ memory out for userland access */
193 status = tavor_umap_srqmem(state, dhp, rsrcp, off,
194 maplen, &err);
195 if (status != DDI_SUCCESS) {
196 return (err);
197 }
198 break;
199
200 default:
201 TAVOR_WARNING(state, "unexpected rsrc type in devmap");
202 return (EINVAL);
203 }
204 } else {
205 return (EINVAL);
206 }
207
208 return (0);
209 }
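
/*
 * A worked example of the offset decode above (a sketch only; it assumes
 * PAGESHIFT == 12, MLNX_UMAP_RSRC_TYPE_SHIFT == 8, and the CQ resource
 * type value 0x33 shown in the layout diagrams later in this file):
 *
 *	off  = ((cqnum << 8) | 0x33) << 12;	(built by cq_data_out)
 *	key  = off >> 12;			(== (cqnum << 8) | 0x33)
 *	type = key & 0xff;			(== 0x33, i.e. CQ memory)
 *	key  = key >> 8;			(== cqnum)
 *
 * So for CQ number 0x5 the userland mapping offset would be 0x533000, and
 * the decode above recovers type 0x33 and key 0x5.
 */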
210
211
212 /*
213 * tavor_umap_uarpg()
214 * Context: Can be called from user context.
215 */
216 static int
217 tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
218 tavor_rsrc_t *rsrcp, size_t *maplen, int *err)
219 {
220 int status;
221 uint_t maxprot;
222
223 /* Map out the UAR page (doorbell page) */
224 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
225 status = devmap_devmem_setup(dhp, state->ts_dip,
226 &tavor_devmap_devmem_cbops, TAVOR_UAR_BAR, (rsrcp->tr_indx <<
227 PAGESHIFT), PAGESIZE, maxprot, DEVMAP_ALLOW_REMAP,
228 &state->ts_reg_accattr);
229 if (status < 0) {
230 *err = status;
231 return (DDI_FAILURE);
232 }
233
234 *maplen = PAGESIZE;
235 return (DDI_SUCCESS);
236 }
237
238
239 /*
240 * tavor_umap_cqmem()
241 * Context: Can be called from user context.
242 */
243 /* ARGSUSED */
244 static int
245 tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
246 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
247 {
248 tavor_cqhdl_t cq;
249 size_t size;
250 uint_t maxprot;
251 int status;
252
253 /* Extract the Tavor CQ handle pointer from the tavor_rsrc_t */
254 cq = (tavor_cqhdl_t)rsrcp->tr_addr;
255
256 	/* Round up the CQ size to the system page size */
257 size = ptob(btopr(cq->cq_cqinfo.qa_size));
258
259 /* Map out the CQ memory */
260 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
261 status = devmap_umem_setup(dhp, state->ts_dip,
262 &tavor_devmap_umem_cbops, cq->cq_cqinfo.qa_umemcookie, 0, size,
263 maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
264 if (status < 0) {
265 *err = status;
266 return (DDI_FAILURE);
267 }
268 *maplen = size;
269
270 return (DDI_SUCCESS);
271 }
272
273
274 /*
275 * tavor_umap_qpmem()
276 * Context: Can be called from user context.
277 */
278 /* ARGSUSED */
279 static int
280 tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
281 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
282 {
283 tavor_qphdl_t qp;
284 offset_t offset;
285 size_t size;
286 uint_t maxprot;
287 int status;
288
289 /* Extract the Tavor QP handle pointer from the tavor_rsrc_t */
290 qp = (tavor_qphdl_t)rsrcp->tr_addr;
291
292 /*
293 * Calculate the offset of the first work queue (send or recv) into
294 * the memory (ddi_umem_alloc()) allocated previously for the QP.
295 */
296 offset = (offset_t)((uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
297 (uintptr_t)qp->qp_wqinfo.qa_buf_real);
298
299 	/* Round up the QP work queue sizes to the system page size */
300 size = ptob(btopr(qp->qp_wqinfo.qa_size));
301
302 /* Map out the QP memory */
303 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
304 status = devmap_umem_setup(dhp, state->ts_dip,
305 &tavor_devmap_umem_cbops, qp->qp_wqinfo.qa_umemcookie, offset,
306 size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
307 if (status < 0) {
308 *err = status;
309 return (DDI_FAILURE);
310 }
311 *maplen = size;
312
313 return (DDI_SUCCESS);
314 }
315
316
317 /*
318 * tavor_umap_srqmem()
319 * Context: Can be called from user context.
320 */
321 /* ARGSUSED */
322 static int
323 tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
324 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
325 {
326 tavor_srqhdl_t srq;
327 offset_t offset;
328 size_t size;
329 uint_t maxprot;
330 int status;
331
332 /* Extract the Tavor SRQ handle pointer from the tavor_rsrc_t */
333 srq = (tavor_srqhdl_t)rsrcp->tr_addr;
334
335 /*
336 * Calculate the offset of the first shared recv queue into the memory
337 * (ddi_umem_alloc()) allocated previously for the SRQ.
338 */
339 offset = (offset_t)((uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
340 (uintptr_t)srq->srq_wqinfo.qa_buf_real);
341
342 	/* Round up the SRQ work queue sizes to the system page size */
343 size = ptob(btopr(srq->srq_wqinfo.qa_size));
344
345 	/* Map out the SRQ memory */
346 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
347 status = devmap_umem_setup(dhp, state->ts_dip,
348 &tavor_devmap_umem_cbops, srq->srq_wqinfo.qa_umemcookie, offset,
349 size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
350 if (status < 0) {
351 *err = status;
352 return (DDI_FAILURE);
353 }
354 *maplen = size;
355
356 return (DDI_SUCCESS);
357 }
358
359
360 /*
361 * tavor_devmap_umem_map()
362 * Context: Can be called from kernel context.
363 */
364 /* ARGSUSED */
365 static int
366 tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
367 offset_t off, size_t len, void **pvtp)
368 {
369 tavor_state_t *state;
370 tavor_devmap_track_t *dvm_track;
371 tavor_cqhdl_t cq;
372 tavor_qphdl_t qp;
373 tavor_srqhdl_t srq;
374 minor_t instance;
375 uint64_t key;
376 uint_t type;
377
378 /* Get Tavor softstate structure from instance */
379 instance = TAVOR_DEV_INSTANCE(dev);
380 state = ddi_get_soft_state(tavor_statep, instance);
381 if (state == NULL) {
382 return (ENXIO);
383 }
384
385 /*
386 * The bottom bits of "offset" are undefined (number depends on
387 * system PAGESIZE). Shifting these off leaves us with a "key".
388 * The "key" is actually a combination of both a real key value
389 * (for the purpose of database lookup) and a "type" value. Although
390 * we are not going to do any database lookup per se, we do want
391 * to extract the "key" and the "type" (to enable faster lookup of
392 * the appropriate CQ or QP handle).
393 */
394 key = off >> PAGESHIFT;
395 type = key & MLNX_UMAP_RSRC_TYPE_MASK;
396 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
397
398 /*
399 * Allocate an entry to track the mapping and unmapping (specifically,
400 * partial unmapping) of this resource.
401 */
402 dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
403 sizeof (tavor_devmap_track_t), KM_SLEEP);
404 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
405 dvm_track->tdt_offset = off;
406 dvm_track->tdt_state = state;
407 dvm_track->tdt_refcnt = 1;
408 mutex_init(&dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
409 DDI_INTR_PRI(state->ts_intrmsi_pri));
410
411 /*
412 	 * Depending on the type of resource that has been mapped out, we
413 	 * need to update the CQ, QP, or SRQ handle to reflect that it has, in
414 * fact, been mapped. This allows the driver code which frees a QP
415 * or a CQ to know whether it is appropriate to do a
416 * devmap_devmem_remap() to invalidate the userland mapping for the
417 * corresponding queue's memory.
418 */
419 if (type == MLNX_UMAP_CQMEM_RSRC) {
420
421 /* Use "key" (CQ number) to do fast lookup of CQ handle */
422 cq = tavor_cqhdl_from_cqnum(state, key);
423
424 /*
425 * Update the handle to the userland mapping. Note: If
426 * the CQ already has a valid userland mapping, then stop
427 * and return failure.
428 */
429 mutex_enter(&cq->cq_lock);
430 if (cq->cq_umap_dhp == NULL) {
431 cq->cq_umap_dhp = dhp;
432 dvm_track->tdt_size = cq->cq_cqinfo.qa_size;
433 mutex_exit(&cq->cq_lock);
434 } else {
435 mutex_exit(&cq->cq_lock);
436 goto umem_map_fail;
437 }
438
439 } else if (type == MLNX_UMAP_QPMEM_RSRC) {
440
441 /* Use "key" (QP number) to do fast lookup of QP handle */
442 qp = tavor_qphdl_from_qpnum(state, key);
443
444 /*
445 * Update the handle to the userland mapping. Note: If
446 		 * the QP already has a valid userland mapping, then stop
447 * and return failure.
448 */
449 mutex_enter(&qp->qp_lock);
450 if (qp->qp_umap_dhp == NULL) {
451 qp->qp_umap_dhp = dhp;
452 dvm_track->tdt_size = qp->qp_wqinfo.qa_size;
453 mutex_exit(&qp->qp_lock);
454 } else {
455 mutex_exit(&qp->qp_lock);
456 goto umem_map_fail;
457 }
458
459 } else if (type == MLNX_UMAP_SRQMEM_RSRC) {
460
461 /* Use "key" (SRQ number) to do fast lookup on SRQ handle */
462 srq = tavor_srqhdl_from_srqnum(state, key);
463
464 /*
465 * Update the handle to the userland mapping. Note: If the
466 * SRQ already has a valid userland mapping, then stop and
467 * return failure.
468 */
469 mutex_enter(&srq->srq_lock);
470 if (srq->srq_umap_dhp == NULL) {
471 srq->srq_umap_dhp = dhp;
472 dvm_track->tdt_size = srq->srq_wqinfo.qa_size;
473 mutex_exit(&srq->srq_lock);
474 } else {
475 mutex_exit(&srq->srq_lock);
476 goto umem_map_fail;
477 }
478 }
479
480 /*
481 * Pass the private "Tavor devmap tracking structure" back. This
482 * pointer will be returned in subsequent "unmap" callbacks.
483 */
484 *pvtp = dvm_track;
485
486 return (DDI_SUCCESS);
487
488 umem_map_fail:
489 mutex_destroy(&dvm_track->tdt_lock);
490 kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
491 return (DDI_FAILURE);
492 }
493
494
495 /*
496 * tavor_devmap_umem_dup()
497 * Context: Can be called from kernel context.
498 */
499 /* ARGSUSED */
500 static int
501 tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp, devmap_cookie_t new_dhp,
502 void **new_pvtp)
503 {
504 tavor_state_t *state;
505 tavor_devmap_track_t *dvm_track, *new_dvm_track;
506 uint_t maxprot;
507 int status;
508
509 /*
510 * Extract the Tavor softstate pointer from "Tavor devmap tracking
511 * structure" (in "pvtp").
512 */
513 dvm_track = (tavor_devmap_track_t *)pvtp;
514 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
515 state = dvm_track->tdt_state;
516
517 /*
518 * Since this devmap_dup() entry point is generally called
519 * when a process does fork(2), it is incumbent upon the driver
520 * to insure that the child does not inherit a valid copy of
521 	 * to ensure that the child does not inherit a valid copy of
522 * devmap_devmem_remap() to invalidate the child's mapping to the
523 * kernel memory.
524 */
525 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
526 status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
527 dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
528 if (status != DDI_SUCCESS) {
529 TAVOR_WARNING(state, "failed in tavor_devmap_umem_dup()");
530 return (status);
531 }
532
533 /*
534 * Allocate a new entry to track the subsequent unmapping
535 * (specifically, all partial unmappings) of the child's newly
536 * invalidated resource. Note: Setting the "tdt_size" field to
537 * zero here is an indication to the devmap_unmap() entry point
538 * that this mapping is invalid, and that its subsequent unmapping
539 * should not affect any of the parent's CQ or QP resources.
540 */
541 new_dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
542 sizeof (tavor_devmap_track_t), KM_SLEEP);
543 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*new_dvm_track))
544 new_dvm_track->tdt_offset = 0;
545 new_dvm_track->tdt_state = state;
546 new_dvm_track->tdt_refcnt = 1;
547 new_dvm_track->tdt_size = 0;
548 mutex_init(&new_dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
549 DDI_INTR_PRI(state->ts_intrmsi_pri));
550 *new_pvtp = new_dvm_track;
551
552 return (DDI_SUCCESS);
553 }
554
555
556 /*
557 * tavor_devmap_umem_unmap()
558 * Context: Can be called from kernel context.
559 */
560 /* ARGSUSED */
561 static void
562 tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
563 size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
564 devmap_cookie_t new_dhp2, void **pvtp2)
565 {
566 tavor_state_t *state;
567 tavor_rsrc_t *rsrcp;
568 tavor_devmap_track_t *dvm_track;
569 tavor_cqhdl_t cq;
570 tavor_qphdl_t qp;
571 tavor_srqhdl_t srq;
572 uint64_t key, value;
573 uint_t type;
574 uint_t size;
575 int status;
576
577 /*
578 * Extract the Tavor softstate pointer from "Tavor devmap tracking
579 * structure" (in "pvtp").
580 */
581 dvm_track = (tavor_devmap_track_t *)pvtp;
582 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
583 state = dvm_track->tdt_state;
584
585 /*
586 * Extract the "offset" from the "Tavor devmap tracking structure".
587 * Note: The input argument "off" is ignored here because the
588 * Tavor mapping interfaces define a very specific meaning to
589 * each "logical offset". Also extract the "key" and "type" encoded
590 * in the logical offset.
591 */
592 key = dvm_track->tdt_offset >> PAGESHIFT;
593 type = key & MLNX_UMAP_RSRC_TYPE_MASK;
594 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
595
596 /*
597 * Extract the "size" of the mapping. If this size is determined
598 * to be zero, then it is an indication of a previously invalidated
599 * mapping, and no CQ or QP resources should be affected.
600 */
601 size = dvm_track->tdt_size;
602
603 /*
604 	 * If only the "middle" portion of a given mapping is being unmapped,
605 	 * then we are effectively creating one new piece of mapped memory.
606 	 * (The original region is divided into three pieces, of which the
607 	 * middle piece is being removed.  This leaves two pieces.)  Since we
608 	 * started with one piece and now have two pieces, we need to
609 	 * increment the counter in the "Tavor devmap tracking structure".
610 *
611 * If, however, the whole mapped region is being unmapped, then we
612 * have started with one region which we are completely removing.
613 * In this case, we need to decrement the counter in the "Tavor
614 * devmap tracking structure".
615 *
616 * In each of the remaining cases, we will have started with one
617 * mapped region and ended with one (different) region. So no counter
618 * modification is necessary.
619 */
620 mutex_enter(&dvm_track->tdt_lock);
621 if ((new_dhp1 == NULL) && (new_dhp2 == NULL)) {
622 dvm_track->tdt_refcnt--;
623 } else if ((new_dhp1 != NULL) && (new_dhp2 != NULL)) {
624 dvm_track->tdt_refcnt++;
625 }
626 mutex_exit(&dvm_track->tdt_lock);
627
628 /*
629 	 * For each of the cases where the region is being divided, we
630 * need to pass back the "Tavor devmap tracking structure". This way
631 * we get it back when each of the remaining pieces is subsequently
632 * unmapped.
633 */
634 if (new_dhp1 != NULL) {
635 *pvtp1 = pvtp;
636 }
637 if (new_dhp2 != NULL) {
638 *pvtp2 = pvtp;
639 }
640
641 /*
642 * If the "Tavor devmap tracking structure" is no longer being
643 * referenced, then free it up. Otherwise, return.
644 */
645 if (dvm_track->tdt_refcnt == 0) {
646 mutex_destroy(&dvm_track->tdt_lock);
647 kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
648
649 /*
650 * If the mapping was invalid (see explanation above), then
651 * no further processing is necessary.
652 */
653 if (size == 0) {
654 return;
655 }
656 } else {
657 return;
658 }
659
660 /*
661 * Now that we can guarantee that the user memory is fully unmapped,
662 * we can use the "key" and "type" values to try to find the entry
663 * in the "userland resources database". If it's found, then it
664 * indicates that the queue memory (CQ or QP) has not yet been freed.
665 * In this case, we update the corresponding CQ or QP handle to
666 * indicate that the "devmap_devmem_remap()" call will be unnecessary.
667 * If it's _not_ found, then it indicates that the CQ or QP memory
668 * was, in fact, freed before it was unmapped (thus requiring a
669 * previous invalidation by remapping - which will already have
670 * been done in the free routine).
671 */
672 status = tavor_umap_db_find(state->ts_instance, key, type, &value,
673 0, NULL);
674 if (status == DDI_SUCCESS) {
675 /*
676 * Depending on the type of the mapped resource (CQ or QP),
677 * update handle to indicate that no invalidation remapping
678 * will be necessary.
679 */
680 if (type == MLNX_UMAP_CQMEM_RSRC) {
681
682 /* Use "value" to convert to CQ handle */
683 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
684 cq = (tavor_cqhdl_t)rsrcp->tr_addr;
685
686 /*
687 * Invalidate the handle to the userland mapping.
688 * Note: We must ensure that the mapping being
689 * unmapped here is the current one for the CQ. It
690 * is possible that it might not be if this CQ has
691 * been resized and the previous CQ memory has not
692 * yet been unmapped. But in that case, because of
693 * the devmap_devmem_remap(), there is no longer any
694 * association between the mapping and the real CQ
695 * kernel memory.
696 */
697 mutex_enter(&cq->cq_lock);
698 if (cq->cq_umap_dhp == dhp) {
699 cq->cq_umap_dhp = (devmap_cookie_t)NULL;
700 }
701 mutex_exit(&cq->cq_lock);
702
703 } else if (type == MLNX_UMAP_QPMEM_RSRC) {
704
705 /* Use "value" to convert to QP handle */
706 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
707 qp = (tavor_qphdl_t)rsrcp->tr_addr;
708
709 /*
710 * Invalidate the handle to the userland mapping.
711 * Note: we ensure that the mapping being unmapped
712 * here is the current one for the QP. This is
713 * more of a sanity check here since, unlike CQs
714 * (above) we do not support resize of QPs.
715 */
716 mutex_enter(&qp->qp_lock);
717 if (qp->qp_umap_dhp == dhp) {
718 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
719 }
720 mutex_exit(&qp->qp_lock);
721
722 } else if (type == MLNX_UMAP_SRQMEM_RSRC) {
723
724 /* Use "value" to convert to SRQ handle */
725 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
726 srq = (tavor_srqhdl_t)rsrcp->tr_addr;
727
728 /*
729 * Invalidate the handle to the userland mapping.
730 * Note: we ensure that the mapping being unmapped
731 			 * here is the current one for the SRQ.  This is
732 			 * more of a sanity check here since, unlike CQs
733 			 * (above) we do not support resize of SRQs.
734 */
735 mutex_enter(&srq->srq_lock);
736 if (srq->srq_umap_dhp == dhp) {
737 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
738 }
739 mutex_exit(&srq->srq_lock);
740 }
741 }
742 }
743
744
745 /*
746 * tavor_devmap_devmem_map()
747 * Context: Can be called from kernel context.
748 */
749 /* ARGSUSED */
750 static int
751 tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
752 offset_t off, size_t len, void **pvtp)
753 {
754 tavor_state_t *state;
755 tavor_devmap_track_t *dvm_track;
756 minor_t instance;
757
758 /* Get Tavor softstate structure from instance */
759 instance = TAVOR_DEV_INSTANCE(dev);
760 state = ddi_get_soft_state(tavor_statep, instance);
761 if (state == NULL) {
762 return (ENXIO);
763 }
764
765 /*
766 * Allocate an entry to track the mapping and unmapping of this
767 * resource. Note: We don't need to initialize the "refcnt" or
768 * "offset" fields here, nor do we need to initialize the mutex
769 * used with the "refcnt". Since UAR pages are single pages, they
770 * are not subject to "partial" unmappings. This makes these other
771 * fields unnecessary.
772 */
773 dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
774 sizeof (tavor_devmap_track_t), KM_SLEEP);
775 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
776 dvm_track->tdt_state = state;
777 dvm_track->tdt_size = PAGESIZE;
778
779 /*
780 * Pass the private "Tavor devmap tracking structure" back. This
781 * pointer will be returned in a subsequent "unmap" callback.
782 */
783 *pvtp = dvm_track;
784
785 return (DDI_SUCCESS);
786 }
787
788
789 /*
790 * tavor_devmap_devmem_dup()
791 * Context: Can be called from kernel context.
792 */
793 /* ARGSUSED */
794 static int
795 tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
796 devmap_cookie_t new_dhp, void **new_pvtp)
797 {
798 tavor_state_t *state;
799 tavor_devmap_track_t *dvm_track;
800 uint_t maxprot;
801 int status;
802
803 /*
804 * Extract the Tavor softstate pointer from "Tavor devmap tracking
805 * structure" (in "pvtp"). Note: If the tracking structure is NULL
806 * here, it means that the mapping corresponds to an invalid mapping.
807 * In this case, it can be safely ignored ("new_pvtp" set to NULL).
808 */
809 dvm_track = (tavor_devmap_track_t *)pvtp;
810 if (dvm_track == NULL) {
811 *new_pvtp = NULL;
812 return (DDI_SUCCESS);
813 }
814
815 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
816 state = dvm_track->tdt_state;
817
818 /*
819 * Since this devmap_dup() entry point is generally called
820 * when a process does fork(2), it is incumbent upon the driver
821 	 * to ensure that the child does not inherit a valid copy of
822 * the parent's resource. This is accomplished by using
823 * devmap_devmem_remap() to invalidate the child's mapping to the
824 * kernel memory.
825 */
826 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
827 status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
828 dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
829 if (status != DDI_SUCCESS) {
830 TAVOR_WARNING(state, "failed in tavor_devmap_devmem_dup()");
831 return (status);
832 }
833
834 /*
835 * Since the region is invalid, there is no need for us to
836 * allocate and continue to track an additional "Tavor devmap
837 * tracking structure". Instead we return NULL here, which is an
838 * indication to the devmap_unmap() entry point that this entry
839 * can be safely ignored.
840 */
841 *new_pvtp = NULL;
842
843 return (DDI_SUCCESS);
844 }
845
846
847 /*
848 * tavor_devmap_devmem_unmap()
849 * Context: Can be called from kernel context.
850 */
851 /* ARGSUSED */
852 static void
853 tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
854 size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
855 devmap_cookie_t new_dhp2, void **pvtp2)
856 {
857 tavor_devmap_track_t *dvm_track;
858
859 /*
860 * Free up the "Tavor devmap tracking structure" (in "pvtp").
861 * There cannot be "partial" unmappings here because all UAR pages
862 * are single pages. Note: If the tracking structure is NULL here,
863 * it means that the mapping corresponds to an invalid mapping. In
864 * this case, it can be safely ignored.
865 */
866 dvm_track = (tavor_devmap_track_t *)pvtp;
867 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
868 if (dvm_track == NULL) {
869 return;
870 }
871
872 kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
873 }
874
875
876 /*
877 * tavor_umap_ci_data_in()
878 * Context: Can be called from user or kernel context.
879 */
880 /* ARGSUSED */
881 ibt_status_t
882 tavor_umap_ci_data_in(tavor_state_t *state, ibt_ci_data_flags_t flags,
883 ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
884 {
885 int status;
886
887 /*
888 * Depending on the type of object about which additional information
889 * is being provided (currently only MR is supported), we call the
890 * appropriate resource-specific function.
891 */
892 switch (object) {
893 case IBT_HDL_MR:
894 status = tavor_umap_mr_data_in((tavor_mrhdl_t)hdl,
895 (ibt_mr_data_in_t *)data_p, data_sz);
896 if (status != DDI_SUCCESS) {
897 return (status);
898 }
899 break;
900
901 /*
902 * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
903 * since the Tavor driver does not support these.
904 */
905 case IBT_HDL_HCA:
906 case IBT_HDL_QP:
907 case IBT_HDL_CQ:
908 case IBT_HDL_PD:
909 case IBT_HDL_MW:
910 case IBT_HDL_AH:
911 case IBT_HDL_SCHED:
912 case IBT_HDL_EEC:
913 case IBT_HDL_RDD:
914 case IBT_HDL_SRQ:
915 return (IBT_NOT_SUPPORTED);
916
917 /*
918 * Any other types are invalid.
919 */
920 default:
921 return (IBT_INVALID_PARAM);
922 }
923
924 return (DDI_SUCCESS);
925 }
926
927
928 /*
929 * tavor_umap_mr_data_in()
930 * Context: Can be called from user or kernel context.
931 */
932 static ibt_status_t
933 tavor_umap_mr_data_in(tavor_mrhdl_t mr, ibt_mr_data_in_t *data,
934 size_t data_sz)
935 {
936 if (data->mr_rev != IBT_MR_DATA_IN_IF_VERSION) {
937 return (IBT_NOT_SUPPORTED);
938 }
939
940 /* Check for valid MR handle pointer */
941 if (mr == NULL) {
942 return (IBT_MR_HDL_INVALID);
943 }
944
945 /* Check for valid MR input structure size */
946 if (data_sz < sizeof (ibt_mr_data_in_t)) {
947 return (IBT_INSUFF_RESOURCE);
948 }
949 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
950
951 /*
952 * Ensure that the MR corresponds to userland memory and that it is
953 * a currently valid memory region as well.
954 */
955 mutex_enter(&mr->mr_lock);
956 if ((mr->mr_is_umem == 0) || (mr->mr_umemcookie == NULL)) {
957 mutex_exit(&mr->mr_lock);
958 return (IBT_MR_HDL_INVALID);
959 }
960
961 /*
962 * If it has passed all the above checks, then extract the callback
963 * function and argument from the input structure. Copy them into
964 * the MR handle. This function will be called only if the memory
965 * corresponding to the MR handle gets a umem_lockmemory() callback.
966 */
967 mr->mr_umem_cbfunc = data->mr_func;
968 mr->mr_umem_cbarg1 = data->mr_arg1;
969 mr->mr_umem_cbarg2 = data->mr_arg2;
970 mutex_exit(&mr->mr_lock);
971
972 return (DDI_SUCCESS);
973 }
974
975
976 /*
977 * tavor_umap_ci_data_out()
978 * Context: Can be called from user or kernel context.
979 */
980 /* ARGSUSED */
981 ibt_status_t
982 tavor_umap_ci_data_out(tavor_state_t *state, ibt_ci_data_flags_t flags,
983 ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
984 {
985 int status;
986
987 /*
988 * Depending on the type of object about which additional information
989 	 * is being requested (CQ, QP, SRQ, or PD), we call the appropriate
990 	 * resource-specific mapping function.
991 */
992 switch (object) {
993 case IBT_HDL_CQ:
994 status = tavor_umap_cq_data_out((tavor_cqhdl_t)hdl,
995 (mlnx_umap_cq_data_out_t *)data_p, data_sz);
996 if (status != DDI_SUCCESS) {
997 return (status);
998 }
999 break;
1000
1001 case IBT_HDL_QP:
1002 status = tavor_umap_qp_data_out((tavor_qphdl_t)hdl,
1003 (mlnx_umap_qp_data_out_t *)data_p, data_sz);
1004 if (status != DDI_SUCCESS) {
1005 return (status);
1006 }
1007 break;
1008
1009 case IBT_HDL_SRQ:
1010 status = tavor_umap_srq_data_out((tavor_srqhdl_t)hdl,
1011 (mlnx_umap_srq_data_out_t *)data_p, data_sz);
1012 if (status != DDI_SUCCESS) {
1013 return (status);
1014 }
1015 break;
1016
1017 	case IBT_HDL_PD:
1018 		status = tavor_umap_pd_data_out((tavor_pdhdl_t)hdl,
1019 		    (mlnx_umap_pd_data_out_t *)data_p, data_sz);
1020 		if (status != DDI_SUCCESS) {
1021 			return (status);
1022 		}
1023 		break;
1024 
1025 	/*
1026 	 * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
1027 	 * since the Tavor driver does not support these.
1028 	 */
1029 case IBT_HDL_HCA:
1030 case IBT_HDL_MR:
1031 case IBT_HDL_MW:
1032 case IBT_HDL_AH:
1033 case IBT_HDL_SCHED:
1034 case IBT_HDL_EEC:
1035 case IBT_HDL_RDD:
1036 return (IBT_NOT_SUPPORTED);
1037
1038 /*
1039 * Any other types are invalid.
1040 */
1041 default:
1042 return (IBT_INVALID_PARAM);
1043 }
1044
1045 return (DDI_SUCCESS);
1046 }
1047
1048
1049 /*
1050 * tavor_umap_cq_data_out()
1051 * Context: Can be called from user or kernel context.
1052 */
1053 static ibt_status_t
1054 tavor_umap_cq_data_out(tavor_cqhdl_t cq, mlnx_umap_cq_data_out_t *data,
1055 size_t data_sz)
1056 {
1057 /* Check for valid CQ handle pointer */
1058 if (cq == NULL) {
1059 return (IBT_CQ_HDL_INVALID);
1060 }
1061
1062 /* Check for valid CQ mapping structure size */
1063 if (data_sz < sizeof (mlnx_umap_cq_data_out_t)) {
1064 return (IBT_INSUFF_RESOURCE);
1065 }
1066 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1067
1068 /*
1069 * If it has passed all the above checks, then fill in all the useful
1070 * mapping information (including the mapping offset that will be
1071 * passed back to the devmap() interface during a subsequent mmap()
1072 	 * call).
1073 *
1074 * The "offset" for CQ mmap()'s looks like this:
1075 * +----------------------------------------+--------+--------------+
1076 * | CQ Number | 0x33 | Reserved (0) |
1077 * +----------------------------------------+--------+--------------+
1078 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits
1079 *
1080 * This returns information about the mapping offset, the length of
1081 * the CQ memory, the CQ number (for use in later CQ doorbells), the
1082 * number of CQEs the CQ memory can hold, and the size of each CQE.
1083 */
1084 data->mcq_rev = MLNX_UMAP_IF_VERSION;
1085 data->mcq_mapoffset = ((((uint64_t)cq->cq_cqnum <<
1086 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_CQMEM_RSRC) << PAGESHIFT);
1087 data->mcq_maplen = cq->cq_cqinfo.qa_size;
1088 data->mcq_cqnum = cq->cq_cqnum;
1089 data->mcq_numcqe = cq->cq_bufsz;
1090 data->mcq_cqesz = sizeof (tavor_hw_cqe_t);
1091
1092 return (DDI_SUCCESS);
1093 }
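
/*
 * Illustrative use of the values filled in above (a sketch; it assumes the
 * userland library has already mmap()'d mcq_mapoffset and that "buf" is the
 * address returned by that mmap(2) call): the CQ consumer walks the ring of
 * fixed-size entries by indexing into the mapping, e.g.
 *
 *	cqe = (void *)((char *)buf +
 *	    (i % data->mcq_numcqe) * data->mcq_cqesz);
 *
 * while mcq_cqnum is what it uses when ringing the CQ doorbell through the
 * mapped UAR page.
 */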
1094
1095
1096 /*
1097 * tavor_umap_qp_data_out()
1098 * Context: Can be called from user or kernel context.
1099 */
1100 static ibt_status_t
1101 tavor_umap_qp_data_out(tavor_qphdl_t qp, mlnx_umap_qp_data_out_t *data,
1102 size_t data_sz)
1103 {
1104 /* Check for valid QP handle pointer */
1105 if (qp == NULL) {
1106 return (IBT_QP_HDL_INVALID);
1107 }
1108
1109 /* Check for valid QP mapping structure size */
1110 if (data_sz < sizeof (mlnx_umap_qp_data_out_t)) {
1111 return (IBT_INSUFF_RESOURCE);
1112 }
1113 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1114
1115 /*
1116 * If it has passed all the checks, then fill in all the useful
1117 * mapping information (including the mapping offset that will be
1118 * passed back to the devmap() interface during a subsequent mmap()
1119 	 * call).
1120 *
1121 * The "offset" for QP mmap()'s looks like this:
1122 * +----------------------------------------+--------+--------------+
1123 * | QP Number | 0x44 | Reserved (0) |
1124 * +----------------------------------------+--------+--------------+
1125 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits
1126 *
1127 * This returns information about the mapping offset, the length of
1128 * the QP memory, and the QP number (for use in later send and recv
1129 * doorbells). It also returns the following information for both
1130 * the receive work queue and the send work queue, respectively: the
1131 * offset (from the base mapped address) of the start of the given
1132 * work queue, the 64-bit IB virtual address that corresponds to
1133 	 * the base mapped address (needed for posting WQEs through the
1134 * QP doorbells), the number of WQEs the given work queue can hold,
1135 * and the size of each WQE for the given work queue.
1136 */
1137 data->mqp_rev = MLNX_UMAP_IF_VERSION;
1138 data->mqp_mapoffset = ((((uint64_t)qp->qp_qpnum <<
1139 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_QPMEM_RSRC) << PAGESHIFT);
1140 data->mqp_maplen = qp->qp_wqinfo.qa_size;
1141 data->mqp_qpnum = qp->qp_qpnum;
1142
1143 /*
1144 * If this QP is associated with a shared receive queue (SRQ),
1145 * then return invalid RecvQ parameters. Otherwise, return
1146 * the proper parameter values.
1147 */
1148 if (qp->qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
1149 data->mqp_rq_off = (uint32_t)qp->qp_wqinfo.qa_size;
1150 data->mqp_rq_desc_addr = (uint32_t)qp->qp_wqinfo.qa_size;
1151 data->mqp_rq_numwqe = 0;
1152 data->mqp_rq_wqesz = 0;
1153 } else {
1154 data->mqp_rq_off = (uintptr_t)qp->qp_rq_buf -
1155 (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1156 data->mqp_rq_desc_addr = (uint32_t)((uintptr_t)qp->qp_rq_buf -
1157 qp->qp_desc_off);
1158 data->mqp_rq_numwqe = qp->qp_rq_bufsz;
1159 data->mqp_rq_wqesz = (1 << qp->qp_rq_log_wqesz);
1160 }
1161 data->mqp_sq_off = (uintptr_t)qp->qp_sq_buf -
1162 (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1163 data->mqp_sq_desc_addr = (uint32_t)((uintptr_t)qp->qp_sq_buf -
1164 qp->qp_desc_off);
1165 data->mqp_sq_numwqe = qp->qp_sq_bufsz;
1166 data->mqp_sq_wqesz = (1 << qp->qp_sq_log_wqesz);
1167
1168 return (DDI_SUCCESS);
1169 }
1170
1171
1172 /*
1173 * tavor_umap_srq_data_out()
1174 * Context: Can be called from user or kernel context.
1175 */
1176 static ibt_status_t
1177 tavor_umap_srq_data_out(tavor_srqhdl_t srq, mlnx_umap_srq_data_out_t *data,
1178 size_t data_sz)
1179 {
1180 /* Check for valid SRQ handle pointer */
1181 if (srq == NULL) {
1182 return (IBT_SRQ_HDL_INVALID);
1183 }
1184
1185 /* Check for valid SRQ mapping structure size */
1186 if (data_sz < sizeof (mlnx_umap_srq_data_out_t)) {
1187 return (IBT_INSUFF_RESOURCE);
1188 }
1189 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1190
1191 /*
1192 * If it has passed all the checks, then fill in all the useful
1193 * mapping information (including the mapping offset that will be
1194 * passed back to the devmap() interface during a subsequent mmap()
1195 	 * call).
1196 *
1197 * The "offset" for SRQ mmap()'s looks like this:
1198 * +----------------------------------------+--------+--------------+
1199 * | SRQ Number | 0x66 | Reserved (0) |
1200 * +----------------------------------------+--------+--------------+
1201 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits
1202 *
1203 * This returns information about the mapping offset, the length of the
1204 	 * SRQ memory, and the SRQ number (for use in later recv doorbells).
1205 	 * It also returns the following information for the
1206 * shared receive queue: the offset (from the base mapped address) of
1207 * the start of the given work queue, the 64-bit IB virtual address
1208 * that corresponds to the base mapped address (needed for posting WQEs
1209 	 * through the SRQ doorbell), the number of WQEs the given work queue
1210 * can hold, and the size of each WQE for the given work queue.
1211 */
1212 data->msrq_rev = MLNX_UMAP_IF_VERSION;
1213 data->msrq_mapoffset = ((((uint64_t)srq->srq_srqnum <<
1214 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_SRQMEM_RSRC) << PAGESHIFT);
1215 data->msrq_maplen = srq->srq_wqinfo.qa_size;
1216 data->msrq_srqnum = srq->srq_srqnum;
1217
1218 data->msrq_desc_addr = (uint32_t)((uintptr_t)srq->srq_wq_buf -
1219 srq->srq_desc_off);
1220 data->msrq_numwqe = srq->srq_wq_bufsz;
1221 data->msrq_wqesz = (1 << srq->srq_wq_log_wqesz);
1222
1223 return (DDI_SUCCESS);
1224 }
1225
1226 /*
1227 * tavor_umap_pd_data_out()
1228 * Context: Can be called from user or kernel context.
1229 */
1230 static ibt_status_t
1231 tavor_umap_pd_data_out(tavor_pdhdl_t pd, mlnx_umap_pd_data_out_t *data,
1232 size_t data_sz)
1233 {
1234 /* Check for valid PD handle pointer */
1235 if (pd == NULL) {
1236 return (IBT_PD_HDL_INVALID);
1237 }
1238
1239 /* Check for valid PD mapping structure size */
1240 if (data_sz < sizeof (mlnx_umap_pd_data_out_t)) {
1241 return (IBT_INSUFF_RESOURCE);
1242 }
1243 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1244
1245 /*
1246 	 * If it has passed all the checks, then fill in the PD number
1247 	 * (the index allocated for this PD in the PD table).
1248 */
1249 data->mpd_rev = MLNX_UMAP_IF_VERSION;
1250 data->mpd_pdnum = pd->pd_pdnum;
1251
1252 return (DDI_SUCCESS);
1253 }
1254
1255 /*
1256 * tavor_umap_db_init()
1257 * Context: Only called from attach() path context
1258 */
1259 void
1260 tavor_umap_db_init(void)
1261 {
1262 /*
1263 	 * Initialize the lock used by the Tavor "userland resources database".
1264 * This is used to ensure atomic access to add, remove, and find
1265 * entries in the database.
1266 */
1267 mutex_init(&tavor_userland_rsrc_db.tdl_umapdb_lock, NULL,
1268 MUTEX_DRIVER, NULL);
1269
1270 /*
1271 * Initialize the AVL tree used for the "userland resources
1272 * database". Using an AVL tree here provides the ability to
1273 * scale the database size to large numbers of resources. The
1274 * entries in the tree are "tavor_umap_db_entry_t".
1275 * The tree is searched with the help of the
1276 * tavor_umap_db_compare() routine.
1277 */
1278 avl_create(&tavor_userland_rsrc_db.tdl_umapdb_avl,
1279 tavor_umap_db_compare, sizeof (tavor_umap_db_entry_t),
1280 offsetof(tavor_umap_db_entry_t, tdbe_avlnode));
1281 }
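
/*
 * For reference, a typical lifecycle of a database entry looks roughly like
 * the sketch below (using the routines in this file; the "key" and "rsrcp"
 * shown are placeholders):
 *
 *	umapdb = tavor_umap_db_alloc(instance, key, MLNX_UMAP_CQMEM_RSRC,
 *	    (uint64_t)(uintptr_t)rsrcp);
 *	tavor_umap_db_add(umapdb);
 *	...
 *	status = tavor_umap_db_find(instance, key, MLNX_UMAP_CQMEM_RSRC,
 *	    &value, TAVOR_UMAP_DB_REMOVE, &umapdb);
 *	if (status == DDI_SUCCESS)
 *		tavor_umap_db_free(umapdb);
 */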
1282
1283
1284 /*
1285 * tavor_umap_db_fini()
1286 * Context: Only called from attach() and/or detach() path contexts
1287 */
1288 void
1289 tavor_umap_db_fini(void)
1290 {
1291 /* Destroy the AVL tree for the "userland resources database" */
1292 avl_destroy(&tavor_userland_rsrc_db.tdl_umapdb_avl);
1293
1294 /* Destroy the lock for the "userland resources database" */
1295 mutex_destroy(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1296 }
1297
1298
1299 /*
1300 * tavor_umap_db_alloc()
1301 * Context: Can be called from user or kernel context.
1302 */
1303 tavor_umap_db_entry_t *
1304 tavor_umap_db_alloc(uint_t instance, uint64_t key, uint_t type, uint64_t value)
1305 {
1306 tavor_umap_db_entry_t *umapdb;
1307
1308 /* Allocate an entry to add to the "userland resources database" */
1309 umapdb = kmem_zalloc(sizeof (tavor_umap_db_entry_t), KM_NOSLEEP);
1310 if (umapdb == NULL) {
1311 return (NULL);
1312 }
1313 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1314
1315 /* Fill in the fields in the database entry */
1316 umapdb->tdbe_common.tdb_instance = instance;
1317 umapdb->tdbe_common.tdb_type = type;
1318 umapdb->tdbe_common.tdb_key = key;
1319 umapdb->tdbe_common.tdb_value = value;
1320
1321 return (umapdb);
1322 }
1323
1324
1325 /*
1326 * tavor_umap_db_free()
1327 * Context: Can be called from user or kernel context.
1328 */
1329 void
1330 tavor_umap_db_free(tavor_umap_db_entry_t *umapdb)
1331 {
1332 /* Free the database entry */
1333 kmem_free(umapdb, sizeof (tavor_umap_db_entry_t));
1334 }
1335
1336
1337 /*
1338 * tavor_umap_db_add()
1339 * Context: Can be called from user or kernel context.
1340 */
1341 void
1342 tavor_umap_db_add(tavor_umap_db_entry_t *umapdb)
1343 {
1344 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1345 tavor_umap_db_add_nolock(umapdb);
1346 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1347 }
1348
1349
1350 /*
1351 * tavor_umap_db_add_nolock()
1352 * Context: Can be called from user or kernel context.
1353 */
1354 void
1355 tavor_umap_db_add_nolock(tavor_umap_db_entry_t *umapdb)
1356 {
1357 tavor_umap_db_query_t query;
1358 avl_index_t where;
1359
1360 ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1361
1362 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1363
1364 /*
1365 * Copy the common portion of the "to-be-added" database entry
1366 * into the "tavor_umap_db_query_t" structure. We use this structure
1367 * (with no flags set) to find the appropriate location in the
1368 * "userland resources database" for the new entry to be added.
1369 *
1370 * Note: we expect that this entry should not be found in the
1371 * database (unless something bad has happened).
1372 */
1373 query.tqdb_common = umapdb->tdbe_common;
1374 query.tqdb_flags = 0;
1375 (void) avl_find(&tavor_userland_rsrc_db.tdl_umapdb_avl, &query,
1376 &where);
1377
1378 /*
1379 * Now, using the "where" field from the avl_find() operation
1380 * above, we will insert the new database entry ("umapdb").
1381 */
1382 avl_insert(&tavor_userland_rsrc_db.tdl_umapdb_avl, umapdb,
1383 where);
1384 }
1385
1386
1387 /*
1388 * tavor_umap_db_find()
1389 * Context: Can be called from user or kernel context.
1390 */
1391 int
1392 tavor_umap_db_find(uint_t instance, uint64_t key, uint_t type,
1393 uint64_t *value, uint_t flag, tavor_umap_db_entry_t **umapdb)
1394 {
1395 int status;
1396
1397 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1398 status = tavor_umap_db_find_nolock(instance, key, type, value, flag,
1399 umapdb);
1400 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1401
1402 return (status);
1403 }
1404
1405
1406 /*
1407 * tavor_umap_db_find_nolock()
1408 * Context: Can be called from user or kernel context.
1409 */
1410 int
1411 tavor_umap_db_find_nolock(uint_t instance, uint64_t key, uint_t type,
1412 uint64_t *value, uint_t flags, tavor_umap_db_entry_t **umapdb)
1413 {
1414 tavor_umap_db_query_t query;
1415 tavor_umap_db_entry_t *entry;
1416 avl_index_t where;
1417
1418 ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1419
1420 /*
1421 * Fill in key, type, instance, and flags values of the
1422 * tavor_umap_db_query_t in preparation for the database
1423 * lookup.
1424 */
1425 query.tqdb_flags = flags;
1426 query.tqdb_common.tdb_key = key;
1427 query.tqdb_common.tdb_type = type;
1428 query.tqdb_common.tdb_instance = instance;
1429
1430 /*
1431 * Perform the database query. If no entry is found, then
1432 * return failure, else continue.
1433 */
1434 entry = (tavor_umap_db_entry_t *)avl_find(
1435 &tavor_userland_rsrc_db.tdl_umapdb_avl, &query, &where);
1436 if (entry == NULL) {
1437 return (DDI_FAILURE);
1438 }
1439 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))
1440
1441 /*
1442 * If the flags argument specifies that the entry should
1443 * be removed if found, then call avl_remove() to remove
1444 * the entry from the database.
1445 */
1446 if (flags & TAVOR_UMAP_DB_REMOVE) {
1447
1448 avl_remove(&tavor_userland_rsrc_db.tdl_umapdb_avl, entry);
1449
1450 /*
1451 * The database entry is returned with the expectation
1452 * that the caller will use tavor_umap_db_free() to
1453 		 * free the entry's memory.  We ASSERT that "umapdb" is
1454 		 * non-NULL here since a NULL pointer should never be
1455 		 * passed for the TAVOR_UMAP_DB_REMOVE case.
1456 */
1457 ASSERT(umapdb != NULL);
1458 }
1459
1460 /*
1461 * If the caller would like visibility to the database entry
1462 * (indicated through the use of a non-NULL "umapdb" argument),
1463 * then fill it in.
1464 */
1465 if (umapdb != NULL) {
1466 *umapdb = entry;
1467 }
1468
1469 /* Extract value field from database entry and return success */
1470 *value = entry->tdbe_common.tdb_value;
1471
1472 return (DDI_SUCCESS);
1473 }
1474
1475
1476 /*
1477 * tavor_umap_umemlock_cb()
1478 * Context: Can be called from callback context.
1479 */
1480 void
1481 tavor_umap_umemlock_cb(ddi_umem_cookie_t *umem_cookie)
1482 {
1483 tavor_umap_db_entry_t *umapdb;
1484 tavor_state_t *state;
1485 tavor_rsrc_t *rsrcp;
1486 tavor_mrhdl_t mr;
1487 uint64_t value;
1488 uint_t instance;
1489 int status;
1490 void (*mr_callback)(void *, void *);
1491 void *mr_cbarg1, *mr_cbarg2;
1492
1493 /*
1494 * If this was userland memory, then we need to remove its entry
1495 * from the "userland resources database". Note: We use the
1496 * TAVOR_UMAP_DB_IGNORE_INSTANCE flag here because we don't know
1497 * which instance was used when the entry was added (but we want
1498 * to know after the entry is found using the other search criteria).
1499 */
1500 status = tavor_umap_db_find(0, (uint64_t)(uintptr_t)umem_cookie,
1501 MLNX_UMAP_MRMEM_RSRC, &value, (TAVOR_UMAP_DB_REMOVE |
1502 TAVOR_UMAP_DB_IGNORE_INSTANCE), &umapdb);
1503 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1504 if (status == DDI_SUCCESS) {
1505 instance = umapdb->tdbe_common.tdb_instance;
1506 state = ddi_get_soft_state(tavor_statep, instance);
1507 if (state == NULL) {
1508 cmn_err(CE_WARN, "Unable to match Tavor instance\n");
1509 return;
1510 }
1511
1512 /* Free the database entry */
1513 tavor_umap_db_free(umapdb);
1514
1515 /* Use "value" to convert to an MR handle */
1516 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
1517 mr = (tavor_mrhdl_t)rsrcp->tr_addr;
1518
1519 /*
1520 * If a callback has been provided, call it first. This
1521 * callback is expected to do any cleanup necessary to
1522 * guarantee that the subsequent MR deregister (below)
1523 * will succeed. Specifically, this means freeing up memory
1524 * windows which might have been associated with the MR.
1525 */
1526 mutex_enter(&mr->mr_lock);
1527 mr_callback = mr->mr_umem_cbfunc;
1528 mr_cbarg1 = mr->mr_umem_cbarg1;
1529 mr_cbarg2 = mr->mr_umem_cbarg2;
1530 mutex_exit(&mr->mr_lock);
1531 if (mr_callback != NULL) {
1532 mr_callback(mr_cbarg1, mr_cbarg2);
1533 }
1534
1535 /*
1536 * Then call tavor_mr_deregister() to release the resources
1537 * associated with the MR handle. Note: Because this routine
1538 * will also check for whether the ddi_umem_cookie_t is in the
1539 * database, it will take responsibility for disabling the
1540 * memory region and calling ddi_umem_unlock().
1541 */
1542 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
1543 TAVOR_SLEEP);
1544 if (status != DDI_SUCCESS) {
1545 TAVOR_WARNING(state, "Unexpected failure in "
1546 "deregister from callback\n");
1547 }
1548 }
1549 }
1550
1551
1552 /*
1553 * tavor_umap_db_compare()
1554 * Context: Can be called from user or kernel context.
1555 */
1556 static int
1557 tavor_umap_db_compare(const void *q, const void *e)
1558 {
1559 tavor_umap_db_common_t *entry_common, *query_common;
1560 uint_t query_flags;
1561
1562 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*((tavor_umap_db_query_t *)q)))
1563
1564 entry_common = &((tavor_umap_db_entry_t *)e)->tdbe_common;
1565 query_common = &((tavor_umap_db_query_t *)q)->tqdb_common;
1566 query_flags = ((tavor_umap_db_query_t *)q)->tqdb_flags;
1567
1568 /*
1569 * The first comparison is done on the "key" value in "query"
1570 * and "entry". If they are not equal, then the appropriate
1571 * search direction is returned. Else, we continue by
1572 * comparing "type".
1573 */
1574 if (query_common->tdb_key < entry_common->tdb_key) {
1575 return (-1);
1576 } else if (query_common->tdb_key > entry_common->tdb_key) {
1577 return (+1);
1578 }
1579
1580 /*
1581 * If the search reaches this point, then "query" and "entry"
1582 	 * have equal key values.  So we continue by comparing their
1583 * "type" values. Again, if they are not equal, then the
1584 * appropriate search direction is returned. Else, we continue
1585 * by comparing "instance".
1586 */
1587 if (query_common->tdb_type < entry_common->tdb_type) {
1588 return (-1);
1589 } else if (query_common->tdb_type > entry_common->tdb_type) {
1590 return (+1);
1591 }
1592
1593 /*
1594 * If the search reaches this point, then "query" and "entry"
1595 * have exactly the same key and type values. Now we consult
1596 * the "flags" field in the query to determine whether the
1597 * "instance" is relevant to the search. If the
1598 	 * TAVOR_UMAP_DB_IGNORE_INSTANCE flag is set, then return
1599 * success (0) here. Otherwise, continue the search by comparing
1600 * instance values and returning the appropriate search direction.
1601 */
1602 if (query_flags & TAVOR_UMAP_DB_IGNORE_INSTANCE) {
1603 return (0);
1604 }
1605
1606 /*
1607 * If the search has reached this point, then "query" and "entry"
1608 * can only be differentiated by their instance values. If these
1609 * are not equal, then return the appropriate search direction.
1610 * Else, we return success (0).
1611 */
1612 if (query_common->tdb_instance < entry_common->tdb_instance) {
1613 return (-1);
1614 } else if (query_common->tdb_instance > entry_common->tdb_instance) {
1615 return (+1);
1616 }
1617
1618 /* Everything matches... so return success */
1619 return (0);
1620 }
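
/*
 * In effect, the comparison above imposes a lexicographic ordering on the
 * tuple (key, type, instance), with "instance" ignored when the query sets
 * TAVOR_UMAP_DB_IGNORE_INSTANCE.  A compact equivalent (sketch only, with
 * "q" and "e" standing for the query's and entry's common portions):
 *
 *	if (q->tdb_key != e->tdb_key)
 *		return ((q->tdb_key < e->tdb_key) ? -1 : +1);
 *	if (q->tdb_type != e->tdb_type)
 *		return ((q->tdb_type < e->tdb_type) ? -1 : +1);
 *	if (query_flags & TAVOR_UMAP_DB_IGNORE_INSTANCE)
 *		return (0);
 *	if (q->tdb_instance != e->tdb_instance)
 *		return ((q->tdb_instance < e->tdb_instance) ? -1 : +1);
 *	return (0);
 */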
1621
1622
1623 /*
1624 * tavor_umap_db_set_onclose_cb()
1625 * Context: Can be called from user or kernel context.
1626 */
1627 int
1628 tavor_umap_db_set_onclose_cb(dev_t dev, uint64_t flag,
1629 void (*callback)(void *), void *arg)
1630 {
1631 tavor_umap_db_priv_t *priv;
1632 tavor_umap_db_entry_t *umapdb;
1633 minor_t instance;
1634 uint64_t value;
1635 int status;
1636
1637 instance = TAVOR_DEV_INSTANCE(dev);
1638 if (instance == -1) {
1639 return (DDI_FAILURE);
1640 }
1641
1642 if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1643 return (DDI_FAILURE);
1644 }
1645
1646 /*
1647 * Grab the lock for the "userland resources database" and find
1648 * the entry corresponding to this minor number. Once it's found,
1649 * allocate (if necessary) and add an entry (in the "tdb_priv"
1650 * field) to indicate that further processing may be needed during
1651 * Tavor's close() handling.
1652 */
1653 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1654 status = tavor_umap_db_find_nolock(instance, dev,
1655 MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1656 if (status != DDI_SUCCESS) {
1657 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1658 return (DDI_FAILURE);
1659 }
1660
1661 priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1662 if (priv == NULL) {
1663 priv = (tavor_umap_db_priv_t *)kmem_zalloc(
1664 sizeof (tavor_umap_db_priv_t), KM_NOSLEEP);
1665 if (priv == NULL) {
1666 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1667 return (DDI_FAILURE);
1668 }
1669 }
1670
1671 /*
1672 * Save away the callback and argument to be used during Tavor's
1673 * close() processing.
1674 */
1675 priv->tdp_cb = callback;
1676 priv->tdp_arg = arg;
1677
1678 umapdb->tdbe_common.tdb_priv = (void *)priv;
1679 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1680
1681 return (DDI_SUCCESS);
1682 }
1683
1684
1685 /*
1686 * tavor_umap_db_clear_onclose_cb()
1687 * Context: Can be called from user or kernel context.
1688 */
1689 int
1690 tavor_umap_db_clear_onclose_cb(dev_t dev, uint64_t flag)
1691 {
1692 tavor_umap_db_priv_t *priv;
1693 tavor_umap_db_entry_t *umapdb;
1694 minor_t instance;
1695 uint64_t value;
1696 int status;
1697
1698 instance = TAVOR_DEV_INSTANCE(dev);
1699 if (instance == -1) {
1700 return (DDI_FAILURE);
1701 }
1702
1703 if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1704 return (DDI_FAILURE);
1705 }
1706
1707 /*
1708 * Grab the lock for the "userland resources database" and find
1709 * the entry corresponding to this minor number. Once it's found,
1710 * remove the entry (in the "tdb_priv" field) that indicated the
1711 * need for further processing during Tavor's close(). Free the
1712 * entry, if appropriate.
1713 */
1714 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1715 status = tavor_umap_db_find_nolock(instance, dev,
1716 MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1717 if (status != DDI_SUCCESS) {
1718 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1719 return (DDI_FAILURE);
1720 }
1721
1722 priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1723 if (priv != NULL) {
1724 kmem_free(priv, sizeof (tavor_umap_db_priv_t));
1725 priv = NULL;
1726 }
1727
1728 umapdb->tdbe_common.tdb_priv = (void *)priv;
1729 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1730 return (DDI_SUCCESS);
1731 }
1732
1733
1734 /*
1735  * tavor_umap_db_handle_onclose_cb()
1736 * Context: Can be called from user or kernel context.
1737 */
1738 void
1739 tavor_umap_db_handle_onclose_cb(tavor_umap_db_priv_t *priv)
1740 {
1741 void (*callback)(void *);
1742
1743 ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1744
1745 /*
1746 * Call the callback.
1747 * Note: Currently there is only one callback (in "tdp_cb"), but
1748 * in the future there may be more, depending on what other types
1749 * of interaction there are between userland processes and the
1750 * driver.
1751 */
1752 callback = priv->tdp_cb;
1753 callback(priv->tdp_arg);
1754 }
1755