/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * tavor_umap.c
 *    Tavor Userland Mapping Routines
 *
 *    Implements all the routines necessary for enabling direct userland
 *    access to the Tavor hardware.  This includes all routines necessary
 *    for maintaining the "userland resources database" and all the support
 *    routines for the devmap calls.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/file.h>
#include <sys/avl.h>
#include <sys/sysmacros.h>

#include <sys/ib/adapters/tavor/tavor.h>

/* Tavor HCA state pointer (extern) */
extern void *tavor_statep;

/* Tavor HCA Userland Resource Database (extern) */
extern tavor_umap_db_t tavor_userland_rsrc_db;

static int tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
    tavor_rsrc_t *rsrcp, size_t *maplen, int *err);
static int tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
    tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
static int tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
    tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
static int tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
    tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
static int tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
    offset_t off, size_t len, void **pvtp);
static int tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp,
    devmap_cookie_t new_dhp, void **new_pvtp);
static void tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp,
    offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
    devmap_cookie_t new_dhp2, void **pvtp2);
static int tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev,
    uint_t flags, offset_t off, size_t len, void **pvtp);
static int tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
    devmap_cookie_t new_dhp, void **new_pvtp);
static void tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp,
    offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
    devmap_cookie_t new_dhp2, void **pvtp2);
static ibt_status_t tavor_umap_mr_data_in(tavor_mrhdl_t mr,
    ibt_mr_data_in_t *data, size_t data_sz);
static ibt_status_t tavor_umap_cq_data_out(tavor_cqhdl_t cq,
    mlnx_umap_cq_data_out_t *data, size_t data_sz);
static ibt_status_t tavor_umap_qp_data_out(tavor_qphdl_t qp,
    mlnx_umap_qp_data_out_t *data, size_t data_sz);
static ibt_status_t tavor_umap_srq_data_out(tavor_srqhdl_t srq,
    mlnx_umap_srq_data_out_t *data, size_t data_sz);
static int tavor_umap_db_compare(const void *query, const void *entry);
static ibt_status_t tavor_umap_pd_data_out(tavor_pdhdl_t pd,
    mlnx_umap_pd_data_out_t *data, size_t data_sz);


/*
 * These callbacks are passed to devmap_umem_setup() and
 * devmap_devmem_setup(), respectively.  They are used to handle (among
 * other things) partial unmappings and to provide a method for
 * invalidating mappings inherited as a result of a fork(2) system call.
 */
static struct devmap_callback_ctl tavor_devmap_umem_cbops = {
	DEVMAP_OPS_REV,
	tavor_devmap_umem_map,
	NULL,
	tavor_devmap_umem_dup,
	tavor_devmap_umem_unmap
};
static struct devmap_callback_ctl tavor_devmap_devmem_cbops = {
	DEVMAP_OPS_REV,
	tavor_devmap_devmem_map,
	NULL,
	tavor_devmap_devmem_dup,
	tavor_devmap_devmem_unmap
};

/*
 * tavor_devmap()
 *    Context: Can be called from user context.
 */
/* ARGSUSED */
int
tavor_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	tavor_state_t	*state;
	tavor_rsrc_t	*rsrcp;
	minor_t		instance;
	uint64_t	key, value;
	uint_t		type;
	int		err, status;

	TAVOR_TNF_ENTER(tavor_devmap);

	/* Get Tavor softstate structure from instance */
	instance = TAVOR_DEV_INSTANCE(dev);
	state = ddi_get_soft_state(tavor_statep, instance);
	if (state == NULL) {
		TNF_PROBE_0(tavor_devmap_gss_fail, TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_devmap);
		return (ENXIO);
	}

	/*
	 * Access to the Tavor devmap interface is not allowed in
	 * "maintenance mode".
	 */
	if (state->ts_operational_mode == TAVOR_MAINTENANCE_MODE) {
		TNF_PROBE_0(tavor_devmap_maintenance_mode_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_devmap);
		return (EFAULT);
	}

	/*
	 * The bottom bits of "offset" are undefined (number depends on
	 * system PAGESIZE).  Shifting these off leaves us with a "key".
	 * The "key" is actually a combination of both a real key value
	 * (for the purpose of database lookup) and a "type" value.  We
	 * extract this information before doing the database lookup.
	 */
	key = off >> PAGESHIFT;
	type = key & MLNX_UMAP_RSRC_TYPE_MASK;
	key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
	status = tavor_umap_db_find(instance, key, type, &value, 0, NULL);
	if (status == DDI_SUCCESS) {
		rsrcp = (tavor_rsrc_t *)(uintptr_t)value;

		switch (type) {
		case MLNX_UMAP_UARPG_RSRC:
			/*
			 * Double check that the process that open()'d Tavor
			 * is the same process attempting to mmap() the UAR
			 * page.
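			 * (For the UAR page, the database "key" is the PID
			 * of the process that opened the device, which is
			 * why it is compared against ddi_get_pid() below.)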
			 */
			if (key != ddi_get_pid()) {
				TNF_PROBE_0(tavor_devmap_uarpg_invpid_fail,
				    TAVOR_TNF_ERROR, "");
				TAVOR_TNF_EXIT(tavor_devmap);
				return (EINVAL);
			}

			/* Map the UAR page out for userland access */
			status = tavor_umap_uarpg(state, dhp, rsrcp, maplen,
			    &err);
			if (status != DDI_SUCCESS) {
				TNF_PROBE_0(tavor_devmap_uarpg_map_fail,
				    TAVOR_TNF_ERROR, "");
				TAVOR_TNF_EXIT(tavor_devmap);
				return (err);
			}
			break;

		case MLNX_UMAP_CQMEM_RSRC:
			/* Map the CQ memory out for userland access */
			status = tavor_umap_cqmem(state, dhp, rsrcp, off,
			    maplen, &err);
			if (status != DDI_SUCCESS) {
				TNF_PROBE_0(tavor_devmap_cqmem_map_fail,
				    TAVOR_TNF_ERROR, "");
				TAVOR_TNF_EXIT(tavor_devmap);
				return (err);
			}
			break;

		case MLNX_UMAP_QPMEM_RSRC:
			/* Map the QP memory out for userland access */
			status = tavor_umap_qpmem(state, dhp, rsrcp, off,
			    maplen, &err);
			if (status != DDI_SUCCESS) {
				TNF_PROBE_0(tavor_devmap_qpmem_map_fail,
				    TAVOR_TNF_ERROR, "");
				TAVOR_TNF_EXIT(tavor_devmap);
				return (err);
			}
			break;

		case MLNX_UMAP_SRQMEM_RSRC:
			/* Map the SRQ memory out for userland access */
			status = tavor_umap_srqmem(state, dhp, rsrcp, off,
			    maplen, &err);
			if (status != DDI_SUCCESS) {
				TNF_PROBE_0(tavor_devmap_srqmem_map_fail,
				    TAVOR_TNF_ERROR, "");
				TAVOR_TNF_EXIT(tavor_devmap);
				return (err);
			}
			break;

		default:
			TAVOR_WARNING(state, "unexpected rsrc type in devmap");
			TNF_PROBE_0(tavor_devmap_invrsrc_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_devmap);
			return (EINVAL);
		}
	} else {
		TNF_PROBE_0(tavor_devmap_umap_lookup_fail, TAVOR_TNF_ERROR,
		    "");
		TAVOR_TNF_EXIT(tavor_devmap);
		return (EINVAL);
	}

	TAVOR_TNF_EXIT(tavor_devmap);
	return (0);
}


/*
 * tavor_umap_uarpg()
 *    Context: Can be called from user context.
 */
static int
tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
    tavor_rsrc_t *rsrcp, size_t *maplen, int *err)
{
	int	status;
	uint_t	maxprot;

	TAVOR_TNF_ENTER(tavor_umap_uarpg);

	/* Map out the UAR page (doorbell page) */
	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
	status = devmap_devmem_setup(dhp, state->ts_dip,
	    &tavor_devmap_devmem_cbops, TAVOR_UAR_BAR, (rsrcp->tr_indx <<
	    PAGESHIFT), PAGESIZE, maxprot, DEVMAP_ALLOW_REMAP,
	    &state->ts_reg_accattr);
	if (status < 0) {
		*err = status;
		TNF_PROBE_0(tavor_umap_uarpg_devmap_fail, TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_uarpg);
		return (DDI_FAILURE);
	}

	*maplen = PAGESIZE;
	TAVOR_TNF_EXIT(tavor_umap_uarpg);
	return (DDI_SUCCESS);
}


/*
 * tavor_umap_cqmem()
 *    Context: Can be called from user context.
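 *
 *    Maps the CQ's ddi_umem_alloc()'d queue memory out to userland with
 *    devmap_umem_setup(), rounding the mapping length up to a whole
 *    number of pages.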
 */
/* ARGSUSED */
static int
tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
    tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
{
	tavor_cqhdl_t	cq;
	size_t		size;
	uint_t		maxprot;
	int		status;

	TAVOR_TNF_ENTER(tavor_umap_cqmem);

	/* Extract the Tavor CQ handle pointer from the tavor_rsrc_t */
	cq = (tavor_cqhdl_t)rsrcp->tr_addr;

	/* Round-up the CQ size to system page size */
	size = ptob(btopr(cq->cq_cqinfo.qa_size));

	/* Map out the CQ memory */
	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
	status = devmap_umem_setup(dhp, state->ts_dip,
	    &tavor_devmap_umem_cbops, cq->cq_cqinfo.qa_umemcookie, 0, size,
	    maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
	if (status < 0) {
		*err = status;
		TNF_PROBE_0(tavor_umap_cqmem_devmap_fail, TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_cqmem);
		return (DDI_FAILURE);
	}
	*maplen = size;

	TAVOR_TNF_EXIT(tavor_umap_cqmem);
	return (DDI_SUCCESS);
}


/*
 * tavor_umap_qpmem()
 *    Context: Can be called from user context.
 */
/* ARGSUSED */
static int
tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
    tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
{
	tavor_qphdl_t	qp;
	offset_t	offset;
	size_t		size;
	uint_t		maxprot;
	int		status;

	TAVOR_TNF_ENTER(tavor_umap_qpmem);

	/* Extract the Tavor QP handle pointer from the tavor_rsrc_t */
	qp = (tavor_qphdl_t)rsrcp->tr_addr;

	/*
	 * Calculate the offset of the first work queue (send or recv) into
	 * the memory (ddi_umem_alloc()) allocated previously for the QP.
	 */
	offset = (offset_t)((uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
	    (uintptr_t)qp->qp_wqinfo.qa_buf_real);

	/* Round-up the QP work queue sizes to system page size */
	size = ptob(btopr(qp->qp_wqinfo.qa_size));

	/* Map out the QP memory */
	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
	status = devmap_umem_setup(dhp, state->ts_dip,
	    &tavor_devmap_umem_cbops, qp->qp_wqinfo.qa_umemcookie, offset,
	    size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
	if (status < 0) {
		*err = status;
		TNF_PROBE_0(tavor_umap_qpmem_devmap_fail, TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_qpmem);
		return (DDI_FAILURE);
	}
	*maplen = size;

	TAVOR_TNF_EXIT(tavor_umap_qpmem);
	return (DDI_SUCCESS);
}


/*
 * tavor_umap_srqmem()
 *    Context: Can be called from user context.
 */
/* ARGSUSED */
static int
tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
    tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
{
	tavor_srqhdl_t	srq;
	offset_t	offset;
	size_t		size;
	uint_t		maxprot;
	int		status;

	TAVOR_TNF_ENTER(tavor_umap_srqmem);

	/* Extract the Tavor SRQ handle pointer from the tavor_rsrc_t */
	srq = (tavor_srqhdl_t)rsrcp->tr_addr;

	/*
	 * Calculate the offset of the first shared recv queue into the memory
	 * (ddi_umem_alloc()) allocated previously for the SRQ.
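	 * This is simply the distance from the real (unaligned) buffer
	 * start to the aligned buffer start.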
	 */
	offset = (offset_t)((uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
	    (uintptr_t)srq->srq_wqinfo.qa_buf_real);

	/* Round-up the SRQ work queue sizes to system page size */
	size = ptob(btopr(srq->srq_wqinfo.qa_size));

	/* Map out the SRQ memory */
	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
	status = devmap_umem_setup(dhp, state->ts_dip,
	    &tavor_devmap_umem_cbops, srq->srq_wqinfo.qa_umemcookie, offset,
	    size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
	if (status < 0) {
		*err = status;
		TNF_PROBE_0(tavor_umap_srqmem_devmap_fail, TAVOR_TNF_ERROR,
		    "");
		TAVOR_TNF_EXIT(tavor_umap_srqmem);
		return (DDI_FAILURE);
	}
	*maplen = size;

	TAVOR_TNF_EXIT(tavor_umap_srqmem);
	return (DDI_SUCCESS);
}


/*
 * tavor_devmap_umem_map()
 *    Context: Can be called from kernel context.
 */
/* ARGSUSED */
static int
tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
    offset_t off, size_t len, void **pvtp)
{
	tavor_state_t		*state;
	tavor_devmap_track_t	*dvm_track;
	tavor_cqhdl_t		cq;
	tavor_qphdl_t		qp;
	tavor_srqhdl_t		srq;
	minor_t			instance;
	uint64_t		key;
	uint_t			type;

	TAVOR_TNF_ENTER(tavor_devmap_umem_map);

	/* Get Tavor softstate structure from instance */
	instance = TAVOR_DEV_INSTANCE(dev);
	state = ddi_get_soft_state(tavor_statep, instance);
	if (state == NULL) {
		TNF_PROBE_0(tavor_devmap_umem_map_gss_fail, TAVOR_TNF_ERROR,
		    "");
		TAVOR_TNF_EXIT(tavor_devmap_umem_map);
		return (ENXIO);
	}

	/*
	 * The bottom bits of "offset" are undefined (number depends on
	 * system PAGESIZE).  Shifting these off leaves us with a "key".
	 * The "key" is actually a combination of both a real key value
	 * (for the purpose of database lookup) and a "type" value.  Although
	 * we are not going to do any database lookup per se, we do want
	 * to extract the "key" and the "type" (to enable faster lookup of
	 * the appropriate CQ or QP handle).
	 */
	key = off >> PAGESHIFT;
	type = key & MLNX_UMAP_RSRC_TYPE_MASK;
	key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;

	/*
	 * Allocate an entry to track the mapping and unmapping (specifically,
	 * partial unmapping) of this resource.
	 */
	dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
	    sizeof (tavor_devmap_track_t), KM_SLEEP);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
	dvm_track->tdt_offset = off;
	dvm_track->tdt_state = state;
	dvm_track->tdt_refcnt = 1;
	mutex_init(&dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->ts_intrmsi_pri));

	/*
	 * Depending on the type of resource that has been mapped out, we
	 * need to update the QP or CQ handle to reflect that it has, in
	 * fact, been mapped.  This allows the driver code which frees a QP
	 * or a CQ to know whether it is appropriate to do a
	 * devmap_devmem_remap() to invalidate the userland mapping for the
	 * corresponding queue's memory.
	 */
	if (type == MLNX_UMAP_CQMEM_RSRC) {

		/* Use "key" (CQ number) to do fast lookup of CQ handle */
		cq = tavor_cqhdl_from_cqnum(state, key);

		/*
		 * Update the handle to the userland mapping.  Note:  If
		 * the CQ already has a valid userland mapping, then stop
		 * and return failure.
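		 * (Each CQ supports at most one userland mapping at a time.)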
		 */
		mutex_enter(&cq->cq_lock);
		if (cq->cq_umap_dhp == NULL) {
			cq->cq_umap_dhp = dhp;
			dvm_track->tdt_size = cq->cq_cqinfo.qa_size;
			mutex_exit(&cq->cq_lock);
		} else {
			mutex_exit(&cq->cq_lock);
			goto umem_map_fail;
		}

	} else if (type == MLNX_UMAP_QPMEM_RSRC) {

		/* Use "key" (QP number) to do fast lookup of QP handle */
		qp = tavor_qphdl_from_qpnum(state, key);

		/*
		 * Update the handle to the userland mapping.  Note:  If
		 * the QP already has a valid userland mapping, then stop
		 * and return failure.
		 */
		mutex_enter(&qp->qp_lock);
		if (qp->qp_umap_dhp == NULL) {
			qp->qp_umap_dhp = dhp;
			dvm_track->tdt_size = qp->qp_wqinfo.qa_size;
			mutex_exit(&qp->qp_lock);
		} else {
			mutex_exit(&qp->qp_lock);
			goto umem_map_fail;
		}

	} else if (type == MLNX_UMAP_SRQMEM_RSRC) {

		/* Use "key" (SRQ number) to do fast lookup on SRQ handle */
		srq = tavor_srqhdl_from_srqnum(state, key);

		/*
		 * Update the handle to the userland mapping.  Note:  If the
		 * SRQ already has a valid userland mapping, then stop and
		 * return failure.
		 */
		mutex_enter(&srq->srq_lock);
		if (srq->srq_umap_dhp == NULL) {
			srq->srq_umap_dhp = dhp;
			dvm_track->tdt_size = srq->srq_wqinfo.qa_size;
			mutex_exit(&srq->srq_lock);
		} else {
			mutex_exit(&srq->srq_lock);
			goto umem_map_fail;
		}
	}

	/*
	 * Pass the private "Tavor devmap tracking structure" back.  This
	 * pointer will be returned in subsequent "unmap" callbacks.
	 */
	*pvtp = dvm_track;

	TAVOR_TNF_EXIT(tavor_devmap_umem_map);
	return (DDI_SUCCESS);

umem_map_fail:
	mutex_destroy(&dvm_track->tdt_lock);
	kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
	TAVOR_TNF_EXIT(tavor_devmap_umem_map);
	return (DDI_FAILURE);
}


/*
 * tavor_devmap_umem_dup()
 *    Context: Can be called from kernel context.
 */
/* ARGSUSED */
static int
tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp, devmap_cookie_t new_dhp,
    void **new_pvtp)
{
	tavor_state_t		*state;
	tavor_devmap_track_t	*dvm_track, *new_dvm_track;
	uint_t			maxprot;
	int			status;

	TAVOR_TNF_ENTER(tavor_devmap_umem_dup);

	/*
	 * Extract the Tavor softstate pointer from "Tavor devmap tracking
	 * structure" (in "pvtp").
	 */
	dvm_track = (tavor_devmap_track_t *)pvtp;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
	state = dvm_track->tdt_state;

	/*
	 * Since this devmap_dup() entry point is generally called
	 * when a process does fork(2), it is incumbent upon the driver
	 * to ensure that the child does not inherit a valid copy of
	 * the parent's QP or CQ resource.  This is accomplished by using
	 * devmap_devmem_remap() to invalidate the child's mapping to the
	 * kernel memory.
	 */
	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
	status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
	    dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
	if (status != DDI_SUCCESS) {
		TAVOR_WARNING(state, "failed in tavor_devmap_umem_dup()");
		TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
		return (status);
	}

	/*
	 * Allocate a new entry to track the subsequent unmapping
	 * (specifically, all partial unmappings) of the child's newly
	 * invalidated resource.
	 * Note:  Setting the "tdt_size" field to zero here is an indication
	 * to the devmap_unmap() entry point that this mapping is invalid,
	 * and that its subsequent unmapping should not affect any of the
	 * parent's CQ or QP resources.
	 */
	new_dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
	    sizeof (tavor_devmap_track_t), KM_SLEEP);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*new_dvm_track))
	new_dvm_track->tdt_offset = 0;
	new_dvm_track->tdt_state = state;
	new_dvm_track->tdt_refcnt = 1;
	new_dvm_track->tdt_size = 0;
	mutex_init(&new_dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->ts_intrmsi_pri));
	*new_pvtp = new_dvm_track;

	TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
	return (DDI_SUCCESS);
}


/*
 * tavor_devmap_umem_unmap()
 *    Context: Can be called from kernel context.
 */
/* ARGSUSED */
static void
tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
    size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
    devmap_cookie_t new_dhp2, void **pvtp2)
{
	tavor_state_t		*state;
	tavor_rsrc_t		*rsrcp;
	tavor_devmap_track_t	*dvm_track;
	tavor_cqhdl_t		cq;
	tavor_qphdl_t		qp;
	tavor_srqhdl_t		srq;
	uint64_t		key, value;
	uint_t			type;
	uint_t			size;
	int			status;

	TAVOR_TNF_ENTER(tavor_devmap_umem_unmap);

	/*
	 * Extract the Tavor softstate pointer from "Tavor devmap tracking
	 * structure" (in "pvtp").
	 */
	dvm_track = (tavor_devmap_track_t *)pvtp;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
	state = dvm_track->tdt_state;

	/*
	 * Extract the "offset" from the "Tavor devmap tracking structure".
	 * Note:  The input argument "off" is ignored here because the
	 * Tavor mapping interfaces define a very specific meaning to
	 * each "logical offset".  Also extract the "key" and "type" encoded
	 * in the logical offset.
	 */
	key = dvm_track->tdt_offset >> PAGESHIFT;
	type = key & MLNX_UMAP_RSRC_TYPE_MASK;
	key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;

	/*
	 * Extract the "size" of the mapping.  If this size is determined
	 * to be zero, then it is an indication of a previously invalidated
	 * mapping, and no CQ or QP resources should be affected.
	 */
	size = dvm_track->tdt_size;

	/*
	 * If only the middle portion of a given mapping is being unmapped,
	 * then we are effectively creating one new piece of mapped memory.
	 * (The original region is divided into three pieces, of which the
	 * middle piece is being removed.  This leaves two pieces.)  Since we
	 * started with one piece and now have two pieces, we need to
	 * increment the counter in the "Tavor devmap tracking structure".
	 *
	 * If, however, the whole mapped region is being unmapped, then we
	 * have started with one region which we are completely removing.
	 * In this case, we need to decrement the counter in the "Tavor
	 * devmap tracking structure".
	 *
	 * In each of the remaining cases, we will have started with one
	 * mapped region and ended with one (different) region.  So no counter
	 * modification is necessary.
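	 *
	 * For example, unmapping only the middle page of a three-page
	 * mapping leaves two separate regions, so the count goes from one
	 * to two; unmapping the entire region drops the count by one.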
	 */
	mutex_enter(&dvm_track->tdt_lock);
	if ((new_dhp1 == NULL) && (new_dhp2 == NULL)) {
		dvm_track->tdt_refcnt--;
	} else if ((new_dhp1 != NULL) && (new_dhp2 != NULL)) {
		dvm_track->tdt_refcnt++;
	}
	mutex_exit(&dvm_track->tdt_lock);

	/*
	 * For each of the cases where the region is being divided, then we
	 * need to pass back the "Tavor devmap tracking structure".  This way
	 * we get it back when each of the remaining pieces is subsequently
	 * unmapped.
	 */
	if (new_dhp1 != NULL) {
		*pvtp1 = pvtp;
	}
	if (new_dhp2 != NULL) {
		*pvtp2 = pvtp;
	}

	/*
	 * If the "Tavor devmap tracking structure" is no longer being
	 * referenced, then free it up.  Otherwise, return.
	 */
	if (dvm_track->tdt_refcnt == 0) {
		mutex_destroy(&dvm_track->tdt_lock);
		kmem_free(dvm_track, sizeof (tavor_devmap_track_t));

		/*
		 * If the mapping was invalid (see explanation above), then
		 * no further processing is necessary.
		 */
		if (size == 0) {
			TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
			return;
		}
	} else {
		TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
		return;
	}

	/*
	 * Now that we can guarantee that the user memory is fully unmapped,
	 * we can use the "key" and "type" values to try to find the entry
	 * in the "userland resources database".  If it's found, then it
	 * indicates that the queue memory (CQ or QP) has not yet been freed.
	 * In this case, we update the corresponding CQ or QP handle to
	 * indicate that the "devmap_devmem_remap()" call will be unnecessary.
	 * If it's _not_ found, then it indicates that the CQ or QP memory
	 * was, in fact, freed before it was unmapped (thus requiring a
	 * previous invalidation by remapping - which will already have
	 * been done in the free routine).
	 */
	status = tavor_umap_db_find(state->ts_instance, key, type, &value,
	    0, NULL);
	if (status == DDI_SUCCESS) {
		/*
		 * Depending on the type of the mapped resource (CQ or QP),
		 * update handle to indicate that no invalidation remapping
		 * will be necessary.
		 */
		if (type == MLNX_UMAP_CQMEM_RSRC) {

			/* Use "value" to convert to CQ handle */
			rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
			cq = (tavor_cqhdl_t)rsrcp->tr_addr;

			/*
			 * Invalidate the handle to the userland mapping.
			 * Note:  We must ensure that the mapping being
			 * unmapped here is the current one for the CQ.  It
			 * is possible that it might not be if this CQ has
			 * been resized and the previous CQ memory has not
			 * yet been unmapped.  But in that case, because of
			 * the devmap_devmem_remap(), there is no longer any
			 * association between the mapping and the real CQ
			 * kernel memory.
			 */
			mutex_enter(&cq->cq_lock);
			if (cq->cq_umap_dhp == dhp) {
				cq->cq_umap_dhp = (devmap_cookie_t)NULL;
			}
			mutex_exit(&cq->cq_lock);

		} else if (type == MLNX_UMAP_QPMEM_RSRC) {

			/* Use "value" to convert to QP handle */
			rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
			qp = (tavor_qphdl_t)rsrcp->tr_addr;

			/*
			 * Invalidate the handle to the userland mapping.
			 * Note:  we ensure that the mapping being unmapped
			 * here is the current one for the QP.  This is
			 * more of a sanity check here since, unlike CQs
			 * (above) we do not support resize of QPs.
			 */
			mutex_enter(&qp->qp_lock);
			if (qp->qp_umap_dhp == dhp) {
				qp->qp_umap_dhp = (devmap_cookie_t)NULL;
			}
			mutex_exit(&qp->qp_lock);

		} else if (type == MLNX_UMAP_SRQMEM_RSRC) {

			/* Use "value" to convert to SRQ handle */
			rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
			srq = (tavor_srqhdl_t)rsrcp->tr_addr;

			/*
			 * Invalidate the handle to the userland mapping.
			 * Note:  we ensure that the mapping being unmapped
			 * here is the current one for the SRQ.  This is
			 * more of a sanity check here since, unlike CQs
			 * (above) we do not support resize of SRQs.
			 */
			mutex_enter(&srq->srq_lock);
			if (srq->srq_umap_dhp == dhp) {
				srq->srq_umap_dhp = (devmap_cookie_t)NULL;
			}
			mutex_exit(&srq->srq_lock);
		}
	}

	TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
}


/*
 * tavor_devmap_devmem_map()
 *    Context: Can be called from kernel context.
 */
/* ARGSUSED */
static int
tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
    offset_t off, size_t len, void **pvtp)
{
	tavor_state_t		*state;
	tavor_devmap_track_t	*dvm_track;
	minor_t			instance;

	TAVOR_TNF_ENTER(tavor_devmap_devmem_map);

	/* Get Tavor softstate structure from instance */
	instance = TAVOR_DEV_INSTANCE(dev);
	state = ddi_get_soft_state(tavor_statep, instance);
	if (state == NULL) {
		TNF_PROBE_0(tavor_devmap_devmem_map_gss_fail, TAVOR_TNF_ERROR,
		    "");
		TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
		return (ENXIO);
	}

	/*
	 * Allocate an entry to track the mapping and unmapping of this
	 * resource.  Note: We don't need to initialize the "refcnt" or
	 * "offset" fields here, nor do we need to initialize the mutex
	 * used with the "refcnt".  Since UAR pages are single pages, they
	 * are not subject to "partial" unmappings.  This makes these other
	 * fields unnecessary.
	 */
	dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
	    sizeof (tavor_devmap_track_t), KM_SLEEP);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
	dvm_track->tdt_state = state;
	dvm_track->tdt_size = PAGESIZE;

	/*
	 * Pass the private "Tavor devmap tracking structure" back.  This
	 * pointer will be returned in a subsequent "unmap" callback.
	 */
	*pvtp = dvm_track;

	TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
	return (DDI_SUCCESS);
}


/*
 * tavor_devmap_devmem_dup()
 *    Context: Can be called from kernel context.
 */
/* ARGSUSED */
static int
tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
    devmap_cookie_t new_dhp, void **new_pvtp)
{
	tavor_state_t		*state;
	tavor_devmap_track_t	*dvm_track;
	uint_t			maxprot;
	int			status;

	TAVOR_TNF_ENTER(tavor_devmap_devmem_dup);

	/*
	 * Extract the Tavor softstate pointer from "Tavor devmap tracking
	 * structure" (in "pvtp").  Note: If the tracking structure is NULL
	 * here, it means that the mapping corresponds to an invalid mapping.
	 * In this case, it can be safely ignored ("new_pvtp" set to NULL).
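	 * (A NULL "pvtp" results from an earlier devmem_dup() of an
	 * already-invalidated mapping; see the end of this function.)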
	 */
	dvm_track = (tavor_devmap_track_t *)pvtp;
	if (dvm_track == NULL) {
		*new_pvtp = NULL;
		TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
		return (DDI_SUCCESS);
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
	state = dvm_track->tdt_state;

	/*
	 * Since this devmap_dup() entry point is generally called
	 * when a process does fork(2), it is incumbent upon the driver
	 * to ensure that the child does not inherit a valid copy of
	 * the parent's resource.  This is accomplished by using
	 * devmap_devmem_remap() to invalidate the child's mapping to the
	 * kernel memory.
	 */
	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
	status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
	    dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
	if (status != DDI_SUCCESS) {
		TAVOR_WARNING(state, "failed in tavor_devmap_devmem_dup()");
		TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
		return (status);
	}

	/*
	 * Since the region is invalid, there is no need for us to
	 * allocate and continue to track an additional "Tavor devmap
	 * tracking structure".  Instead we return NULL here, which is an
	 * indication to the devmap_unmap() entry point that this entry
	 * can be safely ignored.
	 */
	*new_pvtp = NULL;

	TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
	return (DDI_SUCCESS);
}


/*
 * tavor_devmap_devmem_unmap()
 *    Context: Can be called from kernel context.
 */
/* ARGSUSED */
static void
tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
    size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
    devmap_cookie_t new_dhp2, void **pvtp2)
{
	tavor_devmap_track_t	*dvm_track;

	TAVOR_TNF_ENTER(tavor_devmap_devmem_unmap);

	/*
	 * Free up the "Tavor devmap tracking structure" (in "pvtp").
	 * There cannot be "partial" unmappings here because all UAR pages
	 * are single pages.  Note: If the tracking structure is NULL here,
	 * it means that the mapping corresponds to an invalid mapping.  In
	 * this case, it can be safely ignored.
	 */
	dvm_track = (tavor_devmap_track_t *)pvtp;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
	if (dvm_track == NULL) {
		TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
		return;
	}

	kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
	TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
}


/*
 * tavor_umap_ci_data_in()
 *    Context: Can be called from user or kernel context.
 */
/* ARGSUSED */
ibt_status_t
tavor_umap_ci_data_in(tavor_state_t *state, ibt_ci_data_flags_t flags,
    ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
{
	int	status;

	TAVOR_TNF_ENTER(tavor_umap_ci_data_in);

	/*
	 * Depending on the type of object about which additional information
	 * is being provided (currently only MR is supported), we call the
	 * appropriate resource-specific function.
	 */
	switch (object) {
	case IBT_HDL_MR:
		status = tavor_umap_mr_data_in((tavor_mrhdl_t)hdl,
		    (ibt_mr_data_in_t *)data_p, data_sz);
		if (status != DDI_SUCCESS) {
			TNF_PROBE_0(tavor_umap_mr_data_in_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
			return (status);
		}
		break;

	/*
	 * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
	 * since the Tavor driver does not support these.
	 */
	case IBT_HDL_HCA:
	case IBT_HDL_QP:
	case IBT_HDL_CQ:
	case IBT_HDL_PD:
	case IBT_HDL_MW:
	case IBT_HDL_AH:
	case IBT_HDL_SCHED:
	case IBT_HDL_EEC:
	case IBT_HDL_RDD:
	case IBT_HDL_SRQ:
		TNF_PROBE_0(tavor_umap_ci_data_in_unsupp_type,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
		return (IBT_NOT_SUPPORTED);

	/*
	 * Any other types are invalid.
	 */
	default:
		TNF_PROBE_0(tavor_umap_ci_data_in_invtype_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
		return (IBT_INVALID_PARAM);
	}

	TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
	return (DDI_SUCCESS);
}


/*
 * tavor_umap_mr_data_in()
 *    Context: Can be called from user or kernel context.
 */
static ibt_status_t
tavor_umap_mr_data_in(tavor_mrhdl_t mr, ibt_mr_data_in_t *data,
    size_t data_sz)
{
	TAVOR_TNF_ENTER(tavor_umap_mr_data_in);

	if (data->mr_rev != IBT_MR_DATA_IN_IF_VERSION) {
		TNF_PROBE_0(tavor_umap_mr_data_in_ver_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
		return (IBT_NOT_SUPPORTED);
	}

	/* Check for valid MR handle pointer */
	if (mr == NULL) {
		TNF_PROBE_0(tavor_umap_mr_data_in_invmrhdl_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
		return (IBT_MR_HDL_INVALID);
	}

	/* Check for valid MR input structure size */
	if (data_sz < sizeof (ibt_mr_data_in_t)) {
		TNF_PROBE_0(tavor_umap_mr_data_in_invdatasz_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
		return (IBT_INSUFF_RESOURCE);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))

	/*
	 * Ensure that the MR corresponds to userland memory and that it is
	 * a currently valid memory region as well.
	 */
	mutex_enter(&mr->mr_lock);
	if ((mr->mr_is_umem == 0) || (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		TNF_PROBE_0(tavor_umap_mr_data_in_invumem_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
		return (IBT_MR_HDL_INVALID);
	}

	/*
	 * If it has passed all the above checks, then extract the callback
	 * function and argument from the input structure.  Copy them into
	 * the MR handle.  This function will be called only if the memory
	 * corresponding to the MR handle gets a umem_lockmemory() callback.
	 */
	mr->mr_umem_cbfunc = data->mr_func;
	mr->mr_umem_cbarg1 = data->mr_arg1;
	mr->mr_umem_cbarg2 = data->mr_arg2;
	mutex_exit(&mr->mr_lock);

	TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
	return (DDI_SUCCESS);
}


/*
 * tavor_umap_ci_data_out()
 *    Context: Can be called from user or kernel context.
 */
/* ARGSUSED */
ibt_status_t
tavor_umap_ci_data_out(tavor_state_t *state, ibt_ci_data_flags_t flags,
    ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
{
	int	status;

	TAVOR_TNF_ENTER(tavor_umap_ci_data_out);

	/*
	 * Depending on the type of object about which additional information
	 * is being requested (CQ, QP, SRQ, or PD), we call the appropriate
	 * resource-specific mapping function.
	 */
	switch (object) {
	case IBT_HDL_CQ:
		status = tavor_umap_cq_data_out((tavor_cqhdl_t)hdl,
		    (mlnx_umap_cq_data_out_t *)data_p, data_sz);
		if (status != DDI_SUCCESS) {
			TNF_PROBE_0(tavor_umap_cq_data_out_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
			return (status);
		}
		break;

	case IBT_HDL_QP:
		status = tavor_umap_qp_data_out((tavor_qphdl_t)hdl,
		    (mlnx_umap_qp_data_out_t *)data_p, data_sz);
		if (status != DDI_SUCCESS) {
			TNF_PROBE_0(tavor_umap_qp_data_out_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
			return (status);
		}
		break;

	case IBT_HDL_SRQ:
		status = tavor_umap_srq_data_out((tavor_srqhdl_t)hdl,
		    (mlnx_umap_srq_data_out_t *)data_p, data_sz);
		if (status != DDI_SUCCESS) {
			TNF_PROBE_0(tavor_umap_srq_data_out_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
			return (status);
		}
		break;

	case IBT_HDL_PD:
		status = tavor_umap_pd_data_out((tavor_pdhdl_t)hdl,
		    (mlnx_umap_pd_data_out_t *)data_p, data_sz);
		if (status != DDI_SUCCESS) {
			TNF_PROBE_0(tavor_umap_pd_data_out_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
			return (status);
		}
		break;

	/*
	 * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
	 * since the Tavor driver does not support these.
	 */
	case IBT_HDL_HCA:
	case IBT_HDL_MR:
	case IBT_HDL_MW:
	case IBT_HDL_AH:
	case IBT_HDL_SCHED:
	case IBT_HDL_EEC:
	case IBT_HDL_RDD:
		TNF_PROBE_0(tavor_umap_ci_data_out_unsupp_type,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
		return (IBT_NOT_SUPPORTED);

	/*
	 * Any other types are invalid.
	 */
	default:
		TNF_PROBE_0(tavor_umap_ci_data_out_invtype_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
		return (IBT_INVALID_PARAM);
	}

	TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
	return (DDI_SUCCESS);
}


/*
 * tavor_umap_cq_data_out()
 *    Context: Can be called from user or kernel context.
 */
static ibt_status_t
tavor_umap_cq_data_out(tavor_cqhdl_t cq, mlnx_umap_cq_data_out_t *data,
    size_t data_sz)
{
	TAVOR_TNF_ENTER(tavor_umap_cq_data_out);

	/* Check for valid CQ handle pointer */
	if (cq == NULL) {
		TNF_PROBE_0(tavor_umap_cq_data_out_invcqhdl_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
		return (IBT_CQ_HDL_INVALID);
	}

	/* Check for valid CQ mapping structure size */
	if (data_sz < sizeof (mlnx_umap_cq_data_out_t)) {
		TNF_PROBE_0(tavor_umap_cq_data_out_invdatasz_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
		return (IBT_INSUFF_RESOURCE);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))

	/*
	 * If it has passed all the above checks, then fill in all the useful
	 * mapping information (including the mapping offset that will be
	 * passed back to the devmap() interface during a subsequent mmap()
	 * call).
	 *
	 * The "offset" for CQ mmap()'s looks like this:
	 * +----------------------------------------+--------+--------------+
	 * |                CQ Number                |  0x33  | Reserved (0) |
	 * +----------------------------------------+--------+--------------+
	 *      (64 - 8 - PAGESHIFT) bits              8 bits   PAGESHIFT bits
	 *
	 * This returns information about the mapping offset, the length of
	 * the CQ memory, the CQ number (for use in later CQ doorbells), the
	 * number of CQEs the CQ memory can hold, and the size of each CQE.
	 */
	data->mcq_rev = MLNX_UMAP_IF_VERSION;
	data->mcq_mapoffset = ((((uint64_t)cq->cq_cqnum <<
	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_CQMEM_RSRC) << PAGESHIFT);
	data->mcq_maplen = cq->cq_cqinfo.qa_size;
	data->mcq_cqnum = cq->cq_cqnum;
	data->mcq_numcqe = cq->cq_bufsz;
	data->mcq_cqesz = sizeof (tavor_hw_cqe_t);

	TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
	return (DDI_SUCCESS);
}


/*
 * tavor_umap_qp_data_out()
 *    Context: Can be called from user or kernel context.
 */
static ibt_status_t
tavor_umap_qp_data_out(tavor_qphdl_t qp, mlnx_umap_qp_data_out_t *data,
    size_t data_sz)
{
	TAVOR_TNF_ENTER(tavor_umap_qp_data_out);

	/* Check for valid QP handle pointer */
	if (qp == NULL) {
		TNF_PROBE_0(tavor_umap_qp_data_out_invqphdl_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
		return (IBT_QP_HDL_INVALID);
	}

	/* Check for valid QP mapping structure size */
	if (data_sz < sizeof (mlnx_umap_qp_data_out_t)) {
		TNF_PROBE_0(tavor_umap_qp_data_out_invdatasz_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
		return (IBT_INSUFF_RESOURCE);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))

	/*
	 * If it has passed all the checks, then fill in all the useful
	 * mapping information (including the mapping offset that will be
	 * passed back to the devmap() interface during a subsequent mmap()
	 * call).
	 *
	 * The "offset" for QP mmap()'s looks like this:
	 * +----------------------------------------+--------+--------------+
	 * |                QP Number                |  0x44  | Reserved (0) |
	 * +----------------------------------------+--------+--------------+
	 *      (64 - 8 - PAGESHIFT) bits              8 bits   PAGESHIFT bits
	 *
	 * This returns information about the mapping offset, the length of
	 * the QP memory, and the QP number (for use in later send and recv
	 * doorbells).  It also returns the following information for both
	 * the receive work queue and the send work queue, respectively:  the
	 * offset (from the base mapped address) of the start of the given
	 * work queue, the 64-bit IB virtual address that corresponds to
	 * the base mapped address (needed for posting WQEs through the
	 * QP doorbells), the number of WQEs the given work queue can hold,
	 * and the size of each WQE for the given work queue.
	 */
	data->mqp_rev = MLNX_UMAP_IF_VERSION;
	data->mqp_mapoffset = ((((uint64_t)qp->qp_qpnum <<
	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_QPMEM_RSRC) << PAGESHIFT);
	data->mqp_maplen = qp->qp_wqinfo.qa_size;
	data->mqp_qpnum = qp->qp_qpnum;

	/*
	 * If this QP is associated with a shared receive queue (SRQ),
	 * then return invalid RecvQ parameters.  Otherwise, return
	 * the proper parameter values.
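	 * (The "invalid" values below set the receive queue offset to the
	 * end of the mapping and report zero WQEs of zero size.)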
	 */
	if (qp->qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
		data->mqp_rq_off = (uint32_t)qp->qp_wqinfo.qa_size;
		data->mqp_rq_desc_addr = (uint32_t)qp->qp_wqinfo.qa_size;
		data->mqp_rq_numwqe = 0;
		data->mqp_rq_wqesz = 0;
	} else {
		data->mqp_rq_off = (uintptr_t)qp->qp_rq_buf -
		    (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
		data->mqp_rq_desc_addr = (uint32_t)((uintptr_t)qp->qp_rq_buf -
		    qp->qp_desc_off);
		data->mqp_rq_numwqe = qp->qp_rq_bufsz;
		data->mqp_rq_wqesz = (1 << qp->qp_rq_log_wqesz);
	}
	data->mqp_sq_off = (uintptr_t)qp->qp_sq_buf -
	    (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
	data->mqp_sq_desc_addr = (uint32_t)((uintptr_t)qp->qp_sq_buf -
	    qp->qp_desc_off);
	data->mqp_sq_numwqe = qp->qp_sq_bufsz;
	data->mqp_sq_wqesz = (1 << qp->qp_sq_log_wqesz);

	TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
	return (DDI_SUCCESS);
}


/*
 * tavor_umap_srq_data_out()
 *    Context: Can be called from user or kernel context.
 */
static ibt_status_t
tavor_umap_srq_data_out(tavor_srqhdl_t srq, mlnx_umap_srq_data_out_t *data,
    size_t data_sz)
{
	TAVOR_TNF_ENTER(tavor_umap_srq_data_out);

	/* Check for valid SRQ handle pointer */
	if (srq == NULL) {
		TNF_PROBE_0(tavor_umap_srq_data_out_invsrqhdl_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
		return (IBT_SRQ_HDL_INVALID);
	}

	/* Check for valid SRQ mapping structure size */
	if (data_sz < sizeof (mlnx_umap_srq_data_out_t)) {
		TNF_PROBE_0(tavor_umap_srq_data_out_invdatasz_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
		return (IBT_INSUFF_RESOURCE);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))

	/*
	 * If it has passed all the checks, then fill in all the useful
	 * mapping information (including the mapping offset that will be
	 * passed back to the devmap() interface during a subsequent mmap()
	 * call).
	 *
	 * The "offset" for SRQ mmap()'s looks like this:
	 * +----------------------------------------+--------+--------------+
	 * |                SRQ Number               |  0x66  | Reserved (0) |
	 * +----------------------------------------+--------+--------------+
	 *      (64 - 8 - PAGESHIFT) bits              8 bits   PAGESHIFT bits
	 *
	 * This returns information about the mapping offset, the length of
	 * the SRQ memory, and the SRQ number (for use in later send and recv
	 * doorbells).  It also returns the following information for the
	 * shared receive queue:  the offset (from the base mapped address) of
	 * the start of the given work queue, the 64-bit IB virtual address
	 * that corresponds to the base mapped address (needed for posting
	 * WQEs through the QP doorbells), the number of WQEs the given work
	 * queue can hold, and the size of each WQE for the given work queue.
	 */
	data->msrq_rev = MLNX_UMAP_IF_VERSION;
	data->msrq_mapoffset = ((((uint64_t)srq->srq_srqnum <<
	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_SRQMEM_RSRC) << PAGESHIFT);
	data->msrq_maplen = srq->srq_wqinfo.qa_size;
	data->msrq_srqnum = srq->srq_srqnum;

	data->msrq_desc_addr = (uint32_t)((uintptr_t)srq->srq_wq_buf -
	    srq->srq_desc_off);
	data->msrq_numwqe = srq->srq_wq_bufsz;
	data->msrq_wqesz = (1 << srq->srq_wq_log_wqesz);

	TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
	return (DDI_SUCCESS);
}

/*
 * tavor_umap_pd_data_out()
 *    Context: Can be called from user or kernel context.
 */
static ibt_status_t
tavor_umap_pd_data_out(tavor_pdhdl_t pd, mlnx_umap_pd_data_out_t *data,
    size_t data_sz)
{
	TAVOR_TNF_ENTER(tavor_umap_pd_data_out);

	/* Check for valid PD handle pointer */
	if (pd == NULL) {
		TNF_PROBE_0(tavor_umap_pd_data_out_invpdhdl_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
		return (IBT_PD_HDL_INVALID);
	}

	/* Check for valid PD mapping structure size */
	if (data_sz < sizeof (mlnx_umap_pd_data_out_t)) {
		TNF_PROBE_0(tavor_umap_pd_data_out_invdatasz_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
		return (IBT_INSUFF_RESOURCE);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))

	/*
	 * If it has passed all the checks, then fill in the PD table index
	 * (the PD table allocated index for the PD pd_pdnum).
	 */
	data->mpd_rev = MLNX_UMAP_IF_VERSION;
	data->mpd_pdnum = pd->pd_pdnum;

	TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
	return (DDI_SUCCESS);
}

/*
 * tavor_umap_db_init()
 *    Context: Only called from attach() path context
 */
void
tavor_umap_db_init(void)
{
	TAVOR_TNF_ENTER(tavor_umap_db_init);

	/*
	 * Initialize the lock used by the Tavor "userland resources
	 * database".  This is used to ensure atomic access to add, remove,
	 * and find entries in the database.
	 */
	mutex_init(&tavor_userland_rsrc_db.tdl_umapdb_lock, NULL,
	    MUTEX_DRIVER, NULL);

	/*
	 * Initialize the AVL tree used for the "userland resources
	 * database".  Using an AVL tree here provides the ability to
	 * scale the database size to large numbers of resources.  The
	 * entries in the tree are "tavor_umap_db_entry_t".
	 * The tree is searched with the help of the
	 * tavor_umap_db_compare() routine.
	 */
	avl_create(&tavor_userland_rsrc_db.tdl_umapdb_avl,
	    tavor_umap_db_compare, sizeof (tavor_umap_db_entry_t),
	    offsetof(tavor_umap_db_entry_t, tdbe_avlnode));

	TAVOR_TNF_EXIT(tavor_umap_db_init);
}


/*
 * tavor_umap_db_fini()
 *    Context: Only called from attach() and/or detach() path contexts
 */
void
tavor_umap_db_fini(void)
{
	TAVOR_TNF_ENTER(tavor_umap_db_fini);

	/* Destroy the AVL tree for the "userland resources database" */
	avl_destroy(&tavor_userland_rsrc_db.tdl_umapdb_avl);

	/* Destroy the lock for the "userland resources database" */
	mutex_destroy(&tavor_userland_rsrc_db.tdl_umapdb_lock);

	TAVOR_TNF_EXIT(tavor_umap_db_fini);
}


/*
 * tavor_umap_db_alloc()
 *    Context: Can be called from user or kernel context.
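 *
 *    Allocates (KM_NOSLEEP) and fills in a new database entry; returns
 *    NULL if the allocation fails.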
 */
tavor_umap_db_entry_t *
tavor_umap_db_alloc(uint_t instance, uint64_t key, uint_t type, uint64_t value)
{
	tavor_umap_db_entry_t	*umapdb;

	TAVOR_TNF_ENTER(tavor_umap_db_alloc);

	/* Allocate an entry to add to the "userland resources database" */
	umapdb = kmem_zalloc(sizeof (tavor_umap_db_entry_t), KM_NOSLEEP);
	if (umapdb == NULL) {
		TNF_PROBE_0(tavor_umap_db_alloc_kmz_fail, TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_db_alloc);
		return (NULL);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))

	/* Fill in the fields in the database entry */
	umapdb->tdbe_common.tdb_instance = instance;
	umapdb->tdbe_common.tdb_type = type;
	umapdb->tdbe_common.tdb_key = key;
	umapdb->tdbe_common.tdb_value = value;

	TAVOR_TNF_EXIT(tavor_umap_db_alloc);
	return (umapdb);
}


/*
 * tavor_umap_db_free()
 *    Context: Can be called from user or kernel context.
 */
void
tavor_umap_db_free(tavor_umap_db_entry_t *umapdb)
{
	TAVOR_TNF_ENTER(tavor_umap_db_free);

	/* Free the database entry */
	kmem_free(umapdb, sizeof (tavor_umap_db_entry_t));

	TAVOR_TNF_EXIT(tavor_umap_db_free);
}


/*
 * tavor_umap_db_add()
 *    Context: Can be called from user or kernel context.
 */
void
tavor_umap_db_add(tavor_umap_db_entry_t *umapdb)
{
	TAVOR_TNF_ENTER(tavor_umap_db_add);

	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
	tavor_umap_db_add_nolock(umapdb);
	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);

	TAVOR_TNF_EXIT(tavor_umap_db_add);
}


/*
 * tavor_umap_db_add_nolock()
 *    Context: Can be called from user or kernel context.
 */
void
tavor_umap_db_add_nolock(tavor_umap_db_entry_t *umapdb)
{
	tavor_umap_db_query_t	query;
	avl_index_t		where;

	TAVOR_TNF_ENTER(tavor_umap_db_add_nolock);

	ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))

	/*
	 * Copy the common portion of the "to-be-added" database entry
	 * into the "tavor_umap_db_query_t" structure.  We use this structure
	 * (with no flags set) to find the appropriate location in the
	 * "userland resources database" for the new entry to be added.
	 *
	 * Note:  we expect that this entry should not be found in the
	 * database (unless something bad has happened).
	 */
	query.tqdb_common = umapdb->tdbe_common;
	query.tqdb_flags = 0;
	(void) avl_find(&tavor_userland_rsrc_db.tdl_umapdb_avl, &query,
	    &where);

	/*
	 * Now, using the "where" field from the avl_find() operation
	 * above, we will insert the new database entry ("umapdb").
	 */
	avl_insert(&tavor_userland_rsrc_db.tdl_umapdb_avl, umapdb,
	    where);

	TAVOR_TNF_EXIT(tavor_umap_db_add_nolock);
}


/*
 * tavor_umap_db_find()
 *    Context: Can be called from user or kernel context.
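 *
 *    This is simply a locked wrapper around tavor_umap_db_find_nolock().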
 */
int
tavor_umap_db_find(uint_t instance, uint64_t key, uint_t type,
    uint64_t *value, uint_t flag, tavor_umap_db_entry_t **umapdb)
{
	int	status;

	TAVOR_TNF_ENTER(tavor_umap_db_find);

	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
	status = tavor_umap_db_find_nolock(instance, key, type, value, flag,
	    umapdb);
	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);

	TAVOR_TNF_EXIT(tavor_umap_db_find);
	return (status);
}


/*
 * tavor_umap_db_find_nolock()
 *    Context: Can be called from user or kernel context.
 */
int
tavor_umap_db_find_nolock(uint_t instance, uint64_t key, uint_t type,
    uint64_t *value, uint_t flags, tavor_umap_db_entry_t **umapdb)
{
	tavor_umap_db_query_t	query;
	tavor_umap_db_entry_t	*entry;
	avl_index_t		where;

	TAVOR_TNF_ENTER(tavor_umap_db_find_nolock);

	ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));

	/*
	 * Fill in key, type, instance, and flags values of the
	 * tavor_umap_db_query_t in preparation for the database
	 * lookup.
	 */
	query.tqdb_flags = flags;
	query.tqdb_common.tdb_key = key;
	query.tqdb_common.tdb_type = type;
	query.tqdb_common.tdb_instance = instance;

	/*
	 * Perform the database query.  If no entry is found, then
	 * return failure, else continue.
	 */
	entry = (tavor_umap_db_entry_t *)avl_find(
	    &tavor_userland_rsrc_db.tdl_umapdb_avl, &query, &where);
	if (entry == NULL) {
		TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
		return (DDI_FAILURE);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))

	/*
	 * If the flags argument specifies that the entry should
	 * be removed if found, then call avl_remove() to remove
	 * the entry from the database.
	 */
	if (flags & TAVOR_UMAP_DB_REMOVE) {

		avl_remove(&tavor_userland_rsrc_db.tdl_umapdb_avl, entry);

		/*
		 * The database entry is returned with the expectation
		 * that the caller will use tavor_umap_db_free() to
		 * free the entry's memory.  ASSERT that this is non-NULL.
		 * A NULL pointer should never be passed for the
		 * TAVOR_UMAP_DB_REMOVE case.
		 */
		ASSERT(umapdb != NULL);
	}

	/*
	 * If the caller would like visibility to the database entry
	 * (indicated through the use of a non-NULL "umapdb" argument),
	 * then fill it in.
	 */
	if (umapdb != NULL) {
		*umapdb = entry;
	}

	/* Extract value field from database entry and return success */
	*value = entry->tdbe_common.tdb_value;

	TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
	return (DDI_SUCCESS);
}


/*
 * tavor_umap_umemlock_cb()
 *    Context: Can be called from callback context.
 */
void
tavor_umap_umemlock_cb(ddi_umem_cookie_t *umem_cookie)
{
	tavor_umap_db_entry_t	*umapdb;
	tavor_state_t		*state;
	tavor_rsrc_t		*rsrcp;
	tavor_mrhdl_t		mr;
	uint64_t		value;
	uint_t			instance;
	int			status;
	void			(*mr_callback)(void *, void *);
	void			*mr_cbarg1, *mr_cbarg2;

	TAVOR_TNF_ENTER(tavor_umap_umemlock_cb);

	/*
	 * If this was userland memory, then we need to remove its entry
	 * from the "userland resources database".  Note:  We use the
	 * TAVOR_UMAP_DB_IGNORE_INSTANCE flag here because we don't know
	 * which instance was used when the entry was added (but we want
	 * to know after the entry is found using the other search criteria).
	 */
	status = tavor_umap_db_find(0, (uint64_t)(uintptr_t)umem_cookie,
	    MLNX_UMAP_MRMEM_RSRC, &value, (TAVOR_UMAP_DB_REMOVE |
	    TAVOR_UMAP_DB_IGNORE_INSTANCE), &umapdb);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
	if (status == DDI_SUCCESS) {
		instance = umapdb->tdbe_common.tdb_instance;
		state = ddi_get_soft_state(tavor_statep, instance);
		if (state == NULL) {
			cmn_err(CE_WARN, "Unable to match Tavor instance\n");
			TNF_PROBE_0(tavor_umap_umemlock_cb_gss_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
			return;
		}

		/* Free the database entry */
		tavor_umap_db_free(umapdb);

		/* Use "value" to convert to an MR handle */
		rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
		mr = (tavor_mrhdl_t)rsrcp->tr_addr;

		/*
		 * If a callback has been provided, call it first.  This
		 * callback is expected to do any cleanup necessary to
		 * guarantee that the subsequent MR deregister (below)
		 * will succeed.  Specifically, this means freeing up memory
		 * windows which might have been associated with the MR.
		 */
		mutex_enter(&mr->mr_lock);
		mr_callback = mr->mr_umem_cbfunc;
		mr_cbarg1 = mr->mr_umem_cbarg1;
		mr_cbarg2 = mr->mr_umem_cbarg2;
		mutex_exit(&mr->mr_lock);
		if (mr_callback != NULL) {
			mr_callback(mr_cbarg1, mr_cbarg2);
		}

		/*
		 * Then call tavor_mr_deregister() to release the resources
		 * associated with the MR handle.  Note:  Because this routine
		 * will also check for whether the ddi_umem_cookie_t is in the
		 * database, it will take responsibility for disabling the
		 * memory region and calling ddi_umem_unlock().
		 */
		status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
		    TAVOR_SLEEP);
		if (status != DDI_SUCCESS) {
			TAVOR_WARNING(state, "Unexpected failure in "
			    "deregister from callback\n");
			TNF_PROBE_0(tavor_umap_umemlock_cb_dereg_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
		}
	}

	TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
}


/*
 * tavor_umap_db_compare()
 *    Context: Can be called from user or kernel context.
 */
static int
tavor_umap_db_compare(const void *q, const void *e)
{
	tavor_umap_db_common_t	*entry_common, *query_common;
	uint_t			query_flags;

	TAVOR_TNF_ENTER(tavor_umap_db_compare);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*((tavor_umap_db_query_t *)q)))

	entry_common = &((tavor_umap_db_entry_t *)e)->tdbe_common;
	query_common = &((tavor_umap_db_query_t *)q)->tqdb_common;
	query_flags = ((tavor_umap_db_query_t *)q)->tqdb_flags;

	/*
	 * The first comparison is done on the "key" value in "query"
	 * and "entry".  If they are not equal, then the appropriate
	 * search direction is returned.  Else, we continue by
	 * comparing "type".
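	 *
	 * The overall ordering is:  key, then type, then (unless the
	 * TAVOR_UMAP_DB_IGNORE_INSTANCE flag is set) instance.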

/*
 * tavor_umap_db_compare()
 *    Context: Can be called from user or kernel context.
 */
static int
tavor_umap_db_compare(const void *q, const void *e)
{
	tavor_umap_db_common_t	*entry_common, *query_common;
	uint_t			query_flags;

	TAVOR_TNF_ENTER(tavor_umap_db_compare);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*((tavor_umap_db_query_t *)q)))

	entry_common = &((tavor_umap_db_entry_t *)e)->tdbe_common;
	query_common = &((tavor_umap_db_query_t *)q)->tqdb_common;
	query_flags = ((tavor_umap_db_query_t *)q)->tqdb_flags;

	/*
	 * The first comparison is done on the "key" values in "query"
	 * and "entry".  If they are not equal, then the appropriate
	 * search direction is returned.  Else, we continue by
	 * comparing "type".
	 */
	if (query_common->tdb_key < entry_common->tdb_key) {
		TAVOR_TNF_EXIT(tavor_umap_db_compare);
		return (-1);
	} else if (query_common->tdb_key > entry_common->tdb_key) {
		TAVOR_TNF_EXIT(tavor_umap_db_compare);
		return (+1);
	}

	/*
	 * If the search reaches this point, then "query" and "entry"
	 * have equal key values.  So we continue by comparing their
	 * "type" values.  Again, if they are not equal, then the
	 * appropriate search direction is returned.  Else, we continue
	 * by comparing "instance".
	 */
	if (query_common->tdb_type < entry_common->tdb_type) {
		TAVOR_TNF_EXIT(tavor_umap_db_compare);
		return (-1);
	} else if (query_common->tdb_type > entry_common->tdb_type) {
		TAVOR_TNF_EXIT(tavor_umap_db_compare);
		return (+1);
	}

	/*
	 * If the search reaches this point, then "query" and "entry"
	 * have exactly the same key and type values.  Now we consult
	 * the "flags" field in the query to determine whether the
	 * "instance" is relevant to the search.  If the
	 * TAVOR_UMAP_DB_IGNORE_INSTANCE flag is set, then return
	 * success (0) here.  Otherwise, continue the search by comparing
	 * instance values and returning the appropriate search direction.
	 */
	if (query_flags & TAVOR_UMAP_DB_IGNORE_INSTANCE) {
		TAVOR_TNF_EXIT(tavor_umap_db_compare);
		return (0);
	}

	/*
	 * If the search has reached this point, then "query" and "entry"
	 * can only be differentiated by their instance values.  If these
	 * are not equal, then return the appropriate search direction.
	 * Else, we return success (0).
	 */
	if (query_common->tdb_instance < entry_common->tdb_instance) {
		TAVOR_TNF_EXIT(tavor_umap_db_compare);
		return (-1);
	} else if (query_common->tdb_instance > entry_common->tdb_instance) {
		TAVOR_TNF_EXIT(tavor_umap_db_compare);
		return (+1);
	}

	/* Everything matches... so return success */
	TAVOR_TNF_EXIT(tavor_umap_db_compare);
	return (0);
}

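/*
 * Illustrative sketch (not part of the original source): the comparator
 * above establishes a total order on (key, type, instance), so it can be
 * handed to avl_create() when the userland resources database is set up.
 * The AVL-node field name below is hypothetical; the real initialization
 * lives elsewhere in the driver.
 *
 *	avl_create(&tavor_userland_rsrc_db.tdl_umapdb_avl,
 *	    tavor_umap_db_compare, sizeof (tavor_umap_db_entry_t),
 *	    offsetof(tavor_umap_db_entry_t, tdbe_avlnode));
 */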

/*
 * tavor_umap_db_set_onclose_cb()
 *    Context: Can be called from user or kernel context.
 */
int
tavor_umap_db_set_onclose_cb(dev_t dev, uint64_t flag,
    void (*callback)(void *), void *arg)
{
	tavor_umap_db_priv_t	*priv;
	tavor_umap_db_entry_t	*umapdb;
	minor_t			instance;
	uint64_t		value;
	int			status;

	TAVOR_TNF_ENTER(tavor_umap_db_set_onclose_cb);

	instance = TAVOR_DEV_INSTANCE(dev);
	if (instance == -1) {
		TNF_PROBE_0(tavor_umap_db_set_onclose_cb_inst_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
		return (DDI_FAILURE);
	}

	if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
		TNF_PROBE_0(tavor_umap_db_set_onclose_cb_invflag_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
		return (DDI_FAILURE);
	}

	/*
	 * Grab the lock for the "userland resources database" and find
	 * the entry corresponding to this minor number.  Once it's found,
	 * allocate (if necessary) and add an entry (in the "tdb_priv"
	 * field) to indicate that further processing may be needed during
	 * Tavor's close() handling.
	 */
	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
	status = tavor_umap_db_find_nolock(instance, dev,
	    MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_umap_db_set_onclose_cb_find_fail,
		    TAVOR_TNF_ERROR, "");
		mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
		TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
		return (DDI_FAILURE);
	}

	priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
	if (priv == NULL) {
		priv = (tavor_umap_db_priv_t *)kmem_zalloc(
		    sizeof (tavor_umap_db_priv_t), KM_NOSLEEP);
		if (priv == NULL) {
			TNF_PROBE_0(tavor_umap_db_set_onclose_cb_kmz_fail,
			    TAVOR_TNF_ERROR, "");
			mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
			TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
			return (DDI_FAILURE);
		}
	}

	/*
	 * Save away the callback and argument to be used during Tavor's
	 * close() processing.
	 */
	priv->tdp_cb = callback;
	priv->tdp_arg = arg;

	umapdb->tdbe_common.tdb_priv = (void *)priv;
	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);

	TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
	return (DDI_SUCCESS);
}


/*
 * tavor_umap_db_clear_onclose_cb()
 *    Context: Can be called from user or kernel context.
 */
int
tavor_umap_db_clear_onclose_cb(dev_t dev, uint64_t flag)
{
	tavor_umap_db_priv_t	*priv;
	tavor_umap_db_entry_t	*umapdb;
	minor_t			instance;
	uint64_t		value;
	int			status;

	TAVOR_TNF_ENTER(tavor_umap_db_clear_onclose_cb);

	instance = TAVOR_DEV_INSTANCE(dev);
	if (instance == -1) {
		TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_inst_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
		return (DDI_FAILURE);
	}

	if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
		TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_invflag_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
		return (DDI_FAILURE);
	}

	/*
	 * Grab the lock for the "userland resources database" and find
	 * the entry corresponding to this minor number.  Once it's found,
	 * remove the entry (in the "tdb_priv" field) that indicated the
	 * need for further processing during Tavor's close().  Free the
	 * entry, if appropriate.
	 */
	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
	status = tavor_umap_db_find_nolock(instance, dev,
	    MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_find_fail,
		    TAVOR_TNF_ERROR, "");
		mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
		TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
		return (DDI_FAILURE);
	}

	priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
	if (priv != NULL) {
		kmem_free(priv, sizeof (tavor_umap_db_priv_t));
		priv = NULL;
	}

	umapdb->tdbe_common.tdb_priv = (void *)priv;
	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);

	TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
	return (DDI_SUCCESS);
}

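/*
 * Illustrative sketch (not part of the original source): the set/clear
 * pair above is intended to bracket a long-running operation (currently
 * only a flash update, per TAVOR_ONCLOSE_FLASH_INPROGRESS) so that a
 * premature close() can trigger cleanup.  The callback and argument
 * names below are hypothetical.
 *
 *	(void) tavor_umap_db_set_onclose_cb(dev,
 *	    TAVOR_ONCLOSE_FLASH_INPROGRESS, my_flash_cleanup, my_arg);
 *	... perform the flash operation ...
 *	(void) tavor_umap_db_clear_onclose_cb(dev,
 *	    TAVOR_ONCLOSE_FLASH_INPROGRESS);
 */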

/*
 * tavor_umap_db_handle_onclose_cb()
 *    Context: Can be called from user or kernel context.
 */
void
tavor_umap_db_handle_onclose_cb(tavor_umap_db_priv_t *priv)
{
	void	(*callback)(void *);

	ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));

	/*
	 * Call the callback.
	 * Note: Currently there is only one callback (in "tdp_cb"), but
	 * in the future there may be more, depending on what other types
	 * of interaction there are between userland processes and the
	 * driver.
	 */
	callback = priv->tdp_cb;
	callback(priv->tdp_arg);
}
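
/*
 * Illustrative sketch (not part of the original source): how close()
 * processing might invoke the handler above.  The entry lookup and the
 * "priv" retrieval shown here are hypothetical; the real close() path
 * lives elsewhere in the driver.  Note that the database lock must be
 * held, as asserted by tavor_umap_db_handle_onclose_cb().
 *
 *	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
 *	priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
 *	if (priv != NULL)
 *		tavor_umap_db_handle_onclose_cb(priv);
 *	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
 */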