/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2017 by Delphix. All rights reserved.
27 */ 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/conf.h> 33 #include <sys/systm.h> 34 #include <sys/sysmacros.h> 35 #include <sys/buf.h> 36 #include <sys/cred.h> 37 #include <sys/user.h> 38 #include <sys/stat.h> 39 #include <sys/uio.h> 40 #include <sys/vnode.h> 41 #include <sys/fs/snode.h> 42 #include <sys/open.h> 43 #include <sys/kmem.h> 44 #include <sys/file.h> 45 #include <sys/debug.h> 46 #include <sys/tnf_probe.h> 47 48 /* Don't #include <sys/ddi.h> - it #undef's getmajor() */ 49 50 #include <sys/sunddi.h> 51 #include <sys/sunndi.h> 52 #include <sys/sunpm.h> 53 #include <sys/ddi_impldefs.h> 54 #include <sys/ndi_impldefs.h> 55 #include <sys/esunddi.h> 56 #include <sys/autoconf.h> 57 #include <sys/modctl.h> 58 #include <sys/epm.h> 59 #include <sys/dacf.h> 60 #include <sys/sunmdi.h> 61 #include <sys/instance.h> 62 #include <sys/sdt.h> 63 64 static void i_attach_ctlop(dev_info_t *, ddi_attach_cmd_t, ddi_pre_post_t, int); 65 static void i_detach_ctlop(dev_info_t *, ddi_detach_cmd_t, ddi_pre_post_t, int); 66 67 /* decide what to do when a double dev_lclose is detected */ 68 #ifdef DEBUG 69 int dev_lclose_ce = CE_PANIC; 70 #else /* DEBUG */ 71 int dev_lclose_ce = CE_WARN; 72 #endif /* DEBUG */ 73 74 /* 75 * Configuration-related entry points for nexus and leaf drivers 76 */ 77 int 78 devi_identify(dev_info_t *devi) 79 { 80 struct dev_ops *ops; 81 int (*fn)(dev_info_t *); 82 83 if ((ops = ddi_get_driver(devi)) == NULL || 84 (fn = ops->devo_identify) == NULL) 85 return (-1); 86 87 return ((*fn)(devi)); 88 } 89 90 int 91 devi_probe(dev_info_t *devi) 92 { 93 int rv, probe_failed; 94 pm_ppm_cookie_t ppm_cookie; 95 struct dev_ops *ops; 96 int (*fn)(dev_info_t *); 97 98 ops = ddi_get_driver(devi); 99 ASSERT(ops); 100 101 pm_pre_probe(devi, &ppm_cookie); 102 103 /* 104 * probe(9E) in 2.0 implies that you can get 105 * away with not writing one of these .. so we 106 * pretend we're 'nulldev' if we don't find one (sigh). 
107 */ 108 if ((fn = ops->devo_probe) == NULL) { 109 if (ddi_dev_is_sid(devi) == DDI_SUCCESS) 110 rv = DDI_PROBE_DONTCARE; 111 else 112 rv = DDI_PROBE_FAILURE; 113 } else 114 rv = (*fn)(devi); 115 116 switch (rv) { 117 case DDI_PROBE_DONTCARE: 118 case DDI_PROBE_SUCCESS: 119 probe_failed = 0; 120 break; 121 default: 122 probe_failed = 1; 123 break; 124 } 125 pm_post_probe(&ppm_cookie, rv, probe_failed); 126 127 return (rv); 128 } 129 130 131 /* 132 * devi_attach() 133 * attach a device instance to the system if the driver supplies an 134 * attach(9E) entrypoint. 135 */ 136 int 137 devi_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 138 { 139 struct dev_ops *ops; 140 int error; 141 int (*fn)(dev_info_t *, ddi_attach_cmd_t); 142 pm_ppm_cookie_t pc; 143 144 if ((error = mdi_pre_attach(devi, cmd)) != DDI_SUCCESS) { 145 return (error); 146 } 147 148 pm_pre_attach(devi, &pc, cmd); 149 150 if ((cmd == DDI_RESUME || cmd == DDI_PM_RESUME) && 151 e_ddi_parental_suspend_resume(devi)) { 152 error = e_ddi_resume(devi, cmd); 153 goto done; 154 } 155 ops = ddi_get_driver(devi); 156 ASSERT(ops); 157 if ((fn = ops->devo_attach) == NULL) { 158 error = DDI_FAILURE; 159 goto done; 160 } 161 162 /* 163 * Call the driver's attach(9e) entrypoint 164 */ 165 i_attach_ctlop(devi, cmd, DDI_PRE, 0); 166 error = (*fn)(devi, cmd); 167 i_attach_ctlop(devi, cmd, DDI_POST, error); 168 169 done: 170 pm_post_attach(&pc, error); 171 mdi_post_attach(devi, cmd, error); 172 173 return (error); 174 } 175 176 /* 177 * devi_detach() 178 * detach a device instance from the system if the driver supplies a 179 * detach(9E) entrypoint. 
180 */ 181 int 182 devi_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 183 { 184 struct dev_ops *ops; 185 int error; 186 int (*fn)(dev_info_t *, ddi_detach_cmd_t); 187 pm_ppm_cookie_t pc; 188 189 ASSERT(cmd == DDI_SUSPEND || cmd == DDI_PM_SUSPEND || 190 cmd == DDI_DETACH); 191 192 if ((cmd == DDI_SUSPEND || cmd == DDI_PM_SUSPEND) && 193 e_ddi_parental_suspend_resume(devi)) { 194 return (e_ddi_suspend(devi, cmd)); 195 } 196 ops = ddi_get_driver(devi); 197 ASSERT(ops); 198 if ((fn = ops->devo_detach) == NULL) 199 return (DDI_FAILURE); 200 201 if ((error = mdi_pre_detach(devi, cmd)) != DDI_SUCCESS) { 202 return (error); 203 } 204 i_detach_ctlop(devi, cmd, DDI_PRE, 0); 205 pm_pre_detach(devi, cmd, &pc); 206 207 /* 208 * Call the driver's detach routine 209 */ 210 error = (*fn)(devi, cmd); 211 212 pm_post_detach(&pc, error); 213 i_detach_ctlop(devi, cmd, DDI_POST, error); 214 mdi_post_detach(devi, cmd, error); 215 216 return (error); 217 } 218 219 static void 220 i_attach_ctlop(dev_info_t *devi, ddi_attach_cmd_t cmd, ddi_pre_post_t w, 221 int ret) 222 { 223 int error; 224 struct attachspec as; 225 dev_info_t *pdip = ddi_get_parent(devi); 226 227 as.cmd = cmd; 228 as.when = w; 229 as.pdip = pdip; 230 as.result = ret; 231 (void) ddi_ctlops(devi, devi, DDI_CTLOPS_ATTACH, &as, &error); 232 } 233 234 static void 235 i_detach_ctlop(dev_info_t *devi, ddi_detach_cmd_t cmd, ddi_pre_post_t w, 236 int ret) 237 { 238 int error; 239 struct detachspec ds; 240 dev_info_t *pdip = ddi_get_parent(devi); 241 242 ds.cmd = cmd; 243 ds.when = w; 244 ds.pdip = pdip; 245 ds.result = ret; 246 (void) ddi_ctlops(devi, devi, DDI_CTLOPS_DETACH, &ds, &error); 247 } 248 249 /* 250 * This entry point not defined by Solaris 2.0 DDI/DKI, so 251 * its inclusion here is somewhat moot. 
252 */ 253 int 254 devi_reset(dev_info_t *devi, ddi_reset_cmd_t cmd) 255 { 256 struct dev_ops *ops; 257 int (*fn)(dev_info_t *, ddi_reset_cmd_t); 258 259 if ((ops = ddi_get_driver(devi)) == NULL || 260 (fn = ops->devo_reset) == NULL) 261 return (DDI_FAILURE); 262 263 return ((*fn)(devi, cmd)); 264 } 265 266 int 267 devi_quiesce(dev_info_t *devi) 268 { 269 struct dev_ops *ops; 270 int (*fn)(dev_info_t *); 271 272 if (((ops = ddi_get_driver(devi)) == NULL) || 273 (ops->devo_rev < 4) || ((fn = ops->devo_quiesce) == NULL)) 274 return (DDI_FAILURE); 275 276 return ((*fn)(devi)); 277 } 278 279 /* 280 * Leaf driver entry points. The following [cb]dev_* functions are *not* part 281 * of the DDI, please use functions defined in <sys/sunldi.h> and driver_lyr.c. 282 */ 283 int 284 dev_open(dev_t *devp, int flag, int type, struct cred *cred) 285 { 286 struct cb_ops *cb; 287 288 cb = devopsp[getmajor(*devp)]->devo_cb_ops; 289 return ((*cb->cb_open)(devp, flag, type, cred)); 290 } 291 292 int 293 dev_close(dev_t dev, int flag, int type, struct cred *cred) 294 { 295 struct cb_ops *cb; 296 297 cb = (devopsp[getmajor(dev)])->devo_cb_ops; 298 return ((*cb->cb_close)(dev, flag, type, cred)); 299 } 300 301 /* 302 * New Leaf driver open entry point. We make a vnode and go through specfs 303 * in order to obtain open close exclusions guarantees. Note that we drop 304 * OTYP_LYR if it was specified - we are going through specfs and it provides 305 * last close semantics (FKLYR is provided to open(9E)). Also, since 306 * spec_open will drive attach via e_ddi_hold_devi_by_dev for a makespecvp 307 * vnode with no SDIP_SET on the common snode, the dev_lopen caller no longer 308 * needs to call ddi_hold_installed_driver. 309 */ 310 int 311 dev_lopen(dev_t *devp, int flag, int otype, struct cred *cred) 312 { 313 struct vnode *vp; 314 int error; 315 struct vnode *cvp; 316 317 vp = makespecvp(*devp, (otype == OTYP_BLK) ? 
VBLK : VCHR); 318 error = VOP_OPEN(&vp, flag | FKLYR, cred, NULL); 319 if (error == 0) { 320 /* Pick up the (possibly) new dev_t value. */ 321 *devp = vp->v_rdev; 322 323 /* 324 * Place extra hold on the common vnode, which contains the 325 * open count, so that it is not destroyed by the VN_RELE of 326 * the shadow makespecvp vnode below. 327 */ 328 cvp = STOV(VTOCS(vp)); 329 VN_HOLD(cvp); 330 } 331 332 /* release the shadow makespecvp vnode. */ 333 VN_RELE(vp); 334 return (error); 335 } 336 337 /* 338 * Leaf driver close entry point. We make a vnode and go through specfs in 339 * order to obtain open close exclusions guarantees. Note that we drop 340 * OTYP_LYR if it was specified - we are going through specfs and it provides 341 * last close semantics (FLKYR is provided to close(9E)). 342 */ 343 int 344 dev_lclose(dev_t dev, int flag, int otype, struct cred *cred) 345 { 346 struct vnode *vp; 347 int error; 348 struct vnode *cvp; 349 char *funcname; 350 ulong_t offset; 351 352 vp = makespecvp(dev, (otype == OTYP_BLK) ? VBLK : VCHR); 353 error = VOP_CLOSE(vp, flag | FKLYR, 1, (offset_t)0, cred, NULL); 354 355 /* 356 * Release the extra dev_lopen hold on the common vnode. We inline a 357 * VN_RELE(cvp) call so that we can detect more dev_lclose calls than 358 * dev_lopen calls without panic. See vn_rele. If our inline of 359 * vn_rele called VOP_INACTIVE(cvp, CRED(), ...) we would panic on the 360 * "release the makespecvp vnode" VN_RELE(vp) that follows - so 361 * instead we diagnose this situation. Note that the driver has 362 * still seen a double close(9E), but that would have occurred with 363 * the old dev_close implementation too. 
364 */ 365 cvp = STOV(VTOCS(vp)); 366 mutex_enter(&cvp->v_lock); 367 switch (cvp->v_count) { 368 default: 369 VN_RELE_LOCKED(cvp); 370 break; 371 372 case 0: 373 VTOS(vp)->s_commonvp = NULL; /* avoid panic */ 374 /*FALLTHROUGH*/ 375 case 1: 376 /* 377 * The following message indicates a serious problem in the 378 * identified driver, the driver should be fixed. If obtaining 379 * a panic dump is needed to diagnose the driver problem then 380 * adding "set dev_lclose_ce=3" to /etc/system will cause a 381 * panic when this occurs. 382 */ 383 funcname = modgetsymname((uintptr_t)caller(), &offset); 384 cmn_err(dev_lclose_ce, "dev_lclose: extra close of dev_t 0x%lx " 385 "from %s`%s()", dev, mod_containing_pc(caller()), 386 funcname ? funcname : "unknown..."); 387 break; 388 } 389 mutex_exit(&cvp->v_lock); 390 391 /* release the makespecvp vnode. */ 392 VN_RELE(vp); 393 return (error); 394 } 395 396 /* 397 * Returns -1 or the instance number of the given dev_t as 398 * interpreted by the device driver. The code may load the driver 399 * but it does not attach any instances. 400 * 401 * Instance is supposed to be a int but drivers have assumed that 402 * the pointer was a pointer to "void *" instead of a pointer to 403 * "int *" so we now explicitly pass a pointer to "void *" and then 404 * cast the result to an int when returning the value. 
405 */ 406 int 407 dev_to_instance(dev_t dev) 408 { 409 major_t major = getmajor(dev); 410 struct dev_ops *ops; 411 void *vinstance; 412 int error; 413 414 /* verify that the driver is loaded */ 415 if ((ops = mod_hold_dev_by_major(major)) == NULL) 416 return (-1); 417 ASSERT(CB_DRV_INSTALLED(ops)); 418 419 /* verify that it supports the getinfo(9E) entry point */ 420 if (ops->devo_getinfo == NULL) { 421 mod_rele_dev_by_major(major); 422 return (-1); 423 } 424 425 /* ask the driver to extract the instance number from the devt */ 426 error = (*ops->devo_getinfo)(NULL, DDI_INFO_DEVT2INSTANCE, 427 (void *)dev, &vinstance); 428 429 /* release the driver */ 430 mod_rele_dev_by_major(major); 431 432 if (error != DDI_SUCCESS) 433 return (-1); 434 435 return ((int)(uintptr_t)vinstance); 436 } 437 438 static void 439 bdev_strategy_tnf_probe(struct buf *bp) 440 { 441 /* Kernel probe */ 442 TNF_PROBE_5(strategy, "io blockio", /* CSTYLED */, 443 tnf_device, device, bp->b_edev, 444 tnf_diskaddr, block, bp->b_lblkno, 445 tnf_size, size, bp->b_bcount, 446 tnf_opaque, buf, bp, 447 tnf_bioflags, flags, bp->b_flags); 448 } 449 450 int 451 bdev_strategy(struct buf *bp) 452 { 453 struct dev_ops *ops; 454 455 ops = devopsp[getmajor(bp->b_edev)]; 456 457 /* 458 * Before we hit the io:::start probe, we need to fill in the b_dip 459 * field of the buf structure. This should be -- for the most part -- 460 * incredibly cheap. If you're in this code looking to bum cycles, 461 * there is almost certainly bigger game further down the I/O path... 462 */ 463 (void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO, 464 (void *)bp->b_edev, (void **)&bp->b_dip); 465 466 DTRACE_IO1(start, struct buf *, bp); 467 bp->b_flags |= B_STARTED; 468 469 /* 470 * Call the TNF probe here instead of the inline code 471 * to force our compiler to use the tail call optimization. 
472 */ 473 bdev_strategy_tnf_probe(bp); 474 475 return (ops->devo_cb_ops->cb_strategy(bp)); 476 } 477 478 int 479 bdev_print(dev_t dev, caddr_t str) 480 { 481 struct cb_ops *cb; 482 483 cb = devopsp[getmajor(dev)]->devo_cb_ops; 484 return ((*cb->cb_print)(dev, str)); 485 } 486 487 /* 488 * Return number of DEV_BSIZE byte blocks. 489 */ 490 int 491 bdev_size(dev_t dev) 492 { 493 uint_t nblocks; 494 uint_t blksize; 495 496 if ((nblocks = e_ddi_getprop(dev, VBLK, "nblocks", 497 DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, -1)) == -1) 498 return (-1); 499 500 /* Get blksize, default to DEV_BSIZE */ 501 if ((blksize = e_ddi_getprop(dev, VBLK, "blksize", 502 DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, -1)) == -1) 503 blksize = e_ddi_getprop(DDI_DEV_T_ANY, VBLK, "device-blksize", 504 DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, DEV_BSIZE); 505 506 if (blksize >= DEV_BSIZE) 507 return (nblocks * (blksize / DEV_BSIZE)); 508 else 509 return (nblocks / (DEV_BSIZE / blksize)); 510 } 511 512 /* 513 * Same for 64-bit Nblocks property 514 */ 515 uint64_t 516 bdev_Size(dev_t dev) 517 { 518 uint64_t nblocks; 519 uint_t blksize; 520 521 if ((nblocks = e_ddi_getprop_int64(dev, VBLK, "Nblocks", 522 DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, -1)) == -1) 523 return (-1); 524 525 /* Get blksize, default to DEV_BSIZE */ 526 if ((blksize = e_ddi_getprop(dev, VBLK, "blksize", 527 DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, -1)) == -1) 528 blksize = e_ddi_getprop(DDI_DEV_T_ANY, VBLK, "device-blksize", 529 DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, DEV_BSIZE); 530 531 if (blksize >= DEV_BSIZE) 532 return (nblocks * (blksize / DEV_BSIZE)); 533 else 534 return (nblocks / (DEV_BSIZE / blksize)); 535 } 536 537 int 538 bdev_dump(dev_t dev, caddr_t addr, daddr_t blkno, int blkcnt) 539 { 540 struct cb_ops *cb; 541 542 cb = devopsp[getmajor(dev)]->devo_cb_ops; 543 return ((*cb->cb_dump)(dev, addr, blkno, blkcnt)); 544 } 545 546 int 547 cdev_read(dev_t dev, struct uio *uiop, struct cred *cred) 548 { 549 struct cb_ops *cb; 550 551 cb = 
devopsp[getmajor(dev)]->devo_cb_ops; 552 return ((*cb->cb_read)(dev, uiop, cred)); 553 } 554 555 int 556 cdev_write(dev_t dev, struct uio *uiop, struct cred *cred) 557 { 558 struct cb_ops *cb; 559 560 cb = devopsp[getmajor(dev)]->devo_cb_ops; 561 return ((*cb->cb_write)(dev, uiop, cred)); 562 } 563 564 int 565 cdev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, struct cred *cred, 566 int *rvalp) 567 { 568 struct cb_ops *cb; 569 570 cb = devopsp[getmajor(dev)]->devo_cb_ops; 571 return ((*cb->cb_ioctl)(dev, cmd, arg, mode, cred, rvalp)); 572 } 573 574 int 575 cdev_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len, 576 size_t *maplen, uint_t mode) 577 { 578 struct cb_ops *cb; 579 580 cb = devopsp[getmajor(dev)]->devo_cb_ops; 581 return ((*cb->cb_devmap)(dev, dhp, off, len, maplen, mode)); 582 } 583 584 int 585 cdev_mmap(int (*mapfunc)(dev_t, off_t, int), dev_t dev, off_t off, int prot) 586 { 587 return ((*mapfunc)(dev, off, prot)); 588 } 589 590 int 591 cdev_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len, 592 uint_t prot, uint_t maxprot, uint_t flags, cred_t *credp) 593 { 594 struct cb_ops *cb; 595 596 cb = devopsp[getmajor(dev)]->devo_cb_ops; 597 return ((*cb->cb_segmap)(dev, off, as, addrp, 598 len, prot, maxprot, flags, credp)); 599 } 600 601 int 602 cdev_poll(dev_t dev, short events, int anyyet, short *reventsp, 603 struct pollhead **pollhdrp) 604 { 605 struct cb_ops *cb; 606 607 cb = devopsp[getmajor(dev)]->devo_cb_ops; 608 return ((*cb->cb_chpoll)(dev, events, anyyet, reventsp, pollhdrp)); 609 } 610 611 /* 612 * A 'size' property can be provided by a VCHR device. 613 * 614 * Since it's defined as zero for STREAMS devices, so we avoid the 615 * overhead of looking it up. Note also that we don't force an 616 * unused driver into memory simply to ask about it's size. 
We also 617 * don't bother to ask it its size unless it's already been attached 618 * (the attach routine is the earliest place the property will be created) 619 * 620 * XXX In an ideal world, we'd call this at VOP_GETATTR() time. 621 */ 622 int 623 cdev_size(dev_t dev) 624 { 625 major_t maj; 626 struct devnames *dnp; 627 628 if ((maj = getmajor(dev)) >= devcnt) 629 return (0); 630 631 dnp = &(devnamesp[maj]); 632 LOCK_DEV_OPS(&dnp->dn_lock); 633 if (devopsp[maj] && devopsp[maj]->devo_cb_ops && 634 !devopsp[maj]->devo_cb_ops->cb_str) { 635 UNLOCK_DEV_OPS(&dnp->dn_lock); 636 return (e_ddi_getprop(dev, VCHR, "size", 637 DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, 0)); 638 } 639 UNLOCK_DEV_OPS(&dnp->dn_lock); 640 return (0); 641 } 642 643 /* 644 * same for 64-bit Size property 645 */ 646 uint64_t 647 cdev_Size(dev_t dev) 648 { 649 major_t maj; 650 struct devnames *dnp; 651 652 if ((maj = getmajor(dev)) >= devcnt) 653 return (0); 654 655 dnp = &(devnamesp[maj]); 656 LOCK_DEV_OPS(&dnp->dn_lock); 657 if (devopsp[maj] && devopsp[maj]->devo_cb_ops && 658 !devopsp[maj]->devo_cb_ops->cb_str) { 659 UNLOCK_DEV_OPS(&dnp->dn_lock); 660 return (e_ddi_getprop_int64(dev, VCHR, "Size", 661 DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, 0)); 662 } 663 UNLOCK_DEV_OPS(&dnp->dn_lock); 664 return (0); 665 } 666 667 /* 668 * XXX This routine is poorly named, because block devices can and do 669 * have properties (see bdev_size() above). 670 * 671 * XXX fix the comment in devops.h that claims that cb_prop_op 672 * is character-only. 673 */ 674 int 675 cdev_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, 676 char *name, caddr_t valuep, int *lengthp) 677 { 678 struct cb_ops *cb; 679 680 if ((cb = devopsp[DEVI(dip)->devi_major]->devo_cb_ops) == NULL) 681 return (DDI_PROP_NOT_FOUND); 682 683 return ((*cb->cb_prop_op)(dev, dip, prop_op, mod_flags, 684 name, valuep, lengthp)); 685 } 686