/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2011, 2012 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/aio_req.h>
#include <sys/cred.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/kstat.h>
#include <sys/fs/dv_node.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/note.h>
#include <sys/blkdev.h>

#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)

typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;

struct bd {
	void		*d_private;
	dev_info_t	*d_dip;
	kmutex_t	d_ocmutex;
	kmutex_t	d_iomutex;
	kmutex_t	d_statemutex;
	kcondvar_t	d_statecv;
	enum dkio_state	d_state;
	cmlb_handle_t	d_cmlbh;
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];	/* bit mask */

	uint32_t	d_qsize;
	uint32_t	d_qactive;
	uint32_t	d_maxxfer;
	uint32_t	d_blkshift;
	uint64_t	d_numblks;
	ddi_devid_t	d_devid;

	kmem_cache_t	*d_cache;
	list_t		d_runq;
	list_t		d_waitq;
	kstat_t		*d_ksp;
	kstat_io_t	*d_kiop;

	boolean_t	d_rdonly;
	boolean_t	d_removable;
	boolean_t	d_hotpluggable;
	boolean_t	d_use_dma;

	ddi_dma_attr_t	d_dma;
	bd_ops_t	d_ops;
	bd_handle_t	d_handle;
};

struct bd_handle {
	bd_ops_t	h_ops;
	ddi_dma_attr_t	*h_dma;
	dev_info_t	*h_parent;
	dev_info_t	*h_child;
	void		*h_private;
	bd_t		*h_bd;
	char		*h_name;
	char		h_addr[20];	/* enough for %X,%X */
};

struct bd_xfer_impl {
	bd_xfer_t	i_public;
	list_node_t	i_linkage;
	bd_t		*i_bd;
	buf_t		*i_bp;
	uint_t		i_num_win;
	uint_t		i_cur_win;
	off_t		i_offset;
	int		(*i_func)(void *, bd_xfer_t *);
	uint32_t	i_blkshift;
	size_t		i_len;
	size_t		i_resid;
};

#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags
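
/*
 * For illustration: with BD_MAXPART == 64, a dev_t whose minor number
 * is 130 decodes as instance 2 (130 / 64) and partition 2 (130 % 64).
 * Each attached blkdev instance therefore owns a contiguous range of
 * 64 minors, one per supported partition.
 */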

/*
 * Private prototypes.
 */

static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);

struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};

static struct cb_ops bd_cb_ops = {
	bd_open,		/* open */
	bd_close,		/* close */
	bd_strategy,		/* strategy */
	nodev,			/* print */
	bd_dump,		/* dump */
	bd_read,		/* read */
	bd_write,		/* write */
	bd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	bd_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_64BIT | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	bd_aread,		/* async read */
	bd_awrite		/* async write */
};

struct dev_ops bd_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	bd_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	bd_attach,		/* attach */
	bd_detach,		/* detach */
	nodev,			/* reset */
	&bd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"Generic Block Device",
	&bd_dev_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, { &modldrv, NULL }
};

static void *bd_state;
static krwlock_t bd_lock;

int
_init(void)
{
	int	rv;

	rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
	if (rv != DDI_SUCCESS) {
		return (rv);
	}
	rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
	rv = mod_install(&modlinkage);
	if (rv != DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_fini(void)
{
	int	rv;

	rv = mod_remove(&modlinkage);
	if (rv == DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
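
/*
 * Unload ordering note: mod_remove() must succeed before bd_lock and
 * the soft-state area are torn down; if any instance is still in use,
 * mod_remove() fails and _fini() leaves the module state intact.
 */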

static int
bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	bd_t	*bd;
	minor_t	inst;

	_NOTE(ARGUNUSED(dip));

	inst = BDINST((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		bd = ddi_get_soft_state(bd_state, inst);
		if (bd == NULL) {
			return (DDI_FAILURE);
		}
		*resultp = (void *)bd->d_dip;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(intptr_t)inst;
		break;

	default:
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

static int
bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int		inst;
	bd_handle_t	hdl;
	bd_t		*bd;
	bd_drive_t	drive;
	int		rv;
	char		name[16];
	char		kcache[32];

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* We don't do anything native for suspend/resume */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	inst = ddi_get_instance(dip);
	hdl = ddi_get_parent_data(dip);

	(void) snprintf(name, sizeof (name), "%s%d",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);

	if (hdl == NULL) {
		cmn_err(CE_WARN, "%s: missing parent data!", name);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
		return (DDI_FAILURE);
	}
	bd = ddi_get_soft_state(bd_state, inst);

	if (hdl->h_dma) {
		bd->d_dma = *(hdl->h_dma);
		bd->d_dma.dma_attr_granular =
		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
		bd->d_use_dma = B_TRUE;

		if (bd->d_maxxfer &&
		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
			cmn_err(CE_WARN,
			    "%s: inconsistent maximum transfer size!",
			    name);
			/* We force it */
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		} else {
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		}
	} else {
		bd->d_use_dma = B_FALSE;
		if (bd->d_maxxfer == 0) {
			bd->d_maxxfer = 1024 * 1024;
		}
	}
	bd->d_ops = hdl->h_ops;
	bd->d_private = hdl->h_private;
	bd->d_blkshift = 9;	/* 512 bytes, to start */

	if (bd->d_maxxfer % DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
	}
	if (bd->d_maxxfer < DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	bd->d_dip = dip;
	bd->d_handle = hdl;
	hdl->h_bd = bd;
	ddi_set_driver_private(dip, bd);

	mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);

	list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));
	list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));

	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);

	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
	if (bd->d_ksp != NULL) {
		bd->d_ksp->ks_lock = &bd->d_iomutex;
		kstat_install(bd->d_ksp);
		bd->d_kiop = bd->d_ksp->ks_data;
	} else {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat.  The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
	}
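
	/*
	 * Because d_kiop is always non-NULL after this point, the I/O
	 * completion path can update statistics unconditionally, e.g.
	 *
	 *	bd->d_kiop->reads++;
	 *
	 * (see bd_runq_exit()), whether or not the real kstat exists.
	 */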

	cmlb_alloc_handle(&bd->d_cmlbh);

	bd->d_state = DKIO_NONE;

	bzero(&drive, sizeof (drive));
	bd->d_ops.o_drive_info(bd->d_private, &drive);
	bd->d_qsize = drive.d_qsize;
	bd->d_maxxfer = drive.d_maxxfer;
	bd->d_removable = drive.d_removable;
	bd->d_hotpluggable = drive.d_hotpluggable;

	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
	    bd->d_removable, bd->d_hotpluggable,
	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
	if (rv != 0) {
		cmlb_free_handle(&bd->d_cmlbh);
		kmem_cache_destroy(bd->d_cache);
		mutex_destroy(&bd->d_iomutex);
		mutex_destroy(&bd->d_ocmutex);
		mutex_destroy(&bd->d_statemutex);
		cv_destroy(&bd->d_statecv);
		list_destroy(&bd->d_waitq);
		list_destroy(&bd->d_runq);
		if (bd->d_ksp != NULL) {
			kstat_delete(bd->d_ksp);
			bd->d_ksp = NULL;
		} else {
			kmem_free(bd->d_kiop, sizeof (kstat_io_t));
		}
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	if (bd->d_ops.o_devid_init != NULL) {
		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
		if (rv == DDI_SUCCESS) {
			if (ddi_devid_register(dip, bd->d_devid) !=
			    DDI_SUCCESS) {
				cmn_err(CE_WARN,
				    "%s: unable to register devid", name);
			}
		}
	}

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers).  Also set up properties
	 * used by HAL to identify removable media.
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);
	if (bd->d_removable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "removable-media", NULL, 0);
	}
	if (bd->d_hotpluggable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "hotpluggable", NULL, 0);
	}

	ddi_report_dev(dip);

	return (DDI_SUCCESS);
}

static int
bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	bd_t	*bd;

	bd = ddi_get_driver_private(dip);

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		/* We don't suspend, but our parent does */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
	if (bd->d_ksp != NULL) {
		kstat_delete(bd->d_ksp);
		bd->d_ksp = NULL;
	} else {
		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
	}
	cmlb_detach(bd->d_cmlbh, 0);
	cmlb_free_handle(&bd->d_cmlbh);
	if (bd->d_devid)
		ddi_devid_free(bd->d_devid);
	kmem_cache_destroy(bd->d_cache);
	mutex_destroy(&bd->d_iomutex);
	mutex_destroy(&bd->d_ocmutex);
	mutex_destroy(&bd->d_statemutex);
	cv_destroy(&bd->d_statecv);
	list_destroy(&bd->d_waitq);
	list_destroy(&bd->d_runq);
	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
	return (DDI_SUCCESS);
}

static int
bd_xfer_ctor(void *buf, void *arg, int kmflag)
{
	bd_xfer_impl_t	*xi;
	bd_t		*bd = arg;
	int		(*dcb)(caddr_t);

	if (kmflag == KM_SLEEP) {
		dcb = DDI_DMA_SLEEP;
	} else {
		dcb = DDI_DMA_DONTWAIT;
	}

	xi = buf;
	bzero(xi, sizeof (*xi));
	xi->i_bd = bd;

	if (bd->d_use_dma) {
		if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
		    &xi->i_dmah) != DDI_SUCCESS) {
			return (-1);
		}
	}

	return (0);
}
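
/*
 * Design note: allocating the DMA handle in the kmem cache constructor
 * lets each cached bd_xfer_impl_t keep its handle across
 * kmem_cache_alloc()/kmem_cache_free() cycles, so the relatively
 * expensive ddi_dma_alloc_handle() call is not repeated on every I/O.
 */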

static void
bd_xfer_dtor(void *buf, void *arg)
{
	bd_xfer_impl_t	*xi = buf;

	_NOTE(ARGUNUSED(arg));

	if (xi->i_dmah)
		ddi_dma_free_handle(&xi->i_dmah);
	xi->i_dmah = NULL;
}

static bd_xfer_impl_t *
bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
    int kmflag)
{
	bd_xfer_impl_t	*xi;
	int		rv;
	int		status;
	unsigned	dir;
	int		(*cb)(caddr_t);
	size_t		len;
	uint32_t	shift;

	if (kmflag == KM_SLEEP) {
		cb = DDI_DMA_SLEEP;
	} else {
		cb = DDI_DMA_DONTWAIT;
	}

	xi = kmem_cache_alloc(bd->d_cache, kmflag);
	if (xi == NULL) {
		bioerror(bp, ENOMEM);
		return (NULL);
	}

	ASSERT(bp);

	xi->i_bp = bp;
	xi->i_func = func;
	xi->i_blkno = bp->b_lblkno;

	if (bp->b_bcount == 0) {
		xi->i_len = 0;
		xi->i_nblks = 0;
		xi->i_kaddr = NULL;
		xi->i_resid = 0;
		xi->i_num_win = 0;
		goto done;
	}

	if (bp->b_flags & B_READ) {
		dir = DDI_DMA_READ;
		xi->i_func = bd->d_ops.o_read;
	} else {
		dir = DDI_DMA_WRITE;
		xi->i_func = bd->d_ops.o_write;
	}

	shift = bd->d_blkshift;
	xi->i_blkshift = shift;

	if (!bd->d_use_dma) {
		bp_mapin(bp);
		rv = 0;
		xi->i_offset = 0;
		xi->i_num_win =
		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
		xi->i_cur_win = 0;
		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
		xi->i_nblks = xi->i_len >> shift;
		xi->i_kaddr = bp->b_un.b_addr;
		xi->i_resid = bp->b_bcount;
	} else {

		/*
		 * We have to use consistent DMA if the address is misaligned.
		 */
		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
		} else {
			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
		}

		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
		    NULL, &xi->i_dmac, &xi->i_ndmac);
		switch (status) {
		case DDI_DMA_MAPPED:
			xi->i_num_win = 1;
			xi->i_cur_win = 0;
			xi->i_offset = 0;
			xi->i_len = bp->b_bcount;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_PARTIAL_MAP:
			xi->i_cur_win = 0;

			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
			    DDI_SUCCESS) ||
			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
			    &len, &xi->i_dmac, &xi->i_ndmac) !=
			    DDI_SUCCESS) ||
			    (P2PHASE(len, (1U << shift)) != 0)) {
				(void) ddi_dma_unbind_handle(xi->i_dmah);
				rv = EFAULT;
				goto done;
			}
			xi->i_len = len;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_NORESOURCES:
			rv = EAGAIN;
			goto done;
		case DDI_DMA_TOOBIG:
			rv = EINVAL;
			goto done;
		case DDI_DMA_NOMAPPING:
		case DDI_DMA_INUSE:
		default:
			rv = EFAULT;
			goto done;
		}
	}

done:
	if (rv != 0) {
		kmem_cache_free(bd->d_cache, xi);
		bioerror(bp, rv);
		return (NULL);
	}

	return (xi);
}

static void
bd_xfer_free(bd_xfer_impl_t *xi)
{
	if (xi->i_dmah) {
		(void) ddi_dma_unbind_handle(xi->i_dmah);
	}
	kmem_cache_free(xi->i_bd->d_cache, xi);
}
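
/*
 * Worked example (illustrative numbers): in the non-DMA path above, a
 * 3 MB request against a 1 MB d_maxxfer yields i_num_win == 3, each
 * window moving up to 1 MB through the mapped kernel address.  In the
 * DMA path, ddi_dma_getwin() plays the same role, with bd_xfer_done()
 * advancing one DMA window per completion.
 */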

static int
bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	dev_t		dev = *devp;
	bd_t		*bd;
	minor_t		part;
	minor_t		inst;
	uint64_t	mask;
	boolean_t	ndelay;
	int		rv;
	diskaddr_t	nblks;
	diskaddr_t	lba;

	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	if (otyp >= OTYPCNT)
		return (EINVAL);

	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;

	/*
	 * Block any DR events from changing the set of registered
	 * devices while we function.
	 */
	rw_enter(&bd_lock, RW_READER);
	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);

	ASSERT(part < 64);
	mask = ((uint64_t)1 << part);

	bd_update_state(bd);

	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {

		/* non-blocking opens are allowed to succeed */
		if (!ndelay) {
			rv = ENXIO;
			goto done;
		}
	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
	    NULL, NULL, 0) == 0) {

		/*
		 * We read the partinfo, verify valid ranges.  If the
		 * partition is invalid, and we aren't blocking or
		 * doing a raw access, then fail.  (Non-blocking and
		 * raw accesses can still succeed to allow a disk with
		 * bad partition data to be opened by format and fdisk.)
		 */
		if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
			rv = ENXIO;
			goto done;
		}
	} else if (!ndelay) {
		/*
		 * cmlb_partinfo failed -- invalid partition or no
		 * disk label.
		 */
		rv = ENXIO;
		goto done;
	}

	if ((flag & FWRITE) && bd->d_rdonly) {
		rv = EROFS;
		goto done;
	}

	if ((bd->d_open_excl) & (mask)) {
		rv = EBUSY;
		goto done;
	}
	if (flag & FEXCL) {
		if (bd->d_open_lyr[part]) {
			rv = EBUSY;
			goto done;
		}
		for (int i = 0; i < OTYP_LYR; i++) {
			if (bd->d_open_reg[i] & mask) {
				rv = EBUSY;
				goto done;
			}
		}
	}

	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]++;
	} else {
		bd->d_open_reg[otyp] |= mask;
	}
	if (flag & FEXCL) {
		bd->d_open_excl |= mask;
	}

	rv = 0;
done:
	mutex_exit(&bd->d_ocmutex);
	rw_exit(&bd_lock);

	return (rv);
}

static int
bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	bd_t		*bd;
	minor_t		inst;
	minor_t		part;
	uint64_t	mask;
	boolean_t	last = B_TRUE;

	_NOTE(ARGUNUSED(flag));
	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	ASSERT(part < 64);
	mask = ((uint64_t)1 << part);

	rw_enter(&bd_lock, RW_READER);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);
	if (bd->d_open_excl & mask) {
		bd->d_open_excl &= ~mask;
	}
	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]--;
	} else {
		bd->d_open_reg[otyp] &= ~mask;
	}
	for (int i = 0; i < 64; i++) {
		if (bd->d_open_lyr[i]) {
			last = B_FALSE;
		}
	}
	for (int i = 0; last && (i < OTYP_LYR); i++) {
		if (bd->d_open_reg[i]) {
			last = B_FALSE;
		}
	}
	mutex_exit(&bd->d_ocmutex);

	if (last) {
		cmlb_invalidate(bd->d_cmlbh, 0);
	}
	rw_exit(&bd_lock);

	return (0);
}
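
/*
 * Example of the open bookkeeping above: an FEXCL open of partition 3
 * sets bit 3 of d_open_excl, so any later open of that partition
 * (layered or regular) fails with EBUSY until the exclusive holder
 * closes it and bd_close() clears the bit.
 */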

static int
bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
{
	minor_t		inst;
	minor_t		part;
	diskaddr_t	pstart;
	diskaddr_t	psize;
	bd_t		*bd;
	bd_xfer_impl_t	*xi;
	buf_t		*bp;
	int		rv;

	rw_enter(&bd_lock, RW_READER);

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}
	/*
	 * do cmlb, but do it synchronously unless we already have the
	 * partition (which we probably should.)
	 */
	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
	    (void *)1)) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	if ((blkno + nblk) > psize) {
		rw_exit(&bd_lock);
		return (EINVAL);
	}
	bp = getrbuf(KM_NOSLEEP);
	if (bp == NULL) {
		rw_exit(&bd_lock);
		return (ENOMEM);
	}

	bp->b_bcount = nblk << bd->d_blkshift;
	bp->b_resid = bp->b_bcount;
	bp->b_lblkno = blkno;
	bp->b_un.b_addr = caddr;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP);
	if (xi == NULL) {
		rw_exit(&bd_lock);
		freerbuf(bp);
		return (ENOMEM);
	}
	xi->i_blkno = blkno + pstart;
	xi->i_flags = BD_XFER_POLL;
	bd_submit(bd, xi);
	rw_exit(&bd_lock);

	/*
	 * Generally, we should have run this entirely synchronously
	 * at this point and the biowait call should be a no-op.  If
	 * it didn't happen this way, it's a bug in the underlying
	 * driver not honoring BD_XFER_POLL.
	 */
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}

void
bd_minphys(struct buf *bp)
{
	minor_t	inst;
	bd_t	*bd;

	inst = BDINST(bp->b_edev);

	bd = ddi_get_soft_state(bd_state, inst);

	/*
	 * In a non-debug kernel, bd_strategy will catch !bd as
	 * well, and will fail nicely.
	 */
	ASSERT(bd);

	if (bp->b_bcount > bd->d_maxxfer)
		bp->b_bcount = bd->d_maxxfer;
}

static int
bd_read(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
}

static int
bd_write(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
}

static int
bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
}

static int
bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
}
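
/*
 * Note on the entry points above: physio(9F) and aphysio(9F) split a
 * uio(9S) request into buf(9S) transfers, calling bd_minphys() to
 * clamp each transfer to d_maxxfer before handing it to bd_strategy();
 * large application I/O thus arrives here as driver-sized chunks.
 */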

static int
bd_strategy(struct buf *bp)
{
	minor_t		inst;
	minor_t		part;
	bd_t		*bd;
	diskaddr_t	p_lba;
	diskaddr_t	p_nblks;
	diskaddr_t	b_nblks;
	bd_xfer_impl_t	*xi;
	uint32_t	shift;
	int		(*func)(void *, bd_xfer_t *);

	part = BDPART(bp->b_edev);
	inst = BDINST(bp->b_edev);

	ASSERT(bp);

	bp->b_resid = bp->b_bcount;

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
	    NULL, NULL, 0)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	shift = bd->d_blkshift;

	if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
	    (bp->b_lblkno > p_nblks)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}
	b_nblks = bp->b_bcount >> shift;
	if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
		biodone(bp);
		return (0);
	}

	if ((b_nblks + bp->b_lblkno) > p_nblks) {
		bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
		bp->b_bcount -= bp->b_resid;
	} else {
		bp->b_resid = 0;
	}
	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;

	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
	if (xi == NULL) {
		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
	}
	if (xi == NULL) {
		/* bd_xfer_alloc will have done bioerror */
		biodone(bp);
		return (0);
	}
	xi->i_blkno = bp->b_lblkno + p_lba;

	bd_submit(bd, xi);

	return (0);
}
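
/*
 * Clipping example for bd_strategy(): on a 100-block partition
 * (p_nblks == 100), a 4-block request starting at b_lblkno 98 is
 * trimmed to 2 blocks, with b_resid preset to the 2 untransferred
 * blocks' worth of bytes so the caller sees a short transfer rather
 * than an error.
 */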

static int
bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
{
	minor_t		inst;
	uint16_t	part;
	bd_t		*bd;
	void		*ptr = (void *)arg;
	int		rv;

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		return (ENXIO);
	}

	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
	if (rv != ENOTTY)
		return (rv);

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo minfo;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&minfo, sizeof (minfo));
		minfo.dki_media_type = DK_FIXED_DISK;
		minfo.dki_lbsize = (1U << bd->d_blkshift);
		minfo.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo cinfo;

		bzero(&cinfo, sizeof (cinfo));
		cinfo.dki_ctype = DKC_BLKDEV;
		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
		    "%s", ddi_driver_name(bd->d_dip));
		cinfo.dki_unit = inst;
		cinfo.dki_flags = DKI_FMTVOL;
		cinfo.dki_partition = part;
		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
		cinfo.dki_addr = 0;
		cinfo.dki_slave = 0;
		cinfo.dki_space = 0;
		cinfo.dki_prio = 0;
		cinfo.dki_vec = 0;
		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i;

		i = bd->d_removable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCHOTPLUGGABLE: {
		int i;

		i = bd->d_hotpluggable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREADONLY: {
		int i;

		i = bd->d_rdonly ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSTATE: {
		enum dkio_state state;

		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
			return (EFAULT);
		}
		if ((rv = bd_check_state(bd, &state)) != 0) {
			return (rv);
		}
		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc = NULL;

		if (flag & FKIOCTL)
			dkc = (void *)arg;

		rv = bd_flush_write_cache(bd, dkc);
		return (rv);
	}

	default:
		break;
	}
	return (ENOTTY);
}

static int
bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	bd_t	*bd;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
	if (bd == NULL)
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));

	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
	    valuep, lengthp, BDPART(dev), 0));
}

static int
bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
	bd_t		*bd;
	buf_t		*bp;
	bd_xfer_impl_t	*xi;
	int		rv;
	int		(*func)(void *, bd_xfer_t *);
	int		kmflag;

	/*
	 * If we are running in polled mode (such as during dump(9e)
	 * execution), then we cannot sleep for kernel allocations.
	 */
	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	if ((bp = getrbuf(kmflag)) == NULL) {
		return (ENOMEM);
	}

	switch (cmd) {
	case TG_READ:
		bp->b_flags = B_READ;
		func = bd->d_ops.o_read;
		break;
	case TG_WRITE:
		bp->b_flags = B_WRITE;
		func = bd->d_ops.o_write;
		break;
	default:
		freerbuf(bp);
		return (EINVAL);
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	xi = bd_xfer_alloc(bd, bp, func, kmflag);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}
	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
	xi->i_blkno = start;
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}
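
/*
 * bd_tg_rdwr() above and bd_tg_getinfo() below are the target ops that
 * cmlb uses for its own label I/O.  A non-NULL tg_cookie indicates the
 * polled (dump) path, so the transfer is marked BD_XFER_POLL and all
 * allocations use KM_NOSLEEP, mirroring bd_dump().
 */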

static int
bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	bd_t	*bd;

	_NOTE(ARGUNUSED(tg_cookie));
	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	switch (cmd) {
	case TG_GETPHYGEOM:
	case TG_GETVIRTGEOM:
		/*
		 * We don't have any "geometry" as such, let cmlb
		 * fabricate something.
		 */
		return (ENOTTY);

	case TG_GETCAPACITY:
		bd_update_state(bd);
		*(diskaddr_t *)arg = bd->d_numblks;
		return (0);

	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = (1U << bd->d_blkshift);
		return (0);

	case TG_GETATTR:
		/*
		 * It turns out that cmlb really doesn't do much for
		 * non-writable media, but let's make the information
		 * available for it in case it does more in the
		 * future.  (The value is currently used for
		 * triggering special behavior for CD-ROMs.)
		 */
		bd_update_state(bd);
		((tg_attribute_t *)arg)->media_is_writable =
		    bd->d_rdonly ? B_FALSE : B_TRUE;
		return (0);

	default:
		return (EINVAL);
	}
}

static void
bd_sched(bd_t *bd)
{
	bd_xfer_impl_t	*xi;
	struct buf	*bp;
	int		rv;

	mutex_enter(&bd->d_iomutex);

	while ((bd->d_qactive < bd->d_qsize) &&
	    ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
		bd->d_qactive++;
		kstat_waitq_to_runq(bd->d_kiop);
		list_insert_tail(&bd->d_runq, xi);

		/*
		 * Submit the job to the driver.  We drop the I/O mutex
		 * so that we can deal with the case where the driver
		 * completion routine calls back into us synchronously.
		 */

		mutex_exit(&bd->d_iomutex);

		rv = xi->i_func(bd->d_private, &xi->i_public);
		if (rv != 0) {
			bp = xi->i_bp;

			mutex_enter(&bd->d_iomutex);
			bd->d_qactive--;
			kstat_runq_exit(bd->d_kiop);
			list_remove(&bd->d_runq, xi);
			mutex_exit(&bd->d_iomutex);

			/* Only free the transfer after unlinking it. */
			bd_xfer_free(xi);
			bioerror(bp, rv);
			biodone(bp);

			mutex_enter(&bd->d_iomutex);
		} else {
			mutex_enter(&bd->d_iomutex);
		}
	}

	mutex_exit(&bd->d_iomutex);
}

static void
bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
{
	mutex_enter(&bd->d_iomutex);
	list_insert_tail(&bd->d_waitq, xi);
	kstat_waitq_enter(bd->d_kiop);
	mutex_exit(&bd->d_iomutex);

	bd_sched(bd);
}

static void
bd_runq_exit(bd_xfer_impl_t *xi, int err)
{
	bd_t	*bd = xi->i_bd;
	buf_t	*bp = xi->i_bp;

	mutex_enter(&bd->d_iomutex);
	bd->d_qactive--;
	kstat_runq_exit(bd->d_kiop);
	list_remove(&bd->d_runq, xi);
	mutex_exit(&bd->d_iomutex);

	if (err == 0) {
		if (bp->b_flags & B_READ) {
			bd->d_kiop->reads++;
			bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
		} else {
			bd->d_kiop->writes++;
			bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
		}
	}
	bd_sched(bd);
}
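
/*
 * Example of the d_qsize throttle in bd_sched(): a drive reporting
 * d_qsize == 4 never has more than four transfers on d_runq; further
 * requests wait on d_waitq and are moved over one at a time as
 * bd_runq_exit() retires completions.
 */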

static void
bd_update_state(bd_t *bd)
{
	enum dkio_state	state;
	bd_media_t	media;
	boolean_t	docmlb = B_FALSE;

	bzero(&media, sizeof (media));

	mutex_enter(&bd->d_statemutex);
	if (bd->d_ops.o_media_info(bd->d_private, &media) == 0) {
		if ((1U << bd->d_blkshift) != media.m_blksize) {
			if ((media.m_blksize < 512) ||
			    (!ISP2(media.m_blksize)) ||
			    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
				cmn_err(CE_WARN,
				    "%s%d: Invalid media block size (%d)",
				    ddi_driver_name(bd->d_dip),
				    ddi_get_instance(bd->d_dip),
				    media.m_blksize);
				/*
				 * We can't use the media, treat it as
				 * not present.
				 */
				state = DKIO_EJECTED;
				bd->d_numblks = 0;
			} else {
				bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
				bd->d_numblks = media.m_nblks;
				bd->d_rdonly = media.m_readonly;
				state = DKIO_INSERTED;
			}

			/* Device size changed */
			docmlb = B_TRUE;

		} else {
			if (bd->d_numblks != media.m_nblks) {
				/* Device size changed */
				docmlb = B_TRUE;
			}
			bd->d_numblks = media.m_nblks;
			bd->d_rdonly = media.m_readonly;
			state = DKIO_INSERTED;
		}

	} else {
		bd->d_numblks = 0;
		state = DKIO_EJECTED;
	}
	if (state != bd->d_state) {
		bd->d_state = state;
		cv_broadcast(&bd->d_statecv);
		docmlb = B_TRUE;
	}
	mutex_exit(&bd->d_statemutex);

	if (docmlb) {
		if (state == DKIO_INSERTED) {
			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
		} else {
			cmlb_invalidate(bd->d_cmlbh, 0);
		}
	}
}

static int
bd_check_state(bd_t *bd, enum dkio_state *state)
{
	clock_t	when;

	for (;;) {
		bd_update_state(bd);

		mutex_enter(&bd->d_statemutex);

		if (bd->d_state != *state) {
			*state = bd->d_state;
			mutex_exit(&bd->d_statemutex);
			break;
		}

		when = drv_usectohz(1000000);
		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
		    when, TR_CLOCK_TICK) == 0) {
			mutex_exit(&bd->d_statemutex);
			return (EINTR);
		}

		mutex_exit(&bd->d_statemutex);
	}

	return (0);
}

static int
bd_flush_write_cache_done(struct buf *bp)
{
	struct dk_callback *dc = (void *)bp->b_private;

	(*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
	kmem_free(dc, sizeof (*dc));
	freerbuf(bp);
	return (0);
}

static int
bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
{
	buf_t			*bp;
	struct dk_callback	*dc;
	bd_xfer_impl_t		*xi;
	int			rv;

	if (bd->d_ops.o_sync_cache == NULL) {
		return (ENOTSUP);
	}
	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
		return (ENOMEM);
	}
	bp->b_resid = 0;
	bp->b_bcount = 0;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}

	/* Make an asynchronous flush, but only if there is a callback */
	if (dkc != NULL && dkc->dkc_callback != NULL) {
		/* Make a private copy of the callback structure */
		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
		*dc = *dkc;
		bp->b_private = dc;
		bp->b_iodone = bd_flush_write_cache_done;

		bd_submit(bd, xi);
		return (0);
	}

	/* In case there is no callback, perform a synchronous flush */
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}
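
/*
 * Usage sketch (hypothetical layered caller, not part of this file):
 * a kernel consumer can request an asynchronous cache flush roughly
 * like this, since FKIOCTL makes bd_ioctl() trust the dk_callback
 * pointer:
 *
 *	struct dk_callback dkc;
 *
 *	dkc.dkc_callback = my_flush_done;	(hypothetical routine)
 *	dkc.dkc_cookie = my_state;
 *	(void) ldi_ioctl(lh, DKIOCFLUSHWRITECACHE, (intptr_t)&dkc,
 *	    FKIOCTL | FWRITE, kcred, &rval);
 *
 * Because bd_flush_write_cache() copies the dk_callback, the caller's
 * structure may live on the stack.
 */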

/*
 * Nexus support.
 */
int
bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
    void *arg, void *result)
{
	bd_handle_t	hdl;

	switch (ctlop) {
	case DDI_CTLOPS_REPORTDEV:
		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
		    ddi_driver_name(rdip), ddi_get_instance(rdip));
		return (DDI_SUCCESS);

	case DDI_CTLOPS_INITCHILD:
		hdl = ddi_get_parent_data((dev_info_t *)arg);
		if (hdl == NULL) {
			return (DDI_NOT_WELL_FORMED);
		}
		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
		return (DDI_SUCCESS);

	case DDI_CTLOPS_UNINITCHILD:
		ddi_set_name_addr((dev_info_t *)arg, NULL);
		ndi_prop_remove_all((dev_info_t *)arg);
		return (DDI_SUCCESS);

	default:
		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
	}
}

/*
 * Functions for device drivers.
 */
bd_handle_t
bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
{
	bd_handle_t	hdl;

	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
	if (hdl != NULL) {
		hdl->h_ops = *ops;
		hdl->h_dma = dma;
		hdl->h_private = private;
	}

	return (hdl);
}

void
bd_free_handle(bd_handle_t hdl)
{
	kmem_free(hdl, sizeof (*hdl));
}

int
bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
{
	dev_info_t	*child;
	bd_drive_t	drive;

	/* if drivers don't override this, make it assume none */
	drive.d_lun = -1;
	hdl->h_ops.o_drive_info(hdl->h_private, &drive);

	hdl->h_parent = dip;
	hdl->h_name = "blkdev";

	if (drive.d_lun >= 0) {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X,%X",
		    drive.d_target, drive.d_lun);
	} else {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X",
		    drive.d_target);
	}
	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
	    &child) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    "blkdev", hdl->h_addr);
		return (DDI_FAILURE);
	}

	ddi_set_parent_data(child, hdl);
	hdl->h_child = child;

	if (ndi_devi_online(child, 0) == NDI_FAILURE) {
		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    hdl->h_name, hdl->h_addr);
		(void) ndi_devi_free(child);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

int
bd_detach_handle(bd_handle_t hdl)
{
	int	circ;
	int	rv;
	char	*devnm;

	if (hdl->h_child == NULL) {
		return (DDI_SUCCESS);
	}
	ndi_devi_enter(hdl->h_parent, &circ);
	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
		rv = ddi_remove_child(hdl->h_child, 0);
	} else {
		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
		(void) ddi_deviname(hdl->h_child, devnm);
		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
		kmem_free(devnm, MAXNAMELEN + 1);
	}
	if (rv == 0) {
		hdl->h_child = NULL;
	}

	ndi_devi_exit(hdl->h_parent, circ);
	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
}
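
/*
 * Usage sketch for the handle API above (hypothetical client driver;
 * the my_* names are illustrative, not part of this file).  A parent
 * driver typically does, in its attach(9E), something like:
 *
 *	sc->sc_bdh = bd_alloc_handle(sc, &my_bd_ops, &my_dma_attr,
 *	    KM_SLEEP);
 *	if (bd_attach_handle(dip, sc->sc_bdh) != DDI_SUCCESS)
 *		... fail ...
 *
 * where my_bd_ops supplies o_drive_info, o_media_info, o_read,
 * o_write, and friends.  Teardown in detach(9E) is bd_detach_handle()
 * followed by bd_free_handle().
 */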

void
bd_xfer_done(bd_xfer_t *xfer, int err)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	buf_t		*bp = xi->i_bp;
	int		rv;
	bd_t		*bd = xi->i_bd;
	size_t		len;

	if (err != 0) {
		bd_runq_exit(xi, err);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, err);
		biodone(bp);
		return;
	}

	xi->i_cur_win++;
	xi->i_resid -= xi->i_len;

	if (xi->i_resid == 0) {
		/* Job completed successfully! */
		bd_runq_exit(xi, 0);

		bd_xfer_free(xi);
		biodone(bp);
		return;
	}

	xi->i_blkno += xi->i_nblks;

	if (bd->d_use_dma) {
		/* More transfer still pending... advance to next window. */
		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
	} else {
		/* Advance memory window; this path cannot fail. */
		rv = DDI_SUCCESS;
		xi->i_kaddr += xi->i_len;
		xi->i_offset += xi->i_len;
		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
	}

	if ((rv != DDI_SUCCESS) ||
	    (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
		bd_runq_exit(xi, EFAULT);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, EFAULT);
		biodone(bp);
		return;
	}
	xi->i_len = len;
	xi->i_nblks = len >> xi->i_blkshift;

	/* Submit next window to hardware. */
	rv = xi->i_func(bd->d_private, &xi->i_public);
	if (rv != 0) {
		bd_runq_exit(xi, rv);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, rv);
		biodone(bp);
	}
}

void
bd_state_change(bd_handle_t hdl)
{
	bd_t	*bd;

	if ((bd = hdl->h_bd) != NULL) {
		bd_update_state(bd);
	}
}

void
bd_mod_init(struct dev_ops *devops)
{
	static struct bus_ops bd_bus_ops = {
		BUSO_REV,		/* busops_rev */
		nullbusmap,		/* bus_map */
		NULL,			/* bus_get_intrspec (OBSOLETE) */
		NULL,			/* bus_add_intrspec (OBSOLETE) */
		NULL,			/* bus_remove_intrspec (OBSOLETE) */
		i_ddi_map_fault,	/* bus_map_fault */
		NULL,			/* bus_dma_map (OBSOLETE) */
		ddi_dma_allochdl,	/* bus_dma_allochdl */
		ddi_dma_freehdl,	/* bus_dma_freehdl */
		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
		ddi_dma_flush,		/* bus_dma_flush */
		ddi_dma_win,		/* bus_dma_win */
		ddi_dma_mctl,		/* bus_dma_ctl */
		bd_bus_ctl,		/* bus_ctl */
		ddi_bus_prop_op,	/* bus_prop_op */
		NULL,			/* bus_get_eventcookie */
		NULL,			/* bus_add_eventcall */
		NULL,			/* bus_remove_eventcall */
		NULL,			/* bus_post_event */
		NULL,			/* bus_intr_ctl (OBSOLETE) */
		NULL,			/* bus_config */
		NULL,			/* bus_unconfig */
		NULL,			/* bus_fm_init */
		NULL,			/* bus_fm_fini */
		NULL,			/* bus_fm_access_enter */
		NULL,			/* bus_fm_access_exit */
		NULL,			/* bus_power */
		NULL,			/* bus_intr_op */
	};

	devops->devo_bus_ops = &bd_bus_ops;

	/*
	 * NB: The device driver is free to supply its own
	 * character entry device support.
	 */
}

void
bd_mod_fini(struct dev_ops *devops)
{
	devops->devo_bus_ops = NULL;
}
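
/*
 * Usage sketch (hypothetical, for illustration): a client driver wires
 * blkdev into its dev_ops from _init(9E) before mod_install(), and
 * unwires it on failure or in _fini(9E):
 *
 *	int
 *	_init(void)
 *	{
 *		int rv;
 *
 *		bd_mod_init(&my_dev_ops);
 *		if ((rv = mod_install(&my_modlinkage)) != 0)
 *			bd_mod_fini(&my_dev_ops);
 *		return (rv);
 *	}
 *
 * my_dev_ops and my_modlinkage are the client's own structures, not
 * part of this file.
 */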