/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2011, 2012 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/aio_req.h>
#include <sys/cred.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/kstat.h>
#include <sys/fs/dv_node.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/note.h>
#include <sys/blkdev.h>

/*
 * Minor number layout: each instance owns BD_MAXPART consecutive minors,
 * so the instance is the quotient and the partition is the remainder.
 */
#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)

typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;

/*
 * Per-instance soft state, allocated in bd_attach() and released in
 * bd_detach().
 */
struct bd {
	void		*d_private;	/* copied from h_private; passed to d_ops */
	dev_info_t	*d_dip;		/* our own devinfo node */
	kmutex_t	d_ocmutex;	/* held by open/close around d_open_* */
	kmutex_t	d_iomutex;	/* held around queues, d_qactive; kstat lock */
	kmutex_t	d_statemutex;	/* held around d_state changes */
	kcondvar_t	d_statecv;	/* broadcast when d_state changes */
	enum dkio_state	d_state;	/* last media state seen */
	cmlb_handle_t	d_cmlbh;	/* label (cmlb) handle */
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];	/* bit mask */

	uint32_t	d_qsize;	/* limit on d_qactive (from drive info) */
	uint32_t	d_qactive;	/* requests currently on d_runq */
	uint32_t	d_maxxfer;	/* maximum transfer size, in bytes */
	uint32_t	d_blkshift;	/* log2 of the media block size */
	uint64_t	d_numblks;	/* media size in blocks (from media info) */
	ddi_devid_t	d_devid;	/* devid from o_devid_init, if any */

	kmem_cache_t	*d_cache;	/* cache of bd_xfer_impl_t objects */
	list_t		d_runq;		/* requests handed to the parent driver */
	list_t		d_waitq;	/* requests waiting for a queue slot */
	kstat_t		*d_ksp;		/* I/O kstat; NULL if creation failed */
	kstat_io_t	*d_kiop;	/* kstat data, or scratch if d_ksp is NULL */

	boolean_t	d_rdonly;	/* media reported read-only */
	boolean_t	d_removable;	/* from drive info */
	boolean_t	d_hotpluggable;	/* from drive info */
	boolean_t	d_use_dma;	/* B_TRUE when the handle supplied h_dma */

	ddi_dma_attr_t	d_dma;		/* private copy of *h_dma */
	bd_ops_t	d_ops;		/* private copy of h_ops */
	bd_handle_t	d_handle;	/* handle we were attached through */
};

/*
 * Handle object found as the parent data of our devinfo node; bd_attach()
 * copies its ops, DMA attributes and private pointer into the soft state.
 */
struct bd_handle {
	bd_ops_t	h_ops;
	ddi_dma_attr_t	*h_dma;		/* NULL means "do not use DMA" */
	dev_info_t	*h_parent;
	dev_info_t	*h_child;
	void		*h_private;
	bd_t		*h_bd;		/* set by bd_attach() */
	char		*h_name;
	char		h_addr[20];	/* enough for %X,%X */
};

/*
 * Internal transfer descriptor.  The public bd_xfer_t handed to the parent
 * driver's callbacks is embedded as i_public.
 */
struct bd_xfer_impl {
	bd_xfer_t	i_public;
	list_node_t	i_linkage;	/* d_waitq / d_runq linkage */
	bd_t		*i_bd;		/* owning instance (set by the ctor) */
	buf_t		*i_bp;		/* buf this transfer services */
	uint_t		i_num_win;	/* number of transfer windows */
	uint_t		i_cur_win;	/* index of the current window */
	off_t		i_offset;	/* offset of the current window */
	int		(*i_func)(void *, bd_xfer_t *);	/* parent entry point */
	uint32_t	i_blkshift;	/* d_blkshift at allocation time */
	size_t		i_len;		/* byte length of the current window */
	size_t		i_resid;	/* residual byte count */
};

/* Shorthand for members of the embedded public bd_xfer_t. */
#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags


/*
 * Private prototypes.
 */

static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);

/* Target-geometry callbacks handed to cmlb_attach(). */
struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};

static struct cb_ops bd_cb_ops = { 169 bd_open, /* open */ 170 bd_close, /* close */ 171 bd_strategy, /* strategy */ 172 nodev, /* print */ 173 bd_dump, /* dump */ 174 bd_read, /* read */ 175 bd_write, /* write */ 176 bd_ioctl, /* ioctl */ 177 nodev, /* devmap */ 178 nodev, /* mmap */ 179 nodev, /* segmap */ 180 nochpoll, /* poll */ 181 bd_prop_op, /* cb_prop_op */ 182 0, /* streamtab */ 183 D_64BIT | D_MP, /* Driver comaptibility flag */ 184 CB_REV, /* cb_rev */ 185 bd_aread, /* async read */ 186 bd_awrite /* async write */ 187 }; 188 189 struct dev_ops bd_dev_ops = { 190 DEVO_REV, /* devo_rev, */ 191 0, /* refcnt */ 192 bd_getinfo, /* getinfo */ 193 nulldev, /* identify */ 194 nulldev, /* probe */ 195 bd_attach, /* attach */ 196 bd_detach, /* detach */ 197 nodev, /* reset */ 198 &bd_cb_ops, /* driver operations */ 199 NULL, /* bus operations */ 200 NULL, /* power */ 201 ddi_quiesce_not_needed, /* quiesce */ 202 }; 203 204 static struct modldrv modldrv = { 205 &mod_driverops, 206 "Generic Block Device", 207 &bd_dev_ops, 208 }; 209 210 static struct modlinkage modlinkage = { 211 MODREV_1, { &modldrv, NULL } 212 }; 213 214 static void *bd_state; 215 static krwlock_t bd_lock; 216 217 int 218 _init(void) 219 { 220 int rv; 221 222 rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2); 223 if (rv != DDI_SUCCESS) { 224 return (rv); 225 } 226 rw_init(&bd_lock, NULL, RW_DRIVER, NULL); 227 rv = mod_install(&modlinkage); 228 if (rv != DDI_SUCCESS) { 229 rw_destroy(&bd_lock); 230 ddi_soft_state_fini(&bd_state); 231 } 232 return (rv); 233 } 234 235 int 236 _fini(void) 237 { 238 int rv; 239 240 rv = mod_remove(&modlinkage); 241 if (rv == DDI_SUCCESS) { 242 rw_destroy(&bd_lock); 243 ddi_soft_state_fini(&bd_state); 244 } 245 return (rv); 246 } 247 248 int 249 _info(struct modinfo *modinfop) 250 { 251 return (mod_info(&modlinkage, modinfop)); 252 } 253 254 static int 255 bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 256 { 257 bd_t *bd; 258 minor_t 
inst; 259 260 _NOTE(ARGUNUSED(dip)); 261 262 inst = BDINST((dev_t)arg); 263 264 switch (cmd) { 265 case DDI_INFO_DEVT2DEVINFO: 266 bd = ddi_get_soft_state(bd_state, inst); 267 if (bd == NULL) { 268 return (DDI_FAILURE); 269 } 270 *resultp = (void *)bd->d_dip; 271 break; 272 273 case DDI_INFO_DEVT2INSTANCE: 274 *resultp = (void *)(intptr_t)inst; 275 break; 276 277 default: 278 return (DDI_FAILURE); 279 } 280 return (DDI_SUCCESS); 281 } 282 283 static int 284 bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 285 { 286 int inst; 287 bd_handle_t hdl; 288 bd_t *bd; 289 bd_drive_t drive; 290 int rv; 291 char name[16]; 292 char kcache[32]; 293 294 switch (cmd) { 295 case DDI_ATTACH: 296 break; 297 case DDI_RESUME: 298 /* We don't do anything native for suspend/resume */ 299 return (DDI_SUCCESS); 300 default: 301 return (DDI_FAILURE); 302 } 303 304 inst = ddi_get_instance(dip); 305 hdl = ddi_get_parent_data(dip); 306 307 (void) snprintf(name, sizeof (name), "%s%d", 308 ddi_driver_name(dip), ddi_get_instance(dip)); 309 (void) snprintf(kcache, sizeof (kcache), "%s_xfer", name); 310 311 if (hdl == NULL) { 312 cmn_err(CE_WARN, "%s: missing parent data!", name); 313 return (DDI_FAILURE); 314 } 315 316 if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) { 317 cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name); 318 return (DDI_FAILURE); 319 } 320 bd = ddi_get_soft_state(bd_state, inst); 321 322 if (hdl->h_dma) { 323 bd->d_dma = *(hdl->h_dma); 324 bd->d_dma.dma_attr_granular = 325 max(DEV_BSIZE, bd->d_dma.dma_attr_granular); 326 bd->d_use_dma = B_TRUE; 327 328 if (bd->d_maxxfer && 329 (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) { 330 cmn_err(CE_WARN, 331 "%s: inconsistent maximum transfer size!", 332 name); 333 /* We force it */ 334 bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer; 335 } else { 336 bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer; 337 } 338 } else { 339 bd->d_use_dma = B_FALSE; 340 if (bd->d_maxxfer == 0) { 341 bd->d_maxxfer = 1024 * 1024; 342 } 343 } 344 
bd->d_ops = hdl->h_ops; 345 bd->d_private = hdl->h_private; 346 bd->d_blkshift = 9; /* 512 bytes, to start */ 347 348 if (bd->d_maxxfer % DEV_BSIZE) { 349 cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name); 350 bd->d_maxxfer &= ~(DEV_BSIZE - 1); 351 } 352 if (bd->d_maxxfer < DEV_BSIZE) { 353 cmn_err(CE_WARN, "%s: maximum transfer size too small!", name); 354 ddi_soft_state_free(bd_state, inst); 355 return (DDI_FAILURE); 356 } 357 358 bd->d_dip = dip; 359 bd->d_handle = hdl; 360 hdl->h_bd = bd; 361 ddi_set_driver_private(dip, bd); 362 363 mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL); 364 mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL); 365 mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL); 366 cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL); 367 368 list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t), 369 offsetof(struct bd_xfer_impl, i_linkage)); 370 list_create(&bd->d_runq, sizeof (bd_xfer_impl_t), 371 offsetof(struct bd_xfer_impl, i_linkage)); 372 373 bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8, 374 bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0); 375 376 bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk", 377 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT); 378 if (bd->d_ksp != NULL) { 379 bd->d_ksp->ks_lock = &bd->d_iomutex; 380 kstat_install(bd->d_ksp); 381 bd->d_kiop = bd->d_ksp->ks_data; 382 } else { 383 /* 384 * Even if we cannot create the kstat, we create a 385 * scratch kstat. The reason for this is to ensure 386 * that we can update the kstat all of the time, 387 * without adding an extra branch instruction. 
388 */ 389 bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP); 390 } 391 392 cmlb_alloc_handle(&bd->d_cmlbh); 393 394 bd->d_state = DKIO_NONE; 395 396 bzero(&drive, sizeof (drive)); 397 bd->d_ops.o_drive_info(bd->d_private, &drive); 398 bd->d_qsize = drive.d_qsize; 399 bd->d_removable = drive.d_removable; 400 bd->d_hotpluggable = drive.d_hotpluggable; 401 402 if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer) 403 bd->d_maxxfer = drive.d_maxxfer; 404 405 406 rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT, 407 bd->d_removable, bd->d_hotpluggable, 408 drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK, 409 CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0); 410 if (rv != 0) { 411 cmlb_free_handle(&bd->d_cmlbh); 412 kmem_cache_destroy(bd->d_cache); 413 mutex_destroy(&bd->d_iomutex); 414 mutex_destroy(&bd->d_ocmutex); 415 mutex_destroy(&bd->d_statemutex); 416 cv_destroy(&bd->d_statecv); 417 list_destroy(&bd->d_waitq); 418 list_destroy(&bd->d_runq); 419 if (bd->d_ksp != NULL) { 420 kstat_delete(bd->d_ksp); 421 bd->d_ksp = NULL; 422 } else { 423 kmem_free(bd->d_kiop, sizeof (kstat_io_t)); 424 } 425 ddi_soft_state_free(bd_state, inst); 426 return (DDI_FAILURE); 427 } 428 429 if (bd->d_ops.o_devid_init != NULL) { 430 rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid); 431 if (rv == DDI_SUCCESS) { 432 if (ddi_devid_register(dip, bd->d_devid) != 433 DDI_SUCCESS) { 434 cmn_err(CE_WARN, 435 "%s: unable to register devid", name); 436 } 437 } 438 } 439 440 /* 441 * Add a zero-length attribute to tell the world we support 442 * kernel ioctls (for layered drivers). Also set up properties 443 * used by HAL to identify removable media. 
444 */ 445 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, 446 DDI_KERNEL_IOCTL, NULL, 0); 447 if (bd->d_removable) { 448 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, 449 "removable-media", NULL, 0); 450 } 451 if (bd->d_hotpluggable) { 452 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, 453 "hotpluggable", NULL, 0); 454 } 455 456 ddi_report_dev(dip); 457 458 return (DDI_SUCCESS); 459 } 460 461 static int 462 bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 463 { 464 bd_t *bd; 465 466 bd = ddi_get_driver_private(dip); 467 468 switch (cmd) { 469 case DDI_DETACH: 470 break; 471 case DDI_SUSPEND: 472 /* We don't suspend, but our parent does */ 473 return (DDI_SUCCESS); 474 default: 475 return (DDI_FAILURE); 476 } 477 if (bd->d_ksp != NULL) { 478 kstat_delete(bd->d_ksp); 479 bd->d_ksp = NULL; 480 } else { 481 kmem_free(bd->d_kiop, sizeof (kstat_io_t)); 482 } 483 cmlb_detach(bd->d_cmlbh, 0); 484 cmlb_free_handle(&bd->d_cmlbh); 485 if (bd->d_devid) 486 ddi_devid_free(bd->d_devid); 487 kmem_cache_destroy(bd->d_cache); 488 mutex_destroy(&bd->d_iomutex); 489 mutex_destroy(&bd->d_ocmutex); 490 mutex_destroy(&bd->d_statemutex); 491 cv_destroy(&bd->d_statecv); 492 list_destroy(&bd->d_waitq); 493 list_destroy(&bd->d_runq); 494 ddi_soft_state_free(bd_state, ddi_get_instance(dip)); 495 return (DDI_SUCCESS); 496 } 497 498 static int 499 bd_xfer_ctor(void *buf, void *arg, int kmflag) 500 { 501 bd_xfer_impl_t *xi; 502 bd_t *bd = arg; 503 int (*dcb)(caddr_t); 504 505 if (kmflag == KM_SLEEP) { 506 dcb = DDI_DMA_SLEEP; 507 } else { 508 dcb = DDI_DMA_DONTWAIT; 509 } 510 511 xi = buf; 512 bzero(xi, sizeof (*xi)); 513 xi->i_bd = bd; 514 515 if (bd->d_use_dma) { 516 if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL, 517 &xi->i_dmah) != DDI_SUCCESS) { 518 return (-1); 519 } 520 } 521 522 return (0); 523 } 524 525 static void 526 bd_xfer_dtor(void *buf, void *arg) 527 { 528 bd_xfer_impl_t *xi = buf; 529 530 _NOTE(ARGUNUSED(arg)); 531 532 
if (xi->i_dmah) 533 ddi_dma_free_handle(&xi->i_dmah); 534 xi->i_dmah = NULL; 535 } 536 537 static bd_xfer_impl_t * 538 bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *), 539 int kmflag) 540 { 541 bd_xfer_impl_t *xi; 542 int rv; 543 int status; 544 unsigned dir; 545 int (*cb)(caddr_t); 546 size_t len; 547 uint32_t shift; 548 549 if (kmflag == KM_SLEEP) { 550 cb = DDI_DMA_SLEEP; 551 } else { 552 cb = DDI_DMA_DONTWAIT; 553 } 554 555 xi = kmem_cache_alloc(bd->d_cache, kmflag); 556 if (xi == NULL) { 557 bioerror(bp, ENOMEM); 558 return (NULL); 559 } 560 561 ASSERT(bp); 562 563 xi->i_bp = bp; 564 xi->i_func = func; 565 xi->i_blkno = bp->b_lblkno; 566 567 if (bp->b_bcount == 0) { 568 xi->i_len = 0; 569 xi->i_nblks = 0; 570 xi->i_kaddr = NULL; 571 xi->i_resid = 0; 572 xi->i_num_win = 0; 573 goto done; 574 } 575 576 if (bp->b_flags & B_READ) { 577 dir = DDI_DMA_READ; 578 xi->i_func = bd->d_ops.o_read; 579 } else { 580 dir = DDI_DMA_WRITE; 581 xi->i_func = bd->d_ops.o_write; 582 } 583 584 shift = bd->d_blkshift; 585 xi->i_blkshift = shift; 586 587 if (!bd->d_use_dma) { 588 bp_mapin(bp); 589 rv = 0; 590 xi->i_offset = 0; 591 xi->i_num_win = 592 (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer; 593 xi->i_cur_win = 0; 594 xi->i_len = min(bp->b_bcount, bd->d_maxxfer); 595 xi->i_nblks = xi->i_len >> shift; 596 xi->i_kaddr = bp->b_un.b_addr; 597 xi->i_resid = bp->b_bcount; 598 } else { 599 600 /* 601 * We have to use consistent DMA if the address is misaligned. 
602 */ 603 if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) && 604 ((uintptr_t)bp->b_un.b_addr & 0x7)) { 605 dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL; 606 } else { 607 dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL; 608 } 609 610 status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb, 611 NULL, &xi->i_dmac, &xi->i_ndmac); 612 switch (status) { 613 case DDI_DMA_MAPPED: 614 xi->i_num_win = 1; 615 xi->i_cur_win = 0; 616 xi->i_offset = 0; 617 xi->i_len = bp->b_bcount; 618 xi->i_nblks = xi->i_len >> shift; 619 xi->i_resid = bp->b_bcount; 620 rv = 0; 621 break; 622 case DDI_DMA_PARTIAL_MAP: 623 xi->i_cur_win = 0; 624 625 if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) != 626 DDI_SUCCESS) || 627 (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset, 628 &len, &xi->i_dmac, &xi->i_ndmac) != 629 DDI_SUCCESS) || 630 (P2PHASE(len, shift) != 0)) { 631 (void) ddi_dma_unbind_handle(xi->i_dmah); 632 rv = EFAULT; 633 goto done; 634 } 635 xi->i_len = len; 636 xi->i_nblks = xi->i_len >> shift; 637 xi->i_resid = bp->b_bcount; 638 rv = 0; 639 break; 640 case DDI_DMA_NORESOURCES: 641 rv = EAGAIN; 642 goto done; 643 case DDI_DMA_TOOBIG: 644 rv = EINVAL; 645 goto done; 646 case DDI_DMA_NOMAPPING: 647 case DDI_DMA_INUSE: 648 default: 649 rv = EFAULT; 650 goto done; 651 } 652 } 653 654 done: 655 if (rv != 0) { 656 kmem_cache_free(bd->d_cache, xi); 657 bioerror(bp, rv); 658 return (NULL); 659 } 660 661 return (xi); 662 } 663 664 static void 665 bd_xfer_free(bd_xfer_impl_t *xi) 666 { 667 if (xi->i_dmah) { 668 (void) ddi_dma_unbind_handle(xi->i_dmah); 669 } 670 kmem_cache_free(xi->i_bd->d_cache, xi); 671 } 672 673 static int 674 bd_open(dev_t *devp, int flag, int otyp, cred_t *credp) 675 { 676 dev_t dev = *devp; 677 bd_t *bd; 678 minor_t part; 679 minor_t inst; 680 uint64_t mask; 681 boolean_t ndelay; 682 int rv; 683 diskaddr_t nblks; 684 diskaddr_t lba; 685 686 _NOTE(ARGUNUSED(credp)); 687 688 part = BDPART(dev); 689 inst = BDINST(dev); 690 691 if (otyp >= OTYPCNT) 692 return (EINVAL); 693 
694 ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE; 695 696 /* 697 * Block any DR events from changing the set of registered 698 * devices while we function. 699 */ 700 rw_enter(&bd_lock, RW_READER); 701 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) { 702 rw_exit(&bd_lock); 703 return (ENXIO); 704 } 705 706 mutex_enter(&bd->d_ocmutex); 707 708 ASSERT(part < 64); 709 mask = (1U << part); 710 711 bd_update_state(bd); 712 713 if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) { 714 715 /* non-blocking opens are allowed to succeed */ 716 if (!ndelay) { 717 rv = ENXIO; 718 goto done; 719 } 720 } else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba, 721 NULL, NULL, 0) == 0) { 722 723 /* 724 * We read the partinfo, verify valid ranges. If the 725 * partition is invalid, and we aren't blocking or 726 * doing a raw access, then fail. (Non-blocking and 727 * raw accesses can still succeed to allow a disk with 728 * bad partition data to opened by format and fdisk.) 729 */ 730 if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) { 731 rv = ENXIO; 732 goto done; 733 } 734 } else if (!ndelay) { 735 /* 736 * cmlb_partinfo failed -- invalid partition or no 737 * disk label. 
738 */ 739 rv = ENXIO; 740 goto done; 741 } 742 743 if ((flag & FWRITE) && bd->d_rdonly) { 744 rv = EROFS; 745 goto done; 746 } 747 748 if ((bd->d_open_excl) & (mask)) { 749 rv = EBUSY; 750 goto done; 751 } 752 if (flag & FEXCL) { 753 if (bd->d_open_lyr[part]) { 754 rv = EBUSY; 755 goto done; 756 } 757 for (int i = 0; i < OTYP_LYR; i++) { 758 if (bd->d_open_reg[i] & mask) { 759 rv = EBUSY; 760 goto done; 761 } 762 } 763 } 764 765 if (otyp == OTYP_LYR) { 766 bd->d_open_lyr[part]++; 767 } else { 768 bd->d_open_reg[otyp] |= mask; 769 } 770 if (flag & FEXCL) { 771 bd->d_open_excl |= mask; 772 } 773 774 rv = 0; 775 done: 776 mutex_exit(&bd->d_ocmutex); 777 rw_exit(&bd_lock); 778 779 return (rv); 780 } 781 782 static int 783 bd_close(dev_t dev, int flag, int otyp, cred_t *credp) 784 { 785 bd_t *bd; 786 minor_t inst; 787 minor_t part; 788 uint64_t mask; 789 boolean_t last = B_TRUE; 790 791 _NOTE(ARGUNUSED(flag)); 792 _NOTE(ARGUNUSED(credp)); 793 794 part = BDPART(dev); 795 inst = BDINST(dev); 796 797 ASSERT(part < 64); 798 mask = (1U << part); 799 800 rw_enter(&bd_lock, RW_READER); 801 802 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) { 803 rw_exit(&bd_lock); 804 return (ENXIO); 805 } 806 807 mutex_enter(&bd->d_ocmutex); 808 if (bd->d_open_excl & mask) { 809 bd->d_open_excl &= ~mask; 810 } 811 if (otyp == OTYP_LYR) { 812 bd->d_open_lyr[part]--; 813 } else { 814 bd->d_open_reg[otyp] &= ~mask; 815 } 816 for (int i = 0; i < 64; i++) { 817 if (bd->d_open_lyr[part]) { 818 last = B_FALSE; 819 } 820 } 821 for (int i = 0; last && (i < OTYP_LYR); i++) { 822 if (bd->d_open_reg[i]) { 823 last = B_FALSE; 824 } 825 } 826 mutex_exit(&bd->d_ocmutex); 827 828 if (last) { 829 cmlb_invalidate(bd->d_cmlbh, 0); 830 } 831 rw_exit(&bd_lock); 832 833 return (0); 834 } 835 836 static int 837 bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk) 838 { 839 minor_t inst; 840 minor_t part; 841 diskaddr_t pstart; 842 diskaddr_t psize; 843 bd_t *bd; 844 bd_xfer_impl_t *xi; 845 buf_t *bp; 
846 int rv; 847 848 rw_enter(&bd_lock, RW_READER); 849 850 part = BDPART(dev); 851 inst = BDINST(dev); 852 853 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) { 854 rw_exit(&bd_lock); 855 return (ENXIO); 856 } 857 /* 858 * do cmlb, but do it synchronously unless we already have the 859 * partition (which we probably should.) 860 */ 861 if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL, 862 (void *)1)) { 863 rw_exit(&bd_lock); 864 return (ENXIO); 865 } 866 867 if ((blkno + nblk) > psize) { 868 rw_exit(&bd_lock); 869 return (EINVAL); 870 } 871 bp = getrbuf(KM_NOSLEEP); 872 if (bp == NULL) { 873 rw_exit(&bd_lock); 874 return (ENOMEM); 875 } 876 877 bp->b_bcount = nblk << bd->d_blkshift; 878 bp->b_resid = bp->b_bcount; 879 bp->b_lblkno = blkno; 880 bp->b_un.b_addr = caddr; 881 882 xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP); 883 if (xi == NULL) { 884 rw_exit(&bd_lock); 885 freerbuf(bp); 886 return (ENOMEM); 887 } 888 xi->i_blkno = blkno + pstart; 889 xi->i_flags = BD_XFER_POLL; 890 bd_submit(bd, xi); 891 rw_exit(&bd_lock); 892 893 /* 894 * Generally, we should have run this entirely synchronously 895 * at this point and the biowait call should be a no-op. If 896 * it didn't happen this way, it's a bug in the underlying 897 * driver not honoring BD_XFER_POLL. 898 */ 899 (void) biowait(bp); 900 rv = geterror(bp); 901 freerbuf(bp); 902 return (rv); 903 } 904 905 void 906 bd_minphys(struct buf *bp) 907 { 908 minor_t inst; 909 bd_t *bd; 910 inst = BDINST(bp->b_edev); 911 912 bd = ddi_get_soft_state(bd_state, inst); 913 914 /* 915 * In a non-debug kernel, bd_strategy will catch !bd as 916 * well, and will fail nicely. 
917 */ 918 ASSERT(bd); 919 920 if (bp->b_bcount > bd->d_maxxfer) 921 bp->b_bcount = bd->d_maxxfer; 922 } 923 924 static int 925 bd_read(dev_t dev, struct uio *uio, cred_t *credp) 926 { 927 _NOTE(ARGUNUSED(credp)); 928 return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio)); 929 } 930 931 static int 932 bd_write(dev_t dev, struct uio *uio, cred_t *credp) 933 { 934 _NOTE(ARGUNUSED(credp)); 935 return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio)); 936 } 937 938 static int 939 bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp) 940 { 941 _NOTE(ARGUNUSED(credp)); 942 return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio)); 943 } 944 945 static int 946 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp) 947 { 948 _NOTE(ARGUNUSED(credp)); 949 return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio)); 950 } 951 952 static int 953 bd_strategy(struct buf *bp) 954 { 955 minor_t inst; 956 minor_t part; 957 bd_t *bd; 958 diskaddr_t p_lba; 959 diskaddr_t p_nblks; 960 diskaddr_t b_nblks; 961 bd_xfer_impl_t *xi; 962 uint32_t shift; 963 int (*func)(void *, bd_xfer_t *); 964 965 part = BDPART(bp->b_edev); 966 inst = BDINST(bp->b_edev); 967 968 ASSERT(bp); 969 970 bp->b_resid = bp->b_bcount; 971 972 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) { 973 bioerror(bp, ENXIO); 974 biodone(bp); 975 return (0); 976 } 977 978 if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba, 979 NULL, NULL, 0)) { 980 bioerror(bp, ENXIO); 981 biodone(bp); 982 return (0); 983 } 984 985 shift = bd->d_blkshift; 986 987 if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) || 988 (bp->b_lblkno > p_nblks)) { 989 bioerror(bp, ENXIO); 990 biodone(bp); 991 return (0); 992 } 993 b_nblks = bp->b_bcount >> shift; 994 if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) { 995 biodone(bp); 996 return (0); 997 } 998 999 if ((b_nblks + bp->b_lblkno) > p_nblks) { 1000 bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift); 1001 bp->b_bcount -= 
bp->b_resid; 1002 } else { 1003 bp->b_resid = 0; 1004 } 1005 func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write; 1006 1007 xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP); 1008 if (xi == NULL) { 1009 xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE); 1010 } 1011 if (xi == NULL) { 1012 /* bd_request_alloc will have done bioerror */ 1013 biodone(bp); 1014 return (0); 1015 } 1016 xi->i_blkno = bp->b_lblkno + p_lba; 1017 1018 bd_submit(bd, xi); 1019 1020 return (0); 1021 } 1022 1023 static int 1024 bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp) 1025 { 1026 minor_t inst; 1027 uint16_t part; 1028 bd_t *bd; 1029 void *ptr = (void *)arg; 1030 int rv; 1031 1032 part = BDPART(dev); 1033 inst = BDINST(dev); 1034 1035 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) { 1036 return (ENXIO); 1037 } 1038 1039 rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0); 1040 if (rv != ENOTTY) 1041 return (rv); 1042 1043 switch (cmd) { 1044 case DKIOCGMEDIAINFO: { 1045 struct dk_minfo minfo; 1046 1047 /* make sure our state information is current */ 1048 bd_update_state(bd); 1049 bzero(&minfo, sizeof (minfo)); 1050 minfo.dki_media_type = DK_FIXED_DISK; 1051 minfo.dki_lbsize = (1U << bd->d_blkshift); 1052 minfo.dki_capacity = bd->d_numblks; 1053 if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) { 1054 return (EFAULT); 1055 } 1056 return (0); 1057 } 1058 case DKIOCINFO: { 1059 struct dk_cinfo cinfo; 1060 bzero(&cinfo, sizeof (cinfo)); 1061 cinfo.dki_ctype = DKC_BLKDEV; 1062 cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip)); 1063 (void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname), 1064 "%s", ddi_driver_name(ddi_get_parent(bd->d_dip))); 1065 (void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname), 1066 "%s", ddi_driver_name(bd->d_dip)); 1067 cinfo.dki_unit = inst; 1068 cinfo.dki_flags = DKI_FMTVOL; 1069 cinfo.dki_partition = part; 1070 cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE; 1071 cinfo.dki_addr = 
0; 1072 cinfo.dki_slave = 0; 1073 cinfo.dki_space = 0; 1074 cinfo.dki_prio = 0; 1075 cinfo.dki_vec = 0; 1076 if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) { 1077 return (EFAULT); 1078 } 1079 return (0); 1080 } 1081 case DKIOCREMOVABLE: { 1082 int i; 1083 i = bd->d_removable ? 1 : 0; 1084 if (ddi_copyout(&i, ptr, sizeof (i), flag)) { 1085 return (EFAULT); 1086 } 1087 return (0); 1088 } 1089 case DKIOCHOTPLUGGABLE: { 1090 int i; 1091 i = bd->d_hotpluggable ? 1 : 0; 1092 if (ddi_copyout(&i, ptr, sizeof (i), flag)) { 1093 return (EFAULT); 1094 } 1095 return (0); 1096 } 1097 case DKIOCREADONLY: { 1098 int i; 1099 i = bd->d_rdonly ? 1 : 0; 1100 if (ddi_copyout(&i, ptr, sizeof (i), flag)) { 1101 return (EFAULT); 1102 } 1103 return (0); 1104 } 1105 case DKIOCSTATE: { 1106 enum dkio_state state; 1107 if (ddi_copyin(ptr, &state, sizeof (state), flag)) { 1108 return (EFAULT); 1109 } 1110 if ((rv = bd_check_state(bd, &state)) != 0) { 1111 return (rv); 1112 } 1113 if (ddi_copyout(&state, ptr, sizeof (state), flag)) { 1114 return (EFAULT); 1115 } 1116 return (0); 1117 } 1118 case DKIOCFLUSHWRITECACHE: { 1119 struct dk_callback *dkc = NULL; 1120 1121 if (flag & FKIOCTL) 1122 dkc = (void *)arg; 1123 1124 rv = bd_flush_write_cache(bd, dkc); 1125 return (rv); 1126 } 1127 1128 default: 1129 break; 1130 1131 } 1132 return (ENOTTY); 1133 } 1134 1135 static int 1136 bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, 1137 char *name, caddr_t valuep, int *lengthp) 1138 { 1139 bd_t *bd; 1140 1141 bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip)); 1142 if (bd == NULL) 1143 return (ddi_prop_op(dev, dip, prop_op, mod_flags, 1144 name, valuep, lengthp)); 1145 1146 return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name, 1147 valuep, lengthp, BDPART(dev), 0)); 1148 } 1149 1150 1151 static int 1152 bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start, 1153 size_t length, void *tg_cookie) 1154 { 1155 bd_t *bd; 1156 buf_t *bp; 
1157 bd_xfer_impl_t *xi; 1158 int rv; 1159 int (*func)(void *, bd_xfer_t *); 1160 int kmflag; 1161 1162 /* 1163 * If we are running in polled mode (such as during dump(9e) 1164 * execution), then we cannot sleep for kernel allocations. 1165 */ 1166 kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP; 1167 1168 bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip)); 1169 1170 if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) { 1171 /* We can only transfer whole blocks at a time! */ 1172 return (EINVAL); 1173 } 1174 1175 if ((bp = getrbuf(kmflag)) == NULL) { 1176 return (ENOMEM); 1177 } 1178 1179 switch (cmd) { 1180 case TG_READ: 1181 bp->b_flags = B_READ; 1182 func = bd->d_ops.o_read; 1183 break; 1184 case TG_WRITE: 1185 bp->b_flags = B_WRITE; 1186 func = bd->d_ops.o_write; 1187 break; 1188 default: 1189 freerbuf(bp); 1190 return (EINVAL); 1191 } 1192 1193 bp->b_un.b_addr = bufaddr; 1194 bp->b_bcount = length; 1195 xi = bd_xfer_alloc(bd, bp, func, kmflag); 1196 if (xi == NULL) { 1197 rv = geterror(bp); 1198 freerbuf(bp); 1199 return (rv); 1200 } 1201 xi->i_flags = tg_cookie ? BD_XFER_POLL : 0; 1202 xi->i_blkno = start; 1203 bd_submit(bd, xi); 1204 (void) biowait(bp); 1205 rv = geterror(bp); 1206 freerbuf(bp); 1207 1208 return (rv); 1209 } 1210 1211 static int 1212 bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) 1213 { 1214 bd_t *bd; 1215 1216 _NOTE(ARGUNUSED(tg_cookie)); 1217 bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip)); 1218 1219 switch (cmd) { 1220 case TG_GETPHYGEOM: 1221 case TG_GETVIRTGEOM: 1222 /* 1223 * We don't have any "geometry" as such, let cmlb 1224 * fabricate something. 
1225 */ 1226 return (ENOTTY); 1227 1228 case TG_GETCAPACITY: 1229 bd_update_state(bd); 1230 *(diskaddr_t *)arg = bd->d_numblks; 1231 return (0); 1232 1233 case TG_GETBLOCKSIZE: 1234 *(uint32_t *)arg = (1U << bd->d_blkshift); 1235 return (0); 1236 1237 case TG_GETATTR: 1238 /* 1239 * It turns out that cmlb really doesn't do much for 1240 * non-writable media, but lets make the information 1241 * available for it in case it does more in the 1242 * future. (The value is currently used for 1243 * triggering special behavior for CD-ROMs.) 1244 */ 1245 bd_update_state(bd); 1246 ((tg_attribute_t *)arg)->media_is_writable = 1247 bd->d_rdonly ? B_FALSE : B_TRUE; 1248 return (0); 1249 1250 default: 1251 return (EINVAL); 1252 } 1253 } 1254 1255 1256 static void 1257 bd_sched(bd_t *bd) 1258 { 1259 bd_xfer_impl_t *xi; 1260 struct buf *bp; 1261 int rv; 1262 1263 mutex_enter(&bd->d_iomutex); 1264 1265 while ((bd->d_qactive < bd->d_qsize) && 1266 ((xi = list_remove_head(&bd->d_waitq)) != NULL)) { 1267 bd->d_qactive++; 1268 kstat_waitq_to_runq(bd->d_kiop); 1269 list_insert_tail(&bd->d_runq, xi); 1270 1271 /* 1272 * Submit the job to the driver. We drop the I/O mutex 1273 * so that we can deal with the case where the driver 1274 * completion routine calls back into us synchronously. 
1275 */ 1276 1277 mutex_exit(&bd->d_iomutex); 1278 1279 rv = xi->i_func(bd->d_private, &xi->i_public); 1280 if (rv != 0) { 1281 bp = xi->i_bp; 1282 bd_xfer_free(xi); 1283 bioerror(bp, rv); 1284 biodone(bp); 1285 1286 mutex_enter(&bd->d_iomutex); 1287 bd->d_qactive--; 1288 kstat_runq_exit(bd->d_kiop); 1289 list_remove(&bd->d_runq, xi); 1290 } else { 1291 mutex_enter(&bd->d_iomutex); 1292 } 1293 } 1294 1295 mutex_exit(&bd->d_iomutex); 1296 } 1297 1298 static void 1299 bd_submit(bd_t *bd, bd_xfer_impl_t *xi) 1300 { 1301 mutex_enter(&bd->d_iomutex); 1302 list_insert_tail(&bd->d_waitq, xi); 1303 kstat_waitq_enter(bd->d_kiop); 1304 mutex_exit(&bd->d_iomutex); 1305 1306 bd_sched(bd); 1307 } 1308 1309 static void 1310 bd_runq_exit(bd_xfer_impl_t *xi, int err) 1311 { 1312 bd_t *bd = xi->i_bd; 1313 buf_t *bp = xi->i_bp; 1314 1315 mutex_enter(&bd->d_iomutex); 1316 bd->d_qactive--; 1317 kstat_runq_exit(bd->d_kiop); 1318 list_remove(&bd->d_runq, xi); 1319 mutex_exit(&bd->d_iomutex); 1320 1321 if (err == 0) { 1322 if (bp->b_flags & B_READ) { 1323 bd->d_kiop->reads++; 1324 bd->d_kiop->nread += (bp->b_bcount - xi->i_resid); 1325 } else { 1326 bd->d_kiop->writes++; 1327 bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid); 1328 } 1329 } 1330 bd_sched(bd); 1331 } 1332 1333 static void 1334 bd_update_state(bd_t *bd) 1335 { 1336 enum dkio_state state; 1337 bd_media_t media; 1338 boolean_t docmlb = B_FALSE; 1339 1340 bzero(&media, sizeof (media)); 1341 1342 mutex_enter(&bd->d_statemutex); 1343 if (bd->d_ops.o_media_info(bd->d_private, &media) == 0) { 1344 if ((1U << bd->d_blkshift) != media.m_blksize) { 1345 if ((media.m_blksize < 512) || 1346 (!ISP2(media.m_blksize)) || 1347 (P2PHASE(bd->d_maxxfer, media.m_blksize))) { 1348 cmn_err(CE_WARN, 1349 "%s%d: Invalid media block size (%d)", 1350 ddi_driver_name(bd->d_dip), 1351 ddi_get_instance(bd->d_dip), 1352 media.m_blksize); 1353 /* 1354 * We can't use the media, treat it as 1355 * not present. 
1356 */ 1357 state = DKIO_EJECTED; 1358 bd->d_numblks = 0; 1359 } else { 1360 bd->d_blkshift = ddi_ffs(media.m_blksize) - 1; 1361 bd->d_numblks = media.m_nblks; 1362 bd->d_rdonly = media.m_readonly; 1363 state = DKIO_INSERTED; 1364 } 1365 1366 /* Device size changed */ 1367 docmlb = B_TRUE; 1368 1369 } else { 1370 if (bd->d_numblks != media.m_nblks) { 1371 /* Device size changed */ 1372 docmlb = B_TRUE; 1373 } 1374 bd->d_numblks = media.m_nblks; 1375 bd->d_rdonly = media.m_readonly; 1376 state = DKIO_INSERTED; 1377 } 1378 1379 } else { 1380 bd->d_numblks = 0; 1381 state = DKIO_EJECTED; 1382 } 1383 if (state != bd->d_state) { 1384 bd->d_state = state; 1385 cv_broadcast(&bd->d_statecv); 1386 docmlb = B_TRUE; 1387 } 1388 mutex_exit(&bd->d_statemutex); 1389 1390 if (docmlb) { 1391 if (state == DKIO_INSERTED) { 1392 (void) cmlb_validate(bd->d_cmlbh, 0, 0); 1393 } else { 1394 cmlb_invalidate(bd->d_cmlbh, 0); 1395 } 1396 } 1397 } 1398 1399 static int 1400 bd_check_state(bd_t *bd, enum dkio_state *state) 1401 { 1402 clock_t when; 1403 1404 for (;;) { 1405 1406 bd_update_state(bd); 1407 1408 mutex_enter(&bd->d_statemutex); 1409 1410 if (bd->d_state != *state) { 1411 *state = bd->d_state; 1412 mutex_exit(&bd->d_statemutex); 1413 break; 1414 } 1415 1416 when = drv_usectohz(1000000); 1417 if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex, 1418 when, TR_CLOCK_TICK) == 0) { 1419 mutex_exit(&bd->d_statemutex); 1420 return (EINTR); 1421 } 1422 1423 mutex_exit(&bd->d_statemutex); 1424 } 1425 1426 return (0); 1427 } 1428 1429 static int 1430 bd_flush_write_cache_done(struct buf *bp) 1431 { 1432 struct dk_callback *dc = (void *)bp->b_private; 1433 1434 (*dc->dkc_callback)(dc->dkc_cookie, geterror(bp)); 1435 kmem_free(dc, sizeof (*dc)); 1436 freerbuf(bp); 1437 return (0); 1438 } 1439 1440 static int 1441 bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc) 1442 { 1443 buf_t *bp; 1444 struct dk_callback *dc; 1445 bd_xfer_impl_t *xi; 1446 int rv; 1447 1448 if 
(bd->d_ops.o_sync_cache == NULL) { 1449 return (ENOTSUP); 1450 } 1451 if ((bp = getrbuf(KM_SLEEP)) == NULL) { 1452 return (ENOMEM); 1453 } 1454 bp->b_resid = 0; 1455 bp->b_bcount = 0; 1456 1457 xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP); 1458 if (xi == NULL) { 1459 rv = geterror(bp); 1460 freerbuf(bp); 1461 return (rv); 1462 } 1463 1464 /* Make an asynchronous flush, but only if there is a callback */ 1465 if (dkc != NULL && dkc->dkc_callback != NULL) { 1466 /* Make a private copy of the callback structure */ 1467 dc = kmem_alloc(sizeof (*dc), KM_SLEEP); 1468 *dc = *dkc; 1469 bp->b_private = dc; 1470 bp->b_iodone = bd_flush_write_cache_done; 1471 1472 bd_submit(bd, xi); 1473 return (0); 1474 } 1475 1476 /* In case there is no callback, perform a synchronous flush */ 1477 bd_submit(bd, xi); 1478 (void) biowait(bp); 1479 rv = geterror(bp); 1480 freerbuf(bp); 1481 1482 return (rv); 1483 } 1484 1485 /* 1486 * Nexus support. 1487 */ 1488 int 1489 bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop, 1490 void *arg, void *result) 1491 { 1492 bd_handle_t hdl; 1493 1494 switch (ctlop) { 1495 case DDI_CTLOPS_REPORTDEV: 1496 cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n", 1497 ddi_node_name(rdip), ddi_get_name_addr(rdip), 1498 ddi_driver_name(rdip), ddi_get_instance(rdip)); 1499 return (DDI_SUCCESS); 1500 1501 case DDI_CTLOPS_INITCHILD: 1502 hdl = ddi_get_parent_data((dev_info_t *)arg); 1503 if (hdl == NULL) { 1504 return (DDI_NOT_WELL_FORMED); 1505 } 1506 ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr); 1507 return (DDI_SUCCESS); 1508 1509 case DDI_CTLOPS_UNINITCHILD: 1510 ddi_set_name_addr((dev_info_t *)arg, NULL); 1511 ndi_prop_remove_all((dev_info_t *)arg); 1512 return (DDI_SUCCESS); 1513 1514 default: 1515 return (ddi_ctlops(dip, rdip, ctlop, arg, result)); 1516 } 1517 } 1518 1519 /* 1520 * Functions for device drivers. 
1521 */ 1522 bd_handle_t 1523 bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag) 1524 { 1525 bd_handle_t hdl; 1526 1527 hdl = kmem_zalloc(sizeof (*hdl), kmflag); 1528 if (hdl != NULL) { 1529 hdl->h_ops = *ops; 1530 hdl->h_dma = dma; 1531 hdl->h_private = private; 1532 } 1533 1534 return (hdl); 1535 } 1536 1537 void 1538 bd_free_handle(bd_handle_t hdl) 1539 { 1540 kmem_free(hdl, sizeof (*hdl)); 1541 } 1542 1543 int 1544 bd_attach_handle(dev_info_t *dip, bd_handle_t hdl) 1545 { 1546 dev_info_t *child; 1547 bd_drive_t drive; 1548 1549 /* if drivers don't override this, make it assume none */ 1550 drive.d_lun = -1; 1551 hdl->h_ops.o_drive_info(hdl->h_private, &drive); 1552 1553 hdl->h_parent = dip; 1554 hdl->h_name = "blkdev"; 1555 1556 if (drive.d_lun >= 0) { 1557 (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X,%X", 1558 drive.d_target, drive.d_lun); 1559 } else { 1560 (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X", 1561 drive.d_target); 1562 } 1563 if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID, 1564 &child) != NDI_SUCCESS) { 1565 cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s", 1566 ddi_driver_name(dip), ddi_get_instance(dip), 1567 "blkdev", hdl->h_addr); 1568 return (DDI_FAILURE); 1569 } 1570 1571 ddi_set_parent_data(child, hdl); 1572 hdl->h_child = child; 1573 1574 if (ndi_devi_online(child, 0) == NDI_FAILURE) { 1575 cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online", 1576 ddi_driver_name(dip), ddi_get_instance(dip), 1577 hdl->h_name, hdl->h_addr); 1578 (void) ndi_devi_free(child); 1579 return (DDI_FAILURE); 1580 } 1581 1582 return (DDI_SUCCESS); 1583 } 1584 1585 int 1586 bd_detach_handle(bd_handle_t hdl) 1587 { 1588 int circ; 1589 int rv; 1590 char *devnm; 1591 1592 if (hdl->h_child == NULL) { 1593 return (DDI_SUCCESS); 1594 } 1595 ndi_devi_enter(hdl->h_parent, &circ); 1596 if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) { 1597 rv = ddi_remove_child(hdl->h_child, 0); 1598 } else { 1599 
devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP); 1600 (void) ddi_deviname(hdl->h_child, devnm); 1601 (void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE); 1602 rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL, 1603 NDI_DEVI_REMOVE | NDI_UNCONFIG); 1604 kmem_free(devnm, MAXNAMELEN + 1); 1605 } 1606 if (rv == 0) { 1607 hdl->h_child = NULL; 1608 } 1609 1610 ndi_devi_exit(hdl->h_parent, circ); 1611 return (rv = NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE); 1612 } 1613 1614 void 1615 bd_xfer_done(bd_xfer_t *xfer, int err) 1616 { 1617 bd_xfer_impl_t *xi = (void *)xfer; 1618 buf_t *bp = xi->i_bp; 1619 int rv; 1620 bd_t *bd = xi->i_bd; 1621 size_t len; 1622 1623 if (err != 0) { 1624 bd_runq_exit(xi, err); 1625 1626 bp->b_resid += xi->i_resid; 1627 bd_xfer_free(xi); 1628 bioerror(bp, err); 1629 biodone(bp); 1630 return; 1631 } 1632 1633 xi->i_cur_win++; 1634 xi->i_resid -= xi->i_len; 1635 1636 if (xi->i_resid == 0) { 1637 /* Job completed succcessfully! */ 1638 bd_runq_exit(xi, 0); 1639 1640 bd_xfer_free(xi); 1641 biodone(bp); 1642 return; 1643 } 1644 1645 xi->i_blkno += xi->i_nblks; 1646 1647 if (bd->d_use_dma) { 1648 /* More transfer still pending... advance to next DMA window. */ 1649 rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win, 1650 &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac); 1651 } else { 1652 /* Advance memory window. */ 1653 xi->i_kaddr += xi->i_len; 1654 xi->i_offset += xi->i_len; 1655 len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer); 1656 } 1657 1658 1659 if ((rv != DDI_SUCCESS) || 1660 (P2PHASE(len, (1U << xi->i_blkshift) != 0))) { 1661 bd_runq_exit(xi, EFAULT); 1662 1663 bp->b_resid += xi->i_resid; 1664 bd_xfer_free(xi); 1665 bioerror(bp, EFAULT); 1666 biodone(bp); 1667 return; 1668 } 1669 xi->i_len = len; 1670 xi->i_nblks = len >> xi->i_blkshift; 1671 1672 /* Submit next window to hardware. 
*/ 1673 rv = xi->i_func(bd->d_private, &xi->i_public); 1674 if (rv != 0) { 1675 bd_runq_exit(xi, rv); 1676 1677 bp->b_resid += xi->i_resid; 1678 bd_xfer_free(xi); 1679 bioerror(bp, rv); 1680 biodone(bp); 1681 } 1682 } 1683 1684 void 1685 bd_state_change(bd_handle_t hdl) 1686 { 1687 bd_t *bd; 1688 1689 if ((bd = hdl->h_bd) != NULL) { 1690 bd_update_state(bd); 1691 } 1692 } 1693 1694 void 1695 bd_mod_init(struct dev_ops *devops) 1696 { 1697 static struct bus_ops bd_bus_ops = { 1698 BUSO_REV, /* busops_rev */ 1699 nullbusmap, /* bus_map */ 1700 NULL, /* bus_get_intrspec (OBSOLETE) */ 1701 NULL, /* bus_add_intrspec (OBSOLETE) */ 1702 NULL, /* bus_remove_intrspec (OBSOLETE) */ 1703 i_ddi_map_fault, /* bus_map_fault */ 1704 NULL, /* bus_dma_map (OBSOLETE) */ 1705 ddi_dma_allochdl, /* bus_dma_allochdl */ 1706 ddi_dma_freehdl, /* bus_dma_freehdl */ 1707 ddi_dma_bindhdl, /* bus_dma_bindhdl */ 1708 ddi_dma_unbindhdl, /* bus_dma_unbindhdl */ 1709 ddi_dma_flush, /* bus_dma_flush */ 1710 ddi_dma_win, /* bus_dma_win */ 1711 ddi_dma_mctl, /* bus_dma_ctl */ 1712 bd_bus_ctl, /* bus_ctl */ 1713 ddi_bus_prop_op, /* bus_prop_op */ 1714 NULL, /* bus_get_eventcookie */ 1715 NULL, /* bus_add_eventcall */ 1716 NULL, /* bus_remove_eventcall */ 1717 NULL, /* bus_post_event */ 1718 NULL, /* bus_intr_ctl (OBSOLETE) */ 1719 NULL, /* bus_config */ 1720 NULL, /* bus_unconfig */ 1721 NULL, /* bus_fm_init */ 1722 NULL, /* bus_fm_fini */ 1723 NULL, /* bus_fm_access_enter */ 1724 NULL, /* bus_fm_access_exit */ 1725 NULL, /* bus_power */ 1726 NULL, /* bus_intr_op */ 1727 }; 1728 1729 devops->devo_bus_ops = &bd_bus_ops; 1730 1731 /* 1732 * NB: The device driver is free to supply its own 1733 * character entry device support. 1734 */ 1735 } 1736 1737 void 1738 bd_mod_fini(struct dev_ops *devops) 1739 { 1740 devops->devo_bus_ops = NULL; 1741 } 1742