/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
 * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/aio_req.h>
#include <sys/cred.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/kstat.h>
#include <sys/fs/dv_node.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/note.h>
#include <sys/blkdev.h>

#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)
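
/*
 * Illustrative note (editorial, not from the original source): each
 * instance owns a contiguous block of BD_MAXPART (64) minor numbers, so
 * for example minor 133 decodes to instance 133 / 64 == 2 and partition
 * 133 % 64 == 5.
 */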

typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;

struct bd {
	void		*d_private;
	dev_info_t	*d_dip;
	kmutex_t	d_ocmutex;
	kmutex_t	d_iomutex;
	kmutex_t	d_statemutex;
	kcondvar_t	d_statecv;
	enum dkio_state	d_state;
	cmlb_handle_t	d_cmlbh;
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];	/* bit mask */

	uint32_t	d_qsize;
	uint32_t	d_qactive;
	uint32_t	d_maxxfer;
	uint32_t	d_blkshift;
	uint64_t	d_numblks;
	ddi_devid_t	d_devid;

	kmem_cache_t	*d_cache;
	list_t		d_runq;
	list_t		d_waitq;
	kstat_t		*d_ksp;
	kstat_io_t	*d_kiop;

	boolean_t	d_rdonly;
	boolean_t	d_removable;
	boolean_t	d_hotpluggable;
	boolean_t	d_use_dma;

	ddi_dma_attr_t	d_dma;
	bd_ops_t	d_ops;
	bd_handle_t	d_handle;
};

struct bd_handle {
	bd_ops_t	h_ops;
	ddi_dma_attr_t	*h_dma;
	dev_info_t	*h_parent;
	dev_info_t	*h_child;
	void		*h_private;
	bd_t		*h_bd;
	char		*h_name;
	char		h_addr[20];	/* enough for %X,%X */
};

struct bd_xfer_impl {
	bd_xfer_t	i_public;
	list_node_t	i_linkage;
	bd_t		*i_bd;
	buf_t		*i_bp;
	uint_t		i_num_win;
	uint_t		i_cur_win;
	off_t		i_offset;
	int		(*i_func)(void *, bd_xfer_t *);
	uint32_t	i_blkshift;
	size_t		i_len;
	size_t		i_resid;
};

#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags


/*
 * Private prototypes.
 */

static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);

struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};

static struct cb_ops bd_cb_ops = {
	bd_open,		/* open */
	bd_close,		/* close */
	bd_strategy,		/* strategy */
	nodev,			/* print */
	bd_dump,		/* dump */
	bd_read,		/* read */
	bd_write,		/* write */
	bd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	bd_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_64BIT | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	bd_aread,		/* async read */
	bd_awrite		/* async write */
};

struct dev_ops bd_dev_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	bd_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	bd_attach,		/* attach */
	bd_detach,		/* detach */
	nodev,			/* reset */
	&bd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"Generic Block Device",
	&bd_dev_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, { &modldrv, NULL }
};

static void *bd_state;
static krwlock_t bd_lock;

int
_init(void)
{
	int	rv;

	rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
	if (rv != DDI_SUCCESS) {
		return (rv);
	}
	rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
	rv = mod_install(&modlinkage);
	if (rv != DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_fini(void)
{
	int	rv;

	rv = mod_remove(&modlinkage);
	if (rv == DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static int
bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	bd_t	*bd;
	minor_t	inst;

	_NOTE(ARGUNUSED(dip));

	inst = BDINST((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		bd = ddi_get_soft_state(bd_state, inst);
		if (bd == NULL) {
			return (DDI_FAILURE);
		}
		*resultp = (void *)bd->d_dip;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(intptr_t)inst;
		break;

	default:
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

static int
bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int		inst;
	bd_handle_t	hdl;
	bd_t		*bd;
	bd_drive_t	drive;
	int		rv;
	char		name[16];
	char		kcache[32];

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* We don't do anything native for suspend/resume */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	inst = ddi_get_instance(dip);
	hdl = ddi_get_parent_data(dip);

	(void) snprintf(name, sizeof (name), "%s%d",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);

	if (hdl == NULL) {
		cmn_err(CE_WARN, "%s: missing parent data!", name);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
		return (DDI_FAILURE);
	}
	bd = ddi_get_soft_state(bd_state, inst);

	if (hdl->h_dma) {
		bd->d_dma = *(hdl->h_dma);
		bd->d_dma.dma_attr_granular =
		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
		bd->d_use_dma = B_TRUE;

		if (bd->d_maxxfer &&
		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
			cmn_err(CE_WARN,
			    "%s: inconsistent maximum transfer size!",
			    name);
			/* We force it */
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		} else {
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		}
	} else {
		bd->d_use_dma = B_FALSE;
		if (bd->d_maxxfer == 0) {
			bd->d_maxxfer = 1024 * 1024;
		}
	}
	bd->d_ops = hdl->h_ops;
	bd->d_private = hdl->h_private;
	bd->d_blkshift = 9;	/* 512 bytes, to start */

	if (bd->d_maxxfer % DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
	}
	if (bd->d_maxxfer < DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	bd->d_dip = dip;
	bd->d_handle = hdl;
	hdl->h_bd = bd;
	ddi_set_driver_private(dip, bd);

	mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);

	list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));
	list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));

	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);

	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
	if (bd->d_ksp != NULL) {
		bd->d_ksp->ks_lock = &bd->d_iomutex;
		kstat_install(bd->d_ksp);
		bd->d_kiop = bd->d_ksp->ks_data;
	} else {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat.  The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
	}

	cmlb_alloc_handle(&bd->d_cmlbh);

	bd->d_state = DKIO_NONE;

	bzero(&drive, sizeof (drive));
	bd->d_ops.o_drive_info(bd->d_private, &drive);
	bd->d_qsize = drive.d_qsize;
	bd->d_removable = drive.d_removable;
	bd->d_hotpluggable = drive.d_hotpluggable;

	if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
		bd->d_maxxfer = drive.d_maxxfer;


	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
	    bd->d_removable, bd->d_hotpluggable,
	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
	if (rv != 0) {
		cmlb_free_handle(&bd->d_cmlbh);
		kmem_cache_destroy(bd->d_cache);
		mutex_destroy(&bd->d_iomutex);
		mutex_destroy(&bd->d_ocmutex);
		mutex_destroy(&bd->d_statemutex);
		cv_destroy(&bd->d_statecv);
		list_destroy(&bd->d_waitq);
		list_destroy(&bd->d_runq);
		if (bd->d_ksp != NULL) {
			kstat_delete(bd->d_ksp);
			bd->d_ksp = NULL;
		} else {
			kmem_free(bd->d_kiop, sizeof (kstat_io_t));
		}
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	if (bd->d_ops.o_devid_init != NULL) {
		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
		if (rv == DDI_SUCCESS) {
			if (ddi_devid_register(dip, bd->d_devid) !=
			    DDI_SUCCESS) {
				cmn_err(CE_WARN,
				    "%s: unable to register devid", name);
			}
		}
	}

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers).  Also set up properties
	 * used by HAL to identify removable media.
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);
	if (bd->d_removable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "removable-media", NULL, 0);
	}
	if (bd->d_hotpluggable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "hotpluggable", NULL, 0);
	}

	ddi_report_dev(dip);

	return (DDI_SUCCESS);
}

static int
bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	bd_t	*bd;

	bd = ddi_get_driver_private(dip);

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		/* We don't suspend, but our parent does */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
	if (bd->d_ksp != NULL) {
		kstat_delete(bd->d_ksp);
		bd->d_ksp = NULL;
	} else {
		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
	}
	cmlb_detach(bd->d_cmlbh, 0);
	cmlb_free_handle(&bd->d_cmlbh);
	if (bd->d_devid)
		ddi_devid_free(bd->d_devid);
	kmem_cache_destroy(bd->d_cache);
	mutex_destroy(&bd->d_iomutex);
	mutex_destroy(&bd->d_ocmutex);
	mutex_destroy(&bd->d_statemutex);
	cv_destroy(&bd->d_statecv);
	list_destroy(&bd->d_waitq);
	list_destroy(&bd->d_runq);
	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
	return (DDI_SUCCESS);
}

static int
bd_xfer_ctor(void *buf, void *arg, int kmflag)
{
	bd_xfer_impl_t	*xi;
	bd_t		*bd = arg;
	int		(*dcb)(caddr_t);

	if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
		dcb = DDI_DMA_SLEEP;
	} else {
		dcb = DDI_DMA_DONTWAIT;
	}

	xi = buf;
	bzero(xi, sizeof (*xi));
	xi->i_bd = bd;

	if (bd->d_use_dma) {
		if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
		    &xi->i_dmah) != DDI_SUCCESS) {
			return (-1);
		}
	}

	return (0);
}

static void
bd_xfer_dtor(void *buf, void *arg)
{
	bd_xfer_impl_t	*xi = buf;

	_NOTE(ARGUNUSED(arg));

	if (xi->i_dmah)
		ddi_dma_free_handle(&xi->i_dmah);
	xi->i_dmah = NULL;
}
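
/*
 * Editorial sketch (an annotation on intent, not original text): the
 * constructor above runs once per cached object, so the DMA handle it
 * allocates is reused across many transfers.  The per-I/O lifecycle is
 * roughly:
 *
 *	bd_attach()	-> kmem_cache_create(..., bd_xfer_ctor, ...)
 *	bd_xfer_alloc()	-> kmem_cache_alloc() + ddi_dma_buf_bind_handle()
 *	bd_xfer_free()	-> ddi_dma_unbind_handle() + kmem_cache_free()
 *	bd_detach()	-> kmem_cache_destroy() (runs bd_xfer_dtor)
 *
 * Binding, not handle allocation, is the per-transfer cost.
 */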

static bd_xfer_impl_t *
bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
    int kmflag)
{
	bd_xfer_impl_t	*xi;
	int		rv;
	int		status;
	unsigned	dir;
	int		(*cb)(caddr_t);
	size_t		len;
	uint32_t	shift;

	if (kmflag == KM_SLEEP) {
		cb = DDI_DMA_SLEEP;
	} else {
		cb = DDI_DMA_DONTWAIT;
	}

	xi = kmem_cache_alloc(bd->d_cache, kmflag);
	if (xi == NULL) {
		bioerror(bp, ENOMEM);
		return (NULL);
	}

	ASSERT(bp);

	xi->i_bp = bp;
	xi->i_func = func;
	xi->i_blkno = bp->b_lblkno;

	if (bp->b_bcount == 0) {
		xi->i_len = 0;
		xi->i_nblks = 0;
		xi->i_kaddr = NULL;
		xi->i_resid = 0;
		xi->i_num_win = 0;
		goto done;
	}

	if (bp->b_flags & B_READ) {
		dir = DDI_DMA_READ;
		xi->i_func = bd->d_ops.o_read;
	} else {
		dir = DDI_DMA_WRITE;
		xi->i_func = bd->d_ops.o_write;
	}

	shift = bd->d_blkshift;
	xi->i_blkshift = shift;

	if (!bd->d_use_dma) {
		bp_mapin(bp);
		rv = 0;
		xi->i_offset = 0;
		xi->i_num_win =
		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
		xi->i_cur_win = 0;
		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
		xi->i_nblks = xi->i_len >> shift;
		xi->i_kaddr = bp->b_un.b_addr;
		xi->i_resid = bp->b_bcount;
	} else {

		/*
		 * We have to use consistent DMA if the address is misaligned.
		 */
		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
		} else {
			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
		}

		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
		    NULL, &xi->i_dmac, &xi->i_ndmac);
		switch (status) {
		case DDI_DMA_MAPPED:
			xi->i_num_win = 1;
			xi->i_cur_win = 0;
			xi->i_offset = 0;
			xi->i_len = bp->b_bcount;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_PARTIAL_MAP:
			xi->i_cur_win = 0;

			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
			    DDI_SUCCESS) ||
			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
			    &len, &xi->i_dmac, &xi->i_ndmac) !=
			    DDI_SUCCESS) ||
			    (P2PHASE(len, (1U << shift)) != 0)) {
				(void) ddi_dma_unbind_handle(xi->i_dmah);
				rv = EFAULT;
				goto done;
			}
			xi->i_len = len;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_NORESOURCES:
			rv = EAGAIN;
			goto done;
		case DDI_DMA_TOOBIG:
			rv = EINVAL;
			goto done;
		case DDI_DMA_NOMAPPING:
		case DDI_DMA_INUSE:
		default:
			rv = EFAULT;
			goto done;
		}
	}

done:
	if (rv != 0) {
		kmem_cache_free(bd->d_cache, xi);
		bioerror(bp, rv);
		return (NULL);
	}

	return (xi);
}

static void
bd_xfer_free(bd_xfer_impl_t *xi)
{
	if (xi->i_dmah) {
		(void) ddi_dma_unbind_handle(xi->i_dmah);
	}
	kmem_cache_free(xi->i_bd->d_cache, xi);
}

static int
bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	dev_t		dev = *devp;
	bd_t		*bd;
	minor_t		part;
	minor_t		inst;
	uint64_t	mask;
	boolean_t	ndelay;
	int		rv;
	diskaddr_t	nblks;
	diskaddr_t	lba;

	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	if (otyp >= OTYPCNT)
		return (EINVAL);

	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;

	/*
	 * Block any DR events from changing the set of registered
	 * devices while we function.
	 */
	rw_enter(&bd_lock, RW_READER);
	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);

	ASSERT(part < 64);
	mask = (1ULL << part);

	bd_update_state(bd);

	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {

		/* non-blocking opens are allowed to succeed */
		if (!ndelay) {
			rv = ENXIO;
			goto done;
		}
	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
	    NULL, NULL, 0) == 0) {

		/*
		 * We read the partinfo, verify valid ranges.  If the
		 * partition is invalid, and we aren't blocking or
		 * doing a raw access, then fail.  (Non-blocking and
		 * raw accesses can still succeed to allow a disk with
		 * bad partition data to be opened by format and fdisk.)
		 */
		if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
			rv = ENXIO;
			goto done;
		}
	} else if (!ndelay) {
		/*
		 * cmlb_partinfo failed -- invalid partition or no
		 * disk label.
		 */
		rv = ENXIO;
		goto done;
	}

	if ((flag & FWRITE) && bd->d_rdonly) {
		rv = EROFS;
		goto done;
	}

	if ((bd->d_open_excl) & (mask)) {
		rv = EBUSY;
		goto done;
	}
	if (flag & FEXCL) {
		if (bd->d_open_lyr[part]) {
			rv = EBUSY;
			goto done;
		}
		for (int i = 0; i < OTYP_LYR; i++) {
			if (bd->d_open_reg[i] & mask) {
				rv = EBUSY;
				goto done;
			}
		}
	}

	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]++;
	} else {
		bd->d_open_reg[otyp] |= mask;
	}
	if (flag & FEXCL) {
		bd->d_open_excl |= mask;
	}

	rv = 0;
done:
	mutex_exit(&bd->d_ocmutex);
	rw_exit(&bd_lock);

	return (rv);
}

static int
bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	bd_t		*bd;
	minor_t		inst;
	minor_t		part;
	uint64_t	mask;
	boolean_t	last = B_TRUE;

	_NOTE(ARGUNUSED(flag));
	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	ASSERT(part < 64);
	mask = (1ULL << part);

	rw_enter(&bd_lock, RW_READER);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);
	if (bd->d_open_excl & mask) {
		bd->d_open_excl &= ~mask;
	}
	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]--;
	} else {
		bd->d_open_reg[otyp] &= ~mask;
	}
	for (int i = 0; i < 64; i++) {
		if (bd->d_open_lyr[i]) {
			last = B_FALSE;
		}
	}
	for (int i = 0; last && (i < OTYP_LYR); i++) {
		if (bd->d_open_reg[i]) {
			last = B_FALSE;
		}
	}
	mutex_exit(&bd->d_ocmutex);

	if (last) {
		cmlb_invalidate(bd->d_cmlbh, 0);
	}
	rw_exit(&bd_lock);

	return (0);
}

static int
bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
{
	minor_t		inst;
	minor_t		part;
	diskaddr_t	pstart;
	diskaddr_t	psize;
	bd_t		*bd;
	bd_xfer_impl_t	*xi;
	buf_t		*bp;
	int		rv;

	rw_enter(&bd_lock, RW_READER);

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}
	/*
	 * do cmlb, but do it synchronously unless we already have the
	 * partition (which we probably should.)
	 */
	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
	    (void *)1)) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	if ((blkno + nblk) > psize) {
		rw_exit(&bd_lock);
		return (EINVAL);
	}
	bp = getrbuf(KM_NOSLEEP);
	if (bp == NULL) {
		rw_exit(&bd_lock);
		return (ENOMEM);
	}

	bp->b_bcount = nblk << bd->d_blkshift;
	bp->b_resid = bp->b_bcount;
	bp->b_lblkno = blkno;
	bp->b_un.b_addr = caddr;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP);
	if (xi == NULL) {
		rw_exit(&bd_lock);
		freerbuf(bp);
		return (ENOMEM);
	}
	xi->i_blkno = blkno + pstart;
	xi->i_flags = BD_XFER_POLL;
	bd_submit(bd, xi);
	rw_exit(&bd_lock);

	/*
	 * Generally, we should have run this entirely synchronously
	 * at this point and the biowait call should be a no-op.  If
	 * it didn't happen this way, it's a bug in the underlying
	 * driver not honoring BD_XFER_POLL.
	 */
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}

void
bd_minphys(struct buf *bp)
{
	minor_t inst;
	bd_t	*bd;
	inst = BDINST(bp->b_edev);

	bd = ddi_get_soft_state(bd_state, inst);

	/*
	 * In a non-debug kernel, bd_strategy will catch !bd as
	 * well, and will fail nicely.
	 */
	ASSERT(bd);

	if (bp->b_bcount > bd->d_maxxfer)
		bp->b_bcount = bd->d_maxxfer;
}

static int
bd_read(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
}

static int
bd_write(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
}

static int
bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
}

static int
bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
}

static int
bd_strategy(struct buf *bp)
{
	minor_t		inst;
	minor_t		part;
	bd_t		*bd;
	diskaddr_t	p_lba;
	diskaddr_t	p_nblks;
	diskaddr_t	b_nblks;
	bd_xfer_impl_t	*xi;
	uint32_t	shift;
	int		(*func)(void *, bd_xfer_t *);

	part = BDPART(bp->b_edev);
	inst = BDINST(bp->b_edev);

	ASSERT(bp);

	bp->b_resid = bp->b_bcount;

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
	    NULL, NULL, 0)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	shift = bd->d_blkshift;

	if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
	    (bp->b_lblkno > p_nblks)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}
	b_nblks = bp->b_bcount >> shift;
	if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
		biodone(bp);
		return (0);
	}

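	/*
	 * Illustrative note (editorial, not original text): a request that
	 * runs past the end of the partition is clipped rather than
	 * rejected.  For example, with p_nblks == 100, b_lblkno == 96, and
	 * b_nblks == 8, b_resid becomes 4 blocks and b_bcount is trimmed
	 * to the 4 blocks that actually fit.
	 */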
	if ((b_nblks + bp->b_lblkno) > p_nblks) {
		bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
		bp->b_bcount -= bp->b_resid;
	} else {
		bp->b_resid = 0;
	}
	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;

	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
	if (xi == NULL) {
		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
	}
	if (xi == NULL) {
		/* bd_xfer_alloc will have done bioerror */
		biodone(bp);
		return (0);
	}
	xi->i_blkno = bp->b_lblkno + p_lba;

	bd_submit(bd, xi);

	return (0);
}

static int
bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
{
	minor_t		inst;
	uint16_t	part;
	bd_t		*bd;
	void		*ptr = (void *)arg;
	int		rv;

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		return (ENXIO);
	}

	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
	if (rv != ENOTTY)
		return (rv);

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo minfo;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&minfo, sizeof (minfo));
		minfo.dki_media_type = DK_FIXED_DISK;
		minfo.dki_lbsize = (1U << bd->d_blkshift);
		minfo.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo cinfo;
		bzero(&cinfo, sizeof (cinfo));
		cinfo.dki_ctype = DKC_BLKDEV;
		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
		    "%s", ddi_driver_name(bd->d_dip));
		cinfo.dki_unit = inst;
		cinfo.dki_flags = DKI_FMTVOL;
		cinfo.dki_partition = part;
		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
		cinfo.dki_addr = 0;
		cinfo.dki_slave = 0;
		cinfo.dki_space = 0;
		cinfo.dki_prio = 0;
		cinfo.dki_vec = 0;
		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i;
		i = bd->d_removable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCHOTPLUGGABLE: {
		int i;
		i = bd->d_hotpluggable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREADONLY: {
		int i;
		i = bd->d_rdonly ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSTATE: {
		enum dkio_state	state;
		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
			return (EFAULT);
		}
		if ((rv = bd_check_state(bd, &state)) != 0) {
			return (rv);
		}
		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc = NULL;

		if (flag & FKIOCTL)
			dkc = (void *)arg;

		rv = bd_flush_write_cache(bd, dkc);
		return (rv);
	}

	default:
		break;

	}
	return (ENOTTY);
}

static int
bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	bd_t	*bd;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
	if (bd == NULL)
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));

	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
	    valuep, lengthp, BDPART(dev), 0));
}


static int
bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
	bd_t		*bd;
	buf_t		*bp;
	bd_xfer_impl_t	*xi;
	int		rv;
	int		(*func)(void *, bd_xfer_t *);
	int		kmflag;

	/*
	 * If we are running in polled mode (such as during dump(9e)
	 * execution), then we cannot sleep for kernel allocations.
	 */
	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	if ((bp = getrbuf(kmflag)) == NULL) {
		return (ENOMEM);
	}

	switch (cmd) {
	case TG_READ:
		bp->b_flags = B_READ;
		func = bd->d_ops.o_read;
		break;
	case TG_WRITE:
		bp->b_flags = B_WRITE;
		func = bd->d_ops.o_write;
		break;
	default:
		freerbuf(bp);
		return (EINVAL);
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	xi = bd_xfer_alloc(bd, bp, func, kmflag);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}
	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
	xi->i_blkno = start;
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

static int
bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	bd_t		*bd;

	_NOTE(ARGUNUSED(tg_cookie));
	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	switch (cmd) {
	case TG_GETPHYGEOM:
	case TG_GETVIRTGEOM:
		/*
		 * We don't have any "geometry" as such, let cmlb
		 * fabricate something.
		 */
		return (ENOTTY);

	case TG_GETCAPACITY:
		bd_update_state(bd);
		*(diskaddr_t *)arg = bd->d_numblks;
		return (0);

	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = (1U << bd->d_blkshift);
		return (0);

	case TG_GETATTR:
		/*
		 * It turns out that cmlb really doesn't do much for
		 * non-writable media, but let's make the information
		 * available for it in case it does more in the
		 * future.  (The value is currently used for
		 * triggering special behavior for CD-ROMs.)
		 */
		bd_update_state(bd);
		((tg_attribute_t *)arg)->media_is_writable =
		    bd->d_rdonly ? B_FALSE : B_TRUE;
		return (0);

	default:
		return (EINVAL);
	}
}
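
/*
 * Editorial sketch of the I/O scheduling flow below (an annotation, not
 * original text):
 *
 *	bd_submit()	appends the transfer to d_waitq
 *	bd_sched()	moves transfers to d_runq while d_qactive < d_qsize
 *			and hands each one to the driver via i_func()
 *	bd_xfer_done()	(called by the driver) -> bd_runq_exit()
 *	bd_runq_exit()	removes the transfer from d_runq and calls
 *			bd_sched() again to keep the queue primed
 */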

static void
bd_sched(bd_t *bd)
{
	bd_xfer_impl_t	*xi;
	struct buf	*bp;
	int		rv;

	mutex_enter(&bd->d_iomutex);

	while ((bd->d_qactive < bd->d_qsize) &&
	    ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
		bd->d_qactive++;
		kstat_waitq_to_runq(bd->d_kiop);
		list_insert_tail(&bd->d_runq, xi);

		/*
		 * Submit the job to the driver.  We drop the I/O mutex
		 * so that we can deal with the case where the driver
		 * completion routine calls back into us synchronously.
		 */

		mutex_exit(&bd->d_iomutex);

		rv = xi->i_func(bd->d_private, &xi->i_public);
		if (rv != 0) {
			bp = xi->i_bp;

			/*
			 * Take the failed transfer off the run queue
			 * before freeing it.
			 */
			mutex_enter(&bd->d_iomutex);
			bd->d_qactive--;
			kstat_runq_exit(bd->d_kiop);
			list_remove(&bd->d_runq, xi);
			mutex_exit(&bd->d_iomutex);

			bd_xfer_free(xi);
			bioerror(bp, rv);
			biodone(bp);

			mutex_enter(&bd->d_iomutex);
		} else {
			mutex_enter(&bd->d_iomutex);
		}
	}

	mutex_exit(&bd->d_iomutex);
}

static void
bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
{
	mutex_enter(&bd->d_iomutex);
	list_insert_tail(&bd->d_waitq, xi);
	kstat_waitq_enter(bd->d_kiop);
	mutex_exit(&bd->d_iomutex);

	bd_sched(bd);
}

static void
bd_runq_exit(bd_xfer_impl_t *xi, int err)
{
	bd_t	*bd = xi->i_bd;
	buf_t	*bp = xi->i_bp;

	mutex_enter(&bd->d_iomutex);
	bd->d_qactive--;
	kstat_runq_exit(bd->d_kiop);
	list_remove(&bd->d_runq, xi);
	mutex_exit(&bd->d_iomutex);

	if (err == 0) {
		if (bp->b_flags & B_READ) {
			bd->d_kiop->reads++;
			bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
		} else {
			bd->d_kiop->writes++;
			bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
		}
	}
	bd_sched(bd);
}

static void
bd_update_state(bd_t *bd)
{
	enum dkio_state	state;
	bd_media_t	media;
	boolean_t	docmlb = B_FALSE;

	bzero(&media, sizeof (media));

	mutex_enter(&bd->d_statemutex);
	if (bd->d_ops.o_media_info(bd->d_private, &media) == 0) {
		if ((1U << bd->d_blkshift) != media.m_blksize) {
			if ((media.m_blksize < 512) ||
			    (!ISP2(media.m_blksize)) ||
			    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
				cmn_err(CE_WARN,
				    "%s%d: Invalid media block size (%d)",
				    ddi_driver_name(bd->d_dip),
				    ddi_get_instance(bd->d_dip),
				    media.m_blksize);
				/*
				 * We can't use the media, treat it as
				 * not present.
				 */
				state = DKIO_EJECTED;
				bd->d_numblks = 0;
			} else {
				bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
				bd->d_numblks = media.m_nblks;
				bd->d_rdonly = media.m_readonly;
				state = DKIO_INSERTED;
			}

			/* Device size changed */
			docmlb = B_TRUE;

		} else {
			if (bd->d_numblks != media.m_nblks) {
				/* Device size changed */
				docmlb = B_TRUE;
			}
			bd->d_numblks = media.m_nblks;
			bd->d_rdonly = media.m_readonly;
			state = DKIO_INSERTED;
		}

	} else {
		bd->d_numblks = 0;
		state = DKIO_EJECTED;
	}
	if (state != bd->d_state) {
		bd->d_state = state;
		cv_broadcast(&bd->d_statecv);
		docmlb = B_TRUE;
	}
	mutex_exit(&bd->d_statemutex);

	if (docmlb) {
		if (state == DKIO_INSERTED) {
			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
		} else {
			cmlb_invalidate(bd->d_cmlbh, 0);
		}
	}
}

static int
bd_check_state(bd_t *bd, enum dkio_state *state)
{
	clock_t		when;

	for (;;) {

		bd_update_state(bd);

		mutex_enter(&bd->d_statemutex);

		if (bd->d_state != *state) {
			*state = bd->d_state;
			mutex_exit(&bd->d_statemutex);
			break;
		}

		when = drv_usectohz(1000000);
		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
		    when, TR_CLOCK_TICK) == 0) {
			mutex_exit(&bd->d_statemutex);
			return (EINTR);
		}

		mutex_exit(&bd->d_statemutex);
	}

	return (0);
}

static int
bd_flush_write_cache_done(struct buf *bp)
{
	struct dk_callback *dc = (void *)bp->b_private;

	(*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
	kmem_free(dc, sizeof (*dc));
	freerbuf(bp);
	return (0);
}

static int
bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
{
	buf_t			*bp;
	struct dk_callback	*dc;
	bd_xfer_impl_t		*xi;
	int			rv;

	if (bd->d_ops.o_sync_cache == NULL) {
		return (ENOTSUP);
	}
	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
		return (ENOMEM);
	}
	bp->b_resid = 0;
	bp->b_bcount = 0;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}

	/* Make an asynchronous flush, but only if there is a callback */
	if (dkc != NULL && dkc->dkc_callback != NULL) {
		/* Make a private copy of the callback structure */
		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
		*dc = *dkc;
		bp->b_private = dc;
		bp->b_iodone = bd_flush_write_cache_done;

		bd_submit(bd, xi);
		return (0);
	}

	/* In case there is no callback, perform a synchronous flush */
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}
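
/*
 * Editorial illustration (not original text): callers reach
 * bd_flush_write_cache() through DKIOCFLUSHWRITECACHE.  A userland caller
 * cannot pass FKIOCTL, so it always gets the synchronous path, e.g.:
 *
 *	if (ioctl(fd, DKIOCFLUSHWRITECACHE, 0) != 0)
 *		warn("cache flush failed");
 *
 * Only in-kernel callers (which set FKIOCTL) can supply a struct
 * dk_callback to request the asynchronous variant.
 */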

/*
 * Nexus support.
 */
int
bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
    void *arg, void *result)
{
	bd_handle_t	hdl;

	switch (ctlop) {
	case DDI_CTLOPS_REPORTDEV:
		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
		    ddi_driver_name(rdip), ddi_get_instance(rdip));
		return (DDI_SUCCESS);

	case DDI_CTLOPS_INITCHILD:
		hdl = ddi_get_parent_data((dev_info_t *)arg);
		if (hdl == NULL) {
			return (DDI_NOT_WELL_FORMED);
		}
		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
		return (DDI_SUCCESS);

	case DDI_CTLOPS_UNINITCHILD:
		ddi_set_name_addr((dev_info_t *)arg, NULL);
		ndi_prop_remove_all((dev_info_t *)arg);
		return (DDI_SUCCESS);

	default:
		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
	}
}

/*
 * Functions for device drivers.
 */
bd_handle_t
bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
{
	bd_handle_t	hdl;

	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
	if (hdl != NULL) {
		hdl->h_ops = *ops;
		hdl->h_dma = dma;
		hdl->h_private = private;
	}

	return (hdl);
}

void
bd_free_handle(bd_handle_t hdl)
{
	kmem_free(hdl, sizeof (*hdl));
}

int
bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
{
	dev_info_t	*child;
	bd_drive_t	drive;

	/* if drivers don't override this, make it assume none */
	drive.d_lun = -1;
	hdl->h_ops.o_drive_info(hdl->h_private, &drive);

	hdl->h_parent = dip;
	hdl->h_name = "blkdev";

	if (drive.d_lun >= 0) {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X,%X",
		    drive.d_target, drive.d_lun);
	} else {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X",
		    drive.d_target);
	}
	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
	    &child) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    "blkdev", hdl->h_addr);
		return (DDI_FAILURE);
	}

	ddi_set_parent_data(child, hdl);
	hdl->h_child = child;

	if (ndi_devi_online(child, 0) == NDI_FAILURE) {
		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    hdl->h_name, hdl->h_addr);
		(void) ndi_devi_free(child);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

int
bd_detach_handle(bd_handle_t hdl)
{
	int	circ;
	int	rv;
	char	*devnm;

	if (hdl->h_child == NULL) {
		return (DDI_SUCCESS);
	}
	ndi_devi_enter(hdl->h_parent, &circ);
	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
		rv = ddi_remove_child(hdl->h_child, 0);
	} else {
		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
		(void) ddi_deviname(hdl->h_child, devnm);
		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
		kmem_free(devnm, MAXNAMELEN + 1);
	}
	if (rv == 0) {
		hdl->h_child = NULL;
	}

	ndi_devi_exit(hdl->h_parent, circ);
	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
}

void
bd_xfer_done(bd_xfer_t *xfer, int err)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	buf_t		*bp = xi->i_bp;
	int		rv = DDI_SUCCESS;
	bd_t		*bd = xi->i_bd;
	size_t		len;

	if (err != 0) {
		bd_runq_exit(xi, err);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, err);
		biodone(bp);
		return;
	}

	xi->i_cur_win++;
	xi->i_resid -= xi->i_len;

	if (xi->i_resid == 0) {
		/* Job completed successfully! */
		bd_runq_exit(xi, 0);

		bd_xfer_free(xi);
		biodone(bp);
		return;
	}

	xi->i_blkno += xi->i_nblks;

	if (bd->d_use_dma) {
		/* More transfer still pending... advance to next DMA window. */
		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
	} else {
		/* Advance memory window. */
		xi->i_kaddr += xi->i_len;
		xi->i_offset += xi->i_len;
		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
	}


	if ((rv != DDI_SUCCESS) ||
	    (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
		bd_runq_exit(xi, EFAULT);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, EFAULT);
		biodone(bp);
		return;
	}
	xi->i_len = len;
	xi->i_nblks = len >> xi->i_blkshift;

	/* Submit next window to hardware. */
	rv = xi->i_func(bd->d_private, &xi->i_public);
	if (rv != 0) {
		bd_runq_exit(xi, rv);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, rv);
		biodone(bp);
	}
}

void
bd_state_change(bd_handle_t hdl)
{
	bd_t	*bd;

	if ((bd = hdl->h_bd) != NULL) {
		bd_update_state(bd);
	}
}

void
bd_mod_init(struct dev_ops *devops)
{
	static struct bus_ops bd_bus_ops = {
		BUSO_REV,		/* busops_rev */
		nullbusmap,		/* bus_map */
		NULL,			/* bus_get_intrspec (OBSOLETE) */
		NULL,			/* bus_add_intrspec (OBSOLETE) */
		NULL,			/* bus_remove_intrspec (OBSOLETE) */
		i_ddi_map_fault,	/* bus_map_fault */
		NULL,			/* bus_dma_map (OBSOLETE) */
		ddi_dma_allochdl,	/* bus_dma_allochdl */
		ddi_dma_freehdl,	/* bus_dma_freehdl */
		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
		ddi_dma_flush,		/* bus_dma_flush */
		ddi_dma_win,		/* bus_dma_win */
		ddi_dma_mctl,		/* bus_dma_ctl */
		bd_bus_ctl,		/* bus_ctl */
		ddi_bus_prop_op,	/* bus_prop_op */
		NULL,			/* bus_get_eventcookie */
		NULL,			/* bus_add_eventcall */
		NULL,			/* bus_remove_eventcall */
		NULL,			/* bus_post_event */
		NULL,			/* bus_intr_ctl (OBSOLETE) */
		NULL,			/* bus_config */
		NULL,			/* bus_unconfig */
		NULL,			/* bus_fm_init */
		NULL,			/* bus_fm_fini */
		NULL,			/* bus_fm_access_enter */
		NULL,			/* bus_fm_access_exit */
		NULL,			/* bus_power */
		NULL,			/* bus_intr_op */
	};

	devops->devo_bus_ops = &bd_bus_ops;

	/*
	 * NB: The device driver is free to supply its own
	 * character entry device support.
	 */
}

void
bd_mod_fini(struct dev_ops *devops)
{
	devops->devo_bus_ops = NULL;
}
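
/*
 * Editorial sketch (hypothetical, not part of this file): a parent driver
 * consumes this interface roughly as follows.  The mydrv_* names are
 * invented for illustration.
 *
 *	static bd_ops_t mydrv_bd_ops = {
 *		BD_OPS_VERSION_0,
 *		mydrv_drive_info,	// o_drive_info
 *		mydrv_media_info,	// o_media_info
 *		NULL,			// o_devid_init (optional)
 *		mydrv_sync_cache,	// o_sync_cache (optional)
 *		mydrv_read,		// o_read
 *		mydrv_write,		// o_write
 *	};
 *
 *	int
 *	_init(void)
 *	{
 *		bd_mod_init(&mydrv_dev_ops);	// install bd_bus_ops
 *		return (mod_install(&mydrv_modlinkage));
 *	}
 *
 *	// In attach(9E):
 *	sc->sc_bdh = bd_alloc_handle(sc, &mydrv_bd_ops, &mydrv_dma_attr,
 *	    KM_SLEEP);
 *	(void) bd_attach_handle(dip, sc->sc_bdh);
 *
 *	// In the o_read/o_write completion path:
 *	bd_xfer_done(xfer, error);
 *
 *	// In detach(9E):
 *	(void) bd_detach_handle(sc->sc_bdh);
 *	bd_free_handle(sc->sc_bdh);
 */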