/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
 * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/aio_req.h>
#include <sys/cred.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/kstat.h>
#include <sys/fs/dv_node.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/note.h>
#include <sys/blkdev.h>

#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)

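/*
 * Minor numbers encode both the instance and the partition: each
 * instance claims a contiguous block of BD_MAXPART minors, so, for
 * example, minor number 130 (2 * 64 + 2) decodes to instance 2,
 * partition 2.
 */
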
typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;

struct bd {
	void		*d_private;
	dev_info_t	*d_dip;
	kmutex_t	d_ocmutex;
	kmutex_t	d_iomutex;
	kmutex_t	d_statemutex;
	kcondvar_t	d_statecv;
	enum dkio_state	d_state;
	cmlb_handle_t	d_cmlbh;
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];	/* bit mask */

	uint32_t	d_qsize;
	uint32_t	d_qactive;
	uint32_t	d_maxxfer;
	uint32_t	d_blkshift;
	uint64_t	d_numblks;
	ddi_devid_t	d_devid;

	kmem_cache_t	*d_cache;
	list_t		d_runq;
	list_t		d_waitq;
	kstat_t		*d_ksp;
	kstat_io_t	*d_kiop;

	boolean_t	d_rdonly;
	boolean_t	d_removable;
	boolean_t	d_hotpluggable;
	boolean_t	d_use_dma;

	ddi_dma_attr_t	d_dma;
	bd_ops_t	d_ops;
	bd_handle_t	d_handle;
};

struct bd_handle {
	bd_ops_t	h_ops;
	ddi_dma_attr_t	*h_dma;
	dev_info_t	*h_parent;
	dev_info_t	*h_child;
	void		*h_private;
	bd_t		*h_bd;
	char		*h_name;
	char		h_addr[20];	/* enough for %X,%X */
};

struct bd_xfer_impl {
	bd_xfer_t	i_public;
	list_node_t	i_linkage;
	bd_t		*i_bd;
	buf_t		*i_bp;
	uint_t		i_num_win;
	uint_t		i_cur_win;
	off_t		i_offset;
	int		(*i_func)(void *, bd_xfer_t *);
	uint32_t	i_blkshift;
	size_t		i_len;
	size_t		i_resid;
};

#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags


/*
 * Private prototypes.
 */

static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);

struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};

static struct cb_ops bd_cb_ops = {
	bd_open,		/* open */
	bd_close,		/* close */
	bd_strategy,		/* strategy */
	nodev,			/* print */
	bd_dump,		/* dump */
	bd_read,		/* read */
	bd_write,		/* write */
	bd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	bd_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_64BIT | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	bd_aread,		/* async read */
	bd_awrite		/* async write */
};

struct dev_ops bd_dev_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	bd_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	bd_attach,		/* attach */
	bd_detach,		/* detach */
	nodev,			/* reset */
	&bd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"Generic Block Device",
	&bd_dev_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, { &modldrv, NULL }
};

static void *bd_state;
static krwlock_t bd_lock;

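/*
 * Note on ordering: _init() must set up the soft state and bd_lock
 * before mod_install() publishes the module, because attach can be
 * called as soon as mod_install() returns.  Likewise, _fini() tears
 * them down only after mod_remove() has succeeded.
 */
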
int
_init(void)
{
	int	rv;

	rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
	if (rv != DDI_SUCCESS) {
		return (rv);
	}
	rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
	rv = mod_install(&modlinkage);
	if (rv != DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_fini(void)
{
	int	rv;

	rv = mod_remove(&modlinkage);
	if (rv == DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static int
bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	bd_t	*bd;
	minor_t	inst;

	_NOTE(ARGUNUSED(dip));

	inst = BDINST((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		bd = ddi_get_soft_state(bd_state, inst);
		if (bd == NULL) {
			return (DDI_FAILURE);
		}
		*resultp = (void *)bd->d_dip;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(intptr_t)inst;
		break;

	default:
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

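/*
 * bd_attach() builds the per-instance state: DMA attributes (when the
 * parent supplies them), locks, the transfer cache, kstats, and the
 * cmlb label handle.  If cmlb_attach() fails, everything allocated up
 * to that point is unwound before returning DDI_FAILURE.
 */
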
static int
bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int		inst;
	bd_handle_t	hdl;
	bd_t		*bd;
	bd_drive_t	drive;
	int		rv;
	char		name[16];
	char		kcache[32];

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* We don't do anything native for suspend/resume */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	inst = ddi_get_instance(dip);
	hdl = ddi_get_parent_data(dip);

	(void) snprintf(name, sizeof (name), "%s%d",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);

	if (hdl == NULL) {
		cmn_err(CE_WARN, "%s: missing parent data!", name);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
		return (DDI_FAILURE);
	}
	bd = ddi_get_soft_state(bd_state, inst);

	if (hdl->h_dma) {
		bd->d_dma = *(hdl->h_dma);
		bd->d_dma.dma_attr_granular =
		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
		bd->d_use_dma = B_TRUE;

		if (bd->d_maxxfer &&
		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
			cmn_err(CE_WARN,
			    "%s: inconsistent maximum transfer size!",
			    name);
			/* We force it */
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		} else {
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		}
	} else {
		bd->d_use_dma = B_FALSE;
		if (bd->d_maxxfer == 0) {
			bd->d_maxxfer = 1024 * 1024;
		}
	}
	bd->d_ops = hdl->h_ops;
	bd->d_private = hdl->h_private;
	bd->d_blkshift = 9;	/* 512 bytes, to start */

	if (bd->d_maxxfer % DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
	}
	if (bd->d_maxxfer < DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	bd->d_dip = dip;
	bd->d_handle = hdl;
	hdl->h_bd = bd;
	ddi_set_driver_private(dip, bd);

	mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);

	list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));
	list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));

	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);

	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
	if (bd->d_ksp != NULL) {
		bd->d_ksp->ks_lock = &bd->d_iomutex;
		kstat_install(bd->d_ksp);
		bd->d_kiop = bd->d_ksp->ks_data;
	} else {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat.  The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
	}

	cmlb_alloc_handle(&bd->d_cmlbh);

	bd->d_state = DKIO_NONE;

	bzero(&drive, sizeof (drive));
	bd->d_ops.o_drive_info(bd->d_private, &drive);
	bd->d_qsize = drive.d_qsize;
	bd->d_removable = drive.d_removable;
	bd->d_hotpluggable = drive.d_hotpluggable;

	if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
		bd->d_maxxfer = drive.d_maxxfer;

	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
	    bd->d_removable, bd->d_hotpluggable,
	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
	if (rv != 0) {
		cmlb_free_handle(&bd->d_cmlbh);
		kmem_cache_destroy(bd->d_cache);
		mutex_destroy(&bd->d_iomutex);
		mutex_destroy(&bd->d_ocmutex);
		mutex_destroy(&bd->d_statemutex);
		cv_destroy(&bd->d_statecv);
		list_destroy(&bd->d_waitq);
		list_destroy(&bd->d_runq);
		if (bd->d_ksp != NULL) {
			kstat_delete(bd->d_ksp);
			bd->d_ksp = NULL;
		} else {
			kmem_free(bd->d_kiop, sizeof (kstat_io_t));
		}
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	if (bd->d_ops.o_devid_init != NULL) {
		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
		if (rv == DDI_SUCCESS) {
			if (ddi_devid_register(dip, bd->d_devid) !=
			    DDI_SUCCESS) {
				cmn_err(CE_WARN,
				    "%s: unable to register devid", name);
			}
		}
	}

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers).  Also set up properties
	 * used by HAL to identify removable media.
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);
	if (bd->d_removable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "removable-media", NULL, 0);
	}
	if (bd->d_hotpluggable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "hotpluggable", NULL, 0);
	}

	ddi_report_dev(dip);

	return (DDI_SUCCESS);
}

static int
bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	bd_t	*bd;

	bd = ddi_get_driver_private(dip);

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		/* We don't suspend, but our parent does */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
	if (bd->d_ksp != NULL) {
		kstat_delete(bd->d_ksp);
		bd->d_ksp = NULL;
	} else {
		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
	}
	cmlb_detach(bd->d_cmlbh, 0);
	cmlb_free_handle(&bd->d_cmlbh);
	if (bd->d_devid)
		ddi_devid_free(bd->d_devid);
	kmem_cache_destroy(bd->d_cache);
	mutex_destroy(&bd->d_iomutex);
	mutex_destroy(&bd->d_ocmutex);
	mutex_destroy(&bd->d_statemutex);
	cv_destroy(&bd->d_statecv);
	list_destroy(&bd->d_waitq);
	list_destroy(&bd->d_runq);
	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
	return (DDI_SUCCESS);
}

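/*
 * Transfer structures are kept in a per-instance kmem cache.  When DMA
 * is in use, the constructor preallocates a DMA handle for each cached
 * object, so the I/O path in bd_xfer_alloc() only has to bind the
 * buffer rather than allocate a handle.
 */
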
static int
bd_xfer_ctor(void *buf, void *arg, int kmflag)
{
	bd_xfer_impl_t	*xi;
	bd_t		*bd = arg;
	int		(*dcb)(caddr_t);

	if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
		dcb = DDI_DMA_SLEEP;
	} else {
		dcb = DDI_DMA_DONTWAIT;
	}

	xi = buf;
	bzero(xi, sizeof (*xi));
	xi->i_bd = bd;

	if (bd->d_use_dma) {
		if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
		    &xi->i_dmah) != DDI_SUCCESS) {
			return (-1);
		}
	}

	return (0);
}

static void
bd_xfer_dtor(void *buf, void *arg)
{
	bd_xfer_impl_t	*xi = buf;

	_NOTE(ARGUNUSED(arg));

	if (xi->i_dmah)
		ddi_dma_free_handle(&xi->i_dmah);
	xi->i_dmah = NULL;
}

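/*
 * bd_xfer_alloc() stages a buf for the underlying driver.  With DMA,
 * the buffer is bound to the preallocated handle; a partial binding
 * (DDI_DMA_PARTIAL_MAP) breaks the request into windows, which
 * bd_xfer_done() later walks one at a time.  Without DMA, the buffer
 * is mapped into the kernel and carved into d_maxxfer-sized windows.
 */
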
static bd_xfer_impl_t *
bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
    int kmflag)
{
	bd_xfer_impl_t	*xi;
	int		rv;
	int		status;
	unsigned	dir;
	int		(*cb)(caddr_t);
	size_t		len;
	uint32_t	shift;

	if (kmflag == KM_SLEEP) {
		cb = DDI_DMA_SLEEP;
	} else {
		cb = DDI_DMA_DONTWAIT;
	}

	xi = kmem_cache_alloc(bd->d_cache, kmflag);
	if (xi == NULL) {
		bioerror(bp, ENOMEM);
		return (NULL);
	}

	ASSERT(bp);

	xi->i_bp = bp;
	xi->i_func = func;
	xi->i_blkno = bp->b_lblkno;

	if (bp->b_bcount == 0) {
		xi->i_len = 0;
		xi->i_nblks = 0;
		xi->i_kaddr = NULL;
		xi->i_resid = 0;
		xi->i_num_win = 0;
		rv = 0;		/* nothing to transfer */
		goto done;
	}

	if (bp->b_flags & B_READ) {
		dir = DDI_DMA_READ;
		xi->i_func = bd->d_ops.o_read;
	} else {
		dir = DDI_DMA_WRITE;
		xi->i_func = bd->d_ops.o_write;
	}

	shift = bd->d_blkshift;
	xi->i_blkshift = shift;

	if (!bd->d_use_dma) {
		bp_mapin(bp);
		rv = 0;
		xi->i_offset = 0;
		xi->i_num_win =
		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
		xi->i_cur_win = 0;
		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
		xi->i_nblks = xi->i_len >> shift;
		xi->i_kaddr = bp->b_un.b_addr;
		xi->i_resid = bp->b_bcount;
	} else {

		/*
		 * We have to use consistent DMA if the address is misaligned.
		 */
		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
		} else {
			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
		}

		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
		    NULL, &xi->i_dmac, &xi->i_ndmac);
		switch (status) {
		case DDI_DMA_MAPPED:
			xi->i_num_win = 1;
			xi->i_cur_win = 0;
			xi->i_offset = 0;
			xi->i_len = bp->b_bcount;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_PARTIAL_MAP:
			xi->i_cur_win = 0;

			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
			    DDI_SUCCESS) ||
			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
			    &len, &xi->i_dmac, &xi->i_ndmac) !=
			    DDI_SUCCESS) ||
			    (P2PHASE(len, (1U << shift)) != 0)) {
				(void) ddi_dma_unbind_handle(xi->i_dmah);
				rv = EFAULT;
				goto done;
			}
			xi->i_len = len;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_NORESOURCES:
			rv = EAGAIN;
			goto done;
		case DDI_DMA_TOOBIG:
			rv = EINVAL;
			goto done;
		case DDI_DMA_NOMAPPING:
		case DDI_DMA_INUSE:
		default:
			rv = EFAULT;
			goto done;
		}
	}

done:
	if (rv != 0) {
		kmem_cache_free(bd->d_cache, xi);
		bioerror(bp, rv);
		return (NULL);
	}

	return (xi);
}

static void
bd_xfer_free(bd_xfer_impl_t *xi)
{
	if (xi->i_dmah) {
		(void) ddi_dma_unbind_handle(xi->i_dmah);
	}
	kmem_cache_free(xi->i_bd->d_cache, xi);
}

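/*
 * Open/close bookkeeping, all protected by d_ocmutex: d_open_lyr[]
 * counts layered opens per partition, d_open_reg[] keeps one bitmask
 * of open partitions for each open type, and d_open_excl is a bitmask
 * of partitions held with FEXCL.
 */
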
static int
bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	dev_t		dev = *devp;
	bd_t		*bd;
	minor_t		part;
	minor_t		inst;
	uint64_t	mask;
	boolean_t	ndelay;
	int		rv;
	diskaddr_t	nblks;
	diskaddr_t	lba;

	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	if (otyp >= OTYPCNT)
		return (EINVAL);

	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;

	/*
	 * Block any DR events from changing the set of registered
	 * devices while we function.
	 */
	rw_enter(&bd_lock, RW_READER);
	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);

	ASSERT(part < 64);
	mask = (1U << part);

	bd_update_state(bd);

	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {

		/* non-blocking opens are allowed to succeed */
		if (!ndelay) {
			rv = ENXIO;
			goto done;
		}
	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
	    NULL, NULL, 0) == 0) {

		/*
		 * We read the partinfo, verify valid ranges.  If the
		 * partition is invalid, and we aren't blocking or
		 * doing a raw access, then fail.  (Non-blocking and
		 * raw accesses can still succeed to allow a disk with
		 * bad partition data to be opened by format and fdisk.)
		 */
		if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
			rv = ENXIO;
			goto done;
		}
	} else if (!ndelay) {
		/*
		 * cmlb_partinfo failed -- invalid partition or no
		 * disk label.
		 */
		rv = ENXIO;
		goto done;
	}

	if ((flag & FWRITE) && bd->d_rdonly) {
		rv = EROFS;
		goto done;
	}

	if ((bd->d_open_excl) & (mask)) {
		rv = EBUSY;
		goto done;
	}
	if (flag & FEXCL) {
		if (bd->d_open_lyr[part]) {
			rv = EBUSY;
			goto done;
		}
		for (int i = 0; i < OTYP_LYR; i++) {
			if (bd->d_open_reg[i] & mask) {
				rv = EBUSY;
				goto done;
			}
		}
	}

	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]++;
	} else {
		bd->d_open_reg[otyp] |= mask;
	}
	if (flag & FEXCL) {
		bd->d_open_excl |= mask;
	}

	rv = 0;
done:
	mutex_exit(&bd->d_ocmutex);
	rw_exit(&bd_lock);

	return (rv);
}

static int
bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	bd_t		*bd;
	minor_t		inst;
	minor_t		part;
	uint64_t	mask;
	boolean_t	last = B_TRUE;

	_NOTE(ARGUNUSED(flag));
	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	ASSERT(part < 64);
	mask = (1U << part);

	rw_enter(&bd_lock, RW_READER);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);
	if (bd->d_open_excl & mask) {
		bd->d_open_excl &= ~mask;
	}
	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]--;
	} else {
		bd->d_open_reg[otyp] &= ~mask;
	}
	for (int i = 0; i < 64; i++) {
		if (bd->d_open_lyr[i]) {
			last = B_FALSE;
		}
	}
	for (int i = 0; last && (i < OTYP_LYR); i++) {
		if (bd->d_open_reg[i]) {
			last = B_FALSE;
		}
	}
	mutex_exit(&bd->d_ocmutex);

	if (last) {
		cmlb_invalidate(bd->d_cmlbh, 0);
	}
	rw_exit(&bd_lock);

	return (0);
}

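/*
 * bd_dump() is the dump(9e) entry point used to write a crash dump.
 * It may run in a context where interrupts are unusable, so the
 * transfer is marked BD_XFER_POLL and the underlying driver is
 * expected to complete it synchronously.
 */
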
static int
bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
{
	minor_t		inst;
	minor_t		part;
	diskaddr_t	pstart;
	diskaddr_t	psize;
	bd_t		*bd;
	bd_xfer_impl_t	*xi;
	buf_t		*bp;
	int		rv;

	rw_enter(&bd_lock, RW_READER);

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}
	/*
	 * do cmlb, but do it synchronously unless we already have the
	 * partition (which we probably should.)
	 */
	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
	    (void *)1)) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	if ((blkno + nblk) > psize) {
		rw_exit(&bd_lock);
		return (EINVAL);
	}
	bp = getrbuf(KM_NOSLEEP);
	if (bp == NULL) {
		rw_exit(&bd_lock);
		return (ENOMEM);
	}

	bp->b_bcount = nblk << bd->d_blkshift;
	bp->b_resid = bp->b_bcount;
	bp->b_lblkno = blkno;
	bp->b_un.b_addr = caddr;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP);
	if (xi == NULL) {
		rw_exit(&bd_lock);
		freerbuf(bp);
		return (ENOMEM);
	}
	xi->i_blkno = blkno + pstart;
	xi->i_flags = BD_XFER_POLL;
	bd_submit(bd, xi);
	rw_exit(&bd_lock);

	/*
	 * Generally, we should have run this entirely synchronously
	 * at this point and the biowait call should be a no-op.  If
	 * it didn't happen this way, it's a bug in the underlying
	 * driver not honoring BD_XFER_POLL.
	 */
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}

void
bd_minphys(struct buf *bp)
{
	minor_t inst;
	bd_t	*bd;
	inst = BDINST(bp->b_edev);

	bd = ddi_get_soft_state(bd_state, inst);

	/*
	 * In a non-debug kernel, bd_strategy will catch !bd as
	 * well, and will fail nicely.
	 */
	ASSERT(bd);

	if (bp->b_bcount > bd->d_maxxfer)
		bp->b_bcount = bd->d_maxxfer;
}

static int
bd_read(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
}

static int
bd_write(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
}

static int
bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
}

static int
bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
}

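/*
 * bd_strategy() validates a request against the partition: it rejects
 * misaligned or out-of-range requests, clamps transfers that run past
 * the end of the partition (reporting the excess via b_resid), maps
 * the partition-relative block number to an absolute one, and queues
 * the transfer.
 */
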
static int
bd_strategy(struct buf *bp)
{
	minor_t		inst;
	minor_t		part;
	bd_t		*bd;
	diskaddr_t	p_lba;
	diskaddr_t	p_nblks;
	diskaddr_t	b_nblks;
	bd_xfer_impl_t	*xi;
	uint32_t	shift;
	int		(*func)(void *, bd_xfer_t *);

	part = BDPART(bp->b_edev);
	inst = BDINST(bp->b_edev);

	ASSERT(bp);

	bp->b_resid = bp->b_bcount;

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
	    NULL, NULL, 0)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	shift = bd->d_blkshift;

	if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
	    (bp->b_lblkno > p_nblks)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}
	b_nblks = bp->b_bcount >> shift;
	if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
		biodone(bp);
		return (0);
	}

	if ((b_nblks + bp->b_lblkno) > p_nblks) {
		bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
		bp->b_bcount -= bp->b_resid;
	} else {
		bp->b_resid = 0;
	}
	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;

	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
	if (xi == NULL) {
		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
	}
	if (xi == NULL) {
		/* bd_xfer_alloc will have done bioerror */
		biodone(bp);
		return (0);
	}
	xi->i_blkno = bp->b_lblkno + p_lba;

	bd_submit(bd, xi);

	return (0);
}

static int
bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
{
	minor_t		inst;
	uint16_t	part;
	bd_t		*bd;
	void		*ptr = (void *)arg;
	int		rv;

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		return (ENXIO);
	}

	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
	if (rv != ENOTTY)
		return (rv);

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo minfo;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&minfo, sizeof (minfo));
		minfo.dki_media_type = DK_FIXED_DISK;
		minfo.dki_lbsize = (1U << bd->d_blkshift);
		minfo.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCGMEDIAINFOEXT: {
		struct dk_minfo_ext miext;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&miext, sizeof (miext));
		miext.dki_media_type = DK_FIXED_DISK;
		miext.dki_lbsize = (1U << bd->d_blkshift);
		miext.dki_pbsize = miext.dki_lbsize;
		miext.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo cinfo;
		bzero(&cinfo, sizeof (cinfo));
		cinfo.dki_ctype = DKC_BLKDEV;
		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
		    "%s", ddi_driver_name(bd->d_dip));
		cinfo.dki_unit = inst;
		cinfo.dki_flags = DKI_FMTVOL;
		cinfo.dki_partition = part;
		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
		cinfo.dki_addr = 0;
		cinfo.dki_slave = 0;
		cinfo.dki_space = 0;
		cinfo.dki_prio = 0;
		cinfo.dki_vec = 0;
		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i;
		i = bd->d_removable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCHOTPLUGGABLE: {
		int i;
		i = bd->d_hotpluggable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREADONLY: {
		int i;
		i = bd->d_rdonly ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSTATE: {
		enum dkio_state	state;
		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
			return (EFAULT);
		}
		if ((rv = bd_check_state(bd, &state)) != 0) {
			return (rv);
		}
		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc = NULL;

		if (flag & FKIOCTL)
			dkc = (void *)arg;

		rv = bd_flush_write_cache(bd, dkc);
		return (rv);
	}

	default:
		break;

	}
	return (ENOTTY);
}

static int
bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	bd_t	*bd;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
	if (bd == NULL)
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));

	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
	    valuep, lengthp, BDPART(dev), 0));
}

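/*
 * bd_tg_rdwr() and bd_tg_getinfo() back the cmlb_tg_ops vector; cmlb
 * uses them to read and write labels and to query capacity and block
 * size.  A non-NULL tg_cookie indicates polled context (e.g. during a
 * crash dump), so allocations must not sleep and the transfer itself
 * is polled.
 */
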
static int
bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
	bd_t		*bd;
	buf_t		*bp;
	bd_xfer_impl_t	*xi;
	int		rv;
	int		(*func)(void *, bd_xfer_t *);
	int		kmflag;

	/*
	 * If we are running in polled mode (such as during dump(9e)
	 * execution), then we cannot sleep for kernel allocations.
	 */
	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	if ((bp = getrbuf(kmflag)) == NULL) {
		return (ENOMEM);
	}

	switch (cmd) {
	case TG_READ:
		bp->b_flags = B_READ;
		func = bd->d_ops.o_read;
		break;
	case TG_WRITE:
		bp->b_flags = B_WRITE;
		func = bd->d_ops.o_write;
		break;
	default:
		freerbuf(bp);
		return (EINVAL);
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	xi = bd_xfer_alloc(bd, bp, func, kmflag);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}
	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
	xi->i_blkno = start;
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

static int
bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	bd_t	*bd;

	_NOTE(ARGUNUSED(tg_cookie));
	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	switch (cmd) {
	case TG_GETPHYGEOM:
	case TG_GETVIRTGEOM:
		/*
		 * We don't have any "geometry" as such, so let cmlb
		 * fabricate something.
		 */
		return (ENOTTY);

	case TG_GETCAPACITY:
		bd_update_state(bd);
		*(diskaddr_t *)arg = bd->d_numblks;
		return (0);

	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = (1U << bd->d_blkshift);
		return (0);

	case TG_GETATTR:
		/*
		 * It turns out that cmlb really doesn't do much for
		 * non-writable media, but let's make the information
		 * available for it in case it does more in the
		 * future.  (The value is currently used for
		 * triggering special behavior for CD-ROMs.)
		 */
		bd_update_state(bd);
		((tg_attribute_t *)arg)->media_is_writable =
		    bd->d_rdonly ? B_FALSE : B_TRUE;
		return (0);

	default:
		return (EINVAL);
	}
}

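/*
 * I/O scheduling: bd_submit() appends transfers to d_waitq, and
 * bd_sched() moves them to d_runq and hands them to the driver,
 * keeping at most d_qsize transfers outstanding.  bd_runq_exit()
 * retires a completed transfer and calls bd_sched() again to keep
 * the device busy.
 */
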
static void
bd_sched(bd_t *bd)
{
	bd_xfer_impl_t	*xi;
	struct buf	*bp;
	int		rv;

	mutex_enter(&bd->d_iomutex);

	while ((bd->d_qactive < bd->d_qsize) &&
	    ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
		bd->d_qactive++;
		kstat_waitq_to_runq(bd->d_kiop);
		list_insert_tail(&bd->d_runq, xi);

		/*
		 * Submit the job to the driver.  We drop the I/O mutex
		 * so that we can deal with the case where the driver
		 * completion routine calls back into us synchronously.
		 */

		mutex_exit(&bd->d_iomutex);

		rv = xi->i_func(bd->d_private, &xi->i_public);
		if (rv != 0) {
			bp = xi->i_bp;
			bioerror(bp, rv);
			biodone(bp);

			mutex_enter(&bd->d_iomutex);
			bd->d_qactive--;
			kstat_runq_exit(bd->d_kiop);
			list_remove(&bd->d_runq, xi);
			bd_xfer_free(xi);
		} else {
			mutex_enter(&bd->d_iomutex);
		}
	}

	mutex_exit(&bd->d_iomutex);
}

static void
bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
{
	mutex_enter(&bd->d_iomutex);
	list_insert_tail(&bd->d_waitq, xi);
	kstat_waitq_enter(bd->d_kiop);
	mutex_exit(&bd->d_iomutex);

	bd_sched(bd);
}

static void
bd_runq_exit(bd_xfer_impl_t *xi, int err)
{
	bd_t	*bd = xi->i_bd;
	buf_t	*bp = xi->i_bp;

	mutex_enter(&bd->d_iomutex);
	bd->d_qactive--;
	kstat_runq_exit(bd->d_kiop);
	list_remove(&bd->d_runq, xi);
	mutex_exit(&bd->d_iomutex);

	if (err == 0) {
		if (bp->b_flags & B_READ) {
			bd->d_kiop->reads++;
			bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
		} else {
			bd->d_kiop->writes++;
			bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
		}
	}
	bd_sched(bd);
}

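/*
 * bd_update_state() re-queries the driver's media info, recomputing
 * the block shift and capacity, and broadcasts on d_statecv when the
 * DKIO state changes.  The cmlb (in)validation is done after dropping
 * d_statemutex, because cmlb calls back into bd_tg_getinfo(), which
 * re-enters bd_update_state().
 */
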
static void
bd_update_state(bd_t *bd)
{
	enum dkio_state	state;
	bd_media_t	media;
	boolean_t	docmlb = B_FALSE;

	bzero(&media, sizeof (media));

	mutex_enter(&bd->d_statemutex);
	if (bd->d_ops.o_media_info(bd->d_private, &media) == 0) {
		if ((1U << bd->d_blkshift) != media.m_blksize) {
			if ((media.m_blksize < 512) ||
			    (!ISP2(media.m_blksize)) ||
			    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
				cmn_err(CE_WARN,
				    "%s%d: Invalid media block size (%d)",
				    ddi_driver_name(bd->d_dip),
				    ddi_get_instance(bd->d_dip),
				    media.m_blksize);
				/*
				 * We can't use the media, treat it as
				 * not present.
				 */
				state = DKIO_EJECTED;
				bd->d_numblks = 0;
			} else {
				bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
				bd->d_numblks = media.m_nblks;
				bd->d_rdonly = media.m_readonly;
				state = DKIO_INSERTED;
			}

			/* Device size changed */
			docmlb = B_TRUE;

		} else {
			if (bd->d_numblks != media.m_nblks) {
				/* Device size changed */
				docmlb = B_TRUE;
			}
			bd->d_numblks = media.m_nblks;
			bd->d_rdonly = media.m_readonly;
			state = DKIO_INSERTED;
		}

	} else {
		bd->d_numblks = 0;
		state = DKIO_EJECTED;
	}
	if (state != bd->d_state) {
		bd->d_state = state;
		cv_broadcast(&bd->d_statecv);
		docmlb = B_TRUE;
	}
	mutex_exit(&bd->d_statemutex);

	if (docmlb) {
		if (state == DKIO_INSERTED) {
			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
		} else {
			cmlb_invalidate(bd->d_cmlbh, 0);
		}
	}
}

static int
bd_check_state(bd_t *bd, enum dkio_state *state)
{
	clock_t		when;

	for (;;) {

		bd_update_state(bd);

		mutex_enter(&bd->d_statemutex);

		if (bd->d_state != *state) {
			*state = bd->d_state;
			mutex_exit(&bd->d_statemutex);
			break;
		}

		when = drv_usectohz(1000000);
		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
		    when, TR_CLOCK_TICK) == 0) {
			mutex_exit(&bd->d_statemutex);
			return (EINTR);
		}

		mutex_exit(&bd->d_statemutex);
	}

	return (0);
}

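/*
 * Cache flush: when the caller supplies a dk_callback, the flush is
 * submitted asynchronously and bd_flush_write_cache_done() runs the
 * callback from biodone(), freeing the private copy of the callback
 * structure.  With no callback, the flush is performed synchronously.
 */
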
static int
bd_flush_write_cache_done(struct buf *bp)
{
	struct dk_callback *dc = (void *)bp->b_private;

	(*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
	kmem_free(dc, sizeof (*dc));
	freerbuf(bp);
	return (0);
}

static int
bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
{
	buf_t			*bp;
	struct dk_callback	*dc;
	bd_xfer_impl_t		*xi;
	int			rv;

	if (bd->d_ops.o_sync_cache == NULL) {
		return (ENOTSUP);
	}
	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
		return (ENOMEM);
	}
	bp->b_resid = 0;
	bp->b_bcount = 0;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}

	/* Make an asynchronous flush, but only if there is a callback */
	if (dkc != NULL && dkc->dkc_callback != NULL) {
		/* Make a private copy of the callback structure */
		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
		*dc = *dkc;
		bp->b_private = dc;
		bp->b_iodone = bd_flush_write_cache_done;

		bd_submit(bd, xi);
		return (0);
	}

	/* In case there is no callback, perform a synchronous flush */
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

/*
 * Nexus support.
 */
int
bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
    void *arg, void *result)
{
	bd_handle_t	hdl;

	switch (ctlop) {
	case DDI_CTLOPS_REPORTDEV:
		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
		    ddi_driver_name(rdip), ddi_get_instance(rdip));
		return (DDI_SUCCESS);

	case DDI_CTLOPS_INITCHILD:
		hdl = ddi_get_parent_data((dev_info_t *)arg);
		if (hdl == NULL) {
			return (DDI_NOT_WELL_FORMED);
		}
		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
		return (DDI_SUCCESS);

	case DDI_CTLOPS_UNINITCHILD:
		ddi_set_name_addr((dev_info_t *)arg, NULL);
		ndi_prop_remove_all((dev_info_t *)arg);
		return (DDI_SUCCESS);

	default:
		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
	}
}

/*
 * Functions for device drivers.
 */
bd_handle_t
bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
{
	bd_handle_t	hdl;

	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
	if (hdl != NULL) {
		hdl->h_ops = *ops;
		hdl->h_dma = dma;
		hdl->h_private = private;
	}

	return (hdl);
}

void
bd_free_handle(bd_handle_t hdl)
{
	kmem_free(hdl, sizeof (*hdl));
}

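/*
 * A parent driver typically uses this interface along the following
 * lines (an illustrative sketch; everything here other than the bd_*
 * calls is hypothetical):
 *
 *	hdl = bd_alloc_handle(sc, &my_bd_ops, &my_dma_attr, KM_SLEEP);
 *	if (hdl == NULL || bd_attach_handle(dip, hdl) != DDI_SUCCESS)
 *		... fail the attach ...
 *
 * and undoes it with bd_detach_handle() followed by bd_free_handle().
 */
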
int
bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
{
	dev_info_t	*child;
	bd_drive_t	drive;

	/* if the driver doesn't fill this in, assume no LUN */
	drive.d_lun = -1;
	hdl->h_ops.o_drive_info(hdl->h_private, &drive);

	hdl->h_parent = dip;
	hdl->h_name = "blkdev";

	if (drive.d_lun >= 0) {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X,%X",
		    drive.d_target, drive.d_lun);
	} else {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X",
		    drive.d_target);
	}
	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
	    &child) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    "blkdev", hdl->h_addr);
		return (DDI_FAILURE);
	}

	ddi_set_parent_data(child, hdl);
	hdl->h_child = child;

	if (ndi_devi_online(child, 0) == NDI_FAILURE) {
		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    hdl->h_name, hdl->h_addr);
		(void) ndi_devi_free(child);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

int
bd_detach_handle(bd_handle_t hdl)
{
	int	circ;
	int	rv;
	char	*devnm;

	if (hdl->h_child == NULL) {
		return (DDI_SUCCESS);
	}
	ndi_devi_enter(hdl->h_parent, &circ);
	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
		rv = ddi_remove_child(hdl->h_child, 0);
	} else {
		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
		(void) ddi_deviname(hdl->h_child, devnm);
		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
		kmem_free(devnm, MAXNAMELEN + 1);
	}
	if (rv == 0) {
		hdl->h_child = NULL;
	}

	ndi_devi_exit(hdl->h_parent, circ);
	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
}

void
bd_xfer_done(bd_xfer_t *xfer, int err)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	buf_t		*bp = xi->i_bp;
	int		rv = DDI_SUCCESS;
	bd_t		*bd = xi->i_bd;
	size_t		len;

	if (err != 0) {
		bd_runq_exit(xi, err);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, err);
		biodone(bp);
		return;
	}

	xi->i_cur_win++;
	xi->i_resid -= xi->i_len;

	if (xi->i_resid == 0) {
		/* Job completed successfully! */
		bd_runq_exit(xi, 0);

		bd_xfer_free(xi);
		biodone(bp);
		return;
	}

	xi->i_blkno += xi->i_nblks;

	if (bd->d_use_dma) {
		/* More transfer still pending... advance to next DMA window. */
		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
	} else {
		/* Advance memory window. */
		xi->i_kaddr += xi->i_len;
		xi->i_offset += xi->i_len;
		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
	}

	if ((rv != DDI_SUCCESS) ||
	    (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
		bd_runq_exit(xi, EFAULT);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, EFAULT);
		biodone(bp);
		return;
	}
	xi->i_len = len;
	xi->i_nblks = len >> xi->i_blkshift;

	/* Submit next window to hardware. */
	rv = xi->i_func(bd->d_private, &xi->i_public);
	if (rv != 0) {
		bd_runq_exit(xi, rv);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, rv);
		biodone(bp);
	}
}

void
bd_state_change(bd_handle_t hdl)
{
	bd_t	*bd;

	if ((bd = hdl->h_bd) != NULL) {
		bd_update_state(bd);
	}
}

void
bd_mod_init(struct dev_ops *devops)
{
	static struct bus_ops bd_bus_ops = {
		BUSO_REV,		/* busops_rev */
		nullbusmap,		/* bus_map */
		NULL,			/* bus_get_intrspec (OBSOLETE) */
		NULL,			/* bus_add_intrspec (OBSOLETE) */
		NULL,			/* bus_remove_intrspec (OBSOLETE) */
		i_ddi_map_fault,	/* bus_map_fault */
		NULL,			/* bus_dma_map (OBSOLETE) */
		ddi_dma_allochdl,	/* bus_dma_allochdl */
		ddi_dma_freehdl,	/* bus_dma_freehdl */
		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
		ddi_dma_flush,		/* bus_dma_flush */
		ddi_dma_win,		/* bus_dma_win */
		ddi_dma_mctl,		/* bus_dma_ctl */
		bd_bus_ctl,		/* bus_ctl */
		ddi_bus_prop_op,	/* bus_prop_op */
		NULL,			/* bus_get_eventcookie */
		NULL,			/* bus_add_eventcall */
		NULL,			/* bus_remove_eventcall */
		NULL,			/* bus_post_event */
		NULL,			/* bus_intr_ctl (OBSOLETE) */
		NULL,			/* bus_config */
		NULL,			/* bus_unconfig */
		NULL,			/* bus_fm_init */
		NULL,			/* bus_fm_fini */
		NULL,			/* bus_fm_access_enter */
		NULL,			/* bus_fm_access_exit */
		NULL,			/* bus_power */
		NULL,			/* bus_intr_op */
	};

	devops->devo_bus_ops = &bd_bus_ops;

	/*
	 * NB: The device driver is free to supply its own
	 * character entry device support.
	 */
}

void
bd_mod_fini(struct dev_ops *devops)
{
	devops->devo_bus_ops = NULL;
}