/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
 * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/aio_req.h>
#include <sys/cred.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/kstat.h>
#include <sys/fs/dv_node.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/note.h>
#include <sys/blkdev.h>

#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)

typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;

struct bd {
	void		*d_private;
	dev_info_t	*d_dip;
	kmutex_t	d_ocmutex;
	kmutex_t	d_iomutex;
	kmutex_t	d_statemutex;
	kcondvar_t	d_statecv;
	enum dkio_state	d_state;
	cmlb_handle_t	d_cmlbh;
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];	/* bit mask */

	uint32_t	d_qsize;
	uint32_t	d_qactive;
	uint32_t	d_maxxfer;
	uint32_t	d_blkshift;
	uint64_t	d_numblks;
	ddi_devid_t	d_devid;

	kmem_cache_t	*d_cache;
	list_t		d_runq;
	list_t		d_waitq;
	kstat_t		*d_ksp;
	kstat_io_t	*d_kiop;

	boolean_t	d_rdonly;
	boolean_t	d_ssd;
	boolean_t	d_removable;
	boolean_t	d_hotpluggable;
	boolean_t	d_use_dma;

	ddi_dma_attr_t	d_dma;
	bd_ops_t	d_ops;
	bd_handle_t	d_handle;
};

struct bd_handle {
	bd_ops_t	h_ops;
	ddi_dma_attr_t	*h_dma;
	dev_info_t	*h_parent;
	dev_info_t	*h_child;
	void		*h_private;
	bd_t		*h_bd;
	char		*h_name;
	char		h_addr[20];	/* enough for %X,%X */
};

struct bd_xfer_impl {
	bd_xfer_t	i_public;
	list_node_t	i_linkage;
	bd_t		*i_bd;
	buf_t		*i_bp;
	uint_t		i_num_win;
	uint_t		i_cur_win;
	off_t		i_offset;
	int		(*i_func)(void *, bd_xfer_t *);
	uint32_t	i_blkshift;
	size_t		i_len;
	size_t		i_resid;
};

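/*
 * Shorthand for the public bd_xfer_t fields embedded at the head of
 * bd_xfer_impl_t, so the implementation can write xi->i_dmac rather
 * than xi->i_public.x_dmac.
 */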
#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags


/*
 * Private prototypes.
 */

static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);

struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};

static struct cb_ops bd_cb_ops = {
	bd_open,		/* open */
	bd_close,		/* close */
	bd_strategy,		/* strategy */
	nodev,			/* print */
	bd_dump,		/* dump */
	bd_read,		/* read */
	bd_write,		/* write */
	bd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	bd_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_64BIT | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	bd_aread,		/* async read */
	bd_awrite		/* async write */
};

struct dev_ops bd_dev_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	bd_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	bd_attach,		/* attach */
	bd_detach,		/* detach */
	nodev,			/* reset */
	&bd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"Generic Block Device",
	&bd_dev_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, { &modldrv, NULL }
};

static void *bd_state;
static krwlock_t bd_lock;

int
_init(void)
{
	int	rv;

	rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
	if (rv != DDI_SUCCESS) {
		return (rv);
	}
	rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
	rv = mod_install(&modlinkage);
	if (rv != DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_fini(void)
{
	int	rv;

	rv = mod_remove(&modlinkage);
	if (rv == DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

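/*
 * getinfo(9E): map a dev_t back to its dev_info_t or instance number.
 * The instance is recovered from the minor number via BDINST().
 */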
static int
bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	bd_t	*bd;
	minor_t	inst;

	_NOTE(ARGUNUSED(dip));

	inst = BDINST((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		bd = ddi_get_soft_state(bd_state, inst);
		if (bd == NULL) {
			return (DDI_FAILURE);
		}
		*resultp = (void *)bd->d_dip;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(intptr_t)inst;
		break;

	default:
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

static int
bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int		inst;
	bd_handle_t	hdl;
	bd_t		*bd;
	bd_drive_t	drive;
	int		rv;
	char		name[16];
	char		kcache[32];

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* We don't do anything native for suspend/resume */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	inst = ddi_get_instance(dip);
	hdl = ddi_get_parent_data(dip);

	(void) snprintf(name, sizeof (name), "%s%d",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);

	if (hdl == NULL) {
		cmn_err(CE_WARN, "%s: missing parent data!", name);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
		return (DDI_FAILURE);
	}
	bd = ddi_get_soft_state(bd_state, inst);

	if (hdl->h_dma) {
		bd->d_dma = *(hdl->h_dma);
		bd->d_dma.dma_attr_granular =
		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
		bd->d_use_dma = B_TRUE;

		if (bd->d_maxxfer &&
		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
			cmn_err(CE_WARN,
			    "%s: inconsistent maximum transfer size!",
			    name);
			/* We force it */
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		} else {
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		}
	} else {
		bd->d_use_dma = B_FALSE;
		if (bd->d_maxxfer == 0) {
			bd->d_maxxfer = 1024 * 1024;
		}
	}
	bd->d_ops = hdl->h_ops;
	bd->d_private = hdl->h_private;
	bd->d_blkshift = 9;	/* 512 bytes, to start */

	if (bd->d_maxxfer % DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
	}
	if (bd->d_maxxfer < DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	bd->d_dip = dip;
	bd->d_handle = hdl;
	hdl->h_bd = bd;
	ddi_set_driver_private(dip, bd);

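	/*
	 * Three locks guard distinct state: d_ocmutex covers open/close
	 * bookkeeping, d_iomutex covers the wait/run queues and the I/O
	 * kstats, and d_statemutex (with d_statecv) covers media state.
	 */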
	mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);

	list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));
	list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));

	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);

	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
	if (bd->d_ksp != NULL) {
		bd->d_ksp->ks_lock = &bd->d_iomutex;
		kstat_install(bd->d_ksp);
		bd->d_kiop = bd->d_ksp->ks_data;
	} else {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat.  The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
	}

	cmlb_alloc_handle(&bd->d_cmlbh);

	bd->d_state = DKIO_NONE;

	bzero(&drive, sizeof (drive));
	bd->d_ops.o_drive_info(bd->d_private, &drive);
	bd->d_qsize = drive.d_qsize;
	bd->d_removable = drive.d_removable;
	bd->d_hotpluggable = drive.d_hotpluggable;

	if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
		bd->d_maxxfer = drive.d_maxxfer;

	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
	    bd->d_removable, bd->d_hotpluggable,
	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
	if (rv != 0) {
		cmlb_free_handle(&bd->d_cmlbh);
		kmem_cache_destroy(bd->d_cache);
		mutex_destroy(&bd->d_iomutex);
		mutex_destroy(&bd->d_ocmutex);
		mutex_destroy(&bd->d_statemutex);
		cv_destroy(&bd->d_statecv);
		list_destroy(&bd->d_waitq);
		list_destroy(&bd->d_runq);
		if (bd->d_ksp != NULL) {
			kstat_delete(bd->d_ksp);
			bd->d_ksp = NULL;
		} else {
			kmem_free(bd->d_kiop, sizeof (kstat_io_t));
		}
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

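	/*
	 * Register a persistent device ID if the parent driver knows
	 * how to supply one; failure to register is warned about but
	 * is not fatal to attach.
	 */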
	if (bd->d_ops.o_devid_init != NULL) {
		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
		if (rv == DDI_SUCCESS) {
			if (ddi_devid_register(dip, bd->d_devid) !=
			    DDI_SUCCESS) {
				cmn_err(CE_WARN,
				    "%s: unable to register devid", name);
			}
		}
	}

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers).  Also set up properties
	 * used by HAL to identify removable media.
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);
	if (bd->d_removable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "removable-media", NULL, 0);
	}
	if (bd->d_hotpluggable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "hotpluggable", NULL, 0);
	}

	ddi_report_dev(dip);

	return (DDI_SUCCESS);
}

static int
bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	bd_t	*bd;

	bd = ddi_get_driver_private(dip);

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		/* We don't suspend, but our parent does */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
	if (bd->d_ksp != NULL) {
		kstat_delete(bd->d_ksp);
		bd->d_ksp = NULL;
	} else {
		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
	}
	cmlb_detach(bd->d_cmlbh, 0);
	cmlb_free_handle(&bd->d_cmlbh);
	if (bd->d_devid)
		ddi_devid_free(bd->d_devid);
	kmem_cache_destroy(bd->d_cache);
	mutex_destroy(&bd->d_iomutex);
	mutex_destroy(&bd->d_ocmutex);
	mutex_destroy(&bd->d_statemutex);
	cv_destroy(&bd->d_statecv);
	list_destroy(&bd->d_waitq);
	list_destroy(&bd->d_runq);
	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
	return (DDI_SUCCESS);
}

static int
bd_xfer_ctor(void *buf, void *arg, int kmflag)
{
	bd_xfer_impl_t	*xi;
	bd_t		*bd = arg;
	int		(*dcb)(caddr_t);

	if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
		dcb = DDI_DMA_SLEEP;
	} else {
		dcb = DDI_DMA_DONTWAIT;
	}

	xi = buf;
	bzero(xi, sizeof (*xi));
	xi->i_bd = bd;

	if (bd->d_use_dma) {
		if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
		    &xi->i_dmah) != DDI_SUCCESS) {
			return (-1);
		}
	}

	return (0);
}

static void
bd_xfer_dtor(void *buf, void *arg)
{
	bd_xfer_impl_t	*xi = buf;

	_NOTE(ARGUNUSED(arg));

	if (xi->i_dmah)
		ddi_dma_free_handle(&xi->i_dmah);
	xi->i_dmah = NULL;
}

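/*
 * bd_xfer_alloc() wraps a buf(9S) in a transfer context: it selects
 * the read or write entry point, and either maps the buffer into
 * kernel space (the PIO case) or binds it for DMA, possibly splitting
 * the request into multiple windows.
 */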
static bd_xfer_impl_t *
bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
    int kmflag)
{
	bd_xfer_impl_t	*xi;
	int		rv = 0;
	int		status;
	unsigned	dir;
	int		(*cb)(caddr_t);
	size_t		len;
	uint32_t	shift;

	if (kmflag == KM_SLEEP) {
		cb = DDI_DMA_SLEEP;
	} else {
		cb = DDI_DMA_DONTWAIT;
	}

	xi = kmem_cache_alloc(bd->d_cache, kmflag);
	if (xi == NULL) {
		bioerror(bp, ENOMEM);
		return (NULL);
	}

	ASSERT(bp);

	xi->i_bp = bp;
	xi->i_func = func;
	xi->i_blkno = bp->b_lblkno;

	if (bp->b_bcount == 0) {
		xi->i_len = 0;
		xi->i_nblks = 0;
		xi->i_kaddr = NULL;
		xi->i_resid = 0;
		xi->i_num_win = 0;
		goto done;
	}

	if (bp->b_flags & B_READ) {
		dir = DDI_DMA_READ;
		xi->i_func = bd->d_ops.o_read;
	} else {
		dir = DDI_DMA_WRITE;
		xi->i_func = bd->d_ops.o_write;
	}

	shift = bd->d_blkshift;
	xi->i_blkshift = shift;

	if (!bd->d_use_dma) {
		bp_mapin(bp);
		rv = 0;
		xi->i_offset = 0;
		xi->i_num_win =
		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
		xi->i_cur_win = 0;
		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
		xi->i_nblks = xi->i_len >> shift;
		xi->i_kaddr = bp->b_un.b_addr;
		xi->i_resid = bp->b_bcount;
	} else {

		/*
		 * We have to use consistent DMA if the address is misaligned.
		 */
		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
		} else {
			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
		}

		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
		    NULL, &xi->i_dmac, &xi->i_ndmac);
		switch (status) {
		case DDI_DMA_MAPPED:
			xi->i_num_win = 1;
			xi->i_cur_win = 0;
			xi->i_offset = 0;
			xi->i_len = bp->b_bcount;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_PARTIAL_MAP:
			xi->i_cur_win = 0;

			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
			    DDI_SUCCESS) ||
			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
			    &len, &xi->i_dmac, &xi->i_ndmac) !=
			    DDI_SUCCESS) ||
			    (P2PHASE(len, (1U << shift)) != 0)) {
				(void) ddi_dma_unbind_handle(xi->i_dmah);
				rv = EFAULT;
				goto done;
			}
			xi->i_len = len;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_NORESOURCES:
			rv = EAGAIN;
			goto done;
		case DDI_DMA_TOOBIG:
			rv = EINVAL;
			goto done;
		case DDI_DMA_NOMAPPING:
		case DDI_DMA_INUSE:
		default:
			rv = EFAULT;
			goto done;
		}
	}

done:
	if (rv != 0) {
		kmem_cache_free(bd->d_cache, xi);
		bioerror(bp, rv);
		return (NULL);
	}

	return (xi);
}

static void
bd_xfer_free(bd_xfer_impl_t *xi)
{
	if (xi->i_dmah) {
		(void) ddi_dma_unbind_handle(xi->i_dmah);
	}
	kmem_cache_free(xi->i_bd->d_cache, xi);
}

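/*
 * Open/close bookkeeping: d_open_lyr[] counts layered opens per
 * partition, d_open_reg[] tracks the other open types as per-partition
 * bit masks, and d_open_excl marks partitions held open exclusively
 * (FEXCL).
 */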
static int
bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	dev_t		dev = *devp;
	bd_t		*bd;
	minor_t		part;
	minor_t		inst;
	uint64_t	mask;
	boolean_t	ndelay;
	int		rv;
	diskaddr_t	nblks;
	diskaddr_t	lba;

	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	if (otyp >= OTYPCNT)
		return (EINVAL);

	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;

	/*
	 * Block any DR events from changing the set of registered
	 * devices while we function.
	 */
	rw_enter(&bd_lock, RW_READER);
	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);

	ASSERT(part < 64);
	mask = (1U << part);

	bd_update_state(bd);

	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {

		/* non-blocking opens are allowed to succeed */
		if (!ndelay) {
			rv = ENXIO;
			goto done;
		}
	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
	    NULL, NULL, 0) == 0) {

		/*
		 * We read the partinfo, verify valid ranges.  If the
		 * partition is invalid, and we aren't blocking or
		 * doing a raw access, then fail.  (Non-blocking and
		 * raw accesses can still succeed to allow a disk with
		 * bad partition data to be opened by format and fdisk.)
		 */
		if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
			rv = ENXIO;
			goto done;
		}
	} else if (!ndelay) {
		/*
		 * cmlb_partinfo failed -- invalid partition or no
		 * disk label.
		 */
		rv = ENXIO;
		goto done;
	}

	if ((flag & FWRITE) && bd->d_rdonly) {
		rv = EROFS;
		goto done;
	}

	if ((bd->d_open_excl) & (mask)) {
		rv = EBUSY;
		goto done;
	}
	if (flag & FEXCL) {
		if (bd->d_open_lyr[part]) {
			rv = EBUSY;
			goto done;
		}
		for (int i = 0; i < OTYP_LYR; i++) {
			if (bd->d_open_reg[i] & mask) {
				rv = EBUSY;
				goto done;
			}
		}
	}

	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]++;
	} else {
		bd->d_open_reg[otyp] |= mask;
	}
	if (flag & FEXCL) {
		bd->d_open_excl |= mask;
	}

	rv = 0;
done:
	mutex_exit(&bd->d_ocmutex);
	rw_exit(&bd_lock);

	return (rv);
}

static int
bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	bd_t		*bd;
	minor_t		inst;
	minor_t		part;
	uint64_t	mask;
	boolean_t	last = B_TRUE;

	_NOTE(ARGUNUSED(flag));
	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	ASSERT(part < 64);
	mask = (1U << part);

	rw_enter(&bd_lock, RW_READER);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);
	if (bd->d_open_excl & mask) {
		bd->d_open_excl &= ~mask;
	}
	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]--;
	} else {
		bd->d_open_reg[otyp] &= ~mask;
	}
	for (int i = 0; i < 64; i++) {
		if (bd->d_open_lyr[i]) {
			last = B_FALSE;
		}
	}
	for (int i = 0; last && (i < OTYP_LYR); i++) {
		if (bd->d_open_reg[i]) {
			last = B_FALSE;
		}
	}
	mutex_exit(&bd->d_ocmutex);

	if (last) {
		cmlb_invalidate(bd->d_cmlbh, 0);
	}
	rw_exit(&bd_lock);

	return (0);
}

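/*
 * bd_dump() is the dump(9E) entry point, used to write out the system
 * crash dump.  It must run polled (BD_XFER_POLL), since the normal
 * interrupt-driven completion path may not be available at dump time.
 */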
static int
bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
{
	minor_t		inst;
	minor_t		part;
	diskaddr_t	pstart;
	diskaddr_t	psize;
	bd_t		*bd;
	bd_xfer_impl_t	*xi;
	buf_t		*bp;
	int		rv;

	rw_enter(&bd_lock, RW_READER);

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}
	/*
	 * do cmlb, but do it synchronously unless we already have the
	 * partition (which we probably do.)
	 */
	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
	    (void *)1)) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	if ((blkno + nblk) > psize) {
		rw_exit(&bd_lock);
		return (EINVAL);
	}
	bp = getrbuf(KM_NOSLEEP);
	if (bp == NULL) {
		rw_exit(&bd_lock);
		return (ENOMEM);
	}

	bp->b_bcount = nblk << bd->d_blkshift;
	bp->b_resid = bp->b_bcount;
	bp->b_lblkno = blkno;
	bp->b_un.b_addr = caddr;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP);
	if (xi == NULL) {
		rw_exit(&bd_lock);
		freerbuf(bp);
		return (ENOMEM);
	}
	xi->i_blkno = blkno + pstart;
	xi->i_flags = BD_XFER_POLL;
	bd_submit(bd, xi);
	rw_exit(&bd_lock);

	/*
	 * Generally, we should have run this entirely synchronously
	 * at this point and the biowait call should be a no-op.  If
	 * it didn't happen this way, it's a bug in the underlying
	 * driver not honoring BD_XFER_POLL.
	 */
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}

void
bd_minphys(struct buf *bp)
{
	minor_t	inst;
	bd_t	*bd;

	inst = BDINST(bp->b_edev);

	bd = ddi_get_soft_state(bd_state, inst);

	/*
	 * In a non-debug kernel, bd_strategy will catch !bd as
	 * well, and will fail nicely.
	 */
	ASSERT(bd);

	if (bp->b_bcount > bd->d_maxxfer)
		bp->b_bcount = bd->d_maxxfer;
}

static int
bd_read(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
}

static int
bd_write(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
}

static int
bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
}

static int
bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
}

static int
bd_strategy(struct buf *bp)
{
	minor_t		inst;
	minor_t		part;
	bd_t		*bd;
	diskaddr_t	p_lba;
	diskaddr_t	p_nblks;
	diskaddr_t	b_nblks;
	bd_xfer_impl_t	*xi;
	uint32_t	shift;
	int		(*func)(void *, bd_xfer_t *);

	part = BDPART(bp->b_edev);
	inst = BDINST(bp->b_edev);

	ASSERT(bp);

	bp->b_resid = bp->b_bcount;

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
	    NULL, NULL, 0)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	shift = bd->d_blkshift;

	if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
	    (bp->b_lblkno > p_nblks)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}
	b_nblks = bp->b_bcount >> shift;
	if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
		biodone(bp);
		return (0);
	}

	if ((b_nblks + bp->b_lblkno) > p_nblks) {
		bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
		bp->b_bcount -= bp->b_resid;
	} else {
		bp->b_resid = 0;
	}
	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;

	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
	if (xi == NULL) {
		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
	}
	if (xi == NULL) {
		/* bd_xfer_alloc will have done bioerror */
		biodone(bp);
		return (0);
	}
	xi->i_blkno = bp->b_lblkno + p_lba;

	bd_submit(bd, xi);

	return (0);
}

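/*
 * ioctl(9E): cmlb gets the first crack at every command so that the
 * generic disk ioctls (partitioning, geometry, and so on) are handled
 * in one place; only commands it rejects with ENOTTY fall through to
 * the blkdev-specific cases below.
 */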
static int
bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
{
	minor_t		inst;
	uint16_t	part;
	bd_t		*bd;
	void		*ptr = (void *)arg;
	int		rv;

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		return (ENXIO);
	}

	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
	if (rv != ENOTTY)
		return (rv);

	if (rvalp != NULL) {
		/* the return value of the ioctl is 0 by default */
		*rvalp = 0;
	}

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo minfo;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&minfo, sizeof (minfo));
		minfo.dki_media_type = DK_FIXED_DISK;
		minfo.dki_lbsize = (1U << bd->d_blkshift);
		minfo.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCGMEDIAINFOEXT: {
		struct dk_minfo_ext miext;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&miext, sizeof (miext));
		miext.dki_media_type = DK_FIXED_DISK;
		miext.dki_lbsize = (1U << bd->d_blkshift);
		miext.dki_pbsize = miext.dki_lbsize;
		miext.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo cinfo;
		bzero(&cinfo, sizeof (cinfo));
		cinfo.dki_ctype = DKC_BLKDEV;
		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
		    "%s", ddi_driver_name(bd->d_dip));
		cinfo.dki_unit = inst;
		cinfo.dki_flags = DKI_FMTVOL;
		cinfo.dki_partition = part;
		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
		cinfo.dki_addr = 0;
		cinfo.dki_slave = 0;
		cinfo.dki_space = 0;
		cinfo.dki_prio = 0;
		cinfo.dki_vec = 0;
		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i;
		i = bd->d_removable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCHOTPLUGGABLE: {
		int i;
		i = bd->d_hotpluggable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREADONLY: {
		int i;
		i = bd->d_rdonly ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSOLIDSTATE: {
		int i;
		i = bd->d_ssd ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSTATE: {
		enum dkio_state	state;
		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
			return (EFAULT);
		}
		if ((rv = bd_check_state(bd, &state)) != 0) {
			return (rv);
		}
		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc = NULL;

		if (flag & FKIOCTL)
			dkc = (void *)arg;

		rv = bd_flush_write_cache(bd, dkc);
		return (rv);
	}

	default:
		break;
	}
	return (ENOTTY);
}

static int
bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	bd_t	*bd;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
	if (bd == NULL)
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));

	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
	    valuep, lengthp, BDPART(dev), 0));
}

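/*
 * bd_tg_rdwr() and bd_tg_getinfo() implement the cmlb_tg_ops backend
 * that cmlb uses to read labels and size the device; they bypass the
 * partition mapping and address the disk with absolute block numbers.
 */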
static int
bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
	bd_t		*bd;
	buf_t		*bp;
	bd_xfer_impl_t	*xi;
	int		rv;
	int		(*func)(void *, bd_xfer_t *);
	int		kmflag;

	/*
	 * If we are running in polled mode (such as during dump(9e)
	 * execution), then we cannot sleep for kernel allocations.
	 */
	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	if ((bp = getrbuf(kmflag)) == NULL) {
		return (ENOMEM);
	}

	switch (cmd) {
	case TG_READ:
		bp->b_flags = B_READ;
		func = bd->d_ops.o_read;
		break;
	case TG_WRITE:
		bp->b_flags = B_WRITE;
		func = bd->d_ops.o_write;
		break;
	default:
		freerbuf(bp);
		return (EINVAL);
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	xi = bd_xfer_alloc(bd, bp, func, kmflag);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}
	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
	xi->i_blkno = start;
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

static int
bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	bd_t	*bd;

	_NOTE(ARGUNUSED(tg_cookie));
	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	switch (cmd) {
	case TG_GETPHYGEOM:
	case TG_GETVIRTGEOM:
		/*
		 * We don't have any "geometry" as such, let cmlb
		 * fabricate something.
		 */
		return (ENOTTY);

	case TG_GETCAPACITY:
		bd_update_state(bd);
		*(diskaddr_t *)arg = bd->d_numblks;
		return (0);

	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = (1U << bd->d_blkshift);
		return (0);

	case TG_GETATTR:
		/*
		 * It turns out that cmlb really doesn't do much for
		 * non-writable media, but let's make the information
		 * available for it in case it does more in the
		 * future.  (The value is currently used for
		 * triggering special behavior for CD-ROMs.)
		 */
		bd_update_state(bd);
		((tg_attribute_t *)arg)->media_is_writable =
		    bd->d_rdonly ? B_FALSE : B_TRUE;
		((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
		return (0);

	default:
		return (EINVAL);
	}
}

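/*
 * The I/O scheduler is a simple depth-limited FIFO: bd_submit() places
 * each transfer on d_waitq, and bd_sched() moves transfers to d_runq
 * and hands them to the parent driver until d_qactive reaches the
 * queue depth (d_qsize) the drive advertised.
 */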
static void
bd_sched(bd_t *bd)
{
	bd_xfer_impl_t	*xi;
	struct buf	*bp;
	int		rv;

	mutex_enter(&bd->d_iomutex);

	while ((bd->d_qactive < bd->d_qsize) &&
	    ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
		bd->d_qactive++;
		kstat_waitq_to_runq(bd->d_kiop);
		list_insert_tail(&bd->d_runq, xi);

		/*
		 * Submit the job to the driver.  We drop the I/O mutex
		 * so that we can deal with the case where the driver
		 * completion routine calls back into us synchronously.
		 */

		mutex_exit(&bd->d_iomutex);

		rv = xi->i_func(bd->d_private, &xi->i_public);
		if (rv != 0) {
			bp = xi->i_bp;

			/* Unlink the transfer before freeing it. */
			mutex_enter(&bd->d_iomutex);
			bd->d_qactive--;
			kstat_runq_exit(bd->d_kiop);
			list_remove(&bd->d_runq, xi);
			mutex_exit(&bd->d_iomutex);

			bd_xfer_free(xi);
			bioerror(bp, rv);
			biodone(bp);

			mutex_enter(&bd->d_iomutex);
		} else {
			mutex_enter(&bd->d_iomutex);
		}
	}

	mutex_exit(&bd->d_iomutex);
}

static void
bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
{
	mutex_enter(&bd->d_iomutex);
	list_insert_tail(&bd->d_waitq, xi);
	kstat_waitq_enter(bd->d_kiop);
	mutex_exit(&bd->d_iomutex);

	bd_sched(bd);
}

static void
bd_runq_exit(bd_xfer_impl_t *xi, int err)
{
	bd_t	*bd = xi->i_bd;
	buf_t	*bp = xi->i_bp;

	mutex_enter(&bd->d_iomutex);
	bd->d_qactive--;
	kstat_runq_exit(bd->d_kiop);
	list_remove(&bd->d_runq, xi);
	mutex_exit(&bd->d_iomutex);

	if (err == 0) {
		if (bp->b_flags & B_READ) {
			bd->d_kiop->reads++;
			bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
		} else {
			bd->d_kiop->writes++;
			bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
		}
	}
	bd_sched(bd);
}

static void
bd_update_state(bd_t *bd)
{
	enum dkio_state	state;
	bd_media_t	media;
	boolean_t	docmlb = B_FALSE;

	bzero(&media, sizeof (media));

	mutex_enter(&bd->d_statemutex);
	if (bd->d_ops.o_media_info(bd->d_private, &media) == 0) {
		if ((1U << bd->d_blkshift) != media.m_blksize) {
			if ((media.m_blksize < 512) ||
			    (!ISP2(media.m_blksize)) ||
			    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
				cmn_err(CE_WARN,
				    "%s%d: Invalid media block size (%d)",
				    ddi_driver_name(bd->d_dip),
				    ddi_get_instance(bd->d_dip),
				    media.m_blksize);
				/*
				 * We can't use the media, treat it as
				 * not present.
				 */
				state = DKIO_EJECTED;
				bd->d_numblks = 0;
			} else {
				bd->d_blkshift =
				    ddi_ffs(media.m_blksize) - 1;
				bd->d_numblks = media.m_nblks;
				bd->d_rdonly = media.m_readonly;
				bd->d_ssd = media.m_solidstate;
				state = DKIO_INSERTED;
			}

			/* Device size changed */
			docmlb = B_TRUE;

		} else {
			if (bd->d_numblks != media.m_nblks) {
				/* Device size changed */
				docmlb = B_TRUE;
			}
			bd->d_numblks = media.m_nblks;
			bd->d_rdonly = media.m_readonly;
			state = DKIO_INSERTED;
		}

	} else {
		bd->d_numblks = 0;
		state = DKIO_EJECTED;
	}
	if (state != bd->d_state) {
		bd->d_state = state;
		cv_broadcast(&bd->d_statecv);
		docmlb = B_TRUE;
	}
	mutex_exit(&bd->d_statemutex);

	if (docmlb) {
		if (state == DKIO_INSERTED) {
			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
		} else {
			cmlb_invalidate(bd->d_cmlbh, 0);
		}
	}
}

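/*
 * bd_check_state() backs DKIOCSTATE: it blocks (interruptibly, waking
 * once per second to re-poll the media) until the device state differs
 * from the state the caller passed in, then returns the new state.
 */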
static int
bd_check_state(bd_t *bd, enum dkio_state *state)
{
	clock_t		when;

	for (;;) {

		bd_update_state(bd);

		mutex_enter(&bd->d_statemutex);

		if (bd->d_state != *state) {
			*state = bd->d_state;
			mutex_exit(&bd->d_statemutex);
			break;
		}

		when = drv_usectohz(1000000);
		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
		    when, TR_CLOCK_TICK) == 0) {
			mutex_exit(&bd->d_statemutex);
			return (EINTR);
		}

		mutex_exit(&bd->d_statemutex);
	}

	return (0);
}

static int
bd_flush_write_cache_done(struct buf *bp)
{
	struct dk_callback *dc = (void *)bp->b_private;

	(*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
	kmem_free(dc, sizeof (*dc));
	freerbuf(bp);
	return (0);
}

static int
bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
{
	buf_t			*bp;
	struct dk_callback	*dc;
	bd_xfer_impl_t		*xi;
	int			rv;

	if (bd->d_ops.o_sync_cache == NULL) {
		return (ENOTSUP);
	}
	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
		return (ENOMEM);
	}
	bp->b_resid = 0;
	bp->b_bcount = 0;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}

	/* Make an asynchronous flush, but only if there is a callback */
	if (dkc != NULL && dkc->dkc_callback != NULL) {
		/* Make a private copy of the callback structure */
		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
		*dc = *dkc;
		bp->b_private = dc;
		bp->b_iodone = bd_flush_write_cache_done;

		bd_submit(bd, xi);
		return (0);
	}

	/* In case there is no callback, perform a synchronous flush */
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

/*
 * Nexus support.
 */
int
bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
    void *arg, void *result)
{
	bd_handle_t	hdl;

	switch (ctlop) {
	case DDI_CTLOPS_REPORTDEV:
		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
		    ddi_driver_name(rdip), ddi_get_instance(rdip));
		return (DDI_SUCCESS);

	case DDI_CTLOPS_INITCHILD:
		hdl = ddi_get_parent_data((dev_info_t *)arg);
		if (hdl == NULL) {
			return (DDI_NOT_WELL_FORMED);
		}
		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
		return (DDI_SUCCESS);

	case DDI_CTLOPS_UNINITCHILD:
		ddi_set_name_addr((dev_info_t *)arg, NULL);
		ndi_prop_remove_all((dev_info_t *)arg);
		return (DDI_SUCCESS);

	default:
		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
	}
}

/*
 * Functions for device drivers.
 */
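/*
 * A parent driver typically uses these entry points roughly as in the
 * sketch below (names such as my_attach, my_softc_t, my_ops, and
 * my_dma_attr are illustrative, not part of this interface):
 *
 *	static int
 *	my_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 *	{
 *		my_softc_t *sc = ...;
 *
 *		sc->s_bdh = bd_alloc_handle(sc, &my_ops, &my_dma_attr,
 *		    KM_SLEEP);
 *		if (sc->s_bdh == NULL)
 *			return (DDI_FAILURE);
 *		if (bd_attach_handle(dip, sc->s_bdh) != DDI_SUCCESS) {
 *			bd_free_handle(sc->s_bdh);
 *			return (DDI_FAILURE);
 *		}
 *		return (DDI_SUCCESS);
 *	}
 *
 * On detach, the driver calls bd_detach_handle() followed by
 * bd_free_handle().  bd_mod_init() is called from the driver's _init()
 * to install the nexus bus ops before mod_install().
 */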
bd_handle_t
bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
{
	bd_handle_t	hdl;

	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
	if (hdl != NULL) {
		hdl->h_ops = *ops;
		hdl->h_dma = dma;
		hdl->h_private = private;
	}

	return (hdl);
}

void
bd_free_handle(bd_handle_t hdl)
{
	kmem_free(hdl, sizeof (*hdl));
}

int
bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
{
	dev_info_t	*child;
	bd_drive_t	drive;

	/* if drivers don't override this, make it assume none */
	drive.d_lun = -1;
	hdl->h_ops.o_drive_info(hdl->h_private, &drive);

	hdl->h_parent = dip;
	hdl->h_name = "blkdev";

	if (drive.d_lun >= 0) {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X,%X",
		    drive.d_target, drive.d_lun);
	} else {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X",
		    drive.d_target);
	}
	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
	    &child) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    "blkdev", hdl->h_addr);
		return (DDI_FAILURE);
	}

	ddi_set_parent_data(child, hdl);
	hdl->h_child = child;

	if (ndi_devi_online(child, 0) == NDI_FAILURE) {
		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    hdl->h_name, hdl->h_addr);
		(void) ndi_devi_free(child);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

int
bd_detach_handle(bd_handle_t hdl)
{
	int	circ;
	int	rv;
	char	*devnm;

	if (hdl->h_child == NULL) {
		return (DDI_SUCCESS);
	}
	ndi_devi_enter(hdl->h_parent, &circ);
	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
		rv = ddi_remove_child(hdl->h_child, 0);
	} else {
		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
		(void) ddi_deviname(hdl->h_child, devnm);
		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
		kmem_free(devnm, MAXNAMELEN + 1);
	}
	if (rv == 0) {
		hdl->h_child = NULL;
	}

	ndi_devi_exit(hdl->h_parent, circ);
	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
}

void
bd_xfer_done(bd_xfer_t *xfer, int err)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	buf_t		*bp = xi->i_bp;
	int		rv = DDI_SUCCESS;
	bd_t		*bd = xi->i_bd;
	size_t		len;

	if (err != 0) {
		bd_runq_exit(xi, err);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, err);
		biodone(bp);
		return;
	}

	xi->i_cur_win++;
	xi->i_resid -= xi->i_len;

	if (xi->i_resid == 0) {
		/* Job completed successfully! */
		bd_runq_exit(xi, 0);

		bd_xfer_free(xi);
		biodone(bp);
		return;
	}

	xi->i_blkno += xi->i_nblks;

	if (bd->d_use_dma) {
		/* More transfer still pending... advance to next DMA window. */
		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
	} else {
		/* Advance memory window. */
		xi->i_kaddr += xi->i_len;
		xi->i_offset += xi->i_len;
		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
	}

	if ((rv != DDI_SUCCESS) ||
	    (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
		bd_runq_exit(xi, EFAULT);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, EFAULT);
		biodone(bp);
		return;
	}
	xi->i_len = len;
	xi->i_nblks = len >> xi->i_blkshift;

	/* Submit next window to hardware. */
	rv = xi->i_func(bd->d_private, &xi->i_public);
	if (rv != 0) {
		bd_runq_exit(xi, rv);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, rv);
		biodone(bp);
	}
}

void
bd_state_change(bd_handle_t hdl)
{
	bd_t	*bd;

	if ((bd = hdl->h_bd) != NULL) {
		bd_update_state(bd);
	}
}

void
bd_mod_init(struct dev_ops *devops)
{
	static struct bus_ops bd_bus_ops = {
		BUSO_REV,		/* busops_rev */
		nullbusmap,		/* bus_map */
		NULL,			/* bus_get_intrspec (OBSOLETE) */
		NULL,			/* bus_add_intrspec (OBSOLETE) */
		NULL,			/* bus_remove_intrspec (OBSOLETE) */
		i_ddi_map_fault,	/* bus_map_fault */
		NULL,			/* bus_dma_map (OBSOLETE) */
		ddi_dma_allochdl,	/* bus_dma_allochdl */
		ddi_dma_freehdl,	/* bus_dma_freehdl */
		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
		ddi_dma_flush,		/* bus_dma_flush */
		ddi_dma_win,		/* bus_dma_win */
		ddi_dma_mctl,		/* bus_dma_ctl */
		bd_bus_ctl,		/* bus_ctl */
		ddi_bus_prop_op,	/* bus_prop_op */
		NULL,			/* bus_get_eventcookie */
		NULL,			/* bus_add_eventcall */
		NULL,			/* bus_remove_eventcall */
		NULL,			/* bus_post_event */
		NULL,			/* bus_intr_ctl (OBSOLETE) */
		NULL,			/* bus_config */
		NULL,			/* bus_unconfig */
		NULL,			/* bus_fm_init */
		NULL,			/* bus_fm_fini */
		NULL,			/* bus_fm_access_enter */
		NULL,			/* bus_fm_access_exit */
		NULL,			/* bus_power */
		NULL,			/* bus_intr_op */
	};

	devops->devo_bus_ops = &bd_bus_ops;

	/*
	 * NB: The device driver is free to supply its own
	 * character entry device support.
	 */
}

void
bd_mod_fini(struct dev_ops *devops)
{
	devops->devo_bus_ops = NULL;
}