/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
 * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/aio_req.h>
#include <sys/cred.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/kstat.h>
#include <sys/fs/dv_node.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/note.h>
#include <sys/blkdev.h>

#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)

typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;

struct bd {
	void		*d_private;
	dev_info_t	*d_dip;
	kmutex_t	d_ocmutex;
	kmutex_t	d_iomutex;
	kmutex_t	d_statemutex;
	kcondvar_t	d_statecv;
	enum dkio_state	d_state;
	cmlb_handle_t	d_cmlbh;
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];	/* bit mask */

	uint32_t	d_qsize;
	uint32_t	d_qactive;
	uint32_t	d_maxxfer;
	uint32_t	d_blkshift;
	uint32_t	d_pblkshift;
	uint64_t	d_numblks;
	ddi_devid_t	d_devid;

	kmem_cache_t	*d_cache;
	list_t		d_runq;
	list_t		d_waitq;
	kstat_t		*d_ksp;
	kstat_io_t	*d_kiop;

	boolean_t	d_rdonly;
	boolean_t	d_ssd;
	boolean_t	d_removable;
	boolean_t	d_hotpluggable;
	boolean_t	d_use_dma;

	ddi_dma_attr_t	d_dma;
	bd_ops_t	d_ops;
	bd_handle_t	d_handle;
};

struct bd_handle {
	bd_ops_t	h_ops;
	ddi_dma_attr_t	*h_dma;
	dev_info_t	*h_parent;
	dev_info_t	*h_child;
	void		*h_private;
	bd_t		*h_bd;
	char		*h_name;
	char		h_addr[20];	/* enough for %X,%X */
};

struct bd_xfer_impl {
	bd_xfer_t	i_public;
	list_node_t	i_linkage;
	bd_t		*i_bd;
	buf_t		*i_bp;
	uint_t		i_num_win;
	uint_t		i_cur_win;
	off_t		i_offset;
	int		(*i_func)(void *, bd_xfer_t *);
	uint32_t	i_blkshift;
	size_t		i_len;
	size_t		i_resid;
};

#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags
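
/*
 * The macros above alias fields of the embedded public bd_xfer_t, so
 * the implementation below can refer to them with the shorter i_*
 * names while the underlying driver sees the same values through the
 * x_* fields of the bd_xfer_t it is handed.
 */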

/*
 * Private prototypes.
 */

static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);

struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};

static struct cb_ops bd_cb_ops = {
	bd_open,		/* open */
	bd_close,		/* close */
	bd_strategy,		/* strategy */
	nodev,			/* print */
	bd_dump,		/* dump */
	bd_read,		/* read */
	bd_write,		/* write */
	bd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	bd_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_64BIT | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	bd_aread,		/* async read */
	bd_awrite		/* async write */
};

struct dev_ops bd_dev_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	bd_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	bd_attach,		/* attach */
	bd_detach,		/* detach */
	nodev,			/* reset */
	&bd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"Generic Block Device",
	&bd_dev_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, { &modldrv, NULL }
};

static void *bd_state;
static krwlock_t bd_lock;

int
_init(void)
{
	int	rv;

	rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
	if (rv != DDI_SUCCESS) {
		return (rv);
	}
	rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
	rv = mod_install(&modlinkage);
	if (rv != DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_fini(void)
{
	int	rv;

	rv = mod_remove(&modlinkage);
	if (rv == DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
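
/*
 * getinfo(9E): translate a dev_t back into our dev_info_t or instance
 * number.  The instance is encoded in the upper bits of the minor
 * number (see BDINST above).
 */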
static int
bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	bd_t	*bd;
	minor_t	inst;

	_NOTE(ARGUNUSED(dip));

	inst = BDINST((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		bd = ddi_get_soft_state(bd_state, inst);
		if (bd == NULL) {
			return (DDI_FAILURE);
		}
		*resultp = (void *)bd->d_dip;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(intptr_t)inst;
		break;

	default:
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

static int
bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int		inst;
	bd_handle_t	hdl;
	bd_t		*bd;
	bd_drive_t	drive;
	int		rv;
	char		name[16];
	char		kcache[32];

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* We don't do anything native for suspend/resume */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	inst = ddi_get_instance(dip);
	hdl = ddi_get_parent_data(dip);

	(void) snprintf(name, sizeof (name), "%s%d",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);

	if (hdl == NULL) {
		cmn_err(CE_WARN, "%s: missing parent data!", name);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
		return (DDI_FAILURE);
	}
	bd = ddi_get_soft_state(bd_state, inst);

	if (hdl->h_dma) {
		bd->d_dma = *(hdl->h_dma);
		bd->d_dma.dma_attr_granular =
		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
		bd->d_use_dma = B_TRUE;

		if (bd->d_maxxfer &&
		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
			cmn_err(CE_WARN,
			    "%s: inconsistent maximum transfer size!",
			    name);
			/* We force it */
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		} else {
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		}
	} else {
		bd->d_use_dma = B_FALSE;
		if (bd->d_maxxfer == 0) {
			bd->d_maxxfer = 1024 * 1024;
		}
	}
	bd->d_ops = hdl->h_ops;
	bd->d_private = hdl->h_private;
	bd->d_blkshift = 9;	/* 512 bytes, to start */

	if (bd->d_maxxfer % DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
	}
	if (bd->d_maxxfer < DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	bd->d_dip = dip;
	bd->d_handle = hdl;
	hdl->h_bd = bd;
	ddi_set_driver_private(dip, bd);

	mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);

	list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));
	list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));

	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);

	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
	if (bd->d_ksp != NULL) {
		bd->d_ksp->ks_lock = &bd->d_iomutex;
		kstat_install(bd->d_ksp);
		bd->d_kiop = bd->d_ksp->ks_data;
	} else {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat.  The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
	}

	cmlb_alloc_handle(&bd->d_cmlbh);

	bd->d_state = DKIO_NONE;

	bzero(&drive, sizeof (drive));
	bd->d_ops.o_drive_info(bd->d_private, &drive);
	bd->d_qsize = drive.d_qsize;
	bd->d_removable = drive.d_removable;
	bd->d_hotpluggable = drive.d_hotpluggable;

	if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
		bd->d_maxxfer = drive.d_maxxfer;


	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
	    bd->d_removable, bd->d_hotpluggable,
	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
	if (rv != 0) {
		cmlb_free_handle(&bd->d_cmlbh);
		kmem_cache_destroy(bd->d_cache);
		mutex_destroy(&bd->d_iomutex);
		mutex_destroy(&bd->d_ocmutex);
		mutex_destroy(&bd->d_statemutex);
		cv_destroy(&bd->d_statecv);
		list_destroy(&bd->d_waitq);
		list_destroy(&bd->d_runq);
		if (bd->d_ksp != NULL) {
			kstat_delete(bd->d_ksp);
			bd->d_ksp = NULL;
		} else {
			kmem_free(bd->d_kiop, sizeof (kstat_io_t));
		}
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	if (bd->d_ops.o_devid_init != NULL) {
		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
		if (rv == DDI_SUCCESS) {
			if (ddi_devid_register(dip, bd->d_devid) !=
			    DDI_SUCCESS) {
				cmn_err(CE_WARN,
				    "%s: unable to register devid", name);
			}
		}
	}

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers).  Also set up properties
	 * used by HAL to identify removable media.
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);
	if (bd->d_removable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "removable-media", NULL, 0);
	}
	if (bd->d_hotpluggable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "hotpluggable", NULL, 0);
	}

	ddi_report_dev(dip);

	return (DDI_SUCCESS);
}
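
/*
 * detach(9E): tear down everything bd_attach() created, in reverse
 * order.  DDI_SUSPEND is acknowledged but handled by our parent.
 */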
static int
bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	bd_t	*bd;

	bd = ddi_get_driver_private(dip);

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		/* We don't suspend, but our parent does */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
	if (bd->d_ksp != NULL) {
		kstat_delete(bd->d_ksp);
		bd->d_ksp = NULL;
	} else {
		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
	}
	cmlb_detach(bd->d_cmlbh, 0);
	cmlb_free_handle(&bd->d_cmlbh);
	if (bd->d_devid)
		ddi_devid_free(bd->d_devid);
	kmem_cache_destroy(bd->d_cache);
	mutex_destroy(&bd->d_iomutex);
	mutex_destroy(&bd->d_ocmutex);
	mutex_destroy(&bd->d_statemutex);
	cv_destroy(&bd->d_statecv);
	list_destroy(&bd->d_waitq);
	list_destroy(&bd->d_runq);
	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
	return (DDI_SUCCESS);
}

static int
bd_xfer_ctor(void *buf, void *arg, int kmflag)
{
	bd_xfer_impl_t	*xi;
	bd_t		*bd = arg;
	int		(*dcb)(caddr_t);

	if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
		dcb = DDI_DMA_SLEEP;
	} else {
		dcb = DDI_DMA_DONTWAIT;
	}

	xi = buf;
	bzero(xi, sizeof (*xi));
	xi->i_bd = bd;

	if (bd->d_use_dma) {
		if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
		    &xi->i_dmah) != DDI_SUCCESS) {
			return (-1);
		}
	}

	return (0);
}

static void
bd_xfer_dtor(void *buf, void *arg)
{
	bd_xfer_impl_t	*xi = buf;

	_NOTE(ARGUNUSED(arg));

	if (xi->i_dmah)
		ddi_dma_free_handle(&xi->i_dmah);
	xi->i_dmah = NULL;
}
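
/*
 * Allocate and initialize a transfer context for a buf.  For DMA
 * devices the buffer is bound here; if it cannot be covered by a
 * single binding (or, without DMA, by a single d_maxxfer chunk), the
 * transfer proceeds in windows, advanced by bd_xfer_done().
 */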
static bd_xfer_impl_t *
bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
    int kmflag)
{
	bd_xfer_impl_t	*xi;
	int		rv = 0;
	int		status;
	unsigned	dir;
	int		(*cb)(caddr_t);
	size_t		len;
	uint32_t	shift;

	if (kmflag == KM_SLEEP) {
		cb = DDI_DMA_SLEEP;
	} else {
		cb = DDI_DMA_DONTWAIT;
	}

	xi = kmem_cache_alloc(bd->d_cache, kmflag);
	if (xi == NULL) {
		bioerror(bp, ENOMEM);
		return (NULL);
	}

	ASSERT(bp);

	xi->i_bp = bp;
	xi->i_func = func;
	xi->i_blkno = bp->b_lblkno;

	if (bp->b_bcount == 0) {
		xi->i_len = 0;
		xi->i_nblks = 0;
		xi->i_kaddr = NULL;
		xi->i_resid = 0;
		xi->i_num_win = 0;
		goto done;
	}

	if (bp->b_flags & B_READ) {
		dir = DDI_DMA_READ;
		xi->i_func = bd->d_ops.o_read;
	} else {
		dir = DDI_DMA_WRITE;
		xi->i_func = bd->d_ops.o_write;
	}

	shift = bd->d_blkshift;
	xi->i_blkshift = shift;

	if (!bd->d_use_dma) {
		bp_mapin(bp);
		rv = 0;
		xi->i_offset = 0;
		xi->i_num_win =
		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
		xi->i_cur_win = 0;
		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
		xi->i_nblks = xi->i_len >> shift;
		xi->i_kaddr = bp->b_un.b_addr;
		xi->i_resid = bp->b_bcount;
	} else {

		/*
		 * We have to use consistent DMA if the address is misaligned.
		 */
		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
		} else {
			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
		}

		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
		    NULL, &xi->i_dmac, &xi->i_ndmac);
		switch (status) {
		case DDI_DMA_MAPPED:
			xi->i_num_win = 1;
			xi->i_cur_win = 0;
			xi->i_offset = 0;
			xi->i_len = bp->b_bcount;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_PARTIAL_MAP:
			xi->i_cur_win = 0;

			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
			    DDI_SUCCESS) ||
			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
			    &len, &xi->i_dmac, &xi->i_ndmac) !=
			    DDI_SUCCESS) ||
			    (P2PHASE(len, shift) != 0)) {
				(void) ddi_dma_unbind_handle(xi->i_dmah);
				rv = EFAULT;
				goto done;
			}
			xi->i_len = len;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_NORESOURCES:
			rv = EAGAIN;
			goto done;
		case DDI_DMA_TOOBIG:
			rv = EINVAL;
			goto done;
		case DDI_DMA_NOMAPPING:
		case DDI_DMA_INUSE:
		default:
			rv = EFAULT;
			goto done;
		}
	}

done:
	if (rv != 0) {
		kmem_cache_free(bd->d_cache, xi);
		bioerror(bp, rv);
		return (NULL);
	}

	return (xi);
}

static void
bd_xfer_free(bd_xfer_impl_t *xi)
{
	if (xi->i_dmah) {
		(void) ddi_dma_unbind_handle(xi->i_dmah);
	}
	kmem_cache_free(xi->i_bd->d_cache, xi);
}

static int
bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	dev_t		dev = *devp;
	bd_t		*bd;
	minor_t		part;
	minor_t		inst;
	uint64_t	mask;
	boolean_t	ndelay;
	int		rv;
	diskaddr_t	nblks;
	diskaddr_t	lba;

	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	if (otyp >= OTYPCNT)
		return (EINVAL);

	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;

	/*
	 * Block any DR events from changing the set of registered
	 * devices while we function.
	 */
	rw_enter(&bd_lock, RW_READER);
	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);

	ASSERT(part < 64);
	mask = (1U << part);

	bd_update_state(bd);

	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {

		/* non-blocking opens are allowed to succeed */
		if (!ndelay) {
			rv = ENXIO;
			goto done;
		}
	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
	    NULL, NULL, 0) == 0) {

		/*
		 * We read the partinfo, verify valid ranges.  If the
		 * partition is invalid, and we aren't blocking or
		 * doing a raw access, then fail.  (Non-blocking and
		 * raw accesses can still succeed to allow a disk with
		 * bad partition data to be opened by format and fdisk.)
		 */
		if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
			rv = ENXIO;
			goto done;
		}
	} else if (!ndelay) {
		/*
		 * cmlb_partinfo failed -- invalid partition or no
		 * disk label.
		 */
		rv = ENXIO;
		goto done;
	}

	if ((flag & FWRITE) && bd->d_rdonly) {
		rv = EROFS;
		goto done;
	}

	if ((bd->d_open_excl) & (mask)) {
		rv = EBUSY;
		goto done;
	}
	if (flag & FEXCL) {
		if (bd->d_open_lyr[part]) {
			rv = EBUSY;
			goto done;
		}
		for (int i = 0; i < OTYP_LYR; i++) {
			if (bd->d_open_reg[i] & mask) {
				rv = EBUSY;
				goto done;
			}
		}
	}

	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]++;
	} else {
		bd->d_open_reg[otyp] |= mask;
	}
	if (flag & FEXCL) {
		bd->d_open_excl |= mask;
	}

	rv = 0;
done:
	mutex_exit(&bd->d_ocmutex);
	rw_exit(&bd_lock);

	return (rv);
}
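
/*
 * close(9E): drop the reference taken by bd_open().  When the last
 * open of any type goes away, invalidate the cmlb label so that it
 * is revalidated on the next open.
 */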
static int
bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	bd_t		*bd;
	minor_t		inst;
	minor_t		part;
	uint64_t	mask;
	boolean_t	last = B_TRUE;

	_NOTE(ARGUNUSED(flag));
	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	ASSERT(part < 64);
	mask = (1U << part);

	rw_enter(&bd_lock, RW_READER);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);
	if (bd->d_open_excl & mask) {
		bd->d_open_excl &= ~mask;
	}
	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]--;
	} else {
		bd->d_open_reg[otyp] &= ~mask;
	}
	for (int i = 0; i < 64; i++) {
		if (bd->d_open_lyr[i]) {
			last = B_FALSE;
		}
	}
	for (int i = 0; last && (i < OTYP_LYR); i++) {
		if (bd->d_open_reg[i]) {
			last = B_FALSE;
		}
	}
	mutex_exit(&bd->d_ocmutex);

	if (last) {
		cmlb_invalidate(bd->d_cmlbh, 0);
	}
	rw_exit(&bd_lock);

	return (0);
}

static int
bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
{
	minor_t		inst;
	minor_t		part;
	diskaddr_t	pstart;
	diskaddr_t	psize;
	bd_t		*bd;
	bd_xfer_impl_t	*xi;
	buf_t		*bp;
	int		rv;

	rw_enter(&bd_lock, RW_READER);

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}
	/*
	 * do cmlb, but do it synchronously unless we already have the
	 * partition (which we probably should.)
	 */
	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
	    (void *)1)) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	if ((blkno + nblk) > psize) {
		rw_exit(&bd_lock);
		return (EINVAL);
	}
	bp = getrbuf(KM_NOSLEEP);
	if (bp == NULL) {
		rw_exit(&bd_lock);
		return (ENOMEM);
	}

	bp->b_bcount = nblk << bd->d_blkshift;
	bp->b_resid = bp->b_bcount;
	bp->b_lblkno = blkno;
	bp->b_un.b_addr = caddr;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP);
	if (xi == NULL) {
		rw_exit(&bd_lock);
		freerbuf(bp);
		return (ENOMEM);
	}
	xi->i_blkno = blkno + pstart;
	xi->i_flags = BD_XFER_POLL;
	bd_submit(bd, xi);
	rw_exit(&bd_lock);

	/*
	 * Generally, we should have run this entirely synchronously
	 * at this point and the biowait call should be a no-op.  If
	 * it didn't happen this way, it's a bug in the underlying
	 * driver not honoring BD_XFER_POLL.
	 */
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}
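
/*
 * minphys callback handed to physio/aphysio: clamp each transfer to
 * the device's maximum transfer size.
 */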
void
bd_minphys(struct buf *bp)
{
	minor_t inst;
	bd_t	*bd;
	inst = BDINST(bp->b_edev);

	bd = ddi_get_soft_state(bd_state, inst);

	/*
	 * In a non-debug kernel, bd_strategy will catch !bd as
	 * well, and will fail nicely.
	 */
	ASSERT(bd);

	if (bp->b_bcount > bd->d_maxxfer)
		bp->b_bcount = bd->d_maxxfer;
}

static int
bd_read(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
}

static int
bd_write(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
}

static int
bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
}

static int
bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
}

static int
bd_strategy(struct buf *bp)
{
	minor_t		inst;
	minor_t		part;
	bd_t		*bd;
	diskaddr_t	p_lba;
	diskaddr_t	p_nblks;
	diskaddr_t	b_nblks;
	bd_xfer_impl_t	*xi;
	uint32_t	shift;
	int		(*func)(void *, bd_xfer_t *);

	part = BDPART(bp->b_edev);
	inst = BDINST(bp->b_edev);

	ASSERT(bp);

	bp->b_resid = bp->b_bcount;

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
	    NULL, NULL, 0)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	shift = bd->d_blkshift;

	if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
	    (bp->b_lblkno > p_nblks)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}
	b_nblks = bp->b_bcount >> shift;
	if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
		biodone(bp);
		return (0);
	}

	if ((b_nblks + bp->b_lblkno) > p_nblks) {
		bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
		bp->b_bcount -= bp->b_resid;
	} else {
		bp->b_resid = 0;
	}
	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;

	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
	if (xi == NULL) {
		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
	}
	if (xi == NULL) {
		/* bd_xfer_alloc will have done bioerror */
		biodone(bp);
		return (0);
	}
	xi->i_blkno = bp->b_lblkno + p_lba;

	bd_submit(bd, xi);

	return (0);
}
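
/*
 * ioctl(9E): cmlb gets first crack at the request (partitioning and
 * label ioctls); anything it does not recognize is handled here.
 */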
static int
bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
{
	minor_t		inst;
	uint16_t	part;
	bd_t		*bd;
	void		*ptr = (void *)arg;
	int		rv;

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		return (ENXIO);
	}

	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
	if (rv != ENOTTY)
		return (rv);

	if (rvalp != NULL) {
		/* the return value of the ioctl is 0 by default */
		*rvalp = 0;
	}

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo minfo;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&minfo, sizeof (minfo));
		minfo.dki_media_type = DK_FIXED_DISK;
		minfo.dki_lbsize = (1U << bd->d_blkshift);
		minfo.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCGMEDIAINFOEXT: {
		struct dk_minfo_ext miext;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&miext, sizeof (miext));
		miext.dki_media_type = DK_FIXED_DISK;
		miext.dki_lbsize = (1U << bd->d_blkshift);
		miext.dki_pbsize = (1U << bd->d_pblkshift);
		miext.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo cinfo;
		bzero(&cinfo, sizeof (cinfo));
		cinfo.dki_ctype = DKC_BLKDEV;
		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
		    "%s", ddi_driver_name(bd->d_dip));
		cinfo.dki_unit = inst;
		cinfo.dki_flags = DKI_FMTVOL;
		cinfo.dki_partition = part;
		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
		cinfo.dki_addr = 0;
		cinfo.dki_slave = 0;
		cinfo.dki_space = 0;
		cinfo.dki_prio = 0;
		cinfo.dki_vec = 0;
		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i;
		i = bd->d_removable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCHOTPLUGGABLE: {
		int i;
		i = bd->d_hotpluggable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREADONLY: {
		int i;
		i = bd->d_rdonly ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSOLIDSTATE: {
		int i;
		i = bd->d_ssd ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSTATE: {
		enum dkio_state	state;
		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
			return (EFAULT);
		}
		if ((rv = bd_check_state(bd, &state)) != 0) {
			return (rv);
		}
		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc = NULL;

		if (flag & FKIOCTL)
			dkc = (void *)arg;

		rv = bd_flush_write_cache(bd, dkc);
		return (rv);
	}

	default:
		break;

	}
	return (ENOTTY);
}

static int
bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	bd_t	*bd;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
	if (bd == NULL)
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));

	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
	    valuep, lengthp, BDPART(dev), 0));
}


static int
bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
	bd_t		*bd;
	buf_t		*bp;
	bd_xfer_impl_t	*xi;
	int		rv;
	int		(*func)(void *, bd_xfer_t *);
	int		kmflag;

	/*
	 * If we are running in polled mode (such as during dump(9e)
	 * execution), then we cannot sleep for kernel allocations.
	 */
	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	if ((bp = getrbuf(kmflag)) == NULL) {
		return (ENOMEM);
	}

	switch (cmd) {
	case TG_READ:
		bp->b_flags = B_READ;
		func = bd->d_ops.o_read;
		break;
	case TG_WRITE:
		bp->b_flags = B_WRITE;
		func = bd->d_ops.o_write;
		break;
	default:
		freerbuf(bp);
		return (EINVAL);
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	xi = bd_xfer_alloc(bd, bp, func, kmflag);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}
	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
	xi->i_blkno = start;
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

static int
bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	bd_t		*bd;

	_NOTE(ARGUNUSED(tg_cookie));
	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	switch (cmd) {
	case TG_GETPHYGEOM:
	case TG_GETVIRTGEOM:
		/*
		 * We don't have any "geometry" as such, let cmlb
		 * fabricate something.
		 */
		return (ENOTTY);

	case TG_GETCAPACITY:
		bd_update_state(bd);
		*(diskaddr_t *)arg = bd->d_numblks;
		return (0);

	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = (1U << bd->d_blkshift);
		return (0);

	case TG_GETATTR:
		/*
		 * It turns out that cmlb really doesn't do much for
		 * non-writable media, but let's make the information
		 * available for it in case it does more in the
		 * future.  (The value is currently used for
		 * triggering special behavior for CD-ROMs.)
		 */
		bd_update_state(bd);
		((tg_attribute_t *)arg)->media_is_writable =
		    bd->d_rdonly ? B_FALSE : B_TRUE;
		((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
		return (0);

	default:
		return (EINVAL);
	}
}
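
/*
 * I/O scheduling: bd_submit() appends a transfer to the wait queue;
 * bd_sched() moves entries from the wait queue to the run queue,
 * dispatching them to the underlying driver until d_qsize commands
 * are outstanding; bd_runq_exit() retires a completed transfer and
 * kicks the scheduler again.
 */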
static void
bd_sched(bd_t *bd)
{
	bd_xfer_impl_t	*xi;
	struct buf	*bp;
	int		rv;

	mutex_enter(&bd->d_iomutex);

	while ((bd->d_qactive < bd->d_qsize) &&
	    ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
		bd->d_qactive++;
		kstat_waitq_to_runq(bd->d_kiop);
		list_insert_tail(&bd->d_runq, xi);

		/*
		 * Submit the job to the driver.  We drop the I/O mutex
		 * so that we can deal with the case where the driver
		 * completion routine calls back into us synchronously.
		 */

		mutex_exit(&bd->d_iomutex);

		rv = xi->i_func(bd->d_private, &xi->i_public);
		if (rv != 0) {
			bp = xi->i_bp;
			bioerror(bp, rv);
			biodone(bp);

			mutex_enter(&bd->d_iomutex);
			bd->d_qactive--;
			kstat_runq_exit(bd->d_kiop);
			list_remove(&bd->d_runq, xi);
			bd_xfer_free(xi);
		} else {
			mutex_enter(&bd->d_iomutex);
		}
	}

	mutex_exit(&bd->d_iomutex);
}

static void
bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
{
	mutex_enter(&bd->d_iomutex);
	list_insert_tail(&bd->d_waitq, xi);
	kstat_waitq_enter(bd->d_kiop);
	mutex_exit(&bd->d_iomutex);

	bd_sched(bd);
}

static void
bd_runq_exit(bd_xfer_impl_t *xi, int err)
{
	bd_t	*bd = xi->i_bd;
	buf_t	*bp = xi->i_bp;

	mutex_enter(&bd->d_iomutex);
	bd->d_qactive--;
	kstat_runq_exit(bd->d_kiop);
	list_remove(&bd->d_runq, xi);
	mutex_exit(&bd->d_iomutex);

	if (err == 0) {
		if (bp->b_flags & B_READ) {
			bd->d_kiop->reads++;
			bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
		} else {
			bd->d_kiop->writes++;
			bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
		}
	}
	bd_sched(bd);
}

static void
bd_update_state(bd_t *bd)
{
	enum dkio_state	state = DKIO_INSERTED;
	boolean_t	docmlb = B_FALSE;
	bd_media_t	media;

	bzero(&media, sizeof (media));

	mutex_enter(&bd->d_statemutex);
	if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
		bd->d_numblks = 0;
		state = DKIO_EJECTED;
		goto done;
	}

	if ((media.m_blksize < 512) ||
	    (!ISP2(media.m_blksize)) ||
	    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
		cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
		    ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
		    media.m_blksize);
		/*
		 * We can't use the media, treat it as not present.
		 */
		state = DKIO_EJECTED;
		bd->d_numblks = 0;
		goto done;
	}

	if (((1U << bd->d_blkshift) != media.m_blksize) ||
	    (bd->d_numblks != media.m_nblks)) {
		/* Device size changed */
		docmlb = B_TRUE;
	}

	bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
	bd->d_pblkshift = bd->d_blkshift;
	bd->d_numblks = media.m_nblks;
	bd->d_rdonly = media.m_readonly;
	bd->d_ssd = media.m_solidstate;

	/*
	 * Only use the supplied physical block size if it is non-zero,
	 * greater than or equal to the block size, and a power of 2.
	 * Ignore it if not; it's just informational and we can still
	 * use the media.
	 */
	if ((media.m_pblksize != 0) &&
	    (media.m_pblksize >= media.m_blksize) &&
	    (ISP2(media.m_pblksize)))
		bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;

done:
	if (state != bd->d_state) {
		bd->d_state = state;
		cv_broadcast(&bd->d_statecv);
		docmlb = B_TRUE;
	}
	mutex_exit(&bd->d_statemutex);

	if (docmlb) {
		if (state == DKIO_INSERTED) {
			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
		} else {
			cmlb_invalidate(bd->d_cmlbh, 0);
		}
	}
}

static int
bd_check_state(bd_t *bd, enum dkio_state *state)
{
	clock_t		when;

	for (;;) {

		bd_update_state(bd);

		mutex_enter(&bd->d_statemutex);

		if (bd->d_state != *state) {
			*state = bd->d_state;
			mutex_exit(&bd->d_statemutex);
			break;
		}

		when = drv_usectohz(1000000);
		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
		    when, TR_CLOCK_TICK) == 0) {
			mutex_exit(&bd->d_statemutex);
			return (EINTR);
		}

		mutex_exit(&bd->d_statemutex);
	}

	return (0);
}

static int
bd_flush_write_cache_done(struct buf *bp)
{
	struct dk_callback *dc = (void *)bp->b_private;

	(*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
	kmem_free(dc, sizeof (*dc));
	freerbuf(bp);
	return (0);
}

static int
bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
{
	buf_t			*bp;
	struct dk_callback	*dc;
	bd_xfer_impl_t		*xi;
	int			rv;

	if (bd->d_ops.o_sync_cache == NULL) {
		return (ENOTSUP);
	}
	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
		return (ENOMEM);
	}
	bp->b_resid = 0;
	bp->b_bcount = 0;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}

	/* Make an asynchronous flush, but only if there is a callback */
	if (dkc != NULL && dkc->dkc_callback != NULL) {
		/* Make a private copy of the callback structure */
		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
		*dc = *dkc;
		bp->b_private = dc;
		bp->b_iodone = bd_flush_write_cache_done;

		bd_submit(bd, xi);
		return (0);
	}

	/* In case there is no callback, perform a synchronous flush */
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

/*
 * Nexus support.
 */
int
bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
    void *arg, void *result)
{
	bd_handle_t	hdl;

	switch (ctlop) {
	case DDI_CTLOPS_REPORTDEV:
		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
		    ddi_driver_name(rdip), ddi_get_instance(rdip));
		return (DDI_SUCCESS);

	case DDI_CTLOPS_INITCHILD:
		hdl = ddi_get_parent_data((dev_info_t *)arg);
		if (hdl == NULL) {
			return (DDI_NOT_WELL_FORMED);
		}
		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
		return (DDI_SUCCESS);

	case DDI_CTLOPS_UNINITCHILD:
		ddi_set_name_addr((dev_info_t *)arg, NULL);
		ndi_prop_remove_all((dev_info_t *)arg);
		return (DDI_SUCCESS);

	default:
		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
	}
}

/*
 * Functions for device drivers.
 */
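/*
 * A rough sketch of the expected call pattern from a child driver's
 * attach(9E) and detach(9E) (illustrative only; the "mydrv" names are
 * hypothetical and error handling is elided):
 *
 *	hdl = bd_alloc_handle(mydrv, &mydrv_bd_ops, &mydrv_dma_attr,
 *	    KM_SLEEP);
 *	if (bd_attach_handle(dip, hdl) != DDI_SUCCESS)
 *		... bd_free_handle(hdl) and fail the attach ...
 *
 *	(void) bd_detach_handle(hdl);
 *	bd_free_handle(hdl);
 */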
bd_handle_t
bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
{
	bd_handle_t	hdl;

	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
	if (hdl != NULL) {
		hdl->h_ops = *ops;
		hdl->h_dma = dma;
		hdl->h_private = private;
	}

	return (hdl);
}

void
bd_free_handle(bd_handle_t hdl)
{
	kmem_free(hdl, sizeof (*hdl));
}

int
bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
{
	dev_info_t	*child;
	bd_drive_t	drive;

	/* if drivers don't override this, make it assume none */
	drive.d_lun = -1;
	hdl->h_ops.o_drive_info(hdl->h_private, &drive);

	hdl->h_parent = dip;
	hdl->h_name = "blkdev";

	if (drive.d_lun >= 0) {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X,%X",
		    drive.d_target, drive.d_lun);
	} else {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X",
		    drive.d_target);
	}
	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
	    &child) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    "blkdev", hdl->h_addr);
		return (DDI_FAILURE);
	}

	ddi_set_parent_data(child, hdl);
	hdl->h_child = child;

	if (ndi_devi_online(child, 0) == NDI_FAILURE) {
		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    hdl->h_name, hdl->h_addr);
		(void) ndi_devi_free(child);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

int
bd_detach_handle(bd_handle_t hdl)
{
	int	circ;
	int	rv;
	char	*devnm;

	if (hdl->h_child == NULL) {
		return (DDI_SUCCESS);
	}
	ndi_devi_enter(hdl->h_parent, &circ);
	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
		rv = ddi_remove_child(hdl->h_child, 0);
	} else {
		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
		(void) ddi_deviname(hdl->h_child, devnm);
		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
		kmem_free(devnm, MAXNAMELEN + 1);
	}
	if (rv == 0) {
		hdl->h_child = NULL;
	}

	ndi_devi_exit(hdl->h_parent, circ);
	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
}
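
/*
 * Completion entry point, called by the underlying driver when a
 * submitted transfer finishes.  On success we either advance to the
 * next DMA/memory window or, if nothing remains, complete the buf.
 */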
void
bd_xfer_done(bd_xfer_t *xfer, int err)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	buf_t		*bp = xi->i_bp;
	int		rv = DDI_SUCCESS;
	bd_t		*bd = xi->i_bd;
	size_t		len;

	if (err != 0) {
		bd_runq_exit(xi, err);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, err);
		biodone(bp);
		return;
	}

	xi->i_cur_win++;
	xi->i_resid -= xi->i_len;

	if (xi->i_resid == 0) {
		/* Job completed successfully! */
		bd_runq_exit(xi, 0);

		bd_xfer_free(xi);
		biodone(bp);
		return;
	}

	xi->i_blkno += xi->i_nblks;

	if (bd->d_use_dma) {
		/* More transfer still pending... advance to next DMA window. */
		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
	} else {
		/* Advance memory window. */
		xi->i_kaddr += xi->i_len;
		xi->i_offset += xi->i_len;
		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
	}


	if ((rv != DDI_SUCCESS) ||
	    (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
		bd_runq_exit(xi, EFAULT);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, EFAULT);
		biodone(bp);
		return;
	}
	xi->i_len = len;
	xi->i_nblks = len >> xi->i_blkshift;

	/* Submit next window to hardware. */
	rv = xi->i_func(bd->d_private, &xi->i_public);
	if (rv != 0) {
		bd_runq_exit(xi, rv);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, rv);
		biodone(bp);
	}
}

void
bd_state_change(bd_handle_t hdl)
{
	bd_t		*bd;

	if ((bd = hdl->h_bd) != NULL) {
		bd_update_state(bd);
	}
}

void
bd_mod_init(struct dev_ops *devops)
{
	static struct bus_ops bd_bus_ops = {
		BUSO_REV,		/* busops_rev */
		nullbusmap,		/* bus_map */
		NULL,			/* bus_get_intrspec (OBSOLETE) */
		NULL,			/* bus_add_intrspec (OBSOLETE) */
		NULL,			/* bus_remove_intrspec (OBSOLETE) */
		i_ddi_map_fault,	/* bus_map_fault */
		NULL,			/* bus_dma_map (OBSOLETE) */
		ddi_dma_allochdl,	/* bus_dma_allochdl */
		ddi_dma_freehdl,	/* bus_dma_freehdl */
		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
		ddi_dma_flush,		/* bus_dma_flush */
		ddi_dma_win,		/* bus_dma_win */
		ddi_dma_mctl,		/* bus_dma_ctl */
		bd_bus_ctl,		/* bus_ctl */
		ddi_bus_prop_op,	/* bus_prop_op */
		NULL,			/* bus_get_eventcookie */
		NULL,			/* bus_add_eventcall */
		NULL,			/* bus_remove_eventcall */
		NULL,			/* bus_post_event */
		NULL,			/* bus_intr_ctl (OBSOLETE) */
		NULL,			/* bus_config */
		NULL,			/* bus_unconfig */
		NULL,			/* bus_fm_init */
		NULL,			/* bus_fm_fini */
		NULL,			/* bus_fm_access_enter */
		NULL,			/* bus_fm_access_exit */
		NULL,			/* bus_power */
		NULL,			/* bus_intr_op */
	};

	devops->devo_bus_ops = &bd_bus_ops;

	/*
	 * NB: The device driver is free to supply its own
	 * character entry device support.
	 */
}

void
bd_mod_fini(struct dev_ops *devops)
{
	devops->devo_bus_ops = NULL;
}
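
/*
 * Illustrative sketch of how a child driver wires blkdev into its own
 * dev_ops before module installation (the "mydrv" names are
 * hypothetical):
 *
 *	int
 *	_init(void)
 *	{
 *		int rv;
 *
 *		bd_mod_init(&mydrv_dev_ops);
 *		if ((rv = mod_install(&mydrv_modlinkage)) != DDI_SUCCESS)
 *			bd_mod_fini(&mydrv_dev_ops);
 *		return (rv);
 *	}
 */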