/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
 * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/aio_req.h>
#include <sys/cred.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/kstat.h>
#include <sys/fs/dv_node.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/note.h>
#include <sys/blkdev.h>
#include <sys/scsi/impl/inquiry.h>

#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)

typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;

struct bd {
	void		*d_private;
	dev_info_t	*d_dip;
	kmutex_t	d_ocmutex;
	kmutex_t	d_iomutex;
	kmutex_t	d_statemutex;
	kcondvar_t	d_statecv;
	enum dkio_state	d_state;
	cmlb_handle_t	d_cmlbh;
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];	/* bit mask */

	uint32_t	d_qsize;
	uint32_t	d_qactive;
	uint32_t	d_maxxfer;
	uint32_t	d_blkshift;
	uint32_t	d_pblkshift;
	uint64_t	d_numblks;
	ddi_devid_t	d_devid;

	kmem_cache_t	*d_cache;
	list_t		d_runq;
	list_t		d_waitq;
	kstat_t		*d_ksp;
	kstat_io_t	*d_kiop;

	boolean_t	d_rdonly;
	boolean_t	d_ssd;
	boolean_t	d_removable;
	boolean_t	d_hotpluggable;
	boolean_t	d_use_dma;

	ddi_dma_attr_t	d_dma;
	bd_ops_t	d_ops;
	bd_handle_t	d_handle;
};

struct bd_handle {
	bd_ops_t	h_ops;
	ddi_dma_attr_t	*h_dma;
	dev_info_t	*h_parent;
	dev_info_t	*h_child;
	void		*h_private;
	bd_t		*h_bd;
	char		*h_name;
	char		h_addr[20];	/* enough for %X,%X */
};

struct bd_xfer_impl {
	bd_xfer_t	i_public;
	list_node_t	i_linkage;
	bd_t		*i_bd;
	buf_t		*i_bp;
	uint_t		i_num_win;
	uint_t		i_cur_win;
	off_t		i_offset;
	int		(*i_func)(void *, bd_xfer_t *);
	uint32_t	i_blkshift;
	size_t		i_len;
	size_t		i_resid;
};

#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags
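
/*
 * Worked example of the minor-number encoding above (illustrative):
 * a dev_t whose minor number is 131 maps to instance 131 / 64 = 2 and
 * partition 131 % 64 = 3; each attached instance thus owns a run of
 * BD_MAXPART (64) consecutive minor numbers.
 */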

/*
 * Private prototypes.
 */

static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);

static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);

struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};

static struct cb_ops bd_cb_ops = {
	bd_open,		/* open */
	bd_close,		/* close */
	bd_strategy,		/* strategy */
	nodev,			/* print */
	bd_dump,		/* dump */
	bd_read,		/* read */
	bd_write,		/* write */
	bd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	bd_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_64BIT | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	bd_aread,		/* async read */
	bd_awrite		/* async write */
};

struct dev_ops bd_dev_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	bd_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	bd_attach,		/* attach */
	bd_detach,		/* detach */
	nodev,			/* reset */
	&bd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"Generic Block Device",
	&bd_dev_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, { &modldrv, NULL }
};

static void *bd_state;
static krwlock_t bd_lock;

int
_init(void)
{
	int rv;

	rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
	if (rv != DDI_SUCCESS) {
		return (rv);
	}
	rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
	rv = mod_install(&modlinkage);
	if (rv != DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_fini(void)
{
	int rv;

	rv = mod_remove(&modlinkage);
	if (rv == DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static int
bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	bd_t *bd;
	minor_t inst;

	_NOTE(ARGUNUSED(dip));

	inst = BDINST((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		bd = ddi_get_soft_state(bd_state, inst);
		if (bd == NULL) {
			return (DDI_FAILURE);
		}
		*resultp = (void *)bd->d_dip;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(intptr_t)inst;
		break;

	default:
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

static void
bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
{
	int ilen;
	char *data_string;

	ilen = scsi_ascii_inquiry_len(data, len);
	ASSERT3U(ilen, <=, len);
	if (ilen <= 0)
		return;
	/* ensure null termination */
	data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
	bcopy(data, data_string, ilen);
	(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
	kmem_free(data_string, ilen + 1);
}

static void
bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
{
	if (drive->d_vendor_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
		    drive->d_vendor, drive->d_vendor_len);

	if (drive->d_product_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
		    drive->d_product, drive->d_product_len);

	if (drive->d_serial_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
		    drive->d_serial, drive->d_serial_len);

	if (drive->d_revision_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
		    drive->d_revision, drive->d_revision_len);
}

static int
bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int		inst;
	bd_handle_t	hdl;
	bd_t		*bd;
	bd_drive_t	drive;
	int		rv;
	char		name[16];
	char		kcache[32];

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* We don't do anything native for suspend/resume */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	inst = ddi_get_instance(dip);
	hdl = ddi_get_parent_data(dip);

	(void) snprintf(name, sizeof (name), "%s%d",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);

	if (hdl == NULL) {
		cmn_err(CE_WARN, "%s: missing parent data!", name);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
		return (DDI_FAILURE);
	}
	bd = ddi_get_soft_state(bd_state, inst);

	if (hdl->h_dma) {
		bd->d_dma = *(hdl->h_dma);
		bd->d_dma.dma_attr_granular =
		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
		bd->d_use_dma = B_TRUE;

		if (bd->d_maxxfer &&
		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
			cmn_err(CE_WARN,
			    "%s: inconsistent maximum transfer size!",
			    name);
			/* We force it */
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		} else {
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		}
	} else {
		bd->d_use_dma = B_FALSE;
		if (bd->d_maxxfer == 0) {
			bd->d_maxxfer = 1024 * 1024;
		}
	}
	bd->d_ops = hdl->h_ops;
	bd->d_private = hdl->h_private;
	bd->d_blkshift = 9;	/* 512 bytes, to start */

	if (bd->d_maxxfer % DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
	}
	if (bd->d_maxxfer < DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	bd->d_dip = dip;
	bd->d_handle = hdl;
	hdl->h_bd = bd;
	ddi_set_driver_private(dip, bd);

	mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);

	list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));
	list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));

	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);

	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
	if (bd->d_ksp != NULL) {
		bd->d_ksp->ks_lock = &bd->d_iomutex;
		kstat_install(bd->d_ksp);
		bd->d_kiop = bd->d_ksp->ks_data;
	} else {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat.  The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
	}

	cmlb_alloc_handle(&bd->d_cmlbh);

	bd->d_state = DKIO_NONE;

	bzero(&drive, sizeof (drive));
	bd->d_ops.o_drive_info(bd->d_private, &drive);
	bd->d_qsize = drive.d_qsize;
	bd->d_removable = drive.d_removable;
	bd->d_hotpluggable = drive.d_hotpluggable;

	if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
		bd->d_maxxfer = drive.d_maxxfer;

	bd_create_inquiry_props(dip, &drive);

	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
	    bd->d_removable, bd->d_hotpluggable,
	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
	if (rv != 0) {
		cmlb_free_handle(&bd->d_cmlbh);
		kmem_cache_destroy(bd->d_cache);
		mutex_destroy(&bd->d_iomutex);
		mutex_destroy(&bd->d_ocmutex);
		mutex_destroy(&bd->d_statemutex);
		cv_destroy(&bd->d_statecv);
		list_destroy(&bd->d_waitq);
		list_destroy(&bd->d_runq);
		if (bd->d_ksp != NULL) {
			kstat_delete(bd->d_ksp);
			bd->d_ksp = NULL;
		} else {
			kmem_free(bd->d_kiop, sizeof (kstat_io_t));
		}
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	if (bd->d_ops.o_devid_init != NULL) {
		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
		if (rv == DDI_SUCCESS) {
			if (ddi_devid_register(dip, bd->d_devid) !=
			    DDI_SUCCESS) {
				cmn_err(CE_WARN,
				    "%s: unable to register devid", name);
			}
		}
	}

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers).  Also set up properties
	 * used by HAL to identify removable media.
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);
	if (bd->d_removable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "removable-media", NULL, 0);
	}
	if (bd->d_hotpluggable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "hotpluggable", NULL, 0);
	}

	ddi_report_dev(dip);

	return (DDI_SUCCESS);
}

static int
bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	bd_t	*bd;

	bd = ddi_get_driver_private(dip);

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		/* We don't suspend, but our parent does */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
	if (bd->d_ksp != NULL) {
		kstat_delete(bd->d_ksp);
		bd->d_ksp = NULL;
	} else {
		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
	}
	cmlb_detach(bd->d_cmlbh, 0);
	cmlb_free_handle(&bd->d_cmlbh);
	if (bd->d_devid)
		ddi_devid_free(bd->d_devid);
	kmem_cache_destroy(bd->d_cache);
	mutex_destroy(&bd->d_iomutex);
	mutex_destroy(&bd->d_ocmutex);
	mutex_destroy(&bd->d_statemutex);
	cv_destroy(&bd->d_statecv);
	list_destroy(&bd->d_waitq);
	list_destroy(&bd->d_runq);
	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
	return (DDI_SUCCESS);
}

static int
bd_xfer_ctor(void *buf, void *arg, int kmflag)
{
	bd_xfer_impl_t	*xi;
	bd_t		*bd = arg;
	int		(*dcb)(caddr_t);

	if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
		dcb = DDI_DMA_SLEEP;
	} else {
		dcb = DDI_DMA_DONTWAIT;
	}

	xi = buf;
	bzero(xi, sizeof (*xi));
	xi->i_bd = bd;

	if (bd->d_use_dma) {
		if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
		    &xi->i_dmah) != DDI_SUCCESS) {
			return (-1);
		}
	}

	return (0);
}

static void
bd_xfer_dtor(void *buf, void *arg)
{
	bd_xfer_impl_t	*xi = buf;

	_NOTE(ARGUNUSED(arg));

	if (xi->i_dmah)
		ddi_dma_free_handle(&xi->i_dmah);
	xi->i_dmah = NULL;
}

static bd_xfer_impl_t *
bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
    int kmflag)
{
	bd_xfer_impl_t	*xi;
	int		rv = 0;
	int		status;
	unsigned	dir;
	int		(*cb)(caddr_t);
	size_t		len;
	uint32_t	shift;

	if (kmflag == KM_SLEEP) {
		cb = DDI_DMA_SLEEP;
	} else {
		cb = DDI_DMA_DONTWAIT;
	}

	xi = kmem_cache_alloc(bd->d_cache, kmflag);
	if (xi == NULL) {
		bioerror(bp, ENOMEM);
		return (NULL);
	}

	ASSERT(bp);

	xi->i_bp = bp;
	xi->i_func = func;
	xi->i_blkno = bp->b_lblkno;

	if (bp->b_bcount == 0) {
		xi->i_len = 0;
		xi->i_nblks = 0;
		xi->i_kaddr = NULL;
		xi->i_resid = 0;
		xi->i_num_win = 0;
		goto done;
	}

	if (bp->b_flags & B_READ) {
		dir = DDI_DMA_READ;
		xi->i_func = bd->d_ops.o_read;
	} else {
		dir = DDI_DMA_WRITE;
		xi->i_func = bd->d_ops.o_write;
	}

	shift = bd->d_blkshift;
	xi->i_blkshift = shift;

	if (!bd->d_use_dma) {
		bp_mapin(bp);
		rv = 0;
		xi->i_offset = 0;
		xi->i_num_win =
		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
		xi->i_cur_win = 0;
		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
		xi->i_nblks = xi->i_len >> shift;
		xi->i_kaddr = bp->b_un.b_addr;
		xi->i_resid = bp->b_bcount;
	} else {

		/*
		 * We have to use consistent DMA if the address is misaligned.
		 */
		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
		} else {
			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
		}

		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
		    NULL, &xi->i_dmac, &xi->i_ndmac);
		switch (status) {
		case DDI_DMA_MAPPED:
			xi->i_num_win = 1;
			xi->i_cur_win = 0;
			xi->i_offset = 0;
			xi->i_len = bp->b_bcount;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_PARTIAL_MAP:
			xi->i_cur_win = 0;

			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
			    DDI_SUCCESS) ||
			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
			    &len, &xi->i_dmac, &xi->i_ndmac) !=
			    DDI_SUCCESS) ||
			    (P2PHASE(len, (1U << shift)) != 0)) {
				(void) ddi_dma_unbind_handle(xi->i_dmah);
				rv = EFAULT;
				goto done;
			}
			xi->i_len = len;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_NORESOURCES:
			rv = EAGAIN;
			goto done;
		case DDI_DMA_TOOBIG:
			rv = EINVAL;
			goto done;
		case DDI_DMA_NOMAPPING:
		case DDI_DMA_INUSE:
		default:
			rv = EFAULT;
			goto done;
		}
	}

done:
	if (rv != 0) {
		kmem_cache_free(bd->d_cache, xi);
		bioerror(bp, rv);
		return (NULL);
	}

	return (xi);
}

static void
bd_xfer_free(bd_xfer_impl_t *xi)
{
	if (xi->i_dmah) {
		(void) ddi_dma_unbind_handle(xi->i_dmah);
	}
	kmem_cache_free(xi->i_bd->d_cache, xi);
}

static int
bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	dev_t		dev = *devp;
	bd_t		*bd;
	minor_t		part;
	minor_t		inst;
	uint64_t	mask;
	boolean_t	ndelay;
	int		rv;
	diskaddr_t	nblks;
	diskaddr_t	lba;

	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	if (otyp >= OTYPCNT)
		return (EINVAL);

	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;

	/*
	 * Block any DR events from changing the set of registered
	 * devices while we function.
	 */
	rw_enter(&bd_lock, RW_READER);
	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);

	ASSERT(part < BD_MAXPART);
	mask = (1ULL << part);

	bd_update_state(bd);

	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {

		/* non-blocking opens are allowed to succeed */
		if (!ndelay) {
			rv = ENXIO;
			goto done;
		}
	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
	    NULL, NULL, 0) == 0) {

		/*
		 * We read the partinfo, verify valid ranges.  If the
		 * partition is invalid, and we aren't blocking or
		 * doing a raw access, then fail.  (Non-blocking and
		 * raw accesses can still succeed to allow a disk with
		 * bad partition data to be opened by format and fdisk.)
		 */
		if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
			rv = ENXIO;
			goto done;
		}
	} else if (!ndelay) {
		/*
		 * cmlb_partinfo failed -- invalid partition or no
		 * disk label.
		 */
		rv = ENXIO;
		goto done;
	}

	if ((flag & FWRITE) && bd->d_rdonly) {
		rv = EROFS;
		goto done;
	}

	if ((bd->d_open_excl) & (mask)) {
		rv = EBUSY;
		goto done;
	}
	if (flag & FEXCL) {
		if (bd->d_open_lyr[part]) {
			rv = EBUSY;
			goto done;
		}
		for (int i = 0; i < OTYP_LYR; i++) {
			if (bd->d_open_reg[i] & mask) {
				rv = EBUSY;
				goto done;
			}
		}
	}

	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]++;
	} else {
		bd->d_open_reg[otyp] |= mask;
	}
	if (flag & FEXCL) {
		bd->d_open_excl |= mask;
	}

	rv = 0;
done:
	mutex_exit(&bd->d_ocmutex);
	rw_exit(&bd_lock);

	return (rv);
}

static int
bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	bd_t		*bd;
	minor_t		inst;
	minor_t		part;
	uint64_t	mask;
	boolean_t	last = B_TRUE;

	_NOTE(ARGUNUSED(flag));
	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	ASSERT(part < BD_MAXPART);
	mask = (1ULL << part);

	rw_enter(&bd_lock, RW_READER);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);
	if (bd->d_open_excl & mask) {
		bd->d_open_excl &= ~mask;
	}
	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]--;
	} else {
		bd->d_open_reg[otyp] &= ~mask;
	}
	for (int i = 0; i < BD_MAXPART; i++) {
		if (bd->d_open_lyr[i]) {
			last = B_FALSE;
		}
	}
	for (int i = 0; last && (i < OTYP_LYR); i++) {
		if (bd->d_open_reg[i]) {
			last = B_FALSE;
		}
	}
	mutex_exit(&bd->d_ocmutex);

	if (last) {
		cmlb_invalidate(bd->d_cmlbh, 0);
	}
	rw_exit(&bd_lock);

	return (0);
}

static int
bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
{
	minor_t		inst;
	minor_t		part;
	diskaddr_t	pstart;
	diskaddr_t	psize;
	bd_t		*bd;
	bd_xfer_impl_t	*xi;
	buf_t		*bp;
	int		rv;

	rw_enter(&bd_lock, RW_READER);

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}
	/*
	 * do cmlb, but do it synchronously unless we already have the
	 * partition (which we probably should).
	 */
	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
	    (void *)1)) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	if ((blkno + nblk) > psize) {
		rw_exit(&bd_lock);
		return (EINVAL);
	}
	bp = getrbuf(KM_NOSLEEP);
	if (bp == NULL) {
		rw_exit(&bd_lock);
		return (ENOMEM);
	}

	bp->b_bcount = nblk << bd->d_blkshift;
	bp->b_resid = bp->b_bcount;
	bp->b_lblkno = blkno;
	bp->b_un.b_addr = caddr;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP);
	if (xi == NULL) {
		rw_exit(&bd_lock);
		freerbuf(bp);
		return (ENOMEM);
	}
	xi->i_blkno = blkno + pstart;
	xi->i_flags = BD_XFER_POLL;
	bd_submit(bd, xi);
	rw_exit(&bd_lock);

	/*
	 * Generally, we should have run this entirely synchronously
	 * at this point and the biowait call should be a no-op.  If
	 * it didn't happen this way, it's a bug in the underlying
	 * driver not honoring BD_XFER_POLL.
	 */
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}
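
/*
 * Illustrative sketch (an assumption, not part of this driver): what a
 * parent driver's o_write entry point might look like to honor the
 * BD_XFER_POLL contract that bd_dump() relies on.  Interrupts may be
 * disabled during dump(9E), so a polled transfer must spin for
 * completion and call bd_xfer_done() before returning.  All mydrv_*
 * names are hypothetical.
 *
 *	static int
 *	mydrv_write(void *private, bd_xfer_t *xfer)
 *	{
 *		mydrv_t *m = private;
 *		mydrv_cmd_t *cmd;
 *
 *		cmd = mydrv_cmd_init(m, xfer);
 *		if (cmd == NULL)
 *			return (ENOMEM);
 *		mydrv_cmd_submit(m, cmd);
 *		if (xfer->x_flags & BD_XFER_POLL) {
 *			while (!mydrv_cmd_complete(m, cmd))
 *				drv_usecwait(10);
 *			bd_xfer_done(xfer, mydrv_cmd_error(cmd));
 *		}
 *		return (0);
 *	}
 */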

void
bd_minphys(struct buf *bp)
{
	minor_t inst;
	bd_t	*bd;

	inst = BDINST(bp->b_edev);

	bd = ddi_get_soft_state(bd_state, inst);

	/*
	 * In a non-debug kernel, bd_strategy will catch !bd as
	 * well, and will fail nicely.
	 */
	ASSERT(bd);

	if (bp->b_bcount > bd->d_maxxfer)
		bp->b_bcount = bd->d_maxxfer;
}

static int
bd_read(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
}

static int
bd_write(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
}

static int
bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
}

static int
bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
}

static int
bd_strategy(struct buf *bp)
{
	minor_t		inst;
	minor_t		part;
	bd_t		*bd;
	diskaddr_t	p_lba;
	diskaddr_t	p_nblks;
	diskaddr_t	b_nblks;
	bd_xfer_impl_t	*xi;
	uint32_t	shift;
	int		(*func)(void *, bd_xfer_t *);

	part = BDPART(bp->b_edev);
	inst = BDINST(bp->b_edev);

	ASSERT(bp);

	bp->b_resid = bp->b_bcount;

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
	    NULL, NULL, 0)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	shift = bd->d_blkshift;

	if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
	    (bp->b_lblkno > p_nblks)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}
	b_nblks = bp->b_bcount >> shift;
	if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
		biodone(bp);
		return (0);
	}

	if ((b_nblks + bp->b_lblkno) > p_nblks) {
		bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
		bp->b_bcount -= bp->b_resid;
	} else {
		bp->b_resid = 0;
	}
	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;

	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
	if (xi == NULL) {
		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
	}
	if (xi == NULL) {
		/* bd_xfer_alloc will have done bioerror */
		biodone(bp);
		return (0);
	}
	xi->i_blkno = bp->b_lblkno + p_lba;

	bd_submit(bd, xi);

	return (0);
}
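
/*
 * Worked example of the truncation arithmetic in bd_strategy() above
 * (illustrative): with 512-byte blocks (shift 9) and a 64-block
 * partition (p_nblks), a request for 8 blocks starting at b_lblkno 60
 * overruns by (60 + 8 - 64) = 4 blocks, so b_resid is set to
 * 4 << 9 = 2048 bytes and b_bcount is trimmed to the 4 blocks that fit.
 */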

static int
bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
{
	minor_t		inst;
	uint16_t	part;
	bd_t		*bd;
	void		*ptr = (void *)arg;
	int		rv;

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		return (ENXIO);
	}

	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
	if (rv != ENOTTY)
		return (rv);

	if (rvalp != NULL) {
		/* the return value of the ioctl is 0 by default */
		*rvalp = 0;
	}

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo minfo;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&minfo, sizeof (minfo));
		minfo.dki_media_type = DK_FIXED_DISK;
		minfo.dki_lbsize = (1U << bd->d_blkshift);
		minfo.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCGMEDIAINFOEXT: {
		struct dk_minfo_ext miext;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&miext, sizeof (miext));
		miext.dki_media_type = DK_FIXED_DISK;
		miext.dki_lbsize = (1U << bd->d_blkshift);
		miext.dki_pbsize = (1U << bd->d_pblkshift);
		miext.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo cinfo;

		bzero(&cinfo, sizeof (cinfo));
		cinfo.dki_ctype = DKC_BLKDEV;
		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
		    "%s", ddi_driver_name(bd->d_dip));
		cinfo.dki_unit = inst;
		cinfo.dki_flags = DKI_FMTVOL;
		cinfo.dki_partition = part;
		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
		cinfo.dki_addr = 0;
		cinfo.dki_slave = 0;
		cinfo.dki_space = 0;
		cinfo.dki_prio = 0;
		cinfo.dki_vec = 0;
		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i;

		i = bd->d_removable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCHOTPLUGGABLE: {
		int i;

		i = bd->d_hotpluggable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREADONLY: {
		int i;

		i = bd->d_rdonly ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSOLIDSTATE: {
		int i;

		i = bd->d_ssd ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSTATE: {
		enum dkio_state state;

		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
			return (EFAULT);
		}
		if ((rv = bd_check_state(bd, &state)) != 0) {
			return (rv);
		}
		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc = NULL;

		if (flag & FKIOCTL)
			dkc = (void *)arg;

		rv = bd_flush_write_cache(bd, dkc);
		return (rv);
	}

	default:
		break;

	}
	return (ENOTTY);
}

static int
bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	bd_t	*bd;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
	if (bd == NULL)
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));

	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
	    valuep, lengthp, BDPART(dev), 0));
}

static int
bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
	bd_t		*bd;
	buf_t		*bp;
	bd_xfer_impl_t	*xi;
	int		rv;
	int		(*func)(void *, bd_xfer_t *);
	int		kmflag;

	/*
	 * If we are running in polled mode (such as during dump(9e)
	 * execution), then we cannot sleep for kernel allocations.
	 */
	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	if ((bp = getrbuf(kmflag)) == NULL) {
		return (ENOMEM);
	}

	switch (cmd) {
	case TG_READ:
		bp->b_flags = B_READ;
		func = bd->d_ops.o_read;
		break;
	case TG_WRITE:
		bp->b_flags = B_WRITE;
		func = bd->d_ops.o_write;
		break;
	default:
		freerbuf(bp);
		return (EINVAL);
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	xi = bd_xfer_alloc(bd, bp, func, kmflag);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}
	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
	xi->i_blkno = start;
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

static int
bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	bd_t	*bd;

	_NOTE(ARGUNUSED(tg_cookie));
	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	switch (cmd) {
	case TG_GETPHYGEOM:
	case TG_GETVIRTGEOM:
		/*
		 * We don't have any "geometry" as such, let cmlb
		 * fabricate something.
		 */
		return (ENOTTY);

	case TG_GETCAPACITY:
		bd_update_state(bd);
		*(diskaddr_t *)arg = bd->d_numblks;
		return (0);

	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = (1U << bd->d_blkshift);
		return (0);

	case TG_GETATTR:
		/*
		 * It turns out that cmlb really doesn't do much for
		 * non-writable media, but let's make the information
		 * available for it in case it does more in the
		 * future.  (The value is currently used for
		 * triggering special behavior for CD-ROMs.)
		 */
		bd_update_state(bd);
		((tg_attribute_t *)arg)->media_is_writable =
		    bd->d_rdonly ? B_FALSE : B_TRUE;
		((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
		return (0);

	default:
		return (EINVAL);
	}
}

static void
bd_sched(bd_t *bd)
{
	bd_xfer_impl_t	*xi;
	struct buf	*bp;
	int		rv;

	mutex_enter(&bd->d_iomutex);

	while ((bd->d_qactive < bd->d_qsize) &&
	    ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
		bd->d_qactive++;
		kstat_waitq_to_runq(bd->d_kiop);
		list_insert_tail(&bd->d_runq, xi);

		/*
		 * Submit the job to the driver.  We drop the I/O mutex
		 * so that we can deal with the case where the driver
		 * completion routine calls back into us synchronously.
		 */

		mutex_exit(&bd->d_iomutex);

		rv = xi->i_func(bd->d_private, &xi->i_public);
		if (rv != 0) {
			bp = xi->i_bp;
			bioerror(bp, rv);
			biodone(bp);

			mutex_enter(&bd->d_iomutex);
			bd->d_qactive--;
			kstat_runq_exit(bd->d_kiop);
			list_remove(&bd->d_runq, xi);
			bd_xfer_free(xi);
		} else {
			mutex_enter(&bd->d_iomutex);
		}
	}

	mutex_exit(&bd->d_iomutex);
}

static void
bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
{
	mutex_enter(&bd->d_iomutex);
	list_insert_tail(&bd->d_waitq, xi);
	kstat_waitq_enter(bd->d_kiop);
	mutex_exit(&bd->d_iomutex);

	bd_sched(bd);
}

static void
bd_runq_exit(bd_xfer_impl_t *xi, int err)
{
	bd_t	*bd = xi->i_bd;
	buf_t	*bp = xi->i_bp;

	mutex_enter(&bd->d_iomutex);
	bd->d_qactive--;
	kstat_runq_exit(bd->d_kiop);
	list_remove(&bd->d_runq, xi);
	mutex_exit(&bd->d_iomutex);

	if (err == 0) {
		if (bp->b_flags & B_READ) {
			bd->d_kiop->reads++;
			bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
		} else {
			bd->d_kiop->writes++;
			bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
		}
	}
	bd_sched(bd);
}

static void
bd_update_state(bd_t *bd)
{
	enum dkio_state	state = DKIO_INSERTED;
	boolean_t	docmlb = B_FALSE;
	bd_media_t	media;

	bzero(&media, sizeof (media));

	mutex_enter(&bd->d_statemutex);
	if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
		bd->d_numblks = 0;
		state = DKIO_EJECTED;
		goto done;
	}

	if ((media.m_blksize < 512) ||
	    (!ISP2(media.m_blksize)) ||
	    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
		cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
		    ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
		    media.m_blksize);
		/*
		 * We can't use the media, treat it as not present.
		 */
		state = DKIO_EJECTED;
		bd->d_numblks = 0;
		goto done;
	}

	if (((1U << bd->d_blkshift) != media.m_blksize) ||
	    (bd->d_numblks != media.m_nblks)) {
		/* Device size changed */
		docmlb = B_TRUE;
	}

	bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
	bd->d_pblkshift = bd->d_blkshift;
	bd->d_numblks = media.m_nblks;
	bd->d_rdonly = media.m_readonly;
	bd->d_ssd = media.m_solidstate;

	/*
	 * Only use the supplied physical block size if it is non-zero,
	 * greater than or equal to the logical block size, and a power
	 * of 2.  Ignore it if not; it's only informational, and we can
	 * still use the media.
	 */
	if ((media.m_pblksize != 0) &&
	    (media.m_pblksize >= media.m_blksize) &&
	    (ISP2(media.m_pblksize)))
		bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;

done:
	if (state != bd->d_state) {
		bd->d_state = state;
		cv_broadcast(&bd->d_statecv);
		docmlb = B_TRUE;
	}
	mutex_exit(&bd->d_statemutex);

	if (docmlb) {
		if (state == DKIO_INSERTED) {
			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
		} else {
			cmlb_invalidate(bd->d_cmlbh, 0);
		}
	}
}
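
/*
 * Illustrative note: d_blkshift is log2 of the logical block size,
 * computed above as ddi_ffs(m_blksize) - 1.  For example, 512-byte
 * media yields shift 9 and 4096-byte media yields shift 12; the same
 * relationship holds between d_pblkshift and m_pblksize.
 */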

static int
bd_check_state(bd_t *bd, enum dkio_state *state)
{
	clock_t		when;

	for (;;) {

		bd_update_state(bd);

		mutex_enter(&bd->d_statemutex);

		if (bd->d_state != *state) {
			*state = bd->d_state;
			mutex_exit(&bd->d_statemutex);
			break;
		}

		when = drv_usectohz(1000000);
		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
		    when, TR_CLOCK_TICK) == 0) {
			mutex_exit(&bd->d_statemutex);
			return (EINTR);
		}

		mutex_exit(&bd->d_statemutex);
	}

	return (0);
}

static int
bd_flush_write_cache_done(struct buf *bp)
{
	struct dk_callback *dc = (void *)bp->b_private;

	(*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
	kmem_free(dc, sizeof (*dc));
	freerbuf(bp);
	return (0);
}

static int
bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
{
	buf_t			*bp;
	struct dk_callback	*dc;
	bd_xfer_impl_t		*xi;
	int			rv;

	if (bd->d_ops.o_sync_cache == NULL) {
		return (ENOTSUP);
	}
	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
		return (ENOMEM);
	}
	bp->b_resid = 0;
	bp->b_bcount = 0;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}

	/* Make an asynchronous flush, but only if there is a callback */
	if (dkc != NULL && dkc->dkc_callback != NULL) {
		/* Make a private copy of the callback structure */
		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
		*dc = *dkc;
		bp->b_private = dc;
		bp->b_iodone = bd_flush_write_cache_done;

		bd_submit(bd, xi);
		return (0);
	}

	/* In case there is no callback, perform a synchronous flush */
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}
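
/*
 * Illustrative sketch (an assumption, not part of this driver): how a
 * layered kernel consumer might request the asynchronous flush path
 * above.  FKIOCTL marks the argument as a kernel address, so bd_ioctl()
 * passes the dk_callback through to bd_flush_write_cache(); my_done and
 * my_state are hypothetical.
 *
 *	static void
 *	my_done(void *cookie, int err)
 *	{
 *		// invoked from bd_flush_write_cache_done() with the
 *		// result of the flush
 *	}
 *
 *	struct dk_callback dkc = {
 *		.dkc_callback = my_done,
 *		.dkc_cookie = my_state,
 *	};
 *	int rv = ldi_ioctl(lh, DKIOCFLUSHWRITECACHE, (intptr_t)&dkc,
 *	    FKIOCTL, kcred, NULL);
 */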

/*
 * Nexus support.
 */
int
bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
    void *arg, void *result)
{
	bd_handle_t	hdl;

	switch (ctlop) {
	case DDI_CTLOPS_REPORTDEV:
		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
		    ddi_driver_name(rdip), ddi_get_instance(rdip));
		return (DDI_SUCCESS);

	case DDI_CTLOPS_INITCHILD:
		hdl = ddi_get_parent_data((dev_info_t *)arg);
		if (hdl == NULL) {
			return (DDI_NOT_WELL_FORMED);
		}
		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
		return (DDI_SUCCESS);

	case DDI_CTLOPS_UNINITCHILD:
		ddi_set_name_addr((dev_info_t *)arg, NULL);
		ndi_prop_remove_all((dev_info_t *)arg);
		return (DDI_SUCCESS);

	default:
		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
	}
}

/*
 * Functions for device drivers.
 */
bd_handle_t
bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
{
	bd_handle_t	hdl;

	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
	if (hdl != NULL) {
		hdl->h_ops = *ops;
		hdl->h_dma = dma;
		hdl->h_private = private;
	}

	return (hdl);
}

void
bd_free_handle(bd_handle_t hdl)
{
	kmem_free(hdl, sizeof (*hdl));
}

int
bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
{
	dev_info_t	*child;
	bd_drive_t	drive = { 0 };

	/* if drivers don't override this, make it assume none */
	drive.d_lun = -1;
	hdl->h_ops.o_drive_info(hdl->h_private, &drive);

	hdl->h_parent = dip;
	hdl->h_name = "blkdev";

	if (drive.d_lun >= 0) {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X,%X",
		    drive.d_target, drive.d_lun);
	} else {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X",
		    drive.d_target);
	}
	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
	    &child) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    "blkdev", hdl->h_addr);
		return (DDI_FAILURE);
	}

	ddi_set_parent_data(child, hdl);
	hdl->h_child = child;

	if (ndi_devi_online(child, 0) == NDI_FAILURE) {
		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    hdl->h_name, hdl->h_addr);
		(void) ndi_devi_free(child);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

int
bd_detach_handle(bd_handle_t hdl)
{
	int	circ;
	int	rv;
	char	*devnm;

	if (hdl->h_child == NULL) {
		return (DDI_SUCCESS);
	}
	ndi_devi_enter(hdl->h_parent, &circ);
	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
		rv = ddi_remove_child(hdl->h_child, 0);
	} else {
		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
		(void) ddi_deviname(hdl->h_child, devnm);
		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
		kmem_free(devnm, MAXNAMELEN + 1);
	}
	if (rv == 0) {
		hdl->h_child = NULL;
	}

	ndi_devi_exit(hdl->h_parent, circ);
	return (rv == 0 ? DDI_SUCCESS : DDI_FAILURE);
}
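
/*
 * Illustrative sketch (an assumption, not part of this driver): the
 * attach path of a hypothetical parent driver using the handle
 * functions above.  mydrv_t, mydrv_soft_state_get, mydrv_bd_ops and
 * mydrv_dma_attr are all hypothetical names.
 *
 *	static int
 *	mydrv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 *	{
 *		mydrv_t *m = mydrv_soft_state_get(dip);
 *
 *		if (cmd != DDI_ATTACH)
 *			return (DDI_FAILURE);
 *		m->m_bdh = bd_alloc_handle(m, &mydrv_bd_ops,
 *		    &mydrv_dma_attr, KM_SLEEP);
 *		if (m->m_bdh == NULL)
 *			return (DDI_FAILURE);
 *		if (bd_attach_handle(dip, m->m_bdh) != DDI_SUCCESS) {
 *			bd_free_handle(m->m_bdh);
 *			return (DDI_FAILURE);
 *		}
 *		return (DDI_SUCCESS);
 *	}
 */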

void
bd_xfer_done(bd_xfer_t *xfer, int err)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	buf_t		*bp = xi->i_bp;
	int		rv = DDI_SUCCESS;
	bd_t		*bd = xi->i_bd;
	size_t		len;

	if (err != 0) {
		bd_runq_exit(xi, err);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, err);
		biodone(bp);
		return;
	}

	xi->i_cur_win++;
	xi->i_resid -= xi->i_len;

	if (xi->i_resid == 0) {
		/* Job completed successfully! */
		bd_runq_exit(xi, 0);

		bd_xfer_free(xi);
		biodone(bp);
		return;
	}

	xi->i_blkno += xi->i_nblks;

	if (bd->d_use_dma) {
		/* More transfer still pending... advance to next DMA window. */
		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
	} else {
		/* Advance memory window. */
		xi->i_kaddr += xi->i_len;
		xi->i_offset += xi->i_len;
		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
	}


	if ((rv != DDI_SUCCESS) ||
	    (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
		bd_runq_exit(xi, EFAULT);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, EFAULT);
		biodone(bp);
		return;
	}
	xi->i_len = len;
	xi->i_nblks = len >> xi->i_blkshift;

	/* Submit next window to hardware. */
	rv = xi->i_func(bd->d_private, &xi->i_public);
	if (rv != 0) {
		bd_runq_exit(xi, rv);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, rv);
		biodone(bp);
	}
}

void
bd_state_change(bd_handle_t hdl)
{
	bd_t		*bd;

	if ((bd = hdl->h_bd) != NULL) {
		bd_update_state(bd);
	}
}

void
bd_mod_init(struct dev_ops *devops)
{
	static struct bus_ops bd_bus_ops = {
		BUSO_REV,		/* busops_rev */
		nullbusmap,		/* bus_map */
		NULL,			/* bus_get_intrspec (OBSOLETE) */
		NULL,			/* bus_add_intrspec (OBSOLETE) */
		NULL,			/* bus_remove_intrspec (OBSOLETE) */
		i_ddi_map_fault,	/* bus_map_fault */
		NULL,			/* bus_dma_map (OBSOLETE) */
		ddi_dma_allochdl,	/* bus_dma_allochdl */
		ddi_dma_freehdl,	/* bus_dma_freehdl */
		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
		ddi_dma_flush,		/* bus_dma_flush */
		ddi_dma_win,		/* bus_dma_win */
		ddi_dma_mctl,		/* bus_dma_ctl */
		bd_bus_ctl,		/* bus_ctl */
		ddi_bus_prop_op,	/* bus_prop_op */
		NULL,			/* bus_get_eventcookie */
		NULL,			/* bus_add_eventcall */
		NULL,			/* bus_remove_eventcall */
		NULL,			/* bus_post_event */
		NULL,			/* bus_intr_ctl (OBSOLETE) */
		NULL,			/* bus_config */
		NULL,			/* bus_unconfig */
		NULL,			/* bus_fm_init */
		NULL,			/* bus_fm_fini */
		NULL,			/* bus_fm_access_enter */
		NULL,			/* bus_fm_access_exit */
		NULL,			/* bus_power */
		NULL,			/* bus_intr_op */
	};

	devops->devo_bus_ops = &bd_bus_ops;

	/*
	 * NB: The device driver is free to supply its own
	 * character entry device support.
	 */
}

void
bd_mod_fini(struct dev_ops *devops)
{
	devops->devo_bus_ops = NULL;
}
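
/*
 * Illustrative sketch (an assumption, not part of this driver): a
 * parent driver typically wires in the blkdev bus ops from its
 * _init(9E) before mod_install(), and removes them again on failure or
 * unload.  mydrv_dev_ops and mydrv_modlinkage are hypothetical names.
 *
 *	int
 *	_init(void)
 *	{
 *		int rv;
 *
 *		bd_mod_init(&mydrv_dev_ops);
 *		rv = mod_install(&mydrv_modlinkage);
 *		if (rv != 0)
 *			bd_mod_fini(&mydrv_dev_ops);
 *		return (rv);
 *	}
 */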