/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/aio_req.h>
#include <sys/cred.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/kstat.h>
#include <sys/fs/dv_node.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/note.h>
#include <sys/blkdev.h>

#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)

typedef struct bd		bd_t;
typedef struct bd_xfer_impl	bd_xfer_impl_t;

struct bd {
	void		*d_private;
	dev_info_t	*d_dip;
	kmutex_t	d_ocmutex;
	kmutex_t	d_iomutex;
	kmutex_t	d_statemutex;
	kcondvar_t	d_statecv;
	enum dkio_state	d_state;
	cmlb_handle_t	d_cmlbh;
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];	/* bit mask */

	uint32_t	d_qsize;
	uint32_t	d_qactive;
	uint32_t	d_maxxfer;
	uint32_t	d_blkshift;
	uint64_t	d_numblks;
	ddi_devid_t	d_devid;

	kmem_cache_t	*d_cache;
	list_t		d_runq;
	list_t		d_waitq;
	kstat_t		*d_ksp;
	kstat_io_t	*d_kiop;

	boolean_t	d_rdonly;
	boolean_t	d_removable;
	boolean_t	d_hotpluggable;
	boolean_t	d_use_dma;

	ddi_dma_attr_t	d_dma;
	bd_ops_t	d_ops;
	bd_handle_t	d_handle;
};

struct bd_handle {
	bd_ops_t	h_ops;
	ddi_dma_attr_t	*h_dma;
	dev_info_t	*h_parent;
	dev_info_t	*h_child;
	void		*h_private;
	bd_t		*h_bd;
	char		*h_name;
	char		h_addr[20];	/* enough for %X,%X */
};

struct bd_xfer_impl {
	bd_xfer_t	i_public;
	list_node_t	i_linkage;
	bd_t		*i_bd;
	buf_t		*i_bp;
	uint_t		i_num_win;
	uint_t		i_cur_win;
	off_t		i_offset;
	int		(*i_func)(void *, bd_xfer_t *);
	uint32_t	i_blkshift;
	size_t		i_len;
	size_t		i_resid;
};

#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags
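
/*
 * Minor number layout, for illustration only: each instance owns a
 * contiguous range of BD_MAXPART minors, so instance and partition can
 * be recovered with simple division.  A hypothetical sketch (the names
 * "maj" and "m" are illustrative and not part of this driver):
 *
 *	minor_t m = (2 * BD_MAXPART) + 3;	 instance 2, partition 3
 *	dev_t dev = makedevice(maj, m);
 *	ASSERT(BDINST(dev) == 2);
 *	ASSERT(BDPART(dev) == 3);
 */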

/*
 * Private prototypes.
 */

static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);

struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};

static struct cb_ops bd_cb_ops = {
	bd_open,		/* open */
	bd_close,		/* close */
	bd_strategy,		/* strategy */
	nodev,			/* print */
	bd_dump,		/* dump */
	bd_read,		/* read */
	bd_write,		/* write */
	bd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	bd_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_64BIT | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	bd_aread,		/* async read */
	bd_awrite		/* async write */
};

struct dev_ops bd_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	bd_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	bd_attach,		/* attach */
	bd_detach,		/* detach */
	nodev,			/* reset */
	&bd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"Generic Block Device",
	&bd_dev_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, { &modldrv, NULL }
};

static void	*bd_state;
static krwlock_t bd_lock;

int
_init(void)
{
	int	rv;

	rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
	if (rv != DDI_SUCCESS) {
		return (rv);
	}
	rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
	rv = mod_install(&modlinkage);
	if (rv != DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_fini(void)
{
	int	rv;

	rv = mod_remove(&modlinkage);
	if (rv == DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static int
bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	bd_t	*bd;
	minor_t	inst;

	_NOTE(ARGUNUSED(dip));

	inst = BDINST((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		bd = ddi_get_soft_state(bd_state, inst);
		if (bd == NULL) {
			return (DDI_FAILURE);
		}
		*resultp = (void *)bd->d_dip;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(intptr_t)inst;
		break;

	default:
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

static int
bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int		inst;
	bd_handle_t	hdl;
	bd_t		*bd;
	bd_drive_t	drive;
	int		rv;
	char		name[16];
	char		kcache[32];

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* We don't do anything native for suspend/resume */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	inst = ddi_get_instance(dip);
	hdl = ddi_get_parent_data(dip);

	(void) snprintf(name, sizeof (name), "%s%d",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);

	if (hdl == NULL) {
		cmn_err(CE_WARN, "%s: missing parent data!", name);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
		return (DDI_FAILURE);
	}
	bd = ddi_get_soft_state(bd_state, inst);

	if (hdl->h_dma) {
		bd->d_dma = *(hdl->h_dma);
		bd->d_dma.dma_attr_granular =
		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
		bd->d_use_dma = B_TRUE;

		if (bd->d_maxxfer &&
		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
			cmn_err(CE_WARN,
			    "%s: inconsistent maximum transfer size!",
			    name);
			/* We force it */
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		} else {
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		}
	} else {
		bd->d_use_dma = B_FALSE;
		if (bd->d_maxxfer == 0) {
			bd->d_maxxfer = 1024 * 1024;
		}
	}
	bd->d_ops = hdl->h_ops;
	bd->d_private = hdl->h_private;
	bd->d_blkshift = 9;	/* 512 bytes, to start */

	if (bd->d_maxxfer % DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
	}
	if (bd->d_maxxfer < DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	bd->d_dip = dip;
	bd->d_handle = hdl;
	hdl->h_bd = bd;
	ddi_set_driver_private(dip, bd);

	mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);

	list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));
	list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));

	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);

	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
	if (bd->d_ksp != NULL) {
		bd->d_ksp->ks_lock = &bd->d_iomutex;
		kstat_install(bd->d_ksp);
		bd->d_kiop = bd->d_ksp->ks_data;
	} else {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat.  The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
	}

	cmlb_alloc_handle(&bd->d_cmlbh);

	bd->d_state = DKIO_NONE;

	bzero(&drive, sizeof (drive));
	bd->d_ops.o_drive_info(bd->d_private, &drive);
	bd->d_qsize = drive.d_qsize;
	bd->d_removable = drive.d_removable;
	bd->d_hotpluggable = drive.d_hotpluggable;

	/*
	 * Only let the drive shrink the maximum transfer size;
	 * blindly overwriting d_maxxfer here would discard the
	 * DMA-derived limit validated above.
	 */
	if (drive.d_maxxfer && (drive.d_maxxfer < bd->d_maxxfer))
		bd->d_maxxfer = drive.d_maxxfer;

	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
	    bd->d_removable, bd->d_hotpluggable,
	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
	if (rv != 0) {
		cmlb_free_handle(&bd->d_cmlbh);
		kmem_cache_destroy(bd->d_cache);
		mutex_destroy(&bd->d_iomutex);
		mutex_destroy(&bd->d_ocmutex);
		mutex_destroy(&bd->d_statemutex);
		cv_destroy(&bd->d_statecv);
		list_destroy(&bd->d_waitq);
		list_destroy(&bd->d_runq);
		if (bd->d_ksp != NULL) {
			kstat_delete(bd->d_ksp);
			bd->d_ksp = NULL;
		} else {
			kmem_free(bd->d_kiop, sizeof (kstat_io_t));
		}
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	if (bd->d_ops.o_devid_init != NULL) {
		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
		if (rv == DDI_SUCCESS) {
			if (ddi_devid_register(dip, bd->d_devid) !=
			    DDI_SUCCESS) {
				cmn_err(CE_WARN,
				    "%s: unable to register devid", name);
			}
		}
	}

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers).  Also set up properties
	 * used by HAL to identify removable media.
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);
	if (bd->d_removable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "removable-media", NULL, 0);
	}
	if (bd->d_hotpluggable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "hotpluggable", NULL, 0);
	}

	ddi_report_dev(dip);

	return (DDI_SUCCESS);
}

static int
bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	bd_t	*bd;

	bd = ddi_get_driver_private(dip);

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		/* We don't suspend, but our parent does */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
	if (bd->d_ksp != NULL) {
		kstat_delete(bd->d_ksp);
		bd->d_ksp = NULL;
	} else {
		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
	}
	cmlb_detach(bd->d_cmlbh, 0);
	cmlb_free_handle(&bd->d_cmlbh);
	if (bd->d_devid)
		ddi_devid_free(bd->d_devid);
	kmem_cache_destroy(bd->d_cache);
	mutex_destroy(&bd->d_iomutex);
	mutex_destroy(&bd->d_ocmutex);
	mutex_destroy(&bd->d_statemutex);
	cv_destroy(&bd->d_statecv);
	list_destroy(&bd->d_waitq);
	list_destroy(&bd->d_runq);
	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
	return (DDI_SUCCESS);
}

static int
bd_xfer_ctor(void *buf, void *arg, int kmflag)
{
	bd_xfer_impl_t	*xi;
	bd_t		*bd = arg;
	int		(*dcb)(caddr_t);

	if (kmflag == KM_SLEEP) {
		dcb = DDI_DMA_SLEEP;
	} else {
		dcb = DDI_DMA_DONTWAIT;
	}

	xi = buf;
	bzero(xi, sizeof (*xi));
	xi->i_bd = bd;

	if (bd->d_use_dma) {
		if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
		    &xi->i_dmah) != DDI_SUCCESS) {
			return (-1);
		}
	}

	return (0);
}
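
/*
 * Design note, illustrative only: the constructor above pre-allocates
 * one DMA handle per cached transfer, so the per-I/O hot path in
 * bd_xfer_alloc() only ever binds a buffer to an existing handle and
 * never pays for handle allocation.  A sketch of the resulting pattern:
 *
 *	xi = kmem_cache_alloc(bd->d_cache, KM_NOSLEEP);	 handle is ready
 *	...
 *	(void) ddi_dma_unbind_handle(xi->i_dmah);	 binding released
 *	kmem_cache_free(bd->d_cache, xi);		 handle is retained
 */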

static void
bd_xfer_dtor(void *buf, void *arg)
{
	bd_xfer_impl_t	*xi = buf;

	_NOTE(ARGUNUSED(arg));

	if (xi->i_dmah)
		ddi_dma_free_handle(&xi->i_dmah);
	xi->i_dmah = NULL;
}

static bd_xfer_impl_t *
bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
    int kmflag)
{
	bd_xfer_impl_t	*xi;
	int		rv = 0;	/* the zero-length path below skips binding */
	int		status;
	unsigned	dir;
	int		(*cb)(caddr_t);
	size_t		len;
	uint32_t	shift;

	if (kmflag == KM_SLEEP) {
		cb = DDI_DMA_SLEEP;
	} else {
		cb = DDI_DMA_DONTWAIT;
	}

	xi = kmem_cache_alloc(bd->d_cache, kmflag);
	if (xi == NULL) {
		bioerror(bp, ENOMEM);
		return (NULL);
	}

	ASSERT(bp);
	ASSERT(bp->b_bcount);

	xi->i_bp = bp;
	xi->i_func = func;
	xi->i_blkno = bp->b_lblkno;

	if (bp->b_bcount == 0) {
		xi->i_len = 0;
		xi->i_nblks = 0;
		xi->i_kaddr = NULL;
		xi->i_resid = 0;
		xi->i_num_win = 0;
		goto done;
	}

	if (bp->b_flags & B_READ) {
		dir = DDI_DMA_READ;
		xi->i_func = bd->d_ops.o_read;
	} else {
		dir = DDI_DMA_WRITE;
		xi->i_func = bd->d_ops.o_write;
	}

	shift = bd->d_blkshift;
	xi->i_blkshift = shift;

	if (!bd->d_use_dma) {
		bp_mapin(bp);
		rv = 0;
		xi->i_offset = 0;
		xi->i_num_win =
		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
		xi->i_cur_win = 0;
		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
		xi->i_nblks = xi->i_len >> shift;
		xi->i_kaddr = bp->b_un.b_addr;
		xi->i_resid = bp->b_bcount;
	} else {

		/*
		 * We have to use consistent DMA if the address is misaligned.
		 */
		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
		} else {
			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
		}

		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
		    NULL, &xi->i_dmac, &xi->i_ndmac);
		switch (status) {
		case DDI_DMA_MAPPED:
			xi->i_num_win = 1;
			xi->i_cur_win = 0;
			xi->i_offset = 0;
			xi->i_len = bp->b_bcount;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_PARTIAL_MAP:
			xi->i_cur_win = 0;

			/* the window must cover whole blocks */
			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
			    DDI_SUCCESS) ||
			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
			    &len, &xi->i_dmac, &xi->i_ndmac) !=
			    DDI_SUCCESS) ||
			    (P2PHASE(len, (1U << shift)) != 0)) {
				(void) ddi_dma_unbind_handle(xi->i_dmah);
				rv = EFAULT;
				goto done;
			}
			xi->i_len = len;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_NORESOURCES:
			rv = EAGAIN;
			goto done;
		case DDI_DMA_TOOBIG:
			rv = EINVAL;
			goto done;
		case DDI_DMA_NOMAPPING:
		case DDI_DMA_INUSE:
		default:
			rv = EFAULT;
			goto done;
		}
	}

done:
	if (rv != 0) {
		kmem_cache_free(bd->d_cache, xi);
		bioerror(bp, rv);
		return (NULL);
	}

	return (xi);
}

static void
bd_xfer_free(bd_xfer_impl_t *xi)
{
	if (xi->i_dmah) {
		(void) ddi_dma_unbind_handle(xi->i_dmah);
	}
	kmem_cache_free(xi->i_bd->d_cache, xi);
}

static int
bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	dev_t		dev = *devp;
	bd_t		*bd;
	minor_t		part;
	minor_t		inst;
	uint64_t	mask;
	boolean_t	ndelay;
	int		rv;
	diskaddr_t	nblks;
	diskaddr_t	lba;

	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	if (otyp >= OTYPCNT)
		return (EINVAL);

	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;

	/*
	 * Block any DR events from changing the set of registered
	 * devices while we function.
	 */
	rw_enter(&bd_lock, RW_READER);
	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);

	ASSERT(part < 64);
	mask = (1ULL << part);	/* 64-bit; a 32-bit shift would overflow */

	bd_update_state(bd);

	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {

		/* non-blocking opens are allowed to succeed */
		if (!ndelay) {
			rv = ENXIO;
			goto done;
		}
	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
	    NULL, NULL, 0) == 0) {

		/*
		 * We read the partinfo, verify valid ranges.  If the
		 * partition is invalid, and we aren't blocking or
		 * doing a raw access, then fail.  (Non-blocking and
		 * raw accesses can still succeed to allow a disk with
		 * bad partition data to be opened by format and fdisk.)
		 */
		if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
			rv = ENXIO;
			goto done;
		}
	} else if (!ndelay) {
		/*
		 * cmlb_partinfo failed -- invalid partition or no
		 * disk label.
		 */
		rv = ENXIO;
		goto done;
	}

	if ((flag & FWRITE) && bd->d_rdonly) {
		rv = EROFS;
		goto done;
	}

	if ((bd->d_open_excl) & (mask)) {
		rv = EBUSY;
		goto done;
	}
	if (flag & FEXCL) {
		if (bd->d_open_lyr[part]) {
			rv = EBUSY;
			goto done;
		}
		for (int i = 0; i < OTYP_LYR; i++) {
			if (bd->d_open_reg[i] & mask) {
				rv = EBUSY;
				goto done;
			}
		}
	}

	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]++;
	} else {
		bd->d_open_reg[otyp] |= mask;
	}
	if (flag & FEXCL) {
		bd->d_open_excl |= mask;
	}

	rv = 0;
done:
	mutex_exit(&bd->d_ocmutex);
	rw_exit(&bd_lock);

	return (rv);
}
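
/*
 * Open-state bookkeeping, worked example: d_open_lyr[] counts layered
 * opens per partition, while d_open_reg[otyp] and d_open_excl are
 * 64-bit masks with one bit per partition.  A successful exclusive
 * character open of partition 5 leaves
 *
 *	bd->d_open_reg[OTYP_CHR] & (1ULL << 5)	set, and
 *	bd->d_open_excl & (1ULL << 5)		set,
 *
 * so any later open of partition 5 fails with EBUSY until it is closed.
 */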

static int
bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	bd_t		*bd;
	minor_t		inst;
	minor_t		part;
	uint64_t	mask;
	boolean_t	last = B_TRUE;

	_NOTE(ARGUNUSED(flag));
	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	ASSERT(part < 64);
	mask = (1ULL << part);

	rw_enter(&bd_lock, RW_READER);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);
	if (bd->d_open_excl & mask) {
		bd->d_open_excl &= ~mask;
	}
	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]--;
	} else {
		bd->d_open_reg[otyp] &= ~mask;
	}
	for (int i = 0; i < BD_MAXPART; i++) {
		if (bd->d_open_lyr[i]) {
			last = B_FALSE;
		}
	}
	for (int i = 0; last && (i < OTYP_LYR); i++) {
		if (bd->d_open_reg[i]) {
			last = B_FALSE;
		}
	}
	mutex_exit(&bd->d_ocmutex);

	if (last) {
		cmlb_invalidate(bd->d_cmlbh, 0);
	}
	rw_exit(&bd_lock);

	return (0);
}

static int
bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
{
	minor_t		inst;
	minor_t		part;
	diskaddr_t	pstart;
	diskaddr_t	psize;
	bd_t		*bd;
	bd_xfer_impl_t	*xi;
	buf_t		*bp;
	int		rv;

	rw_enter(&bd_lock, RW_READER);

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}
	/*
	 * do cmlb, but do it synchronously unless we already have the
	 * partition (which we probably should.)
	 */
	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
	    (void *)1)) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	if ((blkno + nblk) > psize) {
		rw_exit(&bd_lock);
		return (EINVAL);
	}
	bp = getrbuf(KM_NOSLEEP);
	if (bp == NULL) {
		rw_exit(&bd_lock);
		return (ENOMEM);
	}

	bp->b_bcount = nblk << bd->d_blkshift;
	bp->b_resid = bp->b_bcount;
	bp->b_lblkno = blkno;
	bp->b_un.b_addr = caddr;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP);
	if (xi == NULL) {
		rw_exit(&bd_lock);
		freerbuf(bp);
		return (ENOMEM);
	}
	xi->i_blkno = blkno + pstart;
	xi->i_flags = BD_XFER_POLL;
	bd_submit(bd, xi);
	rw_exit(&bd_lock);

	/*
	 * Generally, we should have run this entirely synchronously
	 * at this point and the biowait call should be a no-op.  If
	 * it didn't happen this way, it's a bug in the underlying
	 * driver not honoring BD_XFER_POLL.
	 */
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}

static int
bd_read(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_READ, minphys, uio));
}

static int
bd_write(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_WRITE, minphys, uio));
}

static int
bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_READ, minphys, aio));
}

static int
bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_WRITE, minphys, aio));
}

static int
bd_strategy(struct buf *bp)
{
	minor_t		inst;
	minor_t		part;
	bd_t		*bd;
	diskaddr_t	p_lba;
	diskaddr_t	p_nblks;
	diskaddr_t	b_nblks;
	bd_xfer_impl_t	*xi;
	uint32_t	shift;
	int		(*func)(void *, bd_xfer_t *);

	part = BDPART(bp->b_edev);
	inst = BDINST(bp->b_edev);

	ASSERT(bp);

	bp->b_resid = bp->b_bcount;

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
	    NULL, NULL, 0)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	shift = bd->d_blkshift;

	if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
	    (bp->b_lblkno > p_nblks)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}
	b_nblks = bp->b_bcount >> shift;
	if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
		biodone(bp);
		return (0);
	}

	if ((b_nblks + bp->b_lblkno) > p_nblks) {
		bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
		bp->b_bcount -= bp->b_resid;
	} else {
		bp->b_resid = 0;
	}
	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;

	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
	if (xi == NULL) {
		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
	}
	if (xi == NULL) {
		/* bd_xfer_alloc will have done bioerror */
		biodone(bp);
		return (0);
	}
	xi->i_blkno = bp->b_lblkno + p_lba;

	bd_submit(bd, xi);

	return (0);
}
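
/*
 * Worked example of the mapping above, assuming 512-byte blocks
 * (d_blkshift == 9): an 8192-byte write to logical block 100 of a
 * partition starting at absolute block 2048 gives b_nblks == 16 and
 * xi->i_blkno == 2148.  If the partition is only 112 blocks long, the
 * last 4 blocks overhang it, so b_resid becomes 4 << 9 == 2048,
 * b_bcount drops to 6144, and only the in-range blocks are submitted.
 */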

static int
bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
{
	minor_t		inst;
	uint16_t	part;
	bd_t		*bd;
	void		*ptr = (void *)arg;
	int		rv;

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		return (ENXIO);
	}

	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
	if (rv != ENOTTY)
		return (rv);

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo minfo;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&minfo, sizeof (minfo));
		minfo.dki_media_type = DK_FIXED_DISK;
		minfo.dki_lbsize = (1U << bd->d_blkshift);
		minfo.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo cinfo;
		bzero(&cinfo, sizeof (cinfo));
		cinfo.dki_ctype = DKC_BLKDEV;
		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
		    "%s", ddi_driver_name(bd->d_dip));
		cinfo.dki_unit = inst;
		cinfo.dki_flags = DKI_FMTVOL;
		cinfo.dki_partition = part;
		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
		cinfo.dki_addr = 0;
		cinfo.dki_slave = 0;
		cinfo.dki_space = 0;
		cinfo.dki_prio = 0;
		cinfo.dki_vec = 0;
		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i;
		i = bd->d_removable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCHOTPLUGGABLE: {
		int i;
		i = bd->d_hotpluggable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREADONLY: {
		int i;
		i = bd->d_rdonly ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSTATE: {
		enum dkio_state	state;
		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
			return (EFAULT);
		}
		if ((rv = bd_check_state(bd, &state)) != 0) {
			return (rv);
		}
		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc;

		dkc = (flag & FKIOCTL) ? (void *)arg : NULL;
		rv = bd_flush_write_cache(bd, dkc);
		return (rv);
	}

	default:
		break;

	}
	return (ENOTTY);
}

static int
bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	bd_t	*bd;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
	if (bd == NULL)
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));

	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
	    valuep, lengthp, BDPART(dev), 0));
}


static int
bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
	bd_t		*bd;
	buf_t		*bp;
	bd_xfer_impl_t	*xi;
	int		rv;
	int		(*func)(void *, bd_xfer_t *);
	int		kmflag;

	/*
	 * If we are running in polled mode (such as during dump(9e)
	 * execution), then we cannot sleep for kernel allocations.
	 */
	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	if ((bp = getrbuf(kmflag)) == NULL) {
		return (ENOMEM);
	}

	switch (cmd) {
	case TG_READ:
		bp->b_flags = B_READ;
		func = bd->d_ops.o_read;
		break;
	case TG_WRITE:
		bp->b_flags = B_WRITE;
		func = bd->d_ops.o_write;
		break;
	default:
		freerbuf(bp);
		return (EINVAL);
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	xi = bd_xfer_alloc(bd, bp, func, kmflag);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}
	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
	xi->i_blkno = start;
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

static int
bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	bd_t	*bd;

	_NOTE(ARGUNUSED(tg_cookie));
	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	switch (cmd) {
	case TG_GETPHYGEOM:
	case TG_GETVIRTGEOM:
		/*
		 * We don't have any "geometry" as such, let cmlb
		 * fabricate something.
		 */
		return (ENOTTY);

	case TG_GETCAPACITY:
		bd_update_state(bd);
		*(diskaddr_t *)arg = bd->d_numblks;
		return (0);

	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = (1U << bd->d_blkshift);
		return (0);

	case TG_GETATTR:
		/*
		 * It turns out that cmlb really doesn't do much for
		 * non-writable media, but let's make the information
		 * available for it in case it does more in the
		 * future.  (The value is currently used for
		 * triggering special behavior for CD-ROMs.)
		 */
		bd_update_state(bd);
		((tg_attribute_t *)arg)->media_is_writable =
		    bd->d_rdonly ? B_FALSE : B_TRUE;
		return (0);

	default:
		return (EINVAL);
	}
}


static void
bd_sched(bd_t *bd)
{
	bd_xfer_impl_t	*xi;
	struct buf	*bp;
	int		rv;

	mutex_enter(&bd->d_iomutex);

	while ((bd->d_qactive < bd->d_qsize) &&
	    ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
		bd->d_qactive++;
		kstat_waitq_to_runq(bd->d_kiop);
		list_insert_tail(&bd->d_runq, xi);

		/*
		 * Submit the job to the driver.  We drop the I/O mutex
		 * so that we can deal with the case where the driver
		 * completion routine calls back into us synchronously.
		 */

		mutex_exit(&bd->d_iomutex);

		rv = xi->i_func(bd->d_private, &xi->i_public);
		mutex_enter(&bd->d_iomutex);
		if (rv != 0) {
			bp = xi->i_bp;
			bd->d_qactive--;
			kstat_runq_exit(bd->d_kiop);
			/* remove from the run queue before freeing */
			list_remove(&bd->d_runq, xi);
			mutex_exit(&bd->d_iomutex);

			bd_xfer_free(xi);
			bioerror(bp, rv);
			biodone(bp);

			mutex_enter(&bd->d_iomutex);
		}
	}

	mutex_exit(&bd->d_iomutex);
}

static void
bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
{
	mutex_enter(&bd->d_iomutex);
	list_insert_tail(&bd->d_waitq, xi);
	kstat_waitq_enter(bd->d_kiop);
	mutex_exit(&bd->d_iomutex);

	bd_sched(bd);
}

static void
bd_runq_exit(bd_xfer_impl_t *xi, int err)
{
	bd_t	*bd = xi->i_bd;
	buf_t	*bp = xi->i_bp;

	mutex_enter(&bd->d_iomutex);
	bd->d_qactive--;
	kstat_runq_exit(bd->d_kiop);
	list_remove(&bd->d_runq, xi);
	mutex_exit(&bd->d_iomutex);

	if (err == 0) {
		if (bp->b_flags & B_READ) {
			bd->d_kiop->reads++;
			bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
		} else {
			bd->d_kiop->writes++;
			bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
		}
	}
	bd_sched(bd);
}

static void
bd_update_state(bd_t *bd)
{
	enum dkio_state	state;
	bd_media_t	media;
	boolean_t	docmlb = B_FALSE;

	bzero(&media, sizeof (media));

	mutex_enter(&bd->d_statemutex);
	if (bd->d_ops.o_media_info(bd->d_private, &media) == 0) {
		if ((1U << bd->d_blkshift) != media.m_blksize) {
			if ((media.m_blksize < 512) ||
			    (!ISP2(media.m_blksize)) ||
			    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
				cmn_err(CE_WARN,
				    "%s%d: Invalid media block size (%d)",
				    ddi_driver_name(bd->d_dip),
				    ddi_get_instance(bd->d_dip),
				    media.m_blksize);
				/*
				 * We can't use the media, treat it as
				 * not present.
				 */
				state = DKIO_EJECTED;
				bd->d_numblks = 0;
			} else {
				bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
				bd->d_numblks = media.m_nblks;
				bd->d_rdonly = media.m_readonly;
				state = DKIO_INSERTED;
			}

			/* Device size changed */
			docmlb = B_TRUE;

		} else {
			if (bd->d_numblks != media.m_nblks) {
				/* Device size changed */
				docmlb = B_TRUE;
			}
			bd->d_numblks = media.m_nblks;
			bd->d_rdonly = media.m_readonly;
			state = DKIO_INSERTED;
		}

	} else {
		bd->d_numblks = 0;
		state = DKIO_EJECTED;
	}
	if (state != bd->d_state) {
		bd->d_state = state;
		cv_broadcast(&bd->d_statecv);
		docmlb = B_TRUE;
	}
	mutex_exit(&bd->d_statemutex);

	if (docmlb) {
		if (state == DKIO_INSERTED) {
			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
		} else {
			cmlb_invalidate(bd->d_cmlbh, 0);
		}
	}
}
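
/*
 * Block-size handling above, worked example: d_blkshift is the log2 of
 * the media block size, computed as ddi_ffs(m_blksize) - 1 (ddi_ffs
 * returns the 1-based position of the lowest set bit).  For 4096-byte
 * media, ddi_ffs(4096) == 13, so d_blkshift == 12 and the logical
 * block size reported via DKIOCGMEDIAINFO is 1U << 12 == 4096.
 * Non-power-of-two or sub-512-byte sizes are rejected and the media is
 * treated as ejected.
 */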

static int
bd_check_state(bd_t *bd, enum dkio_state *state)
{
	clock_t		when;

	for (;;) {

		bd_update_state(bd);

		mutex_enter(&bd->d_statemutex);

		if (bd->d_state != *state) {
			*state = bd->d_state;
			mutex_exit(&bd->d_statemutex);
			break;
		}

		when = drv_usectohz(1000000);
		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
		    when, TR_CLOCK_TICK) == 0) {
			mutex_exit(&bd->d_statemutex);
			return (EINTR);
		}

		mutex_exit(&bd->d_statemutex);
	}

	return (0);
}

static int
bd_flush_write_cache_done(struct buf *bp)
{
	struct dk_callback *dc = (void *)bp->b_private;

	(*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
	kmem_free(dc, sizeof (*dc));
	freerbuf(bp);
	return (0);
}

static int
bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
{
	buf_t			*bp;
	struct dk_callback	*dc;
	bd_xfer_impl_t		*xi;
	int			rv;

	if (bd->d_ops.o_sync_cache == NULL) {
		return (ENOTSUP);
	}
	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
		return (ENOMEM);
	}
	bp->b_resid = 0;
	bp->b_bcount = 0;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}

	if (dkc != NULL) {
		/* Make a private copy of the callback structure */
		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
		*dc = *dkc;
		bp->b_private = dc;
		bp->b_iodone = bd_flush_write_cache_done;
	}

	bd_submit(bd, xi);
	if (dkc == NULL) {
		/* wait synchronously */
		(void) biowait(bp);
		rv = geterror(bp);
		freerbuf(bp);
	} else {
		/* deferred via callback */
		rv = 0;
	}
	return (rv);
}
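
/*
 * Illustrative caller of the flush path above, assuming a kernel-level
 * (FKIOCTL) DKIOCFLUSHWRITECACHE issued by a layered driver; the names
 * my_done, my_cookie, and lh are hypothetical:
 *
 *	struct dk_callback dkc;
 *
 *	dkc.dkc_callback = my_done;	 invoked with geterror(bp)
 *	dkc.dkc_cookie = my_cookie;
 *	rv = ldi_ioctl(lh, DKIOCFLUSHWRITECACHE, (intptr_t)&dkc,
 *	    FKIOCTL, kcred, NULL);
 *
 * With a callback the ioctl returns immediately and completion is
 * reported through bd_flush_write_cache_done(); with dkc == NULL the
 * flush is synchronous.
 */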

/*
 * Nexus support.
 */
int
bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
    void *arg, void *result)
{
	bd_handle_t	hdl;

	switch (ctlop) {
	case DDI_CTLOPS_REPORTDEV:
		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
		    ddi_driver_name(rdip), ddi_get_instance(rdip));
		return (DDI_SUCCESS);

	case DDI_CTLOPS_INITCHILD:
		hdl = ddi_get_parent_data((dev_info_t *)arg);
		if (hdl == NULL) {
			return (DDI_NOT_WELL_FORMED);
		}
		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
		return (DDI_SUCCESS);

	case DDI_CTLOPS_UNINITCHILD:
		ddi_set_name_addr((dev_info_t *)arg, NULL);
		ndi_prop_remove_all((dev_info_t *)arg);
		return (DDI_SUCCESS);

	default:
		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
	}
}

/*
 * Functions for device drivers.
 */
bd_handle_t
bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
{
	bd_handle_t	hdl;

	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
	if (hdl != NULL) {
		hdl->h_ops = *ops;
		hdl->h_dma = dma;
		hdl->h_private = private;
	}

	return (hdl);
}

void
bd_free_handle(bd_handle_t hdl)
{
	kmem_free(hdl, sizeof (*hdl));
}

int
bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
{
	dev_info_t	*child;
	bd_drive_t	drive;

	/* if drivers don't override this, make it assume none */
	bzero(&drive, sizeof (drive));
	drive.d_lun = -1;
	hdl->h_ops.o_drive_info(hdl->h_private, &drive);

	hdl->h_parent = dip;
	hdl->h_name = "blkdev";

	if (drive.d_lun >= 0) {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X,%X",
		    drive.d_target, drive.d_lun);
	} else {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X",
		    drive.d_target);
	}
	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
	    &child) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    "blkdev", hdl->h_addr);
		return (DDI_FAILURE);
	}

	ddi_set_parent_data(child, hdl);
	hdl->h_child = child;

	if (ndi_devi_online(child, 0) == NDI_FAILURE) {
		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    hdl->h_name, hdl->h_addr);
		(void) ndi_devi_free(child);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

int
bd_detach_handle(bd_handle_t hdl)
{
	int	circ;
	int	rv;
	char	*devnm;

	if (hdl->h_child == NULL) {
		return (DDI_SUCCESS);
	}
	ndi_devi_enter(hdl->h_parent, &circ);
	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
		rv = ddi_remove_child(hdl->h_child, 0);
	} else {
		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
		(void) ddi_deviname(hdl->h_child, devnm);
		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
		kmem_free(devnm, MAXNAMELEN + 1);
	}
	if (rv == 0) {
		hdl->h_child = NULL;
	}

	ndi_devi_exit(hdl->h_parent, circ);
	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
}
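
/*
 * Typical use of the handle API by a parent driver, sketched under the
 * assumption of a driver-private softstate "sc" with its own bd_ops_t
 * and DMA attributes; all names here are hypothetical and error
 * handling is omitted:
 *
 *	static int
 *	mydrv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 *	{
 *		...
 *		sc->sc_bdh = bd_alloc_handle(sc, &mydrv_bd_ops,
 *		    &mydrv_dma_attr, KM_SLEEP);
 *		(void) bd_attach_handle(dip, sc->sc_bdh);
 *		...
 *	}
 *
 * On detach, bd_detach_handle() followed by bd_free_handle() undoes
 * this.  The parent also calls bd_mod_init() on its dev_ops from its
 * _init() so that the bus_ops below are wired up.
 */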

void
bd_xfer_done(bd_xfer_t *xfer, int err)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	buf_t		*bp = xi->i_bp;
	int		rv = DDI_SUCCESS; /* non-DMA path skips getwin */
	bd_t		*bd = xi->i_bd;
	size_t		len;

	if (err != 0) {
		bd_runq_exit(xi, err);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, err);
		biodone(bp);
		return;
	}

	xi->i_cur_win++;
	xi->i_resid -= xi->i_len;

	if (xi->i_resid == 0) {
		/* Job completed successfully! */
		bd_runq_exit(xi, 0);

		bd_xfer_free(xi);
		biodone(bp);
		return;
	}

	xi->i_blkno += xi->i_nblks;

	if (bd->d_use_dma) {
		/* More transfer still pending... advance to next window. */
		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
	} else {
		/* Advance memory window. */
		xi->i_kaddr += xi->i_len;
		xi->i_offset += xi->i_len;
		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
	}


	if ((rv != DDI_SUCCESS) ||
	    (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
		bd_runq_exit(xi, EFAULT);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, EFAULT);
		biodone(bp);
		return;
	}
	xi->i_len = len;
	xi->i_nblks = len >> xi->i_blkshift;

	/* Submit next window to hardware. */
	rv = xi->i_func(bd->d_private, &xi->i_public);
	if (rv != 0) {
		bd_runq_exit(xi, rv);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, rv);
		biodone(bp);
	}
}

void
bd_state_change(bd_handle_t hdl)
{
	bd_t	*bd;

	if ((bd = hdl->h_bd) != NULL) {
		bd_update_state(bd);
	}
}

void
bd_mod_init(struct dev_ops *devops)
{
	static struct bus_ops bd_bus_ops = {
		BUSO_REV,		/* busops_rev */
		nullbusmap,		/* bus_map */
		NULL,			/* bus_get_intrspec (OBSOLETE) */
		NULL,			/* bus_add_intrspec (OBSOLETE) */
		NULL,			/* bus_remove_intrspec (OBSOLETE) */
		i_ddi_map_fault,	/* bus_map_fault */
		ddi_dma_map,		/* bus_dma_map */
		ddi_dma_allochdl,	/* bus_dma_allochdl */
		ddi_dma_freehdl,	/* bus_dma_freehdl */
		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
		ddi_dma_flush,		/* bus_dma_flush */
		ddi_dma_win,		/* bus_dma_win */
		ddi_dma_mctl,		/* bus_dma_ctl */
		bd_bus_ctl,		/* bus_ctl */
		ddi_bus_prop_op,	/* bus_prop_op */
		NULL,			/* bus_get_eventcookie */
		NULL,			/* bus_add_eventcall */
		NULL,			/* bus_remove_eventcall */
		NULL,			/* bus_post_event */
		NULL,			/* bus_intr_ctl (OBSOLETE) */
		NULL,			/* bus_config */
		NULL,			/* bus_unconfig */
		NULL,			/* bus_fm_init */
		NULL,			/* bus_fm_fini */
		NULL,			/* bus_fm_access_enter */
		NULL,			/* bus_fm_access_exit */
		NULL,			/* bus_power */
		NULL,			/* bus_intr_op */
	};

	devops->devo_bus_ops = &bd_bus_ops;

	/*
	 * NB: The device driver is free to supply its own
	 * character entry device support.
	 */
}

void
bd_mod_fini(struct dev_ops *devops)
{
	devops->devo_bus_ops = NULL;
}