/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
 * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/aio_req.h>
#include <sys/cred.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/kstat.h>
#include <sys/fs/dv_node.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/note.h>
#include <sys/blkdev.h>
#include <sys/scsi/impl/inquiry.h>

#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)
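/*
 * Worked example (editorial note, not from the original source): with
 * BD_MAXPART == 64, each instance owns a contiguous block of 64 minor
 * numbers, so minor 131 decodes as instance 131 / 64 == 2 and partition
 * 131 % 64 == 3.  The 64-bit per-partition open masks below depend on
 * this limit of 64 partitions per instance.
 */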
typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;

struct bd {
	void		*d_private;
	dev_info_t	*d_dip;
	kmutex_t	d_ocmutex;
	kmutex_t	d_iomutex;
	kmutex_t	*d_errmutex;
	kmutex_t	d_statemutex;
	kcondvar_t	d_statecv;
	enum dkio_state	d_state;
	cmlb_handle_t	d_cmlbh;
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];	/* bit mask */

	uint32_t	d_qsize;
	uint32_t	d_qactive;
	uint32_t	d_maxxfer;
	uint32_t	d_blkshift;
	uint32_t	d_pblkshift;
	uint64_t	d_numblks;
	ddi_devid_t	d_devid;

	kmem_cache_t	*d_cache;
	list_t		d_runq;
	list_t		d_waitq;
	kstat_t		*d_ksp;
	kstat_io_t	*d_kiop;
	kstat_t		*d_errstats;
	struct bd_errstats *d_kerr;

	boolean_t	d_rdonly;
	boolean_t	d_ssd;
	boolean_t	d_removable;
	boolean_t	d_hotpluggable;
	boolean_t	d_use_dma;

	ddi_dma_attr_t	d_dma;
	bd_ops_t	d_ops;
	bd_handle_t	d_handle;
};

struct bd_handle {
	bd_ops_t	h_ops;
	ddi_dma_attr_t	*h_dma;
	dev_info_t	*h_parent;
	dev_info_t	*h_child;
	void		*h_private;
	bd_t		*h_bd;
	char		*h_name;
	char		h_addr[20];	/* enough for %X,%X */
};

struct bd_xfer_impl {
	bd_xfer_t	i_public;
	list_node_t	i_linkage;
	bd_t		*i_bd;
	buf_t		*i_bp;
	uint_t		i_num_win;
	uint_t		i_cur_win;
	off_t		i_offset;
	int		(*i_func)(void *, bd_xfer_t *);
	uint32_t	i_blkshift;
	size_t		i_len;
	size_t		i_resid;
};

#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags


/*
 * Private prototypes.
 */

static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);
static void bd_create_errstats(bd_t *, int, bd_drive_t *);
static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *);
static void bd_init_errstats(bd_t *, bd_drive_t *);

static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);

struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};

static struct cb_ops bd_cb_ops = {
	bd_open,		/* open */
	bd_close,		/* close */
	bd_strategy,		/* strategy */
	nodev,			/* print */
	bd_dump,		/* dump */
	bd_read,		/* read */
	bd_write,		/* write */
	bd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	bd_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_64BIT | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	bd_aread,		/* async read */
	bd_awrite		/* async write */
};

struct dev_ops bd_dev_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	bd_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	bd_attach,		/* attach */
	bd_detach,		/* detach */
	nodev,			/* reset */
	&bd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"Generic Block Device",
	&bd_dev_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, { &modldrv, NULL }
};

static void *bd_state;
static krwlock_t bd_lock;

int
_init(void)
{
	int	rv;

	rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
	if (rv != DDI_SUCCESS) {
		return (rv);
	}
	rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
	rv = mod_install(&modlinkage);
	if (rv != DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}
int
_fini(void)
{
	int	rv;

	rv = mod_remove(&modlinkage);
	if (rv == DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static int
bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	bd_t	*bd;
	minor_t	inst;

	_NOTE(ARGUNUSED(dip));

	inst = BDINST((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		bd = ddi_get_soft_state(bd_state, inst);
		if (bd == NULL) {
			return (DDI_FAILURE);
		}
		*resultp = (void *)bd->d_dip;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(intptr_t)inst;
		break;

	default:
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

static void
bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
{
	int	ilen;
	char	*data_string;

	ilen = scsi_ascii_inquiry_len(data, len);
	ASSERT3U(ilen, <=, len);
	if (ilen <= 0)
		return;
	/* ensure null termination */
	data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
	bcopy(data, data_string, ilen);
	(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
	kmem_free(data_string, ilen + 1);
}

static void
bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
{
	if (drive->d_vendor_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
		    drive->d_vendor, drive->d_vendor_len);

	if (drive->d_product_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
		    drive->d_product, drive->d_product_len);

	if (drive->d_serial_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
		    drive->d_serial, drive->d_serial_len);

	if (drive->d_revision_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
		    drive->d_revision, drive->d_revision_len);
}
static void
bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive)
{
	char	ks_module[KSTAT_STRLEN];
	char	ks_name[KSTAT_STRLEN];
	int	ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t);

	if (bd->d_errstats != NULL)
		return;

	(void) snprintf(ks_module, sizeof (ks_module), "%serr",
	    ddi_driver_name(bd->d_dip));
	(void) snprintf(ks_name, sizeof (ks_name), "%s%d,err",
	    ddi_driver_name(bd->d_dip), inst);

	bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error",
	    KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);

	if (bd->d_errstats == NULL) {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat.  The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats),
		    KM_SLEEP);
		bd->d_errmutex = kmem_zalloc(sizeof (kmutex_t), KM_SLEEP);
		mutex_init(bd->d_errmutex, NULL, MUTEX_DRIVER, NULL);
	} else {
		if (bd->d_errstats->ks_lock == NULL) {
			bd->d_errstats->ks_lock = kmem_zalloc(sizeof (kmutex_t),
			    KM_SLEEP);
			mutex_init(bd->d_errstats->ks_lock, NULL, MUTEX_DRIVER,
			    NULL);
		}

		bd->d_errmutex = bd->d_errstats->ks_lock;
		bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data;
	}

	kstat_named_init(&bd->d_kerr->bd_softerrs, "Soft Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_harderrs, "Hard Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);

	if (drive->d_model_len > 0) {
		kstat_named_init(&bd->d_kerr->bd_model, "Model",
		    KSTAT_DATA_STRING);
	} else {
		kstat_named_init(&bd->d_kerr->bd_vid, "Vendor",
		    KSTAT_DATA_STRING);
		kstat_named_init(&bd->d_kerr->bd_pid, "Product",
		    KSTAT_DATA_STRING);
	}

	kstat_named_init(&bd->d_kerr->bd_revision, "Revision",
	    KSTAT_DATA_STRING);
	kstat_named_init(&bd->d_kerr->bd_serial, "Serial No",
	    KSTAT_DATA_STRING);
	kstat_named_init(&bd->d_kerr->bd_capacity, "Size",
	    KSTAT_DATA_ULONGLONG);
	kstat_named_init(&bd->d_kerr->bd_rq_media_err, "Media Error",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err, "Device Not Ready",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_nodev_err, "No Device",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_recov_err, "Recoverable",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_illrq_err, "Illegal Request",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_pfa_err,
	    "Predictive Failure Analysis", KSTAT_DATA_UINT32);

	bd->d_errstats->ks_private = bd;

	kstat_install(bd->d_errstats);
}

static void
bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt)
{
	char	*tmp;

	if (KSTAT_NAMED_STR_PTR(k) == NULL) {
		if (len > 0) {
			/*
			 * The buffer is len + 1 bytes, so pass the full
			 * size to strlcpy; a bound of len would silently
			 * drop the final character.
			 */
			tmp = kmem_alloc(len + 1, KM_SLEEP);
			(void) strlcpy(tmp, str, len + 1);
		} else {
			tmp = alt;
		}

		kstat_named_setstr(k, tmp);
	}
}

static void
bd_init_errstats(bd_t *bd, bd_drive_t *drive)
{
	struct bd_errstats	*est = bd->d_kerr;

	mutex_enter(bd->d_errmutex);

	if (drive->d_model_len > 0 &&
	    KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) {
		bd_errstats_setstr(&est->bd_model, drive->d_model,
		    drive->d_model_len, NULL);
	} else {
		bd_errstats_setstr(&est->bd_vid, drive->d_vendor,
		    drive->d_vendor_len, "Unknown ");
		bd_errstats_setstr(&est->bd_pid, drive->d_product,
		    drive->d_product_len, "Unknown ");
	}

	bd_errstats_setstr(&est->bd_revision, drive->d_revision,
	    drive->d_revision_len, "0001");
	bd_errstats_setstr(&est->bd_serial, drive->d_serial,
	    drive->d_serial_len, "0 ");

	mutex_exit(bd->d_errmutex);
}
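/*
 * Editorial note (not from the original source): the effective d_maxxfer
 * computed in bd_attach() below is negotiated from up to three inputs.
 * If the parent supplies DMA attributes, dma_attr_maxxfer wins (a
 * conflicting preset value is forced to match); with no DMA attributes a
 * default of 1 MB is used; and in either case a smaller limit advertised
 * by the drive itself (drive.d_maxxfer) further caps the result.
 */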
static int
bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int		inst;
	bd_handle_t	hdl;
	bd_t		*bd;
	bd_drive_t	drive;
	int		rv;
	char		name[16];
	char		kcache[32];

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* We don't do anything native for suspend/resume */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	inst = ddi_get_instance(dip);
	hdl = ddi_get_parent_data(dip);

	(void) snprintf(name, sizeof (name), "%s%d",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);

	if (hdl == NULL) {
		cmn_err(CE_WARN, "%s: missing parent data!", name);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
		return (DDI_FAILURE);
	}
	bd = ddi_get_soft_state(bd_state, inst);

	if (hdl->h_dma) {
		bd->d_dma = *(hdl->h_dma);
		bd->d_dma.dma_attr_granular =
		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
		bd->d_use_dma = B_TRUE;

		if (bd->d_maxxfer &&
		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
			cmn_err(CE_WARN,
			    "%s: inconsistent maximum transfer size!",
			    name);
			/* We force it */
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		} else {
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		}
	} else {
		bd->d_use_dma = B_FALSE;
		if (bd->d_maxxfer == 0) {
			bd->d_maxxfer = 1024 * 1024;
		}
	}
	bd->d_ops = hdl->h_ops;
	bd->d_private = hdl->h_private;
	bd->d_blkshift = 9;	/* 512 bytes, to start */

	if (bd->d_maxxfer % DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
	}
	if (bd->d_maxxfer < DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	bd->d_dip = dip;
	bd->d_handle = hdl;
	hdl->h_bd = bd;
	ddi_set_driver_private(dip, bd);

	mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);

	list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));
	list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));

	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);

	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
	if (bd->d_ksp != NULL) {
		bd->d_ksp->ks_lock = &bd->d_iomutex;
		kstat_install(bd->d_ksp);
		bd->d_kiop = bd->d_ksp->ks_data;
	} else {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat.  The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
	}

	cmlb_alloc_handle(&bd->d_cmlbh);

	bd->d_state = DKIO_NONE;

	bzero(&drive, sizeof (drive));
	bd->d_ops.o_drive_info(bd->d_private, &drive);
	bd->d_qsize = drive.d_qsize;
	bd->d_removable = drive.d_removable;
	bd->d_hotpluggable = drive.d_hotpluggable;

	if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
		bd->d_maxxfer = drive.d_maxxfer;

	bd_create_inquiry_props(dip, &drive);

	bd_create_errstats(bd, inst, &drive);
	bd_init_errstats(bd, &drive);
	bd_update_state(bd);

	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
	    bd->d_removable, bd->d_hotpluggable,
	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
	if (rv != 0) {
		cmlb_free_handle(&bd->d_cmlbh);
		kmem_cache_destroy(bd->d_cache);
		mutex_destroy(&bd->d_iomutex);
		mutex_destroy(&bd->d_ocmutex);
		mutex_destroy(&bd->d_statemutex);
		cv_destroy(&bd->d_statecv);
		list_destroy(&bd->d_waitq);
		list_destroy(&bd->d_runq);
		if (bd->d_ksp != NULL) {
			kstat_delete(bd->d_ksp);
			bd->d_ksp = NULL;
		} else {
			kmem_free(bd->d_kiop, sizeof (kstat_io_t));
		}
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	if (bd->d_ops.o_devid_init != NULL) {
		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
		if (rv == DDI_SUCCESS) {
			if (ddi_devid_register(dip, bd->d_devid) !=
			    DDI_SUCCESS) {
				cmn_err(CE_WARN,
				    "%s: unable to register devid", name);
			}
		}
	}

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers).  Also set up properties
	 * used by HAL to identify removable media.
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);
	if (bd->d_removable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "removable-media", NULL, 0);
	}
	if (bd->d_hotpluggable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "hotpluggable", NULL, 0);
	}

	ddi_report_dev(dip);

	return (DDI_SUCCESS);
}
static int
bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	bd_t	*bd;

	bd = ddi_get_driver_private(dip);

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		/* We don't suspend, but our parent does */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
	if (bd->d_ksp != NULL) {
		kstat_delete(bd->d_ksp);
		bd->d_ksp = NULL;
	} else {
		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
	}

	if (bd->d_errstats != NULL) {
		kstat_delete(bd->d_errstats);
		bd->d_errstats = NULL;
	} else {
		kmem_free(bd->d_kerr, sizeof (struct bd_errstats));
		mutex_destroy(bd->d_errmutex);
	}

	cmlb_detach(bd->d_cmlbh, 0);
	cmlb_free_handle(&bd->d_cmlbh);
	if (bd->d_devid)
		ddi_devid_free(bd->d_devid);
	kmem_cache_destroy(bd->d_cache);
	mutex_destroy(&bd->d_iomutex);
	mutex_destroy(&bd->d_ocmutex);
	mutex_destroy(&bd->d_statemutex);
	cv_destroy(&bd->d_statecv);
	list_destroy(&bd->d_waitq);
	list_destroy(&bd->d_runq);
	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
	return (DDI_SUCCESS);
}

static int
bd_xfer_ctor(void *buf, void *arg, int kmflag)
{
	bd_xfer_impl_t	*xi;
	bd_t		*bd = arg;
	int		(*dcb)(caddr_t);

	if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
		dcb = DDI_DMA_SLEEP;
	} else {
		dcb = DDI_DMA_DONTWAIT;
	}

	xi = buf;
	bzero(xi, sizeof (*xi));
	xi->i_bd = bd;

	if (bd->d_use_dma) {
		if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
		    &xi->i_dmah) != DDI_SUCCESS) {
			return (-1);
		}
	}

	return (0);
}

static void
bd_xfer_dtor(void *buf, void *arg)
{
	bd_xfer_impl_t	*xi = buf;

	_NOTE(ARGUNUSED(arg));

	if (xi->i_dmah)
		ddi_dma_free_handle(&xi->i_dmah);
	xi->i_dmah = NULL;
}
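/*
 * Editorial note (not from the original source): because the kmem cache
 * constructor above allocates the DMA handle once per cached object, the
 * hot I/O path in bd_xfer_alloc() only has to bind the handle to a
 * buffer, never allocate one; the handle is released only when the
 * cached object is destroyed in bd_xfer_dtor().
 */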
static bd_xfer_impl_t *
bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
    int kmflag)
{
	bd_xfer_impl_t	*xi;
	int		rv = 0;
	int		status;
	unsigned	dir;
	int		(*cb)(caddr_t);
	size_t		len;
	uint32_t	shift;

	if (kmflag == KM_SLEEP) {
		cb = DDI_DMA_SLEEP;
	} else {
		cb = DDI_DMA_DONTWAIT;
	}

	xi = kmem_cache_alloc(bd->d_cache, kmflag);
	if (xi == NULL) {
		bioerror(bp, ENOMEM);
		return (NULL);
	}

	ASSERT(bp);

	xi->i_bp = bp;
	xi->i_func = func;
	xi->i_blkno = bp->b_lblkno;

	if (bp->b_bcount == 0) {
		xi->i_len = 0;
		xi->i_nblks = 0;
		xi->i_kaddr = NULL;
		xi->i_resid = 0;
		xi->i_num_win = 0;
		goto done;
	}

	if (bp->b_flags & B_READ) {
		dir = DDI_DMA_READ;
		xi->i_func = bd->d_ops.o_read;
	} else {
		dir = DDI_DMA_WRITE;
		xi->i_func = bd->d_ops.o_write;
	}

	shift = bd->d_blkshift;
	xi->i_blkshift = shift;

	if (!bd->d_use_dma) {
		bp_mapin(bp);
		rv = 0;
		xi->i_offset = 0;
		xi->i_num_win =
		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
		xi->i_cur_win = 0;
		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
		xi->i_nblks = xi->i_len >> shift;
		xi->i_kaddr = bp->b_un.b_addr;
		xi->i_resid = bp->b_bcount;
	} else {

		/*
		 * We have to use consistent DMA if the address is misaligned.
		 */
		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
		} else {
			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
		}

		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
		    NULL, &xi->i_dmac, &xi->i_ndmac);
		switch (status) {
		case DDI_DMA_MAPPED:
			xi->i_num_win = 1;
			xi->i_cur_win = 0;
			xi->i_offset = 0;
			xi->i_len = bp->b_bcount;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_PARTIAL_MAP:
			xi->i_cur_win = 0;

			/* the window length must be block aligned */
			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
			    DDI_SUCCESS) ||
			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
			    &len, &xi->i_dmac, &xi->i_ndmac) !=
			    DDI_SUCCESS) ||
			    (P2PHASE(len, (1U << shift)) != 0)) {
				(void) ddi_dma_unbind_handle(xi->i_dmah);
				rv = EFAULT;
				goto done;
			}
			xi->i_len = len;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_NORESOURCES:
			rv = EAGAIN;
			goto done;
		case DDI_DMA_TOOBIG:
			rv = EINVAL;
			goto done;
		case DDI_DMA_NOMAPPING:
		case DDI_DMA_INUSE:
		default:
			rv = EFAULT;
			goto done;
		}
	}

done:
	if (rv != 0) {
		kmem_cache_free(bd->d_cache, xi);
		bioerror(bp, rv);
		return (NULL);
	}

	return (xi);
}

static void
bd_xfer_free(bd_xfer_impl_t *xi)
{
	if (xi->i_dmah) {
		(void) ddi_dma_unbind_handle(xi->i_dmah);
	}
	kmem_cache_free(xi->i_bd->d_cache, xi);
}
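/*
 * Worked example (editorial, not from the original source): a 1 MB write
 * bound with DDI_DMA_PARTIAL against hardware that can map only 256 KB
 * at a time comes back DDI_DMA_PARTIAL_MAP with i_num_win == 4.  The
 * first window is activated in bd_xfer_alloc() via ddi_dma_getwin();
 * each completion then advances i_cur_win and calls ddi_dma_getwin()
 * again in bd_xfer_done() until i_resid reaches zero.
 */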
static int
bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	dev_t		dev = *devp;
	bd_t		*bd;
	minor_t		part;
	minor_t		inst;
	uint64_t	mask;
	boolean_t	ndelay;
	int		rv;
	diskaddr_t	nblks;
	diskaddr_t	lba;

	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	if (otyp >= OTYPCNT)
		return (EINVAL);

	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;

	/*
	 * Block any DR events from changing the set of registered
	 * devices while we function.
	 */
	rw_enter(&bd_lock, RW_READER);
	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);

	ASSERT(part < 64);
	mask = (1ULL << part);

	bd_update_state(bd);

	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {

		/* non-blocking opens are allowed to succeed */
		if (!ndelay) {
			rv = ENXIO;
			goto done;
		}
	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
	    NULL, NULL, 0) == 0) {

		/*
		 * We read the partinfo, verify valid ranges.  If the
		 * partition is invalid, and we aren't blocking or
		 * doing a raw access, then fail.  (Non-blocking and
		 * raw accesses can still succeed to allow a disk with
		 * bad partition data to be opened by format and fdisk.)
		 */
		if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
			rv = ENXIO;
			goto done;
		}
	} else if (!ndelay) {
		/*
		 * cmlb_partinfo failed -- invalid partition or no
		 * disk label.
		 */
		rv = ENXIO;
		goto done;
	}

	if ((flag & FWRITE) && bd->d_rdonly) {
		rv = EROFS;
		goto done;
	}

	if ((bd->d_open_excl) & (mask)) {
		rv = EBUSY;
		goto done;
	}
	if (flag & FEXCL) {
		if (bd->d_open_lyr[part]) {
			rv = EBUSY;
			goto done;
		}
		for (int i = 0; i < OTYP_LYR; i++) {
			if (bd->d_open_reg[i] & mask) {
				rv = EBUSY;
				goto done;
			}
		}
	}

	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]++;
	} else {
		bd->d_open_reg[otyp] |= mask;
	}
	if (flag & FEXCL) {
		bd->d_open_excl |= mask;
	}

	rv = 0;
done:
	mutex_exit(&bd->d_ocmutex);
	rw_exit(&bd_lock);

	return (rv);
}

static int
bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	bd_t		*bd;
	minor_t		inst;
	minor_t		part;
	uint64_t	mask;
	boolean_t	last = B_TRUE;

	_NOTE(ARGUNUSED(flag));
	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	ASSERT(part < 64);
	mask = (1ULL << part);

	rw_enter(&bd_lock, RW_READER);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);
	if (bd->d_open_excl & mask) {
		bd->d_open_excl &= ~mask;
	}
	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]--;
	} else {
		bd->d_open_reg[otyp] &= ~mask;
	}
	for (int i = 0; i < 64; i++) {
		if (bd->d_open_lyr[i]) {
			last = B_FALSE;
		}
	}
	for (int i = 0; last && (i < OTYP_LYR); i++) {
		if (bd->d_open_reg[i]) {
			last = B_FALSE;
		}
	}
	mutex_exit(&bd->d_ocmutex);

	if (last) {
		cmlb_invalidate(bd->d_cmlbh, 0);
	}
	rw_exit(&bd_lock);

	return (0);
}
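/*
 * Editorial example (not from the original source): FEXCL bookkeeping is
 * per partition.  An exclusive open of partition 3 sets bit 3 in
 * d_open_excl, which makes any later open of partition 3 fail with
 * EBUSY while opens of other partitions are unaffected; the bit is
 * cleared again on close, and the label is invalidated only when the
 * very last open of the device goes away.
 */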
static int
bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
{
	minor_t		inst;
	minor_t		part;
	diskaddr_t	pstart;
	diskaddr_t	psize;
	bd_t		*bd;
	bd_xfer_impl_t	*xi;
	buf_t		*bp;
	int		rv;

	rw_enter(&bd_lock, RW_READER);

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}
	/*
	 * do cmlb, but do it synchronously unless we already have the
	 * partition (which we probably should.)
	 */
	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
	    (void *)1)) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	if ((blkno + nblk) > psize) {
		rw_exit(&bd_lock);
		return (EINVAL);
	}
	bp = getrbuf(KM_NOSLEEP);
	if (bp == NULL) {
		rw_exit(&bd_lock);
		return (ENOMEM);
	}

	bp->b_bcount = nblk << bd->d_blkshift;
	bp->b_resid = bp->b_bcount;
	bp->b_lblkno = blkno;
	bp->b_un.b_addr = caddr;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP);
	if (xi == NULL) {
		rw_exit(&bd_lock);
		freerbuf(bp);
		return (ENOMEM);
	}
	xi->i_blkno = blkno + pstart;
	xi->i_flags = BD_XFER_POLL;
	bd_submit(bd, xi);
	rw_exit(&bd_lock);

	/*
	 * Generally, we should have run this entirely synchronously
	 * at this point and the biowait call should be a no-op.  If
	 * it didn't happen this way, it's a bug in the underlying
	 * driver not honoring BD_XFER_POLL.
	 */
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}

void
bd_minphys(struct buf *bp)
{
	minor_t	inst;
	bd_t	*bd;

	inst = BDINST(bp->b_edev);

	bd = ddi_get_soft_state(bd_state, inst);

	/*
	 * In a non-debug kernel, bd_strategy will catch !bd as
	 * well, and will fail nicely.
	 */
	ASSERT(bd);

	if (bp->b_bcount > bd->d_maxxfer)
		bp->b_bcount = bd->d_maxxfer;
}

static int
bd_read(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
}

static int
bd_write(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
}

static int
bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
}

static int
bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
}
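/*
 * Editorial note (not from the original source): the read/write entry
 * points above delegate everything to physio(9F)/aphysio(9F), which lock
 * down the user buffer and call bd_strategy() with buf(9S) requests.
 * bd_minphys() is passed in as the clamp, so no single buf ever exceeds
 * d_maxxfer; a larger uio simply arrives as several strategy calls.
 */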
static int
bd_strategy(struct buf *bp)
{
	minor_t		inst;
	minor_t		part;
	bd_t		*bd;
	diskaddr_t	p_lba;
	diskaddr_t	p_nblks;
	diskaddr_t	b_nblks;
	bd_xfer_impl_t	*xi;
	uint32_t	shift;
	int		(*func)(void *, bd_xfer_t *);

	part = BDPART(bp->b_edev);
	inst = BDINST(bp->b_edev);

	ASSERT(bp);

	bp->b_resid = bp->b_bcount;

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
	    NULL, NULL, 0)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	shift = bd->d_blkshift;

	if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
	    (bp->b_lblkno > p_nblks)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}
	b_nblks = bp->b_bcount >> shift;
	if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
		biodone(bp);
		return (0);
	}

	if ((b_nblks + bp->b_lblkno) > p_nblks) {
		bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
		bp->b_bcount -= bp->b_resid;
	} else {
		bp->b_resid = 0;
	}
	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;

	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
	if (xi == NULL) {
		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
	}
	if (xi == NULL) {
		/* bd_xfer_alloc will have done bioerror */
		biodone(bp);
		return (0);
	}
	xi->i_blkno = bp->b_lblkno + p_lba;

	bd_submit(bd, xi);

	return (0);
}
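/*
 * Worked example (editorial, not from the original source): in
 * bd_strategy() above, with 512-byte blocks (shift == 9) a 16-block
 * request starting at b_lblkno == p_nblks - 10 overruns the partition
 * by 6 blocks, so b_resid is set to 6 << 9 == 3072 bytes and b_bcount
 * is trimmed to the 10 blocks that actually fit; the truncated request
 * is then submitted normally.
 */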
static int
bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
{
	minor_t		inst;
	uint16_t	part;
	bd_t		*bd;
	void		*ptr = (void *)arg;
	int		rv;

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		return (ENXIO);
	}

	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
	if (rv != ENOTTY)
		return (rv);

	if (rvalp != NULL) {
		/* the return value of the ioctl is 0 by default */
		*rvalp = 0;
	}

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo minfo;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&minfo, sizeof (minfo));
		minfo.dki_media_type = DK_FIXED_DISK;
		minfo.dki_lbsize = (1U << bd->d_blkshift);
		minfo.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCGMEDIAINFOEXT: {
		struct dk_minfo_ext miext;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&miext, sizeof (miext));
		miext.dki_media_type = DK_FIXED_DISK;
		miext.dki_lbsize = (1U << bd->d_blkshift);
		miext.dki_pbsize = (1U << bd->d_pblkshift);
		miext.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo cinfo;

		bzero(&cinfo, sizeof (cinfo));
		cinfo.dki_ctype = DKC_BLKDEV;
		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
		    "%s", ddi_driver_name(bd->d_dip));
		cinfo.dki_unit = inst;
		cinfo.dki_flags = DKI_FMTVOL;
		cinfo.dki_partition = part;
		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
		cinfo.dki_addr = 0;
		cinfo.dki_slave = 0;
		cinfo.dki_space = 0;
		cinfo.dki_prio = 0;
		cinfo.dki_vec = 0;
		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i;

		i = bd->d_removable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCHOTPLUGGABLE: {
		int i;

		i = bd->d_hotpluggable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREADONLY: {
		int i;

		i = bd->d_rdonly ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSOLIDSTATE: {
		int i;

		i = bd->d_ssd ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSTATE: {
		enum dkio_state	state;

		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
			return (EFAULT);
		}
		if ((rv = bd_check_state(bd, &state)) != 0) {
			return (rv);
		}
		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc = NULL;

		if (flag & FKIOCTL)
			dkc = (void *)arg;

		rv = bd_flush_write_cache(bd, dkc);
		return (rv);
	}

	default:
		break;

	}
	return (ENOTTY);
}

static int
bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	bd_t	*bd;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
	if (bd == NULL)
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));

	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
	    valuep, lengthp, BDPART(dev), 0));
}


static int
bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
	bd_t		*bd;
	buf_t		*bp;
	bd_xfer_impl_t	*xi;
	int		rv;
	int		(*func)(void *, bd_xfer_t *);
	int		kmflag;

	/*
	 * If we are running in polled mode (such as during dump(9e)
	 * execution), then we cannot sleep for kernel allocations.
	 */
	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	if ((bp = getrbuf(kmflag)) == NULL) {
		return (ENOMEM);
	}

	switch (cmd) {
	case TG_READ:
		bp->b_flags = B_READ;
		func = bd->d_ops.o_read;
		break;
	case TG_WRITE:
		bp->b_flags = B_WRITE;
		func = bd->d_ops.o_write;
		break;
	default:
		freerbuf(bp);
		return (EINVAL);
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	xi = bd_xfer_alloc(bd, bp, func, kmflag);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}
	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
	xi->i_blkno = start;
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}
static int
bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	bd_t		*bd;

	_NOTE(ARGUNUSED(tg_cookie));
	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	switch (cmd) {
	case TG_GETPHYGEOM:
	case TG_GETVIRTGEOM:
		/*
		 * We don't have any "geometry" as such, let cmlb
		 * fabricate something.
		 */
		return (ENOTTY);

	case TG_GETCAPACITY:
		bd_update_state(bd);
		*(diskaddr_t *)arg = bd->d_numblks;
		return (0);

	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = (1U << bd->d_blkshift);
		return (0);

	case TG_GETATTR:
		/*
		 * It turns out that cmlb really doesn't do much for
		 * non-writable media, but let's make the information
		 * available for it in case it does more in the
		 * future.  (The value is currently used for
		 * triggering special behavior for CD-ROMs.)
		 */
		bd_update_state(bd);
		((tg_attribute_t *)arg)->media_is_writable =
		    bd->d_rdonly ? B_FALSE : B_TRUE;
		((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
		return (0);

	default:
		return (EINVAL);
	}
}


static void
bd_sched(bd_t *bd)
{
	bd_xfer_impl_t	*xi;
	struct buf	*bp;
	int		rv;

	mutex_enter(&bd->d_iomutex);

	while ((bd->d_qactive < bd->d_qsize) &&
	    ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
		bd->d_qactive++;
		kstat_waitq_to_runq(bd->d_kiop);
		list_insert_tail(&bd->d_runq, xi);

		/*
		 * Submit the job to the driver.  We drop the I/O mutex
		 * so that we can deal with the case where the driver
		 * completion routine calls back into us synchronously.
		 */

		mutex_exit(&bd->d_iomutex);

		rv = xi->i_func(bd->d_private, &xi->i_public);
		if (rv != 0) {
			bp = xi->i_bp;
			bioerror(bp, rv);
			biodone(bp);

			atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);

			mutex_enter(&bd->d_iomutex);
			bd->d_qactive--;
			kstat_runq_exit(bd->d_kiop);
			list_remove(&bd->d_runq, xi);
			bd_xfer_free(xi);
		} else {
			mutex_enter(&bd->d_iomutex);
		}
	}

	mutex_exit(&bd->d_iomutex);
}

static void
bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
{
	mutex_enter(&bd->d_iomutex);
	list_insert_tail(&bd->d_waitq, xi);
	kstat_waitq_enter(bd->d_kiop);
	mutex_exit(&bd->d_iomutex);

	bd_sched(bd);
}

static void
bd_runq_exit(bd_xfer_impl_t *xi, int err)
{
	bd_t	*bd = xi->i_bd;
	buf_t	*bp = xi->i_bp;

	mutex_enter(&bd->d_iomutex);
	bd->d_qactive--;
	kstat_runq_exit(bd->d_kiop);
	list_remove(&bd->d_runq, xi);
	mutex_exit(&bd->d_iomutex);

	if (err == 0) {
		if (bp->b_flags & B_READ) {
			bd->d_kiop->reads++;
			bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
		} else {
			bd->d_kiop->writes++;
			bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
		}
	}
	bd_sched(bd);
}
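/*
 * Editorial example (not from the original source): with d_qsize == 4
 * (from drive.d_qsize) and six transfers submitted back to back,
 * bd_submit() queues all six on d_waitq and bd_sched() moves the first
 * four to d_runq, handing each to the parent driver.  As each one
 * completes, bd_runq_exit() drops d_qactive and calls bd_sched() again,
 * which starts the next waiter, so at most four jobs are ever
 * outstanding in the hardware.
 */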
static void
bd_update_state(bd_t *bd)
{
	enum dkio_state	state = DKIO_INSERTED;
	boolean_t	docmlb = B_FALSE;
	bd_media_t	media;

	bzero(&media, sizeof (media));

	mutex_enter(&bd->d_statemutex);
	if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
		bd->d_numblks = 0;
		state = DKIO_EJECTED;
		goto done;
	}

	if ((media.m_blksize < 512) ||
	    (!ISP2(media.m_blksize)) ||
	    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
		cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
		    ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
		    media.m_blksize);
		/*
		 * We can't use the media, treat it as not present.
		 */
		state = DKIO_EJECTED;
		bd->d_numblks = 0;
		goto done;
	}

	if (((1U << bd->d_blkshift) != media.m_blksize) ||
	    (bd->d_numblks != media.m_nblks)) {
		/* Device size changed */
		docmlb = B_TRUE;
	}

	bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
	bd->d_pblkshift = bd->d_blkshift;
	bd->d_numblks = media.m_nblks;
	bd->d_rdonly = media.m_readonly;
	bd->d_ssd = media.m_solidstate;

	/*
	 * Only use the supplied physical block size if it is non-zero,
	 * greater or equal to the block size, and a power of 2.  Ignore it
	 * if not, it's just informational and we can still use the media.
	 */
	if ((media.m_pblksize != 0) &&
	    (media.m_pblksize >= media.m_blksize) &&
	    (ISP2(media.m_pblksize)))
		bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;

done:
	if (state != bd->d_state) {
		bd->d_state = state;
		cv_broadcast(&bd->d_statecv);
		docmlb = B_TRUE;
	}
	mutex_exit(&bd->d_statemutex);

	bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift;

	if (docmlb) {
		if (state == DKIO_INSERTED) {
			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
		} else {
			cmlb_invalidate(bd->d_cmlbh, 0);
		}
	}
}

static int
bd_check_state(bd_t *bd, enum dkio_state *state)
{
	clock_t		when;

	for (;;) {

		bd_update_state(bd);

		mutex_enter(&bd->d_statemutex);

		if (bd->d_state != *state) {
			*state = bd->d_state;
			mutex_exit(&bd->d_statemutex);
			break;
		}

		when = drv_usectohz(1000000);
		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
		    when, TR_CLOCK_TICK) == 0) {
			mutex_exit(&bd->d_statemutex);
			return (EINTR);
		}

		mutex_exit(&bd->d_statemutex);
	}

	return (0);
}

static int
bd_flush_write_cache_done(struct buf *bp)
{
	struct dk_callback *dc = (void *)bp->b_private;

	(*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
	kmem_free(dc, sizeof (*dc));
	freerbuf(bp);
	return (0);
}

static int
bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
{
	buf_t			*bp;
	struct dk_callback	*dc;
	bd_xfer_impl_t		*xi;
	int			rv;

	if (bd->d_ops.o_sync_cache == NULL) {
		return (ENOTSUP);
	}
	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
		return (ENOMEM);
	}
	bp->b_resid = 0;
	bp->b_bcount = 0;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}

	/* Make an asynchronous flush, but only if there is a callback */
	if (dkc != NULL && dkc->dkc_callback != NULL) {
		/* Make a private copy of the callback structure */
		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
		*dc = *dkc;
		bp->b_private = dc;
		bp->b_iodone = bd_flush_write_cache_done;

		bd_submit(bd, xi);
		return (0);
	}

	/* In case there is no callback, perform a synchronous flush */
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}
/*
 * Nexus support.
 */
int
bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
    void *arg, void *result)
{
	bd_handle_t	hdl;

	switch (ctlop) {
	case DDI_CTLOPS_REPORTDEV:
		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
		    ddi_driver_name(rdip), ddi_get_instance(rdip));
		return (DDI_SUCCESS);

	case DDI_CTLOPS_INITCHILD:
		hdl = ddi_get_parent_data((dev_info_t *)arg);
		if (hdl == NULL) {
			return (DDI_NOT_WELL_FORMED);
		}
		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
		return (DDI_SUCCESS);

	case DDI_CTLOPS_UNINITCHILD:
		ddi_set_name_addr((dev_info_t *)arg, NULL);
		ndi_prop_remove_all((dev_info_t *)arg);
		return (DDI_SUCCESS);

	default:
		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
	}
}

/*
 * Functions for device drivers.
 */
bd_handle_t
bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
{
	bd_handle_t	hdl;

	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
	if (hdl != NULL) {
		hdl->h_ops = *ops;
		hdl->h_dma = dma;
		hdl->h_private = private;
	}

	return (hdl);
}

void
bd_free_handle(bd_handle_t hdl)
{
	kmem_free(hdl, sizeof (*hdl));
}

int
bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
{
	dev_info_t	*child;
	bd_drive_t	drive = { 0 };

	/* if drivers don't override this, make it assume none */
	drive.d_lun = -1;
	hdl->h_ops.o_drive_info(hdl->h_private, &drive);

	hdl->h_parent = dip;
	hdl->h_name = "blkdev";

	if (drive.d_lun >= 0) {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X,%X",
		    drive.d_target, drive.d_lun);
	} else {
		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X",
		    drive.d_target);
	}
	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
	    &child) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    "blkdev", hdl->h_addr);
		return (DDI_FAILURE);
	}

	ddi_set_parent_data(child, hdl);
	hdl->h_child = child;

	if (ndi_devi_online(child, 0) == NDI_FAILURE) {
		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    hdl->h_name, hdl->h_addr);
		(void) ndi_devi_free(child);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

int
bd_detach_handle(bd_handle_t hdl)
{
	int	circ;
	int	rv;
	char	*devnm;

	if (hdl->h_child == NULL) {
		return (DDI_SUCCESS);
	}
	ndi_devi_enter(hdl->h_parent, &circ);
	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
		rv = ddi_remove_child(hdl->h_child, 0);
	} else {
		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
		(void) ddi_deviname(hdl->h_child, devnm);
		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
		kmem_free(devnm, MAXNAMELEN + 1);
	}
	if (rv == 0) {
		hdl->h_child = NULL;
	}

	ndi_devi_exit(hdl->h_parent, circ);
	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
}
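/*
 * Editorial sketch (not from the original source): a minimal outline of
 * how a parent storage driver might consume the API above from its own
 * attach(9E).  The names mydsk_attach, mydsk_state_t, mydsk_ops and
 * mydsk_dma_attr are hypothetical.
 *
 *	static int
 *	mydsk_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 *	{
 *		mydsk_state_t *sp;
 *		...
 *		sp->s_bdh = bd_alloc_handle(sp, &mydsk_ops,
 *		    &mydsk_dma_attr, KM_SLEEP);
 *		if (sp->s_bdh == NULL)
 *			return (DDI_FAILURE);
 *		if (bd_attach_handle(dip, sp->s_bdh) != DDI_SUCCESS) {
 *			bd_free_handle(sp->s_bdh);
 *			return (DDI_FAILURE);
 *		}
 *		return (DDI_SUCCESS);
 *	}
 *
 * Teardown is the mirror image: bd_detach_handle() followed by
 * bd_free_handle() from detach(9E).
 */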
void
bd_xfer_done(bd_xfer_t *xfer, int err)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	buf_t		*bp = xi->i_bp;
	int		rv = DDI_SUCCESS;
	bd_t		*bd = xi->i_bd;
	size_t		len;

	if (err != 0) {
		bd_runq_exit(xi, err);
		atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, err);
		biodone(bp);
		return;
	}

	xi->i_cur_win++;
	xi->i_resid -= xi->i_len;

	if (xi->i_resid == 0) {
		/* Job completed successfully! */
		bd_runq_exit(xi, 0);

		bd_xfer_free(xi);
		biodone(bp);
		return;
	}

	xi->i_blkno += xi->i_nblks;

	if (bd->d_use_dma) {
		/* More transfer still pending... advance to next DMA window. */
		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
	} else {
		/* Advance memory window. */
		xi->i_kaddr += xi->i_len;
		xi->i_offset += xi->i_len;
		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
	}


	if ((rv != DDI_SUCCESS) ||
	    (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
		bd_runq_exit(xi, EFAULT);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, EFAULT);
		biodone(bp);
		return;
	}
	xi->i_len = len;
	xi->i_nblks = len >> xi->i_blkshift;

	/* Submit next window to hardware. */
	rv = xi->i_func(bd->d_private, &xi->i_public);
	if (rv != 0) {
		bd_runq_exit(xi, rv);

		atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, rv);
		biodone(bp);
	}
}

void
bd_error(bd_xfer_t *xfer, int error)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	bd_t		*bd = xi->i_bd;

	switch (error) {
	case BD_ERR_MEDIA:
		atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32);
		break;
	case BD_ERR_NTRDY:
		atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32);
		break;
	case BD_ERR_NODEV:
		atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32);
		break;
	case BD_ERR_RECOV:
		atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32);
		break;
	case BD_ERR_ILLRQ:
		atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32);
		break;
	case BD_ERR_PFA:
		atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32);
		break;
	default:
		cmn_err(CE_PANIC, "bd_error: unknown error type %d", error);
		break;
	}
}

void
bd_state_change(bd_handle_t hdl)
{
	bd_t		*bd;

	if ((bd = hdl->h_bd) != NULL) {
		bd_update_state(bd);
	}
}

void
bd_mod_init(struct dev_ops *devops)
{
	static struct bus_ops bd_bus_ops = {
		BUSO_REV,		/* busops_rev */
		nullbusmap,		/* bus_map */
		NULL,			/* bus_get_intrspec (OBSOLETE) */
		NULL,			/* bus_add_intrspec (OBSOLETE) */
		NULL,			/* bus_remove_intrspec (OBSOLETE) */
		i_ddi_map_fault,	/* bus_map_fault */
		NULL,			/* bus_dma_map (OBSOLETE) */
		ddi_dma_allochdl,	/* bus_dma_allochdl */
		ddi_dma_freehdl,	/* bus_dma_freehdl */
		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
		ddi_dma_flush,		/* bus_dma_flush */
		ddi_dma_win,		/* bus_dma_win */
		ddi_dma_mctl,		/* bus_dma_ctl */
		bd_bus_ctl,		/* bus_ctl */
		ddi_bus_prop_op,	/* bus_prop_op */
		NULL,			/* bus_get_eventcookie */
		NULL,			/* bus_add_eventcall */
		NULL,			/* bus_remove_eventcall */
		NULL,			/* bus_post_event */
		NULL,			/* bus_intr_ctl (OBSOLETE) */
		NULL,			/* bus_config */
		NULL,			/* bus_unconfig */
		NULL,			/* bus_fm_init */
		NULL,			/* bus_fm_fini */
		NULL,			/* bus_fm_access_enter */
		NULL,			/* bus_fm_access_exit */
		NULL,			/* bus_power */
		NULL,			/* bus_intr_op */
	};

	devops->devo_bus_ops = &bd_bus_ops;

	/*
	 * NB: The device driver is free to supply its own
	 * character entry device support.
	 */
}

void
bd_mod_fini(struct dev_ops *devops)
{
	devops->devo_bus_ops = NULL;
}
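/*
 * Editorial sketch (not from the original source): bd_mod_init() is
 * meant to be called from the parent driver's _init(9E) before
 * mod_install(), so that the blkdev bus_ops are in place when the child
 * "blkdev" node is configured; bd_mod_fini() undoes it from _fini(9E).
 * The names mydsk_dev_ops and mydsk_modlinkage are hypothetical.
 *
 *	int
 *	_init(void)
 *	{
 *		int rv;
 *
 *		bd_mod_init(&mydsk_dev_ops);
 *		if ((rv = mod_install(&mydsk_modlinkage)) != 0)
 *			bd_mod_fini(&mydsk_dev_ops);
 *		return (rv);
 *	}
 */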