/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
 * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2017 The MathWorks, Inc. All rights reserved.
 * Copyright 2019 Western Digital Corporation.
 * Copyright 2020 Joyent, Inc.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/aio_req.h>
#include <sys/cred.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/kstat.h>
#include <sys/fs/dv_node.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/note.h>
#include <sys/blkdev.h>
#include <sys/scsi/impl/inquiry.h>

/*
 * blkdev is a driver which provides a lot of the common functionality
 * a block device driver may need, and helps by removing code which
 * is frequently duplicated in block device drivers.
 *
 * Within this driver all the struct cb_ops functions required for a
 * block device driver are written with appropriate call back functions
 * to be provided by the parent driver.
 *
 * To use blkdev, a driver needs to:
 *	1. Create a bd_ops_t structure which has the call back operations
 *	   blkdev will use.
 *	2. Create a handle by calling bd_alloc_handle(). One of the
 *	   arguments to this function is the bd_ops_t.
 *	3. Call bd_attach_handle(). This will instantiate a blkdev device
 *	   as a child device node of the calling driver.
 *
 * A sketch of this sequence is given just below this comment.
 *
 * A parent driver is not restricted to just allocating and attaching a
 * single instance, it may attach as many as it wishes. For each handle
 * attached, appropriate entries in /dev/[r]dsk are created.
 *
 * The bd_ops_t routines that a parent of blkdev needs to provide are:
 *
 * o_drive_info: Provide information to blkdev such as how many I/O queues
 *		to create and the size of those queues. Also some device
 *		specifics such as EUI, vendor, product, model, serial
 *		number ....
 *
 * o_media_info: Provide information about the media, e.g. size and
 *		block size.
 *
 * o_devid_init: Creates and initializes the device id. Typically calls
 *		ddi_devid_init().
 *
 * o_sync_cache: Issues a device appropriate command to flush any write
 *		caches.
 *
 * o_read: Read data as described by bd_xfer_t argument.
 *
 * o_write: Write data as described by bd_xfer_t argument.
 *
 *
 * Queues
 * ------
 * Part of the drive_info data is a queue count. blkdev will create
 * "queue count" number of waitq/runq pairs. Each waitq/runq pair
 * operates independently. As an I/O is scheduled up to the parent
 * driver via o_read or o_write its queue number is given. If the
 * parent driver supports multiple hardware queues it can then select
 * where to submit the I/O request.
 *
 * Currently blkdev uses a simplistic round-robin queue selection method.
 * It has the advantage that it is lockless. In the future it will be
 * worthwhile reviewing this strategy for something which prioritizes queues
 * depending on how busy they are.
 *
 * Each waitq/runq pair is protected by its mutex (q_iomutex). Incoming
 * I/O requests are initially added to the waitq. They are taken off the
 * waitq, added to the runq and submitted, provided the runq is shorter
 * than the qsize specified in the drive_info. As an I/O request
 * completes, the parent driver is required to call bd_xfer_done(), which
 * will remove the I/O request from the runq and pass I/O completion
 * status up the stack.
 *
 * Locks
 * -----
 * There are 4 instance global locks: d_ocmutex, d_ksmutex, d_errmutex
 * and d_statemutex, as well as a q_iomutex per waitq/runq pair.
 *
 * Lock Hierarchy
 * --------------
 * The only two locks which may be held simultaneously are q_iomutex and
 * d_ksmutex. In all cases q_iomutex must be acquired before d_ksmutex.
 */
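/*
 * For illustration only, a parent driver "xx" might wire blkdev up as
 * follows. This is a sketch with hypothetical xx_* names, not a real
 * driver, and error handling is elided:
 *
 *	static bd_ops_t xx_bd_ops = {
 *		.o_version	= BD_OPS_CURRENT_VERSION,
 *		.o_drive_info	= xx_drive_info,
 *		.o_media_info	= xx_media_info,
 *		.o_devid_init	= xx_devid_init,
 *		.o_sync_cache	= xx_sync_cache,
 *		.o_read		= xx_read,
 *		.o_write	= xx_write,
 *	};
 *
 * Then, in xx_attach(), once the hardware is initialized:
 *
 *	xx->xx_bdh = bd_alloc_handle(xx, &xx_bd_ops, &xx_dma_attr,
 *	    KM_SLEEP);
 *	if (xx->xx_bdh == NULL ||
 *	    bd_attach_handle(dip, xx->xx_bdh) != DDI_SUCCESS)
 *		goto fail;
 *
 * and in the command completion path, for each bd_xfer_t previously
 * handed to o_read/o_write:
 *
 *	bd_xfer_done(xfer, error);
 */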
#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)

typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;
typedef struct bd_queue bd_queue_t;
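/*
 * The minor number thus encodes both an instance and a partition. For
 * example (illustrative arithmetic only): minor number 130 selects
 * instance 130 / 64 = 2 and partition 130 % 64 = 2.
 */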
struct bd {
	void		*d_private;
	dev_info_t	*d_dip;
	kmutex_t	d_ocmutex;
	kmutex_t	d_ksmutex;
	kmutex_t	d_errmutex;
	kmutex_t	d_statemutex;
	kcondvar_t	d_statecv;
	enum dkio_state	d_state;
	cmlb_handle_t	d_cmlbh;
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];	/* bit mask */
	uint64_t	d_io_counter;

	uint32_t	d_qcount;
	uint32_t	d_qactive;
	uint32_t	d_maxxfer;
	uint32_t	d_blkshift;
	uint32_t	d_pblkshift;
	uint64_t	d_numblks;
	ddi_devid_t	d_devid;

	kmem_cache_t	*d_cache;
	bd_queue_t	*d_queues;
	kstat_t		*d_ksp;
	kstat_io_t	*d_kiop;
	kstat_t		*d_errstats;
	struct bd_errstats *d_kerr;

	boolean_t	d_rdonly;
	boolean_t	d_ssd;
	boolean_t	d_removable;
	boolean_t	d_hotpluggable;
	boolean_t	d_use_dma;

	ddi_dma_attr_t	d_dma;
	bd_ops_t	d_ops;
	bd_handle_t	d_handle;
};

struct bd_handle {
	bd_ops_t	h_ops;
	ddi_dma_attr_t	*h_dma;
	dev_info_t	*h_parent;
	dev_info_t	*h_child;
	void		*h_private;
	bd_t		*h_bd;
	char		*h_name;
	char		h_addr[30];	/* enough for w%0.16x,%X */
};

struct bd_xfer_impl {
	bd_xfer_t	i_public;
	list_node_t	i_linkage;
	bd_t		*i_bd;
	buf_t		*i_bp;
	bd_queue_t	*i_bq;
	uint_t		i_num_win;
	uint_t		i_cur_win;
	off_t		i_offset;
	int		(*i_func)(void *, bd_xfer_t *);
	uint32_t	i_blkshift;
	size_t		i_len;
	size_t		i_resid;
};

struct bd_queue {
	kmutex_t	q_iomutex;
	uint32_t	q_qsize;
	uint32_t	q_qactive;
	list_t		q_runq;
	list_t		q_waitq;
};

#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags
#define	i_qnum		i_public.x_qnum


/*
 * Private prototypes.
 */

static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);
static void bd_create_errstats(bd_t *, int, bd_drive_t *);
static void bd_destroy_errstats(bd_t *);
static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *);
static void bd_init_errstats(bd_t *, bd_drive_t *);
static void bd_fini_errstats(bd_t *);

static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *, bd_queue_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);
static int bd_check_uio(dev_t, struct uio *);

struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};
static struct cb_ops bd_cb_ops = {
	bd_open,		/* open */
	bd_close,		/* close */
	bd_strategy,		/* strategy */
	nodev,			/* print */
	bd_dump,		/* dump */
	bd_read,		/* read */
	bd_write,		/* write */
	bd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	bd_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_64BIT | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	bd_aread,		/* async read */
	bd_awrite		/* async write */
};

struct dev_ops bd_dev_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	bd_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	bd_attach,		/* attach */
	bd_detach,		/* detach */
	nodev,			/* reset */
	&bd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"Generic Block Device",
	&bd_dev_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, { &modldrv, NULL }
};

static void *bd_state;
static krwlock_t bd_lock;

int
_init(void)
{
	int	rv;

	rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
	if (rv != DDI_SUCCESS) {
		return (rv);
	}
	rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
	rv = mod_install(&modlinkage);
	if (rv != DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_fini(void)
{
	int	rv;

	rv = mod_remove(&modlinkage);
	if (rv == DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static int
bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	bd_t	*bd;
	minor_t	inst;

	_NOTE(ARGUNUSED(dip));

	inst = BDINST((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		bd = ddi_get_soft_state(bd_state, inst);
		if (bd == NULL) {
			return (DDI_FAILURE);
		}
		*resultp = (void *)bd->d_dip;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(intptr_t)inst;
		break;

	default:
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

static void
bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
{
	int	ilen;
	char	*data_string;

	ilen = scsi_ascii_inquiry_len(data, len);
	ASSERT3U(ilen, <=, len);
	if (ilen <= 0)
		return;
	/* ensure null termination */
	data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
	bcopy(data, data_string, ilen);
	(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
	kmem_free(data_string, ilen + 1);
}

static void
bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
{
	if (drive->d_vendor_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
		    drive->d_vendor, drive->d_vendor_len);

	if (drive->d_product_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
		    drive->d_product, drive->d_product_len);

	if (drive->d_serial_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
		    drive->d_serial, drive->d_serial_len);

	if (drive->d_revision_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
		    drive->d_revision, drive->d_revision_len);
}

static void
bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive)
{
	char	ks_module[KSTAT_STRLEN];
	char	ks_name[KSTAT_STRLEN];
	int	ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t);

	if (bd->d_errstats != NULL)
		return;

	(void) snprintf(ks_module, sizeof (ks_module), "%serr",
	    ddi_driver_name(bd->d_dip));
	(void) snprintf(ks_name, sizeof (ks_name), "%s%d,err",
	    ddi_driver_name(bd->d_dip), inst);

	bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error",
	    KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);

	mutex_init(&bd->d_errmutex, NULL, MUTEX_DRIVER, NULL);
	if (bd->d_errstats == NULL) {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat. The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats),
		    KM_SLEEP);
	} else {
		bd->d_errstats->ks_lock = &bd->d_errmutex;
		bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data;
	}

	kstat_named_init(&bd->d_kerr->bd_softerrs, "Soft Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_harderrs, "Hard Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);

	if (drive->d_model_len > 0) {
		kstat_named_init(&bd->d_kerr->bd_model, "Model",
		    KSTAT_DATA_STRING);
	} else {
		kstat_named_init(&bd->d_kerr->bd_vid, "Vendor",
		    KSTAT_DATA_STRING);
		kstat_named_init(&bd->d_kerr->bd_pid, "Product",
		    KSTAT_DATA_STRING);
	}

	kstat_named_init(&bd->d_kerr->bd_revision, "Revision",
	    KSTAT_DATA_STRING);
	kstat_named_init(&bd->d_kerr->bd_serial, "Serial No",
	    KSTAT_DATA_STRING);
	kstat_named_init(&bd->d_kerr->bd_capacity, "Size",
	    KSTAT_DATA_ULONGLONG);
	kstat_named_init(&bd->d_kerr->bd_rq_media_err, "Media Error",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err, "Device Not Ready",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_nodev_err, "No Device",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_recov_err, "Recoverable",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_illrq_err, "Illegal Request",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_pfa_err,
	    "Predictive Failure Analysis", KSTAT_DATA_UINT32);

	/* only install the real kstat if we actually created one */
	if (bd->d_errstats != NULL) {
		bd->d_errstats->ks_private = bd;
		kstat_install(bd->d_errstats);
	}
	bd_init_errstats(bd, drive);
}

static void
bd_destroy_errstats(bd_t *bd)
{
	if (bd->d_errstats != NULL) {
		bd_fini_errstats(bd);
		kstat_delete(bd->d_errstats);
		bd->d_errstats = NULL;
	} else {
		kmem_free(bd->d_kerr, sizeof (struct bd_errstats));
		bd->d_kerr = NULL;
		mutex_destroy(&bd->d_errmutex);
	}
}

static void
bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt)
{
	char	*tmp;
	size_t	km_len;

	if (KSTAT_NAMED_STR_PTR(k) == NULL) {
		if (len > 0)
			km_len = strnlen(str, len);
		else if (alt != NULL)
			km_len = strlen(alt);
		else
			return;

		tmp = kmem_alloc(km_len + 1, KM_SLEEP);
		bcopy(len > 0 ? str : alt, tmp, km_len);
		tmp[km_len] = '\0';

		kstat_named_setstr(k, tmp);
	}
}

static void
bd_errstats_clrstr(kstat_named_t *k)
{
	if (KSTAT_NAMED_STR_PTR(k) == NULL)
		return;

	kmem_free(KSTAT_NAMED_STR_PTR(k), KSTAT_NAMED_STR_BUFLEN(k));
	kstat_named_setstr(k, NULL);
}

static void
bd_init_errstats(bd_t *bd, bd_drive_t *drive)
{
	struct bd_errstats	*est = bd->d_kerr;

	mutex_enter(&bd->d_errmutex);

	if (drive->d_model_len > 0 &&
	    KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) {
		bd_errstats_setstr(&est->bd_model, drive->d_model,
		    drive->d_model_len, NULL);
	} else {
		bd_errstats_setstr(&est->bd_vid, drive->d_vendor,
		    drive->d_vendor_len, "Unknown ");
		bd_errstats_setstr(&est->bd_pid, drive->d_product,
		    drive->d_product_len, "Unknown ");
	}

	bd_errstats_setstr(&est->bd_revision, drive->d_revision,
	    drive->d_revision_len, "0001");
	bd_errstats_setstr(&est->bd_serial, drive->d_serial,
	    drive->d_serial_len, "0 ");

	mutex_exit(&bd->d_errmutex);
}

static void
bd_fini_errstats(bd_t *bd)
{
	struct bd_errstats	*est = bd->d_kerr;

	mutex_enter(&bd->d_errmutex);

	bd_errstats_clrstr(&est->bd_model);
	bd_errstats_clrstr(&est->bd_vid);
	bd_errstats_clrstr(&est->bd_pid);
	bd_errstats_clrstr(&est->bd_revision);
	bd_errstats_clrstr(&est->bd_serial);

	mutex_exit(&bd->d_errmutex);
}

static void
bd_queues_free(bd_t *bd)
{
	uint32_t	i;

	for (i = 0; i < bd->d_qcount; i++) {
		bd_queue_t *bq = &bd->d_queues[i];

		mutex_destroy(&bq->q_iomutex);
		list_destroy(&bq->q_waitq);
		list_destroy(&bq->q_runq);
	}

	kmem_free(bd->d_queues, sizeof (*bd->d_queues) * bd->d_qcount);
}

static int
bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int		inst;
	bd_handle_t	hdl;
	bd_t		*bd;
	bd_drive_t	drive;
	uint32_t	i;
	int		rv;
	char		name[16];
	char		kcache[32];

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* We don't do anything native for suspend/resume */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	inst = ddi_get_instance(dip);
	hdl = ddi_get_parent_data(dip);

	(void) snprintf(name, sizeof (name), "%s%d",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);

	if (hdl == NULL) {
		cmn_err(CE_WARN, "%s: missing parent data!", name);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
		return (DDI_FAILURE);
	}
	bd = ddi_get_soft_state(bd_state, inst);

	if (hdl->h_dma) {
		bd->d_dma = *(hdl->h_dma);
		bd->d_dma.dma_attr_granular =
		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
		bd->d_use_dma = B_TRUE;

		if (bd->d_maxxfer &&
		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
			cmn_err(CE_WARN,
			    "%s: inconsistent maximum transfer size!",
			    name);
			/* We force it */
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		} else {
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		}
	} else {
		bd->d_use_dma = B_FALSE;
		if (bd->d_maxxfer == 0) {
			bd->d_maxxfer = 1024 * 1024;
		}
	}
	bd->d_ops = hdl->h_ops;
	bd->d_private = hdl->h_private;
	bd->d_blkshift = 9;	/* 512 bytes, to start */

	if (bd->d_maxxfer % DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
	}
	if (bd->d_maxxfer < DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	bd->d_dip = dip;
	bd->d_handle = hdl;
	hdl->h_bd = bd;
	ddi_set_driver_private(dip, bd);

	mutex_init(&bd->d_ksmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);

	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);

	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
	if (bd->d_ksp != NULL) {
		bd->d_ksp->ks_lock = &bd->d_ksmutex;
		kstat_install(bd->d_ksp);
		bd->d_kiop = bd->d_ksp->ks_data;
	} else {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat. The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
	}

	cmlb_alloc_handle(&bd->d_cmlbh);

	bd->d_state = DKIO_NONE;

	bzero(&drive, sizeof (drive));
	/*
	 * Default to one queue, parent driver can override.
	 */
	drive.d_qcount = 1;
	bd->d_ops.o_drive_info(bd->d_private, &drive);
	bd->d_qcount = drive.d_qcount;
	bd->d_removable = drive.d_removable;
	bd->d_hotpluggable = drive.d_hotpluggable;

	if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
		bd->d_maxxfer = drive.d_maxxfer;

	bd_create_inquiry_props(dip, &drive);

	bd_create_errstats(bd, inst, &drive);
	bd_update_state(bd);

	bd->d_queues = kmem_alloc(sizeof (*bd->d_queues) * bd->d_qcount,
	    KM_SLEEP);
	for (i = 0; i < bd->d_qcount; i++) {
		bd_queue_t *bq = &bd->d_queues[i];

		bq->q_qsize = drive.d_qsize;
		bq->q_qactive = 0;
		mutex_init(&bq->q_iomutex, NULL, MUTEX_DRIVER, NULL);

		list_create(&bq->q_waitq, sizeof (bd_xfer_impl_t),
		    offsetof(struct bd_xfer_impl, i_linkage));
		list_create(&bq->q_runq, sizeof (bd_xfer_impl_t),
		    offsetof(struct bd_xfer_impl, i_linkage));
	}

	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
	    bd->d_removable, bd->d_hotpluggable,
	    /*LINTED: E_BAD_PTR_CAST_ALIGN*/
	    *(uint64_t *)drive.d_eui64 != 0 ? DDI_NT_BLOCK_BLKDEV :
	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
	if (rv != 0) {
		bd_queues_free(bd);
		bd_destroy_errstats(bd);
		cmlb_free_handle(&bd->d_cmlbh);

		if (bd->d_ksp != NULL) {
			kstat_delete(bd->d_ksp);
			bd->d_ksp = NULL;
		} else {
			kmem_free(bd->d_kiop, sizeof (kstat_io_t));
			bd->d_kiop = NULL;
		}

		kmem_cache_destroy(bd->d_cache);
		cv_destroy(&bd->d_statecv);
		mutex_destroy(&bd->d_statemutex);
		mutex_destroy(&bd->d_ocmutex);
		mutex_destroy(&bd->d_ksmutex);
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	if (bd->d_ops.o_devid_init != NULL) {
		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
		if (rv == DDI_SUCCESS) {
			if (ddi_devid_register(dip, bd->d_devid) !=
			    DDI_SUCCESS) {
				cmn_err(CE_WARN,
				    "%s: unable to register devid", name);
			}
		}
	}

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers). Also set up properties
	 * used by HAL to identify removable media.
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);
	if (bd->d_removable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "removable-media", NULL, 0);
	}
	if (bd->d_hotpluggable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "hotpluggable", NULL, 0);
	}

	ddi_report_dev(dip);

	return (DDI_SUCCESS);
}

static int
bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	bd_t	*bd;

	bd = ddi_get_driver_private(dip);

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		/* We don't suspend, but our parent does */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	if (bd->d_ksp != NULL) {
		kstat_delete(bd->d_ksp);
		bd->d_ksp = NULL;
	} else {
		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
	}

	bd_destroy_errstats(bd);
	cmlb_detach(bd->d_cmlbh, 0);
	cmlb_free_handle(&bd->d_cmlbh);
	if (bd->d_devid)
		ddi_devid_free(bd->d_devid);
	kmem_cache_destroy(bd->d_cache);
	mutex_destroy(&bd->d_ksmutex);
	mutex_destroy(&bd->d_ocmutex);
	mutex_destroy(&bd->d_statemutex);
	cv_destroy(&bd->d_statecv);
	bd_queues_free(bd);
	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
	return (DDI_SUCCESS);
}

static int
bd_xfer_ctor(void *buf, void *arg, int kmflag)
{
	bd_xfer_impl_t	*xi;
	bd_t		*bd = arg;
	int		(*dcb)(caddr_t);

	if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
		dcb = DDI_DMA_SLEEP;
	} else {
		dcb = DDI_DMA_DONTWAIT;
	}

	xi = buf;
	bzero(xi, sizeof (*xi));
	xi->i_bd = bd;

	if (bd->d_use_dma) {
		if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
		    &xi->i_dmah) != DDI_SUCCESS) {
			return (-1);
		}
	}

	return (0);
}

static void
bd_xfer_dtor(void *buf, void *arg)
{
	bd_xfer_impl_t	*xi = buf;

	_NOTE(ARGUNUSED(arg));

	if (xi->i_dmah)
		ddi_dma_free_handle(&xi->i_dmah);
	xi->i_dmah = NULL;
}

static bd_xfer_impl_t *
bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
    int kmflag)
{
	bd_xfer_impl_t	*xi;
	int		rv = 0;
	int		status;
	unsigned	dir;
	int		(*cb)(caddr_t);
	size_t		len;
	uint32_t	shift;

	if (kmflag == KM_SLEEP) {
		cb = DDI_DMA_SLEEP;
	} else {
		cb = DDI_DMA_DONTWAIT;
	}

	xi = kmem_cache_alloc(bd->d_cache, kmflag);
	if (xi == NULL) {
		bioerror(bp, ENOMEM);
		return (NULL);
	}

	ASSERT(bp);

	xi->i_bp = bp;
	xi->i_func = func;
	xi->i_blkno = bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT);

	if (bp->b_bcount == 0) {
		xi->i_len = 0;
		xi->i_nblks = 0;
		xi->i_kaddr = NULL;
		xi->i_resid = 0;
		xi->i_num_win = 0;
		goto done;
	}

	if (bp->b_flags & B_READ) {
		dir = DDI_DMA_READ;
		xi->i_func = bd->d_ops.o_read;
	} else {
		dir = DDI_DMA_WRITE;
		xi->i_func = bd->d_ops.o_write;
	}

	shift = bd->d_blkshift;
	xi->i_blkshift = shift;

	if (!bd->d_use_dma) {
		bp_mapin(bp);
		rv = 0;
		xi->i_offset = 0;
		xi->i_num_win =
		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
		xi->i_cur_win = 0;
		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
		xi->i_nblks = xi->i_len >> shift;
		xi->i_kaddr = bp->b_un.b_addr;
		xi->i_resid = bp->b_bcount;
	} else {

		/*
		 * We have to use consistent DMA if the address is misaligned.
		 */
		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
		} else {
			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
		}

		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
		    NULL, &xi->i_dmac, &xi->i_ndmac);
		switch (status) {
		case DDI_DMA_MAPPED:
			xi->i_num_win = 1;
			xi->i_cur_win = 0;
			xi->i_offset = 0;
			xi->i_len = bp->b_bcount;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_PARTIAL_MAP:
			xi->i_cur_win = 0;

			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
			    DDI_SUCCESS) ||
			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
			    &len, &xi->i_dmac, &xi->i_ndmac) !=
			    DDI_SUCCESS) ||
			    (P2PHASE(len, (1U << shift)) != 0)) {
				(void) ddi_dma_unbind_handle(xi->i_dmah);
				rv = EFAULT;
				goto done;
			}
			xi->i_len = len;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_NORESOURCES:
			rv = EAGAIN;
			goto done;
		case DDI_DMA_TOOBIG:
			rv = EINVAL;
			goto done;
		case DDI_DMA_NOMAPPING:
		case DDI_DMA_INUSE:
		default:
			rv = EFAULT;
			goto done;
		}
	}

done:
	if (rv != 0) {
		kmem_cache_free(bd->d_cache, xi);
		bioerror(bp, rv);
		return (NULL);
	}

	return (xi);
}
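/*
 * To illustrate the windowing set up above (numbers are hypothetical):
 * a 1 MB request against a device with a d_maxxfer of 256 KB and no DMA
 * handle yields i_num_win = (1048576 + 262143) / 262144 = 4 windows of
 * at most 256 KB each; bd_xfer_done() advances i_kaddr and i_offset
 * between windows. With DMA, ddi_dma_buf_bind_handle() may instead
 * return DDI_DMA_PARTIAL_MAP, in which case the window count comes from
 * ddi_dma_numwin() and bd_xfer_done() steps through the windows with
 * ddi_dma_getwin().
 */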
static void
bd_xfer_free(bd_xfer_impl_t *xi)
{
	if (xi->i_dmah) {
		(void) ddi_dma_unbind_handle(xi->i_dmah);
	}
	kmem_cache_free(xi->i_bd->d_cache, xi);
}

static int
bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	dev_t		dev = *devp;
	bd_t		*bd;
	minor_t		part;
	minor_t		inst;
	uint64_t	mask;
	boolean_t	ndelay;
	int		rv;
	diskaddr_t	nblks;
	diskaddr_t	lba;

	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	if (otyp >= OTYPCNT)
		return (EINVAL);

	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;

	/*
	 * Block any DR events from changing the set of registered
	 * devices while we function.
	 */
	rw_enter(&bd_lock, RW_READER);
	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);

	ASSERT(part < 64);
	mask = (1ULL << part);

	bd_update_state(bd);

	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {

		/* non-blocking opens are allowed to succeed */
		if (!ndelay) {
			rv = ENXIO;
			goto done;
		}
	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
	    NULL, NULL, 0) == 0) {

		/*
		 * We read the partinfo, verify valid ranges. If the
		 * partition is invalid, and we aren't blocking or
		 * doing a raw access, then fail. (Non-blocking and
		 * raw accesses can still succeed to allow a disk with
		 * bad partition data to be opened by format and fdisk.)
		 */
		if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
			rv = ENXIO;
			goto done;
		}
	} else if (!ndelay) {
		/*
		 * cmlb_partinfo failed -- invalid partition or no
		 * disk label.
		 */
		rv = ENXIO;
		goto done;
	}

	if ((flag & FWRITE) && bd->d_rdonly) {
		rv = EROFS;
		goto done;
	}

	if ((bd->d_open_excl) & (mask)) {
		rv = EBUSY;
		goto done;
	}
	if (flag & FEXCL) {
		if (bd->d_open_lyr[part]) {
			rv = EBUSY;
			goto done;
		}
		for (int i = 0; i < OTYP_LYR; i++) {
			if (bd->d_open_reg[i] & mask) {
				rv = EBUSY;
				goto done;
			}
		}
	}

	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]++;
	} else {
		bd->d_open_reg[otyp] |= mask;
	}
	if (flag & FEXCL) {
		bd->d_open_excl |= mask;
	}

	rv = 0;
done:
	mutex_exit(&bd->d_ocmutex);
	rw_exit(&bd_lock);

	return (rv);
}

static int
bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	bd_t		*bd;
	minor_t		inst;
	minor_t		part;
	uint64_t	mask;
	boolean_t	last = B_TRUE;

	_NOTE(ARGUNUSED(flag));
	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	ASSERT(part < 64);
	mask = (1ULL << part);

	rw_enter(&bd_lock, RW_READER);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);
	if (bd->d_open_excl & mask) {
		bd->d_open_excl &= ~mask;
	}
	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]--;
	} else {
		bd->d_open_reg[otyp] &= ~mask;
	}
	for (int i = 0; i < 64; i++) {
		if (bd->d_open_lyr[i]) {
			last = B_FALSE;
		}
	}
	for (int i = 0; last && (i < OTYP_LYR); i++) {
		if (bd->d_open_reg[i]) {
			last = B_FALSE;
		}
	}
	mutex_exit(&bd->d_ocmutex);

	if (last) {
		cmlb_invalidate(bd->d_cmlbh, 0);
	}
	rw_exit(&bd_lock);

	return (0);
}

static int
bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
{
	minor_t		inst;
	minor_t		part;
	diskaddr_t	pstart;
	diskaddr_t	psize;
	bd_t		*bd;
	bd_xfer_impl_t	*xi;
	buf_t		*bp;
	int		rv;
	uint32_t	shift;
	daddr_t		d_blkno;
	int		d_nblk;

	rw_enter(&bd_lock, RW_READER);

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}
	shift = bd->d_blkshift;
	d_blkno = blkno >> (shift - DEV_BSHIFT);
	d_nblk = nblk >> (shift - DEV_BSHIFT);
	/*
	 * do cmlb, but do it synchronously unless we already have the
	 * partition (which we probably should.)
	 */
	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
	    (void *)1)) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	if ((d_blkno + d_nblk) > psize) {
		rw_exit(&bd_lock);
		return (EINVAL);
	}
	bp = getrbuf(KM_NOSLEEP);
	if (bp == NULL) {
		rw_exit(&bd_lock);
		return (ENOMEM);
	}

	bp->b_bcount = nblk << DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;
	bp->b_lblkno = blkno;
	bp->b_un.b_addr = caddr;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP);
	if (xi == NULL) {
		rw_exit(&bd_lock);
		freerbuf(bp);
		return (ENOMEM);
	}
	xi->i_blkno = d_blkno + pstart;
	xi->i_flags = BD_XFER_POLL;
	bd_submit(bd, xi);
	rw_exit(&bd_lock);

	/*
	 * Generally, we should have run this entirely synchronously
	 * at this point and the biowait call should be a no-op. If
	 * it didn't happen this way, it's a bug in the underlying
	 * driver not honoring BD_XFER_POLL.
	 */
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}

void
bd_minphys(struct buf *bp)
{
	minor_t	inst;
	bd_t	*bd;

	inst = BDINST(bp->b_edev);

	bd = ddi_get_soft_state(bd_state, inst);

	/*
	 * In a non-debug kernel, bd_strategy will catch !bd as
	 * well, and will fail nicely.
	 */
	ASSERT(bd);

	if (bp->b_bcount > bd->d_maxxfer)
		bp->b_bcount = bd->d_maxxfer;
}

static int
bd_check_uio(dev_t dev, struct uio *uio)
{
	bd_t		*bd;
	uint32_t	shift;

	if ((bd = ddi_get_soft_state(bd_state, BDINST(dev))) == NULL) {
		return (ENXIO);
	}

	shift = bd->d_blkshift;
	if ((P2PHASE(uio->uio_loffset, (1U << shift)) != 0) ||
	    (P2PHASE(uio->uio_iov->iov_len, (1U << shift)) != 0)) {
		return (EINVAL);
	}

	return (0);
}
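/*
 * For example (illustrative numbers only): on media with 4096-byte
 * blocks (d_blkshift of 12), a transfer at uio_loffset 8192 with an
 * 8192-byte iovec passes the check above, while one at offset 6144
 * fails with EINVAL, since P2PHASE(6144, 4096) != 0.
 */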
static int
bd_read(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	int ret = bd_check_uio(dev, uio);
	if (ret != 0) {
		return (ret);
	}
	return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
}

static int
bd_write(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	int ret = bd_check_uio(dev, uio);
	if (ret != 0) {
		return (ret);
	}
	return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
}

static int
bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	int ret = bd_check_uio(dev, aio->aio_uio);
	if (ret != 0) {
		return (ret);
	}
	return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
}

static int
bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	int ret = bd_check_uio(dev, aio->aio_uio);
	if (ret != 0) {
		return (ret);
	}
	return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
}

static int
bd_strategy(struct buf *bp)
{
	minor_t		inst;
	minor_t		part;
	bd_t		*bd;
	diskaddr_t	p_lba;
	diskaddr_t	p_nblks;
	diskaddr_t	b_nblks;
	bd_xfer_impl_t	*xi;
	uint32_t	shift;
	int		(*func)(void *, bd_xfer_t *);
	diskaddr_t	lblkno;

	part = BDPART(bp->b_edev);
	inst = BDINST(bp->b_edev);

	ASSERT(bp);

	bp->b_resid = bp->b_bcount;

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
	    NULL, NULL, 0)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	shift = bd->d_blkshift;
	lblkno = bp->b_lblkno >> (shift - DEV_BSHIFT);
	if ((P2PHASE(bp->b_lblkno, (1U << (shift - DEV_BSHIFT))) != 0) ||
	    (P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
	    (lblkno > p_nblks)) {
		bioerror(bp, EINVAL);
		biodone(bp);
		return (0);
	}
	b_nblks = bp->b_bcount >> shift;
	if ((lblkno == p_nblks) || (bp->b_bcount == 0)) {
		biodone(bp);
		return (0);
	}

	if ((b_nblks + lblkno) > p_nblks) {
		bp->b_resid = ((lblkno + b_nblks - p_nblks) << shift);
		bp->b_bcount -= bp->b_resid;
	} else {
		bp->b_resid = 0;
	}
	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;

	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
	if (xi == NULL) {
		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
	}
	if (xi == NULL) {
		/* bd_xfer_alloc will have done bioerror */
		biodone(bp);
		return (0);
	}
	xi->i_blkno = lblkno + p_lba;

	bd_submit(bd, xi);

	return (0);
}

static int
bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
{
	minor_t		inst;
	uint16_t	part;
	bd_t		*bd;
	void		*ptr = (void *)arg;
	int		rv;

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		return (ENXIO);
	}

	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
	if (rv != ENOTTY)
		return (rv);

	if (rvalp != NULL) {
		/* the return value of the ioctl is 0 by default */
		*rvalp = 0;
	}

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo minfo;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&minfo, sizeof (minfo));
		minfo.dki_media_type = DK_FIXED_DISK;
		minfo.dki_lbsize = (1U << bd->d_blkshift);
		minfo.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCGMEDIAINFOEXT: {
		struct dk_minfo_ext miext;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&miext, sizeof (miext));
		miext.dki_media_type = DK_FIXED_DISK;
		miext.dki_lbsize = (1U << bd->d_blkshift);
		miext.dki_pbsize = (1U << bd->d_pblkshift);
		miext.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo cinfo;
		bzero(&cinfo, sizeof (cinfo));
		cinfo.dki_ctype = DKC_BLKDEV;
		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
		    "%s", ddi_driver_name(bd->d_dip));
		cinfo.dki_unit = inst;
		cinfo.dki_flags = DKI_FMTVOL;
		cinfo.dki_partition = part;
		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
		cinfo.dki_addr = 0;
		cinfo.dki_slave = 0;
		cinfo.dki_space = 0;
		cinfo.dki_prio = 0;
		cinfo.dki_vec = 0;
		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i;
		i = bd->d_removable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCHOTPLUGGABLE: {
		int i;
		i = bd->d_hotpluggable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREADONLY: {
		int i;
		i = bd->d_rdonly ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSOLIDSTATE: {
		int i;
		i = bd->d_ssd ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSTATE: {
		enum dkio_state state;
		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
			return (EFAULT);
		}
		if ((rv = bd_check_state(bd, &state)) != 0) {
			return (rv);
		}
		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc = NULL;

		if (flag & FKIOCTL)
			dkc = (void *)arg;

		rv = bd_flush_write_cache(bd, dkc);
		return (rv);
	}

	default:
		break;

	}
	return (ENOTTY);
}

static int
bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	bd_t	*bd;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
	if (bd == NULL)
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));

	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
	    valuep, lengthp, BDPART(dev), 0));
}


static int
bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
	bd_t		*bd;
	buf_t		*bp;
	bd_xfer_impl_t	*xi;
	int		rv;
	int		(*func)(void *, bd_xfer_t *);
	int		kmflag;

	/*
	 * If we are running in polled mode (such as during dump(9e)
	 * execution), then we cannot sleep for kernel allocations.
	 */
	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	if ((bp = getrbuf(kmflag)) == NULL) {
		return (ENOMEM);
	}

	switch (cmd) {
	case TG_READ:
		bp->b_flags = B_READ;
		func = bd->d_ops.o_read;
		break;
	case TG_WRITE:
		bp->b_flags = B_WRITE;
		func = bd->d_ops.o_write;
		break;
	default:
		freerbuf(bp);
		return (EINVAL);
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	xi = bd_xfer_alloc(bd, bp, func, kmflag);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}
	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
	xi->i_blkno = start;
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

static int
bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	bd_t	*bd;

	_NOTE(ARGUNUSED(tg_cookie));
	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	switch (cmd) {
	case TG_GETPHYGEOM:
	case TG_GETVIRTGEOM:
		/*
		 * We don't have any "geometry" as such, let cmlb
		 * fabricate something.
		 */
		return (ENOTTY);

	case TG_GETCAPACITY:
		bd_update_state(bd);
		*(diskaddr_t *)arg = bd->d_numblks;
		return (0);

	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = (1U << bd->d_blkshift);
		return (0);

	case TG_GETATTR:
		/*
		 * It turns out that cmlb really doesn't do much for
		 * non-writable media, but let's make the information
		 * available for it in case it does more in the
		 * future. (The value is currently used for
		 * triggering special behavior for CD-ROMs.)
		 */
		bd_update_state(bd);
		((tg_attribute_t *)arg)->media_is_writable =
		    bd->d_rdonly ? B_FALSE : B_TRUE;
		((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
		((tg_attribute_t *)arg)->media_is_rotational = B_FALSE;
		return (0);

	default:
		return (EINVAL);
	}
}


static void
bd_sched(bd_t *bd, bd_queue_t *bq)
{
	bd_xfer_impl_t	*xi;
	struct buf	*bp;
	int		rv;

	mutex_enter(&bq->q_iomutex);

	while ((bq->q_qactive < bq->q_qsize) &&
	    ((xi = list_remove_head(&bq->q_waitq)) != NULL)) {
		mutex_enter(&bd->d_ksmutex);
		kstat_waitq_to_runq(bd->d_kiop);
		mutex_exit(&bd->d_ksmutex);

		bq->q_qactive++;
		list_insert_tail(&bq->q_runq, xi);

		/*
		 * Submit the job to the driver. We drop the I/O mutex
		 * so that we can deal with the case where the driver
		 * completion routine calls back into us synchronously.
		 */
		mutex_exit(&bq->q_iomutex);

		rv = xi->i_func(bd->d_private, &xi->i_public);
		if (rv != 0) {
			bp = xi->i_bp;
			bioerror(bp, rv);
			biodone(bp);

			atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);

			mutex_enter(&bq->q_iomutex);

			mutex_enter(&bd->d_ksmutex);
			kstat_runq_exit(bd->d_kiop);
			mutex_exit(&bd->d_ksmutex);

			bq->q_qactive--;
			list_remove(&bq->q_runq, xi);
			bd_xfer_free(xi);
		} else {
			mutex_enter(&bq->q_iomutex);
		}
	}

	mutex_exit(&bq->q_iomutex);
}

static void
bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
{
	uint64_t	nv = atomic_inc_64_nv(&bd->d_io_counter);
	unsigned	q = nv % bd->d_qcount;
	bd_queue_t	*bq = &bd->d_queues[q];

	xi->i_bq = bq;
	xi->i_qnum = q;

	mutex_enter(&bq->q_iomutex);

	list_insert_tail(&bq->q_waitq, xi);

	mutex_enter(&bd->d_ksmutex);
	kstat_waitq_enter(bd->d_kiop);
	mutex_exit(&bd->d_ksmutex);

	mutex_exit(&bq->q_iomutex);

	bd_sched(bd, bq);
}

static void
bd_runq_exit(bd_xfer_impl_t *xi, int err)
{
	bd_t		*bd = xi->i_bd;
	buf_t		*bp = xi->i_bp;
	bd_queue_t	*bq = xi->i_bq;

	mutex_enter(&bq->q_iomutex);
	bq->q_qactive--;

	mutex_enter(&bd->d_ksmutex);
	kstat_runq_exit(bd->d_kiop);
	mutex_exit(&bd->d_ksmutex);

	list_remove(&bq->q_runq, xi);
	mutex_exit(&bq->q_iomutex);

	if (err == 0) {
		if (bp->b_flags & B_READ) {
			atomic_inc_uint(&bd->d_kiop->reads);
			atomic_add_64((uint64_t *)&bd->d_kiop->nread,
			    bp->b_bcount - xi->i_resid);
		} else {
			atomic_inc_uint(&bd->d_kiop->writes);
			atomic_add_64((uint64_t *)&bd->d_kiop->nwritten,
			    bp->b_bcount - xi->i_resid);
		}
	}
	bd_sched(bd, bq);
}

static void
bd_update_state(bd_t *bd)
{
	enum dkio_state	state = DKIO_INSERTED;
	boolean_t	docmlb = B_FALSE;
	bd_media_t	media;

	bzero(&media, sizeof (media));

	mutex_enter(&bd->d_statemutex);
	if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
		bd->d_numblks = 0;
		state = DKIO_EJECTED;
		goto done;
	}

	if ((media.m_blksize < 512) ||
	    (!ISP2(media.m_blksize)) ||
	    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
		cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
		    ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
		    media.m_blksize);
		/*
		 * We can't use the media, treat it as not present.
		 */
		state = DKIO_EJECTED;
		bd->d_numblks = 0;
		goto done;
	}

	if (((1U << bd->d_blkshift) != media.m_blksize) ||
	    (bd->d_numblks != media.m_nblks)) {
		/* Device size changed */
		docmlb = B_TRUE;
	}

	bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
	bd->d_pblkshift = bd->d_blkshift;
	bd->d_numblks = media.m_nblks;
	bd->d_rdonly = media.m_readonly;
	bd->d_ssd = media.m_solidstate;

	/*
	 * Only use the supplied physical block size if it is non-zero,
	 * greater than or equal to the block size, and a power of 2.
	 * Ignore it if not, it's just informational and we can still
	 * use the media.
	 */
	if ((media.m_pblksize != 0) &&
	    (media.m_pblksize >= media.m_blksize) &&
	    (ISP2(media.m_pblksize)))
		bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;

done:
	if (state != bd->d_state) {
		bd->d_state = state;
		cv_broadcast(&bd->d_statecv);
		docmlb = B_TRUE;
	}
	mutex_exit(&bd->d_statemutex);

	bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift;

	if (docmlb) {
		if (state == DKIO_INSERTED) {
			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
		} else {
			cmlb_invalidate(bd->d_cmlbh, 0);
		}
	}
}
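/*
 * A worked example of the shift math above (values illustrative): for
 * 512-byte blocks, ddi_ffs(512) - 1 == 9, so d_blkshift is 9; 4096-byte
 * blocks give 12. If the media reports an m_blksize of 512 but an
 * m_pblksize of 4096, d_pblkshift becomes 12, which DKIOCGMEDIAINFOEXT
 * then reports as a 4096-byte physical block size.
 */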
static int
bd_check_state(bd_t *bd, enum dkio_state *state)
{
	clock_t		when;

	for (;;) {

		bd_update_state(bd);

		mutex_enter(&bd->d_statemutex);

		if (bd->d_state != *state) {
			*state = bd->d_state;
			mutex_exit(&bd->d_statemutex);
			break;
		}

		when = drv_usectohz(1000000);
		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
		    when, TR_CLOCK_TICK) == 0) {
			mutex_exit(&bd->d_statemutex);
			return (EINTR);
		}

		mutex_exit(&bd->d_statemutex);
	}

	return (0);
}

static int
bd_flush_write_cache_done(struct buf *bp)
{
	struct dk_callback *dc = (void *)bp->b_private;

	(*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
	kmem_free(dc, sizeof (*dc));
	freerbuf(bp);
	return (0);
}

static int
bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
{
	buf_t			*bp;
	struct dk_callback	*dc;
	bd_xfer_impl_t		*xi;
	int			rv;

	if (bd->d_ops.o_sync_cache == NULL) {
		return (ENOTSUP);
	}
	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
		return (ENOMEM);
	}
	bp->b_resid = 0;
	bp->b_bcount = 0;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}

	/* Make an asynchronous flush, but only if there is a callback */
	if (dkc != NULL && dkc->dkc_callback != NULL) {
		/* Make a private copy of the callback structure */
		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
		*dc = *dkc;
		bp->b_private = dc;
		bp->b_iodone = bd_flush_write_cache_done;

		bd_submit(bd, xi);
		return (0);
	}

	/* In case there is no callback, perform a synchronous flush */
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

/*
 * Nexus support.
 */
int
bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
    void *arg, void *result)
{
	bd_handle_t	hdl;

	switch (ctlop) {
	case DDI_CTLOPS_REPORTDEV:
		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
		    ddi_driver_name(rdip), ddi_get_instance(rdip));
		return (DDI_SUCCESS);

	case DDI_CTLOPS_INITCHILD:
		hdl = ddi_get_parent_data((dev_info_t *)arg);
		if (hdl == NULL) {
			return (DDI_NOT_WELL_FORMED);
		}
		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
		return (DDI_SUCCESS);

	case DDI_CTLOPS_UNINITCHILD:
		ddi_set_name_addr((dev_info_t *)arg, NULL);
		ndi_prop_remove_all((dev_info_t *)arg);
		return (DDI_SUCCESS);

	default:
		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
	}
}

/*
 * Functions for device drivers.
 */
bd_handle_t
bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
{
	bd_handle_t	hdl;

	/*
	 * There is full compatibility between the version 0 API and the
	 * current version.
	 */
	switch (ops->o_version) {
	case BD_OPS_VERSION_0:
	case BD_OPS_CURRENT_VERSION:
		break;

	default:
		return (NULL);
	}

	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
	if (hdl != NULL) {
		hdl->h_ops = *ops;
		hdl->h_dma = dma;
		hdl->h_private = private;
	}

	return (hdl);
}

void
bd_free_handle(bd_handle_t hdl)
{
	kmem_free(hdl, sizeof (*hdl));
}

int
bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
{
	dev_info_t	*child;
	bd_drive_t	drive = { 0 };

	/*
	 * It's not an error if bd_attach_handle() is called on a handle
	 * that is already attached. We just ignore the request to attach
	 * and return. This way drivers using blkdev don't have to keep
	 * track of blkdev state, they can just call this function to
	 * make sure it is attached.
	 */
	if (hdl->h_child != NULL) {
		return (DDI_SUCCESS);
	}

	/* if drivers don't override this, make it assume none */
	drive.d_lun = -1;
	hdl->h_ops.o_drive_info(hdl->h_private, &drive);

	hdl->h_parent = dip;
	hdl->h_name = "blkdev";

	/*LINTED: E_BAD_PTR_CAST_ALIGN*/
	if (*(uint64_t *)drive.d_eui64 != 0) {
		if (drive.d_lun >= 0) {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "w%02X%02X%02X%02X%02X%02X%02X%02X,%X",
			    drive.d_eui64[0], drive.d_eui64[1],
			    drive.d_eui64[2], drive.d_eui64[3],
			    drive.d_eui64[4], drive.d_eui64[5],
			    drive.d_eui64[6], drive.d_eui64[7], drive.d_lun);
		} else {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "w%02X%02X%02X%02X%02X%02X%02X%02X",
			    drive.d_eui64[0], drive.d_eui64[1],
			    drive.d_eui64[2], drive.d_eui64[3],
			    drive.d_eui64[4], drive.d_eui64[5],
			    drive.d_eui64[6], drive.d_eui64[7]);
		}
	} else {
		if (drive.d_lun >= 0) {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "%X,%X", drive.d_target, drive.d_lun);
		} else {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "%X", drive.d_target);
		}
	}

	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
	    &child) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    "blkdev", hdl->h_addr);
		return (DDI_FAILURE);
	}

	ddi_set_parent_data(child, hdl);
	hdl->h_child = child;

	if (ndi_devi_online(child, 0) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    hdl->h_name, hdl->h_addr);
		(void) ndi_devi_free(child);
		hdl->h_child = NULL;
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}
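/*
 * Examples of the unit addresses generated above (identifiers are
 * illustrative): a drive reporting an EUI-64 of 0123456789ABCDEF and
 * LUN 0 yields "w0123456789ABCDEF,0"; without an EUI-64, target 4 and
 * LUN 1 yield "4,1", and target 4 with no LUN just "4".
 */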
int
bd_detach_handle(bd_handle_t hdl)
{
	int	circ;
	int	rv;
	char	*devnm;

	/*
	 * It's not an error if bd_detach_handle() is called on a handle
	 * that is already detached. We just ignore the request to detach
	 * and return. This way drivers using blkdev don't have to keep
	 * track of blkdev state, they can just call this function to
	 * make sure it is detached.
	 */
	if (hdl->h_child == NULL) {
		return (DDI_SUCCESS);
	}
	ndi_devi_enter(hdl->h_parent, &circ);
	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
		rv = ddi_remove_child(hdl->h_child, 0);
	} else {
		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
		(void) ddi_deviname(hdl->h_child, devnm);
		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
		kmem_free(devnm, MAXNAMELEN + 1);
	}
	if (rv == 0) {
		hdl->h_child = NULL;
	}

	ndi_devi_exit(hdl->h_parent, circ);
	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
}

void
bd_xfer_done(bd_xfer_t *xfer, int err)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	buf_t		*bp = xi->i_bp;
	int		rv = DDI_SUCCESS;
	bd_t		*bd = xi->i_bd;
	size_t		len;

	if (err != 0) {
		bd_runq_exit(xi, err);
		atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, err);
		biodone(bp);
		return;
	}

	xi->i_cur_win++;
	xi->i_resid -= xi->i_len;

	if (xi->i_resid == 0) {
		/* Job completed successfully! */
		bd_runq_exit(xi, 0);

		bd_xfer_free(xi);
		biodone(bp);
		return;
	}

	xi->i_blkno += xi->i_nblks;

	if (bd->d_use_dma) {
		/* More transfer still pending... advance to next DMA
		 * window. */
		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
	} else {
		/* Advance memory window. */
		xi->i_kaddr += xi->i_len;
		xi->i_offset += xi->i_len;
		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
	}


	if ((rv != DDI_SUCCESS) ||
	    (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
		bd_runq_exit(xi, EFAULT);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, EFAULT);
		biodone(bp);
		return;
	}
	xi->i_len = len;
	xi->i_nblks = len >> xi->i_blkshift;

	/* Submit next window to hardware. */
	rv = xi->i_func(bd->d_private, &xi->i_public);
	if (rv != 0) {
		bd_runq_exit(xi, rv);

		atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, rv);
		biodone(bp);
	}
}

void
bd_error(bd_xfer_t *xfer, int error)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	bd_t		*bd = xi->i_bd;

	switch (error) {
	case BD_ERR_MEDIA:
		atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32);
		break;
	case BD_ERR_NTRDY:
		atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32);
		break;
	case BD_ERR_NODEV:
		atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32);
		break;
	case BD_ERR_RECOV:
		atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32);
		break;
	case BD_ERR_ILLRQ:
		atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32);
		break;
	case BD_ERR_PFA:
		atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32);
		break;
	default:
		cmn_err(CE_PANIC, "bd_error: unknown error type %d", error);
		break;
	}
}

void
bd_state_change(bd_handle_t hdl)
{
	bd_t	*bd;

	if ((bd = hdl->h_bd) != NULL) {
		bd_update_state(bd);
	}
}

void
bd_mod_init(struct dev_ops *devops)
{
	static struct bus_ops bd_bus_ops = {
		BUSO_REV,		/* busops_rev */
		nullbusmap,		/* bus_map */
		NULL,			/* bus_get_intrspec (OBSOLETE) */
		NULL,			/* bus_add_intrspec (OBSOLETE) */
		NULL,			/* bus_remove_intrspec (OBSOLETE) */
		i_ddi_map_fault,	/* bus_map_fault */
		NULL,			/* bus_dma_map (OBSOLETE) */
		ddi_dma_allochdl,	/* bus_dma_allochdl */
		ddi_dma_freehdl,	/* bus_dma_freehdl */
		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
		ddi_dma_flush,		/* bus_dma_flush */
		ddi_dma_win,		/* bus_dma_win */
		ddi_dma_mctl,		/* bus_dma_ctl */
		bd_bus_ctl,		/* bus_ctl */
		ddi_bus_prop_op,	/* bus_prop_op */
		NULL,			/* bus_get_eventcookie */
		NULL,			/* bus_add_eventcall */
		NULL,			/* bus_remove_eventcall */
		NULL,			/* bus_post_event */
		NULL,			/* bus_intr_ctl (OBSOLETE) */
		NULL,			/* bus_config */
		NULL,			/* bus_unconfig */
		NULL,			/* bus_fm_init */
		NULL,			/* bus_fm_fini */
		NULL,			/* bus_fm_access_enter */
		NULL,			/* bus_fm_access_exit */
		NULL,			/* bus_power */
		NULL,			/* bus_intr_op */
	};

	devops->devo_bus_ops = &bd_bus_ops;

	/*
	 * NB: The device driver is free to supply its own
	 * character entry device support.
	 */
}

void
bd_mod_fini(struct dev_ops *devops)
{
	devops->devo_bus_ops = NULL;
}
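/*
 * A parent driver typically calls bd_mod_init() and bd_mod_fini() from
 * its own module hooks, for example (a sketch; the xx_* names are
 * hypothetical):
 *
 *	int
 *	_init(void)
 *	{
 *		int rv;
 *
 *		bd_mod_init(&xx_dev_ops);
 *		if ((rv = mod_install(&xx_modlinkage)) != 0)
 *			bd_mod_fini(&xx_dev_ops);
 *		return (rv);
 *	}
 */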