/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
 * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2017 The MathWorks, Inc. All rights reserved.
 * Copyright 2019 Western Digital Corporation.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/aio_req.h>
#include <sys/cred.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/kstat.h>
#include <sys/fs/dv_node.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/note.h>
#include <sys/blkdev.h>
#include <sys/scsi/impl/inquiry.h>

/*
 * blkdev is a driver which provides a lot of the common functionality
 * a block device driver may need and helps by removing code which
 * is frequently duplicated in block device drivers.
 *
 * Within this driver all the struct cb_ops functions required for a
 * block device driver are written with appropriate call back functions
 * to be provided by the parent driver.
 *
 * To use blkdev, a driver needs to:
 *	1. Create a bd_ops_t structure which has the call back operations
 *	   blkdev will use.
 *	2. Create a handle by calling bd_alloc_handle(). One of the
 *	   arguments to this function is the bd_ops_t.
 *	3. Call bd_attach_handle(). This will instantiate a blkdev device
 *	   as a child device node of the calling driver.
 *	   (An illustrative sketch of these steps appears in the comment
 *	   above bd_alloc_handle(), towards the end of this file.)
 *
 * A parent driver is not restricted to just allocating and attaching a
 * single instance; it may attach as many as it wishes. For each handle
 * attached, appropriate entries in /dev/[r]dsk are created.
 *
 * The bd_ops_t routines that a parent of blkdev needs to provide are:
 *
 * o_drive_info: Provide information to blkdev such as how many I/O queues
 *		to create and the size of those queues. Also some device
 *		specifics such as EUI, vendor, product, model, serial
 *		number ....
 *
 * o_media_info: Provide information about the media, e.g. size and
 *		block size.
 *
 * o_devid_init: Creates and initializes the device id. Typically calls
 *		ddi_devid_init().
 *
 * o_sync_cache: Issues a device appropriate command to flush any write
 *		caches.
 *
 * o_read: Read data as described by bd_xfer_t argument.
 *
 * o_write: Write data as described by bd_xfer_t argument.
 *
 *
 * Queues
 * ------
 * Part of the drive_info data is a queue count. blkdev will create
 * "queue count" number of waitq/runq pairs. Each waitq/runq pair
 * operates independently. As an I/O is scheduled up to the parent
 * driver via o_read or o_write its queue number is given. If the
 * parent driver supports multiple hardware queues it can then select
 * where to submit the I/O request.
 *
 * Currently blkdev uses a simplistic round-robin queue selection method.
 * It has the advantage that it is lockless. In the future it will be
 * worthwhile reviewing this strategy for something which prioritizes queues
 * depending on how busy they are.
 *
 * Each waitq/runq pair is protected by its mutex (q_iomutex). Incoming
 * I/O requests are initially added to the waitq. They are taken off the
 * waitq, added to the runq and submitted, provided the runq is less
 * than the qsize as specified in the drive_info. As an I/O request
 * completes, the parent driver is required to call bd_xfer_done(), which
 * will remove the I/O request from the runq and pass I/O completion
 * status up the stack.
 *
 * Locks
 * -----
 * There are 4 instance global locks, d_ocmutex, d_ksmutex, d_errmutex and
 * d_statemutex, as well as a q_iomutex per waitq/runq pair.
 *
 * Currently, there is no lock hierarchy. Nowhere do we ever own more than
 * one lock; any change needs to be documented here with a defined
 * hierarchy.
 */

#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)

typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;
typedef struct bd_queue bd_queue_t;

struct bd {
	void		*d_private;
	dev_info_t	*d_dip;
	kmutex_t	d_ocmutex;
	kmutex_t	d_ksmutex;
	kmutex_t	d_errmutex;
	kmutex_t	d_statemutex;
	kcondvar_t	d_statecv;
	enum dkio_state	d_state;
	cmlb_handle_t	d_cmlbh;
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];	/* bit mask */
	uint64_t	d_io_counter;

	uint32_t	d_qcount;
	uint32_t	d_qactive;
	uint32_t	d_maxxfer;
	uint32_t	d_blkshift;
	uint32_t	d_pblkshift;
	uint64_t	d_numblks;
	ddi_devid_t	d_devid;

	kmem_cache_t	*d_cache;
	bd_queue_t	*d_queues;
	kstat_t		*d_ksp;
	kstat_io_t	*d_kiop;
	kstat_t		*d_errstats;
	struct bd_errstats *d_kerr;

	boolean_t	d_rdonly;
	boolean_t	d_ssd;
	boolean_t	d_removable;
	boolean_t	d_hotpluggable;
	boolean_t	d_use_dma;

	ddi_dma_attr_t	d_dma;
	bd_ops_t	d_ops;
	bd_handle_t	d_handle;
};

struct bd_handle {
	bd_ops_t	h_ops;
	ddi_dma_attr_t	*h_dma;
	dev_info_t	*h_parent;
	dev_info_t	*h_child;
	void		*h_private;
	bd_t		*h_bd;
	char		*h_name;
	char		h_addr[30];	/* enough for w%0.16x,%X */
};

struct bd_xfer_impl {
	bd_xfer_t	i_public;
	list_node_t	i_linkage;
	bd_t		*i_bd;
	buf_t		*i_bp;
	bd_queue_t	*i_bq;
	uint_t		i_num_win;
	uint_t		i_cur_win;
	off_t		i_offset;
	int		(*i_func)(void *, bd_xfer_t *);
	uint32_t	i_blkshift;
	size_t		i_len;
	size_t		i_resid;
};

struct bd_queue
{ 206 kmutex_t q_iomutex; 207 uint32_t q_qsize; 208 uint32_t q_qactive; 209 list_t q_runq; 210 list_t q_waitq; 211 }; 212 213 #define i_dmah i_public.x_dmah 214 #define i_dmac i_public.x_dmac 215 #define i_ndmac i_public.x_ndmac 216 #define i_kaddr i_public.x_kaddr 217 #define i_nblks i_public.x_nblks 218 #define i_blkno i_public.x_blkno 219 #define i_flags i_public.x_flags 220 #define i_qnum i_public.x_qnum 221 222 223 /* 224 * Private prototypes. 225 */ 226 227 static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t); 228 static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *); 229 static void bd_create_errstats(bd_t *, int, bd_drive_t *); 230 static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *); 231 static void bd_init_errstats(bd_t *, bd_drive_t *); 232 233 static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 234 static int bd_attach(dev_info_t *, ddi_attach_cmd_t); 235 static int bd_detach(dev_info_t *, ddi_detach_cmd_t); 236 237 static int bd_open(dev_t *, int, int, cred_t *); 238 static int bd_close(dev_t, int, int, cred_t *); 239 static int bd_strategy(struct buf *); 240 static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 241 static int bd_dump(dev_t, caddr_t, daddr_t, int); 242 static int bd_read(dev_t, struct uio *, cred_t *); 243 static int bd_write(dev_t, struct uio *, cred_t *); 244 static int bd_aread(dev_t, struct aio_req *, cred_t *); 245 static int bd_awrite(dev_t, struct aio_req *, cred_t *); 246 static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *, 247 caddr_t, int *); 248 249 static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, 250 void *); 251 static int bd_tg_getinfo(dev_info_t *, int, void *, void *); 252 static int bd_xfer_ctor(void *, void *, int); 253 static void bd_xfer_dtor(void *, void *); 254 static void bd_sched(bd_t *, bd_queue_t *); 255 static void bd_submit(bd_t *, bd_xfer_impl_t *); 256 static void bd_runq_exit(bd_xfer_impl_t *, int); 257 static void bd_update_state(bd_t *); 258 static int bd_check_state(bd_t *, enum dkio_state *); 259 static int bd_flush_write_cache(bd_t *, struct dk_callback *); 260 static int bd_check_uio(dev_t, struct uio *); 261 262 struct cmlb_tg_ops bd_tg_ops = { 263 TG_DK_OPS_VERSION_1, 264 bd_tg_rdwr, 265 bd_tg_getinfo, 266 }; 267 268 static struct cb_ops bd_cb_ops = { 269 bd_open, /* open */ 270 bd_close, /* close */ 271 bd_strategy, /* strategy */ 272 nodev, /* print */ 273 bd_dump, /* dump */ 274 bd_read, /* read */ 275 bd_write, /* write */ 276 bd_ioctl, /* ioctl */ 277 nodev, /* devmap */ 278 nodev, /* mmap */ 279 nodev, /* segmap */ 280 nochpoll, /* poll */ 281 bd_prop_op, /* cb_prop_op */ 282 0, /* streamtab */ 283 D_64BIT | D_MP, /* Driver comaptibility flag */ 284 CB_REV, /* cb_rev */ 285 bd_aread, /* async read */ 286 bd_awrite /* async write */ 287 }; 288 289 struct dev_ops bd_dev_ops = { 290 DEVO_REV, /* devo_rev, */ 291 0, /* refcnt */ 292 bd_getinfo, /* getinfo */ 293 nulldev, /* identify */ 294 nulldev, /* probe */ 295 bd_attach, /* attach */ 296 bd_detach, /* detach */ 297 nodev, /* reset */ 298 &bd_cb_ops, /* driver operations */ 299 NULL, /* bus operations */ 300 NULL, /* power */ 301 ddi_quiesce_not_needed, /* quiesce */ 302 }; 303 304 static struct modldrv modldrv = { 305 &mod_driverops, 306 "Generic Block Device", 307 &bd_dev_ops, 308 }; 309 310 static struct modlinkage modlinkage = { 311 MODREV_1, { &modldrv, NULL } 312 }; 313 314 static void *bd_state; 315 static krwlock_t bd_lock; 316 317 int 318 
_init(void) 319 { 320 int rv; 321 322 rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2); 323 if (rv != DDI_SUCCESS) { 324 return (rv); 325 } 326 rw_init(&bd_lock, NULL, RW_DRIVER, NULL); 327 rv = mod_install(&modlinkage); 328 if (rv != DDI_SUCCESS) { 329 rw_destroy(&bd_lock); 330 ddi_soft_state_fini(&bd_state); 331 } 332 return (rv); 333 } 334 335 int 336 _fini(void) 337 { 338 int rv; 339 340 rv = mod_remove(&modlinkage); 341 if (rv == DDI_SUCCESS) { 342 rw_destroy(&bd_lock); 343 ddi_soft_state_fini(&bd_state); 344 } 345 return (rv); 346 } 347 348 int 349 _info(struct modinfo *modinfop) 350 { 351 return (mod_info(&modlinkage, modinfop)); 352 } 353 354 static int 355 bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 356 { 357 bd_t *bd; 358 minor_t inst; 359 360 _NOTE(ARGUNUSED(dip)); 361 362 inst = BDINST((dev_t)arg); 363 364 switch (cmd) { 365 case DDI_INFO_DEVT2DEVINFO: 366 bd = ddi_get_soft_state(bd_state, inst); 367 if (bd == NULL) { 368 return (DDI_FAILURE); 369 } 370 *resultp = (void *)bd->d_dip; 371 break; 372 373 case DDI_INFO_DEVT2INSTANCE: 374 *resultp = (void *)(intptr_t)inst; 375 break; 376 377 default: 378 return (DDI_FAILURE); 379 } 380 return (DDI_SUCCESS); 381 } 382 383 static void 384 bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len) 385 { 386 int ilen; 387 char *data_string; 388 389 ilen = scsi_ascii_inquiry_len(data, len); 390 ASSERT3U(ilen, <=, len); 391 if (ilen <= 0) 392 return; 393 /* ensure null termination */ 394 data_string = kmem_zalloc(ilen + 1, KM_SLEEP); 395 bcopy(data, data_string, ilen); 396 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string); 397 kmem_free(data_string, ilen + 1); 398 } 399 400 static void 401 bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive) 402 { 403 if (drive->d_vendor_len > 0) 404 bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID, 405 drive->d_vendor, drive->d_vendor_len); 406 407 if (drive->d_product_len > 0) 408 bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID, 409 drive->d_product, drive->d_product_len); 410 411 if (drive->d_serial_len > 0) 412 bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO, 413 drive->d_serial, drive->d_serial_len); 414 415 if (drive->d_revision_len > 0) 416 bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID, 417 drive->d_revision, drive->d_revision_len); 418 } 419 420 static void 421 bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive) 422 { 423 char ks_module[KSTAT_STRLEN]; 424 char ks_name[KSTAT_STRLEN]; 425 int ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t); 426 427 if (bd->d_errstats != NULL) 428 return; 429 430 (void) snprintf(ks_module, sizeof (ks_module), "%serr", 431 ddi_driver_name(bd->d_dip)); 432 (void) snprintf(ks_name, sizeof (ks_name), "%s%d,err", 433 ddi_driver_name(bd->d_dip), inst); 434 435 bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error", 436 KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT); 437 438 mutex_init(&bd->d_errmutex, NULL, MUTEX_DRIVER, NULL); 439 if (bd->d_errstats == NULL) { 440 /* 441 * Even if we cannot create the kstat, we create a 442 * scratch kstat. The reason for this is to ensure 443 * that we can update the kstat all of the time, 444 * without adding an extra branch instruction. 
445 */ 446 bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats), 447 KM_SLEEP); 448 } else { 449 bd->d_errstats->ks_lock = &bd->d_errmutex; 450 bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data; 451 } 452 453 kstat_named_init(&bd->d_kerr->bd_softerrs, "Soft Errors", 454 KSTAT_DATA_UINT32); 455 kstat_named_init(&bd->d_kerr->bd_harderrs, "Hard Errors", 456 KSTAT_DATA_UINT32); 457 kstat_named_init(&bd->d_kerr->bd_transerrs, "Transport Errors", 458 KSTAT_DATA_UINT32); 459 460 if (drive->d_model_len > 0) { 461 kstat_named_init(&bd->d_kerr->bd_model, "Model", 462 KSTAT_DATA_STRING); 463 } else { 464 kstat_named_init(&bd->d_kerr->bd_vid, "Vendor", 465 KSTAT_DATA_STRING); 466 kstat_named_init(&bd->d_kerr->bd_pid, "Product", 467 KSTAT_DATA_STRING); 468 } 469 470 kstat_named_init(&bd->d_kerr->bd_revision, "Revision", 471 KSTAT_DATA_STRING); 472 kstat_named_init(&bd->d_kerr->bd_serial, "Serial No", 473 KSTAT_DATA_STRING); 474 kstat_named_init(&bd->d_kerr->bd_capacity, "Size", 475 KSTAT_DATA_ULONGLONG); 476 kstat_named_init(&bd->d_kerr->bd_rq_media_err, "Media Error", 477 KSTAT_DATA_UINT32); 478 kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err, "Device Not Ready", 479 KSTAT_DATA_UINT32); 480 kstat_named_init(&bd->d_kerr->bd_rq_nodev_err, "No Device", 481 KSTAT_DATA_UINT32); 482 kstat_named_init(&bd->d_kerr->bd_rq_recov_err, "Recoverable", 483 KSTAT_DATA_UINT32); 484 kstat_named_init(&bd->d_kerr->bd_rq_illrq_err, "Illegal Request", 485 KSTAT_DATA_UINT32); 486 kstat_named_init(&bd->d_kerr->bd_rq_pfa_err, 487 "Predictive Failure Analysis", KSTAT_DATA_UINT32); 488 489 bd->d_errstats->ks_private = bd; 490 491 kstat_install(bd->d_errstats); 492 } 493 494 static void 495 bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt) 496 { 497 char *tmp; 498 size_t km_len; 499 500 if (KSTAT_NAMED_STR_PTR(k) == NULL) { 501 if (len > 0) 502 km_len = strnlen(str, len); 503 else if (alt != NULL) 504 km_len = strlen(alt); 505 else 506 return; 507 508 tmp = kmem_alloc(km_len + 1, KM_SLEEP); 509 bcopy(len > 0 ? 
str : alt, tmp, km_len); 510 tmp[km_len] = '\0'; 511 512 kstat_named_setstr(k, tmp); 513 } 514 } 515 516 static void 517 bd_errstats_clrstr(kstat_named_t *k) 518 { 519 if (KSTAT_NAMED_STR_PTR(k) == NULL) 520 return; 521 522 kmem_free(KSTAT_NAMED_STR_PTR(k), KSTAT_NAMED_STR_BUFLEN(k)); 523 kstat_named_setstr(k, NULL); 524 } 525 526 static void 527 bd_init_errstats(bd_t *bd, bd_drive_t *drive) 528 { 529 struct bd_errstats *est = bd->d_kerr; 530 531 mutex_enter(&bd->d_errmutex); 532 533 if (drive->d_model_len > 0 && 534 KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) { 535 bd_errstats_setstr(&est->bd_model, drive->d_model, 536 drive->d_model_len, NULL); 537 } else { 538 bd_errstats_setstr(&est->bd_vid, drive->d_vendor, 539 drive->d_vendor_len, "Unknown "); 540 bd_errstats_setstr(&est->bd_pid, drive->d_product, 541 drive->d_product_len, "Unknown "); 542 } 543 544 bd_errstats_setstr(&est->bd_revision, drive->d_revision, 545 drive->d_revision_len, "0001"); 546 bd_errstats_setstr(&est->bd_serial, drive->d_serial, 547 drive->d_serial_len, "0 "); 548 549 mutex_exit(&bd->d_errmutex); 550 } 551 552 static void 553 bd_fini_errstats(bd_t *bd) 554 { 555 struct bd_errstats *est = bd->d_kerr; 556 557 mutex_enter(&bd->d_errmutex); 558 559 bd_errstats_clrstr(&est->bd_model); 560 bd_errstats_clrstr(&est->bd_vid); 561 bd_errstats_clrstr(&est->bd_pid); 562 bd_errstats_clrstr(&est->bd_revision); 563 bd_errstats_clrstr(&est->bd_serial); 564 565 mutex_exit(&bd->d_errmutex); 566 } 567 568 static void 569 bd_queues_free(bd_t *bd) 570 { 571 uint32_t i; 572 573 for (i = 0; i < bd->d_qcount; i++) { 574 bd_queue_t *bq = &bd->d_queues[i]; 575 576 mutex_destroy(&bq->q_iomutex); 577 list_destroy(&bq->q_waitq); 578 list_destroy(&bq->q_runq); 579 } 580 581 kmem_free(bd->d_queues, sizeof (*bd->d_queues) * bd->d_qcount); 582 } 583 584 static int 585 bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 586 { 587 int inst; 588 bd_handle_t hdl; 589 bd_t *bd; 590 bd_drive_t drive; 591 uint32_t i; 592 int rv; 593 char name[16]; 594 char kcache[32]; 595 596 switch (cmd) { 597 case DDI_ATTACH: 598 break; 599 case DDI_RESUME: 600 /* We don't do anything native for suspend/resume */ 601 return (DDI_SUCCESS); 602 default: 603 return (DDI_FAILURE); 604 } 605 606 inst = ddi_get_instance(dip); 607 hdl = ddi_get_parent_data(dip); 608 609 (void) snprintf(name, sizeof (name), "%s%d", 610 ddi_driver_name(dip), ddi_get_instance(dip)); 611 (void) snprintf(kcache, sizeof (kcache), "%s_xfer", name); 612 613 if (hdl == NULL) { 614 cmn_err(CE_WARN, "%s: missing parent data!", name); 615 return (DDI_FAILURE); 616 } 617 618 if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) { 619 cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name); 620 return (DDI_FAILURE); 621 } 622 bd = ddi_get_soft_state(bd_state, inst); 623 624 if (hdl->h_dma) { 625 bd->d_dma = *(hdl->h_dma); 626 bd->d_dma.dma_attr_granular = 627 max(DEV_BSIZE, bd->d_dma.dma_attr_granular); 628 bd->d_use_dma = B_TRUE; 629 630 if (bd->d_maxxfer && 631 (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) { 632 cmn_err(CE_WARN, 633 "%s: inconsistent maximum transfer size!", 634 name); 635 /* We force it */ 636 bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer; 637 } else { 638 bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer; 639 } 640 } else { 641 bd->d_use_dma = B_FALSE; 642 if (bd->d_maxxfer == 0) { 643 bd->d_maxxfer = 1024 * 1024; 644 } 645 } 646 bd->d_ops = hdl->h_ops; 647 bd->d_private = hdl->h_private; 648 bd->d_blkshift = 9; /* 512 bytes, to start */ 649 650 if (bd->d_maxxfer % DEV_BSIZE) { 651 
cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name); 652 bd->d_maxxfer &= ~(DEV_BSIZE - 1); 653 } 654 if (bd->d_maxxfer < DEV_BSIZE) { 655 cmn_err(CE_WARN, "%s: maximum transfer size too small!", name); 656 ddi_soft_state_free(bd_state, inst); 657 return (DDI_FAILURE); 658 } 659 660 bd->d_dip = dip; 661 bd->d_handle = hdl; 662 hdl->h_bd = bd; 663 ddi_set_driver_private(dip, bd); 664 665 mutex_init(&bd->d_ksmutex, NULL, MUTEX_DRIVER, NULL); 666 mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL); 667 mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL); 668 cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL); 669 670 bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8, 671 bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0); 672 673 bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk", 674 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT); 675 if (bd->d_ksp != NULL) { 676 bd->d_ksp->ks_lock = &bd->d_ksmutex; 677 kstat_install(bd->d_ksp); 678 bd->d_kiop = bd->d_ksp->ks_data; 679 } else { 680 /* 681 * Even if we cannot create the kstat, we create a 682 * scratch kstat. The reason for this is to ensure 683 * that we can update the kstat all of the time, 684 * without adding an extra branch instruction. 685 */ 686 bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP); 687 } 688 689 cmlb_alloc_handle(&bd->d_cmlbh); 690 691 bd->d_state = DKIO_NONE; 692 693 bzero(&drive, sizeof (drive)); 694 /* 695 * Default to one queue, parent driver can override. 696 */ 697 drive.d_qcount = 1; 698 bd->d_ops.o_drive_info(bd->d_private, &drive); 699 bd->d_qcount = drive.d_qcount; 700 bd->d_removable = drive.d_removable; 701 bd->d_hotpluggable = drive.d_hotpluggable; 702 703 if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer) 704 bd->d_maxxfer = drive.d_maxxfer; 705 706 bd_create_inquiry_props(dip, &drive); 707 708 bd_create_errstats(bd, inst, &drive); 709 bd_init_errstats(bd, &drive); 710 bd_update_state(bd); 711 712 bd->d_queues = kmem_alloc(sizeof (*bd->d_queues) * bd->d_qcount, 713 KM_SLEEP); 714 for (i = 0; i < bd->d_qcount; i++) { 715 bd_queue_t *bq = &bd->d_queues[i]; 716 717 bq->q_qsize = drive.d_qsize; 718 bq->q_qactive = 0; 719 mutex_init(&bq->q_iomutex, NULL, MUTEX_DRIVER, NULL); 720 721 list_create(&bq->q_waitq, sizeof (bd_xfer_impl_t), 722 offsetof(struct bd_xfer_impl, i_linkage)); 723 list_create(&bq->q_runq, sizeof (bd_xfer_impl_t), 724 offsetof(struct bd_xfer_impl, i_linkage)); 725 } 726 727 rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT, 728 bd->d_removable, bd->d_hotpluggable, 729 /*LINTED: E_BAD_PTR_CAST_ALIGN*/ 730 *(uint64_t *)drive.d_eui64 != 0 ? DDI_NT_BLOCK_BLKDEV : 731 drive.d_lun >= 0 ? 
DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK, 732 CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0); 733 if (rv != 0) { 734 cmlb_free_handle(&bd->d_cmlbh); 735 kmem_cache_destroy(bd->d_cache); 736 mutex_destroy(&bd->d_ksmutex); 737 mutex_destroy(&bd->d_ocmutex); 738 mutex_destroy(&bd->d_statemutex); 739 cv_destroy(&bd->d_statecv); 740 bd_queues_free(bd); 741 if (bd->d_ksp != NULL) { 742 kstat_delete(bd->d_ksp); 743 bd->d_ksp = NULL; 744 } else { 745 kmem_free(bd->d_kiop, sizeof (kstat_io_t)); 746 } 747 ddi_soft_state_free(bd_state, inst); 748 return (DDI_FAILURE); 749 } 750 751 if (bd->d_ops.o_devid_init != NULL) { 752 rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid); 753 if (rv == DDI_SUCCESS) { 754 if (ddi_devid_register(dip, bd->d_devid) != 755 DDI_SUCCESS) { 756 cmn_err(CE_WARN, 757 "%s: unable to register devid", name); 758 } 759 } 760 } 761 762 /* 763 * Add a zero-length attribute to tell the world we support 764 * kernel ioctls (for layered drivers). Also set up properties 765 * used by HAL to identify removable media. 766 */ 767 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, 768 DDI_KERNEL_IOCTL, NULL, 0); 769 if (bd->d_removable) { 770 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, 771 "removable-media", NULL, 0); 772 } 773 if (bd->d_hotpluggable) { 774 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, 775 "hotpluggable", NULL, 0); 776 } 777 778 ddi_report_dev(dip); 779 780 return (DDI_SUCCESS); 781 } 782 783 static int 784 bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 785 { 786 bd_t *bd; 787 788 bd = ddi_get_driver_private(dip); 789 790 switch (cmd) { 791 case DDI_DETACH: 792 break; 793 case DDI_SUSPEND: 794 /* We don't suspend, but our parent does */ 795 return (DDI_SUCCESS); 796 default: 797 return (DDI_FAILURE); 798 } 799 if (bd->d_ksp != NULL) { 800 kstat_delete(bd->d_ksp); 801 bd->d_ksp = NULL; 802 } else { 803 kmem_free(bd->d_kiop, sizeof (kstat_io_t)); 804 } 805 806 if (bd->d_errstats != NULL) { 807 bd_fini_errstats(bd); 808 kstat_delete(bd->d_errstats); 809 bd->d_errstats = NULL; 810 } else { 811 kmem_free(bd->d_kerr, sizeof (struct bd_errstats)); 812 mutex_destroy(&bd->d_errmutex); 813 } 814 815 cmlb_detach(bd->d_cmlbh, 0); 816 cmlb_free_handle(&bd->d_cmlbh); 817 if (bd->d_devid) 818 ddi_devid_free(bd->d_devid); 819 kmem_cache_destroy(bd->d_cache); 820 mutex_destroy(&bd->d_ksmutex); 821 mutex_destroy(&bd->d_ocmutex); 822 mutex_destroy(&bd->d_statemutex); 823 cv_destroy(&bd->d_statecv); 824 bd_queues_free(bd); 825 ddi_soft_state_free(bd_state, ddi_get_instance(dip)); 826 return (DDI_SUCCESS); 827 } 828 829 static int 830 bd_xfer_ctor(void *buf, void *arg, int kmflag) 831 { 832 bd_xfer_impl_t *xi; 833 bd_t *bd = arg; 834 int (*dcb)(caddr_t); 835 836 if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) { 837 dcb = DDI_DMA_SLEEP; 838 } else { 839 dcb = DDI_DMA_DONTWAIT; 840 } 841 842 xi = buf; 843 bzero(xi, sizeof (*xi)); 844 xi->i_bd = bd; 845 846 if (bd->d_use_dma) { 847 if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL, 848 &xi->i_dmah) != DDI_SUCCESS) { 849 return (-1); 850 } 851 } 852 853 return (0); 854 } 855 856 static void 857 bd_xfer_dtor(void *buf, void *arg) 858 { 859 bd_xfer_impl_t *xi = buf; 860 861 _NOTE(ARGUNUSED(arg)); 862 863 if (xi->i_dmah) 864 ddi_dma_free_handle(&xi->i_dmah); 865 xi->i_dmah = NULL; 866 } 867 868 static bd_xfer_impl_t * 869 bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *), 870 int kmflag) 871 { 872 bd_xfer_impl_t *xi; 873 int rv = 0; 874 int status; 875 unsigned dir; 
876 int (*cb)(caddr_t); 877 size_t len; 878 uint32_t shift; 879 880 if (kmflag == KM_SLEEP) { 881 cb = DDI_DMA_SLEEP; 882 } else { 883 cb = DDI_DMA_DONTWAIT; 884 } 885 886 xi = kmem_cache_alloc(bd->d_cache, kmflag); 887 if (xi == NULL) { 888 bioerror(bp, ENOMEM); 889 return (NULL); 890 } 891 892 ASSERT(bp); 893 894 xi->i_bp = bp; 895 xi->i_func = func; 896 xi->i_blkno = bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT); 897 898 if (bp->b_bcount == 0) { 899 xi->i_len = 0; 900 xi->i_nblks = 0; 901 xi->i_kaddr = NULL; 902 xi->i_resid = 0; 903 xi->i_num_win = 0; 904 goto done; 905 } 906 907 if (bp->b_flags & B_READ) { 908 dir = DDI_DMA_READ; 909 xi->i_func = bd->d_ops.o_read; 910 } else { 911 dir = DDI_DMA_WRITE; 912 xi->i_func = bd->d_ops.o_write; 913 } 914 915 shift = bd->d_blkshift; 916 xi->i_blkshift = shift; 917 918 if (!bd->d_use_dma) { 919 bp_mapin(bp); 920 rv = 0; 921 xi->i_offset = 0; 922 xi->i_num_win = 923 (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer; 924 xi->i_cur_win = 0; 925 xi->i_len = min(bp->b_bcount, bd->d_maxxfer); 926 xi->i_nblks = xi->i_len >> shift; 927 xi->i_kaddr = bp->b_un.b_addr; 928 xi->i_resid = bp->b_bcount; 929 } else { 930 931 /* 932 * We have to use consistent DMA if the address is misaligned. 933 */ 934 if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) && 935 ((uintptr_t)bp->b_un.b_addr & 0x7)) { 936 dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL; 937 } else { 938 dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL; 939 } 940 941 status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb, 942 NULL, &xi->i_dmac, &xi->i_ndmac); 943 switch (status) { 944 case DDI_DMA_MAPPED: 945 xi->i_num_win = 1; 946 xi->i_cur_win = 0; 947 xi->i_offset = 0; 948 xi->i_len = bp->b_bcount; 949 xi->i_nblks = xi->i_len >> shift; 950 xi->i_resid = bp->b_bcount; 951 rv = 0; 952 break; 953 case DDI_DMA_PARTIAL_MAP: 954 xi->i_cur_win = 0; 955 956 if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) != 957 DDI_SUCCESS) || 958 (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset, 959 &len, &xi->i_dmac, &xi->i_ndmac) != 960 DDI_SUCCESS) || 961 (P2PHASE(len, (1U << shift)) != 0)) { 962 (void) ddi_dma_unbind_handle(xi->i_dmah); 963 rv = EFAULT; 964 goto done; 965 } 966 xi->i_len = len; 967 xi->i_nblks = xi->i_len >> shift; 968 xi->i_resid = bp->b_bcount; 969 rv = 0; 970 break; 971 case DDI_DMA_NORESOURCES: 972 rv = EAGAIN; 973 goto done; 974 case DDI_DMA_TOOBIG: 975 rv = EINVAL; 976 goto done; 977 case DDI_DMA_NOMAPPING: 978 case DDI_DMA_INUSE: 979 default: 980 rv = EFAULT; 981 goto done; 982 } 983 } 984 985 done: 986 if (rv != 0) { 987 kmem_cache_free(bd->d_cache, xi); 988 bioerror(bp, rv); 989 return (NULL); 990 } 991 992 return (xi); 993 } 994 995 static void 996 bd_xfer_free(bd_xfer_impl_t *xi) 997 { 998 if (xi->i_dmah) { 999 (void) ddi_dma_unbind_handle(xi->i_dmah); 1000 } 1001 kmem_cache_free(xi->i_bd->d_cache, xi); 1002 } 1003 1004 static int 1005 bd_open(dev_t *devp, int flag, int otyp, cred_t *credp) 1006 { 1007 dev_t dev = *devp; 1008 bd_t *bd; 1009 minor_t part; 1010 minor_t inst; 1011 uint64_t mask; 1012 boolean_t ndelay; 1013 int rv; 1014 diskaddr_t nblks; 1015 diskaddr_t lba; 1016 1017 _NOTE(ARGUNUSED(credp)); 1018 1019 part = BDPART(dev); 1020 inst = BDINST(dev); 1021 1022 if (otyp >= OTYPCNT) 1023 return (EINVAL); 1024 1025 ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE; 1026 1027 /* 1028 * Block any DR events from changing the set of registered 1029 * devices while we function. 
1030 */ 1031 rw_enter(&bd_lock, RW_READER); 1032 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) { 1033 rw_exit(&bd_lock); 1034 return (ENXIO); 1035 } 1036 1037 mutex_enter(&bd->d_ocmutex); 1038 1039 ASSERT(part < 64); 1040 mask = (1U << part); 1041 1042 bd_update_state(bd); 1043 1044 if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) { 1045 1046 /* non-blocking opens are allowed to succeed */ 1047 if (!ndelay) { 1048 rv = ENXIO; 1049 goto done; 1050 } 1051 } else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba, 1052 NULL, NULL, 0) == 0) { 1053 1054 /* 1055 * We read the partinfo, verify valid ranges. If the 1056 * partition is invalid, and we aren't blocking or 1057 * doing a raw access, then fail. (Non-blocking and 1058 * raw accesses can still succeed to allow a disk with 1059 * bad partition data to opened by format and fdisk.) 1060 */ 1061 if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) { 1062 rv = ENXIO; 1063 goto done; 1064 } 1065 } else if (!ndelay) { 1066 /* 1067 * cmlb_partinfo failed -- invalid partition or no 1068 * disk label. 1069 */ 1070 rv = ENXIO; 1071 goto done; 1072 } 1073 1074 if ((flag & FWRITE) && bd->d_rdonly) { 1075 rv = EROFS; 1076 goto done; 1077 } 1078 1079 if ((bd->d_open_excl) & (mask)) { 1080 rv = EBUSY; 1081 goto done; 1082 } 1083 if (flag & FEXCL) { 1084 if (bd->d_open_lyr[part]) { 1085 rv = EBUSY; 1086 goto done; 1087 } 1088 for (int i = 0; i < OTYP_LYR; i++) { 1089 if (bd->d_open_reg[i] & mask) { 1090 rv = EBUSY; 1091 goto done; 1092 } 1093 } 1094 } 1095 1096 if (otyp == OTYP_LYR) { 1097 bd->d_open_lyr[part]++; 1098 } else { 1099 bd->d_open_reg[otyp] |= mask; 1100 } 1101 if (flag & FEXCL) { 1102 bd->d_open_excl |= mask; 1103 } 1104 1105 rv = 0; 1106 done: 1107 mutex_exit(&bd->d_ocmutex); 1108 rw_exit(&bd_lock); 1109 1110 return (rv); 1111 } 1112 1113 static int 1114 bd_close(dev_t dev, int flag, int otyp, cred_t *credp) 1115 { 1116 bd_t *bd; 1117 minor_t inst; 1118 minor_t part; 1119 uint64_t mask; 1120 boolean_t last = B_TRUE; 1121 1122 _NOTE(ARGUNUSED(flag)); 1123 _NOTE(ARGUNUSED(credp)); 1124 1125 part = BDPART(dev); 1126 inst = BDINST(dev); 1127 1128 ASSERT(part < 64); 1129 mask = (1U << part); 1130 1131 rw_enter(&bd_lock, RW_READER); 1132 1133 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) { 1134 rw_exit(&bd_lock); 1135 return (ENXIO); 1136 } 1137 1138 mutex_enter(&bd->d_ocmutex); 1139 if (bd->d_open_excl & mask) { 1140 bd->d_open_excl &= ~mask; 1141 } 1142 if (otyp == OTYP_LYR) { 1143 bd->d_open_lyr[part]--; 1144 } else { 1145 bd->d_open_reg[otyp] &= ~mask; 1146 } 1147 for (int i = 0; i < 64; i++) { 1148 if (bd->d_open_lyr[part]) { 1149 last = B_FALSE; 1150 } 1151 } 1152 for (int i = 0; last && (i < OTYP_LYR); i++) { 1153 if (bd->d_open_reg[i]) { 1154 last = B_FALSE; 1155 } 1156 } 1157 mutex_exit(&bd->d_ocmutex); 1158 1159 if (last) { 1160 cmlb_invalidate(bd->d_cmlbh, 0); 1161 } 1162 rw_exit(&bd_lock); 1163 1164 return (0); 1165 } 1166 1167 static int 1168 bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk) 1169 { 1170 minor_t inst; 1171 minor_t part; 1172 diskaddr_t pstart; 1173 diskaddr_t psize; 1174 bd_t *bd; 1175 bd_xfer_impl_t *xi; 1176 buf_t *bp; 1177 int rv; 1178 uint32_t shift; 1179 daddr_t d_blkno; 1180 int d_nblk; 1181 1182 rw_enter(&bd_lock, RW_READER); 1183 1184 part = BDPART(dev); 1185 inst = BDINST(dev); 1186 1187 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) { 1188 rw_exit(&bd_lock); 1189 return (ENXIO); 1190 } 1191 shift = bd->d_blkshift; 1192 d_blkno = blkno >> (shift - DEV_BSHIFT); 1193 d_nblk = nblk >> 
(shift - DEV_BSHIFT); 1194 /* 1195 * do cmlb, but do it synchronously unless we already have the 1196 * partition (which we probably should.) 1197 */ 1198 if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL, 1199 (void *)1)) { 1200 rw_exit(&bd_lock); 1201 return (ENXIO); 1202 } 1203 1204 if ((d_blkno + d_nblk) > psize) { 1205 rw_exit(&bd_lock); 1206 return (EINVAL); 1207 } 1208 bp = getrbuf(KM_NOSLEEP); 1209 if (bp == NULL) { 1210 rw_exit(&bd_lock); 1211 return (ENOMEM); 1212 } 1213 1214 bp->b_bcount = nblk << DEV_BSHIFT; 1215 bp->b_resid = bp->b_bcount; 1216 bp->b_lblkno = blkno; 1217 bp->b_un.b_addr = caddr; 1218 1219 xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP); 1220 if (xi == NULL) { 1221 rw_exit(&bd_lock); 1222 freerbuf(bp); 1223 return (ENOMEM); 1224 } 1225 xi->i_blkno = d_blkno + pstart; 1226 xi->i_flags = BD_XFER_POLL; 1227 bd_submit(bd, xi); 1228 rw_exit(&bd_lock); 1229 1230 /* 1231 * Generally, we should have run this entirely synchronously 1232 * at this point and the biowait call should be a no-op. If 1233 * it didn't happen this way, it's a bug in the underlying 1234 * driver not honoring BD_XFER_POLL. 1235 */ 1236 (void) biowait(bp); 1237 rv = geterror(bp); 1238 freerbuf(bp); 1239 return (rv); 1240 } 1241 1242 void 1243 bd_minphys(struct buf *bp) 1244 { 1245 minor_t inst; 1246 bd_t *bd; 1247 inst = BDINST(bp->b_edev); 1248 1249 bd = ddi_get_soft_state(bd_state, inst); 1250 1251 /* 1252 * In a non-debug kernel, bd_strategy will catch !bd as 1253 * well, and will fail nicely. 1254 */ 1255 ASSERT(bd); 1256 1257 if (bp->b_bcount > bd->d_maxxfer) 1258 bp->b_bcount = bd->d_maxxfer; 1259 } 1260 1261 static int 1262 bd_check_uio(dev_t dev, struct uio *uio) 1263 { 1264 bd_t *bd; 1265 uint32_t shift; 1266 1267 if ((bd = ddi_get_soft_state(bd_state, BDINST(dev))) == NULL) { 1268 return (ENXIO); 1269 } 1270 1271 shift = bd->d_blkshift; 1272 if ((P2PHASE(uio->uio_loffset, (1U << shift)) != 0) || 1273 (P2PHASE(uio->uio_iov->iov_len, (1U << shift)) != 0)) { 1274 return (EINVAL); 1275 } 1276 1277 return (0); 1278 } 1279 1280 static int 1281 bd_read(dev_t dev, struct uio *uio, cred_t *credp) 1282 { 1283 _NOTE(ARGUNUSED(credp)); 1284 int ret = bd_check_uio(dev, uio); 1285 if (ret != 0) { 1286 return (ret); 1287 } 1288 return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio)); 1289 } 1290 1291 static int 1292 bd_write(dev_t dev, struct uio *uio, cred_t *credp) 1293 { 1294 _NOTE(ARGUNUSED(credp)); 1295 int ret = bd_check_uio(dev, uio); 1296 if (ret != 0) { 1297 return (ret); 1298 } 1299 return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio)); 1300 } 1301 1302 static int 1303 bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp) 1304 { 1305 _NOTE(ARGUNUSED(credp)); 1306 int ret = bd_check_uio(dev, aio->aio_uio); 1307 if (ret != 0) { 1308 return (ret); 1309 } 1310 return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio)); 1311 } 1312 1313 static int 1314 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp) 1315 { 1316 _NOTE(ARGUNUSED(credp)); 1317 int ret = bd_check_uio(dev, aio->aio_uio); 1318 if (ret != 0) { 1319 return (ret); 1320 } 1321 return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio)); 1322 } 1323 1324 static int 1325 bd_strategy(struct buf *bp) 1326 { 1327 minor_t inst; 1328 minor_t part; 1329 bd_t *bd; 1330 diskaddr_t p_lba; 1331 diskaddr_t p_nblks; 1332 diskaddr_t b_nblks; 1333 bd_xfer_impl_t *xi; 1334 uint32_t shift; 1335 int (*func)(void *, bd_xfer_t *); 1336 diskaddr_t lblkno; 1337 1338 part = 
BDPART(bp->b_edev); 1339 inst = BDINST(bp->b_edev); 1340 1341 ASSERT(bp); 1342 1343 bp->b_resid = bp->b_bcount; 1344 1345 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) { 1346 bioerror(bp, ENXIO); 1347 biodone(bp); 1348 return (0); 1349 } 1350 1351 if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba, 1352 NULL, NULL, 0)) { 1353 bioerror(bp, ENXIO); 1354 biodone(bp); 1355 return (0); 1356 } 1357 1358 shift = bd->d_blkshift; 1359 lblkno = bp->b_lblkno >> (shift - DEV_BSHIFT); 1360 if ((P2PHASE(bp->b_lblkno, (1U << (shift - DEV_BSHIFT))) != 0) || 1361 (P2PHASE(bp->b_bcount, (1U << shift)) != 0) || 1362 (lblkno > p_nblks)) { 1363 bioerror(bp, EINVAL); 1364 biodone(bp); 1365 return (0); 1366 } 1367 b_nblks = bp->b_bcount >> shift; 1368 if ((lblkno == p_nblks) || (bp->b_bcount == 0)) { 1369 biodone(bp); 1370 return (0); 1371 } 1372 1373 if ((b_nblks + lblkno) > p_nblks) { 1374 bp->b_resid = ((lblkno + b_nblks - p_nblks) << shift); 1375 bp->b_bcount -= bp->b_resid; 1376 } else { 1377 bp->b_resid = 0; 1378 } 1379 func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write; 1380 1381 xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP); 1382 if (xi == NULL) { 1383 xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE); 1384 } 1385 if (xi == NULL) { 1386 /* bd_request_alloc will have done bioerror */ 1387 biodone(bp); 1388 return (0); 1389 } 1390 xi->i_blkno = lblkno + p_lba; 1391 1392 bd_submit(bd, xi); 1393 1394 return (0); 1395 } 1396 1397 static int 1398 bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp) 1399 { 1400 minor_t inst; 1401 uint16_t part; 1402 bd_t *bd; 1403 void *ptr = (void *)arg; 1404 int rv; 1405 1406 part = BDPART(dev); 1407 inst = BDINST(dev); 1408 1409 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) { 1410 return (ENXIO); 1411 } 1412 1413 rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0); 1414 if (rv != ENOTTY) 1415 return (rv); 1416 1417 if (rvalp != NULL) { 1418 /* the return value of the ioctl is 0 by default */ 1419 *rvalp = 0; 1420 } 1421 1422 switch (cmd) { 1423 case DKIOCGMEDIAINFO: { 1424 struct dk_minfo minfo; 1425 1426 /* make sure our state information is current */ 1427 bd_update_state(bd); 1428 bzero(&minfo, sizeof (minfo)); 1429 minfo.dki_media_type = DK_FIXED_DISK; 1430 minfo.dki_lbsize = (1U << bd->d_blkshift); 1431 minfo.dki_capacity = bd->d_numblks; 1432 if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) { 1433 return (EFAULT); 1434 } 1435 return (0); 1436 } 1437 case DKIOCGMEDIAINFOEXT: { 1438 struct dk_minfo_ext miext; 1439 1440 /* make sure our state information is current */ 1441 bd_update_state(bd); 1442 bzero(&miext, sizeof (miext)); 1443 miext.dki_media_type = DK_FIXED_DISK; 1444 miext.dki_lbsize = (1U << bd->d_blkshift); 1445 miext.dki_pbsize = (1U << bd->d_pblkshift); 1446 miext.dki_capacity = bd->d_numblks; 1447 if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) { 1448 return (EFAULT); 1449 } 1450 return (0); 1451 } 1452 case DKIOCINFO: { 1453 struct dk_cinfo cinfo; 1454 bzero(&cinfo, sizeof (cinfo)); 1455 cinfo.dki_ctype = DKC_BLKDEV; 1456 cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip)); 1457 (void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname), 1458 "%s", ddi_driver_name(ddi_get_parent(bd->d_dip))); 1459 (void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname), 1460 "%s", ddi_driver_name(bd->d_dip)); 1461 cinfo.dki_unit = inst; 1462 cinfo.dki_flags = DKI_FMTVOL; 1463 cinfo.dki_partition = part; 1464 cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE; 1465 cinfo.dki_addr = 0; 
1466 cinfo.dki_slave = 0; 1467 cinfo.dki_space = 0; 1468 cinfo.dki_prio = 0; 1469 cinfo.dki_vec = 0; 1470 if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) { 1471 return (EFAULT); 1472 } 1473 return (0); 1474 } 1475 case DKIOCREMOVABLE: { 1476 int i; 1477 i = bd->d_removable ? 1 : 0; 1478 if (ddi_copyout(&i, ptr, sizeof (i), flag)) { 1479 return (EFAULT); 1480 } 1481 return (0); 1482 } 1483 case DKIOCHOTPLUGGABLE: { 1484 int i; 1485 i = bd->d_hotpluggable ? 1 : 0; 1486 if (ddi_copyout(&i, ptr, sizeof (i), flag)) { 1487 return (EFAULT); 1488 } 1489 return (0); 1490 } 1491 case DKIOCREADONLY: { 1492 int i; 1493 i = bd->d_rdonly ? 1 : 0; 1494 if (ddi_copyout(&i, ptr, sizeof (i), flag)) { 1495 return (EFAULT); 1496 } 1497 return (0); 1498 } 1499 case DKIOCSOLIDSTATE: { 1500 int i; 1501 i = bd->d_ssd ? 1 : 0; 1502 if (ddi_copyout(&i, ptr, sizeof (i), flag)) { 1503 return (EFAULT); 1504 } 1505 return (0); 1506 } 1507 case DKIOCSTATE: { 1508 enum dkio_state state; 1509 if (ddi_copyin(ptr, &state, sizeof (state), flag)) { 1510 return (EFAULT); 1511 } 1512 if ((rv = bd_check_state(bd, &state)) != 0) { 1513 return (rv); 1514 } 1515 if (ddi_copyout(&state, ptr, sizeof (state), flag)) { 1516 return (EFAULT); 1517 } 1518 return (0); 1519 } 1520 case DKIOCFLUSHWRITECACHE: { 1521 struct dk_callback *dkc = NULL; 1522 1523 if (flag & FKIOCTL) 1524 dkc = (void *)arg; 1525 1526 rv = bd_flush_write_cache(bd, dkc); 1527 return (rv); 1528 } 1529 1530 default: 1531 break; 1532 1533 } 1534 return (ENOTTY); 1535 } 1536 1537 static int 1538 bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, 1539 char *name, caddr_t valuep, int *lengthp) 1540 { 1541 bd_t *bd; 1542 1543 bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip)); 1544 if (bd == NULL) 1545 return (ddi_prop_op(dev, dip, prop_op, mod_flags, 1546 name, valuep, lengthp)); 1547 1548 return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name, 1549 valuep, lengthp, BDPART(dev), 0)); 1550 } 1551 1552 1553 static int 1554 bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start, 1555 size_t length, void *tg_cookie) 1556 { 1557 bd_t *bd; 1558 buf_t *bp; 1559 bd_xfer_impl_t *xi; 1560 int rv; 1561 int (*func)(void *, bd_xfer_t *); 1562 int kmflag; 1563 1564 /* 1565 * If we are running in polled mode (such as during dump(9e) 1566 * execution), then we cannot sleep for kernel allocations. 1567 */ 1568 kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP; 1569 1570 bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip)); 1571 1572 if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) { 1573 /* We can only transfer whole blocks at a time! */ 1574 return (EINVAL); 1575 } 1576 1577 if ((bp = getrbuf(kmflag)) == NULL) { 1578 return (ENOMEM); 1579 } 1580 1581 switch (cmd) { 1582 case TG_READ: 1583 bp->b_flags = B_READ; 1584 func = bd->d_ops.o_read; 1585 break; 1586 case TG_WRITE: 1587 bp->b_flags = B_WRITE; 1588 func = bd->d_ops.o_write; 1589 break; 1590 default: 1591 freerbuf(bp); 1592 return (EINVAL); 1593 } 1594 1595 bp->b_un.b_addr = bufaddr; 1596 bp->b_bcount = length; 1597 xi = bd_xfer_alloc(bd, bp, func, kmflag); 1598 if (xi == NULL) { 1599 rv = geterror(bp); 1600 freerbuf(bp); 1601 return (rv); 1602 } 1603 xi->i_flags = tg_cookie ? 
BD_XFER_POLL : 0; 1604 xi->i_blkno = start; 1605 bd_submit(bd, xi); 1606 (void) biowait(bp); 1607 rv = geterror(bp); 1608 freerbuf(bp); 1609 1610 return (rv); 1611 } 1612 1613 static int 1614 bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) 1615 { 1616 bd_t *bd; 1617 1618 _NOTE(ARGUNUSED(tg_cookie)); 1619 bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip)); 1620 1621 switch (cmd) { 1622 case TG_GETPHYGEOM: 1623 case TG_GETVIRTGEOM: 1624 /* 1625 * We don't have any "geometry" as such, let cmlb 1626 * fabricate something. 1627 */ 1628 return (ENOTTY); 1629 1630 case TG_GETCAPACITY: 1631 bd_update_state(bd); 1632 *(diskaddr_t *)arg = bd->d_numblks; 1633 return (0); 1634 1635 case TG_GETBLOCKSIZE: 1636 *(uint32_t *)arg = (1U << bd->d_blkshift); 1637 return (0); 1638 1639 case TG_GETATTR: 1640 /* 1641 * It turns out that cmlb really doesn't do much for 1642 * non-writable media, but lets make the information 1643 * available for it in case it does more in the 1644 * future. (The value is currently used for 1645 * triggering special behavior for CD-ROMs.) 1646 */ 1647 bd_update_state(bd); 1648 ((tg_attribute_t *)arg)->media_is_writable = 1649 bd->d_rdonly ? B_FALSE : B_TRUE; 1650 ((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd; 1651 ((tg_attribute_t *)arg)->media_is_rotational = B_FALSE; 1652 return (0); 1653 1654 default: 1655 return (EINVAL); 1656 } 1657 } 1658 1659 1660 static void 1661 bd_sched(bd_t *bd, bd_queue_t *bq) 1662 { 1663 bd_xfer_impl_t *xi; 1664 struct buf *bp; 1665 int rv; 1666 1667 mutex_enter(&bq->q_iomutex); 1668 1669 while ((bq->q_qactive < bq->q_qsize) && 1670 ((xi = list_remove_head(&bq->q_waitq)) != NULL)) { 1671 bq->q_qactive++; 1672 list_insert_tail(&bq->q_runq, xi); 1673 1674 /* 1675 * Submit the job to the driver. We drop the I/O mutex 1676 * so that we can deal with the case where the driver 1677 * completion routine calls back into us synchronously. 
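		 *
		 * (For example, a hypothetical parent driver that services
		 * requests by polling could complete the transfer before its
		 * o_read entry point even returns:
		 *
		 *	static int
		 *	myblk_o_read(void *arg, bd_xfer_t *xfer)
		 *	{
		 *		myblk_do_pio(arg, xfer);	-- hypothetical
		 *		bd_xfer_done(xfer, 0);
		 *		return (0);
		 *	}
		 *
		 * bd_xfer_done() calls bd_runq_exit() and bd_sched(), both of
		 * which take q_iomutex, so holding it across i_func() would
		 * self-deadlock in that case.)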
1678 */ 1679 1680 mutex_exit(&bq->q_iomutex); 1681 1682 mutex_enter(&bd->d_ksmutex); 1683 kstat_waitq_to_runq(bd->d_kiop); 1684 mutex_exit(&bd->d_ksmutex); 1685 1686 rv = xi->i_func(bd->d_private, &xi->i_public); 1687 if (rv != 0) { 1688 bp = xi->i_bp; 1689 bioerror(bp, rv); 1690 biodone(bp); 1691 1692 atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32); 1693 mutex_enter(&bd->d_ksmutex); 1694 kstat_runq_exit(bd->d_kiop); 1695 mutex_exit(&bd->d_ksmutex); 1696 1697 mutex_enter(&bq->q_iomutex); 1698 bq->q_qactive--; 1699 list_remove(&bq->q_runq, xi); 1700 bd_xfer_free(xi); 1701 } else { 1702 mutex_enter(&bq->q_iomutex); 1703 } 1704 } 1705 1706 mutex_exit(&bq->q_iomutex); 1707 } 1708 1709 static void 1710 bd_submit(bd_t *bd, bd_xfer_impl_t *xi) 1711 { 1712 uint64_t nv = atomic_inc_64_nv(&bd->d_io_counter); 1713 unsigned q = nv % bd->d_qcount; 1714 bd_queue_t *bq = &bd->d_queues[q]; 1715 1716 xi->i_bq = bq; 1717 xi->i_qnum = q; 1718 1719 mutex_enter(&bq->q_iomutex); 1720 list_insert_tail(&bq->q_waitq, xi); 1721 mutex_exit(&bq->q_iomutex); 1722 1723 mutex_enter(&bd->d_ksmutex); 1724 kstat_waitq_enter(bd->d_kiop); 1725 mutex_exit(&bd->d_ksmutex); 1726 1727 bd_sched(bd, bq); 1728 } 1729 1730 static void 1731 bd_runq_exit(bd_xfer_impl_t *xi, int err) 1732 { 1733 bd_t *bd = xi->i_bd; 1734 buf_t *bp = xi->i_bp; 1735 bd_queue_t *bq = xi->i_bq; 1736 1737 mutex_enter(&bq->q_iomutex); 1738 bq->q_qactive--; 1739 list_remove(&bq->q_runq, xi); 1740 mutex_exit(&bq->q_iomutex); 1741 1742 mutex_enter(&bd->d_ksmutex); 1743 kstat_runq_exit(bd->d_kiop); 1744 mutex_exit(&bd->d_ksmutex); 1745 1746 if (err == 0) { 1747 if (bp->b_flags & B_READ) { 1748 atomic_inc_uint(&bd->d_kiop->reads); 1749 atomic_add_64((uint64_t *)&bd->d_kiop->nread, 1750 bp->b_bcount - xi->i_resid); 1751 } else { 1752 atomic_inc_uint(&bd->d_kiop->writes); 1753 atomic_add_64((uint64_t *)&bd->d_kiop->nwritten, 1754 bp->b_bcount - xi->i_resid); 1755 } 1756 } 1757 bd_sched(bd, bq); 1758 } 1759 1760 static void 1761 bd_update_state(bd_t *bd) 1762 { 1763 enum dkio_state state = DKIO_INSERTED; 1764 boolean_t docmlb = B_FALSE; 1765 bd_media_t media; 1766 1767 bzero(&media, sizeof (media)); 1768 1769 mutex_enter(&bd->d_statemutex); 1770 if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) { 1771 bd->d_numblks = 0; 1772 state = DKIO_EJECTED; 1773 goto done; 1774 } 1775 1776 if ((media.m_blksize < 512) || 1777 (!ISP2(media.m_blksize)) || 1778 (P2PHASE(bd->d_maxxfer, media.m_blksize))) { 1779 cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)", 1780 ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip), 1781 media.m_blksize); 1782 /* 1783 * We can't use the media, treat it as not present. 1784 */ 1785 state = DKIO_EJECTED; 1786 bd->d_numblks = 0; 1787 goto done; 1788 } 1789 1790 if (((1U << bd->d_blkshift) != media.m_blksize) || 1791 (bd->d_numblks != media.m_nblks)) { 1792 /* Device size changed */ 1793 docmlb = B_TRUE; 1794 } 1795 1796 bd->d_blkshift = ddi_ffs(media.m_blksize) - 1; 1797 bd->d_pblkshift = bd->d_blkshift; 1798 bd->d_numblks = media.m_nblks; 1799 bd->d_rdonly = media.m_readonly; 1800 bd->d_ssd = media.m_solidstate; 1801 1802 /* 1803 * Only use the supplied physical block size if it is non-zero, 1804 * greater or equal to the block size, and a power of 2. Ignore it 1805 * if not, it's just informational and we can still use the media. 
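	 * (For example, a hypothetical 512e drive reporting m_blksize = 512
	 * and m_pblksize = 4096 ends up with d_blkshift = 9 and
	 * d_pblkshift = 12; DKIOCGMEDIAINFOEXT then reports dki_lbsize = 512
	 * and dki_pbsize = 4096.)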
1806 */ 1807 if ((media.m_pblksize != 0) && 1808 (media.m_pblksize >= media.m_blksize) && 1809 (ISP2(media.m_pblksize))) 1810 bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1; 1811 1812 done: 1813 if (state != bd->d_state) { 1814 bd->d_state = state; 1815 cv_broadcast(&bd->d_statecv); 1816 docmlb = B_TRUE; 1817 } 1818 mutex_exit(&bd->d_statemutex); 1819 1820 bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift; 1821 1822 if (docmlb) { 1823 if (state == DKIO_INSERTED) { 1824 (void) cmlb_validate(bd->d_cmlbh, 0, 0); 1825 } else { 1826 cmlb_invalidate(bd->d_cmlbh, 0); 1827 } 1828 } 1829 } 1830 1831 static int 1832 bd_check_state(bd_t *bd, enum dkio_state *state) 1833 { 1834 clock_t when; 1835 1836 for (;;) { 1837 1838 bd_update_state(bd); 1839 1840 mutex_enter(&bd->d_statemutex); 1841 1842 if (bd->d_state != *state) { 1843 *state = bd->d_state; 1844 mutex_exit(&bd->d_statemutex); 1845 break; 1846 } 1847 1848 when = drv_usectohz(1000000); 1849 if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex, 1850 when, TR_CLOCK_TICK) == 0) { 1851 mutex_exit(&bd->d_statemutex); 1852 return (EINTR); 1853 } 1854 1855 mutex_exit(&bd->d_statemutex); 1856 } 1857 1858 return (0); 1859 } 1860 1861 static int 1862 bd_flush_write_cache_done(struct buf *bp) 1863 { 1864 struct dk_callback *dc = (void *)bp->b_private; 1865 1866 (*dc->dkc_callback)(dc->dkc_cookie, geterror(bp)); 1867 kmem_free(dc, sizeof (*dc)); 1868 freerbuf(bp); 1869 return (0); 1870 } 1871 1872 static int 1873 bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc) 1874 { 1875 buf_t *bp; 1876 struct dk_callback *dc; 1877 bd_xfer_impl_t *xi; 1878 int rv; 1879 1880 if (bd->d_ops.o_sync_cache == NULL) { 1881 return (ENOTSUP); 1882 } 1883 if ((bp = getrbuf(KM_SLEEP)) == NULL) { 1884 return (ENOMEM); 1885 } 1886 bp->b_resid = 0; 1887 bp->b_bcount = 0; 1888 1889 xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP); 1890 if (xi == NULL) { 1891 rv = geterror(bp); 1892 freerbuf(bp); 1893 return (rv); 1894 } 1895 1896 /* Make an asynchronous flush, but only if there is a callback */ 1897 if (dkc != NULL && dkc->dkc_callback != NULL) { 1898 /* Make a private copy of the callback structure */ 1899 dc = kmem_alloc(sizeof (*dc), KM_SLEEP); 1900 *dc = *dkc; 1901 bp->b_private = dc; 1902 bp->b_iodone = bd_flush_write_cache_done; 1903 1904 bd_submit(bd, xi); 1905 return (0); 1906 } 1907 1908 /* In case there is no callback, perform a synchronous flush */ 1909 bd_submit(bd, xi); 1910 (void) biowait(bp); 1911 rv = geterror(bp); 1912 freerbuf(bp); 1913 1914 return (rv); 1915 } 1916 1917 /* 1918 * Nexus support. 1919 */ 1920 int 1921 bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop, 1922 void *arg, void *result) 1923 { 1924 bd_handle_t hdl; 1925 1926 switch (ctlop) { 1927 case DDI_CTLOPS_REPORTDEV: 1928 cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n", 1929 ddi_node_name(rdip), ddi_get_name_addr(rdip), 1930 ddi_driver_name(rdip), ddi_get_instance(rdip)); 1931 return (DDI_SUCCESS); 1932 1933 case DDI_CTLOPS_INITCHILD: 1934 hdl = ddi_get_parent_data((dev_info_t *)arg); 1935 if (hdl == NULL) { 1936 return (DDI_NOT_WELL_FORMED); 1937 } 1938 ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr); 1939 return (DDI_SUCCESS); 1940 1941 case DDI_CTLOPS_UNINITCHILD: 1942 ddi_set_name_addr((dev_info_t *)arg, NULL); 1943 ndi_prop_remove_all((dev_info_t *)arg); 1944 return (DDI_SUCCESS); 1945 1946 default: 1947 return (ddi_ctlops(dip, rdip, ctlop, arg, result)); 1948 } 1949 } 1950 1951 /* 1952 * Functions for device drivers. 
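 *
 * The sketch below is purely illustrative and is not part of blkdev
 * itself; the myblk_* names are hypothetical. It shows the minimal
 * wiring a parent driver would do in its attach(9E) path, following
 * the three steps described in the comment at the top of this file:
 *
 *	static bd_ops_t myblk_bd_ops = {
 *		.o_version	= BD_OPS_CURRENT_VERSION,
 *		.o_drive_info	= myblk_drive_info,
 *		.o_media_info	= myblk_media_info,
 *		.o_devid_init	= myblk_devid_init,
 *		.o_sync_cache	= myblk_sync_cache,
 *		.o_read		= myblk_read,
 *		.o_write	= myblk_write,
 *	};
 *
 *	static int
 *	myblk_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 *	{
 *		myblk_t *mp = myblk_get_softstate(dip);	-- hypothetical
 *
 *		if (cmd != DDI_ATTACH)
 *			return (DDI_FAILURE);
 *
 *		mp->m_bdh = bd_alloc_handle(mp, &myblk_bd_ops,
 *		    &myblk_dma_attr, KM_SLEEP);
 *		if (mp->m_bdh == NULL)
 *			return (DDI_FAILURE);
 *
 *		if (bd_attach_handle(dip, mp->m_bdh) != DDI_SUCCESS) {
 *			bd_free_handle(mp->m_bdh);
 *			return (DDI_FAILURE);
 *		}
 *		return (DDI_SUCCESS);
 *	}
 *
 * The matching detach(9E) path calls bd_detach_handle() followed by
 * bd_free_handle(), and the parent's _init(9E) typically calls
 * bd_mod_init() on its dev_ops so that blkdev can supply the bus_ops
 * used to enumerate the child node (see bd_mod_init() below).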
 */
bd_handle_t
bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
{
	bd_handle_t hdl;

	/*
	 * There is full compatibility between the version 0 API and the
	 * current version.
	 */
	switch (ops->o_version) {
	case BD_OPS_VERSION_0:
	case BD_OPS_CURRENT_VERSION:
		break;

	default:
		return (NULL);
	}

	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
	if (hdl != NULL) {
		hdl->h_ops = *ops;
		hdl->h_dma = dma;
		hdl->h_private = private;
	}

	return (hdl);
}

void
bd_free_handle(bd_handle_t hdl)
{
	kmem_free(hdl, sizeof (*hdl));
}

int
bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
{
	dev_info_t *child;
	bd_drive_t drive = { 0 };

	/*
	 * It's not an error if bd_attach_handle() is called on a handle
	 * that is already attached. We just ignore the request to attach
	 * and return. This way drivers using blkdev don't have to keep
	 * track of blkdev state; they can just call this function to make
	 * sure it is attached.
	 */
	if (hdl->h_child != NULL) {
		return (DDI_SUCCESS);
	}

	/* if drivers don't override this, make it assume none */
	drive.d_lun = -1;
	hdl->h_ops.o_drive_info(hdl->h_private, &drive);

	hdl->h_parent = dip;
	hdl->h_name = "blkdev";

	/*LINTED: E_BAD_PTR_CAST_ALIGN*/
	if (*(uint64_t *)drive.d_eui64 != 0) {
		if (drive.d_lun >= 0) {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "w%02X%02X%02X%02X%02X%02X%02X%02X,%X",
			    drive.d_eui64[0], drive.d_eui64[1],
			    drive.d_eui64[2], drive.d_eui64[3],
			    drive.d_eui64[4], drive.d_eui64[5],
			    drive.d_eui64[6], drive.d_eui64[7], drive.d_lun);
		} else {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "w%02X%02X%02X%02X%02X%02X%02X%02X",
			    drive.d_eui64[0], drive.d_eui64[1],
			    drive.d_eui64[2], drive.d_eui64[3],
			    drive.d_eui64[4], drive.d_eui64[5],
			    drive.d_eui64[6], drive.d_eui64[7]);
		}
	} else {
		if (drive.d_lun >= 0) {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "%X,%X", drive.d_target, drive.d_lun);
		} else {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "%X", drive.d_target);
		}
	}

	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
	    &child) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    "blkdev", hdl->h_addr);
		return (DDI_FAILURE);
	}

	ddi_set_parent_data(child, hdl);
	hdl->h_child = child;

	if (ndi_devi_online(child, 0) == NDI_FAILURE) {
		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    hdl->h_name, hdl->h_addr);
		(void) ndi_devi_free(child);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

int
bd_detach_handle(bd_handle_t hdl)
{
	int circ;
	int rv;
	char *devnm;

	/*
	 * It's not an error if bd_detach_handle() is called on a handle
	 * that is already detached. We just ignore the request to detach
	 * and return. This way drivers using blkdev don't have to keep
	 * track of blkdev state; they can just call this function to make
	 * sure it is detached.
	 */
	if (hdl->h_child == NULL) {
		return (DDI_SUCCESS);
	}
	ndi_devi_enter(hdl->h_parent, &circ);
	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
		rv = ddi_remove_child(hdl->h_child, 0);
	} else {
		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
		(void) ddi_deviname(hdl->h_child, devnm);
		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
		kmem_free(devnm, MAXNAMELEN + 1);
	}
	if (rv == 0) {
		hdl->h_child = NULL;
	}

	ndi_devi_exit(hdl->h_parent, circ);
	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
}

void
bd_xfer_done(bd_xfer_t *xfer, int err)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	buf_t		*bp = xi->i_bp;
	int		rv = DDI_SUCCESS;
	bd_t		*bd = xi->i_bd;
	size_t		len;

	if (err != 0) {
		bd_runq_exit(xi, err);
		atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, err);
		biodone(bp);
		return;
	}

	xi->i_cur_win++;
	xi->i_resid -= xi->i_len;

	if (xi->i_resid == 0) {
		/* Job completed successfully! */
		bd_runq_exit(xi, 0);

		bd_xfer_free(xi);
		biodone(bp);
		return;
	}

	xi->i_blkno += xi->i_nblks;

	if (bd->d_use_dma) {
		/* More transfer still pending... advance to next DMA window. */
		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
	} else {
		/* Advance memory window. */
		xi->i_kaddr += xi->i_len;
		xi->i_offset += xi->i_len;
		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
	}

	if ((rv != DDI_SUCCESS) ||
	    (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
		bd_runq_exit(xi, EFAULT);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, EFAULT);
		biodone(bp);
		return;
	}
	xi->i_len = len;
	xi->i_nblks = len >> xi->i_blkshift;

	/* Submit next window to hardware.
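	 * To the parent driver this looks like a fresh o_read/o_write call
	 * on the same bd_xfer_t: i_blkno, i_kaddr/i_dmac and i_nblks now
	 * describe the next window. For example (non-DMA case), a 1 MB
	 * request with a d_maxxfer of 256 KB is delivered to the parent as
	 * four consecutive windows, each acknowledged by bd_xfer_done().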
*/ 2155 rv = xi->i_func(bd->d_private, &xi->i_public); 2156 if (rv != 0) { 2157 bd_runq_exit(xi, rv); 2158 2159 atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32); 2160 2161 bp->b_resid += xi->i_resid; 2162 bd_xfer_free(xi); 2163 bioerror(bp, rv); 2164 biodone(bp); 2165 } 2166 } 2167 2168 void 2169 bd_error(bd_xfer_t *xfer, int error) 2170 { 2171 bd_xfer_impl_t *xi = (void *)xfer; 2172 bd_t *bd = xi->i_bd; 2173 2174 switch (error) { 2175 case BD_ERR_MEDIA: 2176 atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32); 2177 break; 2178 case BD_ERR_NTRDY: 2179 atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32); 2180 break; 2181 case BD_ERR_NODEV: 2182 atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32); 2183 break; 2184 case BD_ERR_RECOV: 2185 atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32); 2186 break; 2187 case BD_ERR_ILLRQ: 2188 atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32); 2189 break; 2190 case BD_ERR_PFA: 2191 atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32); 2192 break; 2193 default: 2194 cmn_err(CE_PANIC, "bd_error: unknown error type %d", error); 2195 break; 2196 } 2197 } 2198 2199 void 2200 bd_state_change(bd_handle_t hdl) 2201 { 2202 bd_t *bd; 2203 2204 if ((bd = hdl->h_bd) != NULL) { 2205 bd_update_state(bd); 2206 } 2207 } 2208 2209 void 2210 bd_mod_init(struct dev_ops *devops) 2211 { 2212 static struct bus_ops bd_bus_ops = { 2213 BUSO_REV, /* busops_rev */ 2214 nullbusmap, /* bus_map */ 2215 NULL, /* bus_get_intrspec (OBSOLETE) */ 2216 NULL, /* bus_add_intrspec (OBSOLETE) */ 2217 NULL, /* bus_remove_intrspec (OBSOLETE) */ 2218 i_ddi_map_fault, /* bus_map_fault */ 2219 NULL, /* bus_dma_map (OBSOLETE) */ 2220 ddi_dma_allochdl, /* bus_dma_allochdl */ 2221 ddi_dma_freehdl, /* bus_dma_freehdl */ 2222 ddi_dma_bindhdl, /* bus_dma_bindhdl */ 2223 ddi_dma_unbindhdl, /* bus_dma_unbindhdl */ 2224 ddi_dma_flush, /* bus_dma_flush */ 2225 ddi_dma_win, /* bus_dma_win */ 2226 ddi_dma_mctl, /* bus_dma_ctl */ 2227 bd_bus_ctl, /* bus_ctl */ 2228 ddi_bus_prop_op, /* bus_prop_op */ 2229 NULL, /* bus_get_eventcookie */ 2230 NULL, /* bus_add_eventcall */ 2231 NULL, /* bus_remove_eventcall */ 2232 NULL, /* bus_post_event */ 2233 NULL, /* bus_intr_ctl (OBSOLETE) */ 2234 NULL, /* bus_config */ 2235 NULL, /* bus_unconfig */ 2236 NULL, /* bus_fm_init */ 2237 NULL, /* bus_fm_fini */ 2238 NULL, /* bus_fm_access_enter */ 2239 NULL, /* bus_fm_access_exit */ 2240 NULL, /* bus_power */ 2241 NULL, /* bus_intr_op */ 2242 }; 2243 2244 devops->devo_bus_ops = &bd_bus_ops; 2245 2246 /* 2247 * NB: The device driver is free to supply its own 2248 * character entry device support. 2249 */ 2250 } 2251 2252 void 2253 bd_mod_fini(struct dev_ops *devops) 2254 { 2255 devops->devo_bus_ops = NULL; 2256 } 2257
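
/*
 * Illustrative sketch of the completion half of the contract described
 * in the "Queues" section of the comment at the top of this file. This
 * is not part of blkdev; the myblk_* names are hypothetical. The
 * parent's o_read/o_write entry points start the transfer described by
 * the bd_xfer_t, and its completion path (here an interrupt handler)
 * reports the result with bd_xfer_done(), optionally classifying the
 * failure first with bd_error():
 *
 *	static int
 *	myblk_read(void *arg, bd_xfer_t *xfer)
 *	{
 *		myblk_t *mp = arg;
 *
 *		-- queue the command described by xfer (x_blkno, x_nblks,
 *		-- x_dmac/x_kaddr, x_qnum) to the hardware; hypothetical:
 *		if (myblk_submit(mp, xfer, B_TRUE) != 0)
 *			return (EIO);
 *		return (0);
 *	}
 *
 *	static uint_t
 *	myblk_intr(caddr_t arg1, caddr_t arg2)
 *	{
 *		myblk_t *mp = (void *)arg1;
 *		bd_xfer_t *xfer = myblk_completed_xfer(mp);	-- hypothetical
 *
 *		if (xfer == NULL)
 *			return (DDI_INTR_UNCLAIMED);
 *		if (myblk_media_error(mp)) {
 *			bd_error(xfer, BD_ERR_MEDIA);
 *			bd_xfer_done(xfer, EIO);
 *		} else {
 *			bd_xfer_done(xfer, 0);
 *		}
 *		return (DDI_INTR_CLAIMED);
 *	}
 *
 * Returning non-zero from o_read/o_write fails the request immediately
 * (see bd_sched()); returning zero obliges the parent to eventually call
 * bd_xfer_done() exactly once for the current transfer window, passing 0
 * on success or an errno value on failure.
 */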