/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
 * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2017 The MathWorks, Inc. All rights reserved.
 * Copyright 2019 Western Digital Corporation.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/aio_req.h>
#include <sys/cred.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/kstat.h>
#include <sys/fs/dv_node.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/note.h>
#include <sys/blkdev.h>
#include <sys/scsi/impl/inquiry.h>

/*
 * blkdev is a driver which provides a lot of the common functionality
 * a block device driver may need, and helps by removing code which
 * is frequently duplicated in block device drivers.
 *
 * Within this driver all the struct cb_ops functions required for a
 * block device driver are written with appropriate call back functions
 * to be provided by the parent driver.
 *
 * To use blkdev, a driver needs to:
 *	1. Create a bd_ops_t structure which has the call back operations
 *	   blkdev will use.
 *	2. Create a handle by calling bd_alloc_handle(). One of the
 *	   arguments to this function is the bd_ops_t.
 *	3. Call bd_attach_handle(). This will instantiate a blkdev device
 *	   as a child device node of the calling driver.
 *
 * A parent driver is not restricted to just allocating and attaching a
 * single instance; it may attach as many as it wishes. For each handle
 * attached, appropriate entries in /dev/[r]dsk are created.
 *
 * The bd_ops_t routines that a parent of blkdev needs to provide are:
 *
 * o_drive_info: Provide information to blkdev such as how many I/O queues
 *		 to create and the size of those queues. Also some device
 *		 specifics such as EUI, vendor, product, model, serial
 *		 number ....
 *
 * o_media_info: Provide information about the media, e.g. size and block
 *		 size.
 *
 * o_devid_init: Creates and initializes the device id. Typically calls
 *		 ddi_devid_init().
 *
 * o_sync_cache: Issues a device appropriate command to flush any write
 *		 caches.
 *
 * o_read:	 Read data as described by bd_xfer_t argument.
 *
 * o_write:	 Write data as described by bd_xfer_t argument.
 *
 * A minimal usage sketch of the above follows this comment block.
 *
 * Queues
 * ------
 * Part of the drive_info data is a queue count. blkdev will create
 * "queue count" number of waitq/runq pairs. Each waitq/runq pair
 * operates independently. As an I/O is scheduled up to the parent
 * driver via o_read or o_write its queue number is given. If the
 * parent driver supports multiple hardware queues it can then select
 * where to submit the I/O request.
 *
 * Currently blkdev uses a simplistic round-robin queue selection method.
 * It has the advantage that it is lockless. In the future it will be
 * worthwhile reviewing this strategy for something which prioritizes queues
 * depending on how busy they are.
 *
 * Each waitq/runq pair is protected by its mutex (q_iomutex). Incoming
 * I/O requests are initially added to the waitq. They are taken off the
 * waitq, added to the runq and submitted, provided the runq is less
 * than the qsize as specified in the drive_info. As an I/O request
 * completes, the parent driver is required to call bd_xfer_done(), which
 * will remove the I/O request from the runq and pass I/O completion
 * status up the stack.
 *
 * Locks
 * -----
 * There are 4 instance global locks d_ocmutex, d_ksmutex, d_errmutex and
 * d_statemutex, as well as a q_iomutex per waitq/runq pair.
 *
 * Lock Hierarchy
 * --------------
 * The only two locks which may be held simultaneously are q_iomutex and
 * d_ksmutex. In all cases q_iomutex must be acquired before d_ksmutex.
 */

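/*
 * A minimal sketch of parent-driver usage, assuming a hypothetical
 * driver "xd" with soft state xd_t and callback implementations
 * xd_drive_info(), xd_media_info(), xd_devid_init(), xd_sync_cache(),
 * xd_read() and xd_write() (the xd_* names are illustrative only, not
 * part of the blkdev API):
 *
 *	static bd_ops_t xd_bd_ops = {
 *		.o_version	= BD_OPS_CURRENT_VERSION,
 *		.o_drive_info	= xd_drive_info,
 *		.o_media_info	= xd_media_info,
 *		.o_devid_init	= xd_devid_init,
 *		.o_sync_cache	= xd_sync_cache,
 *		.o_read		= xd_read,
 *		.o_write	= xd_write,
 *	};
 *
 *	xd->xd_bdh = bd_alloc_handle(xd, &xd_bd_ops, &xd_dma_attr,
 *	    KM_SLEEP);
 *	if (xd->xd_bdh == NULL)
 *		return (DDI_FAILURE);
 *	if (bd_attach_handle(dip, xd->xd_bdh) != DDI_SUCCESS) {
 *		bd_free_handle(xd->xd_bdh);
 *		return (DDI_FAILURE);
 *	}
 */
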
#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)

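/*
 * Minor numbers encode (instance, partition):
 * minor = instance * BD_MAXPART + partition. For example, with
 * BD_MAXPART == 64, minor 130 maps to instance 2, partition 2.
 */
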
typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;
typedef struct bd_queue bd_queue_t;

struct bd {
	void		*d_private;
	dev_info_t	*d_dip;
	kmutex_t	d_ocmutex;
	kmutex_t	d_ksmutex;
	kmutex_t	d_errmutex;
	kmutex_t	d_statemutex;
	kcondvar_t	d_statecv;
	enum dkio_state	d_state;
	cmlb_handle_t	d_cmlbh;
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];	/* bit mask */
	uint64_t	d_io_counter;

	uint32_t	d_qcount;
	uint32_t	d_qactive;
	uint32_t	d_maxxfer;
	uint32_t	d_blkshift;
	uint32_t	d_pblkshift;
	uint64_t	d_numblks;
	ddi_devid_t	d_devid;

	kmem_cache_t	*d_cache;
	bd_queue_t	*d_queues;
	kstat_t		*d_ksp;
	kstat_io_t	*d_kiop;
	kstat_t		*d_errstats;
	struct bd_errstats *d_kerr;

	boolean_t	d_rdonly;
	boolean_t	d_ssd;
	boolean_t	d_removable;
	boolean_t	d_hotpluggable;
	boolean_t	d_use_dma;

	ddi_dma_attr_t	d_dma;
	bd_ops_t	d_ops;
	bd_handle_t	d_handle;
};

struct bd_handle {
	bd_ops_t	h_ops;
	ddi_dma_attr_t	*h_dma;
	dev_info_t	*h_parent;
	dev_info_t	*h_child;
	void		*h_private;
	bd_t		*h_bd;
	char		*h_name;
	char		h_addr[30];	/* enough for w%0.16x,%X */
};

struct bd_xfer_impl {
	bd_xfer_t	i_public;
	list_node_t	i_linkage;
	bd_t		*i_bd;
	buf_t		*i_bp;
	bd_queue_t	*i_bq;
	uint_t		i_num_win;
	uint_t		i_cur_win;
	off_t		i_offset;
	int		(*i_func)(void *, bd_xfer_t *);
	uint32_t	i_blkshift;
	size_t		i_len;
	size_t		i_resid;
};

struct bd_queue {
	kmutex_t	q_iomutex;
	uint32_t	q_qsize;
	uint32_t	q_qactive;
	list_t		q_runq;
	list_t		q_waitq;
};

#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags
#define	i_qnum		i_public.x_qnum


/*
 * Private prototypes.
 */

static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);
static void bd_create_errstats(bd_t *, int, bd_drive_t *);
static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *);
static void bd_init_errstats(bd_t *, bd_drive_t *);

static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *, bd_queue_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);
static int bd_check_uio(dev_t, struct uio *);

struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};

static struct cb_ops bd_cb_ops = {
	bd_open,		/* open */
	bd_close,		/* close */
	bd_strategy,		/* strategy */
	nodev,			/* print */
	bd_dump,		/* dump */
	bd_read,		/* read */
	bd_write,		/* write */
	bd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	bd_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_64BIT | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	bd_aread,		/* async read */
	bd_awrite		/* async write */
};

struct dev_ops bd_dev_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	bd_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	bd_attach,		/* attach */
	bd_detach,		/* detach */
	nodev,			/* reset */
	&bd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"Generic Block Device",
	&bd_dev_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, { &modldrv, NULL }
};

static void *bd_state;
static krwlock_t bd_lock;

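/*
 * Note: the soft state and bd_lock are set up before mod_install(),
 * since attach(9E) may be called as soon as the module is installed;
 * _fini() tears them down only after mod_remove() succeeds.
 */
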
int
_init(void)
{
	int	rv;

	rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
	if (rv != DDI_SUCCESS) {
		return (rv);
	}
	rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
	rv = mod_install(&modlinkage);
	if (rv != DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_fini(void)
{
	int	rv;

	rv = mod_remove(&modlinkage);
	if (rv == DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static int
bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	bd_t	*bd;
	minor_t	inst;

	_NOTE(ARGUNUSED(dip));

	inst = BDINST((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		bd = ddi_get_soft_state(bd_state, inst);
		if (bd == NULL) {
			return (DDI_FAILURE);
		}
		*resultp = (void *)bd->d_dip;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(intptr_t)inst;
		break;

	default:
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

static void
bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
{
	int	ilen;
	char	*data_string;

	ilen = scsi_ascii_inquiry_len(data, len);
	ASSERT3U(ilen, <=, len);
	if (ilen <= 0)
		return;
	/* ensure null termination */
	data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
	bcopy(data, data_string, ilen);
	(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
	kmem_free(data_string, ilen + 1);
}

static void
bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
{
	if (drive->d_vendor_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
		    drive->d_vendor, drive->d_vendor_len);

	if (drive->d_product_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
		    drive->d_product, drive->d_product_len);

	if (drive->d_serial_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
		    drive->d_serial, drive->d_serial_len);

	if (drive->d_revision_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
		    drive->d_revision, drive->d_revision_len);
}

static void
bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive)
{
	char	ks_module[KSTAT_STRLEN];
	char	ks_name[KSTAT_STRLEN];
	int	ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t);

	if (bd->d_errstats != NULL)
		return;

	(void) snprintf(ks_module, sizeof (ks_module), "%serr",
	    ddi_driver_name(bd->d_dip));
	(void) snprintf(ks_name, sizeof (ks_name), "%s%d,err",
	    ddi_driver_name(bd->d_dip), inst);

	bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error",
	    KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);

	mutex_init(&bd->d_errmutex, NULL, MUTEX_DRIVER, NULL);
	if (bd->d_errstats == NULL) {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat. The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats),
		    KM_SLEEP);
	} else {
		bd->d_errstats->ks_lock = &bd->d_errmutex;
		bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data;
	}

	kstat_named_init(&bd->d_kerr->bd_softerrs, "Soft Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_harderrs, "Hard Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);

	if (drive->d_model_len > 0) {
		kstat_named_init(&bd->d_kerr->bd_model, "Model",
		    KSTAT_DATA_STRING);
	} else {
		kstat_named_init(&bd->d_kerr->bd_vid, "Vendor",
		    KSTAT_DATA_STRING);
		kstat_named_init(&bd->d_kerr->bd_pid, "Product",
		    KSTAT_DATA_STRING);
	}

	kstat_named_init(&bd->d_kerr->bd_revision, "Revision",
	    KSTAT_DATA_STRING);
	kstat_named_init(&bd->d_kerr->bd_serial, "Serial No",
	    KSTAT_DATA_STRING);
	kstat_named_init(&bd->d_kerr->bd_capacity, "Size",
	    KSTAT_DATA_ULONGLONG);
	kstat_named_init(&bd->d_kerr->bd_rq_media_err, "Media Error",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err, "Device Not Ready",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_nodev_err, "No Device",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_recov_err, "Recoverable",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_illrq_err, "Illegal Request",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_pfa_err,
	    "Predictive Failure Analysis", KSTAT_DATA_UINT32);

	/* only install the real kstat if it was actually created */
	if (bd->d_errstats != NULL) {
		bd->d_errstats->ks_private = bd;
		kstat_install(bd->d_errstats);
	}
}

static void
bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt)
{
	char	*tmp;
	size_t	km_len;

	if (KSTAT_NAMED_STR_PTR(k) == NULL) {
		if (len > 0)
			km_len = strnlen(str, len);
		else if (alt != NULL)
			km_len = strlen(alt);
		else
			return;

		tmp = kmem_alloc(km_len + 1, KM_SLEEP);
		bcopy(len > 0 ? str : alt, tmp, km_len);
		tmp[km_len] = '\0';

		kstat_named_setstr(k, tmp);
	}
}

static void
bd_errstats_clrstr(kstat_named_t *k)
{
	if (KSTAT_NAMED_STR_PTR(k) == NULL)
		return;

	kmem_free(KSTAT_NAMED_STR_PTR(k), KSTAT_NAMED_STR_BUFLEN(k));
	kstat_named_setstr(k, NULL);
}

static void
bd_init_errstats(bd_t *bd, bd_drive_t *drive)
{
	struct bd_errstats	*est = bd->d_kerr;

	mutex_enter(&bd->d_errmutex);

	if (drive->d_model_len > 0 &&
	    KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) {
		bd_errstats_setstr(&est->bd_model, drive->d_model,
		    drive->d_model_len, NULL);
	} else {
		bd_errstats_setstr(&est->bd_vid, drive->d_vendor,
		    drive->d_vendor_len, "Unknown ");
		bd_errstats_setstr(&est->bd_pid, drive->d_product,
		    drive->d_product_len, "Unknown ");
	}

	bd_errstats_setstr(&est->bd_revision, drive->d_revision,
	    drive->d_revision_len, "0001");
	bd_errstats_setstr(&est->bd_serial, drive->d_serial,
	    drive->d_serial_len, "0 ");

	mutex_exit(&bd->d_errmutex);
}

static void
bd_fini_errstats(bd_t *bd)
{
	struct bd_errstats	*est = bd->d_kerr;

	mutex_enter(&bd->d_errmutex);

	bd_errstats_clrstr(&est->bd_model);
	bd_errstats_clrstr(&est->bd_vid);
	bd_errstats_clrstr(&est->bd_pid);
	bd_errstats_clrstr(&est->bd_revision);
	bd_errstats_clrstr(&est->bd_serial);

	mutex_exit(&bd->d_errmutex);
}

static void
bd_queues_free(bd_t *bd)
{
	uint32_t	i;

	for (i = 0; i < bd->d_qcount; i++) {
		bd_queue_t *bq = &bd->d_queues[i];

		mutex_destroy(&bq->q_iomutex);
		list_destroy(&bq->q_waitq);
		list_destroy(&bq->q_runq);
	}

	kmem_free(bd->d_queues, sizeof (*bd->d_queues) * bd->d_qcount);
}

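/*
 * Attach flow: copy (and sanity-check) the parent's DMA attributes,
 * create the xfer kmem cache and kstats, query o_drive_info for the
 * queue count and inquiry data, create the waitq/runq pairs, attach
 * the cmlb label, and register a devid if the parent provides
 * o_devid_init.
 */
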
static int
bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int		inst;
	bd_handle_t	hdl;
	bd_t		*bd;
	bd_drive_t	drive;
	uint32_t	i;
	int		rv;
	char		name[16];
	char		kcache[32];

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* We don't do anything native for suspend/resume */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	inst = ddi_get_instance(dip);
	hdl = ddi_get_parent_data(dip);

	(void) snprintf(name, sizeof (name), "%s%d",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);

	if (hdl == NULL) {
		cmn_err(CE_WARN, "%s: missing parent data!", name);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
		return (DDI_FAILURE);
	}
	bd = ddi_get_soft_state(bd_state, inst);

	if (hdl->h_dma) {
		bd->d_dma = *(hdl->h_dma);
		bd->d_dma.dma_attr_granular =
		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
		bd->d_use_dma = B_TRUE;

		if (bd->d_maxxfer &&
		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
			cmn_err(CE_WARN,
			    "%s: inconsistent maximum transfer size!",
			    name);
			/* We force it */
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		} else {
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		}
	} else {
		bd->d_use_dma = B_FALSE;
		if (bd->d_maxxfer == 0) {
			bd->d_maxxfer = 1024 * 1024;
		}
	}
	bd->d_ops = hdl->h_ops;
	bd->d_private = hdl->h_private;
	bd->d_blkshift = 9;	/* 512 bytes, to start */

	if (bd->d_maxxfer % DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
	}
	if (bd->d_maxxfer < DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	bd->d_dip = dip;
	bd->d_handle = hdl;
	hdl->h_bd = bd;
	ddi_set_driver_private(dip, bd);

	mutex_init(&bd->d_ksmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);

	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);

	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
	if (bd->d_ksp != NULL) {
		bd->d_ksp->ks_lock = &bd->d_ksmutex;
		kstat_install(bd->d_ksp);
		bd->d_kiop = bd->d_ksp->ks_data;
	} else {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat. The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
	}

	cmlb_alloc_handle(&bd->d_cmlbh);

	bd->d_state = DKIO_NONE;

	bzero(&drive, sizeof (drive));
	/*
	 * Default to one queue, parent driver can override.
	 */
	drive.d_qcount = 1;
	bd->d_ops.o_drive_info(bd->d_private, &drive);
	bd->d_qcount = drive.d_qcount;
	bd->d_removable = drive.d_removable;
	bd->d_hotpluggable = drive.d_hotpluggable;

	if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
		bd->d_maxxfer = drive.d_maxxfer;

	bd_create_inquiry_props(dip, &drive);

	bd_create_errstats(bd, inst, &drive);
	bd_init_errstats(bd, &drive);
	bd_update_state(bd);

	bd->d_queues = kmem_alloc(sizeof (*bd->d_queues) * bd->d_qcount,
	    KM_SLEEP);
	for (i = 0; i < bd->d_qcount; i++) {
		bd_queue_t *bq = &bd->d_queues[i];

		bq->q_qsize = drive.d_qsize;
		bq->q_qactive = 0;
		mutex_init(&bq->q_iomutex, NULL, MUTEX_DRIVER, NULL);

		list_create(&bq->q_waitq, sizeof (bd_xfer_impl_t),
		    offsetof(struct bd_xfer_impl, i_linkage));
		list_create(&bq->q_runq, sizeof (bd_xfer_impl_t),
		    offsetof(struct bd_xfer_impl, i_linkage));
	}

	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
	    bd->d_removable, bd->d_hotpluggable,
	    /*LINTED: E_BAD_PTR_CAST_ALIGN*/
	    *(uint64_t *)drive.d_eui64 != 0 ? DDI_NT_BLOCK_BLKDEV :
	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
	if (rv != 0) {
		cmlb_free_handle(&bd->d_cmlbh);
		kmem_cache_destroy(bd->d_cache);
		mutex_destroy(&bd->d_ksmutex);
		mutex_destroy(&bd->d_ocmutex);
		mutex_destroy(&bd->d_statemutex);
		cv_destroy(&bd->d_statecv);
		bd_queues_free(bd);
		if (bd->d_ksp != NULL) {
			kstat_delete(bd->d_ksp);
			bd->d_ksp = NULL;
		} else {
			kmem_free(bd->d_kiop, sizeof (kstat_io_t));
		}
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	if (bd->d_ops.o_devid_init != NULL) {
		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
		if (rv == DDI_SUCCESS) {
			if (ddi_devid_register(dip, bd->d_devid) !=
			    DDI_SUCCESS) {
				cmn_err(CE_WARN,
				    "%s: unable to register devid", name);
			}
		}
	}

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers). Also set up properties
	 * used by HAL to identify removable media.
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);
	if (bd->d_removable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "removable-media", NULL, 0);
	}
	if (bd->d_hotpluggable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "hotpluggable", NULL, 0);
	}

	ddi_report_dev(dip);

	return (DDI_SUCCESS);
}

static int
bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	bd_t	*bd;

	bd = ddi_get_driver_private(dip);

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		/* We don't suspend, but our parent does */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
	if (bd->d_ksp != NULL) {
		kstat_delete(bd->d_ksp);
		bd->d_ksp = NULL;
	} else {
		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
	}

	if (bd->d_errstats != NULL) {
		bd_fini_errstats(bd);
		kstat_delete(bd->d_errstats);
		bd->d_errstats = NULL;
	} else {
		kmem_free(bd->d_kerr, sizeof (struct bd_errstats));
		mutex_destroy(&bd->d_errmutex);
	}

	cmlb_detach(bd->d_cmlbh, 0);
	cmlb_free_handle(&bd->d_cmlbh);
	if (bd->d_devid)
		ddi_devid_free(bd->d_devid);
	kmem_cache_destroy(bd->d_cache);
	mutex_destroy(&bd->d_ksmutex);
	mutex_destroy(&bd->d_ocmutex);
	mutex_destroy(&bd->d_statemutex);
	cv_destroy(&bd->d_statecv);
	bd_queues_free(bd);
	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
	return (DDI_SUCCESS);
}

static int
bd_xfer_ctor(void *buf, void *arg, int kmflag)
{
	bd_xfer_impl_t	*xi;
	bd_t		*bd = arg;
	int		(*dcb)(caddr_t);

	if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
		dcb = DDI_DMA_SLEEP;
	} else {
		dcb = DDI_DMA_DONTWAIT;
	}

	xi = buf;
	bzero(xi, sizeof (*xi));
	xi->i_bd = bd;

	if (bd->d_use_dma) {
		if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
		    &xi->i_dmah) != DDI_SUCCESS) {
			return (-1);
		}
	}

	return (0);
}

static void
bd_xfer_dtor(void *buf, void *arg)
{
	bd_xfer_impl_t	*xi = buf;

	_NOTE(ARGUNUSED(arg));

	if (xi->i_dmah)
		ddi_dma_free_handle(&xi->i_dmah);
	xi->i_dmah = NULL;
}

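/*
 * Transfer setup. For DMA-capable parents the buf is bound to the
 * xfer's DMA handle here; a partial binding (DDI_DMA_PARTIAL_MAP)
 * yields multiple DMA windows, which bd_xfer_done() walks one at a
 * time.
 */
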
static bd_xfer_impl_t *
bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
    int kmflag)
{
	bd_xfer_impl_t	*xi;
	int		rv = 0;
	int		status;
	unsigned	dir;
	int		(*cb)(caddr_t);
	size_t		len;
	uint32_t	shift;

	if (kmflag == KM_SLEEP) {
		cb = DDI_DMA_SLEEP;
	} else {
		cb = DDI_DMA_DONTWAIT;
	}

	xi = kmem_cache_alloc(bd->d_cache, kmflag);
	if (xi == NULL) {
		bioerror(bp, ENOMEM);
		return (NULL);
	}

	ASSERT(bp);

	xi->i_bp = bp;
	xi->i_func = func;
	xi->i_blkno = bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT);

	if (bp->b_bcount == 0) {
		xi->i_len = 0;
		xi->i_nblks = 0;
		xi->i_kaddr = NULL;
		xi->i_resid = 0;
		xi->i_num_win = 0;
		goto done;
	}

	if (bp->b_flags & B_READ) {
		dir = DDI_DMA_READ;
		xi->i_func = bd->d_ops.o_read;
	} else {
		dir = DDI_DMA_WRITE;
		xi->i_func = bd->d_ops.o_write;
	}

	shift = bd->d_blkshift;
	xi->i_blkshift = shift;

	if (!bd->d_use_dma) {
		bp_mapin(bp);
		rv = 0;
		xi->i_offset = 0;
		xi->i_num_win =
		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
		xi->i_cur_win = 0;
		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
		xi->i_nblks = xi->i_len >> shift;
		xi->i_kaddr = bp->b_un.b_addr;
		xi->i_resid = bp->b_bcount;
	} else {

		/*
		 * We have to use consistent DMA if the address is misaligned.
		 */
		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
		} else {
			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
		}

		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
		    NULL, &xi->i_dmac, &xi->i_ndmac);
		switch (status) {
		case DDI_DMA_MAPPED:
			xi->i_num_win = 1;
			xi->i_cur_win = 0;
			xi->i_offset = 0;
			xi->i_len = bp->b_bcount;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_PARTIAL_MAP:
			xi->i_cur_win = 0;

			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
			    DDI_SUCCESS) ||
			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
			    &len, &xi->i_dmac, &xi->i_ndmac) !=
			    DDI_SUCCESS) ||
			    (P2PHASE(len, (1U << shift)) != 0)) {
				(void) ddi_dma_unbind_handle(xi->i_dmah);
				rv = EFAULT;
				goto done;
			}
			xi->i_len = len;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_NORESOURCES:
			rv = EAGAIN;
			goto done;
		case DDI_DMA_TOOBIG:
			rv = EINVAL;
			goto done;
		case DDI_DMA_NOMAPPING:
		case DDI_DMA_INUSE:
		default:
			rv = EFAULT;
			goto done;
		}
	}

done:
	if (rv != 0) {
		kmem_cache_free(bd->d_cache, xi);
		bioerror(bp, rv);
		return (NULL);
	}

	return (xi);
}

static void
bd_xfer_free(bd_xfer_impl_t *xi)
{
	if (xi->i_dmah) {
		(void) ddi_dma_unbind_handle(xi->i_dmah);
	}
	kmem_cache_free(xi->i_bd->d_cache, xi);
}

static int
bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	dev_t		dev = *devp;
	bd_t		*bd;
	minor_t		part;
	minor_t		inst;
	uint64_t	mask;
	boolean_t	ndelay;
	int		rv;
	diskaddr_t	nblks;
	diskaddr_t	lba;

	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	if (otyp >= OTYPCNT)
		return (EINVAL);

	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;

	/*
	 * Block any DR events from changing the set of registered
	 * devices while we function.
	 */
	rw_enter(&bd_lock, RW_READER);
	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);

	ASSERT(part < 64);
	mask = (1U << part);

	bd_update_state(bd);

	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {

		/* non-blocking opens are allowed to succeed */
		if (!ndelay) {
			rv = ENXIO;
			goto done;
		}
	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
	    NULL, NULL, 0) == 0) {

		/*
		 * We read the partinfo, verify valid ranges. If the
		 * partition is invalid, and we aren't blocking or
		 * doing a raw access, then fail. (Non-blocking and
		 * raw accesses can still succeed to allow a disk with
		 * bad partition data to be opened by format and fdisk.)
		 */
		if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
			rv = ENXIO;
			goto done;
		}
	} else if (!ndelay) {
		/*
		 * cmlb_partinfo failed -- invalid partition or no
		 * disk label.
		 */
		rv = ENXIO;
		goto done;
	}

	if ((flag & FWRITE) && bd->d_rdonly) {
		rv = EROFS;
		goto done;
	}

	if ((bd->d_open_excl) & (mask)) {
		rv = EBUSY;
		goto done;
	}
	if (flag & FEXCL) {
		if (bd->d_open_lyr[part]) {
			rv = EBUSY;
			goto done;
		}
		for (int i = 0; i < OTYP_LYR; i++) {
			if (bd->d_open_reg[i] & mask) {
				rv = EBUSY;
				goto done;
			}
		}
	}

	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]++;
	} else {
		bd->d_open_reg[otyp] |= mask;
	}
	if (flag & FEXCL) {
		bd->d_open_excl |= mask;
	}

	rv = 0;
done:
	mutex_exit(&bd->d_ocmutex);
	rw_exit(&bd_lock);

	return (rv);
}

static int
bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	bd_t		*bd;
	minor_t		inst;
	minor_t		part;
	uint64_t	mask;
	boolean_t	last = B_TRUE;

	_NOTE(ARGUNUSED(flag));
	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	ASSERT(part < 64);
	mask = (1U << part);

	rw_enter(&bd_lock, RW_READER);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);
	if (bd->d_open_excl & mask) {
		bd->d_open_excl &= ~mask;
	}
	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]--;
	} else {
		bd->d_open_reg[otyp] &= ~mask;
	}
	for (int i = 0; i < 64; i++) {
		if (bd->d_open_lyr[i]) {
			last = B_FALSE;
		}
	}
	for (int i = 0; last && (i < OTYP_LYR); i++) {
		if (bd->d_open_reg[i]) {
			last = B_FALSE;
		}
	}
	mutex_exit(&bd->d_ocmutex);

	if (last) {
		cmlb_invalidate(bd->d_cmlbh, 0);
	}
	rw_exit(&bd_lock);

	return (0);
}

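/*
 * bd_dump() can be called from panic context, so it must not sleep:
 * allocations use KM_NOSLEEP and the transfer is flagged BD_XFER_POLL
 * so the parent driver polls for completion.
 */
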
static int
bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
{
	minor_t		inst;
	minor_t		part;
	diskaddr_t	pstart;
	diskaddr_t	psize;
	bd_t		*bd;
	bd_xfer_impl_t	*xi;
	buf_t		*bp;
	int		rv;
	uint32_t	shift;
	daddr_t		d_blkno;
	int		d_nblk;

	rw_enter(&bd_lock, RW_READER);

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}
	shift = bd->d_blkshift;
	d_blkno = blkno >> (shift - DEV_BSHIFT);
	d_nblk = nblk >> (shift - DEV_BSHIFT);
	/*
	 * do cmlb, but do it synchronously unless we already have the
	 * partition (which we probably should.)
	 */
	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
	    (void *)1)) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	if ((d_blkno + d_nblk) > psize) {
		rw_exit(&bd_lock);
		return (EINVAL);
	}
	bp = getrbuf(KM_NOSLEEP);
	if (bp == NULL) {
		rw_exit(&bd_lock);
		return (ENOMEM);
	}

	bp->b_bcount = nblk << DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;
	bp->b_lblkno = blkno;
	bp->b_un.b_addr = caddr;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP);
	if (xi == NULL) {
		rw_exit(&bd_lock);
		freerbuf(bp);
		return (ENOMEM);
	}
	xi->i_blkno = d_blkno + pstart;
	xi->i_flags = BD_XFER_POLL;
	bd_submit(bd, xi);
	rw_exit(&bd_lock);

	/*
	 * Generally, we should have run this entirely synchronously
	 * at this point and the biowait call should be a no-op. If
	 * it didn't happen this way, it's a bug in the underlying
	 * driver not honoring BD_XFER_POLL.
	 */
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}

void
bd_minphys(struct buf *bp)
{
	minor_t inst;
	bd_t	*bd;
	inst = BDINST(bp->b_edev);

	bd = ddi_get_soft_state(bd_state, inst);

	/*
	 * In a non-debug kernel, bd_strategy will catch !bd as
	 * well, and will fail nicely.
	 */
	ASSERT(bd);

	if (bp->b_bcount > bd->d_maxxfer)
		bp->b_bcount = bd->d_maxxfer;
}

static int
bd_check_uio(dev_t dev, struct uio *uio)
{
	bd_t		*bd;
	uint32_t	shift;

	if ((bd = ddi_get_soft_state(bd_state, BDINST(dev))) == NULL) {
		return (ENXIO);
	}

	shift = bd->d_blkshift;
	if ((P2PHASE(uio->uio_loffset, (1U << shift)) != 0) ||
	    (P2PHASE(uio->uio_iov->iov_len, (1U << shift)) != 0)) {
		return (EINVAL);
	}

	return (0);
}

static int
bd_read(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	int ret = bd_check_uio(dev, uio);
	if (ret != 0) {
		return (ret);
	}
	return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
}

static int
bd_write(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	int ret = bd_check_uio(dev, uio);
	if (ret != 0) {
		return (ret);
	}
	return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
}

static int
bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	int ret = bd_check_uio(dev, aio->aio_uio);
	if (ret != 0) {
		return (ret);
	}
	return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
}

static int
bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	int ret = bd_check_uio(dev, aio->aio_uio);
	if (ret != 0) {
		return (ret);
	}
	return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
}

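/*
 * bd_strategy() validates alignment and partition bounds, clamps a
 * transfer that runs off the end of the partition (reflecting the
 * truncated portion in b_resid), and translates the partition-relative
 * block number to an absolute one before submitting the transfer.
 */
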
static int
bd_strategy(struct buf *bp)
{
	minor_t		inst;
	minor_t		part;
	bd_t		*bd;
	diskaddr_t	p_lba;
	diskaddr_t	p_nblks;
	diskaddr_t	b_nblks;
	bd_xfer_impl_t	*xi;
	uint32_t	shift;
	int		(*func)(void *, bd_xfer_t *);
	diskaddr_t	lblkno;

	part = BDPART(bp->b_edev);
	inst = BDINST(bp->b_edev);

	ASSERT(bp);

	bp->b_resid = bp->b_bcount;

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
	    NULL, NULL, 0)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	shift = bd->d_blkshift;
	lblkno = bp->b_lblkno >> (shift - DEV_BSHIFT);
	if ((P2PHASE(bp->b_lblkno, (1U << (shift - DEV_BSHIFT))) != 0) ||
	    (P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
	    (lblkno > p_nblks)) {
		bioerror(bp, EINVAL);
		biodone(bp);
		return (0);
	}
	b_nblks = bp->b_bcount >> shift;
	if ((lblkno == p_nblks) || (bp->b_bcount == 0)) {
		biodone(bp);
		return (0);
	}

	if ((b_nblks + lblkno) > p_nblks) {
		bp->b_resid = ((lblkno + b_nblks - p_nblks) << shift);
		bp->b_bcount -= bp->b_resid;
	} else {
		bp->b_resid = 0;
	}
	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;

	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
	if (xi == NULL) {
		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
	}
	if (xi == NULL) {
		/* bd_xfer_alloc will have done bioerror */
		biodone(bp);
		return (0);
	}
	xi->i_blkno = lblkno + p_lba;

	bd_submit(bd, xi);

	return (0);
}

static int
bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
{
	minor_t		inst;
	uint16_t	part;
	bd_t		*bd;
	void		*ptr = (void *)arg;
	int		rv;

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		return (ENXIO);
	}

	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
	if (rv != ENOTTY)
		return (rv);

	if (rvalp != NULL) {
		/* the return value of the ioctl is 0 by default */
		*rvalp = 0;
	}

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo minfo;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&minfo, sizeof (minfo));
		minfo.dki_media_type = DK_FIXED_DISK;
		minfo.dki_lbsize = (1U << bd->d_blkshift);
		minfo.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCGMEDIAINFOEXT: {
		struct dk_minfo_ext miext;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&miext, sizeof (miext));
		miext.dki_media_type = DK_FIXED_DISK;
		miext.dki_lbsize = (1U << bd->d_blkshift);
		miext.dki_pbsize = (1U << bd->d_pblkshift);
		miext.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo cinfo;
		bzero(&cinfo, sizeof (cinfo));
		cinfo.dki_ctype = DKC_BLKDEV;
		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
		    "%s", ddi_driver_name(bd->d_dip));
		cinfo.dki_unit = inst;
		cinfo.dki_flags = DKI_FMTVOL;
		cinfo.dki_partition = part;
		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
		cinfo.dki_addr = 0;
		cinfo.dki_slave = 0;
		cinfo.dki_space = 0;
		cinfo.dki_prio = 0;
		cinfo.dki_vec = 0;
		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i;
		i = bd->d_removable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCHOTPLUGGABLE: {
		int i;
		i = bd->d_hotpluggable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREADONLY: {
		int i;
		i = bd->d_rdonly ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSOLIDSTATE: {
		int i;
		i = bd->d_ssd ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSTATE: {
		enum dkio_state state;
		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
			return (EFAULT);
		}
		if ((rv = bd_check_state(bd, &state)) != 0) {
			return (rv);
		}
		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc = NULL;

		if (flag & FKIOCTL)
			dkc = (void *)arg;

		rv = bd_flush_write_cache(bd, dkc);
		return (rv);
	}

	default:
		break;

	}
	return (ENOTTY);
}

static int
bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	bd_t	*bd;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
	if (bd == NULL)
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));

	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
	    valuep, lengthp, BDPART(dev), 0));
}

static int
bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
	bd_t		*bd;
	buf_t		*bp;
	bd_xfer_impl_t	*xi;
	int		rv;
	int		(*func)(void *, bd_xfer_t *);
	int		kmflag;

	/*
	 * If we are running in polled mode (such as during dump(9e)
	 * execution), then we cannot sleep for kernel allocations.
	 */
	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	if ((bp = getrbuf(kmflag)) == NULL) {
		return (ENOMEM);
	}

	switch (cmd) {
	case TG_READ:
		bp->b_flags = B_READ;
		func = bd->d_ops.o_read;
		break;
	case TG_WRITE:
		bp->b_flags = B_WRITE;
		func = bd->d_ops.o_write;
		break;
	default:
		freerbuf(bp);
		return (EINVAL);
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	xi = bd_xfer_alloc(bd, bp, func, kmflag);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}
	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
	xi->i_blkno = start;
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

static int
bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	bd_t	*bd;

	_NOTE(ARGUNUSED(tg_cookie));
	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	switch (cmd) {
	case TG_GETPHYGEOM:
	case TG_GETVIRTGEOM:
		/*
		 * We don't have any "geometry" as such, let cmlb
		 * fabricate something.
		 */
		return (ENOTTY);

	case TG_GETCAPACITY:
		bd_update_state(bd);
		*(diskaddr_t *)arg = bd->d_numblks;
		return (0);

	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = (1U << bd->d_blkshift);
		return (0);

	case TG_GETATTR:
		/*
		 * It turns out that cmlb really doesn't do much for
		 * non-writable media, but let's make the information
		 * available for it in case it does more in the
		 * future. (The value is currently used for
		 * triggering special behavior for CD-ROMs.)
		 */
		bd_update_state(bd);
		((tg_attribute_t *)arg)->media_is_writable =
		    bd->d_rdonly ? B_FALSE : B_TRUE;
		((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
		((tg_attribute_t *)arg)->media_is_rotational = B_FALSE;
		return (0);

	default:
		return (EINVAL);
	}
}


static void
bd_sched(bd_t *bd, bd_queue_t *bq)
{
	bd_xfer_impl_t	*xi;
	struct buf	*bp;
	int		rv;

	mutex_enter(&bq->q_iomutex);

	while ((bq->q_qactive < bq->q_qsize) &&
	    ((xi = list_remove_head(&bq->q_waitq)) != NULL)) {
		mutex_enter(&bd->d_ksmutex);
		kstat_waitq_to_runq(bd->d_kiop);
		mutex_exit(&bd->d_ksmutex);

		bq->q_qactive++;
		list_insert_tail(&bq->q_runq, xi);

		/*
		 * Submit the job to the driver. We drop the I/O mutex
		 * so that we can deal with the case where the driver
		 * completion routine calls back into us synchronously.
		 */

		mutex_exit(&bq->q_iomutex);

		rv = xi->i_func(bd->d_private, &xi->i_public);
		if (rv != 0) {
			bp = xi->i_bp;
			bioerror(bp, rv);
			biodone(bp);

			atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);

			mutex_enter(&bq->q_iomutex);

			mutex_enter(&bd->d_ksmutex);
			kstat_runq_exit(bd->d_kiop);
			mutex_exit(&bd->d_ksmutex);

			bq->q_qactive--;
			list_remove(&bq->q_runq, xi);
			bd_xfer_free(xi);
		} else {
			mutex_enter(&bq->q_iomutex);
		}
	}

	mutex_exit(&bq->q_iomutex);
}

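/*
 * Queue selection is the lockless round-robin described at the top of
 * this file: an atomic counter is incremented per I/O and reduced
 * modulo the queue count to pick a waitq/runq pair.
 */
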
static void
bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
{
	uint64_t	nv = atomic_inc_64_nv(&bd->d_io_counter);
	unsigned	q = nv % bd->d_qcount;
	bd_queue_t	*bq = &bd->d_queues[q];

	xi->i_bq = bq;
	xi->i_qnum = q;

	mutex_enter(&bq->q_iomutex);

	list_insert_tail(&bq->q_waitq, xi);

	mutex_enter(&bd->d_ksmutex);
	kstat_waitq_enter(bd->d_kiop);
	mutex_exit(&bd->d_ksmutex);

	mutex_exit(&bq->q_iomutex);

	bd_sched(bd, bq);
}

static void
bd_runq_exit(bd_xfer_impl_t *xi, int err)
{
	bd_t		*bd = xi->i_bd;
	buf_t		*bp = xi->i_bp;
	bd_queue_t	*bq = xi->i_bq;

	mutex_enter(&bq->q_iomutex);
	bq->q_qactive--;

	mutex_enter(&bd->d_ksmutex);
	kstat_runq_exit(bd->d_kiop);
	mutex_exit(&bd->d_ksmutex);

	list_remove(&bq->q_runq, xi);
	mutex_exit(&bq->q_iomutex);

	if (err == 0) {
		if (bp->b_flags & B_READ) {
			atomic_inc_uint(&bd->d_kiop->reads);
			atomic_add_64((uint64_t *)&bd->d_kiop->nread,
			    bp->b_bcount - xi->i_resid);
		} else {
			atomic_inc_uint(&bd->d_kiop->writes);
			atomic_add_64((uint64_t *)&bd->d_kiop->nwritten,
			    bp->b_bcount - xi->i_resid);
		}
	}
	bd_sched(bd, bq);
}

static void
bd_update_state(bd_t *bd)
{
	enum dkio_state	state = DKIO_INSERTED;
	boolean_t	docmlb = B_FALSE;
	bd_media_t	media;

	bzero(&media, sizeof (media));

	mutex_enter(&bd->d_statemutex);
	if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
		bd->d_numblks = 0;
		state = DKIO_EJECTED;
		goto done;
	}

	if ((media.m_blksize < 512) ||
	    (!ISP2(media.m_blksize)) ||
	    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
		cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
		    ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
		    media.m_blksize);
		/*
		 * We can't use the media, treat it as not present.
		 */
		state = DKIO_EJECTED;
		bd->d_numblks = 0;
		goto done;
	}

	if (((1U << bd->d_blkshift) != media.m_blksize) ||
	    (bd->d_numblks != media.m_nblks)) {
		/* Device size changed */
		docmlb = B_TRUE;
	}

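	/* For a power-of-2 block size, ddi_ffs(size) - 1 == log2(size). */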
	bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
	bd->d_pblkshift = bd->d_blkshift;
	bd->d_numblks = media.m_nblks;
	bd->d_rdonly = media.m_readonly;
	bd->d_ssd = media.m_solidstate;

	/*
	 * Only use the supplied physical block size if it is non-zero,
	 * greater than or equal to the block size, and a power of 2.
	 * Ignore it if not; it's just informational and we can still
	 * use the media.
	 */
	if ((media.m_pblksize != 0) &&
	    (media.m_pblksize >= media.m_blksize) &&
	    (ISP2(media.m_pblksize)))
		bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;

done:
	if (state != bd->d_state) {
		bd->d_state = state;
		cv_broadcast(&bd->d_statecv);
		docmlb = B_TRUE;
	}
	mutex_exit(&bd->d_statemutex);

	bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift;

	if (docmlb) {
		if (state == DKIO_INSERTED) {
			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
		} else {
			cmlb_invalidate(bd->d_cmlbh, 0);
		}
	}
}

static int
bd_check_state(bd_t *bd, enum dkio_state *state)
{
	clock_t		when;

	for (;;) {

		bd_update_state(bd);

		mutex_enter(&bd->d_statemutex);

		if (bd->d_state != *state) {
			*state = bd->d_state;
			mutex_exit(&bd->d_statemutex);
			break;
		}

		when = drv_usectohz(1000000);
		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
		    when, TR_CLOCK_TICK) == 0) {
			mutex_exit(&bd->d_statemutex);
			return (EINTR);
		}

		mutex_exit(&bd->d_statemutex);
	}

	return (0);
}

static int
bd_flush_write_cache_done(struct buf *bp)
{
	struct dk_callback *dc = (void *)bp->b_private;

	(*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
	kmem_free(dc, sizeof (*dc));
	freerbuf(bp);
	return (0);
}

static int
bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
{
	buf_t			*bp;
	struct dk_callback	*dc;
	bd_xfer_impl_t		*xi;
	int			rv;

	if (bd->d_ops.o_sync_cache == NULL) {
		return (ENOTSUP);
	}
	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
		return (ENOMEM);
	}
	bp->b_resid = 0;
	bp->b_bcount = 0;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}

	/* Make an asynchronous flush, but only if there is a callback */
	if (dkc != NULL && dkc->dkc_callback != NULL) {
		/* Make a private copy of the callback structure */
		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
		*dc = *dkc;
		bp->b_private = dc;
		bp->b_iodone = bd_flush_write_cache_done;

		bd_submit(bd, xi);
		return (0);
	}

	/* In case there is no callback, perform a synchronous flush */
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

/*
 * Nexus support.
 */
int
bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
    void *arg, void *result)
{
	bd_handle_t	hdl;

	switch (ctlop) {
	case DDI_CTLOPS_REPORTDEV:
		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
		    ddi_driver_name(rdip), ddi_get_instance(rdip));
		return (DDI_SUCCESS);

	case DDI_CTLOPS_INITCHILD:
		hdl = ddi_get_parent_data((dev_info_t *)arg);
		if (hdl == NULL) {
			return (DDI_NOT_WELL_FORMED);
		}
		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
		return (DDI_SUCCESS);

	case DDI_CTLOPS_UNINITCHILD:
		ddi_set_name_addr((dev_info_t *)arg, NULL);
		ndi_prop_remove_all((dev_info_t *)arg);
		return (DDI_SUCCESS);

	default:
		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
	}
}

/*
 * Functions for device drivers.
 */
bd_handle_t
bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
{
	bd_handle_t	hdl;

	/*
	 * There is full compatibility between the version 0 API and the
	 * current version.
	 */
	switch (ops->o_version) {
	case BD_OPS_VERSION_0:
	case BD_OPS_CURRENT_VERSION:
		break;

	default:
		return (NULL);
	}

	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
	if (hdl != NULL) {
		hdl->h_ops = *ops;
		hdl->h_dma = dma;
		hdl->h_private = private;
	}

	return (hdl);
}

void
bd_free_handle(bd_handle_t hdl)
{
	kmem_free(hdl, sizeof (*hdl));
}

int
bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
{
	dev_info_t	*child;
	bd_drive_t	drive = { 0 };

	/*
	 * It's not an error if bd_attach_handle() is called on a handle
	 * that is already attached. We just ignore the request to attach
	 * and return. This way drivers using blkdev don't have to keep
	 * track of blkdev state; they can just call this function to make
	 * sure it is attached.
	 */
	if (hdl->h_child != NULL) {
		return (DDI_SUCCESS);
	}

	/* if drivers don't override this, make it assume none */
	drive.d_lun = -1;
	hdl->h_ops.o_drive_info(hdl->h_private, &drive);

	hdl->h_parent = dip;
	hdl->h_name = "blkdev";

	/*LINTED: E_BAD_PTR_CAST_ALIGN*/
	if (*(uint64_t *)drive.d_eui64 != 0) {
		if (drive.d_lun >= 0) {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "w%02X%02X%02X%02X%02X%02X%02X%02X,%X",
			    drive.d_eui64[0], drive.d_eui64[1],
			    drive.d_eui64[2], drive.d_eui64[3],
			    drive.d_eui64[4], drive.d_eui64[5],
			    drive.d_eui64[6], drive.d_eui64[7], drive.d_lun);
		} else {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "w%02X%02X%02X%02X%02X%02X%02X%02X",
			    drive.d_eui64[0], drive.d_eui64[1],
			    drive.d_eui64[2], drive.d_eui64[3],
			    drive.d_eui64[4], drive.d_eui64[5],
			    drive.d_eui64[6], drive.d_eui64[7]);
		}
	} else {
		if (drive.d_lun >= 0) {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "%X,%X", drive.d_target, drive.d_lun);
		} else {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "%X", drive.d_target);
		}
	}

	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
	    &child) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    "blkdev", hdl->h_addr);
		return (DDI_FAILURE);
	}

	ddi_set_parent_data(child, hdl);
	hdl->h_child = child;

	if (ndi_devi_online(child, 0) == NDI_FAILURE) {
		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    hdl->h_name, hdl->h_addr);
		(void) ndi_devi_free(child);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

int
bd_detach_handle(bd_handle_t hdl)
{
	int	circ;
	int	rv;
	char	*devnm;

	/*
	 * It's not an error if bd_detach_handle() is called on a handle
	 * that is already detached. We just ignore the request to detach
	 * and return. This way drivers using blkdev don't have to keep
	 * track of blkdev state; they can just call this function to make
	 * sure it is detached.
	 */
	if (hdl->h_child == NULL) {
		return (DDI_SUCCESS);
	}
	ndi_devi_enter(hdl->h_parent, &circ);
	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
		rv = ddi_remove_child(hdl->h_child, 0);
	} else {
		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
		(void) ddi_deviname(hdl->h_child, devnm);
		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
		kmem_free(devnm, MAXNAMELEN + 1);
	}
	if (rv == 0) {
		hdl->h_child = NULL;
	}

	ndi_devi_exit(hdl->h_parent, circ);
	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
}

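/*
 * Completion contract: the parent driver calls bd_xfer_done() exactly
 * once for each o_read/o_write submission. If more windows remain,
 * blkdev advances to the next window and resubmits; otherwise the buf
 * is completed with biodone().
 */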
void
bd_xfer_done(bd_xfer_t *xfer, int err)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	buf_t		*bp = xi->i_bp;
	int		rv = DDI_SUCCESS;
	bd_t		*bd = xi->i_bd;
	size_t		len;

	if (err != 0) {
		bd_runq_exit(xi, err);
		atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, err);
		biodone(bp);
		return;
	}

	xi->i_cur_win++;
	xi->i_resid -= xi->i_len;

	if (xi->i_resid == 0) {
		/* Job completed successfully! */
		bd_runq_exit(xi, 0);

		bd_xfer_free(xi);
		biodone(bp);
		return;
	}

	xi->i_blkno += xi->i_nblks;

	if (bd->d_use_dma) {
		/* More transfer still pending... advance to next DMA window. */
		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
	} else {
		/* Advance memory window. */
		xi->i_kaddr += xi->i_len;
		xi->i_offset += xi->i_len;
		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
	}


	if ((rv != DDI_SUCCESS) ||
	    (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
		bd_runq_exit(xi, EFAULT);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, EFAULT);
		biodone(bp);
		return;
	}
	xi->i_len = len;
	xi->i_nblks = len >> xi->i_blkshift;

	/* Submit next window to hardware. */
	rv = xi->i_func(bd->d_private, &xi->i_public);
	if (rv != 0) {
		bd_runq_exit(xi, rv);

		atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, rv);
		biodone(bp);
	}
}

void
bd_error(bd_xfer_t *xfer, int error)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	bd_t		*bd = xi->i_bd;

	switch (error) {
	case BD_ERR_MEDIA:
		atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32);
		break;
	case BD_ERR_NTRDY:
		atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32);
		break;
	case BD_ERR_NODEV:
		atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32);
		break;
	case BD_ERR_RECOV:
		atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32);
		break;
	case BD_ERR_ILLRQ:
		atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32);
		break;
	case BD_ERR_PFA:
		atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32);
		break;
	default:
		cmn_err(CE_PANIC, "bd_error: unknown error type %d", error);
		break;
	}
}

void
bd_state_change(bd_handle_t hdl)
{
	bd_t	*bd;

	if ((bd = hdl->h_bd) != NULL) {
		bd_update_state(bd);
	}
}

void
bd_mod_init(struct dev_ops *devops)
{
	static struct bus_ops bd_bus_ops = {
		BUSO_REV,		/* busops_rev */
		nullbusmap,		/* bus_map */
		NULL,			/* bus_get_intrspec (OBSOLETE) */
		NULL,			/* bus_add_intrspec (OBSOLETE) */
		NULL,			/* bus_remove_intrspec (OBSOLETE) */
		i_ddi_map_fault,	/* bus_map_fault */
		NULL,			/* bus_dma_map (OBSOLETE) */
		ddi_dma_allochdl,	/* bus_dma_allochdl */
		ddi_dma_freehdl,	/* bus_dma_freehdl */
		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
		ddi_dma_flush,		/* bus_dma_flush */
		ddi_dma_win,		/* bus_dma_win */
		ddi_dma_mctl,		/* bus_dma_ctl */
		bd_bus_ctl,		/* bus_ctl */
		ddi_bus_prop_op,	/* bus_prop_op */
		NULL,			/* bus_get_eventcookie */
		NULL,			/* bus_add_eventcall */
		NULL,			/* bus_remove_eventcall */
		NULL,			/* bus_post_event */
		NULL,			/* bus_intr_ctl (OBSOLETE) */
		NULL,			/* bus_config */
		NULL,			/* bus_unconfig */
		NULL,			/* bus_fm_init */
		NULL,			/* bus_fm_fini */
		NULL,			/* bus_fm_access_enter */
		NULL,			/* bus_fm_access_exit */
		NULL,			/* bus_power */
		NULL,			/* bus_intr_op */
	};

	devops->devo_bus_ops = &bd_bus_ops;

	/*
	 * NB: The device driver is free to supply its own
	 * character entry device support.
	 */
}

void
bd_mod_fini(struct dev_ops *devops)
{
	devops->devo_bus_ops = NULL;
}