/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
 * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2017 The MathWorks, Inc. All rights reserved.
 * Copyright 2019 Western Digital Corporation.
 * Copyright 2020 Joyent, Inc.
 */

#include <sys/types.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/aio_req.h>
#include <sys/cred.h>
#include <sys/modctl.h>
#include <sys/cmlb.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/dkio.h>
#include <sys/dkioc_free_util.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/kstat.h>
#include <sys/fs/dv_node.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/note.h>
#include <sys/blkdev.h>
#include <sys/scsi/impl/inquiry.h>

/*
 * blkdev is a driver which provides a lot of the common functionality
 * a block device driver may need and helps by removing code which
 * is frequently duplicated in block device drivers.
 *
 * Within this driver all the struct cb_ops functions required for a
 * block device driver are written with appropriate call back functions
 * to be provided by the parent driver.
 *
 * To use blkdev, a driver needs to:
 *	1. Create a bd_ops_t structure which has the call back operations
 *	   blkdev will use.
 *	2. Create a handle by calling bd_alloc_handle().  One of the
 *	   arguments to this function is the bd_ops_t.
 *	3. Call bd_attach_handle().  This will instantiate a blkdev device
 *	   as a child device node of the calling driver.
 *
 * A parent driver is not restricted to just allocating and attaching a
 * single instance, it may attach as many as it wishes.  For each handle
 * attached, appropriate entries in /dev/[r]dsk are created.
 *
 * The bd_ops_t routines that a parent of blkdev needs to provide are:
 *
 * o_drive_info: Provide information to blkdev such as how many I/O queues
 *		 to create and the size of those queues.  Also some device
 *		 specifics such as EUI, vendor, product, model, serial
 *		 number ....
 *
 * o_media_info: Provide information about the media, e.g. size and
 *		 block size.
 *
 * o_devid_init: Creates and initializes the device id.  Typically calls
 *		 ddi_devid_init().
 *
 * o_sync_cache: Issues a device appropriate command to flush any write
 *		 caches.
 *
 * o_read: Read data as described by bd_xfer_t argument.
 *
 * o_write: Write data as described by bd_xfer_t argument.
 *
 * o_free_space: Free the space described by bd_xfer_t argument (optional).
 *
 * Queues
 * ------
 * Part of the drive_info data is a queue count.  blkdev will create
 * "queue count" number of waitq/runq pairs.  Each waitq/runq pair
 * operates independently.  As an I/O is scheduled up to the parent
 * driver via o_read or o_write its queue number is given.  If the
 * parent driver supports multiple hardware queues it can then select
 * where to submit the I/O request.
 *
 * Currently blkdev uses a simplistic round-robin queue selection method.
 * It has the advantage that it is lockless.  In the future it will be
 * worthwhile reviewing this strategy for something which prioritizes queues
 * depending on how busy they are.
 *
 * Each waitq/runq pair is protected by its mutex (q_iomutex).  Incoming
 * I/O requests are initially added to the waitq.  They are taken off the
 * waitq, added to the runq and submitted, provided the runq is less
 * than the qsize as specified in the drive_info.  As an I/O request
 * completes, the parent driver is required to call bd_xfer_done(), which
 * will remove the I/O request from the runq and pass I/O completion
 * status up the stack.
 *
 * Locks
 * -----
 * There are 4 instance global locks d_ocmutex, d_ksmutex, d_errmutex and
 * d_statemutex, as well as a q_iomutex per waitq/runq pair.
 *
 * Lock Hierarchy
 * --------------
 * The only two locks which may be held simultaneously are q_iomutex and
 * d_ksmutex.  In all cases q_iomutex must be acquired before d_ksmutex.
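 *
 * Usage sketch
 * ------------
 * A parent driver would typically wire the pieces together roughly as
 * follows.  This is an illustrative sketch only: the xx_* names are
 * hypothetical and error handling is elided.
 *
 *	static bd_ops_t xx_bd_ops = {
 *		.o_version	= BD_OPS_VERSION_2,
 *		.o_drive_info	= xx_drive_info,
 *		.o_media_info	= xx_media_info,
 *		.o_devid_init	= xx_devid_init,
 *		.o_sync_cache	= xx_sync_cache,
 *		.o_read		= xx_read,
 *		.o_write	= xx_write,
 *		.o_free_space	= NULL,		(optional; NULL if unsupported)
 *	};
 *
 *	xx->xx_bdh = bd_alloc_handle(xx, &xx_bd_ops, &xx_dma_attr, KM_SLEEP);
 *	if (bd_attach_handle(xx->xx_dip, xx->xx_bdh) != DDI_SUCCESS)
 *		(handle the failure)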
 */

#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)

typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;
typedef struct bd_queue bd_queue_t;

struct bd {
	void		*d_private;
	dev_info_t	*d_dip;
	kmutex_t	d_ocmutex;
	kmutex_t	d_ksmutex;
	kmutex_t	d_errmutex;
	kmutex_t	d_statemutex;
	kcondvar_t	d_statecv;
	enum dkio_state	d_state;
	cmlb_handle_t	d_cmlbh;
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];	/* bit mask */
	uint64_t	d_io_counter;

	uint32_t	d_qcount;
	uint32_t	d_qactive;
	uint32_t	d_maxxfer;
	uint32_t	d_blkshift;
	uint32_t	d_pblkshift;
	uint64_t	d_numblks;
	ddi_devid_t	d_devid;

	uint64_t	d_max_free_seg;
	uint64_t	d_max_free_blks;
	uint64_t	d_max_free_seg_blks;
	uint64_t	d_free_align;

	kmem_cache_t	*d_cache;
	bd_queue_t	*d_queues;
	kstat_t		*d_ksp;
	kstat_io_t	*d_kiop;
	kstat_t		*d_errstats;
	struct bd_errstats *d_kerr;

	boolean_t	d_rdonly;
	boolean_t	d_ssd;
	boolean_t	d_removable;
	boolean_t	d_hotpluggable;
	boolean_t	d_use_dma;

	ddi_dma_attr_t	d_dma;
	bd_ops_t	d_ops;
	bd_handle_t	d_handle;
};

struct bd_handle {
	bd_ops_t	h_ops;
	ddi_dma_attr_t	*h_dma;
	dev_info_t	*h_parent;
	dev_info_t	*h_child;
	void		*h_private;
	bd_t		*h_bd;
	char		*h_name;
	char		h_addr[30];	/* enough for w%0.16x,%X */
};

struct bd_xfer_impl {
	bd_xfer_t	i_public;
	list_node_t	i_linkage;
	bd_t		*i_bd;
	buf_t		*i_bp;
	bd_queue_t	*i_bq;
	uint_t		i_num_win;
	uint_t		i_cur_win;
	off_t		i_offset;
	int		(*i_func)(void *, bd_xfer_t *);
	uint32_t	i_blkshift;
	size_t		i_len;
	size_t		i_resid;
};

struct bd_queue {
	kmutex_t	q_iomutex;
	uint32_t	q_qsize;
	uint32_t	q_qactive;
	list_t		q_runq;
	list_t		q_waitq;
};

#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags
#define	i_qnum		i_public.x_qnum
#define	i_dfl		i_public.x_dfl

#define	CAN_FREESPACE(bd) \
	(((bd)->d_ops.o_free_space == NULL) ? B_FALSE : B_TRUE)

/*
 * Private prototypes.
 */

static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);
static void bd_create_errstats(bd_t *, int, bd_drive_t *);
static void bd_destroy_errstats(bd_t *);
static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *);
static void bd_init_errstats(bd_t *, bd_drive_t *);
static void bd_fini_errstats(bd_t *);

static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *, bd_queue_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);
static int bd_check_uio(dev_t, struct uio *);
static int bd_free_space(dev_t, bd_t *, dkioc_free_list_t *);

struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};

static struct cb_ops bd_cb_ops = {
	bd_open,		/* open */
	bd_close,		/* close */
	bd_strategy,		/* strategy */
	nodev,			/* print */
	bd_dump,		/* dump */
	bd_read,		/* read */
	bd_write,		/* write */
	bd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	bd_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_64BIT | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	bd_aread,		/* async read */
	bd_awrite		/* async write */
};

struct dev_ops bd_dev_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	bd_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	bd_attach,		/* attach */
	bd_detach,		/* detach */
	nodev,			/* reset */
	&bd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"Generic Block Device",
	&bd_dev_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, { &modldrv, NULL }
};

static void *bd_state;
static krwlock_t bd_lock;

int
_init(void)
{
	int rv;

	rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
	if (rv != DDI_SUCCESS) {
		return (rv);
	}
	rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
	rv = mod_install(&modlinkage);
	if (rv != DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_fini(void)
{
	int rv;

	rv = mod_remove(&modlinkage);
	if (rv == DDI_SUCCESS) {
		rw_destroy(&bd_lock);
		ddi_soft_state_fini(&bd_state);
	}
	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static int
bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	bd_t	*bd;
	minor_t	inst;

	_NOTE(ARGUNUSED(dip));

	inst = BDINST((dev_t)arg);

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		bd = ddi_get_soft_state(bd_state, inst);
		if (bd == NULL) {
			return (DDI_FAILURE);
		}
		*resultp = (void *)bd->d_dip;
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(intptr_t)inst;
		break;

	default:
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

static void
bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
{
	int	ilen;
	char	*data_string;

	ilen = scsi_ascii_inquiry_len(data, len);
	ASSERT3U(ilen, <=, len);
	if (ilen <= 0)
		return;
	/* ensure null termination */
	data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
	bcopy(data, data_string, ilen);
	(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
	kmem_free(data_string, ilen + 1);
}

static void
bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
{
	if (drive->d_vendor_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
		    drive->d_vendor, drive->d_vendor_len);

	if (drive->d_product_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
		    drive->d_product, drive->d_product_len);

	if (drive->d_serial_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
		    drive->d_serial, drive->d_serial_len);

	if (drive->d_revision_len > 0)
		bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
		    drive->d_revision, drive->d_revision_len);
}

static void
bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive)
{
	char	ks_module[KSTAT_STRLEN];
	char	ks_name[KSTAT_STRLEN];
	int	ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t);

	if (bd->d_errstats != NULL)
		return;

	(void) snprintf(ks_module, sizeof (ks_module), "%serr",
	    ddi_driver_name(bd->d_dip));
	(void) snprintf(ks_name, sizeof (ks_name), "%s%d,err",
	    ddi_driver_name(bd->d_dip), inst);

	bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error",
	    KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);

	mutex_init(&bd->d_errmutex, NULL, MUTEX_DRIVER, NULL);
	if (bd->d_errstats == NULL) {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat.  The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
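		 *
		 * For example, bd_sched() below unconditionally does
		 * atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32)
		 * when a submission fails; with the scratch kstat in
		 * place that update is always safe, whether or not
		 * kstat_create() succeeded.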
		 */
		bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats),
		    KM_SLEEP);
	} else {
		bd->d_errstats->ks_lock = &bd->d_errmutex;
		bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data;
	}

	kstat_named_init(&bd->d_kerr->bd_softerrs, "Soft Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_harderrs, "Hard Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);

	if (drive->d_model_len > 0) {
		kstat_named_init(&bd->d_kerr->bd_model, "Model",
		    KSTAT_DATA_STRING);
	} else {
		kstat_named_init(&bd->d_kerr->bd_vid, "Vendor",
		    KSTAT_DATA_STRING);
		kstat_named_init(&bd->d_kerr->bd_pid, "Product",
		    KSTAT_DATA_STRING);
	}

	kstat_named_init(&bd->d_kerr->bd_revision, "Revision",
	    KSTAT_DATA_STRING);
	kstat_named_init(&bd->d_kerr->bd_serial, "Serial No",
	    KSTAT_DATA_STRING);
	kstat_named_init(&bd->d_kerr->bd_capacity, "Size",
	    KSTAT_DATA_ULONGLONG);
	kstat_named_init(&bd->d_kerr->bd_rq_media_err, "Media Error",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err, "Device Not Ready",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_nodev_err, "No Device",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_recov_err, "Recoverable",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_illrq_err, "Illegal Request",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_pfa_err,
	    "Predictive Failure Analysis", KSTAT_DATA_UINT32);

	/* Only hook up and install the kstat if we actually created one. */
	if (bd->d_errstats != NULL) {
		bd->d_errstats->ks_private = bd;
		kstat_install(bd->d_errstats);
	}
	bd_init_errstats(bd, drive);
}

static void
bd_destroy_errstats(bd_t *bd)
{
	if (bd->d_errstats != NULL) {
		bd_fini_errstats(bd);
		kstat_delete(bd->d_errstats);
		bd->d_errstats = NULL;
	} else {
		kmem_free(bd->d_kerr, sizeof (struct bd_errstats));
		bd->d_kerr = NULL;
		mutex_destroy(&bd->d_errmutex);
	}
}

static void
bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt)
{
	char	*tmp;
	size_t	km_len;

	if (KSTAT_NAMED_STR_PTR(k) == NULL) {
		if (len > 0)
			km_len = strnlen(str, len);
		else if (alt != NULL)
			km_len = strlen(alt);
		else
			return;

		tmp = kmem_alloc(km_len + 1, KM_SLEEP);
		bcopy(len > 0 ? str : alt, tmp, km_len);
		tmp[km_len] = '\0';

		kstat_named_setstr(k, tmp);
	}
}

static void
bd_errstats_clrstr(kstat_named_t *k)
{
	if (KSTAT_NAMED_STR_PTR(k) == NULL)
		return;

	kmem_free(KSTAT_NAMED_STR_PTR(k), KSTAT_NAMED_STR_BUFLEN(k));
	kstat_named_setstr(k, NULL);
}

static void
bd_init_errstats(bd_t *bd, bd_drive_t *drive)
{
	struct bd_errstats	*est = bd->d_kerr;

	mutex_enter(&bd->d_errmutex);

	if (drive->d_model_len > 0 &&
	    KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) {
		bd_errstats_setstr(&est->bd_model, drive->d_model,
		    drive->d_model_len, NULL);
	} else {
		bd_errstats_setstr(&est->bd_vid, drive->d_vendor,
		    drive->d_vendor_len, "Unknown ");
		bd_errstats_setstr(&est->bd_pid, drive->d_product,
		    drive->d_product_len, "Unknown ");
	}

	bd_errstats_setstr(&est->bd_revision, drive->d_revision,
	    drive->d_revision_len, "0001");
	bd_errstats_setstr(&est->bd_serial, drive->d_serial,
	    drive->d_serial_len, "0 ");

	mutex_exit(&bd->d_errmutex);
}

static void
bd_fini_errstats(bd_t *bd)
{
	struct bd_errstats	*est = bd->d_kerr;

	mutex_enter(&bd->d_errmutex);

	bd_errstats_clrstr(&est->bd_model);
	bd_errstats_clrstr(&est->bd_vid);
	bd_errstats_clrstr(&est->bd_pid);
	bd_errstats_clrstr(&est->bd_revision);
	bd_errstats_clrstr(&est->bd_serial);

	mutex_exit(&bd->d_errmutex);
}

static void
bd_queues_free(bd_t *bd)
{
	uint32_t i;

	for (i = 0; i < bd->d_qcount; i++) {
		bd_queue_t *bq = &bd->d_queues[i];

		mutex_destroy(&bq->q_iomutex);
		list_destroy(&bq->q_waitq);
		list_destroy(&bq->q_runq);
	}

	kmem_free(bd->d_queues, sizeof (*bd->d_queues) * bd->d_qcount);
}

static int
bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int		inst;
	bd_handle_t	hdl;
	bd_t		*bd;
	bd_drive_t	drive;
	uint32_t	i;
	int		rv;
	char		name[16];
	char		kcache[32];

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* We don't do anything native for suspend/resume */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	inst = ddi_get_instance(dip);
	hdl = ddi_get_parent_data(dip);

	(void) snprintf(name, sizeof (name), "%s%d",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);

	if (hdl == NULL) {
		cmn_err(CE_WARN, "%s: missing parent data!", name);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
		return (DDI_FAILURE);
	}
	bd = ddi_get_soft_state(bd_state, inst);

	if (hdl->h_dma) {
		bd->d_dma = *(hdl->h_dma);
		bd->d_dma.dma_attr_granular =
		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
		bd->d_use_dma = B_TRUE;

		if (bd->d_maxxfer &&
		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
			cmn_err(CE_WARN,
			    "%s: inconsistent maximum transfer size!",
			    name);
			/* We force it */
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		} else {
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		}
	} else {
		bd->d_use_dma = B_FALSE;
		if (bd->d_maxxfer == 0) {
			bd->d_maxxfer = 1024 * 1024;
		}
	}
	bd->d_ops = hdl->h_ops;
	bd->d_private = hdl->h_private;
	bd->d_blkshift = DEV_BSHIFT;	/* 512 bytes, to start */

	if (bd->d_maxxfer % DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
	}
	if (bd->d_maxxfer < DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	bd->d_dip = dip;
	bd->d_handle = hdl;
	hdl->h_bd = bd;
	ddi_set_driver_private(dip, bd);

	mutex_init(&bd->d_ksmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);

	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);

	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
	if (bd->d_ksp != NULL) {
		bd->d_ksp->ks_lock = &bd->d_ksmutex;
		kstat_install(bd->d_ksp);
		bd->d_kiop = bd->d_ksp->ks_data;
	} else {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat.  The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
	}

	cmlb_alloc_handle(&bd->d_cmlbh);

	bd->d_state = DKIO_NONE;

	bzero(&drive, sizeof (drive));
	/*
	 * Default to one queue, and no restrictions on free space requests
	 * (if the driver provides the o_free_space method).  The parent
	 * driver can override these defaults.
	 */
	drive.d_qcount = 1;
	drive.d_free_align = 1;
	bd->d_ops.o_drive_info(bd->d_private, &drive);

	/*
	 * Several checks to make sure o_drive_info() didn't return bad
	 * values:
	 *
	 * There must be at least one queue.
	 */
	if (drive.d_qcount == 0)
		goto fail_drive_info;

	/* FREE/UNMAP/TRIM alignment needs to be at least 1 block */
	if (drive.d_free_align == 0)
		goto fail_drive_info;

	/*
	 * If d_max_free_blks is not unlimited (not 0), then we cannot allow
	 * an unlimited segment size.  It is however permissible to not impose
	 * a limit on the total number of blocks freed while limiting the
	 * amount allowed in an individual segment.
	 */
	if ((drive.d_max_free_blks > 0 && drive.d_max_free_seg_blks == 0))
		goto fail_drive_info;

	/*
	 * If a limit is set on d_max_free_blks (by the above check, we know
	 * if there's a limit on d_max_free_blks, d_max_free_seg_blks cannot
	 * be unlimited), it cannot be smaller than the limit on an individual
	 * segment.
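	 *
	 * For example (illustrative values): d_max_free_blks = 1024 with
	 * d_max_free_seg_blks = 256 is accepted, while d_max_free_blks =
	 * 1024 with d_max_free_seg_blks = 2048 is rejected, since a single
	 * segment could then exceed the total limit.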
	 */
	if ((drive.d_max_free_blks > 0 &&
	    drive.d_max_free_seg_blks > drive.d_max_free_blks)) {
		goto fail_drive_info;
	}

	bd->d_qcount = drive.d_qcount;
	bd->d_removable = drive.d_removable;
	bd->d_hotpluggable = drive.d_hotpluggable;

	if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
		bd->d_maxxfer = drive.d_maxxfer;

	bd->d_free_align = drive.d_free_align;
	bd->d_max_free_seg = drive.d_max_free_seg;
	bd->d_max_free_blks = drive.d_max_free_blks;
	bd->d_max_free_seg_blks = drive.d_max_free_seg_blks;

	bd_create_inquiry_props(dip, &drive);
	bd_create_errstats(bd, inst, &drive);
	bd_update_state(bd);

	bd->d_queues = kmem_alloc(sizeof (*bd->d_queues) * bd->d_qcount,
	    KM_SLEEP);
	for (i = 0; i < bd->d_qcount; i++) {
		bd_queue_t *bq = &bd->d_queues[i];

		bq->q_qsize = drive.d_qsize;
		bq->q_qactive = 0;
		mutex_init(&bq->q_iomutex, NULL, MUTEX_DRIVER, NULL);

		list_create(&bq->q_waitq, sizeof (bd_xfer_impl_t),
		    offsetof(struct bd_xfer_impl, i_linkage));
		list_create(&bq->q_runq, sizeof (bd_xfer_impl_t),
		    offsetof(struct bd_xfer_impl, i_linkage));
	}

	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
	    bd->d_removable, bd->d_hotpluggable,
	    /*LINTED: E_BAD_PTR_CAST_ALIGN*/
	    *(uint64_t *)drive.d_eui64 != 0 ? DDI_NT_BLOCK_BLKDEV :
	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
	if (rv != 0) {
		goto fail_cmlb_attach;
	}

	if (bd->d_ops.o_devid_init != NULL) {
		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
		if (rv == DDI_SUCCESS) {
			if (ddi_devid_register(dip, bd->d_devid) !=
			    DDI_SUCCESS) {
				cmn_err(CE_WARN,
				    "%s: unable to register devid", name);
			}
		}
	}

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers).  Also set up properties
	 * used by HAL to identify removable media.
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);
	if (bd->d_removable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "removable-media", NULL, 0);
	}
	if (bd->d_hotpluggable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "hotpluggable", NULL, 0);
	}

	ddi_report_dev(dip);

	return (DDI_SUCCESS);

fail_cmlb_attach:
	bd_queues_free(bd);
	bd_destroy_errstats(bd);

fail_drive_info:
	cmlb_free_handle(&bd->d_cmlbh);

	if (bd->d_ksp != NULL) {
		kstat_delete(bd->d_ksp);
		bd->d_ksp = NULL;
	} else {
		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
	}

	kmem_cache_destroy(bd->d_cache);
	cv_destroy(&bd->d_statecv);
	mutex_destroy(&bd->d_statemutex);
	mutex_destroy(&bd->d_ocmutex);
	mutex_destroy(&bd->d_ksmutex);
	ddi_soft_state_free(bd_state, inst);
	return (DDI_FAILURE);
}

static int
bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	bd_t	*bd;

	bd = ddi_get_driver_private(dip);

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		/* We don't suspend, but our parent does */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	if (bd->d_ksp != NULL) {
		kstat_delete(bd->d_ksp);
		bd->d_ksp = NULL;
	} else {
		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
	}

	bd_destroy_errstats(bd);
	cmlb_detach(bd->d_cmlbh, 0);
	cmlb_free_handle(&bd->d_cmlbh);
	if (bd->d_devid)
		ddi_devid_free(bd->d_devid);
	kmem_cache_destroy(bd->d_cache);
	mutex_destroy(&bd->d_ksmutex);
	mutex_destroy(&bd->d_ocmutex);
	mutex_destroy(&bd->d_statemutex);
	cv_destroy(&bd->d_statecv);
	bd_queues_free(bd);
	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
	return (DDI_SUCCESS);
}

static int
bd_xfer_ctor(void *buf, void *arg, int kmflag)
{
	bd_xfer_impl_t	*xi;
	bd_t		*bd = arg;
	int		(*dcb)(caddr_t);

	if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
		dcb = DDI_DMA_SLEEP;
	} else {
		dcb = DDI_DMA_DONTWAIT;
	}

	xi = buf;
	bzero(xi, sizeof (*xi));
	xi->i_bd = bd;

	if (bd->d_use_dma) {
		if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
		    &xi->i_dmah) != DDI_SUCCESS) {
			return (-1);
		}
	}

	return (0);
}

static void
bd_xfer_dtor(void *buf, void *arg)
{
	bd_xfer_impl_t	*xi = buf;

	_NOTE(ARGUNUSED(arg));

	if (xi->i_dmah)
		ddi_dma_free_handle(&xi->i_dmah);
	xi->i_dmah = NULL;
}

static bd_xfer_impl_t *
bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
    int kmflag)
{
	bd_xfer_impl_t	*xi;
	int		rv = 0;
	int		status;
	unsigned	dir;
	int		(*cb)(caddr_t);
	size_t		len;
	uint32_t	shift;

	if (kmflag == KM_SLEEP) {
		cb = DDI_DMA_SLEEP;
	} else {
		cb = DDI_DMA_DONTWAIT;
	}

	xi = kmem_cache_alloc(bd->d_cache, kmflag);
	if (xi == NULL) {
		bioerror(bp, ENOMEM);
		return (NULL);
	}

	ASSERT(bp);

	xi->i_bp = bp;
	xi->i_func = func;
	xi->i_blkno = bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT);

	if (bp->b_bcount == 0) {
		xi->i_len = 0;
		xi->i_nblks = 0;
		xi->i_kaddr = NULL;
		xi->i_resid = 0;
		xi->i_num_win = 0;
		goto done;
	}

	if (bp->b_flags & B_READ) {
		dir = DDI_DMA_READ;
		xi->i_func = bd->d_ops.o_read;
	} else {
		dir = DDI_DMA_WRITE;
		xi->i_func = bd->d_ops.o_write;
	}

	shift = bd->d_blkshift;
	xi->i_blkshift = shift;

	if (!bd->d_use_dma) {
		bp_mapin(bp);
		rv = 0;
		xi->i_offset = 0;
		xi->i_num_win =
		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
		xi->i_cur_win = 0;
		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
		xi->i_nblks = xi->i_len >> shift;
		xi->i_kaddr = bp->b_un.b_addr;
		xi->i_resid = bp->b_bcount;
	} else {

		/*
		 * We have to use consistent DMA if the address is misaligned.
		 */
		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
		} else {
			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
		}

		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
		    NULL, &xi->i_dmac, &xi->i_ndmac);
		switch (status) {
		case DDI_DMA_MAPPED:
			xi->i_num_win = 1;
			xi->i_cur_win = 0;
			xi->i_offset = 0;
			xi->i_len = bp->b_bcount;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_PARTIAL_MAP:
			xi->i_cur_win = 0;

			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
			    DDI_SUCCESS) ||
			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
			    &len, &xi->i_dmac, &xi->i_ndmac) !=
			    DDI_SUCCESS) ||
			    (P2PHASE(len, (1U << shift)) != 0)) {
				(void) ddi_dma_unbind_handle(xi->i_dmah);
				rv = EFAULT;
				goto done;
			}
			xi->i_len = len;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_NORESOURCES:
			rv = EAGAIN;
			goto done;
		case DDI_DMA_TOOBIG:
			rv = EINVAL;
			goto done;
		case DDI_DMA_NOMAPPING:
		case DDI_DMA_INUSE:
		default:
			rv = EFAULT;
			goto done;
		}
	}

done:
	if (rv != 0) {
		kmem_cache_free(bd->d_cache, xi);
		bioerror(bp, rv);
		return (NULL);
	}

	return (xi);
}

static void
bd_xfer_free(bd_xfer_impl_t *xi)
{
	if (xi->i_dmah) {
		(void) ddi_dma_unbind_handle(xi->i_dmah);
	}
	if (xi->i_dfl != NULL) {
		dfl_free((dkioc_free_list_t *)xi->i_dfl);
		xi->i_dfl = NULL;
	}
	kmem_cache_free(xi->i_bd->d_cache, xi);
}

static int
bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	dev_t		dev = *devp;
	bd_t		*bd;
	minor_t		part;
	minor_t		inst;
	uint64_t	mask;
	boolean_t	ndelay;
	int		rv;
	diskaddr_t	nblks;
	diskaddr_t	lba;

	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	if (otyp >= OTYPCNT)
		return (EINVAL);

	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;

	/*
	 * Block any DR events from changing the set of registered
	 * devices while we function.
	 */
	rw_enter(&bd_lock, RW_READER);
	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);

	ASSERT(part < 64);
	mask = (1ULL << part);

	bd_update_state(bd);

	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {

		/* non-blocking opens are allowed to succeed */
		if (!ndelay) {
			rv = ENXIO;
			goto done;
		}
	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
	    NULL, NULL, 0) == 0) {

		/*
		 * We read the partinfo, verify valid ranges.  If the
		 * partition is invalid, and we aren't blocking or
		 * doing a raw access, then fail.  (Non-blocking and
		 * raw accesses can still succeed to allow a disk with
		 * bad partition data to be opened by format and fdisk.)
		 */
		if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
			rv = ENXIO;
			goto done;
		}
	} else if (!ndelay) {
		/*
		 * cmlb_partinfo failed -- invalid partition or no
		 * disk label.
		 */
		rv = ENXIO;
		goto done;
	}

	if ((flag & FWRITE) && bd->d_rdonly) {
		rv = EROFS;
		goto done;
	}

	if ((bd->d_open_excl) & (mask)) {
		rv = EBUSY;
		goto done;
	}
	if (flag & FEXCL) {
		if (bd->d_open_lyr[part]) {
			rv = EBUSY;
			goto done;
		}
		for (int i = 0; i < OTYP_LYR; i++) {
			if (bd->d_open_reg[i] & mask) {
				rv = EBUSY;
				goto done;
			}
		}
	}

	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]++;
	} else {
		bd->d_open_reg[otyp] |= mask;
	}
	if (flag & FEXCL) {
		bd->d_open_excl |= mask;
	}

	rv = 0;
done:
	mutex_exit(&bd->d_ocmutex);
	rw_exit(&bd_lock);

	return (rv);
}

static int
bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	bd_t		*bd;
	minor_t		inst;
	minor_t		part;
	uint64_t	mask;
	boolean_t	last = B_TRUE;

	_NOTE(ARGUNUSED(flag));
	_NOTE(ARGUNUSED(credp));

	part = BDPART(dev);
	inst = BDINST(dev);

	ASSERT(part < 64);
	mask = (1ULL << part);

	rw_enter(&bd_lock, RW_READER);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	mutex_enter(&bd->d_ocmutex);
	if (bd->d_open_excl & mask) {
		bd->d_open_excl &= ~mask;
	}
	if (otyp == OTYP_LYR) {
		bd->d_open_lyr[part]--;
	} else {
		bd->d_open_reg[otyp] &= ~mask;
	}
	for (int i = 0; i < 64; i++) {
		if (bd->d_open_lyr[i]) {
			last = B_FALSE;
		}
	}
	for (int i = 0; last && (i < OTYP_LYR); i++) {
		if (bd->d_open_reg[i]) {
			last = B_FALSE;
		}
	}
	mutex_exit(&bd->d_ocmutex);

	if (last) {
		cmlb_invalidate(bd->d_cmlbh, 0);
	}
	rw_exit(&bd_lock);

	return (0);
}

static int
bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
{
	minor_t		inst;
	minor_t		part;
	diskaddr_t	pstart;
	diskaddr_t	psize;
	bd_t		*bd;
	bd_xfer_impl_t	*xi;
	buf_t		*bp;
	int		rv;
	uint32_t	shift;
	daddr_t		d_blkno;
	int		d_nblk;

	rw_enter(&bd_lock, RW_READER);

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}
	shift = bd->d_blkshift;
	d_blkno = blkno >> (shift - DEV_BSHIFT);
	d_nblk = nblk >> (shift - DEV_BSHIFT);
	/*
	 * do cmlb, but do it synchronously unless we already have the
	 * partition (which we probably should.)
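	 * Passing a non-NULL tg_cookie (here (void *)1) makes the
	 * bd_tg_rdwr() callback run polled (BD_XFER_POLL) and use
	 * KM_NOSLEEP allocations, as required in dump context.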
	 */
	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
	    (void *)1)) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	if ((d_blkno + d_nblk) > psize) {
		rw_exit(&bd_lock);
		return (EINVAL);
	}
	bp = getrbuf(KM_NOSLEEP);
	if (bp == NULL) {
		rw_exit(&bd_lock);
		return (ENOMEM);
	}

	bp->b_bcount = nblk << DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;
	bp->b_lblkno = blkno;
	bp->b_un.b_addr = caddr;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP);
	if (xi == NULL) {
		rw_exit(&bd_lock);
		freerbuf(bp);
		return (ENOMEM);
	}
	xi->i_blkno = d_blkno + pstart;
	xi->i_flags = BD_XFER_POLL;
	bd_submit(bd, xi);
	rw_exit(&bd_lock);

	/*
	 * Generally, we should have run this entirely synchronously
	 * at this point and the biowait call should be a no-op.  If
	 * it didn't happen this way, it's a bug in the underlying
	 * driver not honoring BD_XFER_POLL.
	 */
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}

void
bd_minphys(struct buf *bp)
{
	minor_t	inst;
	bd_t	*bd;

	inst = BDINST(bp->b_edev);

	bd = ddi_get_soft_state(bd_state, inst);

	/*
	 * In a non-debug kernel, bd_strategy will catch !bd as
	 * well, and will fail nicely.
	 */
	ASSERT(bd);

	if (bp->b_bcount > bd->d_maxxfer)
		bp->b_bcount = bd->d_maxxfer;
}

static int
bd_check_uio(dev_t dev, struct uio *uio)
{
	bd_t		*bd;
	uint32_t	shift;

	if ((bd = ddi_get_soft_state(bd_state, BDINST(dev))) == NULL) {
		return (ENXIO);
	}

	shift = bd->d_blkshift;
	if ((P2PHASE(uio->uio_loffset, (1U << shift)) != 0) ||
	    (P2PHASE(uio->uio_iov->iov_len, (1U << shift)) != 0)) {
		return (EINVAL);
	}

	return (0);
}

static int
bd_read(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	int ret = bd_check_uio(dev, uio);
	if (ret != 0) {
		return (ret);
	}
	return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
}

static int
bd_write(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	int ret = bd_check_uio(dev, uio);
	if (ret != 0) {
		return (ret);
	}
	return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
}

static int
bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	int ret = bd_check_uio(dev, aio->aio_uio);
	if (ret != 0) {
		return (ret);
	}
	return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
}

static int
bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	int ret = bd_check_uio(dev, aio->aio_uio);
	if (ret != 0) {
		return (ret);
	}
	return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
}

static int
bd_strategy(struct buf *bp)
{
	minor_t		inst;
	minor_t		part;
	bd_t		*bd;
	diskaddr_t	p_lba;
	diskaddr_t	p_nblks;
	diskaddr_t	b_nblks;
	bd_xfer_impl_t	*xi;
	uint32_t	shift;
	int		(*func)(void *, bd_xfer_t *);
	diskaddr_t	lblkno;

	part = BDPART(bp->b_edev);
	inst = BDINST(bp->b_edev);

	ASSERT(bp);

	bp->b_resid = bp->b_bcount;
	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
	    NULL, NULL, 0)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	shift = bd->d_blkshift;
	lblkno = bp->b_lblkno >> (shift - DEV_BSHIFT);
	if ((P2PHASE(bp->b_lblkno, (1U << (shift - DEV_BSHIFT))) != 0) ||
	    (P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
	    (lblkno > p_nblks)) {
		bioerror(bp, EINVAL);
		biodone(bp);
		return (0);
	}
	b_nblks = bp->b_bcount >> shift;
	if ((lblkno == p_nblks) || (bp->b_bcount == 0)) {
		biodone(bp);
		return (0);
	}

	if ((b_nblks + lblkno) > p_nblks) {
		bp->b_resid = ((lblkno + b_nblks - p_nblks) << shift);
		bp->b_bcount -= bp->b_resid;
	} else {
		bp->b_resid = 0;
	}
	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;

	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
	if (xi == NULL) {
		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
	}
	if (xi == NULL) {
		/* bd_xfer_alloc will have done bioerror */
		biodone(bp);
		return (0);
	}
	xi->i_blkno = lblkno + p_lba;

	bd_submit(bd, xi);

	return (0);
}

static int
bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
{
	minor_t		inst;
	uint16_t	part;
	bd_t		*bd;
	void		*ptr = (void *)arg;
	int		rv;

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		return (ENXIO);
	}

	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
	if (rv != ENOTTY)
		return (rv);

	if (rvalp != NULL) {
		/* the return value of the ioctl is 0 by default */
		*rvalp = 0;
	}

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo minfo;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&minfo, sizeof (minfo));
		minfo.dki_media_type = DK_FIXED_DISK;
		minfo.dki_lbsize = (1U << bd->d_blkshift);
		minfo.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCGMEDIAINFOEXT: {
		struct dk_minfo_ext miext;
		size_t len;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&miext, sizeof (miext));
		miext.dki_media_type = DK_FIXED_DISK;
		miext.dki_lbsize = (1U << bd->d_blkshift);
		miext.dki_pbsize = (1U << bd->d_pblkshift);
		miext.dki_capacity = bd->d_numblks;

		switch (ddi_model_convert_from(flag & FMODELS)) {
		case DDI_MODEL_ILP32:
			len = sizeof (struct dk_minfo_ext32);
			break;
		default:
			len = sizeof (struct dk_minfo_ext);
			break;
		}

		if (ddi_copyout(&miext, ptr, len, flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo cinfo;
		bzero(&cinfo, sizeof (cinfo));
		cinfo.dki_ctype = DKC_BLKDEV;
		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
		    "%s", ddi_driver_name(bd->d_dip));
		cinfo.dki_unit = inst;
		cinfo.dki_flags = DKI_FMTVOL;
		cinfo.dki_partition = part;
		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
		cinfo.dki_addr = 0;
		cinfo.dki_slave = 0;
		cinfo.dki_space = 0;
		cinfo.dki_prio = 0;
		cinfo.dki_vec = 0;
		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i;
		i = bd->d_removable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCHOTPLUGGABLE: {
		int i;
		i = bd->d_hotpluggable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREADONLY: {
		int i;
		i = bd->d_rdonly ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSOLIDSTATE: {
		int i;
		i = bd->d_ssd ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSTATE: {
		enum dkio_state state;
		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
			return (EFAULT);
		}
		if ((rv = bd_check_state(bd, &state)) != 0) {
			return (rv);
		}
		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc = NULL;

		if (flag & FKIOCTL)
			dkc = (void *)arg;

		rv = bd_flush_write_cache(bd, dkc);
		return (rv);
	}
	case DKIOCFREE: {
		dkioc_free_list_t *dfl = NULL;

		/*
		 * Check free space support early to avoid copyin/allocation
		 * when unnecessary.
		 */
		if (!CAN_FREESPACE(bd))
			return (ENOTSUP);

		rv = dfl_copyin(ptr, &dfl, flag, KM_SLEEP);
		if (rv != 0)
			return (rv);

		/*
		 * bd_free_space() consumes 'dfl'.  bd_free_space() will
		 * call dfl_iter() which will normally try to pass dfl through
		 * to bd_free_space_cb() which attaches dfl to the bd_xfer_t
		 * that is then queued for the underlying driver.  Once the
		 * driver processes the request, the bd_xfer_t instance is
		 * disposed of, including any attached dkioc_free_list_t.
		 *
		 * If dfl cannot be processed by the underlying driver due to
		 * size or alignment requirements of the driver, dfl_iter()
		 * will replace dfl with one or more new dkioc_free_list_t
		 * instances with the correct alignment and sizes for the
		 * driver (and free the original dkioc_free_list_t).
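		 *
		 * Illustrative userland sketch (hypothetical fd and dfl,
		 * error handling elided), probing support before issuing
		 * the free request:
		 *
		 *	boolean_t can = B_FALSE;
		 *	if (ioctl(fd, DKIOC_CANFREE, &can) == 0 && can)
		 *		(void) ioctl(fd, DKIOCFREE, dfl);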
		 */
		rv = bd_free_space(dev, bd, dfl);
		return (rv);
	}

	case DKIOC_CANFREE: {
		boolean_t supported = CAN_FREESPACE(bd);

		if (ddi_copyout(&supported, (void *)arg, sizeof (supported),
		    flag) != 0) {
			return (EFAULT);
		}

		return (0);
	}

	default:
		break;

	}
	return (ENOTTY);
}

static int
bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	bd_t	*bd;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
	if (bd == NULL)
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));

	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
	    valuep, lengthp, BDPART(dev), 0));
}

static int
bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
	bd_t		*bd;
	buf_t		*bp;
	bd_xfer_impl_t	*xi;
	int		rv;
	int		(*func)(void *, bd_xfer_t *);
	int		kmflag;

	/*
	 * If we are running in polled mode (such as during dump(9e)
	 * execution), then we cannot sleep for kernel allocations.
	 */
	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	if ((bp = getrbuf(kmflag)) == NULL) {
		return (ENOMEM);
	}

	switch (cmd) {
	case TG_READ:
		bp->b_flags = B_READ;
		func = bd->d_ops.o_read;
		break;
	case TG_WRITE:
		bp->b_flags = B_WRITE;
		func = bd->d_ops.o_write;
		break;
	default:
		freerbuf(bp);
		return (EINVAL);
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	xi = bd_xfer_alloc(bd, bp, func, kmflag);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}
	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
	xi->i_blkno = start;
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

static int
bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	bd_t	*bd;

	_NOTE(ARGUNUSED(tg_cookie));
	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	switch (cmd) {
	case TG_GETPHYGEOM:
	case TG_GETVIRTGEOM:
		/*
		 * We don't have any "geometry" as such, let cmlb
		 * fabricate something.
		 */
		return (ENOTTY);

	case TG_GETCAPACITY:
		bd_update_state(bd);
		*(diskaddr_t *)arg = bd->d_numblks;
		return (0);

	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = (1U << bd->d_blkshift);
		return (0);

	case TG_GETATTR:
		/*
		 * It turns out that cmlb really doesn't do much for
		 * non-writable media, but let's make the information
		 * available for it in case it does more in the
		 * future.  (The value is currently used for
		 * triggering special behavior for CD-ROMs.)
		 */
		bd_update_state(bd);
		((tg_attribute_t *)arg)->media_is_writable =
		    bd->d_rdonly ? B_FALSE : B_TRUE;
		((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
		((tg_attribute_t *)arg)->media_is_rotational = B_FALSE;
		return (0);

	default:
		return (EINVAL);
	}
}

static void
bd_sched(bd_t *bd, bd_queue_t *bq)
{
	bd_xfer_impl_t	*xi;
	struct buf	*bp;
	int		rv;

	mutex_enter(&bq->q_iomutex);

	while ((bq->q_qactive < bq->q_qsize) &&
	    ((xi = list_remove_head(&bq->q_waitq)) != NULL)) {
		mutex_enter(&bd->d_ksmutex);
		kstat_waitq_to_runq(bd->d_kiop);
		mutex_exit(&bd->d_ksmutex);

		bq->q_qactive++;
		list_insert_tail(&bq->q_runq, xi);

		/*
		 * Submit the job to the driver.  We drop the I/O mutex
		 * so that we can deal with the case where the driver
		 * completion routine calls back into us synchronously.
		 */

		mutex_exit(&bq->q_iomutex);

		rv = xi->i_func(bd->d_private, &xi->i_public);
		if (rv != 0) {
			bp = xi->i_bp;
			bioerror(bp, rv);
			biodone(bp);

			atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);

			mutex_enter(&bq->q_iomutex);

			mutex_enter(&bd->d_ksmutex);
			kstat_runq_exit(bd->d_kiop);
			mutex_exit(&bd->d_ksmutex);

			bq->q_qactive--;
			list_remove(&bq->q_runq, xi);
			bd_xfer_free(xi);
		} else {
			mutex_enter(&bq->q_iomutex);
		}
	}

	mutex_exit(&bq->q_iomutex);
}

static void
bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
{
	uint64_t	nv = atomic_inc_64_nv(&bd->d_io_counter);
	unsigned	q = nv % bd->d_qcount;
	bd_queue_t	*bq = &bd->d_queues[q];

	xi->i_bq = bq;
	xi->i_qnum = q;

	mutex_enter(&bq->q_iomutex);

	list_insert_tail(&bq->q_waitq, xi);

	mutex_enter(&bd->d_ksmutex);
	kstat_waitq_enter(bd->d_kiop);
	mutex_exit(&bd->d_ksmutex);

	mutex_exit(&bq->q_iomutex);

	bd_sched(bd, bq);
}

static void
bd_runq_exit(bd_xfer_impl_t *xi, int err)
{
	bd_t		*bd = xi->i_bd;
	buf_t		*bp = xi->i_bp;
	bd_queue_t	*bq = xi->i_bq;

	mutex_enter(&bq->q_iomutex);
	bq->q_qactive--;

	mutex_enter(&bd->d_ksmutex);
	kstat_runq_exit(bd->d_kiop);
	mutex_exit(&bd->d_ksmutex);

	list_remove(&bq->q_runq, xi);
	mutex_exit(&bq->q_iomutex);

	if (err == 0) {
		if (bp->b_flags & B_READ) {
			atomic_inc_uint(&bd->d_kiop->reads);
			atomic_add_64((uint64_t *)&bd->d_kiop->nread,
			    bp->b_bcount - xi->i_resid);
		} else {
			atomic_inc_uint(&bd->d_kiop->writes);
			atomic_add_64((uint64_t *)&bd->d_kiop->nwritten,
			    bp->b_bcount - xi->i_resid);
		}
	}
	bd_sched(bd, bq);
}

static void
bd_update_state(bd_t *bd)
{
	enum dkio_state	state = DKIO_INSERTED;
	boolean_t	docmlb = B_FALSE;
	bd_media_t	media;

	bzero(&media, sizeof (media));

	mutex_enter(&bd->d_statemutex);
	if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
		bd->d_numblks = 0;
		state = DKIO_EJECTED;
		goto done;
	}

	if ((media.m_blksize < 512) ||
	    (!ISP2(media.m_blksize)) ||
	    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
		cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
		    ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
		    media.m_blksize);
		/*
		 * We can't use the media, treat it as not present.
		 */
		state = DKIO_EJECTED;
		bd->d_numblks = 0;
		goto done;
	}

	if (((1U << bd->d_blkshift) != media.m_blksize) ||
	    (bd->d_numblks != media.m_nblks)) {
		/* Device size changed */
		docmlb = B_TRUE;
	}

	bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
	bd->d_pblkshift = bd->d_blkshift;
	bd->d_numblks = media.m_nblks;
	bd->d_rdonly = media.m_readonly;
	bd->d_ssd = media.m_solidstate;

	/*
	 * Only use the supplied physical block size if it is non-zero,
	 * greater or equal to the block size, and a power of 2.  Ignore it
	 * if not, it's just informational and we can still use the media.
	 */
	if ((media.m_pblksize != 0) &&
	    (media.m_pblksize >= media.m_blksize) &&
	    (ISP2(media.m_pblksize)))
		bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;

done:
	if (state != bd->d_state) {
		bd->d_state = state;
		cv_broadcast(&bd->d_statecv);
		docmlb = B_TRUE;
	}
	mutex_exit(&bd->d_statemutex);

	bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift;

	if (docmlb) {
		if (state == DKIO_INSERTED) {
			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
		} else {
			cmlb_invalidate(bd->d_cmlbh, 0);
		}
	}
}

static int
bd_check_state(bd_t *bd, enum dkio_state *state)
{
	clock_t		when;

	for (;;) {

		bd_update_state(bd);

		mutex_enter(&bd->d_statemutex);

		if (bd->d_state != *state) {
			*state = bd->d_state;
			mutex_exit(&bd->d_statemutex);
			break;
		}

		when = drv_usectohz(1000000);
		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
		    when, TR_CLOCK_TICK) == 0) {
			mutex_exit(&bd->d_statemutex);
			return (EINTR);
		}

		mutex_exit(&bd->d_statemutex);
	}

	return (0);
}

static int
bd_flush_write_cache_done(struct buf *bp)
{
	struct dk_callback *dc = (void *)bp->b_private;

	(*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
	kmem_free(dc, sizeof (*dc));
	freerbuf(bp);
	return (0);
}

static int
bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
{
	buf_t			*bp;
	struct dk_callback	*dc;
	bd_xfer_impl_t		*xi;
	int			rv;

	if (bd->d_ops.o_sync_cache == NULL) {
		return (ENOTSUP);
	}
	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
		return (ENOMEM);
	}
	bp->b_resid = 0;
	bp->b_bcount = 0;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}

	/* Make an asynchronous flush, but only if there is a callback */
	if (dkc != NULL && dkc->dkc_callback != NULL) {
		/* Make a private copy of the callback structure */
		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
		*dc = *dkc;
		bp->b_private = dc;
		bp->b_iodone = bd_flush_write_cache_done;

		bd_submit(bd, xi);
		return (0);
	}

	/* In case there is no callback, perform a synchronous flush */
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

static int
bd_free_space_done(struct buf *bp)
{
	freerbuf(bp);
	return (0);
}

static int
bd_free_space_cb(dkioc_free_list_t *dfl, void *arg, int kmflag)
{
	bd_t		*bd = arg;
	buf_t		*bp = NULL;
	bd_xfer_impl_t	*xi = NULL;
	boolean_t	sync = DFL_ISSYNC(dfl) ? B_TRUE : B_FALSE;
	int		rv = 0;

	bp = getrbuf(KM_SLEEP);
	bp->b_resid = 0;
	bp->b_bcount = 0;
	bp->b_lblkno = 0;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_free_space, kmflag);
	xi->i_dfl = dfl;

	if (!sync) {
		bp->b_iodone = bd_free_space_done;
		bd_submit(bd, xi);
		return (0);
	}

	xi->i_flags |= BD_XFER_POLL;
	bd_submit(bd, xi);

	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}

static int
bd_free_space(dev_t dev, bd_t *bd, dkioc_free_list_t *dfl)
{
	diskaddr_t	p_len, p_offset;
	uint64_t	offset_bytes, len_bytes;
	minor_t		part = BDPART(dev);
	const uint_t	bshift = bd->d_blkshift;
	dkioc_free_info_t dfi = {
		.dfi_bshift = bshift,
		.dfi_align = bd->d_free_align << bshift,
		.dfi_max_bytes = bd->d_max_free_blks << bshift,
		.dfi_max_ext = bd->d_max_free_seg,
		.dfi_max_ext_bytes = bd->d_max_free_seg_blks << bshift,
	};

	if (cmlb_partinfo(bd->d_cmlbh, part, &p_len, &p_offset, NULL,
	    NULL, 0) != 0) {
		dfl_free(dfl);
		return (ENXIO);
	}

	/*
	 * bd_ioctl created our own copy of dfl, so we can modify as
	 * necessary
	 */
	offset_bytes = (uint64_t)p_offset << bshift;
	len_bytes = (uint64_t)p_len << bshift;

	dfl->dfl_offset += offset_bytes;
	if (dfl->dfl_offset < offset_bytes) {
		dfl_free(dfl);
		return (EOVERFLOW);
	}

	return (dfl_iter(dfl, &dfi, offset_bytes + len_bytes, bd_free_space_cb,
	    bd, KM_SLEEP));
}

/*
 * Nexus support.
 */
int
bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
    void *arg, void *result)
{
	bd_handle_t	hdl;

	switch (ctlop) {
	case DDI_CTLOPS_REPORTDEV:
		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
		    ddi_driver_name(rdip), ddi_get_instance(rdip));
		return (DDI_SUCCESS);

	case DDI_CTLOPS_INITCHILD:
		hdl = ddi_get_parent_data((dev_info_t *)arg);
		if (hdl == NULL) {
			return (DDI_NOT_WELL_FORMED);
		}
		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
		return (DDI_SUCCESS);

	case DDI_CTLOPS_UNINITCHILD:
		ddi_set_name_addr((dev_info_t *)arg, NULL);
		ndi_prop_remove_all((dev_info_t *)arg);
		return (DDI_SUCCESS);

	default:
		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
	}
}

/*
 * Functions for device drivers.
/*
 * Nexus support.
 */
int
bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
    void *arg, void *result)
{
	bd_handle_t	hdl;

	switch (ctlop) {
	case DDI_CTLOPS_REPORTDEV:
		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
		    ddi_driver_name(rdip), ddi_get_instance(rdip));
		return (DDI_SUCCESS);

	case DDI_CTLOPS_INITCHILD:
		hdl = ddi_get_parent_data((dev_info_t *)arg);
		if (hdl == NULL) {
			return (DDI_NOT_WELL_FORMED);
		}
		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
		return (DDI_SUCCESS);

	case DDI_CTLOPS_UNINITCHILD:
		ddi_set_name_addr((dev_info_t *)arg, NULL);
		ndi_prop_remove_all((dev_info_t *)arg);
		return (DDI_SUCCESS);

	default:
		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
	}
}

/*
 * Functions for device drivers.
 */
bd_handle_t
bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
{
	bd_handle_t	hdl;

	switch (ops->o_version) {
	case BD_OPS_VERSION_0:
	case BD_OPS_VERSION_1:
	case BD_OPS_VERSION_2:
		break;

	default:
		/* Unsupported version */
		return (NULL);
	}

	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
	if (hdl == NULL) {
		return (NULL);
	}

	switch (ops->o_version) {
	case BD_OPS_VERSION_2:
		hdl->h_ops.o_free_space = ops->o_free_space;
		/*FALLTHRU*/
	case BD_OPS_VERSION_1:
	case BD_OPS_VERSION_0:
		hdl->h_ops.o_drive_info = ops->o_drive_info;
		hdl->h_ops.o_media_info = ops->o_media_info;
		hdl->h_ops.o_devid_init = ops->o_devid_init;
		hdl->h_ops.o_sync_cache = ops->o_sync_cache;
		hdl->h_ops.o_read = ops->o_read;
		hdl->h_ops.o_write = ops->o_write;
		break;
	}

	hdl->h_dma = dma;
	hdl->h_private = private;

	return (hdl);
}

void
bd_free_handle(bd_handle_t hdl)
{
	kmem_free(hdl, sizeof (*hdl));
}

int
bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
{
	dev_info_t	*child;
	bd_drive_t	drive = { 0 };

	/*
	 * It's not an error if bd_attach_handle() is called on a handle
	 * that is already attached.  We just ignore the request and
	 * return.  This way drivers using blkdev don't have to keep track
	 * of blkdev state; they can simply call this function to make
	 * sure it is attached.
	 */
	if (hdl->h_child != NULL) {
		return (DDI_SUCCESS);
	}

	/* if drivers don't override this, make it assume none */
	drive.d_lun = -1;
	hdl->h_ops.o_drive_info(hdl->h_private, &drive);

	hdl->h_parent = dip;
	hdl->h_name = "blkdev";

	/*LINTED: E_BAD_PTR_CAST_ALIGN*/
	if (*(uint64_t *)drive.d_eui64 != 0) {
		if (drive.d_lun >= 0) {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "w%02X%02X%02X%02X%02X%02X%02X%02X,%X",
			    drive.d_eui64[0], drive.d_eui64[1],
			    drive.d_eui64[2], drive.d_eui64[3],
			    drive.d_eui64[4], drive.d_eui64[5],
			    drive.d_eui64[6], drive.d_eui64[7], drive.d_lun);
		} else {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "w%02X%02X%02X%02X%02X%02X%02X%02X",
			    drive.d_eui64[0], drive.d_eui64[1],
			    drive.d_eui64[2], drive.d_eui64[3],
			    drive.d_eui64[4], drive.d_eui64[5],
			    drive.d_eui64[6], drive.d_eui64[7]);
		}
	} else {
		if (drive.d_lun >= 0) {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "%X,%X", drive.d_target, drive.d_lun);
		} else {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "%X", drive.d_target);
		}
	}

	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
	    &child) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    "blkdev", hdl->h_addr);
		return (DDI_FAILURE);
	}

	ddi_set_parent_data(child, hdl);
	hdl->h_child = child;

	if (ndi_devi_online(child, 0) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    hdl->h_name, hdl->h_addr);
		(void) ndi_devi_free(child);
		hdl->h_child = NULL;
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}
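/*
 * Example (illustrative sketch, not part of blkdev): a parent driver
 * typically allocates and attaches its handle from attach(9E).  The
 * "mydrv" names, soft-state layout, and DMA attributes are hypothetical.
 *
 *	static bd_ops_t mydrv_bd_ops = {
 *		.o_version	= BD_OPS_VERSION_2,
 *		.o_drive_info	= mydrv_drive_info,
 *		.o_media_info	= mydrv_media_info,
 *		.o_devid_init	= mydrv_devid_init,
 *		.o_sync_cache	= mydrv_sync_cache,
 *		.o_read		= mydrv_read,
 *		.o_write	= mydrv_write,
 *		.o_free_space	= mydrv_free_space,
 *	};
 *
 *	mp->m_bdh = bd_alloc_handle(mp, &mydrv_bd_ops, &mydrv_dma_attr,
 *	    KM_SLEEP);
 *	if (mp->m_bdh == NULL ||
 *	    bd_attach_handle(dip, mp->m_bdh) != DDI_SUCCESS)
 *		return (DDI_FAILURE);
 */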
int
bd_detach_handle(bd_handle_t hdl)
{
	int	circ;
	int	rv;
	char	*devnm;

	/*
	 * It's not an error if bd_detach_handle() is called on a handle
	 * that is already detached.  We just ignore the request and
	 * return.  This way drivers using blkdev don't have to keep track
	 * of blkdev state; they can simply call this function to make
	 * sure it is detached.
	 */
	if (hdl->h_child == NULL) {
		return (DDI_SUCCESS);
	}
	ndi_devi_enter(hdl->h_parent, &circ);
	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
		rv = ddi_remove_child(hdl->h_child, 0);
	} else {
		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
		(void) ddi_deviname(hdl->h_child, devnm);
		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
		kmem_free(devnm, MAXNAMELEN + 1);
	}
	if (rv == 0) {
		hdl->h_child = NULL;
	}

	ndi_devi_exit(hdl->h_parent, circ);
	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
}

void
bd_xfer_done(bd_xfer_t *xfer, int err)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	buf_t		*bp = xi->i_bp;
	int		rv = DDI_SUCCESS;
	bd_t		*bd = xi->i_bd;
	size_t		len;

	if (err != 0) {
		bd_runq_exit(xi, err);
		atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, err);
		biodone(bp);
		return;
	}

	xi->i_cur_win++;
	xi->i_resid -= xi->i_len;

	if (xi->i_resid == 0) {
		/* Job completed successfully! */
		bd_runq_exit(xi, 0);

		bd_xfer_free(xi);
		biodone(bp);
		return;
	}

	xi->i_blkno += xi->i_nblks;

	if (bd->d_use_dma) {
		/* More transfer still pending... advance to next DMA window. */
		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
	} else {
		/* Advance memory window. */
		xi->i_kaddr += xi->i_len;
		xi->i_offset += xi->i_len;
		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
	}

	if ((rv != DDI_SUCCESS) ||
	    (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
		bd_runq_exit(xi, EFAULT);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, EFAULT);
		biodone(bp);
		return;
	}
	xi->i_len = len;
	xi->i_nblks = len >> xi->i_blkshift;

	/* Submit next window to hardware. */
	rv = xi->i_func(bd->d_private, &xi->i_public);
	if (rv != 0) {
		bd_runq_exit(xi, rv);

		atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, rv);
		biodone(bp);
	}
}
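/*
 * Example (illustrative sketch, not part of blkdev): a parent driver
 * reports completion of each submitted transfer, often from its
 * interrupt handler, by calling bd_xfer_done().  blkdev then either
 * advances to the next window or finishes the buf.  The "mydrv" names
 * are hypothetical.
 *
 *	static void
 *	mydrv_cmd_complete(mydrv_cmd_t *cmd)
 *	{
 *		bd_xfer_t	*xfer = cmd->c_xfer;
 *		int		err = mydrv_cmd_status(cmd);  (0 on success)
 *
 *		bd_xfer_done(xfer, err);
 *	}
 */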
void
bd_error(bd_xfer_t *xfer, int error)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	bd_t		*bd = xi->i_bd;

	switch (error) {
	case BD_ERR_MEDIA:
		atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32);
		break;
	case BD_ERR_NTRDY:
		atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32);
		break;
	case BD_ERR_NODEV:
		atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32);
		break;
	case BD_ERR_RECOV:
		atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32);
		break;
	case BD_ERR_ILLRQ:
		atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32);
		break;
	case BD_ERR_PFA:
		atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32);
		break;
	default:
		cmn_err(CE_PANIC, "bd_error: unknown error type %d", error);
		break;
	}
}

void
bd_state_change(bd_handle_t hdl)
{
	bd_t	*bd;

	if ((bd = hdl->h_bd) != NULL) {
		bd_update_state(bd);
	}
}

void
bd_mod_init(struct dev_ops *devops)
{
	static struct bus_ops bd_bus_ops = {
		BUSO_REV,		/* busops_rev */
		nullbusmap,		/* bus_map */
		NULL,			/* bus_get_intrspec (OBSOLETE) */
		NULL,			/* bus_add_intrspec (OBSOLETE) */
		NULL,			/* bus_remove_intrspec (OBSOLETE) */
		i_ddi_map_fault,	/* bus_map_fault */
		NULL,			/* bus_dma_map (OBSOLETE) */
		ddi_dma_allochdl,	/* bus_dma_allochdl */
		ddi_dma_freehdl,	/* bus_dma_freehdl */
		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
		ddi_dma_flush,		/* bus_dma_flush */
		ddi_dma_win,		/* bus_dma_win */
		ddi_dma_mctl,		/* bus_dma_ctl */
		bd_bus_ctl,		/* bus_ctl */
		ddi_bus_prop_op,	/* bus_prop_op */
		NULL,			/* bus_get_eventcookie */
		NULL,			/* bus_add_eventcall */
		NULL,			/* bus_remove_eventcall */
		NULL,			/* bus_post_event */
		NULL,			/* bus_intr_ctl (OBSOLETE) */
		NULL,			/* bus_config */
		NULL,			/* bus_unconfig */
		NULL,			/* bus_fm_init */
		NULL,			/* bus_fm_fini */
		NULL,			/* bus_fm_access_enter */
		NULL,			/* bus_fm_access_exit */
		NULL,			/* bus_power */
		NULL,			/* bus_intr_op */
	};

	devops->devo_bus_ops = &bd_bus_ops;

	/*
	 * NB: The device driver is free to supply its own
	 * character entry device support.
	 */
}

void
bd_mod_fini(struct dev_ops *devops)
{
	devops->devo_bus_ops = NULL;
}
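/*
 * Example (illustrative sketch, not part of blkdev): a parent driver wires
 * up the bus_ops in its _init(9E) before mod_install(), and tears them
 * down if installation fails, as well as in _fini(9E).  The names
 * "mydrv_dev_ops" and "mydrv_modlinkage" are hypothetical.
 *
 *	int
 *	_init(void)
 *	{
 *		int	rv;
 *
 *		bd_mod_init(&mydrv_dev_ops);
 *		if ((rv = mod_install(&mydrv_modlinkage)) != 0)
 *			bd_mod_fini(&mydrv_dev_ops);
 *		return (rv);
 *	}
 */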