1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
24 * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
25 * Copyright 2017 The MathWorks, Inc. All rights reserved.
26 * Copyright 2020 Joyent, Inc.
27 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
28 * Copyright 2022 Tintri by DDN, Inc. All rights reserved.
29 * Copyright 2023 Oxide Computer Company
30 */
31
32 #include <sys/types.h>
33 #include <sys/ksynch.h>
34 #include <sys/kmem.h>
35 #include <sys/file.h>
36 #include <sys/errno.h>
37 #include <sys/open.h>
38 #include <sys/buf.h>
39 #include <sys/uio.h>
40 #include <sys/aio_req.h>
41 #include <sys/cred.h>
42 #include <sys/modctl.h>
43 #include <sys/cmlb.h>
44 #include <sys/conf.h>
45 #include <sys/devops.h>
46 #include <sys/list.h>
47 #include <sys/sysmacros.h>
48 #include <sys/dkio.h>
49 #include <sys/dkioc_free_util.h>
50 #include <sys/vtoc.h>
51 #include <sys/scsi/scsi.h> /* for DTYPE_DIRECT */
52 #include <sys/kstat.h>
53 #include <sys/fs/dv_node.h>
54 #include <sys/ddi.h>
55 #include <sys/sunddi.h>
56 #include <sys/note.h>
57 #include <sys/blkdev.h>
58 #include <sys/scsi/impl/inquiry.h>
59 #include <sys/taskq.h>
60 #include <sys/taskq_impl.h>
61 #include <sys/disp.h>
62 #include <sys/sysevent/eventdefs.h>
63 #include <sys/sysevent/dev.h>
64
65 /*
66 * blkdev is a driver which provides a lot of the common functionality
67 * a block device driver may need and helps by removing code which
68 * is frequently duplicated in block device drivers.
69 *
70 * Within this driver all the struct cb_ops functions required for a
71 * block device driver are written with appropriate call back functions
72 * to be provided by the parent driver.
73 *
74 * To use blkdev, a driver needs to:
75 * 1. Create a bd_ops_t structure which has the call back operations
76 * blkdev will use.
77 * 2. Create a handle by calling bd_alloc_handle(). One of the
78 * arguments to this function is the bd_ops_t.
79 * 3. Call bd_attach_handle(). This will instantiate a blkdev device
80 * as a child device node of the calling driver.
81 *
82 * A parent driver is not restricted to just allocating and attaching a
83 * single instance, it may attach as many as it wishes. For each handle
84 * attached, appropriate entries in /dev/[r]dsk are created.
85 *
 * The bd_ops_t routines that a parent of blkdev needs to provide are:
87 *
88 * o_drive_info: Provide information to blkdev such as how many I/O queues
89 * to create and the size of those queues. Also some device
90 * specifics such as EUI, vendor, product, model, serial
91 * number ....
92 *
93 * o_media_info: Provide information about the media. Eg size and block size.
94 *
95 * o_devid_init: Creates and initializes the device id. Typically calls
96 * ddi_devid_init().
97 *
98 * o_sync_cache: Issues a device appropriate command to flush any write
99 * caches.
100 *
101 * o_read: Read data as described by bd_xfer_t argument.
102 *
103 * o_write: Write data as described by bd_xfer_t argument.
104 *
105 * o_free_space: Free the space described by bd_xfer_t argument (optional).
106 *
107 * Queues
108 * ------
109 * Part of the drive_info data is a queue count. blkdev will create
110 * "queue count" number of waitq/runq pairs. Each waitq/runq pair
111 * operates independently. As an I/O is scheduled up to the parent
112 * driver via o_read or o_write its queue number is given. If the
113 * parent driver supports multiple hardware queues it can then select
114 * where to submit the I/O request.
115 *
116 * Currently blkdev uses a simplistic round-robin queue selection method.
117 * It has the advantage that it is lockless. In the future it will be
118 * worthwhile reviewing this strategy for something which prioritizes queues
119 * depending on how busy they are.
120 *
121 * Each waitq/runq pair is protected by its mutex (q_iomutex). Incoming
122 * I/O requests are initially added to the waitq. They are taken off the
123 * waitq, added to the runq and submitted, providing the runq is less
124 * than the qsize as specified in the drive_info. As an I/O request
125 * completes, the parent driver is required to call bd_xfer_done(), which
126 * will remove the I/O request from the runq and pass I/O completion
127 * status up the stack.
128 *
129 * Locks
130 * -----
 * There are 5 instance global locks: d_ocmutex, d_ksmutex, d_errmutex,
 * d_statemutex and d_dle_mutex, as well as a q_iomutex per waitq/runq pair.
133 *
134 * Lock Hierarchy
135 * --------------
136 * The only two locks which may be held simultaneously are q_iomutex and
137 * d_ksmutex. In all cases q_iomutex must be acquired before d_ksmutex.
138 */
139
/*
 * Minor number layout: each instance owns BD_MAXPART consecutive minor
 * numbers.  BDINST() recovers the instance number from a dev_t and
 * BDPART() recovers the partition index within that instance.
 */
#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)
143
/* Short names for the driver-private structures defined below. */
typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;
typedef struct bd_queue bd_queue_t;
147
/*
 * Bit flags kept in bd_t.d_dle_state.
 * NOTE(review): the code that sets and tests these flags is outside this
 * part of the file; presumably they track the task queued via d_dle_ent --
 * confirm against the consumers.
 */
typedef enum {
	BD_DLE_PENDING	= 1 << 0,
	BD_DLE_RUNNING	= 1 << 1
} bd_dle_state_t;
152
/*
 * Per-instance soft state.  It is allocated (zero-filled) and populated in
 * bd_attach() and torn down in bd_detach().
 */
struct bd {
	void		*d_private;	/* copy of the handle's h_private */
	dev_info_t	*d_dip;
	kmutex_t	d_ocmutex;	/* open/close */
	kmutex_t	d_ksmutex;	/* kstat */
	kmutex_t	d_errmutex;	/* ks_lock of the error kstat */
	kmutex_t	d_statemutex;
	kcondvar_t	d_statecv;
	enum dkio_state	d_state;
	cmlb_handle_t	d_cmlbh;
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];	/* bit mask */
	uint64_t	d_io_counter;

	uint32_t	d_qcount;	/* number of entries in d_queues */
	uint32_t	d_maxxfer;	/* bytes; multiple of DEV_BSIZE */
	uint32_t	d_blkshift;	/* starts out as DEV_BSHIFT */
	uint32_t	d_pblkshift;
	uint64_t	d_numblks;
	ddi_devid_t	d_devid;

	/* Free-space limits copied from the parent's drive info. */
	uint64_t	d_max_free_seg;
	uint64_t	d_max_free_blks;
	uint64_t	d_max_free_seg_blks;
	uint64_t	d_free_align;

	kmem_cache_t	*d_cache;	/* cache of bd_xfer_impl_t */
	bd_queue_t	*d_queues;	/* array of d_qcount queues */
	kstat_t		*d_ksp;		/* I/O kstat; NULL if not created */
	kstat_io_t	*d_kiop;	/* d_ksp data, or a scratch copy */
	kstat_t		*d_errstats;	/* error kstat; NULL if not created */
	struct bd_errstats *d_kerr;	/* d_errstats data, or a scratch copy */

	boolean_t	d_rdonly;
	boolean_t	d_ssd;
	boolean_t	d_removable;
	boolean_t	d_hotpluggable;
	boolean_t	d_use_dma;	/* B_TRUE if the handle supplied h_dma */

	ddi_dma_attr_t	d_dma;		/* copy of *h_dma when d_use_dma */
	bd_ops_t	d_ops;		/* copy of the handle's h_ops */
	bd_handle_t	d_handle;

	kmutex_t	d_dle_mutex;
	taskq_ent_t	d_dle_ent;
	bd_dle_state_t	d_dle_state;
};
201
/*
 * Handle describing one blkdev child of a parent driver.  bd_attach()
 * fetches it with ddi_get_parent_data() and copies h_ops, h_private and
 * (when non-NULL) *h_dma into the soft state.
 */
struct bd_handle {
	bd_ops_t	h_ops;
	ddi_dma_attr_t	*h_dma;		/* NULL means no DMA is used */
	dev_info_t	*h_parent;
	dev_info_t	*h_child;
	void		*h_private;
	bd_t		*h_bd;		/* set by attach, cleared by detach */
	char		*h_name;
	char		h_addr[50];	/* enough for w%0.32x,%X */
};
212
/*
 * Driver-private wrapper around the public bd_xfer_t that is handed to the
 * parent driver.  Objects come from the per-instance kmem cache (d_cache)
 * and are filled in by bd_xfer_alloc().  The i_* macros defined after the
 * structures give short names for members of i_public.
 */
struct bd_xfer_impl {
	bd_xfer_t	i_public;
	list_node_t	i_linkage;	/* waitq/runq list linkage */
	bd_t		*i_bd;		/* owning instance */
	buf_t		*i_bp;		/* buf being serviced */
	bd_queue_t	*i_bq;
	uint_t		i_num_win;	/* number of transfer windows */
	uint_t		i_cur_win;	/* index of the current window */
	off_t		i_offset;
	int		(*i_func)(void *, bd_xfer_t *);	/* op to invoke */
	uint32_t	i_blkshift;	/* d_blkshift at allocation time */
	size_t		i_len;		/* bytes in the current window */
	size_t		i_resid;	/* starts out as b_bcount */
};
227
/*
 * One waitq/runq pair, as described in the "Queues" section of the
 * overview comment.  bd_attach() creates d_qcount of these per instance.
 */
struct bd_queue {
	kmutex_t	q_iomutex;	/* protects this waitq/runq pair */
	uint32_t	q_qsize;	/* d_qsize from the drive info */
	uint32_t	q_qactive;	/* initialised to 0 at attach */
	list_t		q_runq;
	list_t		q_waitq;
};
235
/*
 * Shorthand so that members of the embedded public bd_xfer_t can be named
 * directly on a bd_xfer_impl_t (e.g. xi->i_blkno).
 */
#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags
#define	i_qnum		i_public.x_qnum
#define	i_dfl		i_public.x_dfl

/* B_TRUE when the parent driver supplied the optional o_free_space op. */
#define	CAN_FREESPACE(bd)	\
	(((bd)->d_ops.o_free_space == NULL) ? B_FALSE : B_TRUE)
248
/*
 * Private prototypes.
 */

/* Inquiry properties and error kstats. */
static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);
static void bd_create_errstats(bd_t *, int, bd_drive_t *);
static void bd_destroy_errstats(bd_t *);
static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *);
static void bd_init_errstats(bd_t *, bd_drive_t *);
static void bd_fini_errstats(bd_t *);

/* Entry points referenced from bd_dev_ops. */
static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

/* Entry points referenced from bd_cb_ops. */
static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

/* cmlb target ops (bd_tg_ops), transfer management and other helpers. */
static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *, bd_queue_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);
static int bd_check_uio(dev_t, struct uio *);
static int bd_free_space(dev_t, bd_t *, dkioc_free_list_t *);
290
/* Target operations vector handed to cmlb_attach() in bd_attach(). */
struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};
296
/* Character/block entry points; referenced from bd_dev_ops below. */
static struct cb_ops bd_cb_ops = {
	bd_open,		/* open */
	bd_close,		/* close */
	bd_strategy,		/* strategy */
	nodev,			/* print */
	bd_dump,		/* dump */
	bd_read,		/* read */
	bd_write,		/* write */
	bd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	bd_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_64BIT | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	bd_aread,		/* async read */
	bd_awrite		/* async write */
};
317
/* Device operations vector; referenced from modldrv below. */
struct dev_ops bd_dev_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	bd_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	bd_attach,		/* attach */
	bd_detach,		/* detach */
	nodev,			/* reset */
	&bd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
332
/* Module linkage element describing this module as a device driver. */
static struct modldrv modldrv = {
	&mod_driverops,
	"Generic Block Device",
	&bd_dev_ops,
};
338
/* Passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1, { &modldrv, NULL }
};
342
/*
 * Module-wide state, set up in _init() and torn down in _fini():
 * the soft state anchor for bd_t instances, a module-wide rwlock, and the
 * module's task queue.
 */
static void *bd_state;
static krwlock_t bd_lock;
static taskq_t *bd_taskq;
346
347 int
_init(void)348 _init(void)
349 {
350 char taskq_name[TASKQ_NAMELEN];
351 const char *name;
352 int rv;
353
354 rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
355 if (rv != DDI_SUCCESS)
356 return (rv);
357
358 name = mod_modname(&modlinkage);
359 (void) snprintf(taskq_name, sizeof (taskq_name), "%s_taskq", name);
360 bd_taskq = taskq_create(taskq_name, 1, minclsyspri, 0, 0, 0);
361 if (bd_taskq == NULL) {
362 cmn_err(CE_WARN, "%s: unable to create %s", name, taskq_name);
363 ddi_soft_state_fini(&bd_state);
364 return (DDI_FAILURE);
365 }
366
367 rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
368
369 rv = mod_install(&modlinkage);
370 if (rv != DDI_SUCCESS) {
371 rw_destroy(&bd_lock);
372 taskq_destroy(bd_taskq);
373 ddi_soft_state_fini(&bd_state);
374 }
375 return (rv);
376 }
377
378 int
_fini(void)379 _fini(void)
380 {
381 int rv;
382
383 rv = mod_remove(&modlinkage);
384 if (rv == DDI_SUCCESS) {
385 rw_destroy(&bd_lock);
386 taskq_destroy(bd_taskq);
387 ddi_soft_state_fini(&bd_state);
388 }
389 return (rv);
390 }
391
392 int
_info(struct modinfo * modinfop)393 _info(struct modinfo *modinfop)
394 {
395 return (mod_info(&modlinkage, modinfop));
396 }
397
398 static int
bd_getinfo(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** resultp)399 bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
400 {
401 bd_t *bd;
402 minor_t inst;
403
404 _NOTE(ARGUNUSED(dip));
405
406 inst = BDINST((dev_t)arg);
407
408 switch (cmd) {
409 case DDI_INFO_DEVT2DEVINFO:
410 bd = ddi_get_soft_state(bd_state, inst);
411 if (bd == NULL) {
412 return (DDI_FAILURE);
413 }
414 *resultp = (void *)bd->d_dip;
415 break;
416
417 case DDI_INFO_DEVT2INSTANCE:
418 *resultp = (void *)(intptr_t)inst;
419 break;
420
421 default:
422 return (DDI_FAILURE);
423 }
424 return (DDI_SUCCESS);
425 }
426
427 static void
bd_prop_update_inqstring(dev_info_t * dip,char * name,char * data,size_t len)428 bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
429 {
430 int ilen;
431 char *data_string;
432
433 ilen = scsi_ascii_inquiry_len(data, len);
434 ASSERT3U(ilen, <=, len);
435 if (ilen <= 0)
436 return;
437 /* ensure null termination */
438 data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
439 bcopy(data, data_string, ilen);
440 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
441 kmem_free(data_string, ilen + 1);
442 }
443
444 static void
bd_create_inquiry_props(dev_info_t * dip,bd_drive_t * drive)445 bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
446 {
447 if (drive->d_vendor_len > 0)
448 bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
449 drive->d_vendor, drive->d_vendor_len);
450
451 if (drive->d_product_len > 0)
452 bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
453 drive->d_product, drive->d_product_len);
454
455 if (drive->d_serial_len > 0)
456 bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
457 drive->d_serial, drive->d_serial_len);
458
459 if (drive->d_revision_len > 0)
460 bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
461 drive->d_revision, drive->d_revision_len);
462 }
463
464 static void
bd_create_errstats(bd_t * bd,int inst,bd_drive_t * drive)465 bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive)
466 {
467 char ks_module[KSTAT_STRLEN];
468 char ks_name[KSTAT_STRLEN];
469 int ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t);
470
471 if (bd->d_errstats != NULL)
472 return;
473
474 (void) snprintf(ks_module, sizeof (ks_module), "%serr",
475 ddi_driver_name(bd->d_dip));
476 (void) snprintf(ks_name, sizeof (ks_name), "%s%d,err",
477 ddi_driver_name(bd->d_dip), inst);
478
479 bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error",
480 KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
481
482 mutex_init(&bd->d_errmutex, NULL, MUTEX_DRIVER, NULL);
483 if (bd->d_errstats == NULL) {
484 /*
485 * Even if we cannot create the kstat, we create a
486 * scratch kstat. The reason for this is to ensure
487 * that we can update the kstat all of the time,
488 * without adding an extra branch instruction.
489 */
490 bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats),
491 KM_SLEEP);
492 } else {
493 bd->d_errstats->ks_lock = &bd->d_errmutex;
494 bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data;
495 }
496
497 kstat_named_init(&bd->d_kerr->bd_softerrs, "Soft Errors",
498 KSTAT_DATA_UINT32);
499 kstat_named_init(&bd->d_kerr->bd_harderrs, "Hard Errors",
500 KSTAT_DATA_UINT32);
501 kstat_named_init(&bd->d_kerr->bd_transerrs, "Transport Errors",
502 KSTAT_DATA_UINT32);
503
504 if (drive->d_model_len > 0) {
505 kstat_named_init(&bd->d_kerr->bd_model, "Model",
506 KSTAT_DATA_STRING);
507 } else {
508 kstat_named_init(&bd->d_kerr->bd_vid, "Vendor",
509 KSTAT_DATA_STRING);
510 kstat_named_init(&bd->d_kerr->bd_pid, "Product",
511 KSTAT_DATA_STRING);
512 }
513
514 kstat_named_init(&bd->d_kerr->bd_revision, "Revision",
515 KSTAT_DATA_STRING);
516 kstat_named_init(&bd->d_kerr->bd_serial, "Serial No",
517 KSTAT_DATA_STRING);
518 kstat_named_init(&bd->d_kerr->bd_capacity, "Size",
519 KSTAT_DATA_ULONGLONG);
520 kstat_named_init(&bd->d_kerr->bd_rq_media_err, "Media Error",
521 KSTAT_DATA_UINT32);
522 kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err, "Device Not Ready",
523 KSTAT_DATA_UINT32);
524 kstat_named_init(&bd->d_kerr->bd_rq_nodev_err, "No Device",
525 KSTAT_DATA_UINT32);
526 kstat_named_init(&bd->d_kerr->bd_rq_recov_err, "Recoverable",
527 KSTAT_DATA_UINT32);
528 kstat_named_init(&bd->d_kerr->bd_rq_illrq_err, "Illegal Request",
529 KSTAT_DATA_UINT32);
530 kstat_named_init(&bd->d_kerr->bd_rq_pfa_err,
531 "Predictive Failure Analysis", KSTAT_DATA_UINT32);
532
533 bd->d_errstats->ks_private = bd;
534
535 kstat_install(bd->d_errstats);
536 bd_init_errstats(bd, drive);
537 }
538
539 static void
bd_destroy_errstats(bd_t * bd)540 bd_destroy_errstats(bd_t *bd)
541 {
542 if (bd->d_errstats != NULL) {
543 bd_fini_errstats(bd);
544 kstat_delete(bd->d_errstats);
545 bd->d_errstats = NULL;
546 } else {
547 kmem_free(bd->d_kerr, sizeof (struct bd_errstats));
548 bd->d_kerr = NULL;
549 mutex_destroy(&bd->d_errmutex);
550 }
551 }
552
553 static void
bd_errstats_setstr(kstat_named_t * k,char * str,size_t len,char * alt)554 bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt)
555 {
556 char *tmp;
557 size_t km_len;
558
559 if (KSTAT_NAMED_STR_PTR(k) == NULL) {
560 if (len > 0)
561 km_len = strnlen(str, len);
562 else if (alt != NULL)
563 km_len = strlen(alt);
564 else
565 return;
566
567 tmp = kmem_alloc(km_len + 1, KM_SLEEP);
568 bcopy(len > 0 ? str : alt, tmp, km_len);
569 tmp[km_len] = '\0';
570
571 kstat_named_setstr(k, tmp);
572 }
573 }
574
575 static void
bd_errstats_clrstr(kstat_named_t * k)576 bd_errstats_clrstr(kstat_named_t *k)
577 {
578 if (KSTAT_NAMED_STR_PTR(k) == NULL)
579 return;
580
581 kmem_free(KSTAT_NAMED_STR_PTR(k), KSTAT_NAMED_STR_BUFLEN(k));
582 kstat_named_setstr(k, NULL);
583 }
584
585 static void
bd_init_errstats(bd_t * bd,bd_drive_t * drive)586 bd_init_errstats(bd_t *bd, bd_drive_t *drive)
587 {
588 struct bd_errstats *est = bd->d_kerr;
589
590 mutex_enter(&bd->d_errmutex);
591
592 if (drive->d_model_len > 0 &&
593 KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) {
594 bd_errstats_setstr(&est->bd_model, drive->d_model,
595 drive->d_model_len, NULL);
596 } else {
597 bd_errstats_setstr(&est->bd_vid, drive->d_vendor,
598 drive->d_vendor_len, "Unknown ");
599 bd_errstats_setstr(&est->bd_pid, drive->d_product,
600 drive->d_product_len, "Unknown ");
601 }
602
603 bd_errstats_setstr(&est->bd_revision, drive->d_revision,
604 drive->d_revision_len, "0001");
605 bd_errstats_setstr(&est->bd_serial, drive->d_serial,
606 drive->d_serial_len, "0 ");
607
608 mutex_exit(&bd->d_errmutex);
609 }
610
611 static void
bd_fini_errstats(bd_t * bd)612 bd_fini_errstats(bd_t *bd)
613 {
614 struct bd_errstats *est = bd->d_kerr;
615
616 mutex_enter(&bd->d_errmutex);
617
618 bd_errstats_clrstr(&est->bd_model);
619 bd_errstats_clrstr(&est->bd_vid);
620 bd_errstats_clrstr(&est->bd_pid);
621 bd_errstats_clrstr(&est->bd_revision);
622 bd_errstats_clrstr(&est->bd_serial);
623
624 mutex_exit(&bd->d_errmutex);
625 }
626
627 static void
bd_queues_free(bd_t * bd)628 bd_queues_free(bd_t *bd)
629 {
630 uint32_t i;
631
632 for (i = 0; i < bd->d_qcount; i++) {
633 bd_queue_t *bq = &bd->d_queues[i];
634
635 mutex_destroy(&bq->q_iomutex);
636 list_destroy(&bq->q_waitq);
637 list_destroy(&bq->q_runq);
638 }
639
640 kmem_free(bd->d_queues, sizeof (*bd->d_queues) * bd->d_qcount);
641 }
642
643 static int
bd_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)644 bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
645 {
646 int inst;
647 bd_handle_t hdl;
648 bd_t *bd;
649 bd_drive_t drive;
650 uint32_t i;
651 int rv;
652 char name[16];
653 char kcache[32];
654 char *node_type;
655
656 switch (cmd) {
657 case DDI_ATTACH:
658 break;
659 case DDI_RESUME:
660 /* We don't do anything native for suspend/resume */
661 return (DDI_SUCCESS);
662 default:
663 return (DDI_FAILURE);
664 }
665
666 inst = ddi_get_instance(dip);
667 hdl = ddi_get_parent_data(dip);
668
669 (void) snprintf(name, sizeof (name), "%s%d",
670 ddi_driver_name(dip), ddi_get_instance(dip));
671 (void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);
672
673 if (hdl == NULL) {
674 cmn_err(CE_WARN, "%s: missing parent data!", name);
675 return (DDI_FAILURE);
676 }
677
678 if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
679 cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
680 return (DDI_FAILURE);
681 }
682 bd = ddi_get_soft_state(bd_state, inst);
683
684 if (hdl->h_dma) {
685 bd->d_dma = *(hdl->h_dma);
686 bd->d_dma.dma_attr_granular =
687 max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
688 bd->d_use_dma = B_TRUE;
689
690 if (bd->d_maxxfer &&
691 (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
692 cmn_err(CE_WARN,
693 "%s: inconsistent maximum transfer size!",
694 name);
695 /* We force it */
696 bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
697 } else {
698 bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
699 }
700 } else {
701 bd->d_use_dma = B_FALSE;
702 if (bd->d_maxxfer == 0) {
703 bd->d_maxxfer = 1024 * 1024;
704 }
705 }
706 bd->d_ops = hdl->h_ops;
707 bd->d_private = hdl->h_private;
708 bd->d_blkshift = DEV_BSHIFT; /* 512 bytes, to start */
709
710 if (bd->d_maxxfer % DEV_BSIZE) {
711 cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
712 bd->d_maxxfer &= ~(DEV_BSIZE - 1);
713 }
714 if (bd->d_maxxfer < DEV_BSIZE) {
715 cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
716 ddi_soft_state_free(bd_state, inst);
717 return (DDI_FAILURE);
718 }
719
720 bd->d_dip = dip;
721 bd->d_handle = hdl;
722 ddi_set_driver_private(dip, bd);
723
724 mutex_init(&bd->d_ksmutex, NULL, MUTEX_DRIVER, NULL);
725 mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
726 mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
727 cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);
728 mutex_init(&bd->d_dle_mutex, NULL, MUTEX_DRIVER, NULL);
729 bd->d_dle_state = 0;
730
731 bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
732 bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);
733
734 bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
735 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
736 if (bd->d_ksp != NULL) {
737 bd->d_ksp->ks_lock = &bd->d_ksmutex;
738 kstat_install(bd->d_ksp);
739 bd->d_kiop = bd->d_ksp->ks_data;
740 } else {
741 /*
742 * Even if we cannot create the kstat, we create a
743 * scratch kstat. The reason for this is to ensure
744 * that we can update the kstat all of the time,
745 * without adding an extra branch instruction.
746 */
747 bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
748 }
749
750 cmlb_alloc_handle(&bd->d_cmlbh);
751
752 bd->d_state = DKIO_NONE;
753
754 bzero(&drive, sizeof (drive));
755 /*
756 * Default to one queue, and no restrictions on free space requests
757 * (if driver provides method) parent driver can override.
758 */
759 drive.d_qcount = 1;
760 drive.d_free_align = 1;
761 bd->d_ops.o_drive_info(bd->d_private, &drive);
762
763 /*
764 * Several checks to make sure o_drive_info() didn't return bad
765 * values:
766 *
767 * There must be at least one queue
768 */
769 if (drive.d_qcount == 0)
770 goto fail_drive_info;
771
772 /* FREE/UNMAP/TRIM alignment needs to be at least 1 block */
773 if (drive.d_free_align == 0)
774 goto fail_drive_info;
775
776 /*
777 * If d_max_free_blks is not unlimited (not 0), then we cannot allow
778 * an unlimited segment size. It is however permissible to not impose
779 * a limit on the total number of blocks freed while limiting the
780 * amount allowed in an individual segment.
781 */
782 if ((drive.d_max_free_blks > 0 && drive.d_max_free_seg_blks == 0))
783 goto fail_drive_info;
784
785 /*
786 * If a limit is set on d_max_free_blks (by the above check, we know
787 * if there's a limit on d_max_free_blks, d_max_free_seg_blks cannot
788 * be unlimited), it cannot be smaller than the limit on an individual
789 * segment.
790 */
791 if ((drive.d_max_free_blks > 0 &&
792 drive.d_max_free_seg_blks > drive.d_max_free_blks)) {
793 goto fail_drive_info;
794 }
795
796 bd->d_qcount = drive.d_qcount;
797 bd->d_removable = drive.d_removable;
798 bd->d_hotpluggable = drive.d_hotpluggable;
799
800 if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
801 bd->d_maxxfer = drive.d_maxxfer;
802
803 bd->d_free_align = drive.d_free_align;
804 bd->d_max_free_seg = drive.d_max_free_seg;
805 bd->d_max_free_blks = drive.d_max_free_blks;
806 bd->d_max_free_seg_blks = drive.d_max_free_seg_blks;
807
808 bd_create_inquiry_props(dip, &drive);
809 bd_create_errstats(bd, inst, &drive);
810 bd_update_state(bd);
811
812 bd->d_queues = kmem_alloc(sizeof (*bd->d_queues) * bd->d_qcount,
813 KM_SLEEP);
814 for (i = 0; i < bd->d_qcount; i++) {
815 bd_queue_t *bq = &bd->d_queues[i];
816
817 bq->q_qsize = drive.d_qsize;
818 bq->q_qactive = 0;
819 mutex_init(&bq->q_iomutex, NULL, MUTEX_DRIVER, NULL);
820
821 list_create(&bq->q_waitq, sizeof (bd_xfer_impl_t),
822 offsetof(struct bd_xfer_impl, i_linkage));
823 list_create(&bq->q_runq, sizeof (bd_xfer_impl_t),
824 offsetof(struct bd_xfer_impl, i_linkage));
825 }
826
827 if (*(uint64_t *)drive.d_eui64 != 0 ||
828 *(uint64_t *)drive.d_guid != 0 ||
829 *((uint64_t *)drive.d_guid + 1) != 0)
830 node_type = DDI_NT_BLOCK_BLKDEV;
831 else if (drive.d_lun >= 0)
832 node_type = DDI_NT_BLOCK_CHAN;
833 else
834 node_type = DDI_NT_BLOCK;
835
836 rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
837 bd->d_removable, bd->d_hotpluggable, node_type,
838 CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
839 if (rv != 0) {
840 goto fail_cmlb_attach;
841 }
842
843 if (bd->d_ops.o_devid_init != NULL) {
844 rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
845 if (rv == DDI_SUCCESS) {
846 if (ddi_devid_register(dip, bd->d_devid) !=
847 DDI_SUCCESS) {
848 cmn_err(CE_WARN,
849 "%s: unable to register devid", name);
850 }
851 }
852 }
853
854 /*
855 * Add a zero-length attribute to tell the world we support
856 * kernel ioctls (for layered drivers). Also set up properties
857 * used by HAL to identify removable media.
858 */
859 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
860 DDI_KERNEL_IOCTL, NULL, 0);
861 if (bd->d_removable) {
862 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
863 "removable-media", NULL, 0);
864 }
865 if (bd->d_hotpluggable) {
866 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
867 "hotpluggable", NULL, 0);
868 }
869
870 /*
871 * Before we proceed, we need to ensure that the geometry and labels on
872 * the cmlb disk are reasonable. When cmlb first attaches, it does not
873 * perform label validation and creates minor nodes based on the
874 * assumption of the size. This may not be correct and the rest of the
875 * system assumes that this will have been done before we allow opens
876 * to proceed. Otherwise, on first open, this'll all end up changing
877 * around on users. We do not care if it succeeds or not. It is totally
878 * acceptable for this device to be unlabeled or not to have anything on
879 * it.
880 */
881 (void) cmlb_validate(bd->d_cmlbh, 0, 0);
882
883 hdl->h_bd = bd;
884 ddi_report_dev(dip);
885
886 return (DDI_SUCCESS);
887
888 fail_cmlb_attach:
889 bd_queues_free(bd);
890 bd_destroy_errstats(bd);
891
892 fail_drive_info:
893 cmlb_free_handle(&bd->d_cmlbh);
894
895 if (bd->d_ksp != NULL) {
896 kstat_delete(bd->d_ksp);
897 bd->d_ksp = NULL;
898 } else {
899 kmem_free(bd->d_kiop, sizeof (kstat_io_t));
900 }
901
902 kmem_cache_destroy(bd->d_cache);
903 cv_destroy(&bd->d_statecv);
904 mutex_destroy(&bd->d_statemutex);
905 mutex_destroy(&bd->d_ocmutex);
906 mutex_destroy(&bd->d_ksmutex);
907 mutex_destroy(&bd->d_dle_mutex);
908 ddi_soft_state_free(bd_state, inst);
909 return (DDI_FAILURE);
910 }
911
912 static int
bd_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)913 bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
914 {
915 bd_handle_t hdl;
916 bd_t *bd;
917
918 bd = ddi_get_driver_private(dip);
919 hdl = ddi_get_parent_data(dip);
920
921 switch (cmd) {
922 case DDI_DETACH:
923 break;
924 case DDI_SUSPEND:
925 /* We don't suspend, but our parent does */
926 return (DDI_SUCCESS);
927 default:
928 return (DDI_FAILURE);
929 }
930
931 hdl->h_bd = NULL;
932
933 if (bd->d_ksp != NULL) {
934 kstat_delete(bd->d_ksp);
935 bd->d_ksp = NULL;
936 } else {
937 kmem_free(bd->d_kiop, sizeof (kstat_io_t));
938 }
939
940 bd_destroy_errstats(bd);
941 cmlb_detach(bd->d_cmlbh, 0);
942 cmlb_free_handle(&bd->d_cmlbh);
943 if (bd->d_devid)
944 ddi_devid_free(bd->d_devid);
945 kmem_cache_destroy(bd->d_cache);
946 mutex_destroy(&bd->d_ksmutex);
947 mutex_destroy(&bd->d_ocmutex);
948 mutex_destroy(&bd->d_statemutex);
949 cv_destroy(&bd->d_statecv);
950 mutex_destroy(&bd->d_dle_mutex);
951 bd_queues_free(bd);
952 ddi_soft_state_free(bd_state, ddi_get_instance(dip));
953 return (DDI_SUCCESS);
954 }
955
956 static int
bd_xfer_ctor(void * buf,void * arg,int kmflag)957 bd_xfer_ctor(void *buf, void *arg, int kmflag)
958 {
959 bd_xfer_impl_t *xi;
960 bd_t *bd = arg;
961 int (*dcb)(caddr_t);
962
963 if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
964 dcb = DDI_DMA_SLEEP;
965 } else {
966 dcb = DDI_DMA_DONTWAIT;
967 }
968
969 xi = buf;
970 bzero(xi, sizeof (*xi));
971 xi->i_bd = bd;
972
973 if (bd->d_use_dma) {
974 if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
975 &xi->i_dmah) != DDI_SUCCESS) {
976 return (-1);
977 }
978 }
979
980 return (0);
981 }
982
983 static void
bd_xfer_dtor(void * buf,void * arg)984 bd_xfer_dtor(void *buf, void *arg)
985 {
986 bd_xfer_impl_t *xi = buf;
987
988 _NOTE(ARGUNUSED(arg));
989
990 if (xi->i_dmah)
991 ddi_dma_free_handle(&xi->i_dmah);
992 xi->i_dmah = NULL;
993 }
994
995 static bd_xfer_impl_t *
bd_xfer_alloc(bd_t * bd,struct buf * bp,int (* func)(void *,bd_xfer_t *),int kmflag)996 bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
997 int kmflag)
998 {
999 bd_xfer_impl_t *xi;
1000 int rv = 0;
1001 int status;
1002 unsigned dir;
1003 int (*cb)(caddr_t);
1004 size_t len;
1005 uint32_t shift;
1006
1007 if (kmflag == KM_SLEEP) {
1008 cb = DDI_DMA_SLEEP;
1009 } else {
1010 cb = DDI_DMA_DONTWAIT;
1011 }
1012
1013 xi = kmem_cache_alloc(bd->d_cache, kmflag);
1014 if (xi == NULL) {
1015 bioerror(bp, ENOMEM);
1016 return (NULL);
1017 }
1018
1019 ASSERT(bp);
1020
1021 xi->i_bp = bp;
1022 xi->i_func = func;
1023 xi->i_blkno = bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT);
1024
1025 if (bp->b_bcount == 0) {
1026 xi->i_len = 0;
1027 xi->i_nblks = 0;
1028 xi->i_kaddr = NULL;
1029 xi->i_resid = 0;
1030 xi->i_num_win = 0;
1031 goto done;
1032 }
1033
1034 if (bp->b_flags & B_READ) {
1035 dir = DDI_DMA_READ;
1036 xi->i_func = bd->d_ops.o_read;
1037 } else {
1038 dir = DDI_DMA_WRITE;
1039 xi->i_func = bd->d_ops.o_write;
1040 }
1041
1042 shift = bd->d_blkshift;
1043 xi->i_blkshift = shift;
1044
1045 if (!bd->d_use_dma) {
1046 bp_mapin(bp);
1047 rv = 0;
1048 xi->i_offset = 0;
1049 xi->i_num_win =
1050 (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
1051 xi->i_cur_win = 0;
1052 xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
1053 xi->i_nblks = xi->i_len >> shift;
1054 xi->i_kaddr = bp->b_un.b_addr;
1055 xi->i_resid = bp->b_bcount;
1056 } else {
1057
1058 /*
1059 * We have to use consistent DMA if the address is misaligned.
1060 */
1061 if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
1062 ((uintptr_t)bp->b_un.b_addr & 0x7)) {
1063 dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
1064 } else {
1065 dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
1066 }
1067
1068 status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
1069 NULL, &xi->i_dmac, &xi->i_ndmac);
1070 switch (status) {
1071 case DDI_DMA_MAPPED:
1072 xi->i_num_win = 1;
1073 xi->i_cur_win = 0;
1074 xi->i_offset = 0;
1075 xi->i_len = bp->b_bcount;
1076 xi->i_nblks = xi->i_len >> shift;
1077 xi->i_resid = bp->b_bcount;
1078 rv = 0;
1079 break;
1080 case DDI_DMA_PARTIAL_MAP:
1081 xi->i_cur_win = 0;
1082
1083 if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
1084 DDI_SUCCESS) ||
1085 (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
1086 &len, &xi->i_dmac, &xi->i_ndmac) !=
1087 DDI_SUCCESS) ||
1088 (P2PHASE(len, (1U << shift)) != 0)) {
1089 (void) ddi_dma_unbind_handle(xi->i_dmah);
1090 rv = EFAULT;
1091 goto done;
1092 }
1093 xi->i_len = len;
1094 xi->i_nblks = xi->i_len >> shift;
1095 xi->i_resid = bp->b_bcount;
1096 rv = 0;
1097 break;
1098 case DDI_DMA_NORESOURCES:
1099 rv = EAGAIN;
1100 goto done;
1101 case DDI_DMA_TOOBIG:
1102 rv = EINVAL;
1103 goto done;
1104 case DDI_DMA_NOMAPPING:
1105 case DDI_DMA_INUSE:
1106 default:
1107 rv = EFAULT;
1108 goto done;
1109 }
1110 }
1111
1112 done:
1113 if (rv != 0) {
1114 kmem_cache_free(bd->d_cache, xi);
1115 bioerror(bp, rv);
1116 return (NULL);
1117 }
1118
1119 return (xi);
1120 }
1121
1122 static void
bd_xfer_free(bd_xfer_impl_t * xi)1123 bd_xfer_free(bd_xfer_impl_t *xi)
1124 {
1125 if (xi->i_dmah) {
1126 (void) ddi_dma_unbind_handle(xi->i_dmah);
1127 }
1128 if (xi->i_dfl != NULL) {
1129 dfl_free((dkioc_free_list_t *)xi->i_dfl);
1130 xi->i_dfl = NULL;
1131 }
1132 kmem_cache_free(xi->i_bd->d_cache, xi);
1133 }
1134
1135 static int
bd_open(dev_t * devp,int flag,int otyp,cred_t * credp)1136 bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
1137 {
1138 dev_t dev = *devp;
1139 bd_t *bd;
1140 minor_t part;
1141 minor_t inst;
1142 uint64_t mask;
1143 boolean_t ndelay;
1144 int rv;
1145 diskaddr_t nblks;
1146 diskaddr_t lba;
1147
1148 _NOTE(ARGUNUSED(credp));
1149
1150 part = BDPART(dev);
1151 inst = BDINST(dev);
1152
1153 if (otyp >= OTYPCNT)
1154 return (EINVAL);
1155
1156 ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;
1157
1158 /*
1159 * Block any DR events from changing the set of registered
1160 * devices while we function.
1161 */
1162 rw_enter(&bd_lock, RW_READER);
1163 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1164 rw_exit(&bd_lock);
1165 return (ENXIO);
1166 }
1167
1168 mutex_enter(&bd->d_ocmutex);
1169
1170 ASSERT(part < 64);
1171 mask = (1U << part);
1172
1173 bd_update_state(bd);
1174
1175 if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {
1176
1177 /* non-blocking opens are allowed to succeed */
1178 if (!ndelay) {
1179 rv = ENXIO;
1180 goto done;
1181 }
1182 } else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
1183 NULL, NULL, 0) == 0) {
1184
1185 /*
1186 * We read the partinfo, verify valid ranges. If the
1187 * partition is invalid, and we aren't blocking or
1188 * doing a raw access, then fail. (Non-blocking and
1189 * raw accesses can still succeed to allow a disk with
1190 * bad partition data to opened by format and fdisk.)
1191 */
1192 if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
1193 rv = ENXIO;
1194 goto done;
1195 }
1196 } else if (!ndelay) {
1197 /*
1198 * cmlb_partinfo failed -- invalid partition or no
1199 * disk label.
1200 */
1201 rv = ENXIO;
1202 goto done;
1203 }
1204
1205 if ((flag & FWRITE) && bd->d_rdonly) {
1206 rv = EROFS;
1207 goto done;
1208 }
1209
1210 if ((bd->d_open_excl) & (mask)) {
1211 rv = EBUSY;
1212 goto done;
1213 }
1214 if (flag & FEXCL) {
1215 if (bd->d_open_lyr[part]) {
1216 rv = EBUSY;
1217 goto done;
1218 }
1219 for (int i = 0; i < OTYP_LYR; i++) {
1220 if (bd->d_open_reg[i] & mask) {
1221 rv = EBUSY;
1222 goto done;
1223 }
1224 }
1225 }
1226
1227 if (otyp == OTYP_LYR) {
1228 bd->d_open_lyr[part]++;
1229 } else {
1230 bd->d_open_reg[otyp] |= mask;
1231 }
1232 if (flag & FEXCL) {
1233 bd->d_open_excl |= mask;
1234 }
1235
1236 rv = 0;
1237 done:
1238 mutex_exit(&bd->d_ocmutex);
1239 rw_exit(&bd_lock);
1240
1241 return (rv);
1242 }
1243
1244 static int
bd_close(dev_t dev,int flag,int otyp,cred_t * credp)1245 bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
1246 {
1247 bd_t *bd;
1248 minor_t inst;
1249 minor_t part;
1250 uint64_t mask;
1251 boolean_t last = B_TRUE;
1252
1253 _NOTE(ARGUNUSED(flag));
1254 _NOTE(ARGUNUSED(credp));
1255
1256 part = BDPART(dev);
1257 inst = BDINST(dev);
1258
1259 ASSERT(part < 64);
1260 mask = (1U << part);
1261
1262 rw_enter(&bd_lock, RW_READER);
1263
1264 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1265 rw_exit(&bd_lock);
1266 return (ENXIO);
1267 }
1268
1269 mutex_enter(&bd->d_ocmutex);
1270 if (bd->d_open_excl & mask) {
1271 bd->d_open_excl &= ~mask;
1272 }
1273 if (otyp == OTYP_LYR) {
1274 bd->d_open_lyr[part]--;
1275 } else {
1276 bd->d_open_reg[otyp] &= ~mask;
1277 }
1278 for (int i = 0; i < 64; i++) {
1279 if (bd->d_open_lyr[part]) {
1280 last = B_FALSE;
1281 }
1282 }
1283 for (int i = 0; last && (i < OTYP_LYR); i++) {
1284 if (bd->d_open_reg[i]) {
1285 last = B_FALSE;
1286 }
1287 }
1288 mutex_exit(&bd->d_ocmutex);
1289
1290 if (last) {
1291 cmlb_invalidate(bd->d_cmlbh, 0);
1292 }
1293 rw_exit(&bd_lock);
1294
1295 return (0);
1296 }
1297
1298 static int
bd_dump(dev_t dev,caddr_t caddr,daddr_t blkno,int nblk)1299 bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
1300 {
1301 minor_t inst;
1302 minor_t part;
1303 diskaddr_t pstart;
1304 diskaddr_t psize;
1305 bd_t *bd;
1306 bd_xfer_impl_t *xi;
1307 buf_t *bp;
1308 int rv;
1309 uint32_t shift;
1310 daddr_t d_blkno;
1311 int d_nblk;
1312
1313 rw_enter(&bd_lock, RW_READER);
1314
1315 part = BDPART(dev);
1316 inst = BDINST(dev);
1317
1318 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1319 rw_exit(&bd_lock);
1320 return (ENXIO);
1321 }
1322 shift = bd->d_blkshift;
1323 d_blkno = blkno >> (shift - DEV_BSHIFT);
1324 d_nblk = nblk >> (shift - DEV_BSHIFT);
1325 /*
1326 * do cmlb, but do it synchronously unless we already have the
1327 * partition (which we probably should.)
1328 */
1329 if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
1330 (void *)1)) {
1331 rw_exit(&bd_lock);
1332 return (ENXIO);
1333 }
1334
1335 if ((d_blkno + d_nblk) > psize) {
1336 rw_exit(&bd_lock);
1337 return (EINVAL);
1338 }
1339 bp = getrbuf(KM_NOSLEEP);
1340 if (bp == NULL) {
1341 rw_exit(&bd_lock);
1342 return (ENOMEM);
1343 }
1344
1345 bp->b_bcount = nblk << DEV_BSHIFT;
1346 bp->b_resid = bp->b_bcount;
1347 bp->b_lblkno = blkno;
1348 bp->b_un.b_addr = caddr;
1349
1350 xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP);
1351 if (xi == NULL) {
1352 rw_exit(&bd_lock);
1353 freerbuf(bp);
1354 return (ENOMEM);
1355 }
1356 xi->i_blkno = d_blkno + pstart;
1357 xi->i_flags = BD_XFER_POLL;
1358 bd_submit(bd, xi);
1359 rw_exit(&bd_lock);
1360
1361 /*
1362 * Generally, we should have run this entirely synchronously
1363 * at this point and the biowait call should be a no-op. If
1364 * it didn't happen this way, it's a bug in the underlying
1365 * driver not honoring BD_XFER_POLL.
1366 */
1367 (void) biowait(bp);
1368 rv = geterror(bp);
1369 freerbuf(bp);
1370 return (rv);
1371 }
1372
1373 void
bd_minphys(struct buf * bp)1374 bd_minphys(struct buf *bp)
1375 {
1376 minor_t inst;
1377 bd_t *bd;
1378 inst = BDINST(bp->b_edev);
1379
1380 bd = ddi_get_soft_state(bd_state, inst);
1381
1382 /*
1383 * In a non-debug kernel, bd_strategy will catch !bd as
1384 * well, and will fail nicely.
1385 */
1386 ASSERT(bd);
1387
1388 if (bp->b_bcount > bd->d_maxxfer)
1389 bp->b_bcount = bd->d_maxxfer;
1390 }
1391
1392 static int
bd_check_uio(dev_t dev,struct uio * uio)1393 bd_check_uio(dev_t dev, struct uio *uio)
1394 {
1395 bd_t *bd;
1396 uint32_t shift;
1397
1398 if ((bd = ddi_get_soft_state(bd_state, BDINST(dev))) == NULL) {
1399 return (ENXIO);
1400 }
1401
1402 shift = bd->d_blkshift;
1403 if ((P2PHASE(uio->uio_loffset, (1U << shift)) != 0) ||
1404 (P2PHASE(uio->uio_iov->iov_len, (1U << shift)) != 0)) {
1405 return (EINVAL);
1406 }
1407
1408 return (0);
1409 }
1410
1411 static int
bd_read(dev_t dev,struct uio * uio,cred_t * credp)1412 bd_read(dev_t dev, struct uio *uio, cred_t *credp)
1413 {
1414 _NOTE(ARGUNUSED(credp));
1415 int ret = bd_check_uio(dev, uio);
1416 if (ret != 0) {
1417 return (ret);
1418 }
1419 return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
1420 }
1421
1422 static int
bd_write(dev_t dev,struct uio * uio,cred_t * credp)1423 bd_write(dev_t dev, struct uio *uio, cred_t *credp)
1424 {
1425 _NOTE(ARGUNUSED(credp));
1426 int ret = bd_check_uio(dev, uio);
1427 if (ret != 0) {
1428 return (ret);
1429 }
1430 return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
1431 }
1432
1433 static int
bd_aread(dev_t dev,struct aio_req * aio,cred_t * credp)1434 bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
1435 {
1436 _NOTE(ARGUNUSED(credp));
1437 int ret = bd_check_uio(dev, aio->aio_uio);
1438 if (ret != 0) {
1439 return (ret);
1440 }
1441 return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
1442 }
1443
1444 static int
bd_awrite(dev_t dev,struct aio_req * aio,cred_t * credp)1445 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1446 {
1447 _NOTE(ARGUNUSED(credp));
1448 int ret = bd_check_uio(dev, aio->aio_uio);
1449 if (ret != 0) {
1450 return (ret);
1451 }
1452 return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
1453 }
1454
1455 static int
bd_strategy(struct buf * bp)1456 bd_strategy(struct buf *bp)
1457 {
1458 minor_t inst;
1459 minor_t part;
1460 bd_t *bd;
1461 diskaddr_t p_lba;
1462 diskaddr_t p_nblks;
1463 diskaddr_t b_nblks;
1464 bd_xfer_impl_t *xi;
1465 uint32_t shift;
1466 int (*func)(void *, bd_xfer_t *);
1467 diskaddr_t lblkno;
1468
1469 part = BDPART(bp->b_edev);
1470 inst = BDINST(bp->b_edev);
1471
1472 ASSERT(bp);
1473
1474 bp->b_resid = bp->b_bcount;
1475
1476 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1477 bioerror(bp, ENXIO);
1478 biodone(bp);
1479 return (0);
1480 }
1481
1482 if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
1483 NULL, NULL, 0)) {
1484 bioerror(bp, ENXIO);
1485 biodone(bp);
1486 return (0);
1487 }
1488
1489 shift = bd->d_blkshift;
1490 lblkno = bp->b_lblkno >> (shift - DEV_BSHIFT);
1491 if ((P2PHASE(bp->b_lblkno, (1U << (shift - DEV_BSHIFT))) != 0) ||
1492 (P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
1493 (lblkno > p_nblks)) {
1494 bioerror(bp, EINVAL);
1495 biodone(bp);
1496 return (0);
1497 }
1498 b_nblks = bp->b_bcount >> shift;
1499 if ((lblkno == p_nblks) || (bp->b_bcount == 0)) {
1500 biodone(bp);
1501 return (0);
1502 }
1503
1504 if ((b_nblks + lblkno) > p_nblks) {
1505 bp->b_resid = ((lblkno + b_nblks - p_nblks) << shift);
1506 bp->b_bcount -= bp->b_resid;
1507 } else {
1508 bp->b_resid = 0;
1509 }
1510 func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;
1511
1512 xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
1513 if (xi == NULL) {
1514 xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
1515 }
1516 if (xi == NULL) {
1517 /* bd_request_alloc will have done bioerror */
1518 biodone(bp);
1519 return (0);
1520 }
1521 xi->i_blkno = lblkno + p_lba;
1522
1523 bd_submit(bd, xi);
1524
1525 return (0);
1526 }
1527
1528 static int
bd_ioctl(dev_t dev,int cmd,intptr_t arg,int flag,cred_t * credp,int * rvalp)1529 bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
1530 {
1531 minor_t inst;
1532 uint16_t part;
1533 bd_t *bd;
1534 void *ptr = (void *)arg;
1535 int rv;
1536
1537 part = BDPART(dev);
1538 inst = BDINST(dev);
1539
1540 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1541 return (ENXIO);
1542 }
1543
1544 rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
1545 if (rv != ENOTTY)
1546 return (rv);
1547
1548 if (rvalp != NULL) {
1549 /* the return value of the ioctl is 0 by default */
1550 *rvalp = 0;
1551 }
1552
1553 switch (cmd) {
1554 case DKIOCGMEDIAINFO: {
1555 struct dk_minfo minfo;
1556
1557 /* make sure our state information is current */
1558 bd_update_state(bd);
1559 bzero(&minfo, sizeof (minfo));
1560 minfo.dki_media_type = DK_FIXED_DISK;
1561 minfo.dki_lbsize = (1U << bd->d_blkshift);
1562 minfo.dki_capacity = bd->d_numblks;
1563 if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
1564 return (EFAULT);
1565 }
1566 return (0);
1567 }
1568 case DKIOCGMEDIAINFOEXT: {
1569 struct dk_minfo_ext miext;
1570 size_t len;
1571
1572 /* make sure our state information is current */
1573 bd_update_state(bd);
1574 bzero(&miext, sizeof (miext));
1575 miext.dki_media_type = DK_FIXED_DISK;
1576 miext.dki_lbsize = (1U << bd->d_blkshift);
1577 miext.dki_pbsize = (1U << bd->d_pblkshift);
1578 miext.dki_capacity = bd->d_numblks;
1579
1580 switch (ddi_model_convert_from(flag & FMODELS)) {
1581 case DDI_MODEL_ILP32:
1582 len = sizeof (struct dk_minfo_ext32);
1583 break;
1584 default:
1585 len = sizeof (struct dk_minfo_ext);
1586 break;
1587 }
1588
1589 if (ddi_copyout(&miext, ptr, len, flag)) {
1590 return (EFAULT);
1591 }
1592 return (0);
1593 }
1594 case DKIOCINFO: {
1595 struct dk_cinfo cinfo;
1596 bzero(&cinfo, sizeof (cinfo));
1597 cinfo.dki_ctype = DKC_BLKDEV;
1598 cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
1599 (void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
1600 "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
1601 (void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
1602 "%s", ddi_driver_name(bd->d_dip));
1603 cinfo.dki_unit = inst;
1604 cinfo.dki_flags = DKI_FMTVOL;
1605 cinfo.dki_partition = part;
1606 cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
1607 cinfo.dki_addr = 0;
1608 cinfo.dki_slave = 0;
1609 cinfo.dki_space = 0;
1610 cinfo.dki_prio = 0;
1611 cinfo.dki_vec = 0;
1612 if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
1613 return (EFAULT);
1614 }
1615 return (0);
1616 }
1617 case DKIOCREMOVABLE: {
1618 int i;
1619 i = bd->d_removable ? 1 : 0;
1620 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1621 return (EFAULT);
1622 }
1623 return (0);
1624 }
1625 case DKIOCHOTPLUGGABLE: {
1626 int i;
1627 i = bd->d_hotpluggable ? 1 : 0;
1628 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1629 return (EFAULT);
1630 }
1631 return (0);
1632 }
1633 case DKIOCREADONLY: {
1634 int i;
1635 i = bd->d_rdonly ? 1 : 0;
1636 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1637 return (EFAULT);
1638 }
1639 return (0);
1640 }
1641 case DKIOCSOLIDSTATE: {
1642 int i;
1643 i = bd->d_ssd ? 1 : 0;
1644 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1645 return (EFAULT);
1646 }
1647 return (0);
1648 }
1649 case DKIOCSTATE: {
1650 enum dkio_state state;
1651 if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
1652 return (EFAULT);
1653 }
1654 if ((rv = bd_check_state(bd, &state)) != 0) {
1655 return (rv);
1656 }
1657 if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
1658 return (EFAULT);
1659 }
1660 return (0);
1661 }
1662 case DKIOCFLUSHWRITECACHE: {
1663 struct dk_callback *dkc = NULL;
1664
1665 if (flag & FKIOCTL)
1666 dkc = (void *)arg;
1667
1668 rv = bd_flush_write_cache(bd, dkc);
1669 return (rv);
1670 }
1671 case DKIOCFREE: {
1672 dkioc_free_list_t *dfl = NULL;
1673
1674 /*
1675 * Check free space support early to avoid copyin/allocation
1676 * when unnecessary.
1677 */
1678 if (!CAN_FREESPACE(bd))
1679 return (ENOTSUP);
1680
1681 rv = dfl_copyin(ptr, &dfl, flag, KM_SLEEP);
1682 if (rv != 0)
1683 return (rv);
1684
1685 /*
1686 * bd_free_space() consumes 'dfl'. bd_free_space() will
1687 * call dfl_iter() which will normally try to pass dfl through
1688 * to bd_free_space_cb() which attaches dfl to the bd_xfer_t
1689 * that is then queued for the underlying driver. Once the
1690 * driver processes the request, the bd_xfer_t instance is
1691 * disposed of, including any attached dkioc_free_list_t.
1692 *
1693 * If dfl cannot be processed by the underlying driver due to
1694 * size or alignment requirements of the driver, dfl_iter()
1695 * will replace dfl with one or more new dkioc_free_list_t
1696 * instances with the correct alignment and sizes for the driver
1697 * (and free the original dkioc_free_list_t).
1698 */
1699 rv = bd_free_space(dev, bd, dfl);
1700 return (rv);
1701 }
1702
1703 case DKIOC_CANFREE: {
1704 boolean_t supported = CAN_FREESPACE(bd);
1705
1706 if (ddi_copyout(&supported, (void *)arg, sizeof (supported),
1707 flag) != 0) {
1708 return (EFAULT);
1709 }
1710
1711 return (0);
1712 }
1713
1714 default:
1715 break;
1716
1717 }
1718 return (ENOTTY);
1719 }
1720
1721 static int
bd_prop_op(dev_t dev,dev_info_t * dip,ddi_prop_op_t prop_op,int mod_flags,char * name,caddr_t valuep,int * lengthp)1722 bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
1723 char *name, caddr_t valuep, int *lengthp)
1724 {
1725 bd_t *bd;
1726
1727 bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1728 if (bd == NULL)
1729 return (ddi_prop_op(dev, dip, prop_op, mod_flags,
1730 name, valuep, lengthp));
1731
1732 return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
1733 valuep, lengthp, BDPART(dev), 0));
1734 }
1735
1736
1737 static int
bd_tg_rdwr(dev_info_t * dip,uchar_t cmd,void * bufaddr,diskaddr_t start,size_t length,void * tg_cookie)1738 bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
1739 size_t length, void *tg_cookie)
1740 {
1741 bd_t *bd;
1742 buf_t *bp;
1743 bd_xfer_impl_t *xi;
1744 int rv;
1745 int (*func)(void *, bd_xfer_t *);
1746 int kmflag;
1747
1748 /*
1749 * If we are running in polled mode (such as during dump(9e)
1750 * execution), then we cannot sleep for kernel allocations.
1751 */
1752 kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;
1753
1754 bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1755
1756 if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
1757 /* We can only transfer whole blocks at a time! */
1758 return (EINVAL);
1759 }
1760
1761 if ((bp = getrbuf(kmflag)) == NULL) {
1762 return (ENOMEM);
1763 }
1764
1765 switch (cmd) {
1766 case TG_READ:
1767 bp->b_flags = B_READ;
1768 func = bd->d_ops.o_read;
1769 break;
1770 case TG_WRITE:
1771 bp->b_flags = B_WRITE;
1772 func = bd->d_ops.o_write;
1773 break;
1774 default:
1775 freerbuf(bp);
1776 return (EINVAL);
1777 }
1778
1779 bp->b_un.b_addr = bufaddr;
1780 bp->b_bcount = length;
1781 xi = bd_xfer_alloc(bd, bp, func, kmflag);
1782 if (xi == NULL) {
1783 rv = geterror(bp);
1784 freerbuf(bp);
1785 return (rv);
1786 }
1787 xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
1788 xi->i_blkno = start;
1789 bd_submit(bd, xi);
1790 (void) biowait(bp);
1791 rv = geterror(bp);
1792 freerbuf(bp);
1793
1794 return (rv);
1795 }
1796
1797 static int
bd_tg_getinfo(dev_info_t * dip,int cmd,void * arg,void * tg_cookie)1798 bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
1799 {
1800 bd_t *bd;
1801
1802 _NOTE(ARGUNUSED(tg_cookie));
1803 bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1804
1805 switch (cmd) {
1806 case TG_GETPHYGEOM:
1807 case TG_GETVIRTGEOM:
1808 /*
1809 * We don't have any "geometry" as such, let cmlb
1810 * fabricate something.
1811 */
1812 return (ENOTTY);
1813
1814 case TG_GETCAPACITY:
1815 bd_update_state(bd);
1816 *(diskaddr_t *)arg = bd->d_numblks;
1817 return (0);
1818
1819 case TG_GETBLOCKSIZE:
1820 *(uint32_t *)arg = (1U << bd->d_blkshift);
1821 return (0);
1822
1823 case TG_GETATTR:
1824 /*
1825 * It turns out that cmlb really doesn't do much for
1826 * non-writable media, but lets make the information
1827 * available for it in case it does more in the
1828 * future. (The value is currently used for
1829 * triggering special behavior for CD-ROMs.)
1830 */
1831 bd_update_state(bd);
1832 ((tg_attribute_t *)arg)->media_is_writable =
1833 bd->d_rdonly ? B_FALSE : B_TRUE;
1834 ((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
1835 ((tg_attribute_t *)arg)->media_is_rotational = B_FALSE;
1836 return (0);
1837
1838 default:
1839 return (EINVAL);
1840 }
1841 }
1842
1843
1844 static void
bd_sched(bd_t * bd,bd_queue_t * bq)1845 bd_sched(bd_t *bd, bd_queue_t *bq)
1846 {
1847 bd_xfer_impl_t *xi;
1848 struct buf *bp;
1849 int rv;
1850
1851 mutex_enter(&bq->q_iomutex);
1852
1853 while ((bq->q_qactive < bq->q_qsize) &&
1854 ((xi = list_remove_head(&bq->q_waitq)) != NULL)) {
1855 mutex_enter(&bd->d_ksmutex);
1856 kstat_waitq_to_runq(bd->d_kiop);
1857 mutex_exit(&bd->d_ksmutex);
1858
1859 bq->q_qactive++;
1860 list_insert_tail(&bq->q_runq, xi);
1861
1862 /*
1863 * Submit the job to the driver. We drop the I/O mutex
1864 * so that we can deal with the case where the driver
1865 * completion routine calls back into us synchronously.
1866 */
1867
1868 mutex_exit(&bq->q_iomutex);
1869
1870 rv = xi->i_func(bd->d_private, &xi->i_public);
1871 if (rv != 0) {
1872 bp = xi->i_bp;
1873 bioerror(bp, rv);
1874 biodone(bp);
1875
1876 atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1877
1878 mutex_enter(&bq->q_iomutex);
1879
1880 mutex_enter(&bd->d_ksmutex);
1881 kstat_runq_exit(bd->d_kiop);
1882 mutex_exit(&bd->d_ksmutex);
1883
1884 bq->q_qactive--;
1885 list_remove(&bq->q_runq, xi);
1886 bd_xfer_free(xi);
1887 } else {
1888 mutex_enter(&bq->q_iomutex);
1889 }
1890 }
1891
1892 mutex_exit(&bq->q_iomutex);
1893 }
1894
1895 static void
bd_submit(bd_t * bd,bd_xfer_impl_t * xi)1896 bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
1897 {
1898 uint64_t nv = atomic_inc_64_nv(&bd->d_io_counter);
1899 unsigned q = nv % bd->d_qcount;
1900 bd_queue_t *bq = &bd->d_queues[q];
1901
1902 xi->i_bq = bq;
1903 xi->i_qnum = q;
1904
1905 mutex_enter(&bq->q_iomutex);
1906
1907 list_insert_tail(&bq->q_waitq, xi);
1908
1909 mutex_enter(&bd->d_ksmutex);
1910 kstat_waitq_enter(bd->d_kiop);
1911 mutex_exit(&bd->d_ksmutex);
1912
1913 mutex_exit(&bq->q_iomutex);
1914
1915 bd_sched(bd, bq);
1916 }
1917
1918 static void
bd_runq_exit(bd_xfer_impl_t * xi,int err)1919 bd_runq_exit(bd_xfer_impl_t *xi, int err)
1920 {
1921 bd_t *bd = xi->i_bd;
1922 buf_t *bp = xi->i_bp;
1923 bd_queue_t *bq = xi->i_bq;
1924
1925 mutex_enter(&bq->q_iomutex);
1926 bq->q_qactive--;
1927
1928 mutex_enter(&bd->d_ksmutex);
1929 kstat_runq_exit(bd->d_kiop);
1930 mutex_exit(&bd->d_ksmutex);
1931
1932 list_remove(&bq->q_runq, xi);
1933 mutex_exit(&bq->q_iomutex);
1934
1935 if (err == 0) {
1936 if (bp->b_flags & B_READ) {
1937 atomic_inc_uint(&bd->d_kiop->reads);
1938 atomic_add_64((uint64_t *)&bd->d_kiop->nread,
1939 bp->b_bcount - xi->i_resid);
1940 } else {
1941 atomic_inc_uint(&bd->d_kiop->writes);
1942 atomic_add_64((uint64_t *)&bd->d_kiop->nwritten,
1943 bp->b_bcount - xi->i_resid);
1944 }
1945 }
1946 bd_sched(bd, bq);
1947 }
1948
1949 static void
bd_dle_sysevent_task(void * arg)1950 bd_dle_sysevent_task(void *arg)
1951 {
1952 nvlist_t *attr = NULL;
1953 char *path = NULL;
1954 bd_t *bd = arg;
1955 dev_info_t *dip = bd->d_dip;
1956 size_t n;
1957
1958 mutex_enter(&bd->d_dle_mutex);
1959 bd->d_dle_state &= ~BD_DLE_PENDING;
1960 bd->d_dle_state |= BD_DLE_RUNNING;
1961 mutex_exit(&bd->d_dle_mutex);
1962
1963 dev_err(dip, CE_NOTE, "!dynamic LUN expansion");
1964
1965 if (nvlist_alloc(&attr, NV_UNIQUE_NAME_TYPE, KM_SLEEP) != 0) {
1966 mutex_enter(&bd->d_dle_mutex);
1967 bd->d_dle_state &= ~(BD_DLE_RUNNING|BD_DLE_PENDING);
1968 mutex_exit(&bd->d_dle_mutex);
1969 return;
1970 }
1971
1972 path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1973
1974 n = snprintf(path, MAXPATHLEN, "/devices");
1975 (void) ddi_pathname(dip, path + n);
1976 n = strlen(path);
1977 n += snprintf(path + n, MAXPATHLEN - n, ":x");
1978
1979 for (;;) {
1980 /*
1981 * On receipt of this event, the ZFS sysevent module will scan
1982 * active zpools for child vdevs matching this physical path.
1983 * In order to catch both whole disk pools and those with an
1984 * EFI boot partition, generate separate sysevents for minor
1985 * node 'a' and 'b'.
1986 */
1987 for (char c = 'a'; c < 'c'; c++) {
1988 path[n - 1] = c;
1989
1990 if (nvlist_add_string(attr, DEV_PHYS_PATH, path) != 0)
1991 break;
1992
1993 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW,
1994 EC_DEV_STATUS, ESC_DEV_DLE, attr, NULL, DDI_SLEEP);
1995 }
1996
1997 mutex_enter(&bd->d_dle_mutex);
1998 if ((bd->d_dle_state & BD_DLE_PENDING) == 0) {
1999 bd->d_dle_state &= ~BD_DLE_RUNNING;
2000 mutex_exit(&bd->d_dle_mutex);
2001 break;
2002 }
2003 bd->d_dle_state &= ~BD_DLE_PENDING;
2004 mutex_exit(&bd->d_dle_mutex);
2005 }
2006
2007 nvlist_free(attr);
2008 kmem_free(path, MAXPATHLEN);
2009 }
2010
2011 static void
bd_update_state(bd_t * bd)2012 bd_update_state(bd_t *bd)
2013 {
2014 enum dkio_state state = DKIO_INSERTED;
2015 boolean_t docmlb = B_FALSE;
2016 bd_media_t media;
2017
2018 bzero(&media, sizeof (media));
2019
2020 mutex_enter(&bd->d_statemutex);
2021 if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
2022 bd->d_numblks = 0;
2023 state = DKIO_EJECTED;
2024 goto done;
2025 }
2026
2027 if ((media.m_blksize < 512) ||
2028 (!ISP2(media.m_blksize)) ||
2029 (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
2030 dev_err(bd->d_dip, CE_WARN, "Invalid media block size (%d)",
2031 media.m_blksize);
2032 /*
2033 * We can't use the media, treat it as not present.
2034 */
2035 state = DKIO_EJECTED;
2036 bd->d_numblks = 0;
2037 goto done;
2038 }
2039
2040 if (((1U << bd->d_blkshift) != media.m_blksize) ||
2041 (bd->d_numblks != media.m_nblks)) {
2042 /* Device size changed */
2043 docmlb = B_TRUE;
2044 }
2045
2046 bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
2047 bd->d_pblkshift = bd->d_blkshift;
2048 bd->d_numblks = media.m_nblks;
2049 bd->d_rdonly = media.m_readonly;
2050 bd->d_ssd = media.m_solidstate;
2051
2052 /*
2053 * Only use the supplied physical block size if it is non-zero,
2054 * greater or equal to the block size, and a power of 2. Ignore it
2055 * if not, it's just informational and we can still use the media.
2056 */
2057 if ((media.m_pblksize != 0) &&
2058 (media.m_pblksize >= media.m_blksize) &&
2059 (ISP2(media.m_pblksize)))
2060 bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;
2061
2062 done:
2063 if (state != bd->d_state) {
2064 bd->d_state = state;
2065 cv_broadcast(&bd->d_statecv);
2066 docmlb = B_TRUE;
2067 }
2068 mutex_exit(&bd->d_statemutex);
2069
2070 bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift;
2071
2072 if (docmlb) {
2073 if (state == DKIO_INSERTED) {
2074 (void) cmlb_validate(bd->d_cmlbh, 0, 0);
2075
2076 mutex_enter(&bd->d_dle_mutex);
2077 /*
2078 * If there is already an event pending, there's
2079 * nothing to do; we coalesce multiple events.
2080 */
2081 if ((bd->d_dle_state & BD_DLE_PENDING) == 0) {
2082 if ((bd->d_dle_state & BD_DLE_RUNNING) == 0) {
2083 taskq_dispatch_ent(bd_taskq,
2084 bd_dle_sysevent_task, bd, 0,
2085 &bd->d_dle_ent);
2086 }
2087 bd->d_dle_state |= BD_DLE_PENDING;
2088 }
2089 mutex_exit(&bd->d_dle_mutex);
2090 } else {
2091 cmlb_invalidate(bd->d_cmlbh, 0);
2092 }
2093 }
2094 }
2095
2096 static int
bd_check_state(bd_t * bd,enum dkio_state * state)2097 bd_check_state(bd_t *bd, enum dkio_state *state)
2098 {
2099 clock_t when;
2100
2101 for (;;) {
2102
2103 bd_update_state(bd);
2104
2105 mutex_enter(&bd->d_statemutex);
2106
2107 if (bd->d_state != *state) {
2108 *state = bd->d_state;
2109 mutex_exit(&bd->d_statemutex);
2110 break;
2111 }
2112
2113 when = drv_usectohz(1000000);
2114 if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
2115 when, TR_CLOCK_TICK) == 0) {
2116 mutex_exit(&bd->d_statemutex);
2117 return (EINTR);
2118 }
2119
2120 mutex_exit(&bd->d_statemutex);
2121 }
2122
2123 return (0);
2124 }
2125
2126 static int
bd_flush_write_cache_done(struct buf * bp)2127 bd_flush_write_cache_done(struct buf *bp)
2128 {
2129 struct dk_callback *dc = (void *)bp->b_private;
2130
2131 (*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
2132 kmem_free(dc, sizeof (*dc));
2133 freerbuf(bp);
2134 return (0);
2135 }
2136
2137 static int
bd_flush_write_cache(bd_t * bd,struct dk_callback * dkc)2138 bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
2139 {
2140 buf_t *bp;
2141 struct dk_callback *dc;
2142 bd_xfer_impl_t *xi;
2143 int rv;
2144
2145 if (bd->d_ops.o_sync_cache == NULL) {
2146 return (ENOTSUP);
2147 }
2148 if ((bp = getrbuf(KM_SLEEP)) == NULL) {
2149 return (ENOMEM);
2150 }
2151 bp->b_resid = 0;
2152 bp->b_bcount = 0;
2153
2154 xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
2155 if (xi == NULL) {
2156 rv = geterror(bp);
2157 freerbuf(bp);
2158 return (rv);
2159 }
2160
2161 /* Make an asynchronous flush, but only if there is a callback */
2162 if (dkc != NULL && dkc->dkc_callback != NULL) {
2163 /* Make a private copy of the callback structure */
2164 dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
2165 *dc = *dkc;
2166 bp->b_private = dc;
2167 bp->b_iodone = bd_flush_write_cache_done;
2168
2169 bd_submit(bd, xi);
2170 return (0);
2171 }
2172
2173 /* In case there is no callback, perform a synchronous flush */
2174 bd_submit(bd, xi);
2175 (void) biowait(bp);
2176 rv = geterror(bp);
2177 freerbuf(bp);
2178
2179 return (rv);
2180 }
2181
/*
 * b_iodone handler for asynchronous free-space requests: nobody waits for
 * the buf, so it is simply released.
 */
static int
bd_free_space_done(struct buf *bp)
{
	freerbuf(bp);

	return (0);
}
2188
2189 static int
bd_free_space_cb(dkioc_free_list_t * dfl,void * arg,int kmflag)2190 bd_free_space_cb(dkioc_free_list_t *dfl, void *arg, int kmflag)
2191 {
2192 bd_t *bd = arg;
2193 buf_t *bp = NULL;
2194 bd_xfer_impl_t *xi = NULL;
2195 boolean_t sync = DFL_ISSYNC(dfl) ? B_TRUE : B_FALSE;
2196 int rv = 0;
2197
2198 bp = getrbuf(KM_SLEEP);
2199 bp->b_resid = 0;
2200 bp->b_bcount = 0;
2201 bp->b_lblkno = 0;
2202
2203 xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_free_space, kmflag);
2204 xi->i_dfl = dfl;
2205
2206 if (!sync) {
2207 bp->b_iodone = bd_free_space_done;
2208 bd_submit(bd, xi);
2209 return (0);
2210 }
2211
2212 xi->i_flags |= BD_XFER_POLL;
2213 bd_submit(bd, xi);
2214
2215 (void) biowait(bp);
2216 rv = geterror(bp);
2217 freerbuf(bp);
2218
2219 return (rv);
2220 }
2221
2222 static int
bd_free_space(dev_t dev,bd_t * bd,dkioc_free_list_t * dfl)2223 bd_free_space(dev_t dev, bd_t *bd, dkioc_free_list_t *dfl)
2224 {
2225 diskaddr_t p_len, p_offset;
2226 uint64_t offset_bytes, len_bytes;
2227 minor_t part = BDPART(dev);
2228 const uint_t bshift = bd->d_blkshift;
2229 dkioc_free_info_t dfi = {
2230 .dfi_bshift = bshift,
2231 .dfi_align = bd->d_free_align << bshift,
2232 .dfi_max_bytes = bd->d_max_free_blks << bshift,
2233 .dfi_max_ext = bd->d_max_free_seg,
2234 .dfi_max_ext_bytes = bd->d_max_free_seg_blks << bshift,
2235 };
2236
2237 if (cmlb_partinfo(bd->d_cmlbh, part, &p_len, &p_offset, NULL,
2238 NULL, 0) != 0) {
2239 dfl_free(dfl);
2240 return (ENXIO);
2241 }
2242
2243 /*
2244 * bd_ioctl created our own copy of dfl, so we can modify as
2245 * necessary
2246 */
2247 offset_bytes = (uint64_t)p_offset << bshift;
2248 len_bytes = (uint64_t)p_len << bshift;
2249
2250 dfl->dfl_offset += offset_bytes;
2251 if (dfl->dfl_offset < offset_bytes) {
2252 dfl_free(dfl);
2253 return (EOVERFLOW);
2254 }
2255
2256 return (dfl_iter(dfl, &dfi, offset_bytes + len_bytes, bd_free_space_cb,
2257 bd, KM_SLEEP));
2258 }
2259
2260 /*
2261 * Nexus support.
2262 */
2263 int
bd_bus_ctl(dev_info_t * dip,dev_info_t * rdip,ddi_ctl_enum_t ctlop,void * arg,void * result)2264 bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
2265 void *arg, void *result)
2266 {
2267 bd_handle_t hdl;
2268
2269 switch (ctlop) {
2270 case DDI_CTLOPS_REPORTDEV:
2271 cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
2272 ddi_node_name(rdip), ddi_get_name_addr(rdip),
2273 ddi_driver_name(rdip), ddi_get_instance(rdip));
2274 return (DDI_SUCCESS);
2275
2276 case DDI_CTLOPS_INITCHILD:
2277 hdl = ddi_get_parent_data((dev_info_t *)arg);
2278 if (hdl == NULL) {
2279 return (DDI_NOT_WELL_FORMED);
2280 }
2281 ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
2282 return (DDI_SUCCESS);
2283
2284 case DDI_CTLOPS_UNINITCHILD:
2285 ddi_set_name_addr((dev_info_t *)arg, NULL);
2286 ndi_prop_remove_all((dev_info_t *)arg);
2287 return (DDI_SUCCESS);
2288
2289 default:
2290 return (ddi_ctlops(dip, rdip, ctlop, arg, result));
2291 }
2292 }
2293
2294 /*
2295 * Functions for device drivers.
2296 */
2297 bd_handle_t
bd_alloc_handle(void * private,bd_ops_t * ops,ddi_dma_attr_t * dma,int kmflag)2298 bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
2299 {
2300 bd_handle_t hdl;
2301
2302 switch (ops->o_version) {
2303 case BD_OPS_VERSION_0:
2304 case BD_OPS_VERSION_1:
2305 case BD_OPS_VERSION_2:
2306 break;
2307
2308 default:
2309 /* Unsupported version */
2310 return (NULL);
2311 }
2312
2313 hdl = kmem_zalloc(sizeof (*hdl), kmflag);
2314 if (hdl == NULL) {
2315 return (NULL);
2316 }
2317
2318 switch (ops->o_version) {
2319 case BD_OPS_VERSION_2:
2320 hdl->h_ops.o_free_space = ops->o_free_space;
2321 /*FALLTHRU*/
2322 case BD_OPS_VERSION_1:
2323 case BD_OPS_VERSION_0:
2324 hdl->h_ops.o_drive_info = ops->o_drive_info;
2325 hdl->h_ops.o_media_info = ops->o_media_info;
2326 hdl->h_ops.o_devid_init = ops->o_devid_init;
2327 hdl->h_ops.o_sync_cache = ops->o_sync_cache;
2328 hdl->h_ops.o_read = ops->o_read;
2329 hdl->h_ops.o_write = ops->o_write;
2330 break;
2331 }
2332
2333 hdl->h_dma = dma;
2334 hdl->h_private = private;
2335
2336 return (hdl);
2337 }
2338
2339 void
bd_free_handle(bd_handle_t hdl)2340 bd_free_handle(bd_handle_t hdl)
2341 {
2342 kmem_free(hdl, sizeof (*hdl));
2343 }
2344
2345 int
bd_attach_handle(dev_info_t * dip,bd_handle_t hdl)2346 bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
2347 {
2348 bd_drive_t drive = { 0 };
2349 dev_info_t *child;
2350 size_t len;
2351
2352 /*
2353 * It's not an error if bd_attach_handle() is called on a handle that
2354 * already is attached. We just ignore the request to attach and return.
2355 * This way drivers using blkdev don't have to keep track about blkdev
2356 * state, they can just call this function to make sure it attached.
2357 */
2358 if (hdl->h_child != NULL) {
2359 return (DDI_SUCCESS);
2360 }
2361
2362 /* if drivers don't override this, make it assume none */
2363 drive.d_lun = -1;
2364 hdl->h_ops.o_drive_info(hdl->h_private, &drive);
2365
2366 hdl->h_parent = dip;
2367 hdl->h_name = "blkdev";
2368
2369 /*
2370 * Prefer the GUID over the EUI64.
2371 */
2372 if (*(uint64_t *)drive.d_guid != 0 ||
2373 *((uint64_t *)drive.d_guid + 1) != 0) {
2374 len = snprintf(hdl->h_addr, sizeof (hdl->h_addr),
2375 "w%02X%02X%02X%02X%02X%02X%02X%02X"
2376 "%02X%02X%02X%02X%02X%02X%02X%02X",
2377 drive.d_guid[0], drive.d_guid[1], drive.d_guid[2],
2378 drive.d_guid[3], drive.d_guid[4], drive.d_guid[5],
2379 drive.d_guid[6], drive.d_guid[7], drive.d_guid[8],
2380 drive.d_guid[9], drive.d_guid[10], drive.d_guid[11],
2381 drive.d_guid[12], drive.d_guid[13], drive.d_guid[14],
2382 drive.d_guid[15]);
2383 } else if (*(uint64_t *)drive.d_eui64 != 0) {
2384 len = snprintf(hdl->h_addr, sizeof (hdl->h_addr),
2385 "w%02X%02X%02X%02X%02X%02X%02X%02X",
2386 drive.d_eui64[0], drive.d_eui64[1],
2387 drive.d_eui64[2], drive.d_eui64[3],
2388 drive.d_eui64[4], drive.d_eui64[5],
2389 drive.d_eui64[6], drive.d_eui64[7]);
2390 } else {
2391 len = snprintf(hdl->h_addr, sizeof (hdl->h_addr),
2392 "%X", drive.d_target);
2393 }
2394
2395 VERIFY(len <= sizeof (hdl->h_addr));
2396
2397 if (drive.d_lun >= 0) {
2398 (void) snprintf(hdl->h_addr + len, sizeof (hdl->h_addr) - len,
2399 ",%X", drive.d_lun);
2400 }
2401
2402 if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
2403 &child) != NDI_SUCCESS) {
2404 cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
2405 ddi_driver_name(dip), ddi_get_instance(dip),
2406 "blkdev", hdl->h_addr);
2407 return (DDI_FAILURE);
2408 }
2409
2410 ddi_set_parent_data(child, hdl);
2411 hdl->h_child = child;
2412
2413 if (ndi_devi_online(child, 0) != NDI_SUCCESS) {
2414 cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
2415 ddi_driver_name(dip), ddi_get_instance(dip),
2416 hdl->h_name, hdl->h_addr);
2417 (void) ndi_devi_free(child);
2418 hdl->h_child = NULL;
2419 return (DDI_FAILURE);
2420 }
2421
2422 return (DDI_SUCCESS);
2423 }
2424
2425 int
bd_detach_handle(bd_handle_t hdl)2426 bd_detach_handle(bd_handle_t hdl)
2427 {
2428 int rv;
2429 char *devnm;
2430
2431 /*
2432 * It's not an error if bd_detach_handle() is called on a handle that
2433 * already is detached. We just ignore the request to detach and return.
2434 * This way drivers using blkdev don't have to keep track about blkdev
2435 * state, they can just call this function to make sure it detached.
2436 */
2437 if (hdl->h_child == NULL) {
2438 return (DDI_SUCCESS);
2439 }
2440 ndi_devi_enter(hdl->h_parent);
2441 if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
2442 rv = ddi_remove_child(hdl->h_child, 0);
2443 } else {
2444 devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
2445 (void) ddi_deviname(hdl->h_child, devnm);
2446 (void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
2447 rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
2448 NDI_DEVI_REMOVE | NDI_UNCONFIG);
2449 kmem_free(devnm, MAXNAMELEN + 1);
2450 }
2451 if (rv == 0) {
2452 hdl->h_child = NULL;
2453 }
2454
2455 ndi_devi_exit(hdl->h_parent);
2456 return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
2457 }
2458
2459 void
bd_xfer_done(bd_xfer_t * xfer,int err)2460 bd_xfer_done(bd_xfer_t *xfer, int err)
2461 {
2462 bd_xfer_impl_t *xi = (void *)xfer;
2463 buf_t *bp = xi->i_bp;
2464 int rv = DDI_SUCCESS;
2465 bd_t *bd = xi->i_bd;
2466 size_t len;
2467
2468 if (err != 0) {
2469 bd_runq_exit(xi, err);
2470 atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);
2471
2472 bp->b_resid += xi->i_resid;
2473 bd_xfer_free(xi);
2474 bioerror(bp, err);
2475 biodone(bp);
2476 return;
2477 }
2478
2479 xi->i_cur_win++;
2480 xi->i_resid -= xi->i_len;
2481
2482 if (xi->i_resid == 0) {
2483 /* Job completed succcessfully! */
2484 bd_runq_exit(xi, 0);
2485
2486 bd_xfer_free(xi);
2487 biodone(bp);
2488 return;
2489 }
2490
2491 xi->i_blkno += xi->i_nblks;
2492
2493 if (bd->d_use_dma) {
2494 /* More transfer still pending... advance to next DMA window. */
2495 rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
2496 &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
2497 } else {
2498 /* Advance memory window. */
2499 xi->i_kaddr += xi->i_len;
2500 xi->i_offset += xi->i_len;
2501 len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
2502 }
2503
2504
2505 if ((rv != DDI_SUCCESS) ||
2506 (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
2507 bd_runq_exit(xi, EFAULT);
2508
2509 bp->b_resid += xi->i_resid;
2510 bd_xfer_free(xi);
2511 bioerror(bp, EFAULT);
2512 biodone(bp);
2513 return;
2514 }
2515 xi->i_len = len;
2516 xi->i_nblks = len >> xi->i_blkshift;
2517
2518 /* Submit next window to hardware. */
2519 rv = xi->i_func(bd->d_private, &xi->i_public);
2520 if (rv != 0) {
2521 bd_runq_exit(xi, rv);
2522
2523 atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
2524
2525 bp->b_resid += xi->i_resid;
2526 bd_xfer_free(xi);
2527 bioerror(bp, rv);
2528 biodone(bp);
2529 }
2530 }
2531
2532 void
bd_error(bd_xfer_t * xfer,int error)2533 bd_error(bd_xfer_t *xfer, int error)
2534 {
2535 bd_xfer_impl_t *xi = (void *)xfer;
2536 bd_t *bd = xi->i_bd;
2537
2538 switch (error) {
2539 case BD_ERR_MEDIA:
2540 atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32);
2541 break;
2542 case BD_ERR_NTRDY:
2543 atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32);
2544 break;
2545 case BD_ERR_NODEV:
2546 atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32);
2547 break;
2548 case BD_ERR_RECOV:
2549 atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32);
2550 break;
2551 case BD_ERR_ILLRQ:
2552 atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32);
2553 break;
2554 case BD_ERR_PFA:
2555 atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32);
2556 break;
2557 default:
2558 cmn_err(CE_PANIC, "bd_error: unknown error type %d", error);
2559 break;
2560 }
2561 }
2562
2563 void
bd_state_change(bd_handle_t hdl)2564 bd_state_change(bd_handle_t hdl)
2565 {
2566 bd_t *bd;
2567
2568 if ((bd = hdl->h_bd) != NULL) {
2569 bd_update_state(bd);
2570 }
2571 }
2572
2573 const char *
bd_address(bd_handle_t hdl)2574 bd_address(bd_handle_t hdl)
2575 {
2576 return (hdl->h_addr);
2577 }
2578
/*
 * Install the blkdev bus operations into a driver's dev_ops.
 *
 * devops->devo_bus_ops is pointed at a single function-static bus_ops
 * vector, so every caller shares the same table. The only blkdev-specific
 * entry in it is bd_bus_ctl; the remaining non-NULL entries are the
 * ddi_dma_*, ddi_bus_prop_op, nullbusmap and i_ddi_map_fault routines
 * named below.
 */
void
bd_mod_init(struct dev_ops *devops)
{
	/*
	 * Positional initializer: the trailing comments name the bus_ops
	 * member each slot is intended for.
	 */
	static struct bus_ops bd_bus_ops = {
		BUSO_REV,		/* busops_rev */
		nullbusmap,		/* bus_map */
		NULL,			/* bus_get_intrspec (OBSOLETE) */
		NULL,			/* bus_add_intrspec (OBSOLETE) */
		NULL,			/* bus_remove_intrspec (OBSOLETE) */
		i_ddi_map_fault,	/* bus_map_fault */
		NULL,			/* bus_dma_map (OBSOLETE) */
		ddi_dma_allochdl,	/* bus_dma_allochdl */
		ddi_dma_freehdl,	/* bus_dma_freehdl */
		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
		ddi_dma_flush,		/* bus_dma_flush */
		ddi_dma_win,		/* bus_dma_win */
		ddi_dma_mctl,		/* bus_dma_ctl */
		bd_bus_ctl,		/* bus_ctl */
		ddi_bus_prop_op,	/* bus_prop_op */
		NULL,			/* bus_get_eventcookie */
		NULL,			/* bus_add_eventcall */
		NULL,			/* bus_remove_eventcall */
		NULL,			/* bus_post_event */
		NULL,			/* bus_intr_ctl (OBSOLETE) */
		NULL,			/* bus_config */
		NULL,			/* bus_unconfig */
		NULL,			/* bus_fm_init */
		NULL,			/* bus_fm_fini */
		NULL,			/* bus_fm_access_enter */
		NULL,			/* bus_fm_access_exit */
		NULL,			/* bus_power */
		NULL,			/* bus_intr_op */
	};

	devops->devo_bus_ops = &bd_bus_ops;

	/*
	 * NB: The device driver is free to supply its own
	 * character entry device support.
	 */
}
2621
2622 void
bd_mod_fini(struct dev_ops * devops)2623 bd_mod_fini(struct dev_ops *devops)
2624 {
2625 devops->devo_bus_ops = NULL;
2626 }
2627