xref: /titanic_50/usr/src/uts/common/io/blkdev/blkdev.c (revision 7a5aac98bc37534537d4896efd4efd30627d221e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
24  * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
25  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
26  */
27 
28 #include <sys/types.h>
29 #include <sys/ksynch.h>
30 #include <sys/kmem.h>
31 #include <sys/file.h>
32 #include <sys/errno.h>
33 #include <sys/open.h>
34 #include <sys/buf.h>
35 #include <sys/uio.h>
36 #include <sys/aio_req.h>
37 #include <sys/cred.h>
38 #include <sys/modctl.h>
39 #include <sys/cmlb.h>
40 #include <sys/conf.h>
41 #include <sys/devops.h>
42 #include <sys/list.h>
43 #include <sys/sysmacros.h>
44 #include <sys/dkio.h>
45 #include <sys/vtoc.h>
46 #include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
47 #include <sys/kstat.h>
48 #include <sys/fs/dv_node.h>
49 #include <sys/ddi.h>
50 #include <sys/sunddi.h>
51 #include <sys/note.h>
52 #include <sys/blkdev.h>
53 #include <sys/scsi/impl/inquiry.h>
54 
/*
 * A dev_t minor number encodes both the instance and the partition:
 * every instance owns BD_MAXPART consecutive minor numbers.
 */
#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)	/* instance number */
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)	/* partition index */

/* Short names for the driver-private structures declared in this file. */
typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;
61 
62 struct bd {
63 	void		*d_private;
64 	dev_info_t	*d_dip;
65 	kmutex_t	d_ocmutex;
66 	kmutex_t	d_iomutex;
67 	kmutex_t	*d_errmutex;
68 	kmutex_t	d_statemutex;
69 	kcondvar_t	d_statecv;
70 	enum dkio_state	d_state;
71 	cmlb_handle_t	d_cmlbh;
72 	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
73 	uint64_t	d_open_excl;	/* bit mask indexed by partition */
74 	uint64_t	d_open_reg[OTYPCNT];		/* bit mask */
75 
76 	uint32_t	d_qsize;
77 	uint32_t	d_qactive;
78 	uint32_t	d_maxxfer;
79 	uint32_t	d_blkshift;
80 	uint32_t	d_pblkshift;
81 	uint64_t	d_numblks;
82 	ddi_devid_t	d_devid;
83 
84 	kmem_cache_t	*d_cache;
85 	list_t		d_runq;
86 	list_t		d_waitq;
87 	kstat_t		*d_ksp;
88 	kstat_io_t	*d_kiop;
89 	kstat_t		*d_errstats;
90 	struct bd_errstats *d_kerr;
91 
92 	boolean_t	d_rdonly;
93 	boolean_t	d_ssd;
94 	boolean_t	d_removable;
95 	boolean_t	d_hotpluggable;
96 	boolean_t	d_use_dma;
97 
98 	ddi_dma_attr_t	d_dma;
99 	bd_ops_t	d_ops;
100 	bd_handle_t	d_handle;
101 };
102 
103 struct bd_handle {
104 	bd_ops_t	h_ops;
105 	ddi_dma_attr_t	*h_dma;
106 	dev_info_t	*h_parent;
107 	dev_info_t	*h_child;
108 	void		*h_private;
109 	bd_t		*h_bd;
110 	char		*h_name;
111 	char		h_addr[20];	/* enough for %X,%X */
112 };
113 
114 struct bd_xfer_impl {
115 	bd_xfer_t	i_public;
116 	list_node_t	i_linkage;
117 	bd_t		*i_bd;
118 	buf_t		*i_bp;
119 	uint_t		i_num_win;
120 	uint_t		i_cur_win;
121 	off_t		i_offset;
122 	int		(*i_func)(void *, bd_xfer_t *);
123 	uint32_t	i_blkshift;
124 	size_t		i_len;
125 	size_t		i_resid;
126 };
127 
/*
 * Shorthand for reaching the public bd_xfer_t members through a
 * bd_xfer_impl_t.
 */
#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags
135 
136 
137 /*
138  * Private prototypes.
139  */
140 
141 static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
142 static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);
143 static void bd_create_errstats(bd_t *, int, bd_drive_t *);
144 static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *);
145 static void bd_init_errstats(bd_t *, bd_drive_t *);
146 
147 static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
148 static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
149 static int bd_detach(dev_info_t *, ddi_detach_cmd_t);
150 
151 static int bd_open(dev_t *, int, int, cred_t *);
152 static int bd_close(dev_t, int, int, cred_t *);
153 static int bd_strategy(struct buf *);
154 static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
155 static int bd_dump(dev_t, caddr_t, daddr_t, int);
156 static int bd_read(dev_t, struct uio *, cred_t *);
157 static int bd_write(dev_t, struct uio *, cred_t *);
158 static int bd_aread(dev_t, struct aio_req *, cred_t *);
159 static int bd_awrite(dev_t, struct aio_req *, cred_t *);
160 static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
161     caddr_t, int *);
162 
163 static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
164     void *);
165 static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
166 static int bd_xfer_ctor(void *, void *, int);
167 static void bd_xfer_dtor(void *, void *);
168 static void bd_sched(bd_t *);
169 static void bd_submit(bd_t *, bd_xfer_impl_t *);
170 static void bd_runq_exit(bd_xfer_impl_t *, int);
171 static void bd_update_state(bd_t *);
172 static int bd_check_state(bd_t *, enum dkio_state *);
173 static int bd_flush_write_cache(bd_t *, struct dk_callback *);
174 
175 struct cmlb_tg_ops bd_tg_ops = {
176 	TG_DK_OPS_VERSION_1,
177 	bd_tg_rdwr,
178 	bd_tg_getinfo,
179 };
180 
181 static struct cb_ops bd_cb_ops = {
182 	bd_open, 		/* open */
183 	bd_close, 		/* close */
184 	bd_strategy, 		/* strategy */
185 	nodev, 			/* print */
186 	bd_dump,		/* dump */
187 	bd_read, 		/* read */
188 	bd_write, 		/* write */
189 	bd_ioctl, 		/* ioctl */
190 	nodev, 			/* devmap */
191 	nodev, 			/* mmap */
192 	nodev, 			/* segmap */
193 	nochpoll, 		/* poll */
194 	bd_prop_op, 		/* cb_prop_op */
195 	0, 			/* streamtab  */
196 	D_64BIT | D_MP,		/* Driver comaptibility flag */
197 	CB_REV,			/* cb_rev */
198 	bd_aread,		/* async read */
199 	bd_awrite		/* async write */
200 };
201 
202 struct dev_ops bd_dev_ops = {
203 	DEVO_REV, 		/* devo_rev, */
204 	0, 			/* refcnt  */
205 	bd_getinfo,		/* getinfo */
206 	nulldev, 		/* identify */
207 	nulldev, 		/* probe */
208 	bd_attach, 		/* attach */
209 	bd_detach,		/* detach */
210 	nodev, 			/* reset */
211 	&bd_cb_ops, 		/* driver operations */
212 	NULL,			/* bus operations */
213 	NULL,			/* power */
214 	ddi_quiesce_not_needed,	/* quiesce */
215 };
216 
217 static struct modldrv modldrv = {
218 	&mod_driverops,
219 	"Generic Block Device",
220 	&bd_dev_ops,
221 };
222 
223 static struct modlinkage modlinkage = {
224 	MODREV_1, { &modldrv, NULL }
225 };
226 
227 static void *bd_state;
228 static krwlock_t bd_lock;
229 
230 int
231 _init(void)
232 {
233 	int	rv;
234 
235 	rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
236 	if (rv != DDI_SUCCESS) {
237 		return (rv);
238 	}
239 	rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
240 	rv = mod_install(&modlinkage);
241 	if (rv != DDI_SUCCESS) {
242 		rw_destroy(&bd_lock);
243 		ddi_soft_state_fini(&bd_state);
244 	}
245 	return (rv);
246 }
247 
248 int
249 _fini(void)
250 {
251 	int	rv;
252 
253 	rv = mod_remove(&modlinkage);
254 	if (rv == DDI_SUCCESS) {
255 		rw_destroy(&bd_lock);
256 		ddi_soft_state_fini(&bd_state);
257 	}
258 	return (rv);
259 }
260 
261 int
262 _info(struct modinfo *modinfop)
263 {
264 	return (mod_info(&modlinkage, modinfop));
265 }
266 
267 static int
268 bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
269 {
270 	bd_t	*bd;
271 	minor_t	inst;
272 
273 	_NOTE(ARGUNUSED(dip));
274 
275 	inst = BDINST((dev_t)arg);
276 
277 	switch (cmd) {
278 	case DDI_INFO_DEVT2DEVINFO:
279 		bd = ddi_get_soft_state(bd_state, inst);
280 		if (bd == NULL) {
281 			return (DDI_FAILURE);
282 		}
283 		*resultp = (void *)bd->d_dip;
284 		break;
285 
286 	case DDI_INFO_DEVT2INSTANCE:
287 		*resultp = (void *)(intptr_t)inst;
288 		break;
289 
290 	default:
291 		return (DDI_FAILURE);
292 	}
293 	return (DDI_SUCCESS);
294 }
295 
296 static void
297 bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
298 {
299 	int	ilen;
300 	char	*data_string;
301 
302 	ilen = scsi_ascii_inquiry_len(data, len);
303 	ASSERT3U(ilen, <=, len);
304 	if (ilen <= 0)
305 		return;
306 	/* ensure null termination */
307 	data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
308 	bcopy(data, data_string, ilen);
309 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
310 	kmem_free(data_string, ilen + 1);
311 }
312 
313 static void
314 bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
315 {
316 	if (drive->d_vendor_len > 0)
317 		bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
318 		    drive->d_vendor, drive->d_vendor_len);
319 
320 	if (drive->d_product_len > 0)
321 		bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
322 		    drive->d_product, drive->d_product_len);
323 
324 	if (drive->d_serial_len > 0)
325 		bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
326 		    drive->d_serial, drive->d_serial_len);
327 
328 	if (drive->d_revision_len > 0)
329 		bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
330 		    drive->d_revision, drive->d_revision_len);
331 }
332 
333 static void
334 bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive)
335 {
336 	char	ks_module[KSTAT_STRLEN];
337 	char	ks_name[KSTAT_STRLEN];
338 	int	ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t);
339 
340 	if (bd->d_errstats != NULL)
341 		return;
342 
343 	(void) snprintf(ks_module, sizeof (ks_module), "%serr",
344 	    ddi_driver_name(bd->d_dip));
345 	(void) snprintf(ks_name, sizeof (ks_name), "%s%d,err",
346 	    ddi_driver_name(bd->d_dip), inst);
347 
348 	bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error",
349 	    KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
350 
351 	if (bd->d_errstats == NULL) {
352 		/*
353 		 * Even if we cannot create the kstat, we create a
354 		 * scratch kstat.  The reason for this is to ensure
355 		 * that we can update the kstat all of the time,
356 		 * without adding an extra branch instruction.
357 		 */
358 		bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats),
359 		    KM_SLEEP);
360 		bd->d_errmutex = kmem_zalloc(sizeof (kmutex_t), KM_SLEEP);
361 		mutex_init(bd->d_errmutex, NULL, MUTEX_DRIVER, NULL);
362 	} else {
363 		if (bd->d_errstats->ks_lock == NULL) {
364 			bd->d_errstats->ks_lock = kmem_zalloc(sizeof (kmutex_t),
365 			    KM_SLEEP);
366 			mutex_init(bd->d_errstats->ks_lock, NULL, MUTEX_DRIVER,
367 			    NULL);
368 		}
369 
370 		bd->d_errmutex = bd->d_errstats->ks_lock;
371 		bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data;
372 	}
373 
374 	kstat_named_init(&bd->d_kerr->bd_softerrs,	"Soft Errors",
375 	    KSTAT_DATA_UINT32);
376 	kstat_named_init(&bd->d_kerr->bd_harderrs,	"Hard Errors",
377 	    KSTAT_DATA_UINT32);
378 	kstat_named_init(&bd->d_kerr->bd_transerrs,	"Transport Errors",
379 	    KSTAT_DATA_UINT32);
380 
381 	if (drive->d_model_len > 0) {
382 		kstat_named_init(&bd->d_kerr->bd_model,	"Model",
383 		    KSTAT_DATA_STRING);
384 	} else {
385 		kstat_named_init(&bd->d_kerr->bd_vid,	"Vendor",
386 		    KSTAT_DATA_STRING);
387 		kstat_named_init(&bd->d_kerr->bd_pid,	"Product",
388 		    KSTAT_DATA_STRING);
389 	}
390 
391 	kstat_named_init(&bd->d_kerr->bd_revision,	"Revision",
392 	    KSTAT_DATA_STRING);
393 	kstat_named_init(&bd->d_kerr->bd_serial,	"Serial No",
394 	    KSTAT_DATA_STRING);
395 	kstat_named_init(&bd->d_kerr->bd_capacity,	"Size",
396 	    KSTAT_DATA_ULONGLONG);
397 	kstat_named_init(&bd->d_kerr->bd_rq_media_err,	"Media Error",
398 	    KSTAT_DATA_UINT32);
399 	kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err,	"Device Not Ready",
400 	    KSTAT_DATA_UINT32);
401 	kstat_named_init(&bd->d_kerr->bd_rq_nodev_err,	"No Device",
402 	    KSTAT_DATA_UINT32);
403 	kstat_named_init(&bd->d_kerr->bd_rq_recov_err,	"Recoverable",
404 	    KSTAT_DATA_UINT32);
405 	kstat_named_init(&bd->d_kerr->bd_rq_illrq_err,	"Illegal Request",
406 	    KSTAT_DATA_UINT32);
407 	kstat_named_init(&bd->d_kerr->bd_rq_pfa_err,
408 	    "Predictive Failure Analysis", KSTAT_DATA_UINT32);
409 
410 	bd->d_errstats->ks_private = bd;
411 
412 	kstat_install(bd->d_errstats);
413 }
414 
415 static void
416 bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt)
417 {
418 	char	*tmp;
419 
420 	if (KSTAT_NAMED_STR_PTR(k) == NULL) {
421 		if (len > 0) {
422 			tmp = kmem_alloc(len + 1, KM_SLEEP);
423 			(void) strlcpy(tmp, str, len);
424 		} else {
425 			tmp = alt;
426 		}
427 
428 		kstat_named_setstr(k, tmp);
429 	}
430 }
431 
432 static void
433 bd_init_errstats(bd_t *bd, bd_drive_t *drive)
434 {
435 	struct bd_errstats	*est = bd->d_kerr;
436 
437 	mutex_enter(bd->d_errmutex);
438 
439 	if (drive->d_model_len > 0 &&
440 	    KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) {
441 		bd_errstats_setstr(&est->bd_model, drive->d_model,
442 		    drive->d_model_len, NULL);
443 	} else {
444 		bd_errstats_setstr(&est->bd_vid, drive->d_vendor,
445 		    drive->d_vendor_len, "Unknown ");
446 		bd_errstats_setstr(&est->bd_pid, drive->d_product,
447 		    drive->d_product_len, "Unknown         ");
448 	}
449 
450 	bd_errstats_setstr(&est->bd_revision, drive->d_revision,
451 	    drive->d_revision_len, "0001");
452 	bd_errstats_setstr(&est->bd_serial, drive->d_serial,
453 	    drive->d_serial_len, "0               ");
454 
455 	mutex_exit(bd->d_errmutex);
456 }
457 
458 static int
459 bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
460 {
461 	int		inst;
462 	bd_handle_t	hdl;
463 	bd_t		*bd;
464 	bd_drive_t	drive;
465 	int		rv;
466 	char		name[16];
467 	char		kcache[32];
468 
469 	switch (cmd) {
470 	case DDI_ATTACH:
471 		break;
472 	case DDI_RESUME:
473 		/* We don't do anything native for suspend/resume */
474 		return (DDI_SUCCESS);
475 	default:
476 		return (DDI_FAILURE);
477 	}
478 
479 	inst = ddi_get_instance(dip);
480 	hdl = ddi_get_parent_data(dip);
481 
482 	(void) snprintf(name, sizeof (name), "%s%d",
483 	    ddi_driver_name(dip), ddi_get_instance(dip));
484 	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);
485 
486 	if (hdl == NULL) {
487 		cmn_err(CE_WARN, "%s: missing parent data!", name);
488 		return (DDI_FAILURE);
489 	}
490 
491 	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
492 		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
493 		return (DDI_FAILURE);
494 	}
495 	bd = ddi_get_soft_state(bd_state, inst);
496 
497 	if (hdl->h_dma) {
498 		bd->d_dma = *(hdl->h_dma);
499 		bd->d_dma.dma_attr_granular =
500 		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
501 		bd->d_use_dma = B_TRUE;
502 
503 		if (bd->d_maxxfer &&
504 		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
505 			cmn_err(CE_WARN,
506 			    "%s: inconsistent maximum transfer size!",
507 			    name);
508 			/* We force it */
509 			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
510 		} else {
511 			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
512 		}
513 	} else {
514 		bd->d_use_dma = B_FALSE;
515 		if (bd->d_maxxfer == 0) {
516 			bd->d_maxxfer = 1024 * 1024;
517 		}
518 	}
519 	bd->d_ops = hdl->h_ops;
520 	bd->d_private = hdl->h_private;
521 	bd->d_blkshift = 9;	/* 512 bytes, to start */
522 
523 	if (bd->d_maxxfer % DEV_BSIZE) {
524 		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
525 		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
526 	}
527 	if (bd->d_maxxfer < DEV_BSIZE) {
528 		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
529 		ddi_soft_state_free(bd_state, inst);
530 		return (DDI_FAILURE);
531 	}
532 
533 	bd->d_dip = dip;
534 	bd->d_handle = hdl;
535 	hdl->h_bd = bd;
536 	ddi_set_driver_private(dip, bd);
537 
538 	mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
539 	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
540 	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
541 	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);
542 
543 	list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
544 	    offsetof(struct bd_xfer_impl, i_linkage));
545 	list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
546 	    offsetof(struct bd_xfer_impl, i_linkage));
547 
548 	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
549 	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);
550 
551 	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
552 	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
553 	if (bd->d_ksp != NULL) {
554 		bd->d_ksp->ks_lock = &bd->d_iomutex;
555 		kstat_install(bd->d_ksp);
556 		bd->d_kiop = bd->d_ksp->ks_data;
557 	} else {
558 		/*
559 		 * Even if we cannot create the kstat, we create a
560 		 * scratch kstat.  The reason for this is to ensure
561 		 * that we can update the kstat all of the time,
562 		 * without adding an extra branch instruction.
563 		 */
564 		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
565 	}
566 
567 	cmlb_alloc_handle(&bd->d_cmlbh);
568 
569 	bd->d_state = DKIO_NONE;
570 
571 	bzero(&drive, sizeof (drive));
572 	bd->d_ops.o_drive_info(bd->d_private, &drive);
573 	bd->d_qsize = drive.d_qsize;
574 	bd->d_removable = drive.d_removable;
575 	bd->d_hotpluggable = drive.d_hotpluggable;
576 
577 	if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
578 		bd->d_maxxfer = drive.d_maxxfer;
579 
580 	bd_create_inquiry_props(dip, &drive);
581 
582 	bd_create_errstats(bd, inst, &drive);
583 	bd_init_errstats(bd, &drive);
584 	bd_update_state(bd);
585 
586 	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
587 	    bd->d_removable, bd->d_hotpluggable,
588 	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
589 	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
590 	if (rv != 0) {
591 		cmlb_free_handle(&bd->d_cmlbh);
592 		kmem_cache_destroy(bd->d_cache);
593 		mutex_destroy(&bd->d_iomutex);
594 		mutex_destroy(&bd->d_ocmutex);
595 		mutex_destroy(&bd->d_statemutex);
596 		cv_destroy(&bd->d_statecv);
597 		list_destroy(&bd->d_waitq);
598 		list_destroy(&bd->d_runq);
599 		if (bd->d_ksp != NULL) {
600 			kstat_delete(bd->d_ksp);
601 			bd->d_ksp = NULL;
602 		} else {
603 			kmem_free(bd->d_kiop, sizeof (kstat_io_t));
604 		}
605 		ddi_soft_state_free(bd_state, inst);
606 		return (DDI_FAILURE);
607 	}
608 
609 	if (bd->d_ops.o_devid_init != NULL) {
610 		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
611 		if (rv == DDI_SUCCESS) {
612 			if (ddi_devid_register(dip, bd->d_devid) !=
613 			    DDI_SUCCESS) {
614 				cmn_err(CE_WARN,
615 				    "%s: unable to register devid", name);
616 			}
617 		}
618 	}
619 
620 	/*
621 	 * Add a zero-length attribute to tell the world we support
622 	 * kernel ioctls (for layered drivers).  Also set up properties
623 	 * used by HAL to identify removable media.
624 	 */
625 	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
626 	    DDI_KERNEL_IOCTL, NULL, 0);
627 	if (bd->d_removable) {
628 		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
629 		    "removable-media", NULL, 0);
630 	}
631 	if (bd->d_hotpluggable) {
632 		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
633 		    "hotpluggable", NULL, 0);
634 	}
635 
636 	ddi_report_dev(dip);
637 
638 	return (DDI_SUCCESS);
639 }
640 
641 static int
642 bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
643 {
644 	bd_t	*bd;
645 
646 	bd = ddi_get_driver_private(dip);
647 
648 	switch (cmd) {
649 	case DDI_DETACH:
650 		break;
651 	case DDI_SUSPEND:
652 		/* We don't suspend, but our parent does */
653 		return (DDI_SUCCESS);
654 	default:
655 		return (DDI_FAILURE);
656 	}
657 	if (bd->d_ksp != NULL) {
658 		kstat_delete(bd->d_ksp);
659 		bd->d_ksp = NULL;
660 	} else {
661 		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
662 	}
663 
664 	if (bd->d_errstats != NULL) {
665 		kstat_delete(bd->d_errstats);
666 		bd->d_errstats = NULL;
667 	} else {
668 		kmem_free(bd->d_kerr, sizeof (struct bd_errstats));
669 		mutex_destroy(bd->d_errmutex);
670 	}
671 
672 	cmlb_detach(bd->d_cmlbh, 0);
673 	cmlb_free_handle(&bd->d_cmlbh);
674 	if (bd->d_devid)
675 		ddi_devid_free(bd->d_devid);
676 	kmem_cache_destroy(bd->d_cache);
677 	mutex_destroy(&bd->d_iomutex);
678 	mutex_destroy(&bd->d_ocmutex);
679 	mutex_destroy(&bd->d_statemutex);
680 	cv_destroy(&bd->d_statecv);
681 	list_destroy(&bd->d_waitq);
682 	list_destroy(&bd->d_runq);
683 	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
684 	return (DDI_SUCCESS);
685 }
686 
687 static int
688 bd_xfer_ctor(void *buf, void *arg, int kmflag)
689 {
690 	bd_xfer_impl_t	*xi;
691 	bd_t		*bd = arg;
692 	int		(*dcb)(caddr_t);
693 
694 	if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
695 		dcb = DDI_DMA_SLEEP;
696 	} else {
697 		dcb = DDI_DMA_DONTWAIT;
698 	}
699 
700 	xi = buf;
701 	bzero(xi, sizeof (*xi));
702 	xi->i_bd = bd;
703 
704 	if (bd->d_use_dma) {
705 		if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
706 		    &xi->i_dmah) != DDI_SUCCESS) {
707 			return (-1);
708 		}
709 	}
710 
711 	return (0);
712 }
713 
714 static void
715 bd_xfer_dtor(void *buf, void *arg)
716 {
717 	bd_xfer_impl_t	*xi = buf;
718 
719 	_NOTE(ARGUNUSED(arg));
720 
721 	if (xi->i_dmah)
722 		ddi_dma_free_handle(&xi->i_dmah);
723 	xi->i_dmah = NULL;
724 }
725 
726 static bd_xfer_impl_t *
727 bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
728     int kmflag)
729 {
730 	bd_xfer_impl_t		*xi;
731 	int			rv = 0;
732 	int			status;
733 	unsigned		dir;
734 	int			(*cb)(caddr_t);
735 	size_t			len;
736 	uint32_t		shift;
737 
738 	if (kmflag == KM_SLEEP) {
739 		cb = DDI_DMA_SLEEP;
740 	} else {
741 		cb = DDI_DMA_DONTWAIT;
742 	}
743 
744 	xi = kmem_cache_alloc(bd->d_cache, kmflag);
745 	if (xi == NULL) {
746 		bioerror(bp, ENOMEM);
747 		return (NULL);
748 	}
749 
750 	ASSERT(bp);
751 
752 	xi->i_bp = bp;
753 	xi->i_func = func;
754 	xi->i_blkno = bp->b_lblkno;
755 
756 	if (bp->b_bcount == 0) {
757 		xi->i_len = 0;
758 		xi->i_nblks = 0;
759 		xi->i_kaddr = NULL;
760 		xi->i_resid = 0;
761 		xi->i_num_win = 0;
762 		goto done;
763 	}
764 
765 	if (bp->b_flags & B_READ) {
766 		dir = DDI_DMA_READ;
767 		xi->i_func = bd->d_ops.o_read;
768 	} else {
769 		dir = DDI_DMA_WRITE;
770 		xi->i_func = bd->d_ops.o_write;
771 	}
772 
773 	shift = bd->d_blkshift;
774 	xi->i_blkshift = shift;
775 
776 	if (!bd->d_use_dma) {
777 		bp_mapin(bp);
778 		rv = 0;
779 		xi->i_offset = 0;
780 		xi->i_num_win =
781 		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
782 		xi->i_cur_win = 0;
783 		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
784 		xi->i_nblks = xi->i_len >> shift;
785 		xi->i_kaddr = bp->b_un.b_addr;
786 		xi->i_resid = bp->b_bcount;
787 	} else {
788 
789 		/*
790 		 * We have to use consistent DMA if the address is misaligned.
791 		 */
792 		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
793 		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
794 			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
795 		} else {
796 			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
797 		}
798 
799 		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
800 		    NULL, &xi->i_dmac, &xi->i_ndmac);
801 		switch (status) {
802 		case DDI_DMA_MAPPED:
803 			xi->i_num_win = 1;
804 			xi->i_cur_win = 0;
805 			xi->i_offset = 0;
806 			xi->i_len = bp->b_bcount;
807 			xi->i_nblks = xi->i_len >> shift;
808 			xi->i_resid = bp->b_bcount;
809 			rv = 0;
810 			break;
811 		case DDI_DMA_PARTIAL_MAP:
812 			xi->i_cur_win = 0;
813 
814 			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
815 			    DDI_SUCCESS) ||
816 			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
817 			    &len, &xi->i_dmac, &xi->i_ndmac) !=
818 			    DDI_SUCCESS) ||
819 			    (P2PHASE(len, shift) != 0)) {
820 				(void) ddi_dma_unbind_handle(xi->i_dmah);
821 				rv = EFAULT;
822 				goto done;
823 			}
824 			xi->i_len = len;
825 			xi->i_nblks = xi->i_len >> shift;
826 			xi->i_resid = bp->b_bcount;
827 			rv = 0;
828 			break;
829 		case DDI_DMA_NORESOURCES:
830 			rv = EAGAIN;
831 			goto done;
832 		case DDI_DMA_TOOBIG:
833 			rv = EINVAL;
834 			goto done;
835 		case DDI_DMA_NOMAPPING:
836 		case DDI_DMA_INUSE:
837 		default:
838 			rv = EFAULT;
839 			goto done;
840 		}
841 	}
842 
843 done:
844 	if (rv != 0) {
845 		kmem_cache_free(bd->d_cache, xi);
846 		bioerror(bp, rv);
847 		return (NULL);
848 	}
849 
850 	return (xi);
851 }
852 
853 static void
854 bd_xfer_free(bd_xfer_impl_t *xi)
855 {
856 	if (xi->i_dmah) {
857 		(void) ddi_dma_unbind_handle(xi->i_dmah);
858 	}
859 	kmem_cache_free(xi->i_bd->d_cache, xi);
860 }
861 
862 static int
863 bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
864 {
865 	dev_t		dev = *devp;
866 	bd_t		*bd;
867 	minor_t		part;
868 	minor_t		inst;
869 	uint64_t	mask;
870 	boolean_t	ndelay;
871 	int		rv;
872 	diskaddr_t	nblks;
873 	diskaddr_t	lba;
874 
875 	_NOTE(ARGUNUSED(credp));
876 
877 	part = BDPART(dev);
878 	inst = BDINST(dev);
879 
880 	if (otyp >= OTYPCNT)
881 		return (EINVAL);
882 
883 	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;
884 
885 	/*
886 	 * Block any DR events from changing the set of registered
887 	 * devices while we function.
888 	 */
889 	rw_enter(&bd_lock, RW_READER);
890 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
891 		rw_exit(&bd_lock);
892 		return (ENXIO);
893 	}
894 
895 	mutex_enter(&bd->d_ocmutex);
896 
897 	ASSERT(part < 64);
898 	mask = (1U << part);
899 
900 	bd_update_state(bd);
901 
902 	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {
903 
904 		/* non-blocking opens are allowed to succeed */
905 		if (!ndelay) {
906 			rv = ENXIO;
907 			goto done;
908 		}
909 	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
910 	    NULL, NULL, 0) == 0) {
911 
912 		/*
913 		 * We read the partinfo, verify valid ranges.  If the
914 		 * partition is invalid, and we aren't blocking or
915 		 * doing a raw access, then fail. (Non-blocking and
916 		 * raw accesses can still succeed to allow a disk with
917 		 * bad partition data to opened by format and fdisk.)
918 		 */
919 		if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
920 			rv = ENXIO;
921 			goto done;
922 		}
923 	} else if (!ndelay) {
924 		/*
925 		 * cmlb_partinfo failed -- invalid partition or no
926 		 * disk label.
927 		 */
928 		rv = ENXIO;
929 		goto done;
930 	}
931 
932 	if ((flag & FWRITE) && bd->d_rdonly) {
933 		rv = EROFS;
934 		goto done;
935 	}
936 
937 	if ((bd->d_open_excl) & (mask)) {
938 		rv = EBUSY;
939 		goto done;
940 	}
941 	if (flag & FEXCL) {
942 		if (bd->d_open_lyr[part]) {
943 			rv = EBUSY;
944 			goto done;
945 		}
946 		for (int i = 0; i < OTYP_LYR; i++) {
947 			if (bd->d_open_reg[i] & mask) {
948 				rv = EBUSY;
949 				goto done;
950 			}
951 		}
952 	}
953 
954 	if (otyp == OTYP_LYR) {
955 		bd->d_open_lyr[part]++;
956 	} else {
957 		bd->d_open_reg[otyp] |= mask;
958 	}
959 	if (flag & FEXCL) {
960 		bd->d_open_excl |= mask;
961 	}
962 
963 	rv = 0;
964 done:
965 	mutex_exit(&bd->d_ocmutex);
966 	rw_exit(&bd_lock);
967 
968 	return (rv);
969 }
970 
971 static int
972 bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
973 {
974 	bd_t		*bd;
975 	minor_t		inst;
976 	minor_t		part;
977 	uint64_t	mask;
978 	boolean_t	last = B_TRUE;
979 
980 	_NOTE(ARGUNUSED(flag));
981 	_NOTE(ARGUNUSED(credp));
982 
983 	part = BDPART(dev);
984 	inst = BDINST(dev);
985 
986 	ASSERT(part < 64);
987 	mask = (1U << part);
988 
989 	rw_enter(&bd_lock, RW_READER);
990 
991 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
992 		rw_exit(&bd_lock);
993 		return (ENXIO);
994 	}
995 
996 	mutex_enter(&bd->d_ocmutex);
997 	if (bd->d_open_excl & mask) {
998 		bd->d_open_excl &= ~mask;
999 	}
1000 	if (otyp == OTYP_LYR) {
1001 		bd->d_open_lyr[part]--;
1002 	} else {
1003 		bd->d_open_reg[otyp] &= ~mask;
1004 	}
1005 	for (int i = 0; i < 64; i++) {
1006 		if (bd->d_open_lyr[part]) {
1007 			last = B_FALSE;
1008 		}
1009 	}
1010 	for (int i = 0; last && (i < OTYP_LYR); i++) {
1011 		if (bd->d_open_reg[i]) {
1012 			last = B_FALSE;
1013 		}
1014 	}
1015 	mutex_exit(&bd->d_ocmutex);
1016 
1017 	if (last) {
1018 		cmlb_invalidate(bd->d_cmlbh, 0);
1019 	}
1020 	rw_exit(&bd_lock);
1021 
1022 	return (0);
1023 }
1024 
1025 static int
1026 bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
1027 {
1028 	minor_t		inst;
1029 	minor_t		part;
1030 	diskaddr_t	pstart;
1031 	diskaddr_t	psize;
1032 	bd_t		*bd;
1033 	bd_xfer_impl_t	*xi;
1034 	buf_t		*bp;
1035 	int		rv;
1036 
1037 	rw_enter(&bd_lock, RW_READER);
1038 
1039 	part = BDPART(dev);
1040 	inst = BDINST(dev);
1041 
1042 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1043 		rw_exit(&bd_lock);
1044 		return (ENXIO);
1045 	}
1046 	/*
1047 	 * do cmlb, but do it synchronously unless we already have the
1048 	 * partition (which we probably should.)
1049 	 */
1050 	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
1051 	    (void *)1)) {
1052 		rw_exit(&bd_lock);
1053 		return (ENXIO);
1054 	}
1055 
1056 	if ((blkno + nblk) > psize) {
1057 		rw_exit(&bd_lock);
1058 		return (EINVAL);
1059 	}
1060 	bp = getrbuf(KM_NOSLEEP);
1061 	if (bp == NULL) {
1062 		rw_exit(&bd_lock);
1063 		return (ENOMEM);
1064 	}
1065 
1066 	bp->b_bcount = nblk << bd->d_blkshift;
1067 	bp->b_resid = bp->b_bcount;
1068 	bp->b_lblkno = blkno;
1069 	bp->b_un.b_addr = caddr;
1070 
1071 	xi = bd_xfer_alloc(bd, bp,  bd->d_ops.o_write, KM_NOSLEEP);
1072 	if (xi == NULL) {
1073 		rw_exit(&bd_lock);
1074 		freerbuf(bp);
1075 		return (ENOMEM);
1076 	}
1077 	xi->i_blkno = blkno + pstart;
1078 	xi->i_flags = BD_XFER_POLL;
1079 	bd_submit(bd, xi);
1080 	rw_exit(&bd_lock);
1081 
1082 	/*
1083 	 * Generally, we should have run this entirely synchronously
1084 	 * at this point and the biowait call should be a no-op.  If
1085 	 * it didn't happen this way, it's a bug in the underlying
1086 	 * driver not honoring BD_XFER_POLL.
1087 	 */
1088 	(void) biowait(bp);
1089 	rv = geterror(bp);
1090 	freerbuf(bp);
1091 	return (rv);
1092 }
1093 
1094 void
1095 bd_minphys(struct buf *bp)
1096 {
1097 	minor_t inst;
1098 	bd_t	*bd;
1099 	inst = BDINST(bp->b_edev);
1100 
1101 	bd = ddi_get_soft_state(bd_state, inst);
1102 
1103 	/*
1104 	 * In a non-debug kernel, bd_strategy will catch !bd as
1105 	 * well, and will fail nicely.
1106 	 */
1107 	ASSERT(bd);
1108 
1109 	if (bp->b_bcount > bd->d_maxxfer)
1110 		bp->b_bcount = bd->d_maxxfer;
1111 }
1112 
1113 static int
1114 bd_read(dev_t dev, struct uio *uio, cred_t *credp)
1115 {
1116 	_NOTE(ARGUNUSED(credp));
1117 	return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
1118 }
1119 
1120 static int
1121 bd_write(dev_t dev, struct uio *uio, cred_t *credp)
1122 {
1123 	_NOTE(ARGUNUSED(credp));
1124 	return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
1125 }
1126 
1127 static int
1128 bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
1129 {
1130 	_NOTE(ARGUNUSED(credp));
1131 	return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
1132 }
1133 
1134 static int
1135 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1136 {
1137 	_NOTE(ARGUNUSED(credp));
1138 	return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
1139 }
1140 
1141 static int
1142 bd_strategy(struct buf *bp)
1143 {
1144 	minor_t		inst;
1145 	minor_t		part;
1146 	bd_t		*bd;
1147 	diskaddr_t	p_lba;
1148 	diskaddr_t	p_nblks;
1149 	diskaddr_t	b_nblks;
1150 	bd_xfer_impl_t	*xi;
1151 	uint32_t	shift;
1152 	int		(*func)(void *, bd_xfer_t *);
1153 
1154 	part = BDPART(bp->b_edev);
1155 	inst = BDINST(bp->b_edev);
1156 
1157 	ASSERT(bp);
1158 
1159 	bp->b_resid = bp->b_bcount;
1160 
1161 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1162 		bioerror(bp, ENXIO);
1163 		biodone(bp);
1164 		return (0);
1165 	}
1166 
1167 	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
1168 	    NULL, NULL, 0)) {
1169 		bioerror(bp, ENXIO);
1170 		biodone(bp);
1171 		return (0);
1172 	}
1173 
1174 	shift = bd->d_blkshift;
1175 
1176 	if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
1177 	    (bp->b_lblkno > p_nblks)) {
1178 		bioerror(bp, ENXIO);
1179 		biodone(bp);
1180 		return (0);
1181 	}
1182 	b_nblks = bp->b_bcount >> shift;
1183 	if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
1184 		biodone(bp);
1185 		return (0);
1186 	}
1187 
1188 	if ((b_nblks + bp->b_lblkno) > p_nblks) {
1189 		bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
1190 		bp->b_bcount -= bp->b_resid;
1191 	} else {
1192 		bp->b_resid = 0;
1193 	}
1194 	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;
1195 
1196 	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
1197 	if (xi == NULL) {
1198 		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
1199 	}
1200 	if (xi == NULL) {
1201 		/* bd_request_alloc will have done bioerror */
1202 		biodone(bp);
1203 		return (0);
1204 	}
1205 	xi->i_blkno = bp->b_lblkno + p_lba;
1206 
1207 	bd_submit(bd, xi);
1208 
1209 	return (0);
1210 }
1211 
1212 static int
1213 bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
1214 {
1215 	minor_t		inst;
1216 	uint16_t	part;
1217 	bd_t		*bd;
1218 	void		*ptr = (void *)arg;
1219 	int		rv;
1220 
1221 	part = BDPART(dev);
1222 	inst = BDINST(dev);
1223 
1224 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1225 		return (ENXIO);
1226 	}
1227 
1228 	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
1229 	if (rv != ENOTTY)
1230 		return (rv);
1231 
1232 	if (rvalp != NULL) {
1233 		/* the return value of the ioctl is 0 by default */
1234 		*rvalp = 0;
1235 	}
1236 
1237 	switch (cmd) {
1238 	case DKIOCGMEDIAINFO: {
1239 		struct dk_minfo minfo;
1240 
1241 		/* make sure our state information is current */
1242 		bd_update_state(bd);
1243 		bzero(&minfo, sizeof (minfo));
1244 		minfo.dki_media_type = DK_FIXED_DISK;
1245 		minfo.dki_lbsize = (1U << bd->d_blkshift);
1246 		minfo.dki_capacity = bd->d_numblks;
1247 		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
1248 			return (EFAULT);
1249 		}
1250 		return (0);
1251 	}
1252 	case DKIOCGMEDIAINFOEXT: {
1253 		struct dk_minfo_ext miext;
1254 
1255 		/* make sure our state information is current */
1256 		bd_update_state(bd);
1257 		bzero(&miext, sizeof (miext));
1258 		miext.dki_media_type = DK_FIXED_DISK;
1259 		miext.dki_lbsize = (1U << bd->d_blkshift);
1260 		miext.dki_pbsize = (1U << bd->d_pblkshift);
1261 		miext.dki_capacity = bd->d_numblks;
1262 		if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
1263 			return (EFAULT);
1264 		}
1265 		return (0);
1266 	}
1267 	case DKIOCINFO: {
1268 		struct dk_cinfo cinfo;
1269 		bzero(&cinfo, sizeof (cinfo));
1270 		cinfo.dki_ctype = DKC_BLKDEV;
1271 		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
1272 		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
1273 		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
1274 		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
1275 		    "%s", ddi_driver_name(bd->d_dip));
1276 		cinfo.dki_unit = inst;
1277 		cinfo.dki_flags = DKI_FMTVOL;
1278 		cinfo.dki_partition = part;
1279 		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
1280 		cinfo.dki_addr = 0;
1281 		cinfo.dki_slave = 0;
1282 		cinfo.dki_space = 0;
1283 		cinfo.dki_prio = 0;
1284 		cinfo.dki_vec = 0;
1285 		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
1286 			return (EFAULT);
1287 		}
1288 		return (0);
1289 	}
1290 	case DKIOCREMOVABLE: {
1291 		int i;
1292 		i = bd->d_removable ? 1 : 0;
1293 		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1294 			return (EFAULT);
1295 		}
1296 		return (0);
1297 	}
1298 	case DKIOCHOTPLUGGABLE: {
1299 		int i;
1300 		i = bd->d_hotpluggable ? 1 : 0;
1301 		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1302 			return (EFAULT);
1303 		}
1304 		return (0);
1305 	}
1306 	case DKIOCREADONLY: {
1307 		int i;
1308 		i = bd->d_rdonly ? 1 : 0;
1309 		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1310 			return (EFAULT);
1311 		}
1312 		return (0);
1313 	}
1314 	case DKIOCSOLIDSTATE: {
1315 		int i;
1316 		i = bd->d_ssd ? 1 : 0;
1317 		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1318 			return (EFAULT);
1319 		}
1320 		return (0);
1321 	}
1322 	case DKIOCSTATE: {
1323 		enum dkio_state	state;
1324 		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
1325 			return (EFAULT);
1326 		}
1327 		if ((rv = bd_check_state(bd, &state)) != 0) {
1328 			return (rv);
1329 		}
1330 		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
1331 			return (EFAULT);
1332 		}
1333 		return (0);
1334 	}
1335 	case DKIOCFLUSHWRITECACHE: {
1336 		struct dk_callback *dkc = NULL;
1337 
1338 		if (flag & FKIOCTL)
1339 			dkc = (void *)arg;
1340 
1341 		rv = bd_flush_write_cache(bd, dkc);
1342 		return (rv);
1343 	}
1344 
1345 	default:
1346 		break;
1347 
1348 	}
1349 	return (ENOTTY);
1350 }
1351 
1352 static int
1353 bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
1354     char *name, caddr_t valuep, int *lengthp)
1355 {
1356 	bd_t	*bd;
1357 
1358 	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1359 	if (bd == NULL)
1360 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
1361 		    name, valuep, lengthp));
1362 
1363 	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
1364 	    valuep, lengthp, BDPART(dev), 0));
1365 }
1366 
1367 
1368 static int
1369 bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
1370     size_t length, void *tg_cookie)
1371 {
1372 	bd_t		*bd;
1373 	buf_t		*bp;
1374 	bd_xfer_impl_t	*xi;
1375 	int		rv;
1376 	int		(*func)(void *, bd_xfer_t *);
1377 	int		kmflag;
1378 
1379 	/*
1380 	 * If we are running in polled mode (such as during dump(9e)
1381 	 * execution), then we cannot sleep for kernel allocations.
1382 	 */
1383 	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;
1384 
1385 	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1386 
1387 	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
1388 		/* We can only transfer whole blocks at a time! */
1389 		return (EINVAL);
1390 	}
1391 
1392 	if ((bp = getrbuf(kmflag)) == NULL) {
1393 		return (ENOMEM);
1394 	}
1395 
1396 	switch (cmd) {
1397 	case TG_READ:
1398 		bp->b_flags = B_READ;
1399 		func = bd->d_ops.o_read;
1400 		break;
1401 	case TG_WRITE:
1402 		bp->b_flags = B_WRITE;
1403 		func = bd->d_ops.o_write;
1404 		break;
1405 	default:
1406 		freerbuf(bp);
1407 		return (EINVAL);
1408 	}
1409 
1410 	bp->b_un.b_addr = bufaddr;
1411 	bp->b_bcount = length;
1412 	xi = bd_xfer_alloc(bd, bp, func, kmflag);
1413 	if (xi == NULL) {
1414 		rv = geterror(bp);
1415 		freerbuf(bp);
1416 		return (rv);
1417 	}
1418 	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
1419 	xi->i_blkno = start;
1420 	bd_submit(bd, xi);
1421 	(void) biowait(bp);
1422 	rv = geterror(bp);
1423 	freerbuf(bp);
1424 
1425 	return (rv);
1426 }
1427 
1428 static int
1429 bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
1430 {
1431 	bd_t		*bd;
1432 
1433 	_NOTE(ARGUNUSED(tg_cookie));
1434 	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1435 
1436 	switch (cmd) {
1437 	case TG_GETPHYGEOM:
1438 	case TG_GETVIRTGEOM:
1439 		/*
1440 		 * We don't have any "geometry" as such, let cmlb
1441 		 * fabricate something.
1442 		 */
1443 		return (ENOTTY);
1444 
1445 	case TG_GETCAPACITY:
1446 		bd_update_state(bd);
1447 		*(diskaddr_t *)arg = bd->d_numblks;
1448 		return (0);
1449 
1450 	case TG_GETBLOCKSIZE:
1451 		*(uint32_t *)arg = (1U << bd->d_blkshift);
1452 		return (0);
1453 
1454 	case TG_GETATTR:
1455 		/*
1456 		 * It turns out that cmlb really doesn't do much for
1457 		 * non-writable media, but lets make the information
1458 		 * available for it in case it does more in the
1459 		 * future.  (The value is currently used for
1460 		 * triggering special behavior for CD-ROMs.)
1461 		 */
1462 		bd_update_state(bd);
1463 		((tg_attribute_t *)arg)->media_is_writable =
1464 		    bd->d_rdonly ? B_FALSE : B_TRUE;
1465 		((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
1466 		return (0);
1467 
1468 	default:
1469 		return (EINVAL);
1470 	}
1471 }
1472 
1473 
1474 static void
1475 bd_sched(bd_t *bd)
1476 {
1477 	bd_xfer_impl_t	*xi;
1478 	struct buf	*bp;
1479 	int		rv;
1480 
1481 	mutex_enter(&bd->d_iomutex);
1482 
1483 	while ((bd->d_qactive < bd->d_qsize) &&
1484 	    ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
1485 		bd->d_qactive++;
1486 		kstat_waitq_to_runq(bd->d_kiop);
1487 		list_insert_tail(&bd->d_runq, xi);
1488 
1489 		/*
1490 		 * Submit the job to the driver.  We drop the I/O mutex
1491 		 * so that we can deal with the case where the driver
1492 		 * completion routine calls back into us synchronously.
1493 		 */
1494 
1495 		mutex_exit(&bd->d_iomutex);
1496 
1497 		rv = xi->i_func(bd->d_private, &xi->i_public);
1498 		if (rv != 0) {
1499 			bp = xi->i_bp;
1500 			bioerror(bp, rv);
1501 			biodone(bp);
1502 
1503 			atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1504 
1505 			mutex_enter(&bd->d_iomutex);
1506 			bd->d_qactive--;
1507 			kstat_runq_exit(bd->d_kiop);
1508 			list_remove(&bd->d_runq, xi);
1509 			bd_xfer_free(xi);
1510 		} else {
1511 			mutex_enter(&bd->d_iomutex);
1512 		}
1513 	}
1514 
1515 	mutex_exit(&bd->d_iomutex);
1516 }
1517 
1518 static void
1519 bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
1520 {
1521 	mutex_enter(&bd->d_iomutex);
1522 	list_insert_tail(&bd->d_waitq, xi);
1523 	kstat_waitq_enter(bd->d_kiop);
1524 	mutex_exit(&bd->d_iomutex);
1525 
1526 	bd_sched(bd);
1527 }
1528 
1529 static void
1530 bd_runq_exit(bd_xfer_impl_t *xi, int err)
1531 {
1532 	bd_t	*bd = xi->i_bd;
1533 	buf_t	*bp = xi->i_bp;
1534 
1535 	mutex_enter(&bd->d_iomutex);
1536 	bd->d_qactive--;
1537 	kstat_runq_exit(bd->d_kiop);
1538 	list_remove(&bd->d_runq, xi);
1539 	mutex_exit(&bd->d_iomutex);
1540 
1541 	if (err == 0) {
1542 		if (bp->b_flags & B_READ) {
1543 			bd->d_kiop->reads++;
1544 			bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
1545 		} else {
1546 			bd->d_kiop->writes++;
1547 			bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
1548 		}
1549 	}
1550 	bd_sched(bd);
1551 }
1552 
1553 static void
1554 bd_update_state(bd_t *bd)
1555 {
1556 	enum	dkio_state	state = DKIO_INSERTED;
1557 	boolean_t		docmlb = B_FALSE;
1558 	bd_media_t		media;
1559 
1560 	bzero(&media, sizeof (media));
1561 
1562 	mutex_enter(&bd->d_statemutex);
1563 	if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
1564 		bd->d_numblks = 0;
1565 		state = DKIO_EJECTED;
1566 		goto done;
1567 	}
1568 
1569 	if ((media.m_blksize < 512) ||
1570 	    (!ISP2(media.m_blksize)) ||
1571 	    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
1572 		cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
1573 		    ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
1574 		    media.m_blksize);
1575 		/*
1576 		 * We can't use the media, treat it as not present.
1577 		 */
1578 		state = DKIO_EJECTED;
1579 		bd->d_numblks = 0;
1580 		goto done;
1581 	}
1582 
1583 	if (((1U << bd->d_blkshift) != media.m_blksize) ||
1584 	    (bd->d_numblks != media.m_nblks)) {
1585 		/* Device size changed */
1586 		docmlb = B_TRUE;
1587 	}
1588 
1589 	bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
1590 	bd->d_pblkshift = bd->d_blkshift;
1591 	bd->d_numblks = media.m_nblks;
1592 	bd->d_rdonly = media.m_readonly;
1593 	bd->d_ssd = media.m_solidstate;
1594 
1595 	/*
1596 	 * Only use the supplied physical block size if it is non-zero,
1597 	 * greater or equal to the block size, and a power of 2. Ignore it
1598 	 * if not, it's just informational and we can still use the media.
1599 	 */
1600 	if ((media.m_pblksize != 0) &&
1601 	    (media.m_pblksize >= media.m_blksize) &&
1602 	    (ISP2(media.m_pblksize)))
1603 		bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;
1604 
1605 done:
1606 	if (state != bd->d_state) {
1607 		bd->d_state = state;
1608 		cv_broadcast(&bd->d_statecv);
1609 		docmlb = B_TRUE;
1610 	}
1611 	mutex_exit(&bd->d_statemutex);
1612 
1613 	bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift;
1614 
1615 	if (docmlb) {
1616 		if (state == DKIO_INSERTED) {
1617 			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
1618 		} else {
1619 			cmlb_invalidate(bd->d_cmlbh, 0);
1620 		}
1621 	}
1622 }
1623 
1624 static int
1625 bd_check_state(bd_t *bd, enum dkio_state *state)
1626 {
1627 	clock_t		when;
1628 
1629 	for (;;) {
1630 
1631 		bd_update_state(bd);
1632 
1633 		mutex_enter(&bd->d_statemutex);
1634 
1635 		if (bd->d_state != *state) {
1636 			*state = bd->d_state;
1637 			mutex_exit(&bd->d_statemutex);
1638 			break;
1639 		}
1640 
1641 		when = drv_usectohz(1000000);
1642 		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
1643 		    when, TR_CLOCK_TICK) == 0) {
1644 			mutex_exit(&bd->d_statemutex);
1645 			return (EINTR);
1646 		}
1647 
1648 		mutex_exit(&bd->d_statemutex);
1649 	}
1650 
1651 	return (0);
1652 }
1653 
1654 static int
1655 bd_flush_write_cache_done(struct buf *bp)
1656 {
1657 	struct dk_callback *dc = (void *)bp->b_private;
1658 
1659 	(*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
1660 	kmem_free(dc, sizeof (*dc));
1661 	freerbuf(bp);
1662 	return (0);
1663 }
1664 
1665 static int
1666 bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
1667 {
1668 	buf_t			*bp;
1669 	struct dk_callback	*dc;
1670 	bd_xfer_impl_t		*xi;
1671 	int			rv;
1672 
1673 	if (bd->d_ops.o_sync_cache == NULL) {
1674 		return (ENOTSUP);
1675 	}
1676 	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
1677 		return (ENOMEM);
1678 	}
1679 	bp->b_resid = 0;
1680 	bp->b_bcount = 0;
1681 
1682 	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
1683 	if (xi == NULL) {
1684 		rv = geterror(bp);
1685 		freerbuf(bp);
1686 		return (rv);
1687 	}
1688 
1689 	/* Make an asynchronous flush, but only if there is a callback */
1690 	if (dkc != NULL && dkc->dkc_callback != NULL) {
1691 		/* Make a private copy of the callback structure */
1692 		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
1693 		*dc = *dkc;
1694 		bp->b_private = dc;
1695 		bp->b_iodone = bd_flush_write_cache_done;
1696 
1697 		bd_submit(bd, xi);
1698 		return (0);
1699 	}
1700 
1701 	/* In case there is no callback, perform a synchronous flush */
1702 	bd_submit(bd, xi);
1703 	(void) biowait(bp);
1704 	rv = geterror(bp);
1705 	freerbuf(bp);
1706 
1707 	return (rv);
1708 }
1709 
1710 /*
1711  * Nexus support.
1712  */
1713 int
1714 bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
1715     void *arg, void *result)
1716 {
1717 	bd_handle_t	hdl;
1718 
1719 	switch (ctlop) {
1720 	case DDI_CTLOPS_REPORTDEV:
1721 		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
1722 		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
1723 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
1724 		return (DDI_SUCCESS);
1725 
1726 	case DDI_CTLOPS_INITCHILD:
1727 		hdl = ddi_get_parent_data((dev_info_t *)arg);
1728 		if (hdl == NULL) {
1729 			return (DDI_NOT_WELL_FORMED);
1730 		}
1731 		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
1732 		return (DDI_SUCCESS);
1733 
1734 	case DDI_CTLOPS_UNINITCHILD:
1735 		ddi_set_name_addr((dev_info_t *)arg, NULL);
1736 		ndi_prop_remove_all((dev_info_t *)arg);
1737 		return (DDI_SUCCESS);
1738 
1739 	default:
1740 		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
1741 	}
1742 }
1743 
1744 /*
1745  * Functions for device drivers.
1746  */
1747 bd_handle_t
1748 bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
1749 {
1750 	bd_handle_t	hdl;
1751 
1752 	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
1753 	if (hdl != NULL) {
1754 		hdl->h_ops = *ops;
1755 		hdl->h_dma = dma;
1756 		hdl->h_private = private;
1757 	}
1758 
1759 	return (hdl);
1760 }
1761 
1762 void
1763 bd_free_handle(bd_handle_t hdl)
1764 {
1765 	kmem_free(hdl, sizeof (*hdl));
1766 }
1767 
1768 int
1769 bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
1770 {
1771 	dev_info_t	*child;
1772 	bd_drive_t	drive = { 0 };
1773 
1774 	/* if drivers don't override this, make it assume none */
1775 	drive.d_lun = -1;
1776 	hdl->h_ops.o_drive_info(hdl->h_private, &drive);
1777 
1778 	hdl->h_parent = dip;
1779 	hdl->h_name = "blkdev";
1780 
1781 	if (drive.d_lun >= 0) {
1782 		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X,%X",
1783 		    drive.d_target, drive.d_lun);
1784 	} else {
1785 		(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr), "%X",
1786 		    drive.d_target);
1787 	}
1788 	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
1789 	    &child) != NDI_SUCCESS) {
1790 		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
1791 		    ddi_driver_name(dip), ddi_get_instance(dip),
1792 		    "blkdev", hdl->h_addr);
1793 		return (DDI_FAILURE);
1794 	}
1795 
1796 	ddi_set_parent_data(child, hdl);
1797 	hdl->h_child = child;
1798 
1799 	if (ndi_devi_online(child, 0) == NDI_FAILURE) {
1800 		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
1801 		    ddi_driver_name(dip), ddi_get_instance(dip),
1802 		    hdl->h_name, hdl->h_addr);
1803 		(void) ndi_devi_free(child);
1804 		return (DDI_FAILURE);
1805 	}
1806 
1807 	return (DDI_SUCCESS);
1808 }
1809 
1810 int
1811 bd_detach_handle(bd_handle_t hdl)
1812 {
1813 	int	circ;
1814 	int	rv;
1815 	char	*devnm;
1816 
1817 	if (hdl->h_child == NULL) {
1818 		return (DDI_SUCCESS);
1819 	}
1820 	ndi_devi_enter(hdl->h_parent, &circ);
1821 	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
1822 		rv = ddi_remove_child(hdl->h_child, 0);
1823 	} else {
1824 		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
1825 		(void) ddi_deviname(hdl->h_child, devnm);
1826 		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
1827 		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
1828 		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
1829 		kmem_free(devnm, MAXNAMELEN + 1);
1830 	}
1831 	if (rv == 0) {
1832 		hdl->h_child = NULL;
1833 	}
1834 
1835 	ndi_devi_exit(hdl->h_parent, circ);
1836 	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
1837 }
1838 
1839 void
1840 bd_xfer_done(bd_xfer_t *xfer, int err)
1841 {
1842 	bd_xfer_impl_t	*xi = (void *)xfer;
1843 	buf_t		*bp = xi->i_bp;
1844 	int		rv = DDI_SUCCESS;
1845 	bd_t		*bd = xi->i_bd;
1846 	size_t		len;
1847 
1848 	if (err != 0) {
1849 		bd_runq_exit(xi, err);
1850 		atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);
1851 
1852 		bp->b_resid += xi->i_resid;
1853 		bd_xfer_free(xi);
1854 		bioerror(bp, err);
1855 		biodone(bp);
1856 		return;
1857 	}
1858 
1859 	xi->i_cur_win++;
1860 	xi->i_resid -= xi->i_len;
1861 
1862 	if (xi->i_resid == 0) {
1863 		/* Job completed succcessfully! */
1864 		bd_runq_exit(xi, 0);
1865 
1866 		bd_xfer_free(xi);
1867 		biodone(bp);
1868 		return;
1869 	}
1870 
1871 	xi->i_blkno += xi->i_nblks;
1872 
1873 	if (bd->d_use_dma) {
1874 		/* More transfer still pending... advance to next DMA window. */
1875 		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
1876 		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
1877 	} else {
1878 		/* Advance memory window. */
1879 		xi->i_kaddr += xi->i_len;
1880 		xi->i_offset += xi->i_len;
1881 		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
1882 	}
1883 
1884 
1885 	if ((rv != DDI_SUCCESS) ||
1886 	    (P2PHASE(len, (1U << xi->i_blkshift) != 0))) {
1887 		bd_runq_exit(xi, EFAULT);
1888 
1889 		bp->b_resid += xi->i_resid;
1890 		bd_xfer_free(xi);
1891 		bioerror(bp, EFAULT);
1892 		biodone(bp);
1893 		return;
1894 	}
1895 	xi->i_len = len;
1896 	xi->i_nblks = len >> xi->i_blkshift;
1897 
1898 	/* Submit next window to hardware. */
1899 	rv = xi->i_func(bd->d_private, &xi->i_public);
1900 	if (rv != 0) {
1901 		bd_runq_exit(xi, rv);
1902 
1903 		atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1904 
1905 		bp->b_resid += xi->i_resid;
1906 		bd_xfer_free(xi);
1907 		bioerror(bp, rv);
1908 		biodone(bp);
1909 	}
1910 }
1911 
1912 void
1913 bd_error(bd_xfer_t *xfer, int error)
1914 {
1915 	bd_xfer_impl_t	*xi = (void *)xfer;
1916 	bd_t		*bd = xi->i_bd;
1917 
1918 	switch (error) {
1919 	case BD_ERR_MEDIA:
1920 		atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32);
1921 		break;
1922 	case BD_ERR_NTRDY:
1923 		atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32);
1924 		break;
1925 	case BD_ERR_NODEV:
1926 		atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32);
1927 		break;
1928 	case BD_ERR_RECOV:
1929 		atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32);
1930 		break;
1931 	case BD_ERR_ILLRQ:
1932 		atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32);
1933 		break;
1934 	case BD_ERR_PFA:
1935 		atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32);
1936 		break;
1937 	default:
1938 		cmn_err(CE_PANIC, "bd_error: unknown error type %d", error);
1939 		break;
1940 	}
1941 }
1942 
1943 void
1944 bd_state_change(bd_handle_t hdl)
1945 {
1946 	bd_t		*bd;
1947 
1948 	if ((bd = hdl->h_bd) != NULL) {
1949 		bd_update_state(bd);
1950 	}
1951 }
1952 
1953 void
1954 bd_mod_init(struct dev_ops *devops)
1955 {
1956 	static struct bus_ops bd_bus_ops = {
1957 		BUSO_REV,		/* busops_rev */
1958 		nullbusmap,		/* bus_map */
1959 		NULL,			/* bus_get_intrspec (OBSOLETE) */
1960 		NULL,			/* bus_add_intrspec (OBSOLETE) */
1961 		NULL,			/* bus_remove_intrspec (OBSOLETE) */
1962 		i_ddi_map_fault,	/* bus_map_fault */
1963 		NULL,			/* bus_dma_map (OBSOLETE) */
1964 		ddi_dma_allochdl,	/* bus_dma_allochdl */
1965 		ddi_dma_freehdl,	/* bus_dma_freehdl */
1966 		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
1967 		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
1968 		ddi_dma_flush,		/* bus_dma_flush */
1969 		ddi_dma_win,		/* bus_dma_win */
1970 		ddi_dma_mctl,		/* bus_dma_ctl */
1971 		bd_bus_ctl,		/* bus_ctl */
1972 		ddi_bus_prop_op,	/* bus_prop_op */
1973 		NULL,			/* bus_get_eventcookie */
1974 		NULL,			/* bus_add_eventcall */
1975 		NULL,			/* bus_remove_eventcall */
1976 		NULL,			/* bus_post_event */
1977 		NULL,			/* bus_intr_ctl (OBSOLETE) */
1978 		NULL,			/* bus_config */
1979 		NULL,			/* bus_unconfig */
1980 		NULL,			/* bus_fm_init */
1981 		NULL,			/* bus_fm_fini */
1982 		NULL,			/* bus_fm_access_enter */
1983 		NULL,			/* bus_fm_access_exit */
1984 		NULL,			/* bus_power */
1985 		NULL,			/* bus_intr_op */
1986 	};
1987 
1988 	devops->devo_bus_ops = &bd_bus_ops;
1989 
1990 	/*
1991 	 * NB: The device driver is free to supply its own
1992 	 * character entry device support.
1993 	 */
1994 }
1995 
1996 void
1997 bd_mod_fini(struct dev_ops *devops)
1998 {
1999 	devops->devo_bus_ops = NULL;
2000 }
2001