xref: /illumos-gate/usr/src/uts/common/io/blkdev/blkdev.c (revision 04904ca2a4492f1b3e2ec393f82d81a9a1c9611e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
24  * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
25  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
26  */
27 
28 #include <sys/types.h>
29 #include <sys/ksynch.h>
30 #include <sys/kmem.h>
31 #include <sys/file.h>
32 #include <sys/errno.h>
33 #include <sys/open.h>
34 #include <sys/buf.h>
35 #include <sys/uio.h>
36 #include <sys/aio_req.h>
37 #include <sys/cred.h>
38 #include <sys/modctl.h>
39 #include <sys/cmlb.h>
40 #include <sys/conf.h>
41 #include <sys/devops.h>
42 #include <sys/list.h>
43 #include <sys/sysmacros.h>
44 #include <sys/dkio.h>
45 #include <sys/vtoc.h>
46 #include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
47 #include <sys/kstat.h>
48 #include <sys/fs/dv_node.h>
49 #include <sys/ddi.h>
50 #include <sys/sunddi.h>
51 #include <sys/note.h>
52 #include <sys/blkdev.h>
53 #include <sys/scsi/impl/inquiry.h>
54 
/*
 * Minor number encoding: each instance owns BD_MAXPART consecutive
 * minors, one per partition.
 */
#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)	/* instance number */
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)	/* partition number */

typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;
61 
/*
 * Per-instance soft state for one blkdev child node.
 *
 * Locking (as visible here): d_ocmutex protects the open/close
 * accounting (d_open_lyr/d_open_excl/d_open_reg); d_iomutex doubles as
 * ks_lock for the I/O kstat; d_errmutex guards d_kerr and may alias the
 * error kstat's ks_lock; d_statemutex/d_statecv cover d_state.
 */
struct bd {
	void		*d_private;	/* parent driver's cookie for o_* ops */
	dev_info_t	*d_dip;		/* our devinfo node */
	kmutex_t	d_ocmutex;	/* open/close accounting lock */
	kmutex_t	d_iomutex;	/* I/O lock; also d_ksp->ks_lock */
	kmutex_t	*d_errmutex;	/* guards d_kerr (may be ks_lock) */
	kmutex_t	d_statemutex;	/* guards d_state */
	kcondvar_t	d_statecv;	/* paired with d_statemutex */
	enum dkio_state	d_state;	/* DKIO media state */
	cmlb_handle_t	d_cmlbh;	/* common labeling module handle */
	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];		/* bit mask */

	uint32_t	d_qsize;	/* queue depth from drive info */
	uint32_t	d_qactive;	/* presumably in-flight count; see bd_sched */
	uint32_t	d_maxxfer;	/* maximum transfer size, bytes */
	uint32_t	d_blkshift;	/* log2 of logical block size */
	uint32_t	d_pblkshift;	/* log2 of physical block size */
	uint64_t	d_numblks;	/* capacity in blocks (set elsewhere) */
	ddi_devid_t	d_devid;	/* registered devid, if any */

	kmem_cache_t	*d_cache;	/* bd_xfer_impl_t allocation cache */
	list_t		d_runq;		/* xfer queue (scheduler-managed) */
	list_t		d_waitq;	/* xfer queue (scheduler-managed) */
	kstat_t		*d_ksp;		/* I/O kstat; NULL if create failed */
	kstat_io_t	*d_kiop;	/* I/O kstat data; never NULL */
	kstat_t		*d_errstats;	/* error kstat; NULL if create failed */
	struct bd_errstats *d_kerr;	/* error kstat data; never NULL */

	boolean_t	d_rdonly;	/* device is read-only */
	boolean_t	d_ssd;		/* solid state device */
	boolean_t	d_removable;	/* media is removable */
	boolean_t	d_hotpluggable;	/* device is hotpluggable */
	boolean_t	d_use_dma;	/* parent supplied DMA attributes */

	ddi_dma_attr_t	d_dma;		/* DMA attrs (valid if d_use_dma) */
	bd_ops_t	d_ops;		/* parent driver's ops vector */
	bd_handle_t	d_handle;	/* back-pointer to our bd_handle */
};
102 
/*
 * Handle shared between the parent (HBA/controller) driver and the bd
 * instance created under it; reached via ddi_get_parent_data() in
 * bd_attach().
 */
struct bd_handle {
	bd_ops_t	h_ops;		/* parent's callback vector */
	ddi_dma_attr_t	*h_dma;		/* parent's DMA attrs; may be NULL */
	dev_info_t	*h_parent;	/* parent devinfo */
	dev_info_t	*h_child;	/* blkdev child devinfo */
	void		*h_private;	/* parent's cookie */
	bd_t		*h_bd;		/* soft state, set at attach */
	char		*h_name;	/* node name */
	char		h_addr[30];	/* enough for w%0.16x,%X */
};
113 
/*
 * Internal transfer context wrapping the public bd_xfer_t passed to the
 * parent driver.  One per buf, allocated from the per-device d_cache.
 */
struct bd_xfer_impl {
	bd_xfer_t	i_public;	/* part visible to the parent driver */
	list_node_t	i_linkage;	/* queue linkage */
	bd_t		*i_bd;		/* owning device */
	buf_t		*i_bp;		/* originating buf */
	uint_t		i_num_win;	/* total DMA/PIO windows for the buf */
	uint_t		i_cur_win;	/* window currently in progress */
	off_t		i_offset;	/* offset of the current window */
	int		(*i_func)(void *, bd_xfer_t *);	/* o_read/o_write */
	uint32_t	i_blkshift;	/* snapshot of d_blkshift */
	size_t		i_len;		/* length of the current window */
	size_t		i_resid;	/* bytes remaining overall */
};

/* Shorthand for the embedded public bd_xfer_t fields. */
#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags
135 
136 
/*
 * Private prototypes.
 */

static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);
static void bd_create_errstats(bd_t *, int, bd_drive_t *);
static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *);
static void bd_init_errstats(bd_t *, bd_drive_t *);

static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);

/* cmlb target ops: sector I/O and geometry callbacks used for labeling. */
struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};

static struct cb_ops bd_cb_ops = {
	bd_open, 		/* open */
	bd_close, 		/* close */
	bd_strategy, 		/* strategy */
	nodev, 			/* print */
	bd_dump,		/* dump */
	bd_read, 		/* read */
	bd_write, 		/* write */
	bd_ioctl, 		/* ioctl */
	nodev, 			/* devmap */
	nodev, 			/* mmap */
	nodev, 			/* segmap */
	nochpoll, 		/* poll */
	bd_prop_op, 		/* cb_prop_op */
	0, 			/* streamtab  */
	D_64BIT | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	bd_aread,		/* async read */
	bd_awrite		/* async write */
};

struct dev_ops bd_dev_ops = {
	DEVO_REV, 		/* devo_rev, */
	0, 			/* refcnt  */
	bd_getinfo,		/* getinfo */
	nulldev, 		/* identify */
	nulldev, 		/* probe */
	bd_attach, 		/* attach */
	bd_detach,		/* detach */
	nodev, 			/* reset */
	&bd_cb_ops, 		/* driver operations */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"Generic Block Device",
	&bd_dev_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, { &modldrv, NULL }
};

/* Global soft-state anchor and the lock that blocks DR during ops. */
static void *bd_state;
static krwlock_t bd_lock;
229 
230 int
231 _init(void)
232 {
233 	int	rv;
234 
235 	rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
236 	if (rv != DDI_SUCCESS) {
237 		return (rv);
238 	}
239 	rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
240 	rv = mod_install(&modlinkage);
241 	if (rv != DDI_SUCCESS) {
242 		rw_destroy(&bd_lock);
243 		ddi_soft_state_fini(&bd_state);
244 	}
245 	return (rv);
246 }
247 
248 int
249 _fini(void)
250 {
251 	int	rv;
252 
253 	rv = mod_remove(&modlinkage);
254 	if (rv == DDI_SUCCESS) {
255 		rw_destroy(&bd_lock);
256 		ddi_soft_state_fini(&bd_state);
257 	}
258 	return (rv);
259 }
260 
261 int
262 _info(struct modinfo *modinfop)
263 {
264 	return (mod_info(&modlinkage, modinfop));
265 }
266 
267 static int
268 bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
269 {
270 	bd_t	*bd;
271 	minor_t	inst;
272 
273 	_NOTE(ARGUNUSED(dip));
274 
275 	inst = BDINST((dev_t)arg);
276 
277 	switch (cmd) {
278 	case DDI_INFO_DEVT2DEVINFO:
279 		bd = ddi_get_soft_state(bd_state, inst);
280 		if (bd == NULL) {
281 			return (DDI_FAILURE);
282 		}
283 		*resultp = (void *)bd->d_dip;
284 		break;
285 
286 	case DDI_INFO_DEVT2INSTANCE:
287 		*resultp = (void *)(intptr_t)inst;
288 		break;
289 
290 	default:
291 		return (DDI_FAILURE);
292 	}
293 	return (DDI_SUCCESS);
294 }
295 
296 static void
297 bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
298 {
299 	int	ilen;
300 	char	*data_string;
301 
302 	ilen = scsi_ascii_inquiry_len(data, len);
303 	ASSERT3U(ilen, <=, len);
304 	if (ilen <= 0)
305 		return;
306 	/* ensure null termination */
307 	data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
308 	bcopy(data, data_string, ilen);
309 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
310 	kmem_free(data_string, ilen + 1);
311 }
312 
313 static void
314 bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
315 {
316 	if (drive->d_vendor_len > 0)
317 		bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
318 		    drive->d_vendor, drive->d_vendor_len);
319 
320 	if (drive->d_product_len > 0)
321 		bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
322 		    drive->d_product, drive->d_product_len);
323 
324 	if (drive->d_serial_len > 0)
325 		bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
326 		    drive->d_serial, drive->d_serial_len);
327 
328 	if (drive->d_revision_len > 0)
329 		bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
330 		    drive->d_revision, drive->d_revision_len);
331 }
332 
/*
 * Create the per-device "device_error" named kstat and point d_kerr at
 * its data area.  If kstat creation fails we fall back to a private
 * scratch bd_errstats (with its own mutex) so the error counters can
 * always be updated without a NULL check on the hot path.
 */
static void
bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive)
{
	char	ks_module[KSTAT_STRLEN];
	char	ks_name[KSTAT_STRLEN];
	int	ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t);

	/* Already created; nothing to do. */
	if (bd->d_errstats != NULL)
		return;

	/* Names follow the sd convention: "<drv>err" / "<drv><inst>,err". */
	(void) snprintf(ks_module, sizeof (ks_module), "%serr",
	    ddi_driver_name(bd->d_dip));
	(void) snprintf(ks_name, sizeof (ks_name), "%s%d,err",
	    ddi_driver_name(bd->d_dip), inst);

	bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error",
	    KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);

	if (bd->d_errstats == NULL) {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat.  The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats),
		    KM_SLEEP);
		bd->d_errmutex = kmem_zalloc(sizeof (kmutex_t), KM_SLEEP);
		mutex_init(bd->d_errmutex, NULL, MUTEX_DRIVER, NULL);
	} else {
		/* Supply a lock if the (persistent) kstat came back bare. */
		if (bd->d_errstats->ks_lock == NULL) {
			bd->d_errstats->ks_lock = kmem_zalloc(sizeof (kmutex_t),
			    KM_SLEEP);
			mutex_init(bd->d_errstats->ks_lock, NULL, MUTEX_DRIVER,
			    NULL);
		}

		/* d_errmutex aliases the kstat's own lock in this case. */
		bd->d_errmutex = bd->d_errstats->ks_lock;
		bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data;
	}

	kstat_named_init(&bd->d_kerr->bd_softerrs,	"Soft Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_harderrs,	"Hard Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_transerrs,	"Transport Errors",
	    KSTAT_DATA_UINT32);

	/* Devices report either a single model string, or vendor+product. */
	if (drive->d_model_len > 0) {
		kstat_named_init(&bd->d_kerr->bd_model,	"Model",
		    KSTAT_DATA_STRING);
	} else {
		kstat_named_init(&bd->d_kerr->bd_vid,	"Vendor",
		    KSTAT_DATA_STRING);
		kstat_named_init(&bd->d_kerr->bd_pid,	"Product",
		    KSTAT_DATA_STRING);
	}

	kstat_named_init(&bd->d_kerr->bd_revision,	"Revision",
	    KSTAT_DATA_STRING);
	kstat_named_init(&bd->d_kerr->bd_serial,	"Serial No",
	    KSTAT_DATA_STRING);
	kstat_named_init(&bd->d_kerr->bd_capacity,	"Size",
	    KSTAT_DATA_ULONGLONG);
	kstat_named_init(&bd->d_kerr->bd_rq_media_err,	"Media Error",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err,	"Device Not Ready",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_nodev_err,	"No Device",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_recov_err,	"Recoverable",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_illrq_err,	"Illegal Request",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_pfa_err,
	    "Predictive Failure Analysis", KSTAT_DATA_UINT32);

	bd->d_errstats->ks_private = bd;

	kstat_install(bd->d_errstats);
}
414 
415 static void
416 bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt)
417 {
418 	char	*tmp;
419 
420 	if (KSTAT_NAMED_STR_PTR(k) == NULL) {
421 		if (len > 0) {
422 			tmp = kmem_alloc(len + 1, KM_SLEEP);
423 			(void) strlcpy(tmp, str, len + 1);
424 		} else {
425 			tmp = alt;
426 		}
427 
428 		kstat_named_setstr(k, tmp);
429 	}
430 }
431 
/*
 * Populate the identity strings of the error kstat from the drive info,
 * under d_errmutex.  bd_errstats_setstr() only sets each field once.
 *
 * NOTE(review): if the drive reports a model but bd_model is already
 * set, control falls into the else branch and fills Vendor/Product
 * instead -- presumably harmless since those fields were not
 * kstat_named_init'd in that case; confirm this is intended.
 */
static void
bd_init_errstats(bd_t *bd, bd_drive_t *drive)
{
	struct bd_errstats	*est = bd->d_kerr;

	mutex_enter(bd->d_errmutex);

	if (drive->d_model_len > 0 &&
	    KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) {
		bd_errstats_setstr(&est->bd_model, drive->d_model,
		    drive->d_model_len, NULL);
	} else {
		bd_errstats_setstr(&est->bd_vid, drive->d_vendor,
		    drive->d_vendor_len, "Unknown ");
		bd_errstats_setstr(&est->bd_pid, drive->d_product,
		    drive->d_product_len, "Unknown         ");
	}

	bd_errstats_setstr(&est->bd_revision, drive->d_revision,
	    drive->d_revision_len, "0001");
	bd_errstats_setstr(&est->bd_serial, drive->d_serial,
	    drive->d_serial_len, "0               ");

	mutex_exit(bd->d_errmutex);
}
457 
/*
 * DDI attach entry point for a blkdev child node.
 *
 * The parent (controller) driver stashed a bd_handle_t in our
 * parent-private data; from it we inherit the ops vector, the optional
 * DMA attributes, and the private cookie passed back on every o_*
 * callback.  We then set up locks, queues, the xfer cache, kstats, and
 * attach cmlb for labeling/partitioning.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 *
 * NOTE(review): on the cmlb_attach() failure path below, the error
 * kstat / scratch bd_errstats created just above is not torn down --
 * looks like a leak; confirm against bd_create_errstats().
 */
static int
bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int		inst;
	bd_handle_t	hdl;
	bd_t		*bd;
	bd_drive_t	drive;
	int		rv;
	char		name[16];
	char		kcache[32];

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* We don't do anything native for suspend/resume */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	inst = ddi_get_instance(dip);
	hdl = ddi_get_parent_data(dip);

	(void) snprintf(name, sizeof (name), "%s%d",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);

	if (hdl == NULL) {
		cmn_err(CE_WARN, "%s: missing parent data!", name);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
		return (DDI_FAILURE);
	}
	bd = ddi_get_soft_state(bd_state, inst);

	if (hdl->h_dma) {
		/* DMA-capable: copy the attrs; granularity >= DEV_BSIZE. */
		bd->d_dma = *(hdl->h_dma);
		bd->d_dma.dma_attr_granular =
		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
		bd->d_use_dma = B_TRUE;

		if (bd->d_maxxfer &&
		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
			cmn_err(CE_WARN,
			    "%s: inconsistent maximum transfer size!",
			    name);
			/* We force it */
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		} else {
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		}
	} else {
		/* PIO: default the maximum transfer to 1 MB. */
		bd->d_use_dma = B_FALSE;
		if (bd->d_maxxfer == 0) {
			bd->d_maxxfer = 1024 * 1024;
		}
	}
	bd->d_ops = hdl->h_ops;
	bd->d_private = hdl->h_private;
	bd->d_blkshift = 9;	/* 512 bytes, to start */

	/* Round the maximum transfer down to a DEV_BSIZE multiple. */
	if (bd->d_maxxfer % DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
	}
	if (bd->d_maxxfer < DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	bd->d_dip = dip;
	bd->d_handle = hdl;
	hdl->h_bd = bd;
	ddi_set_driver_private(dip, bd);

	mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);

	list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));
	list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));

	/* Per-device cache; ctor/dtor manage each xfer's DMA handle. */
	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);

	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
	if (bd->d_ksp != NULL) {
		bd->d_ksp->ks_lock = &bd->d_iomutex;
		kstat_install(bd->d_ksp);
		bd->d_kiop = bd->d_ksp->ks_data;
	} else {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat.  The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
	}

	cmlb_alloc_handle(&bd->d_cmlbh);

	bd->d_state = DKIO_NONE;

	/* Query the parent driver for the drive's characteristics. */
	bzero(&drive, sizeof (drive));
	bd->d_ops.o_drive_info(bd->d_private, &drive);
	bd->d_qsize = drive.d_qsize;
	bd->d_removable = drive.d_removable;
	bd->d_hotpluggable = drive.d_hotpluggable;

	/* The drive may restrict the transfer size further. */
	if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
		bd->d_maxxfer = drive.d_maxxfer;

	bd_create_inquiry_props(dip, &drive);

	bd_create_errstats(bd, inst, &drive);
	bd_init_errstats(bd, &drive);
	bd_update_state(bd);

	/* Node type depends on whether we have an EUI-64 or a LUN. */
	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
	    bd->d_removable, bd->d_hotpluggable,
	    /*LINTED: E_BAD_PTR_CAST_ALIGN*/
	    *(uint64_t *)drive.d_eui64 != 0 ? DDI_NT_BLOCK_BLKDEV :
	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
	if (rv != 0) {
		cmlb_free_handle(&bd->d_cmlbh);
		kmem_cache_destroy(bd->d_cache);
		mutex_destroy(&bd->d_iomutex);
		mutex_destroy(&bd->d_ocmutex);
		mutex_destroy(&bd->d_statemutex);
		cv_destroy(&bd->d_statecv);
		list_destroy(&bd->d_waitq);
		list_destroy(&bd->d_runq);
		if (bd->d_ksp != NULL) {
			kstat_delete(bd->d_ksp);
			bd->d_ksp = NULL;
		} else {
			kmem_free(bd->d_kiop, sizeof (kstat_io_t));
		}
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	/* Devid registration is optional and failure is non-fatal. */
	if (bd->d_ops.o_devid_init != NULL) {
		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
		if (rv == DDI_SUCCESS) {
			if (ddi_devid_register(dip, bd->d_devid) !=
			    DDI_SUCCESS) {
				cmn_err(CE_WARN,
				    "%s: unable to register devid", name);
			}
		}
	}

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers).  Also set up properties
	 * used by HAL to identify removable media.
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);
	if (bd->d_removable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "removable-media", NULL, 0);
	}
	if (bd->d_hotpluggable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "hotpluggable", NULL, 0);
	}

	ddi_report_dev(dip);

	return (DDI_SUCCESS);
}
642 
/*
 * DDI detach entry point.  Tears down everything bd_attach() created:
 * kstats (or their scratch equivalents), cmlb state, the devid, the
 * xfer cache, locks and queues, then frees the soft state.
 *
 * NOTE(review): the queues are destroyed without an explicit drain;
 * presumably no I/O can be pending at DDI_DETACH time -- confirm
 * against bd_sched()/bd_submit().
 */
static int
bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	bd_t	*bd;

	bd = ddi_get_driver_private(dip);

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		/* We don't suspend, but our parent does */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
	/* A real kstat owns its data; a scratch kiop was kmem-allocated. */
	if (bd->d_ksp != NULL) {
		kstat_delete(bd->d_ksp);
		bd->d_ksp = NULL;
	} else {
		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
	}

	/* Likewise for the error kstat vs. the scratch bd_errstats. */
	if (bd->d_errstats != NULL) {
		kstat_delete(bd->d_errstats);
		bd->d_errstats = NULL;
	} else {
		kmem_free(bd->d_kerr, sizeof (struct bd_errstats));
		mutex_destroy(bd->d_errmutex);
	}

	cmlb_detach(bd->d_cmlbh, 0);
	cmlb_free_handle(&bd->d_cmlbh);
	if (bd->d_devid)
		ddi_devid_free(bd->d_devid);
	kmem_cache_destroy(bd->d_cache);
	mutex_destroy(&bd->d_iomutex);
	mutex_destroy(&bd->d_ocmutex);
	mutex_destroy(&bd->d_statemutex);
	cv_destroy(&bd->d_statecv);
	list_destroy(&bd->d_waitq);
	list_destroy(&bd->d_runq);
	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
	return (DDI_SUCCESS);
}
688 
689 static int
690 bd_xfer_ctor(void *buf, void *arg, int kmflag)
691 {
692 	bd_xfer_impl_t	*xi;
693 	bd_t		*bd = arg;
694 	int		(*dcb)(caddr_t);
695 
696 	if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
697 		dcb = DDI_DMA_SLEEP;
698 	} else {
699 		dcb = DDI_DMA_DONTWAIT;
700 	}
701 
702 	xi = buf;
703 	bzero(xi, sizeof (*xi));
704 	xi->i_bd = bd;
705 
706 	if (bd->d_use_dma) {
707 		if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
708 		    &xi->i_dmah) != DDI_SUCCESS) {
709 			return (-1);
710 		}
711 	}
712 
713 	return (0);
714 }
715 
716 static void
717 bd_xfer_dtor(void *buf, void *arg)
718 {
719 	bd_xfer_impl_t	*xi = buf;
720 
721 	_NOTE(ARGUNUSED(arg));
722 
723 	if (xi->i_dmah)
724 		ddi_dma_free_handle(&xi->i_dmah);
725 	xi->i_dmah = NULL;
726 }
727 
728 static bd_xfer_impl_t *
729 bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
730     int kmflag)
731 {
732 	bd_xfer_impl_t		*xi;
733 	int			rv = 0;
734 	int			status;
735 	unsigned		dir;
736 	int			(*cb)(caddr_t);
737 	size_t			len;
738 	uint32_t		shift;
739 
740 	if (kmflag == KM_SLEEP) {
741 		cb = DDI_DMA_SLEEP;
742 	} else {
743 		cb = DDI_DMA_DONTWAIT;
744 	}
745 
746 	xi = kmem_cache_alloc(bd->d_cache, kmflag);
747 	if (xi == NULL) {
748 		bioerror(bp, ENOMEM);
749 		return (NULL);
750 	}
751 
752 	ASSERT(bp);
753 
754 	xi->i_bp = bp;
755 	xi->i_func = func;
756 	xi->i_blkno = bp->b_lblkno;
757 
758 	if (bp->b_bcount == 0) {
759 		xi->i_len = 0;
760 		xi->i_nblks = 0;
761 		xi->i_kaddr = NULL;
762 		xi->i_resid = 0;
763 		xi->i_num_win = 0;
764 		goto done;
765 	}
766 
767 	if (bp->b_flags & B_READ) {
768 		dir = DDI_DMA_READ;
769 		xi->i_func = bd->d_ops.o_read;
770 	} else {
771 		dir = DDI_DMA_WRITE;
772 		xi->i_func = bd->d_ops.o_write;
773 	}
774 
775 	shift = bd->d_blkshift;
776 	xi->i_blkshift = shift;
777 
778 	if (!bd->d_use_dma) {
779 		bp_mapin(bp);
780 		rv = 0;
781 		xi->i_offset = 0;
782 		xi->i_num_win =
783 		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
784 		xi->i_cur_win = 0;
785 		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
786 		xi->i_nblks = xi->i_len >> shift;
787 		xi->i_kaddr = bp->b_un.b_addr;
788 		xi->i_resid = bp->b_bcount;
789 	} else {
790 
791 		/*
792 		 * We have to use consistent DMA if the address is misaligned.
793 		 */
794 		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
795 		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
796 			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
797 		} else {
798 			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
799 		}
800 
801 		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
802 		    NULL, &xi->i_dmac, &xi->i_ndmac);
803 		switch (status) {
804 		case DDI_DMA_MAPPED:
805 			xi->i_num_win = 1;
806 			xi->i_cur_win = 0;
807 			xi->i_offset = 0;
808 			xi->i_len = bp->b_bcount;
809 			xi->i_nblks = xi->i_len >> shift;
810 			xi->i_resid = bp->b_bcount;
811 			rv = 0;
812 			break;
813 		case DDI_DMA_PARTIAL_MAP:
814 			xi->i_cur_win = 0;
815 
816 			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
817 			    DDI_SUCCESS) ||
818 			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
819 			    &len, &xi->i_dmac, &xi->i_ndmac) !=
820 			    DDI_SUCCESS) ||
821 			    (P2PHASE(len, shift) != 0)) {
822 				(void) ddi_dma_unbind_handle(xi->i_dmah);
823 				rv = EFAULT;
824 				goto done;
825 			}
826 			xi->i_len = len;
827 			xi->i_nblks = xi->i_len >> shift;
828 			xi->i_resid = bp->b_bcount;
829 			rv = 0;
830 			break;
831 		case DDI_DMA_NORESOURCES:
832 			rv = EAGAIN;
833 			goto done;
834 		case DDI_DMA_TOOBIG:
835 			rv = EINVAL;
836 			goto done;
837 		case DDI_DMA_NOMAPPING:
838 		case DDI_DMA_INUSE:
839 		default:
840 			rv = EFAULT;
841 			goto done;
842 		}
843 	}
844 
845 done:
846 	if (rv != 0) {
847 		kmem_cache_free(bd->d_cache, xi);
848 		bioerror(bp, rv);
849 		return (NULL);
850 	}
851 
852 	return (xi);
853 }
854 
855 static void
856 bd_xfer_free(bd_xfer_impl_t *xi)
857 {
858 	if (xi->i_dmah) {
859 		(void) ddi_dma_unbind_handle(xi->i_dmah);
860 	}
861 	kmem_cache_free(xi->i_bd->d_cache, xi);
862 }
863 
864 static int
865 bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
866 {
867 	dev_t		dev = *devp;
868 	bd_t		*bd;
869 	minor_t		part;
870 	minor_t		inst;
871 	uint64_t	mask;
872 	boolean_t	ndelay;
873 	int		rv;
874 	diskaddr_t	nblks;
875 	diskaddr_t	lba;
876 
877 	_NOTE(ARGUNUSED(credp));
878 
879 	part = BDPART(dev);
880 	inst = BDINST(dev);
881 
882 	if (otyp >= OTYPCNT)
883 		return (EINVAL);
884 
885 	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;
886 
887 	/*
888 	 * Block any DR events from changing the set of registered
889 	 * devices while we function.
890 	 */
891 	rw_enter(&bd_lock, RW_READER);
892 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
893 		rw_exit(&bd_lock);
894 		return (ENXIO);
895 	}
896 
897 	mutex_enter(&bd->d_ocmutex);
898 
899 	ASSERT(part < 64);
900 	mask = (1U << part);
901 
902 	bd_update_state(bd);
903 
904 	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {
905 
906 		/* non-blocking opens are allowed to succeed */
907 		if (!ndelay) {
908 			rv = ENXIO;
909 			goto done;
910 		}
911 	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
912 	    NULL, NULL, 0) == 0) {
913 
914 		/*
915 		 * We read the partinfo, verify valid ranges.  If the
916 		 * partition is invalid, and we aren't blocking or
917 		 * doing a raw access, then fail. (Non-blocking and
918 		 * raw accesses can still succeed to allow a disk with
919 		 * bad partition data to opened by format and fdisk.)
920 		 */
921 		if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
922 			rv = ENXIO;
923 			goto done;
924 		}
925 	} else if (!ndelay) {
926 		/*
927 		 * cmlb_partinfo failed -- invalid partition or no
928 		 * disk label.
929 		 */
930 		rv = ENXIO;
931 		goto done;
932 	}
933 
934 	if ((flag & FWRITE) && bd->d_rdonly) {
935 		rv = EROFS;
936 		goto done;
937 	}
938 
939 	if ((bd->d_open_excl) & (mask)) {
940 		rv = EBUSY;
941 		goto done;
942 	}
943 	if (flag & FEXCL) {
944 		if (bd->d_open_lyr[part]) {
945 			rv = EBUSY;
946 			goto done;
947 		}
948 		for (int i = 0; i < OTYP_LYR; i++) {
949 			if (bd->d_open_reg[i] & mask) {
950 				rv = EBUSY;
951 				goto done;
952 			}
953 		}
954 	}
955 
956 	if (otyp == OTYP_LYR) {
957 		bd->d_open_lyr[part]++;
958 	} else {
959 		bd->d_open_reg[otyp] |= mask;
960 	}
961 	if (flag & FEXCL) {
962 		bd->d_open_excl |= mask;
963 	}
964 
965 	rv = 0;
966 done:
967 	mutex_exit(&bd->d_ocmutex);
968 	rw_exit(&bd_lock);
969 
970 	return (rv);
971 }
972 
973 static int
974 bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
975 {
976 	bd_t		*bd;
977 	minor_t		inst;
978 	minor_t		part;
979 	uint64_t	mask;
980 	boolean_t	last = B_TRUE;
981 
982 	_NOTE(ARGUNUSED(flag));
983 	_NOTE(ARGUNUSED(credp));
984 
985 	part = BDPART(dev);
986 	inst = BDINST(dev);
987 
988 	ASSERT(part < 64);
989 	mask = (1U << part);
990 
991 	rw_enter(&bd_lock, RW_READER);
992 
993 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
994 		rw_exit(&bd_lock);
995 		return (ENXIO);
996 	}
997 
998 	mutex_enter(&bd->d_ocmutex);
999 	if (bd->d_open_excl & mask) {
1000 		bd->d_open_excl &= ~mask;
1001 	}
1002 	if (otyp == OTYP_LYR) {
1003 		bd->d_open_lyr[part]--;
1004 	} else {
1005 		bd->d_open_reg[otyp] &= ~mask;
1006 	}
1007 	for (int i = 0; i < 64; i++) {
1008 		if (bd->d_open_lyr[part]) {
1009 			last = B_FALSE;
1010 		}
1011 	}
1012 	for (int i = 0; last && (i < OTYP_LYR); i++) {
1013 		if (bd->d_open_reg[i]) {
1014 			last = B_FALSE;
1015 		}
1016 	}
1017 	mutex_exit(&bd->d_ocmutex);
1018 
1019 	if (last) {
1020 		cmlb_invalidate(bd->d_cmlbh, 0);
1021 	}
1022 	rw_exit(&bd_lock);
1023 
1024 	return (0);
1025 }
1026 
/*
 * Dump entry point, used to write the kernel crash dump.  The transfer
 * is marked BD_XFER_POLL so the underlying driver completes it
 * synchronously (interrupts may not be available at panic time).
 */
static int
bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
{
	minor_t		inst;
	minor_t		part;
	diskaddr_t	pstart;
	diskaddr_t	psize;
	bd_t		*bd;
	bd_xfer_impl_t	*xi;
	buf_t		*bp;
	int		rv;

	rw_enter(&bd_lock, RW_READER);

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}
	/*
	 * do cmlb, but do it synchronously unless we already have the
	 * partition (which we probably should.)
	 */
	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
	    (void *)1)) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	/* Reject a dump that would run past the end of the partition. */
	if ((blkno + nblk) > psize) {
		rw_exit(&bd_lock);
		return (EINVAL);
	}
	bp = getrbuf(KM_NOSLEEP);
	if (bp == NULL) {
		rw_exit(&bd_lock);
		return (ENOMEM);
	}

	bp->b_bcount = nblk << bd->d_blkshift;
	bp->b_resid = bp->b_bcount;
	bp->b_lblkno = blkno;
	bp->b_un.b_addr = caddr;

	xi = bd_xfer_alloc(bd, bp,  bd->d_ops.o_write, KM_NOSLEEP);
	if (xi == NULL) {
		rw_exit(&bd_lock);
		freerbuf(bp);
		return (ENOMEM);
	}
	/* Address the transfer relative to the whole device. */
	xi->i_blkno = blkno + pstart;
	xi->i_flags = BD_XFER_POLL;
	bd_submit(bd, xi);
	rw_exit(&bd_lock);

	/*
	 * Generally, we should have run this entirely synchronously
	 * at this point and the biowait call should be a no-op.  If
	 * it didn't happen this way, it's a bug in the underlying
	 * driver not honoring BD_XFER_POLL.
	 */
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}
1095 
1096 void
1097 bd_minphys(struct buf *bp)
1098 {
1099 	minor_t inst;
1100 	bd_t	*bd;
1101 	inst = BDINST(bp->b_edev);
1102 
1103 	bd = ddi_get_soft_state(bd_state, inst);
1104 
1105 	/*
1106 	 * In a non-debug kernel, bd_strategy will catch !bd as
1107 	 * well, and will fail nicely.
1108 	 */
1109 	ASSERT(bd);
1110 
1111 	if (bp->b_bcount > bd->d_maxxfer)
1112 		bp->b_bcount = bd->d_maxxfer;
1113 }
1114 
/*
 * Character-device read: funnel through physio(), which bounds each
 * chunk via bd_minphys() and issues it via bd_strategy().
 */
static int
bd_read(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
}
1121 
/*
 * Character-device write: same path as bd_read(), direction B_WRITE.
 */
static int
bd_write(dev_t dev, struct uio *uio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
}
1128 
/*
 * Async read: like bd_read() but via aphysio(); cancellation is not
 * supported (anocancel).
 */
static int
bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
}
1135 
/*
 * Async write: like bd_write() but via aphysio(); cancellation is not
 * supported (anocancel).
 */
static int
bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	_NOTE(ARGUNUSED(credp));
	return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
}
1142 
/*
 * Block I/O entry point.  Validates the request against the partition
 * map, clips transfers that run off the end of the partition, and
 * submits the buf to the underlying driver via an xfer context.
 * Always returns 0; errors are delivered via bioerror()/biodone().
 */
static int
bd_strategy(struct buf *bp)
{
	minor_t		inst;
	minor_t		part;
	bd_t		*bd;
	diskaddr_t	p_lba;
	diskaddr_t	p_nblks;
	diskaddr_t	b_nblks;
	bd_xfer_impl_t	*xi;
	uint32_t	shift;
	int		(*func)(void *, bd_xfer_t *);

	part = BDPART(bp->b_edev);
	inst = BDINST(bp->b_edev);

	ASSERT(bp);

	bp->b_resid = bp->b_bcount;

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
	    NULL, NULL, 0)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	shift = bd->d_blkshift;

	/* Require block-aligned byte counts and an in-range start block. */
	if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
	    (bp->b_lblkno > p_nblks)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}
	b_nblks = bp->b_bcount >> shift;
	/* Zero-length I/O, or I/O starting exactly at EOF, is a no-op. */
	if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
		biodone(bp);
		return (0);
	}

	/* Clip a transfer that extends past the end of the partition. */
	if ((b_nblks + bp->b_lblkno) > p_nblks) {
		bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
		bp->b_bcount -= bp->b_resid;
	} else {
		bp->b_resid = 0;
	}
	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;

	/* Try without sleeping first; fall back to KM_PUSHPAGE. */
	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
	if (xi == NULL) {
		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
	}
	if (xi == NULL) {
		/* bd_request_alloc will have done bioerror */
		biodone(bp);
		return (0);
	}
	/* Convert the partition-relative block to an absolute LBA. */
	xi->i_blkno = bp->b_lblkno + p_lba;

	bd_submit(bd, xi);

	return (0);
}
1213 
/*
 * ioctl(9E) entry point.  Partition/label ioctls are delegated to cmlb
 * first; anything cmlb does not recognize (ENOTTY) is handled by the
 * blkdev-specific cases below.  Returns 0 on success or an errno.
 */
static int
bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
{
	minor_t		inst;
	uint16_t	part;
	bd_t		*bd;
	void		*ptr = (void *)arg;
	int		rv;

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		return (ENXIO);
	}

	/* Give cmlb first crack; it handles label and partition ioctls. */
	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
	if (rv != ENOTTY)
		return (rv);

	if (rvalp != NULL) {
		/* the return value of the ioctl is 0 by default */
		*rvalp = 0;
	}

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo minfo;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&minfo, sizeof (minfo));
		minfo.dki_media_type = DK_FIXED_DISK;
		minfo.dki_lbsize = (1U << bd->d_blkshift);
		minfo.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCGMEDIAINFOEXT: {
		struct dk_minfo_ext miext;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&miext, sizeof (miext));
		miext.dki_media_type = DK_FIXED_DISK;
		miext.dki_lbsize = (1U << bd->d_blkshift);
		/* Extended info additionally reports the physical block size. */
		miext.dki_pbsize = (1U << bd->d_pblkshift);
		miext.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCINFO: {
		/* Controller/drive identification for utilities like format. */
		struct dk_cinfo cinfo;
		bzero(&cinfo, sizeof (cinfo));
		cinfo.dki_ctype = DKC_BLKDEV;
		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
		    "%s", ddi_driver_name(bd->d_dip));
		cinfo.dki_unit = inst;
		cinfo.dki_flags = DKI_FMTVOL;
		cinfo.dki_partition = part;
		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
		cinfo.dki_addr = 0;
		cinfo.dki_slave = 0;
		cinfo.dki_space = 0;
		cinfo.dki_prio = 0;
		cinfo.dki_vec = 0;
		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREMOVABLE: {
		/* Report whether the media is removable (boolean as int). */
		int i;
		i = bd->d_removable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCHOTPLUGGABLE: {
		/* Report whether the device is hot-pluggable. */
		int i;
		i = bd->d_hotpluggable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREADONLY: {
		/* Report whether the media is read-only. */
		int i;
		i = bd->d_rdonly ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSOLIDSTATE: {
		/* Report whether the media is solid state. */
		int i;
		i = bd->d_ssd ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSTATE: {
		/*
		 * Block until the media state differs from the state the
		 * caller passed in, then report the new state.
		 */
		enum dkio_state	state;
		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
			return (EFAULT);
		}
		if ((rv = bd_check_state(bd, &state)) != 0) {
			return (rv);
		}
		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc = NULL;

		/*
		 * Only an in-kernel caller (FKIOCTL) can supply a valid
		 * dk_callback pointer; a userland pointer would be bogus.
		 */
		if (flag & FKIOCTL)
			dkc = (void *)arg;

		rv = bd_flush_write_cache(bd, dkc);
		return (rv);
	}

	default:
		break;

	}
	return (ENOTTY);
}
1353 
1354 static int
1355 bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
1356     char *name, caddr_t valuep, int *lengthp)
1357 {
1358 	bd_t	*bd;
1359 
1360 	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1361 	if (bd == NULL)
1362 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
1363 		    name, valuep, lengthp));
1364 
1365 	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
1366 	    valuep, lengthp, BDPART(dev), 0));
1367 }
1368 
1369 
/*
 * cmlb target-glue read/write entry point, used by cmlb to read and
 * write label data.  A non-NULL tg_cookie indicates polled context
 * (e.g. dump(9E)), in which case we must neither sleep in allocations
 * nor rely on interrupt-driven completion.
 */
static int
bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
	bd_t		*bd;
	buf_t		*bp;
	bd_xfer_impl_t	*xi;
	int		rv;
	int		(*func)(void *, bd_xfer_t *);
	int		kmflag;

	/*
	 * If we are running in polled mode (such as during dump(9e)
	 * execution), then we cannot sleep for kernel allocations.
	 */
	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;

	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	if ((bp = getrbuf(kmflag)) == NULL) {
		return (ENOMEM);
	}

	/* Select transfer direction and the matching driver handler. */
	switch (cmd) {
	case TG_READ:
		bp->b_flags = B_READ;
		func = bd->d_ops.o_read;
		break;
	case TG_WRITE:
		bp->b_flags = B_WRITE;
		func = bd->d_ops.o_write;
		break;
	default:
		freerbuf(bp);
		return (EINVAL);
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	xi = bd_xfer_alloc(bd, bp, func, kmflag);
	if (xi == NULL) {
		/* bd_xfer_alloc records the failure reason in the buf. */
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}
	/* In polled context ask the driver to complete synchronously. */
	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
	xi->i_blkno = start;
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}
1429 
1430 static int
1431 bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
1432 {
1433 	bd_t		*bd;
1434 
1435 	_NOTE(ARGUNUSED(tg_cookie));
1436 	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1437 
1438 	switch (cmd) {
1439 	case TG_GETPHYGEOM:
1440 	case TG_GETVIRTGEOM:
1441 		/*
1442 		 * We don't have any "geometry" as such, let cmlb
1443 		 * fabricate something.
1444 		 */
1445 		return (ENOTTY);
1446 
1447 	case TG_GETCAPACITY:
1448 		bd_update_state(bd);
1449 		*(diskaddr_t *)arg = bd->d_numblks;
1450 		return (0);
1451 
1452 	case TG_GETBLOCKSIZE:
1453 		*(uint32_t *)arg = (1U << bd->d_blkshift);
1454 		return (0);
1455 
1456 	case TG_GETATTR:
1457 		/*
1458 		 * It turns out that cmlb really doesn't do much for
1459 		 * non-writable media, but lets make the information
1460 		 * available for it in case it does more in the
1461 		 * future.  (The value is currently used for
1462 		 * triggering special behavior for CD-ROMs.)
1463 		 */
1464 		bd_update_state(bd);
1465 		((tg_attribute_t *)arg)->media_is_writable =
1466 		    bd->d_rdonly ? B_FALSE : B_TRUE;
1467 		((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
1468 		return (0);
1469 
1470 	default:
1471 		return (EINVAL);
1472 	}
1473 }
1474 
1475 
/*
 * Dispatch loop: move transfers from the wait queue to the run queue
 * (bounded by the device's queue depth d_qsize) and hand each to the
 * driver.  Called after every submit and every completion.
 */
static void
bd_sched(bd_t *bd)
{
	bd_xfer_impl_t	*xi;
	struct buf	*bp;
	int		rv;

	mutex_enter(&bd->d_iomutex);

	while ((bd->d_qactive < bd->d_qsize) &&
	    ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
		bd->d_qactive++;
		kstat_waitq_to_runq(bd->d_kiop);
		list_insert_tail(&bd->d_runq, xi);

		/*
		 * Submit the job to the driver.  We drop the I/O mutex
		 * so that we can deal with the case where the driver
		 * completion routine calls back into us synchronously.
		 */

		mutex_exit(&bd->d_iomutex);

		rv = xi->i_func(bd->d_private, &xi->i_public);
		if (rv != 0) {
			/*
			 * Submission failed: fail the buf, count the
			 * transport error, and undo the run-queue
			 * accounting done above.
			 */
			bp = xi->i_bp;
			bioerror(bp, rv);
			biodone(bp);

			atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);

			mutex_enter(&bd->d_iomutex);
			bd->d_qactive--;
			kstat_runq_exit(bd->d_kiop);
			list_remove(&bd->d_runq, xi);
			bd_xfer_free(xi);
		} else {
			mutex_enter(&bd->d_iomutex);
		}
	}

	mutex_exit(&bd->d_iomutex);
}
1519 
/*
 * Queue a transfer on the wait queue (with kstat wait-queue
 * accounting) and kick the scheduler to dispatch it.
 */
static void
bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
{
	mutex_enter(&bd->d_iomutex);
	list_insert_tail(&bd->d_waitq, xi);
	kstat_waitq_enter(bd->d_kiop);
	mutex_exit(&bd->d_iomutex);

	bd_sched(bd);
}
1530 
/*
 * Remove a transfer from the run queue on completion, update the I/O
 * kstats (byte and operation counts on success only), and kick the
 * scheduler so queued work can be dispatched into the freed slot.
 */
static void
bd_runq_exit(bd_xfer_impl_t *xi, int err)
{
	bd_t	*bd = xi->i_bd;
	buf_t	*bp = xi->i_bp;

	mutex_enter(&bd->d_iomutex);
	bd->d_qactive--;
	kstat_runq_exit(bd->d_kiop);
	list_remove(&bd->d_runq, xi);
	mutex_exit(&bd->d_iomutex);

	if (err == 0) {
		/* Only the bytes actually transferred are counted. */
		if (bp->b_flags & B_READ) {
			bd->d_kiop->reads++;
			bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
		} else {
			bd->d_kiop->writes++;
			bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
		}
	}
	bd_sched(bd);
}
1554 
/*
 * Query the parent driver for current media properties and refresh the
 * cached state (block size, capacity, read-only, solid-state).  On a
 * state transition, DKIOCSTATE waiters are woken and the cmlb label is
 * validated or invalidated as appropriate.
 */
static void
bd_update_state(bd_t *bd)
{
	enum	dkio_state	state = DKIO_INSERTED;
	boolean_t		docmlb = B_FALSE;
	bd_media_t		media;

	bzero(&media, sizeof (media));

	mutex_enter(&bd->d_statemutex);
	if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
		/* Driver says no media: report it as ejected. */
		bd->d_numblks = 0;
		state = DKIO_EJECTED;
		goto done;
	}

	/*
	 * Sanity-check the logical block size: at least 512, a power of
	 * two, and one that evenly divides the maximum transfer size.
	 */
	if ((media.m_blksize < 512) ||
	    (!ISP2(media.m_blksize)) ||
	    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
		cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
		    ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
		    media.m_blksize);
		/*
		 * We can't use the media, treat it as not present.
		 */
		state = DKIO_EJECTED;
		bd->d_numblks = 0;
		goto done;
	}

	if (((1U << bd->d_blkshift) != media.m_blksize) ||
	    (bd->d_numblks != media.m_nblks)) {
		/* Device size changed */
		docmlb = B_TRUE;
	}

	/* m_blksize is a power of two, so ffs - 1 gives its log2. */
	bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
	bd->d_pblkshift = bd->d_blkshift;
	bd->d_numblks = media.m_nblks;
	bd->d_rdonly = media.m_readonly;
	bd->d_ssd = media.m_solidstate;

	/*
	 * Only use the supplied physical block size if it is non-zero,
	 * greater or equal to the block size, and a power of 2. Ignore it
	 * if not, it's just informational and we can still use the media.
	 */
	if ((media.m_pblksize != 0) &&
	    (media.m_pblksize >= media.m_blksize) &&
	    (ISP2(media.m_pblksize)))
		bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;

done:
	if (state != bd->d_state) {
		/* State changed: wake DKIOCSTATE waiters. */
		bd->d_state = state;
		cv_broadcast(&bd->d_statecv);
		docmlb = B_TRUE;
	}
	mutex_exit(&bd->d_statemutex);

	bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift;

	if (docmlb) {
		if (state == DKIO_INSERTED) {
			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
		} else {
			cmlb_invalidate(bd->d_cmlbh, 0);
		}
	}
}
1625 
/*
 * DKIOCSTATE support: block until the media state differs from *state,
 * then return the new state in *state.  Polls the driver roughly once
 * per second while waiting; returns EINTR if interrupted by a signal.
 */
static int
bd_check_state(bd_t *bd, enum dkio_state *state)
{
	clock_t		when;

	for (;;) {

		/* Refresh the cached state before comparing. */
		bd_update_state(bd);

		mutex_enter(&bd->d_statemutex);

		if (bd->d_state != *state) {
			*state = bd->d_state;
			mutex_exit(&bd->d_statemutex);
			break;
		}

		/* Wait up to one second; zero return means signalled. */
		when = drv_usectohz(1000000);
		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
		    when, TR_CLOCK_TICK) == 0) {
			mutex_exit(&bd->d_statemutex);
			return (EINTR);
		}

		mutex_exit(&bd->d_statemutex);
	}

	return (0);
}
1655 
1656 static int
1657 bd_flush_write_cache_done(struct buf *bp)
1658 {
1659 	struct dk_callback *dc = (void *)bp->b_private;
1660 
1661 	(*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
1662 	kmem_free(dc, sizeof (*dc));
1663 	freerbuf(bp);
1664 	return (0);
1665 }
1666 
/*
 * Issue a write-cache flush via the driver's o_sync_cache handler.
 * If the (kernel) caller supplied a dk_callback, the flush is
 * asynchronous and the callback fires from biodone; otherwise we wait
 * for completion and return the result directly.
 */
static int
bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
{
	buf_t			*bp;
	struct dk_callback	*dc;
	bd_xfer_impl_t		*xi;
	int			rv;

	if (bd->d_ops.o_sync_cache == NULL) {
		/* Driver does not implement cache flushing. */
		return (ENOTSUP);
	}
	/* NOTE(review): getrbuf(KM_SLEEP) should not fail; defensive. */
	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
		return (ENOMEM);
	}
	/* A flush transfers no data. */
	bp->b_resid = 0;
	bp->b_bcount = 0;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}

	/* Make an asynchronous flush, but only if there is a callback */
	if (dkc != NULL && dkc->dkc_callback != NULL) {
		/* Make a private copy of the callback structure */
		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
		*dc = *dkc;
		/* bd_flush_write_cache_done frees dc and bp. */
		bp->b_private = dc;
		bp->b_iodone = bd_flush_write_cache_done;

		bd_submit(bd, xi);
		return (0);
	}

	/* In case there is no callback, perform a synchronous flush */
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}
1711 
1712 /*
1713  * Nexus support.
1714  */
1715 int
1716 bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
1717     void *arg, void *result)
1718 {
1719 	bd_handle_t	hdl;
1720 
1721 	switch (ctlop) {
1722 	case DDI_CTLOPS_REPORTDEV:
1723 		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
1724 		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
1725 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
1726 		return (DDI_SUCCESS);
1727 
1728 	case DDI_CTLOPS_INITCHILD:
1729 		hdl = ddi_get_parent_data((dev_info_t *)arg);
1730 		if (hdl == NULL) {
1731 			return (DDI_NOT_WELL_FORMED);
1732 		}
1733 		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
1734 		return (DDI_SUCCESS);
1735 
1736 	case DDI_CTLOPS_UNINITCHILD:
1737 		ddi_set_name_addr((dev_info_t *)arg, NULL);
1738 		ndi_prop_remove_all((dev_info_t *)arg);
1739 		return (DDI_SUCCESS);
1740 
1741 	default:
1742 		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
1743 	}
1744 }
1745 
1746 /*
1747  * Functions for device drivers.
1748  */
1749 bd_handle_t
1750 bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
1751 {
1752 	bd_handle_t	hdl;
1753 
1754 	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
1755 	if (hdl != NULL) {
1756 		hdl->h_ops = *ops;
1757 		hdl->h_dma = dma;
1758 		hdl->h_private = private;
1759 	}
1760 
1761 	return (hdl);
1762 }
1763 
/*
 * Release a handle previously obtained from bd_alloc_handle().
 */
void
bd_free_handle(bd_handle_t hdl)
{
	kmem_free(hdl, sizeof (*hdl));
}
1769 
/*
 * Create and online a blkdev child node under dip for the drive
 * described by hdl.  The unit address is built from the drive's
 * EUI-64 identifier when one is supplied, otherwise from the
 * target (and optional LUN) numbers.
 */
int
bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
{
	dev_info_t	*child;
	bd_drive_t	drive = { 0 };

	/* if drivers don't override this, make it assume none */
	drive.d_lun = -1;
	hdl->h_ops.o_drive_info(hdl->h_private, &drive);

	hdl->h_parent = dip;
	hdl->h_name = "blkdev";

	/* A non-zero EUI-64 takes precedence for the unit address. */
	/*LINTED: E_BAD_PTR_CAST_ALIGN*/
	if (*(uint64_t *)drive.d_eui64 != 0) {
		if (drive.d_lun >= 0) {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "w%02X%02X%02X%02X%02X%02X%02X%02X,%X",
			    drive.d_eui64[0], drive.d_eui64[1],
			    drive.d_eui64[2], drive.d_eui64[3],
			    drive.d_eui64[4], drive.d_eui64[5],
			    drive.d_eui64[6], drive.d_eui64[7], drive.d_lun);
		} else {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "w%02X%02X%02X%02X%02X%02X%02X%02X",
			    drive.d_eui64[0], drive.d_eui64[1],
			    drive.d_eui64[2], drive.d_eui64[3],
			    drive.d_eui64[4], drive.d_eui64[5],
			    drive.d_eui64[6], drive.d_eui64[7]);
		}
	} else {
		if (drive.d_lun >= 0) {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "%X,%X", drive.d_target, drive.d_lun);
		} else {
			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
			    "%X", drive.d_target);
		}
	}

	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
	    &child) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    "blkdev", hdl->h_addr);
		return (DDI_FAILURE);
	}

	/* Stash the handle so DDI_CTLOPS_INITCHILD can find the address. */
	ddi_set_parent_data(child, hdl);
	hdl->h_child = child;

	if (ndi_devi_online(child, 0) == NDI_FAILURE) {
		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
		    ddi_driver_name(dip), ddi_get_instance(dip),
		    hdl->h_name, hdl->h_addr);
		/*
		 * NOTE(review): h_child still references the node freed
		 * here; callers must not reuse the handle on failure
		 * without re-attaching.
		 */
		(void) ndi_devi_free(child);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}
1831 
/*
 * Tear down the child node created by bd_attach_handle().  A node that
 * never reached DS_INITIALIZED can simply be removed; otherwise its
 * devfs entries must be cleaned and the node unconfigured by name.
 */
int
bd_detach_handle(bd_handle_t hdl)
{
	int	circ;
	int	rv;
	char	*devnm;

	if (hdl->h_child == NULL) {
		/* Nothing attached; treat as success. */
		return (DDI_SUCCESS);
	}
	ndi_devi_enter(hdl->h_parent, &circ);
	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
		rv = ddi_remove_child(hdl->h_child, 0);
	} else {
		/*
		 * ddi_deviname() yields "/name@addr"; skip the leading
		 * slash for devfs_clean and unconfig-by-name.
		 */
		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
		(void) ddi_deviname(hdl->h_child, devnm);
		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
		kmem_free(devnm, MAXNAMELEN + 1);
	}
	if (rv == 0) {
		hdl->h_child = NULL;
	}

	ndi_devi_exit(hdl->h_parent, circ);
	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
}
1860 
1861 void
1862 bd_xfer_done(bd_xfer_t *xfer, int err)
1863 {
1864 	bd_xfer_impl_t	*xi = (void *)xfer;
1865 	buf_t		*bp = xi->i_bp;
1866 	int		rv = DDI_SUCCESS;
1867 	bd_t		*bd = xi->i_bd;
1868 	size_t		len;
1869 
1870 	if (err != 0) {
1871 		bd_runq_exit(xi, err);
1872 		atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);
1873 
1874 		bp->b_resid += xi->i_resid;
1875 		bd_xfer_free(xi);
1876 		bioerror(bp, err);
1877 		biodone(bp);
1878 		return;
1879 	}
1880 
1881 	xi->i_cur_win++;
1882 	xi->i_resid -= xi->i_len;
1883 
1884 	if (xi->i_resid == 0) {
1885 		/* Job completed succcessfully! */
1886 		bd_runq_exit(xi, 0);
1887 
1888 		bd_xfer_free(xi);
1889 		biodone(bp);
1890 		return;
1891 	}
1892 
1893 	xi->i_blkno += xi->i_nblks;
1894 
1895 	if (bd->d_use_dma) {
1896 		/* More transfer still pending... advance to next DMA window. */
1897 		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
1898 		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
1899 	} else {
1900 		/* Advance memory window. */
1901 		xi->i_kaddr += xi->i_len;
1902 		xi->i_offset += xi->i_len;
1903 		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
1904 	}
1905 
1906 
1907 	if ((rv != DDI_SUCCESS) ||
1908 	    (P2PHASE(len, (1U << xi->i_blkshift) != 0))) {
1909 		bd_runq_exit(xi, EFAULT);
1910 
1911 		bp->b_resid += xi->i_resid;
1912 		bd_xfer_free(xi);
1913 		bioerror(bp, EFAULT);
1914 		biodone(bp);
1915 		return;
1916 	}
1917 	xi->i_len = len;
1918 	xi->i_nblks = len >> xi->i_blkshift;
1919 
1920 	/* Submit next window to hardware. */
1921 	rv = xi->i_func(bd->d_private, &xi->i_public);
1922 	if (rv != 0) {
1923 		bd_runq_exit(xi, rv);
1924 
1925 		atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1926 
1927 		bp->b_resid += xi->i_resid;
1928 		bd_xfer_free(xi);
1929 		bioerror(bp, rv);
1930 		biodone(bp);
1931 	}
1932 }
1933 
1934 void
1935 bd_error(bd_xfer_t *xfer, int error)
1936 {
1937 	bd_xfer_impl_t	*xi = (void *)xfer;
1938 	bd_t		*bd = xi->i_bd;
1939 
1940 	switch (error) {
1941 	case BD_ERR_MEDIA:
1942 		atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32);
1943 		break;
1944 	case BD_ERR_NTRDY:
1945 		atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32);
1946 		break;
1947 	case BD_ERR_NODEV:
1948 		atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32);
1949 		break;
1950 	case BD_ERR_RECOV:
1951 		atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32);
1952 		break;
1953 	case BD_ERR_ILLRQ:
1954 		atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32);
1955 		break;
1956 	case BD_ERR_PFA:
1957 		atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32);
1958 		break;
1959 	default:
1960 		cmn_err(CE_PANIC, "bd_error: unknown error type %d", error);
1961 		break;
1962 	}
1963 }
1964 
1965 void
1966 bd_state_change(bd_handle_t hdl)
1967 {
1968 	bd_t		*bd;
1969 
1970 	if ((bd = hdl->h_bd) != NULL) {
1971 		bd_update_state(bd);
1972 	}
1973 }
1974 
/*
 * Install blkdev's nexus bus_ops into a parent driver's dev_ops.
 * Parent drivers call this from their _init(9E) before mod_install.
 */
void
bd_mod_init(struct dev_ops *devops)
{
	static struct bus_ops bd_bus_ops = {
		BUSO_REV,		/* busops_rev */
		nullbusmap,		/* bus_map */
		NULL,			/* bus_get_intrspec (OBSOLETE) */
		NULL,			/* bus_add_intrspec (OBSOLETE) */
		NULL,			/* bus_remove_intrspec (OBSOLETE) */
		i_ddi_map_fault,	/* bus_map_fault */
		NULL,			/* bus_dma_map (OBSOLETE) */
		ddi_dma_allochdl,	/* bus_dma_allochdl */
		ddi_dma_freehdl,	/* bus_dma_freehdl */
		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
		ddi_dma_flush,		/* bus_dma_flush */
		ddi_dma_win,		/* bus_dma_win */
		ddi_dma_mctl,		/* bus_dma_ctl */
		bd_bus_ctl,		/* bus_ctl */
		ddi_bus_prop_op,	/* bus_prop_op */
		NULL,			/* bus_get_eventcookie */
		NULL,			/* bus_add_eventcall */
		NULL,			/* bus_remove_eventcall */
		NULL,			/* bus_post_event */
		NULL,			/* bus_intr_ctl (OBSOLETE) */
		NULL,			/* bus_config */
		NULL,			/* bus_unconfig */
		NULL,			/* bus_fm_init */
		NULL,			/* bus_fm_fini */
		NULL,			/* bus_fm_access_enter */
		NULL,			/* bus_fm_access_exit */
		NULL,			/* bus_power */
		NULL,			/* bus_intr_op */
	};

	devops->devo_bus_ops = &bd_bus_ops;

	/*
	 * NB: The device driver is free to supply its own
	 * character entry device support.
	 */
}
2017 
/*
 * Undo bd_mod_init(): detach the blkdev bus_ops from the parent
 * driver's dev_ops, typically from the driver's _fini(9E).
 */
void
bd_mod_fini(struct dev_ops *devops)
{
	devops->devo_bus_ops = NULL;
}
2023