xref: /titanic_50/usr/src/uts/common/io/blkdev/blkdev.c (revision d89ac0fd66e33a023e949753f614738fb3757194)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
24  * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
25  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
26  * Copyright 2017 The MathWorks, Inc.  All rights reserved.
27  */
28 
29 #include <sys/types.h>
30 #include <sys/ksynch.h>
31 #include <sys/kmem.h>
32 #include <sys/file.h>
33 #include <sys/errno.h>
34 #include <sys/open.h>
35 #include <sys/buf.h>
36 #include <sys/uio.h>
37 #include <sys/aio_req.h>
38 #include <sys/cred.h>
39 #include <sys/modctl.h>
40 #include <sys/cmlb.h>
41 #include <sys/conf.h>
42 #include <sys/devops.h>
43 #include <sys/list.h>
44 #include <sys/sysmacros.h>
45 #include <sys/dkio.h>
46 #include <sys/vtoc.h>
47 #include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
48 #include <sys/kstat.h>
49 #include <sys/fs/dv_node.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/note.h>
53 #include <sys/blkdev.h>
54 #include <sys/scsi/impl/inquiry.h>
55 
/*
 * Minor number layout: every instance owns BD_MAXPART consecutive minor
 * numbers.  The quotient selects the instance (soft state index) and the
 * remainder selects the partition within that instance.
 */
#define	BD_MAXPART	64
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)

/* Short names for the driver-private structures defined below. */
typedef struct bd_xfer_impl bd_xfer_impl_t;
typedef struct bd bd_t;
62 
63 struct bd {
64 	void		*d_private;
65 	dev_info_t	*d_dip;
66 	kmutex_t	d_ocmutex;
67 	kmutex_t	d_iomutex;
68 	kmutex_t	*d_errmutex;
69 	kmutex_t	d_statemutex;
70 	kcondvar_t	d_statecv;
71 	enum dkio_state	d_state;
72 	cmlb_handle_t	d_cmlbh;
73 	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
74 	uint64_t	d_open_excl;	/* bit mask indexed by partition */
75 	uint64_t	d_open_reg[OTYPCNT];		/* bit mask */
76 
77 	uint32_t	d_qsize;
78 	uint32_t	d_qactive;
79 	uint32_t	d_maxxfer;
80 	uint32_t	d_blkshift;
81 	uint32_t	d_pblkshift;
82 	uint64_t	d_numblks;
83 	ddi_devid_t	d_devid;
84 
85 	kmem_cache_t	*d_cache;
86 	list_t		d_runq;
87 	list_t		d_waitq;
88 	kstat_t		*d_ksp;
89 	kstat_io_t	*d_kiop;
90 	kstat_t		*d_errstats;
91 	struct bd_errstats *d_kerr;
92 
93 	boolean_t	d_rdonly;
94 	boolean_t	d_ssd;
95 	boolean_t	d_removable;
96 	boolean_t	d_hotpluggable;
97 	boolean_t	d_use_dma;
98 
99 	ddi_dma_attr_t	d_dma;
100 	bd_ops_t	d_ops;
101 	bd_handle_t	d_handle;
102 };
103 
104 struct bd_handle {
105 	bd_ops_t	h_ops;
106 	ddi_dma_attr_t	*h_dma;
107 	dev_info_t	*h_parent;
108 	dev_info_t	*h_child;
109 	void		*h_private;
110 	bd_t		*h_bd;
111 	char		*h_name;
112 	char		h_addr[30];	/* enough for w%0.16x,%X */
113 };
114 
115 struct bd_xfer_impl {
116 	bd_xfer_t	i_public;
117 	list_node_t	i_linkage;
118 	bd_t		*i_bd;
119 	buf_t		*i_bp;
120 	uint_t		i_num_win;
121 	uint_t		i_cur_win;
122 	off_t		i_offset;
123 	int		(*i_func)(void *, bd_xfer_t *);
124 	uint32_t	i_blkshift;
125 	size_t		i_len;
126 	size_t		i_resid;
127 };
128 
/*
 * Shorthand so that fields of the embedded public bd_xfer_t can be named
 * as if they were direct members of bd_xfer_impl_t.
 */
#define	i_blkno		i_public.x_blkno
#define	i_nblks		i_public.x_nblks
#define	i_kaddr		i_public.x_kaddr
#define	i_flags		i_public.x_flags
#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
136 
137 
138 /*
139  * Private prototypes.
140  */
141 
142 static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
143 static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);
144 static void bd_create_errstats(bd_t *, int, bd_drive_t *);
145 static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *);
146 static void bd_init_errstats(bd_t *, bd_drive_t *);
147 
148 static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
149 static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
150 static int bd_detach(dev_info_t *, ddi_detach_cmd_t);
151 
152 static int bd_open(dev_t *, int, int, cred_t *);
153 static int bd_close(dev_t, int, int, cred_t *);
154 static int bd_strategy(struct buf *);
155 static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
156 static int bd_dump(dev_t, caddr_t, daddr_t, int);
157 static int bd_read(dev_t, struct uio *, cred_t *);
158 static int bd_write(dev_t, struct uio *, cred_t *);
159 static int bd_aread(dev_t, struct aio_req *, cred_t *);
160 static int bd_awrite(dev_t, struct aio_req *, cred_t *);
161 static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
162     caddr_t, int *);
163 
164 static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
165     void *);
166 static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
167 static int bd_xfer_ctor(void *, void *, int);
168 static void bd_xfer_dtor(void *, void *);
169 static void bd_sched(bd_t *);
170 static void bd_submit(bd_t *, bd_xfer_impl_t *);
171 static void bd_runq_exit(bd_xfer_impl_t *, int);
172 static void bd_update_state(bd_t *);
173 static int bd_check_state(bd_t *, enum dkio_state *);
174 static int bd_flush_write_cache(bd_t *, struct dk_callback *);
175 static int bd_check_uio(dev_t, struct uio *);
176 
177 struct cmlb_tg_ops bd_tg_ops = {
178 	TG_DK_OPS_VERSION_1,
179 	bd_tg_rdwr,
180 	bd_tg_getinfo,
181 };
182 
183 static struct cb_ops bd_cb_ops = {
184 	bd_open, 		/* open */
185 	bd_close, 		/* close */
186 	bd_strategy, 		/* strategy */
187 	nodev, 			/* print */
188 	bd_dump,		/* dump */
189 	bd_read, 		/* read */
190 	bd_write, 		/* write */
191 	bd_ioctl, 		/* ioctl */
192 	nodev, 			/* devmap */
193 	nodev, 			/* mmap */
194 	nodev, 			/* segmap */
195 	nochpoll, 		/* poll */
196 	bd_prop_op, 		/* cb_prop_op */
197 	0, 			/* streamtab  */
198 	D_64BIT | D_MP,		/* Driver comaptibility flag */
199 	CB_REV,			/* cb_rev */
200 	bd_aread,		/* async read */
201 	bd_awrite		/* async write */
202 };
203 
204 struct dev_ops bd_dev_ops = {
205 	DEVO_REV, 		/* devo_rev, */
206 	0, 			/* refcnt  */
207 	bd_getinfo,		/* getinfo */
208 	nulldev, 		/* identify */
209 	nulldev, 		/* probe */
210 	bd_attach, 		/* attach */
211 	bd_detach,		/* detach */
212 	nodev, 			/* reset */
213 	&bd_cb_ops, 		/* driver operations */
214 	NULL,			/* bus operations */
215 	NULL,			/* power */
216 	ddi_quiesce_not_needed,	/* quiesce */
217 };
218 
219 static struct modldrv modldrv = {
220 	&mod_driverops,
221 	"Generic Block Device",
222 	&bd_dev_ops,
223 };
224 
225 static struct modlinkage modlinkage = {
226 	MODREV_1, { &modldrv, NULL }
227 };
228 
229 static void *bd_state;
230 static krwlock_t bd_lock;
231 
232 int
233 _init(void)
234 {
235 	int	rv;
236 
237 	rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
238 	if (rv != DDI_SUCCESS) {
239 		return (rv);
240 	}
241 	rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
242 	rv = mod_install(&modlinkage);
243 	if (rv != DDI_SUCCESS) {
244 		rw_destroy(&bd_lock);
245 		ddi_soft_state_fini(&bd_state);
246 	}
247 	return (rv);
248 }
249 
250 int
251 _fini(void)
252 {
253 	int	rv;
254 
255 	rv = mod_remove(&modlinkage);
256 	if (rv == DDI_SUCCESS) {
257 		rw_destroy(&bd_lock);
258 		ddi_soft_state_fini(&bd_state);
259 	}
260 	return (rv);
261 }
262 
263 int
264 _info(struct modinfo *modinfop)
265 {
266 	return (mod_info(&modlinkage, modinfop));
267 }
268 
269 static int
270 bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
271 {
272 	bd_t	*bd;
273 	minor_t	inst;
274 
275 	_NOTE(ARGUNUSED(dip));
276 
277 	inst = BDINST((dev_t)arg);
278 
279 	switch (cmd) {
280 	case DDI_INFO_DEVT2DEVINFO:
281 		bd = ddi_get_soft_state(bd_state, inst);
282 		if (bd == NULL) {
283 			return (DDI_FAILURE);
284 		}
285 		*resultp = (void *)bd->d_dip;
286 		break;
287 
288 	case DDI_INFO_DEVT2INSTANCE:
289 		*resultp = (void *)(intptr_t)inst;
290 		break;
291 
292 	default:
293 		return (DDI_FAILURE);
294 	}
295 	return (DDI_SUCCESS);
296 }
297 
298 static void
299 bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
300 {
301 	int	ilen;
302 	char	*data_string;
303 
304 	ilen = scsi_ascii_inquiry_len(data, len);
305 	ASSERT3U(ilen, <=, len);
306 	if (ilen <= 0)
307 		return;
308 	/* ensure null termination */
309 	data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
310 	bcopy(data, data_string, ilen);
311 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
312 	kmem_free(data_string, ilen + 1);
313 }
314 
315 static void
316 bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
317 {
318 	if (drive->d_vendor_len > 0)
319 		bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
320 		    drive->d_vendor, drive->d_vendor_len);
321 
322 	if (drive->d_product_len > 0)
323 		bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
324 		    drive->d_product, drive->d_product_len);
325 
326 	if (drive->d_serial_len > 0)
327 		bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
328 		    drive->d_serial, drive->d_serial_len);
329 
330 	if (drive->d_revision_len > 0)
331 		bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
332 		    drive->d_revision, drive->d_revision_len);
333 }
334 
335 static void
336 bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive)
337 {
338 	char	ks_module[KSTAT_STRLEN];
339 	char	ks_name[KSTAT_STRLEN];
340 	int	ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t);
341 
342 	if (bd->d_errstats != NULL)
343 		return;
344 
345 	(void) snprintf(ks_module, sizeof (ks_module), "%serr",
346 	    ddi_driver_name(bd->d_dip));
347 	(void) snprintf(ks_name, sizeof (ks_name), "%s%d,err",
348 	    ddi_driver_name(bd->d_dip), inst);
349 
350 	bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error",
351 	    KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
352 
353 	if (bd->d_errstats == NULL) {
354 		/*
355 		 * Even if we cannot create the kstat, we create a
356 		 * scratch kstat.  The reason for this is to ensure
357 		 * that we can update the kstat all of the time,
358 		 * without adding an extra branch instruction.
359 		 */
360 		bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats),
361 		    KM_SLEEP);
362 		bd->d_errmutex = kmem_zalloc(sizeof (kmutex_t), KM_SLEEP);
363 		mutex_init(bd->d_errmutex, NULL, MUTEX_DRIVER, NULL);
364 	} else {
365 		if (bd->d_errstats->ks_lock == NULL) {
366 			bd->d_errstats->ks_lock = kmem_zalloc(sizeof (kmutex_t),
367 			    KM_SLEEP);
368 			mutex_init(bd->d_errstats->ks_lock, NULL, MUTEX_DRIVER,
369 			    NULL);
370 		}
371 
372 		bd->d_errmutex = bd->d_errstats->ks_lock;
373 		bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data;
374 	}
375 
376 	kstat_named_init(&bd->d_kerr->bd_softerrs,	"Soft Errors",
377 	    KSTAT_DATA_UINT32);
378 	kstat_named_init(&bd->d_kerr->bd_harderrs,	"Hard Errors",
379 	    KSTAT_DATA_UINT32);
380 	kstat_named_init(&bd->d_kerr->bd_transerrs,	"Transport Errors",
381 	    KSTAT_DATA_UINT32);
382 
383 	if (drive->d_model_len > 0) {
384 		kstat_named_init(&bd->d_kerr->bd_model,	"Model",
385 		    KSTAT_DATA_STRING);
386 	} else {
387 		kstat_named_init(&bd->d_kerr->bd_vid,	"Vendor",
388 		    KSTAT_DATA_STRING);
389 		kstat_named_init(&bd->d_kerr->bd_pid,	"Product",
390 		    KSTAT_DATA_STRING);
391 	}
392 
393 	kstat_named_init(&bd->d_kerr->bd_revision,	"Revision",
394 	    KSTAT_DATA_STRING);
395 	kstat_named_init(&bd->d_kerr->bd_serial,	"Serial No",
396 	    KSTAT_DATA_STRING);
397 	kstat_named_init(&bd->d_kerr->bd_capacity,	"Size",
398 	    KSTAT_DATA_ULONGLONG);
399 	kstat_named_init(&bd->d_kerr->bd_rq_media_err,	"Media Error",
400 	    KSTAT_DATA_UINT32);
401 	kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err,	"Device Not Ready",
402 	    KSTAT_DATA_UINT32);
403 	kstat_named_init(&bd->d_kerr->bd_rq_nodev_err,	"No Device",
404 	    KSTAT_DATA_UINT32);
405 	kstat_named_init(&bd->d_kerr->bd_rq_recov_err,	"Recoverable",
406 	    KSTAT_DATA_UINT32);
407 	kstat_named_init(&bd->d_kerr->bd_rq_illrq_err,	"Illegal Request",
408 	    KSTAT_DATA_UINT32);
409 	kstat_named_init(&bd->d_kerr->bd_rq_pfa_err,
410 	    "Predictive Failure Analysis", KSTAT_DATA_UINT32);
411 
412 	bd->d_errstats->ks_private = bd;
413 
414 	kstat_install(bd->d_errstats);
415 }
416 
417 static void
418 bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt)
419 {
420 	char	*tmp;
421 
422 	if (KSTAT_NAMED_STR_PTR(k) == NULL) {
423 		if (len > 0) {
424 			tmp = kmem_alloc(len + 1, KM_SLEEP);
425 			(void) strlcpy(tmp, str, len + 1);
426 		} else {
427 			tmp = alt;
428 		}
429 
430 		kstat_named_setstr(k, tmp);
431 	}
432 }
433 
434 static void
435 bd_init_errstats(bd_t *bd, bd_drive_t *drive)
436 {
437 	struct bd_errstats	*est = bd->d_kerr;
438 
439 	mutex_enter(bd->d_errmutex);
440 
441 	if (drive->d_model_len > 0 &&
442 	    KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) {
443 		bd_errstats_setstr(&est->bd_model, drive->d_model,
444 		    drive->d_model_len, NULL);
445 	} else {
446 		bd_errstats_setstr(&est->bd_vid, drive->d_vendor,
447 		    drive->d_vendor_len, "Unknown ");
448 		bd_errstats_setstr(&est->bd_pid, drive->d_product,
449 		    drive->d_product_len, "Unknown         ");
450 	}
451 
452 	bd_errstats_setstr(&est->bd_revision, drive->d_revision,
453 	    drive->d_revision_len, "0001");
454 	bd_errstats_setstr(&est->bd_serial, drive->d_serial,
455 	    drive->d_serial_len, "0               ");
456 
457 	mutex_exit(bd->d_errmutex);
458 }
459 
460 static int
461 bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
462 {
463 	int		inst;
464 	bd_handle_t	hdl;
465 	bd_t		*bd;
466 	bd_drive_t	drive;
467 	int		rv;
468 	char		name[16];
469 	char		kcache[32];
470 
471 	switch (cmd) {
472 	case DDI_ATTACH:
473 		break;
474 	case DDI_RESUME:
475 		/* We don't do anything native for suspend/resume */
476 		return (DDI_SUCCESS);
477 	default:
478 		return (DDI_FAILURE);
479 	}
480 
481 	inst = ddi_get_instance(dip);
482 	hdl = ddi_get_parent_data(dip);
483 
484 	(void) snprintf(name, sizeof (name), "%s%d",
485 	    ddi_driver_name(dip), ddi_get_instance(dip));
486 	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);
487 
488 	if (hdl == NULL) {
489 		cmn_err(CE_WARN, "%s: missing parent data!", name);
490 		return (DDI_FAILURE);
491 	}
492 
493 	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
494 		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
495 		return (DDI_FAILURE);
496 	}
497 	bd = ddi_get_soft_state(bd_state, inst);
498 
499 	if (hdl->h_dma) {
500 		bd->d_dma = *(hdl->h_dma);
501 		bd->d_dma.dma_attr_granular =
502 		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
503 		bd->d_use_dma = B_TRUE;
504 
505 		if (bd->d_maxxfer &&
506 		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
507 			cmn_err(CE_WARN,
508 			    "%s: inconsistent maximum transfer size!",
509 			    name);
510 			/* We force it */
511 			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
512 		} else {
513 			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
514 		}
515 	} else {
516 		bd->d_use_dma = B_FALSE;
517 		if (bd->d_maxxfer == 0) {
518 			bd->d_maxxfer = 1024 * 1024;
519 		}
520 	}
521 	bd->d_ops = hdl->h_ops;
522 	bd->d_private = hdl->h_private;
523 	bd->d_blkshift = 9;	/* 512 bytes, to start */
524 
525 	if (bd->d_maxxfer % DEV_BSIZE) {
526 		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
527 		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
528 	}
529 	if (bd->d_maxxfer < DEV_BSIZE) {
530 		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
531 		ddi_soft_state_free(bd_state, inst);
532 		return (DDI_FAILURE);
533 	}
534 
535 	bd->d_dip = dip;
536 	bd->d_handle = hdl;
537 	hdl->h_bd = bd;
538 	ddi_set_driver_private(dip, bd);
539 
540 	mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
541 	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
542 	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
543 	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);
544 
545 	list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
546 	    offsetof(struct bd_xfer_impl, i_linkage));
547 	list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
548 	    offsetof(struct bd_xfer_impl, i_linkage));
549 
550 	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
551 	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);
552 
553 	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
554 	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
555 	if (bd->d_ksp != NULL) {
556 		bd->d_ksp->ks_lock = &bd->d_iomutex;
557 		kstat_install(bd->d_ksp);
558 		bd->d_kiop = bd->d_ksp->ks_data;
559 	} else {
560 		/*
561 		 * Even if we cannot create the kstat, we create a
562 		 * scratch kstat.  The reason for this is to ensure
563 		 * that we can update the kstat all of the time,
564 		 * without adding an extra branch instruction.
565 		 */
566 		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
567 	}
568 
569 	cmlb_alloc_handle(&bd->d_cmlbh);
570 
571 	bd->d_state = DKIO_NONE;
572 
573 	bzero(&drive, sizeof (drive));
574 	bd->d_ops.o_drive_info(bd->d_private, &drive);
575 	bd->d_qsize = drive.d_qsize;
576 	bd->d_removable = drive.d_removable;
577 	bd->d_hotpluggable = drive.d_hotpluggable;
578 
579 	if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
580 		bd->d_maxxfer = drive.d_maxxfer;
581 
582 	bd_create_inquiry_props(dip, &drive);
583 
584 	bd_create_errstats(bd, inst, &drive);
585 	bd_init_errstats(bd, &drive);
586 	bd_update_state(bd);
587 
588 	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
589 	    bd->d_removable, bd->d_hotpluggable,
590 	    /*LINTED: E_BAD_PTR_CAST_ALIGN*/
591 	    *(uint64_t *)drive.d_eui64 != 0 ? DDI_NT_BLOCK_BLKDEV :
592 	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
593 	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
594 	if (rv != 0) {
595 		cmlb_free_handle(&bd->d_cmlbh);
596 		kmem_cache_destroy(bd->d_cache);
597 		mutex_destroy(&bd->d_iomutex);
598 		mutex_destroy(&bd->d_ocmutex);
599 		mutex_destroy(&bd->d_statemutex);
600 		cv_destroy(&bd->d_statecv);
601 		list_destroy(&bd->d_waitq);
602 		list_destroy(&bd->d_runq);
603 		if (bd->d_ksp != NULL) {
604 			kstat_delete(bd->d_ksp);
605 			bd->d_ksp = NULL;
606 		} else {
607 			kmem_free(bd->d_kiop, sizeof (kstat_io_t));
608 		}
609 		ddi_soft_state_free(bd_state, inst);
610 		return (DDI_FAILURE);
611 	}
612 
613 	if (bd->d_ops.o_devid_init != NULL) {
614 		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
615 		if (rv == DDI_SUCCESS) {
616 			if (ddi_devid_register(dip, bd->d_devid) !=
617 			    DDI_SUCCESS) {
618 				cmn_err(CE_WARN,
619 				    "%s: unable to register devid", name);
620 			}
621 		}
622 	}
623 
624 	/*
625 	 * Add a zero-length attribute to tell the world we support
626 	 * kernel ioctls (for layered drivers).  Also set up properties
627 	 * used by HAL to identify removable media.
628 	 */
629 	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
630 	    DDI_KERNEL_IOCTL, NULL, 0);
631 	if (bd->d_removable) {
632 		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
633 		    "removable-media", NULL, 0);
634 	}
635 	if (bd->d_hotpluggable) {
636 		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
637 		    "hotpluggable", NULL, 0);
638 	}
639 
640 	ddi_report_dev(dip);
641 
642 	return (DDI_SUCCESS);
643 }
644 
645 static int
646 bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
647 {
648 	bd_t	*bd;
649 
650 	bd = ddi_get_driver_private(dip);
651 
652 	switch (cmd) {
653 	case DDI_DETACH:
654 		break;
655 	case DDI_SUSPEND:
656 		/* We don't suspend, but our parent does */
657 		return (DDI_SUCCESS);
658 	default:
659 		return (DDI_FAILURE);
660 	}
661 	if (bd->d_ksp != NULL) {
662 		kstat_delete(bd->d_ksp);
663 		bd->d_ksp = NULL;
664 	} else {
665 		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
666 	}
667 
668 	if (bd->d_errstats != NULL) {
669 		kstat_delete(bd->d_errstats);
670 		bd->d_errstats = NULL;
671 	} else {
672 		kmem_free(bd->d_kerr, sizeof (struct bd_errstats));
673 		mutex_destroy(bd->d_errmutex);
674 	}
675 
676 	cmlb_detach(bd->d_cmlbh, 0);
677 	cmlb_free_handle(&bd->d_cmlbh);
678 	if (bd->d_devid)
679 		ddi_devid_free(bd->d_devid);
680 	kmem_cache_destroy(bd->d_cache);
681 	mutex_destroy(&bd->d_iomutex);
682 	mutex_destroy(&bd->d_ocmutex);
683 	mutex_destroy(&bd->d_statemutex);
684 	cv_destroy(&bd->d_statecv);
685 	list_destroy(&bd->d_waitq);
686 	list_destroy(&bd->d_runq);
687 	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
688 	return (DDI_SUCCESS);
689 }
690 
691 static int
692 bd_xfer_ctor(void *buf, void *arg, int kmflag)
693 {
694 	bd_xfer_impl_t	*xi;
695 	bd_t		*bd = arg;
696 	int		(*dcb)(caddr_t);
697 
698 	if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
699 		dcb = DDI_DMA_SLEEP;
700 	} else {
701 		dcb = DDI_DMA_DONTWAIT;
702 	}
703 
704 	xi = buf;
705 	bzero(xi, sizeof (*xi));
706 	xi->i_bd = bd;
707 
708 	if (bd->d_use_dma) {
709 		if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
710 		    &xi->i_dmah) != DDI_SUCCESS) {
711 			return (-1);
712 		}
713 	}
714 
715 	return (0);
716 }
717 
718 static void
719 bd_xfer_dtor(void *buf, void *arg)
720 {
721 	bd_xfer_impl_t	*xi = buf;
722 
723 	_NOTE(ARGUNUSED(arg));
724 
725 	if (xi->i_dmah)
726 		ddi_dma_free_handle(&xi->i_dmah);
727 	xi->i_dmah = NULL;
728 }
729 
730 static bd_xfer_impl_t *
731 bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
732     int kmflag)
733 {
734 	bd_xfer_impl_t		*xi;
735 	int			rv = 0;
736 	int			status;
737 	unsigned		dir;
738 	int			(*cb)(caddr_t);
739 	size_t			len;
740 	uint32_t		shift;
741 
742 	if (kmflag == KM_SLEEP) {
743 		cb = DDI_DMA_SLEEP;
744 	} else {
745 		cb = DDI_DMA_DONTWAIT;
746 	}
747 
748 	xi = kmem_cache_alloc(bd->d_cache, kmflag);
749 	if (xi == NULL) {
750 		bioerror(bp, ENOMEM);
751 		return (NULL);
752 	}
753 
754 	ASSERT(bp);
755 
756 	xi->i_bp = bp;
757 	xi->i_func = func;
758 	xi->i_blkno = bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT);
759 
760 	if (bp->b_bcount == 0) {
761 		xi->i_len = 0;
762 		xi->i_nblks = 0;
763 		xi->i_kaddr = NULL;
764 		xi->i_resid = 0;
765 		xi->i_num_win = 0;
766 		goto done;
767 	}
768 
769 	if (bp->b_flags & B_READ) {
770 		dir = DDI_DMA_READ;
771 		xi->i_func = bd->d_ops.o_read;
772 	} else {
773 		dir = DDI_DMA_WRITE;
774 		xi->i_func = bd->d_ops.o_write;
775 	}
776 
777 	shift = bd->d_blkshift;
778 	xi->i_blkshift = shift;
779 
780 	if (!bd->d_use_dma) {
781 		bp_mapin(bp);
782 		rv = 0;
783 		xi->i_offset = 0;
784 		xi->i_num_win =
785 		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
786 		xi->i_cur_win = 0;
787 		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
788 		xi->i_nblks = xi->i_len >> shift;
789 		xi->i_kaddr = bp->b_un.b_addr;
790 		xi->i_resid = bp->b_bcount;
791 	} else {
792 
793 		/*
794 		 * We have to use consistent DMA if the address is misaligned.
795 		 */
796 		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
797 		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
798 			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
799 		} else {
800 			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
801 		}
802 
803 		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
804 		    NULL, &xi->i_dmac, &xi->i_ndmac);
805 		switch (status) {
806 		case DDI_DMA_MAPPED:
807 			xi->i_num_win = 1;
808 			xi->i_cur_win = 0;
809 			xi->i_offset = 0;
810 			xi->i_len = bp->b_bcount;
811 			xi->i_nblks = xi->i_len >> shift;
812 			xi->i_resid = bp->b_bcount;
813 			rv = 0;
814 			break;
815 		case DDI_DMA_PARTIAL_MAP:
816 			xi->i_cur_win = 0;
817 
818 			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
819 			    DDI_SUCCESS) ||
820 			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
821 			    &len, &xi->i_dmac, &xi->i_ndmac) !=
822 			    DDI_SUCCESS) ||
823 			    (P2PHASE(len, (1U << shift)) != 0)) {
824 				(void) ddi_dma_unbind_handle(xi->i_dmah);
825 				rv = EFAULT;
826 				goto done;
827 			}
828 			xi->i_len = len;
829 			xi->i_nblks = xi->i_len >> shift;
830 			xi->i_resid = bp->b_bcount;
831 			rv = 0;
832 			break;
833 		case DDI_DMA_NORESOURCES:
834 			rv = EAGAIN;
835 			goto done;
836 		case DDI_DMA_TOOBIG:
837 			rv = EINVAL;
838 			goto done;
839 		case DDI_DMA_NOMAPPING:
840 		case DDI_DMA_INUSE:
841 		default:
842 			rv = EFAULT;
843 			goto done;
844 		}
845 	}
846 
847 done:
848 	if (rv != 0) {
849 		kmem_cache_free(bd->d_cache, xi);
850 		bioerror(bp, rv);
851 		return (NULL);
852 	}
853 
854 	return (xi);
855 }
856 
857 static void
858 bd_xfer_free(bd_xfer_impl_t *xi)
859 {
860 	if (xi->i_dmah) {
861 		(void) ddi_dma_unbind_handle(xi->i_dmah);
862 	}
863 	kmem_cache_free(xi->i_bd->d_cache, xi);
864 }
865 
866 static int
867 bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
868 {
869 	dev_t		dev = *devp;
870 	bd_t		*bd;
871 	minor_t		part;
872 	minor_t		inst;
873 	uint64_t	mask;
874 	boolean_t	ndelay;
875 	int		rv;
876 	diskaddr_t	nblks;
877 	diskaddr_t	lba;
878 
879 	_NOTE(ARGUNUSED(credp));
880 
881 	part = BDPART(dev);
882 	inst = BDINST(dev);
883 
884 	if (otyp >= OTYPCNT)
885 		return (EINVAL);
886 
887 	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;
888 
889 	/*
890 	 * Block any DR events from changing the set of registered
891 	 * devices while we function.
892 	 */
893 	rw_enter(&bd_lock, RW_READER);
894 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
895 		rw_exit(&bd_lock);
896 		return (ENXIO);
897 	}
898 
899 	mutex_enter(&bd->d_ocmutex);
900 
901 	ASSERT(part < 64);
902 	mask = (1U << part);
903 
904 	bd_update_state(bd);
905 
906 	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {
907 
908 		/* non-blocking opens are allowed to succeed */
909 		if (!ndelay) {
910 			rv = ENXIO;
911 			goto done;
912 		}
913 	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
914 	    NULL, NULL, 0) == 0) {
915 
916 		/*
917 		 * We read the partinfo, verify valid ranges.  If the
918 		 * partition is invalid, and we aren't blocking or
919 		 * doing a raw access, then fail. (Non-blocking and
920 		 * raw accesses can still succeed to allow a disk with
921 		 * bad partition data to opened by format and fdisk.)
922 		 */
923 		if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
924 			rv = ENXIO;
925 			goto done;
926 		}
927 	} else if (!ndelay) {
928 		/*
929 		 * cmlb_partinfo failed -- invalid partition or no
930 		 * disk label.
931 		 */
932 		rv = ENXIO;
933 		goto done;
934 	}
935 
936 	if ((flag & FWRITE) && bd->d_rdonly) {
937 		rv = EROFS;
938 		goto done;
939 	}
940 
941 	if ((bd->d_open_excl) & (mask)) {
942 		rv = EBUSY;
943 		goto done;
944 	}
945 	if (flag & FEXCL) {
946 		if (bd->d_open_lyr[part]) {
947 			rv = EBUSY;
948 			goto done;
949 		}
950 		for (int i = 0; i < OTYP_LYR; i++) {
951 			if (bd->d_open_reg[i] & mask) {
952 				rv = EBUSY;
953 				goto done;
954 			}
955 		}
956 	}
957 
958 	if (otyp == OTYP_LYR) {
959 		bd->d_open_lyr[part]++;
960 	} else {
961 		bd->d_open_reg[otyp] |= mask;
962 	}
963 	if (flag & FEXCL) {
964 		bd->d_open_excl |= mask;
965 	}
966 
967 	rv = 0;
968 done:
969 	mutex_exit(&bd->d_ocmutex);
970 	rw_exit(&bd_lock);
971 
972 	return (rv);
973 }
974 
975 static int
976 bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
977 {
978 	bd_t		*bd;
979 	minor_t		inst;
980 	minor_t		part;
981 	uint64_t	mask;
982 	boolean_t	last = B_TRUE;
983 
984 	_NOTE(ARGUNUSED(flag));
985 	_NOTE(ARGUNUSED(credp));
986 
987 	part = BDPART(dev);
988 	inst = BDINST(dev);
989 
990 	ASSERT(part < 64);
991 	mask = (1U << part);
992 
993 	rw_enter(&bd_lock, RW_READER);
994 
995 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
996 		rw_exit(&bd_lock);
997 		return (ENXIO);
998 	}
999 
1000 	mutex_enter(&bd->d_ocmutex);
1001 	if (bd->d_open_excl & mask) {
1002 		bd->d_open_excl &= ~mask;
1003 	}
1004 	if (otyp == OTYP_LYR) {
1005 		bd->d_open_lyr[part]--;
1006 	} else {
1007 		bd->d_open_reg[otyp] &= ~mask;
1008 	}
1009 	for (int i = 0; i < 64; i++) {
1010 		if (bd->d_open_lyr[part]) {
1011 			last = B_FALSE;
1012 		}
1013 	}
1014 	for (int i = 0; last && (i < OTYP_LYR); i++) {
1015 		if (bd->d_open_reg[i]) {
1016 			last = B_FALSE;
1017 		}
1018 	}
1019 	mutex_exit(&bd->d_ocmutex);
1020 
1021 	if (last) {
1022 		cmlb_invalidate(bd->d_cmlbh, 0);
1023 	}
1024 	rw_exit(&bd_lock);
1025 
1026 	return (0);
1027 }
1028 
1029 static int
1030 bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
1031 {
1032 	minor_t		inst;
1033 	minor_t		part;
1034 	diskaddr_t	pstart;
1035 	diskaddr_t	psize;
1036 	bd_t		*bd;
1037 	bd_xfer_impl_t	*xi;
1038 	buf_t		*bp;
1039 	int		rv;
1040 	uint32_t	shift;
1041 	daddr_t		d_blkno;
1042 	int	d_nblk;
1043 
1044 	rw_enter(&bd_lock, RW_READER);
1045 
1046 	part = BDPART(dev);
1047 	inst = BDINST(dev);
1048 
1049 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1050 		rw_exit(&bd_lock);
1051 		return (ENXIO);
1052 	}
1053 	shift = bd->d_blkshift;
1054 	d_blkno = blkno >> (shift - DEV_BSHIFT);
1055 	d_nblk = nblk >> (shift - DEV_BSHIFT);
1056 	/*
1057 	 * do cmlb, but do it synchronously unless we already have the
1058 	 * partition (which we probably should.)
1059 	 */
1060 	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
1061 	    (void *)1)) {
1062 		rw_exit(&bd_lock);
1063 		return (ENXIO);
1064 	}
1065 
1066 	if ((d_blkno + d_nblk) > psize) {
1067 		rw_exit(&bd_lock);
1068 		return (EINVAL);
1069 	}
1070 	bp = getrbuf(KM_NOSLEEP);
1071 	if (bp == NULL) {
1072 		rw_exit(&bd_lock);
1073 		return (ENOMEM);
1074 	}
1075 
1076 	bp->b_bcount = nblk << DEV_BSHIFT;
1077 	bp->b_resid = bp->b_bcount;
1078 	bp->b_lblkno = blkno;
1079 	bp->b_un.b_addr = caddr;
1080 
1081 	xi = bd_xfer_alloc(bd, bp,  bd->d_ops.o_write, KM_NOSLEEP);
1082 	if (xi == NULL) {
1083 		rw_exit(&bd_lock);
1084 		freerbuf(bp);
1085 		return (ENOMEM);
1086 	}
1087 	xi->i_blkno = d_blkno + pstart;
1088 	xi->i_flags = BD_XFER_POLL;
1089 	bd_submit(bd, xi);
1090 	rw_exit(&bd_lock);
1091 
1092 	/*
1093 	 * Generally, we should have run this entirely synchronously
1094 	 * at this point and the biowait call should be a no-op.  If
1095 	 * it didn't happen this way, it's a bug in the underlying
1096 	 * driver not honoring BD_XFER_POLL.
1097 	 */
1098 	(void) biowait(bp);
1099 	rv = geterror(bp);
1100 	freerbuf(bp);
1101 	return (rv);
1102 }
1103 
1104 void
1105 bd_minphys(struct buf *bp)
1106 {
1107 	minor_t inst;
1108 	bd_t	*bd;
1109 	inst = BDINST(bp->b_edev);
1110 
1111 	bd = ddi_get_soft_state(bd_state, inst);
1112 
1113 	/*
1114 	 * In a non-debug kernel, bd_strategy will catch !bd as
1115 	 * well, and will fail nicely.
1116 	 */
1117 	ASSERT(bd);
1118 
1119 	if (bp->b_bcount > bd->d_maxxfer)
1120 		bp->b_bcount = bd->d_maxxfer;
1121 }
1122 
1123 static int
1124 bd_check_uio(dev_t dev, struct uio *uio)
1125 {
1126 	bd_t		*bd;
1127 	uint32_t	shift;
1128 
1129 	if ((bd = ddi_get_soft_state(bd_state, BDINST(dev))) == NULL) {
1130 		return (ENXIO);
1131 	}
1132 
1133 	shift = bd->d_blkshift;
1134 	if ((P2PHASE(uio->uio_loffset, (1U << shift)) != 0) ||
1135 	    (P2PHASE(uio->uio_iov->iov_len, (1U << shift)) != 0)) {
1136 		return (EINVAL);
1137 	}
1138 
1139 	return (0);
1140 }
1141 
1142 static int
1143 bd_read(dev_t dev, struct uio *uio, cred_t *credp)
1144 {
1145 	_NOTE(ARGUNUSED(credp));
1146 	int	ret = bd_check_uio(dev, uio);
1147 	if (ret != 0) {
1148 		return (ret);
1149 	}
1150 	return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
1151 }
1152 
1153 static int
1154 bd_write(dev_t dev, struct uio *uio, cred_t *credp)
1155 {
1156 	_NOTE(ARGUNUSED(credp));
1157 	int	ret = bd_check_uio(dev, uio);
1158 	if (ret != 0) {
1159 		return (ret);
1160 	}
1161 	return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
1162 }
1163 
1164 static int
1165 bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
1166 {
1167 	_NOTE(ARGUNUSED(credp));
1168 	int	ret = bd_check_uio(dev, aio->aio_uio);
1169 	if (ret != 0) {
1170 		return (ret);
1171 	}
1172 	return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
1173 }
1174 
1175 static int
1176 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1177 {
1178 	_NOTE(ARGUNUSED(credp));
1179 	int	ret = bd_check_uio(dev, aio->aio_uio);
1180 	if (ret != 0) {
1181 		return (ret);
1182 	}
1183 	return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
1184 }
1185 
1186 static int
1187 bd_strategy(struct buf *bp)
1188 {
1189 	minor_t		inst;
1190 	minor_t		part;
1191 	bd_t		*bd;
1192 	diskaddr_t	p_lba;
1193 	diskaddr_t	p_nblks;
1194 	diskaddr_t	b_nblks;
1195 	bd_xfer_impl_t	*xi;
1196 	uint32_t	shift;
1197 	int		(*func)(void *, bd_xfer_t *);
1198 	diskaddr_t 	lblkno;
1199 
1200 	part = BDPART(bp->b_edev);
1201 	inst = BDINST(bp->b_edev);
1202 
1203 	ASSERT(bp);
1204 
1205 	bp->b_resid = bp->b_bcount;
1206 
1207 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1208 		bioerror(bp, ENXIO);
1209 		biodone(bp);
1210 		return (0);
1211 	}
1212 
1213 	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
1214 	    NULL, NULL, 0)) {
1215 		bioerror(bp, ENXIO);
1216 		biodone(bp);
1217 		return (0);
1218 	}
1219 
1220 	shift = bd->d_blkshift;
1221 	lblkno = bp->b_lblkno >> (shift - DEV_BSHIFT);
1222 	if ((P2PHASE(bp->b_lblkno, (1U << (shift - DEV_BSHIFT))) != 0) ||
1223 	    (P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
1224 	    (lblkno > p_nblks)) {
1225 		bioerror(bp, EINVAL);
1226 		biodone(bp);
1227 		return (0);
1228 	}
1229 	b_nblks = bp->b_bcount >> shift;
1230 	if ((lblkno == p_nblks) || (bp->b_bcount == 0)) {
1231 		biodone(bp);
1232 		return (0);
1233 	}
1234 
1235 	if ((b_nblks + lblkno) > p_nblks) {
1236 		bp->b_resid = ((lblkno + b_nblks - p_nblks) << shift);
1237 		bp->b_bcount -= bp->b_resid;
1238 	} else {
1239 		bp->b_resid = 0;
1240 	}
1241 	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;
1242 
1243 	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
1244 	if (xi == NULL) {
1245 		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
1246 	}
1247 	if (xi == NULL) {
1248 		/* bd_request_alloc will have done bioerror */
1249 		biodone(bp);
1250 		return (0);
1251 	}
1252 	xi->i_blkno = lblkno + p_lba;
1253 
1254 	bd_submit(bd, xi);
1255 
1256 	return (0);
1257 }
1258 
1259 static int
1260 bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
1261 {
1262 	minor_t		inst;
1263 	uint16_t	part;
1264 	bd_t		*bd;
1265 	void		*ptr = (void *)arg;
1266 	int		rv;
1267 
1268 	part = BDPART(dev);
1269 	inst = BDINST(dev);
1270 
1271 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1272 		return (ENXIO);
1273 	}
1274 
1275 	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
1276 	if (rv != ENOTTY)
1277 		return (rv);
1278 
1279 	if (rvalp != NULL) {
1280 		/* the return value of the ioctl is 0 by default */
1281 		*rvalp = 0;
1282 	}
1283 
1284 	switch (cmd) {
1285 	case DKIOCGMEDIAINFO: {
1286 		struct dk_minfo minfo;
1287 
1288 		/* make sure our state information is current */
1289 		bd_update_state(bd);
1290 		bzero(&minfo, sizeof (minfo));
1291 		minfo.dki_media_type = DK_FIXED_DISK;
1292 		minfo.dki_lbsize = (1U << bd->d_blkshift);
1293 		minfo.dki_capacity = bd->d_numblks;
1294 		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
1295 			return (EFAULT);
1296 		}
1297 		return (0);
1298 	}
1299 	case DKIOCGMEDIAINFOEXT: {
1300 		struct dk_minfo_ext miext;
1301 
1302 		/* make sure our state information is current */
1303 		bd_update_state(bd);
1304 		bzero(&miext, sizeof (miext));
1305 		miext.dki_media_type = DK_FIXED_DISK;
1306 		miext.dki_lbsize = (1U << bd->d_blkshift);
1307 		miext.dki_pbsize = (1U << bd->d_pblkshift);
1308 		miext.dki_capacity = bd->d_numblks;
1309 		if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
1310 			return (EFAULT);
1311 		}
1312 		return (0);
1313 	}
1314 	case DKIOCINFO: {
1315 		struct dk_cinfo cinfo;
1316 		bzero(&cinfo, sizeof (cinfo));
1317 		cinfo.dki_ctype = DKC_BLKDEV;
1318 		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
1319 		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
1320 		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
1321 		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
1322 		    "%s", ddi_driver_name(bd->d_dip));
1323 		cinfo.dki_unit = inst;
1324 		cinfo.dki_flags = DKI_FMTVOL;
1325 		cinfo.dki_partition = part;
1326 		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
1327 		cinfo.dki_addr = 0;
1328 		cinfo.dki_slave = 0;
1329 		cinfo.dki_space = 0;
1330 		cinfo.dki_prio = 0;
1331 		cinfo.dki_vec = 0;
1332 		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
1333 			return (EFAULT);
1334 		}
1335 		return (0);
1336 	}
1337 	case DKIOCREMOVABLE: {
1338 		int i;
1339 		i = bd->d_removable ? 1 : 0;
1340 		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1341 			return (EFAULT);
1342 		}
1343 		return (0);
1344 	}
1345 	case DKIOCHOTPLUGGABLE: {
1346 		int i;
1347 		i = bd->d_hotpluggable ? 1 : 0;
1348 		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1349 			return (EFAULT);
1350 		}
1351 		return (0);
1352 	}
1353 	case DKIOCREADONLY: {
1354 		int i;
1355 		i = bd->d_rdonly ? 1 : 0;
1356 		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1357 			return (EFAULT);
1358 		}
1359 		return (0);
1360 	}
1361 	case DKIOCSOLIDSTATE: {
1362 		int i;
1363 		i = bd->d_ssd ? 1 : 0;
1364 		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1365 			return (EFAULT);
1366 		}
1367 		return (0);
1368 	}
1369 	case DKIOCSTATE: {
1370 		enum dkio_state	state;
1371 		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
1372 			return (EFAULT);
1373 		}
1374 		if ((rv = bd_check_state(bd, &state)) != 0) {
1375 			return (rv);
1376 		}
1377 		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
1378 			return (EFAULT);
1379 		}
1380 		return (0);
1381 	}
1382 	case DKIOCFLUSHWRITECACHE: {
1383 		struct dk_callback *dkc = NULL;
1384 
1385 		if (flag & FKIOCTL)
1386 			dkc = (void *)arg;
1387 
1388 		rv = bd_flush_write_cache(bd, dkc);
1389 		return (rv);
1390 	}
1391 
1392 	default:
1393 		break;
1394 
1395 	}
1396 	return (ENOTTY);
1397 }
1398 
1399 static int
1400 bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
1401     char *name, caddr_t valuep, int *lengthp)
1402 {
1403 	bd_t	*bd;
1404 
1405 	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1406 	if (bd == NULL)
1407 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
1408 		    name, valuep, lengthp));
1409 
1410 	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
1411 	    valuep, lengthp, BDPART(dev), 0));
1412 }
1413 
1414 
1415 static int
1416 bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
1417     size_t length, void *tg_cookie)
1418 {
1419 	bd_t		*bd;
1420 	buf_t		*bp;
1421 	bd_xfer_impl_t	*xi;
1422 	int		rv;
1423 	int		(*func)(void *, bd_xfer_t *);
1424 	int		kmflag;
1425 
1426 	/*
1427 	 * If we are running in polled mode (such as during dump(9e)
1428 	 * execution), then we cannot sleep for kernel allocations.
1429 	 */
1430 	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;
1431 
1432 	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1433 
1434 	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
1435 		/* We can only transfer whole blocks at a time! */
1436 		return (EINVAL);
1437 	}
1438 
1439 	if ((bp = getrbuf(kmflag)) == NULL) {
1440 		return (ENOMEM);
1441 	}
1442 
1443 	switch (cmd) {
1444 	case TG_READ:
1445 		bp->b_flags = B_READ;
1446 		func = bd->d_ops.o_read;
1447 		break;
1448 	case TG_WRITE:
1449 		bp->b_flags = B_WRITE;
1450 		func = bd->d_ops.o_write;
1451 		break;
1452 	default:
1453 		freerbuf(bp);
1454 		return (EINVAL);
1455 	}
1456 
1457 	bp->b_un.b_addr = bufaddr;
1458 	bp->b_bcount = length;
1459 	xi = bd_xfer_alloc(bd, bp, func, kmflag);
1460 	if (xi == NULL) {
1461 		rv = geterror(bp);
1462 		freerbuf(bp);
1463 		return (rv);
1464 	}
1465 	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
1466 	xi->i_blkno = start;
1467 	bd_submit(bd, xi);
1468 	(void) biowait(bp);
1469 	rv = geterror(bp);
1470 	freerbuf(bp);
1471 
1472 	return (rv);
1473 }
1474 
1475 static int
1476 bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
1477 {
1478 	bd_t		*bd;
1479 
1480 	_NOTE(ARGUNUSED(tg_cookie));
1481 	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1482 
1483 	switch (cmd) {
1484 	case TG_GETPHYGEOM:
1485 	case TG_GETVIRTGEOM:
1486 		/*
1487 		 * We don't have any "geometry" as such, let cmlb
1488 		 * fabricate something.
1489 		 */
1490 		return (ENOTTY);
1491 
1492 	case TG_GETCAPACITY:
1493 		bd_update_state(bd);
1494 		*(diskaddr_t *)arg = bd->d_numblks;
1495 		return (0);
1496 
1497 	case TG_GETBLOCKSIZE:
1498 		*(uint32_t *)arg = (1U << bd->d_blkshift);
1499 		return (0);
1500 
1501 	case TG_GETATTR:
1502 		/*
1503 		 * It turns out that cmlb really doesn't do much for
1504 		 * non-writable media, but lets make the information
1505 		 * available for it in case it does more in the
1506 		 * future.  (The value is currently used for
1507 		 * triggering special behavior for CD-ROMs.)
1508 		 */
1509 		bd_update_state(bd);
1510 		((tg_attribute_t *)arg)->media_is_writable =
1511 		    bd->d_rdonly ? B_FALSE : B_TRUE;
1512 		((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
1513 		((tg_attribute_t *)arg)->media_is_rotational = B_FALSE;
1514 		return (0);
1515 
1516 	default:
1517 		return (EINVAL);
1518 	}
1519 }
1520 
1521 
1522 static void
1523 bd_sched(bd_t *bd)
1524 {
1525 	bd_xfer_impl_t	*xi;
1526 	struct buf	*bp;
1527 	int		rv;
1528 
1529 	mutex_enter(&bd->d_iomutex);
1530 
1531 	while ((bd->d_qactive < bd->d_qsize) &&
1532 	    ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
1533 		bd->d_qactive++;
1534 		kstat_waitq_to_runq(bd->d_kiop);
1535 		list_insert_tail(&bd->d_runq, xi);
1536 
1537 		/*
1538 		 * Submit the job to the driver.  We drop the I/O mutex
1539 		 * so that we can deal with the case where the driver
1540 		 * completion routine calls back into us synchronously.
1541 		 */
1542 
1543 		mutex_exit(&bd->d_iomutex);
1544 
1545 		rv = xi->i_func(bd->d_private, &xi->i_public);
1546 		if (rv != 0) {
1547 			bp = xi->i_bp;
1548 			bioerror(bp, rv);
1549 			biodone(bp);
1550 
1551 			atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1552 
1553 			mutex_enter(&bd->d_iomutex);
1554 			bd->d_qactive--;
1555 			kstat_runq_exit(bd->d_kiop);
1556 			list_remove(&bd->d_runq, xi);
1557 			bd_xfer_free(xi);
1558 		} else {
1559 			mutex_enter(&bd->d_iomutex);
1560 		}
1561 	}
1562 
1563 	mutex_exit(&bd->d_iomutex);
1564 }
1565 
1566 static void
1567 bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
1568 {
1569 	mutex_enter(&bd->d_iomutex);
1570 	list_insert_tail(&bd->d_waitq, xi);
1571 	kstat_waitq_enter(bd->d_kiop);
1572 	mutex_exit(&bd->d_iomutex);
1573 
1574 	bd_sched(bd);
1575 }
1576 
1577 static void
1578 bd_runq_exit(bd_xfer_impl_t *xi, int err)
1579 {
1580 	bd_t	*bd = xi->i_bd;
1581 	buf_t	*bp = xi->i_bp;
1582 
1583 	mutex_enter(&bd->d_iomutex);
1584 	bd->d_qactive--;
1585 	kstat_runq_exit(bd->d_kiop);
1586 	list_remove(&bd->d_runq, xi);
1587 	mutex_exit(&bd->d_iomutex);
1588 
1589 	if (err == 0) {
1590 		if (bp->b_flags & B_READ) {
1591 			bd->d_kiop->reads++;
1592 			bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
1593 		} else {
1594 			bd->d_kiop->writes++;
1595 			bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
1596 		}
1597 	}
1598 	bd_sched(bd);
1599 }
1600 
1601 static void
1602 bd_update_state(bd_t *bd)
1603 {
1604 	enum	dkio_state	state = DKIO_INSERTED;
1605 	boolean_t		docmlb = B_FALSE;
1606 	bd_media_t		media;
1607 
1608 	bzero(&media, sizeof (media));
1609 
1610 	mutex_enter(&bd->d_statemutex);
1611 	if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
1612 		bd->d_numblks = 0;
1613 		state = DKIO_EJECTED;
1614 		goto done;
1615 	}
1616 
1617 	if ((media.m_blksize < 512) ||
1618 	    (!ISP2(media.m_blksize)) ||
1619 	    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
1620 		cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
1621 		    ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
1622 		    media.m_blksize);
1623 		/*
1624 		 * We can't use the media, treat it as not present.
1625 		 */
1626 		state = DKIO_EJECTED;
1627 		bd->d_numblks = 0;
1628 		goto done;
1629 	}
1630 
1631 	if (((1U << bd->d_blkshift) != media.m_blksize) ||
1632 	    (bd->d_numblks != media.m_nblks)) {
1633 		/* Device size changed */
1634 		docmlb = B_TRUE;
1635 	}
1636 
1637 	bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
1638 	bd->d_pblkshift = bd->d_blkshift;
1639 	bd->d_numblks = media.m_nblks;
1640 	bd->d_rdonly = media.m_readonly;
1641 	bd->d_ssd = media.m_solidstate;
1642 
1643 	/*
1644 	 * Only use the supplied physical block size if it is non-zero,
1645 	 * greater or equal to the block size, and a power of 2. Ignore it
1646 	 * if not, it's just informational and we can still use the media.
1647 	 */
1648 	if ((media.m_pblksize != 0) &&
1649 	    (media.m_pblksize >= media.m_blksize) &&
1650 	    (ISP2(media.m_pblksize)))
1651 		bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;
1652 
1653 done:
1654 	if (state != bd->d_state) {
1655 		bd->d_state = state;
1656 		cv_broadcast(&bd->d_statecv);
1657 		docmlb = B_TRUE;
1658 	}
1659 	mutex_exit(&bd->d_statemutex);
1660 
1661 	bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift;
1662 
1663 	if (docmlb) {
1664 		if (state == DKIO_INSERTED) {
1665 			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
1666 		} else {
1667 			cmlb_invalidate(bd->d_cmlbh, 0);
1668 		}
1669 	}
1670 }
1671 
1672 static int
1673 bd_check_state(bd_t *bd, enum dkio_state *state)
1674 {
1675 	clock_t		when;
1676 
1677 	for (;;) {
1678 
1679 		bd_update_state(bd);
1680 
1681 		mutex_enter(&bd->d_statemutex);
1682 
1683 		if (bd->d_state != *state) {
1684 			*state = bd->d_state;
1685 			mutex_exit(&bd->d_statemutex);
1686 			break;
1687 		}
1688 
1689 		when = drv_usectohz(1000000);
1690 		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
1691 		    when, TR_CLOCK_TICK) == 0) {
1692 			mutex_exit(&bd->d_statemutex);
1693 			return (EINTR);
1694 		}
1695 
1696 		mutex_exit(&bd->d_statemutex);
1697 	}
1698 
1699 	return (0);
1700 }
1701 
1702 static int
1703 bd_flush_write_cache_done(struct buf *bp)
1704 {
1705 	struct dk_callback *dc = (void *)bp->b_private;
1706 
1707 	(*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
1708 	kmem_free(dc, sizeof (*dc));
1709 	freerbuf(bp);
1710 	return (0);
1711 }
1712 
1713 static int
1714 bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
1715 {
1716 	buf_t			*bp;
1717 	struct dk_callback	*dc;
1718 	bd_xfer_impl_t		*xi;
1719 	int			rv;
1720 
1721 	if (bd->d_ops.o_sync_cache == NULL) {
1722 		return (ENOTSUP);
1723 	}
1724 	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
1725 		return (ENOMEM);
1726 	}
1727 	bp->b_resid = 0;
1728 	bp->b_bcount = 0;
1729 
1730 	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
1731 	if (xi == NULL) {
1732 		rv = geterror(bp);
1733 		freerbuf(bp);
1734 		return (rv);
1735 	}
1736 
1737 	/* Make an asynchronous flush, but only if there is a callback */
1738 	if (dkc != NULL && dkc->dkc_callback != NULL) {
1739 		/* Make a private copy of the callback structure */
1740 		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
1741 		*dc = *dkc;
1742 		bp->b_private = dc;
1743 		bp->b_iodone = bd_flush_write_cache_done;
1744 
1745 		bd_submit(bd, xi);
1746 		return (0);
1747 	}
1748 
1749 	/* In case there is no callback, perform a synchronous flush */
1750 	bd_submit(bd, xi);
1751 	(void) biowait(bp);
1752 	rv = geterror(bp);
1753 	freerbuf(bp);
1754 
1755 	return (rv);
1756 }
1757 
1758 /*
1759  * Nexus support.
1760  */
1761 int
1762 bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
1763     void *arg, void *result)
1764 {
1765 	bd_handle_t	hdl;
1766 
1767 	switch (ctlop) {
1768 	case DDI_CTLOPS_REPORTDEV:
1769 		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
1770 		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
1771 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
1772 		return (DDI_SUCCESS);
1773 
1774 	case DDI_CTLOPS_INITCHILD:
1775 		hdl = ddi_get_parent_data((dev_info_t *)arg);
1776 		if (hdl == NULL) {
1777 			return (DDI_NOT_WELL_FORMED);
1778 		}
1779 		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
1780 		return (DDI_SUCCESS);
1781 
1782 	case DDI_CTLOPS_UNINITCHILD:
1783 		ddi_set_name_addr((dev_info_t *)arg, NULL);
1784 		ndi_prop_remove_all((dev_info_t *)arg);
1785 		return (DDI_SUCCESS);
1786 
1787 	default:
1788 		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
1789 	}
1790 }
1791 
1792 /*
1793  * Functions for device drivers.
1794  */
1795 bd_handle_t
1796 bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
1797 {
1798 	bd_handle_t	hdl;
1799 
1800 	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
1801 	if (hdl != NULL) {
1802 		hdl->h_ops = *ops;
1803 		hdl->h_dma = dma;
1804 		hdl->h_private = private;
1805 	}
1806 
1807 	return (hdl);
1808 }
1809 
1810 void
1811 bd_free_handle(bd_handle_t hdl)
1812 {
1813 	kmem_free(hdl, sizeof (*hdl));
1814 }
1815 
1816 int
1817 bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
1818 {
1819 	dev_info_t	*child;
1820 	bd_drive_t	drive = { 0 };
1821 
1822 	/*
1823 	 * It's not an error if bd_attach_handle() is called on a handle that
1824 	 * already is attached. We just ignore the request to attach and return.
1825 	 * This way drivers using blkdev don't have to keep track about blkdev
1826 	 * state, they can just call this function to make sure it attached.
1827 	 */
1828 	if (hdl->h_child != NULL) {
1829 		return (DDI_SUCCESS);
1830 	}
1831 
1832 	/* if drivers don't override this, make it assume none */
1833 	drive.d_lun = -1;
1834 	hdl->h_ops.o_drive_info(hdl->h_private, &drive);
1835 
1836 	hdl->h_parent = dip;
1837 	hdl->h_name = "blkdev";
1838 
1839 	/*LINTED: E_BAD_PTR_CAST_ALIGN*/
1840 	if (*(uint64_t *)drive.d_eui64 != 0) {
1841 		if (drive.d_lun >= 0) {
1842 			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1843 			    "w%02X%02X%02X%02X%02X%02X%02X%02X,%X",
1844 			    drive.d_eui64[0], drive.d_eui64[1],
1845 			    drive.d_eui64[2], drive.d_eui64[3],
1846 			    drive.d_eui64[4], drive.d_eui64[5],
1847 			    drive.d_eui64[6], drive.d_eui64[7], drive.d_lun);
1848 		} else {
1849 			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1850 			    "w%02X%02X%02X%02X%02X%02X%02X%02X",
1851 			    drive.d_eui64[0], drive.d_eui64[1],
1852 			    drive.d_eui64[2], drive.d_eui64[3],
1853 			    drive.d_eui64[4], drive.d_eui64[5],
1854 			    drive.d_eui64[6], drive.d_eui64[7]);
1855 		}
1856 	} else {
1857 		if (drive.d_lun >= 0) {
1858 			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1859 			    "%X,%X", drive.d_target, drive.d_lun);
1860 		} else {
1861 			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1862 			    "%X", drive.d_target);
1863 		}
1864 	}
1865 
1866 	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
1867 	    &child) != NDI_SUCCESS) {
1868 		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
1869 		    ddi_driver_name(dip), ddi_get_instance(dip),
1870 		    "blkdev", hdl->h_addr);
1871 		return (DDI_FAILURE);
1872 	}
1873 
1874 	ddi_set_parent_data(child, hdl);
1875 	hdl->h_child = child;
1876 
1877 	if (ndi_devi_online(child, 0) == NDI_FAILURE) {
1878 		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
1879 		    ddi_driver_name(dip), ddi_get_instance(dip),
1880 		    hdl->h_name, hdl->h_addr);
1881 		(void) ndi_devi_free(child);
1882 		return (DDI_FAILURE);
1883 	}
1884 
1885 	return (DDI_SUCCESS);
1886 }
1887 
1888 int
1889 bd_detach_handle(bd_handle_t hdl)
1890 {
1891 	int	circ;
1892 	int	rv;
1893 	char	*devnm;
1894 
1895 	/*
1896 	 * It's not an error if bd_detach_handle() is called on a handle that
1897 	 * already is detached. We just ignore the request to detach and return.
1898 	 * This way drivers using blkdev don't have to keep track about blkdev
1899 	 * state, they can just call this function to make sure it detached.
1900 	 */
1901 	if (hdl->h_child == NULL) {
1902 		return (DDI_SUCCESS);
1903 	}
1904 	ndi_devi_enter(hdl->h_parent, &circ);
1905 	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
1906 		rv = ddi_remove_child(hdl->h_child, 0);
1907 	} else {
1908 		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
1909 		(void) ddi_deviname(hdl->h_child, devnm);
1910 		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
1911 		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
1912 		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
1913 		kmem_free(devnm, MAXNAMELEN + 1);
1914 	}
1915 	if (rv == 0) {
1916 		hdl->h_child = NULL;
1917 	}
1918 
1919 	ndi_devi_exit(hdl->h_parent, circ);
1920 	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
1921 }
1922 
1923 void
1924 bd_xfer_done(bd_xfer_t *xfer, int err)
1925 {
1926 	bd_xfer_impl_t	*xi = (void *)xfer;
1927 	buf_t		*bp = xi->i_bp;
1928 	int		rv = DDI_SUCCESS;
1929 	bd_t		*bd = xi->i_bd;
1930 	size_t		len;
1931 
1932 	if (err != 0) {
1933 		bd_runq_exit(xi, err);
1934 		atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);
1935 
1936 		bp->b_resid += xi->i_resid;
1937 		bd_xfer_free(xi);
1938 		bioerror(bp, err);
1939 		biodone(bp);
1940 		return;
1941 	}
1942 
1943 	xi->i_cur_win++;
1944 	xi->i_resid -= xi->i_len;
1945 
1946 	if (xi->i_resid == 0) {
1947 		/* Job completed succcessfully! */
1948 		bd_runq_exit(xi, 0);
1949 
1950 		bd_xfer_free(xi);
1951 		biodone(bp);
1952 		return;
1953 	}
1954 
1955 	xi->i_blkno += xi->i_nblks;
1956 
1957 	if (bd->d_use_dma) {
1958 		/* More transfer still pending... advance to next DMA window. */
1959 		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
1960 		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
1961 	} else {
1962 		/* Advance memory window. */
1963 		xi->i_kaddr += xi->i_len;
1964 		xi->i_offset += xi->i_len;
1965 		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
1966 	}
1967 
1968 
1969 	if ((rv != DDI_SUCCESS) ||
1970 	    (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
1971 		bd_runq_exit(xi, EFAULT);
1972 
1973 		bp->b_resid += xi->i_resid;
1974 		bd_xfer_free(xi);
1975 		bioerror(bp, EFAULT);
1976 		biodone(bp);
1977 		return;
1978 	}
1979 	xi->i_len = len;
1980 	xi->i_nblks = len >> xi->i_blkshift;
1981 
1982 	/* Submit next window to hardware. */
1983 	rv = xi->i_func(bd->d_private, &xi->i_public);
1984 	if (rv != 0) {
1985 		bd_runq_exit(xi, rv);
1986 
1987 		atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1988 
1989 		bp->b_resid += xi->i_resid;
1990 		bd_xfer_free(xi);
1991 		bioerror(bp, rv);
1992 		biodone(bp);
1993 	}
1994 }
1995 
1996 void
1997 bd_error(bd_xfer_t *xfer, int error)
1998 {
1999 	bd_xfer_impl_t	*xi = (void *)xfer;
2000 	bd_t		*bd = xi->i_bd;
2001 
2002 	switch (error) {
2003 	case BD_ERR_MEDIA:
2004 		atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32);
2005 		break;
2006 	case BD_ERR_NTRDY:
2007 		atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32);
2008 		break;
2009 	case BD_ERR_NODEV:
2010 		atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32);
2011 		break;
2012 	case BD_ERR_RECOV:
2013 		atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32);
2014 		break;
2015 	case BD_ERR_ILLRQ:
2016 		atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32);
2017 		break;
2018 	case BD_ERR_PFA:
2019 		atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32);
2020 		break;
2021 	default:
2022 		cmn_err(CE_PANIC, "bd_error: unknown error type %d", error);
2023 		break;
2024 	}
2025 }
2026 
2027 void
2028 bd_state_change(bd_handle_t hdl)
2029 {
2030 	bd_t		*bd;
2031 
2032 	if ((bd = hdl->h_bd) != NULL) {
2033 		bd_update_state(bd);
2034 	}
2035 }
2036 
2037 void
2038 bd_mod_init(struct dev_ops *devops)
2039 {
2040 	static struct bus_ops bd_bus_ops = {
2041 		BUSO_REV,		/* busops_rev */
2042 		nullbusmap,		/* bus_map */
2043 		NULL,			/* bus_get_intrspec (OBSOLETE) */
2044 		NULL,			/* bus_add_intrspec (OBSOLETE) */
2045 		NULL,			/* bus_remove_intrspec (OBSOLETE) */
2046 		i_ddi_map_fault,	/* bus_map_fault */
2047 		NULL,			/* bus_dma_map (OBSOLETE) */
2048 		ddi_dma_allochdl,	/* bus_dma_allochdl */
2049 		ddi_dma_freehdl,	/* bus_dma_freehdl */
2050 		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
2051 		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
2052 		ddi_dma_flush,		/* bus_dma_flush */
2053 		ddi_dma_win,		/* bus_dma_win */
2054 		ddi_dma_mctl,		/* bus_dma_ctl */
2055 		bd_bus_ctl,		/* bus_ctl */
2056 		ddi_bus_prop_op,	/* bus_prop_op */
2057 		NULL,			/* bus_get_eventcookie */
2058 		NULL,			/* bus_add_eventcall */
2059 		NULL,			/* bus_remove_eventcall */
2060 		NULL,			/* bus_post_event */
2061 		NULL,			/* bus_intr_ctl (OBSOLETE) */
2062 		NULL,			/* bus_config */
2063 		NULL,			/* bus_unconfig */
2064 		NULL,			/* bus_fm_init */
2065 		NULL,			/* bus_fm_fini */
2066 		NULL,			/* bus_fm_access_enter */
2067 		NULL,			/* bus_fm_access_exit */
2068 		NULL,			/* bus_power */
2069 		NULL,			/* bus_intr_op */
2070 	};
2071 
2072 	devops->devo_bus_ops = &bd_bus_ops;
2073 
2074 	/*
2075 	 * NB: The device driver is free to supply its own
2076 	 * character entry device support.
2077 	 */
2078 }
2079 
2080 void
2081 bd_mod_fini(struct dev_ops *devops)
2082 {
2083 	devops->devo_bus_ops = NULL;
2084 }
2085