/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * SCSI disk target driver.
 */

#include <sys/scsi/scsi.h>
#include <sys/dkbad.h>
#include <sys/dklabel.h>
#include <sys/dkio.h>
#include <sys/fdio.h>
#include <sys/cdio.h>
#include <sys/mhd.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/note.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/efi_partition.h>
#include <sys/var.h>
#include <sys/aio_req.h>
#if (defined(__fibre))
/* Note: is there a leadville version of the following? */
#include <sys/fc4/fcal_linkapp.h>
#endif
#include <sys/taskq.h>
#include <sys/uuid.h>
#include <sys/byteorder.h>
#include <sys/sdt.h>

#include "sd_xbuf.h"

#include <sys/scsi/targets/sddef.h>


/*
 * Loadable module info.
 */
#if (defined(__fibre))
#define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
char _depends_on[]	= "misc/scsi drv/fcp";
#else
#define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
char _depends_on[]	= "misc/scsi";
#endif

/*
 * Define the interconnect type, to allow the driver to distinguish
 * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
 *
 * This is really for backward compatibility. In the future, the driver
 * should actually check the "interconnect-type" property as reported by
 * the HBA; however at present this property is not defined by all HBAs,
 * so we will use this #define (1) to permit the driver to run in
 * backward-compatibility mode; and (2) to print a notification message
 * if an FC HBA does not support the "interconnect-type" property.  The
 * behavior of the driver will be to assume parallel SCSI behaviors unless
 * the "interconnect-type" property is defined by the HBA **AND** has a
 * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
 * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
 * Channel behaviors (as per the old ssd).  (Note that the
 * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
 * will result in the driver assuming parallel SCSI behaviors.)
 *
 * (see common/sys/scsi/impl/services.h)
 *
 * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
 * since some FC HBAs may already support that, and there is some code in
 * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
 * default would confuse that code, and besides things should work fine
 * anyway if the FC HBA already reports INTERCONNECT_FABRIC for the
 * "interconnect-type" property.
 */
#if (defined(__fibre))
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
#else
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
#endif
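
/*
 * Illustrative sketch (an assumption, not code from this driver): the
 * "interconnect-type" property check described above could be done
 * with the standard DDI property interface, e.g.
 *
 *	int itype = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
 *	    DDI_PROP_DONTPASS, "interconnect-type", -1);
 *
 *	switch (itype) {
 *	case INTERCONNECT_SSA:
 *	case INTERCONNECT_FIBRE:
 *		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
 *		break;
 *	case INTERCONNECT_FABRIC:
 *		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
 *		break;
 *	default:
 *		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
 *		break;
 *	}
 *
 * where devi and un are the usual dev_info/soft-state pointers and -1
 * serves as the "property not defined" default.
 */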

/*
 * The name of the driver, established from the module name in _init.
 */
static	char *sd_label			= NULL;

/*
 * Driver name is unfortunately prefixed on some driver.conf properties.
 */
#if (defined(__fibre))
#define	sd_max_xfer_size		ssd_max_xfer_size
#define	sd_config_list			ssd_config_list
static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
static	char *sd_config_list		= "ssd-config-list";
#else
static	char *sd_max_xfer_size		= "sd_max_xfer_size";
static	char *sd_config_list		= "sd-config-list";
#endif

/*
 * Driver global variables
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All global variables need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this namespace issue should go away.
 */
#define	sd_state			ssd_state
#define	sd_io_time			ssd_io_time
#define	sd_failfast_enable		ssd_failfast_enable
#define	sd_ua_retry_count		ssd_ua_retry_count
#define	sd_report_pfa			ssd_report_pfa
#define	sd_max_throttle			ssd_max_throttle
#define	sd_min_throttle			ssd_min_throttle
#define	sd_rot_delay			ssd_rot_delay

#define	sd_retry_on_reservation_conflict	\
					ssd_retry_on_reservation_conflict
#define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
#define	sd_resv_conflict_name		ssd_resv_conflict_name

#define	sd_component_mask		ssd_component_mask
#define	sd_level_mask			ssd_level_mask
#define	sd_debug_un			ssd_debug_un
#define	sd_error_level			ssd_error_level

#define	sd_xbuf_active_limit		ssd_xbuf_active_limit
#define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit

#define	sd_tr				ssd_tr
#define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
#define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
#define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
#define	sd_check_media_time		ssd_check_media_time
#define	sd_wait_cmds_complete		ssd_wait_cmds_complete
#define	sd_label_mutex			ssd_label_mutex
#define	sd_detach_mutex			ssd_detach_mutex
#define	sd_log_buf			ssd_log_buf
#define	sd_log_mutex			ssd_log_mutex

#define	sd_disk_table			ssd_disk_table
#define	sd_disk_table_size		ssd_disk_table_size
#define	sd_sense_mutex			ssd_sense_mutex
#define	sd_cdbtab			ssd_cdbtab

#define	sd_cb_ops			ssd_cb_ops
#define	sd_ops				ssd_ops
#define	sd_additional_codes		ssd_additional_codes

#define	sd_minor_data			ssd_minor_data
#define	sd_minor_data_efi		ssd_minor_data_efi

#define	sd_tq				ssd_tq
#define	sd_wmr_tq			ssd_wmr_tq
#define	sd_taskq_name			ssd_taskq_name
#define	sd_wmr_taskq_name		ssd_wmr_taskq_name
#define	sd_taskq_minalloc		ssd_taskq_minalloc
#define	sd_taskq_maxalloc		ssd_taskq_maxalloc

#define	sd_dump_format_string		ssd_dump_format_string

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain

#define	sd_pm_idletime			ssd_pm_idletime

#define	sd_force_pm_supported		ssd_force_pm_supported

#define	sd_dtype_optical_bind		ssd_dtype_optical_bind

#endif


#ifdef	SDDEBUG
int	sd_force_pm_supported		= 0;
#endif	/* SDDEBUG */

void *sd_state				= NULL;
int sd_io_time				= SD_IO_TIME;
int sd_failfast_enable			= 1;
int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
int sd_report_pfa			= 1;
int sd_max_throttle			= SD_MAX_THROTTLE;
int sd_min_throttle			= SD_MIN_THROTTLE;
int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
int sd_qfull_throttle_enable		= TRUE;

int sd_retry_on_reservation_conflict	= 1;
int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))

static int sd_dtype_optical_bind	= -1;

/* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";

/*
 * Global data for debug logging. To enable debug printing, sd_component_mask
 * and sd_level_mask should be set to the desired bit patterns as outlined in
 * sddef.h.
 */
uint_t	sd_component_mask		= 0x0;
uint_t	sd_level_mask			= 0x0;
struct	sd_lun *sd_debug_un		= NULL;
uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;
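
/*
 * Example (illustrative): these globals can be patched at boot time
 * via /etc/system, e.g.
 *
 *	set sd:sd_component_mask = 0xffffffff
 *	set sd:sd_level_mask = 0xffffffff
 *
 * to request all debug components at all levels; the individual bit
 * definitions are in sddef.h.
 */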

/* Note: these may go away in the future... */
static uint32_t	sd_xbuf_active_limit	= 512;
static uint32_t sd_xbuf_reserve_limit	= 16;

static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };

/*
 * Timer value used to reset the throttle after it has been reduced
 * (typically in response to TRAN_BUSY or STATUS_QFULL)
 */
static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;

/*
 * Interval value associated with the media change scsi watch.
 */
static int sd_check_media_time		= 3000000;

/*
 * Wait value used for in-progress operations during a DDI_SUSPEND
 */
static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;

/*
 * sd_label_mutex protects a static buffer used in the disk label
 * component of the driver
 */
static kmutex_t sd_label_mutex;

/*
 * sd_detach_mutex protects un_layer_count, un_detach_count, and
 * un_opens_in_progress in the sd_lun structure.
 */
static kmutex_t sd_detach_mutex;

_NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))

/*
 * Global buffer and mutex for debug logging
 */
static char	sd_log_buf[1024];
static kmutex_t	sd_log_mutex;


/*
 * "Smart" Probe Caching structs, globals, #defines, etc.
 * For parallel scsi and non-self-identifying devices only.
 */

/*
 * The following resources and routines are implemented to support
 * "smart" probing, which caches the scsi_probe() results in an array,
 * in order to help avoid long probe times.
 */
struct sd_scsi_probe_cache {
	struct	sd_scsi_probe_cache	*next;
	dev_info_t	*pdip;
	int		cache[NTARGETS_WIDE];
};

static kmutex_t	sd_scsi_probe_cache_mutex;
static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;

/*
 * Really we only need protection on the head of the linked list, but
 * better safe than sorry.
 */
_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache_head))
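
/*
 * Illustrative sketch (an assumption about usage, not code from this
 * driver): a cached probe of target "tgt" behind the parent dip "pdip"
 * might consult the cache before issuing a slow scsi_probe():
 *
 *	struct sd_scsi_probe_cache *cp;
 *	int rval = SCSIPROBE_NORESP;
 *
 *	mutex_enter(&sd_scsi_probe_cache_mutex);
 *	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
 *		if (cp->pdip == pdip)
 *			break;
 *	}
 *	if (cp != NULL)
 *		rval = cp->cache[tgt];
 *	mutex_exit(&sd_scsi_probe_cache_mutex);
 *
 * with a real probe performed (and its result cached) only when no
 * usable cached result is found.
 */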


/*
 * Vendor specific data name property declarations
 */

#if defined(__fibre) || defined(__i386) || defined(__amd64)

static sd_tunables seagate_properties = {
	SEAGATE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};


static sd_tunables fujitsu_properties = {
	FUJITSU_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables ibm_properties = {
	IBM_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables purple_properties = {
	PURPLE_THROTTLE_VALUE,
	0,
	0,
	PURPLE_BUSY_RETRIES,
	PURPLE_RESET_RETRY_COUNT,
	PURPLE_RESERVE_RELEASE_TIME,
	0,
	0,
	0
};

static sd_tunables sve_properties = {
	SVE_THROTTLE_VALUE,
	0,
	0,
	SVE_BUSY_RETRIES,
	SVE_RESET_RETRY_COUNT,
	SVE_RESERVE_RELEASE_TIME,
	SVE_MIN_THROTTLE_VALUE,
	SVE_DISKSORT_DISABLED_FLAG,
	0
};

static sd_tunables maserati_properties = {
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	MASERATI_DISKSORT_DISABLED_FLAG,
	MASERATI_LUN_RESET_ENABLED_FLAG
};

static sd_tunables pirus_properties = {
	PIRUS_THROTTLE_VALUE,
	0,
	PIRUS_NRR_COUNT,
	PIRUS_BUSY_RETRIES,
	PIRUS_RESET_RETRY_COUNT,
	0,
	PIRUS_MIN_THROTTLE_VALUE,
	PIRUS_DISKSORT_DISABLED_FLAG,
	PIRUS_LUN_RESET_ENABLED_FLAG
};

#endif

#if (defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64))


static sd_tunables elite_properties = {
	ELITE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables st31200n_properties = {
	ST31200N_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

#endif /* Fibre or not */

static sd_tunables lsi_properties_scsi = {
	LSI_THROTTLE_VALUE,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables symbios_properties = {
	SYMBIOS_THROTTLE_VALUE,
	0,
	SYMBIOS_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties = {
	0,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_oem_properties = {
	0,
	0,
	LSI_OEM_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};



#if (defined(SD_PROP_TST))

#define	SD_TST_CTYPE_VAL	CTYPE_CDROM
#define	SD_TST_THROTTLE_VAL	16
#define	SD_TST_NOTREADY_VAL	12
#define	SD_TST_BUSY_VAL		60
#define	SD_TST_RST_RETRY_VAL	36
#define	SD_TST_RSV_REL_TIME	60

static sd_tunables tst_properties = {
	SD_TST_THROTTLE_VAL,
	SD_TST_CTYPE_VAL,
	SD_TST_NOTREADY_VAL,
	SD_TST_BUSY_VAL,
	SD_TST_RST_RETRY_VAL,
	SD_TST_RSV_REL_TIME,
	0,
	0,
	0
};
#endif

/* This is similar to the ANSI toupper implementation */
#define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
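
/*
 * Example: SD_TOUPPER('a') yields 'A'; any other character, such as
 * '3' or 'A', is returned unchanged.  Like most function-like macros,
 * SD_TOUPPER evaluates its argument more than once, so it must not be
 * given an expression with side effects, e.g. SD_TOUPPER(*p++).
 */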

/*
 * Static Driver Configuration Table
 *
 * This is the table of disks which need throttle adjustment (or, perhaps
 * something else as defined by the flags at a future time.)  device_id
 * is a string consisting of concatenated vid (vendor), pid (product/model)
 * and revision strings as defined in the scsi_inquiry structure.  Offsets of
 * the parts of the string are as defined by the sizes in the scsi_inquiry
 * structure.  Device type is searched as far as the device_id string is
 * defined.  Flags defines which values are to be set in the driver from the
 * properties list.
 *
 * Entries below which begin and end with a "*" are a special case.
 * These do not have a specific vendor, and the string which follows
 * can appear anywhere in the 16 byte PID portion of the inquiry data.
 *
 * Entries below which begin and end with a " " (blank) are a special
 * case. The comparison function will treat multiple consecutive blanks
 * as equivalent to a single blank. For example, this causes a
 * sd_disk_table entry of " NEC CDROM " to match a device's id string
 * of  "NEC       CDROM".
 *
 * Note: The MD21 controller type has been obsoleted.
 *	 ST318202F is a Legacy device
 *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
 *	 made with an FC connection. The entries here are a legacy.
 */
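/*
 * Worked example (illustrative): the "SEAGATE ST34371FC" entry below
 * spans the 8-byte inq_vid field ("SEAGATE ") plus the start of the
 * 16-byte inq_pid field ("ST34371FC"); since only the bytes defined
 * by the device_id string are compared, any revision string matches.
 */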
static sd_disk_config_t sd_disk_table[] = {
#if defined(__fibre) || defined(__i386) || defined(__amd64)
	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_RSV_REL_TIME|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED,
		&sve_properties },
	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&maserati_properties },
	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
#endif /* fibre or NON-sparc platforms */
#if ((defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64)))
	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
	    &symbios_properties },
	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
	    &lsi_properties_scsi },
#if defined(__i386) || defined(__amd64)
	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },
#endif /* __i386 || __amd64 */
#endif /* sparc NON-fibre or NON-sparc platforms */

#if (defined(SD_PROP_TST))
	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
				| SD_CONF_BSET_CTYPE
				| SD_CONF_BSET_NRR_COUNT
				| SD_CONF_BSET_FAB_DEVID
				| SD_CONF_BSET_NOCACHE
				| SD_CONF_BSET_BSY_RETRY_COUNT
				| SD_CONF_BSET_PLAYMSF_BCD
				| SD_CONF_BSET_READSUB_BCD
				| SD_CONF_BSET_READ_TOC_TRK_BCD
				| SD_CONF_BSET_READ_TOC_ADDR_BCD
				| SD_CONF_BSET_NO_READ_HEADER
				| SD_CONF_BSET_READ_CD_XD4
				| SD_CONF_BSET_RST_RETRIES
				| SD_CONF_BSET_RSV_REL_TIME
				| SD_CONF_BSET_TUR_CHECK), &tst_properties },
#endif
};

static const int sd_disk_table_size =
	sizeof (sd_disk_table) / sizeof (sd_disk_config_t);


/*
 * Return codes of sd_uselabel().
 */
#define	SD_LABEL_IS_VALID		0
#define	SD_LABEL_IS_INVALID		1

#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_IS_PARALLEL_SCSI(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)

/*
 * Definitions used by device id registration routines
 */
#define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
#define	VPD_PAGE_LENGTH		3	/* offset for page length data */
#define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
#define	WD_NODE			7	/* the whole disk minor */

static kmutex_t sd_sense_mutex = {0};

/*
 * Macros for updates of the driver state
 */
#define	New_state(un, s)        \
	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
#define	Restore_state(un)	\
	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
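
/*
 * Example: New_state(un, SD_STATE_SUSPENDED) records the current state
 * in un_last_state before switching (note the comma operator), so a
 * later Restore_state(un) swaps back to the saved state while
 * remembering the suspended state as the new un_last_state.
 * (SD_STATE_SUSPENDED is one of the state values from sddef.h; the
 * call shown is illustrative.)
 */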

static struct sd_cdbinfo sd_cdbtab[] = {
	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
};
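
/*
 * Reading the table above: each row gives the CDB size (CDB_GROUP0 is
 * a 6-byte CDB, CDB_GROUP1 10 bytes, CDB_GROUP5 12 bytes, CDB_GROUP4
 * 16 bytes), the SCSI group code, the largest addressable LBA, and
 * the largest block count per command.  For example, a Group 0 CDB is
 * limited to a 21-bit LBA (0x1FFFFF) with an 8-bit count (0xFF),
 * while a Group 4 CDB carries a 64-bit LBA with a 32-bit count.
 */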

/*
 * Specifies the number of seconds that must have elapsed since the last
 * command completed for a device to be declared idle to the PM framework.
 */
static int sd_pm_idletime = 1;

/*
 * Internal function prototypes
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All function names need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this ugliness should go away.
 */
#define	sd_log_trace			ssd_log_trace
#define	sd_log_info			ssd_log_info
#define	sd_log_err			ssd_log_err
#define	sdprobe				ssdprobe
#define	sdinfo				ssdinfo
#define	sd_prop_op			ssd_prop_op
#define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
#define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
#define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
#define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
#define	sd_spin_up_unit			ssd_spin_up_unit
#define	sd_enable_descr_sense		ssd_enable_descr_sense
#define	sd_set_mmc_caps			ssd_set_mmc_caps
#define	sd_read_unit_properties		ssd_read_unit_properties
#define	sd_process_sdconf_file		ssd_process_sdconf_file
#define	sd_process_sdconf_table		ssd_process_sdconf_table
#define	sd_sdconf_id_match		ssd_sdconf_id_match
#define	sd_blank_cmp			ssd_blank_cmp
#define	sd_chk_vers1_data		ssd_chk_vers1_data
#define	sd_set_vers1_properties		ssd_set_vers1_properties
#define	sd_validate_geometry		ssd_validate_geometry

#if defined(_SUNOS_VTOC_16)
#define	sd_convert_geometry		ssd_convert_geometry
#endif

#define	sd_resync_geom_caches		ssd_resync_geom_caches
#define	sd_read_fdisk			ssd_read_fdisk
#define	sd_get_physical_geometry	ssd_get_physical_geometry
#define	sd_get_virtual_geometry		ssd_get_virtual_geometry
#define	sd_update_block_info		ssd_update_block_info
#define	sd_swap_efi_gpt			ssd_swap_efi_gpt
#define	sd_swap_efi_gpe			ssd_swap_efi_gpe
#define	sd_validate_efi			ssd_validate_efi
#define	sd_use_efi			ssd_use_efi
#define	sd_uselabel			ssd_uselabel
#define	sd_build_default_label		ssd_build_default_label
#define	sd_has_max_chs_vals		ssd_has_max_chs_vals
#define	sd_inq_fill			ssd_inq_fill
#define	sd_register_devid		ssd_register_devid
#define	sd_get_devid_block		ssd_get_devid_block
#define	sd_get_devid			ssd_get_devid
#define	sd_create_devid			ssd_create_devid
#define	sd_write_deviceid		ssd_write_deviceid
#define	sd_check_vpd_page_support	ssd_check_vpd_page_support
#define	sd_setup_pm			ssd_setup_pm
#define	sd_create_pm_components		ssd_create_pm_components
#define	sd_ddi_suspend			ssd_ddi_suspend
#define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
#define	sd_ddi_resume			ssd_ddi_resume
#define	sd_ddi_pm_resume		ssd_ddi_pm_resume
#define	sdpower				ssdpower
#define	sdattach			ssdattach
#define	sddetach			ssddetach
#define	sd_unit_attach			ssd_unit_attach
#define	sd_unit_detach			ssd_unit_detach
#define	sd_create_minor_nodes		ssd_create_minor_nodes
#define	sd_create_errstats		ssd_create_errstats
#define	sd_set_errstats			ssd_set_errstats
#define	sd_set_pstats			ssd_set_pstats
#define	sddump				ssddump
#define	sd_scsi_poll			ssd_scsi_poll
#define	sd_send_polled_RQS		ssd_send_polled_RQS
#define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
#define	sd_init_event_callbacks		ssd_init_event_callbacks
#define	sd_event_callback		ssd_event_callback
#define	sd_disable_caching		ssd_disable_caching
#define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
#define	sd_make_device			ssd_make_device
#define	sdopen				ssdopen
#define	sdclose				ssdclose
#define	sd_ready_and_valid		ssd_ready_and_valid
#define	sdmin				ssdmin
#define	sdread				ssdread
#define	sdwrite				ssdwrite
#define	sdaread				ssdaread
#define	sdawrite			ssdawrite
#define	sdstrategy			ssdstrategy
#define	sdioctl				ssdioctl
#define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
#define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
#define	sd_checksum_iostart		ssd_checksum_iostart
#define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
#define	sd_pm_iostart			ssd_pm_iostart
#define	sd_core_iostart			ssd_core_iostart
#define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
#define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
#define	sd_checksum_iodone		ssd_checksum_iodone
#define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
#define	sd_pm_iodone			ssd_pm_iodone
#define	sd_initpkt_for_buf		ssd_initpkt_for_buf
#define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
#define	sd_setup_rw_pkt			ssd_setup_rw_pkt
#define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
#define	sd_buf_iodone			ssd_buf_iodone
#define	sd_uscsi_strategy		ssd_uscsi_strategy
#define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
#define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
#define	sd_uscsi_iodone			ssd_uscsi_iodone
#define	sd_xbuf_strategy		ssd_xbuf_strategy
#define	sd_xbuf_init			ssd_xbuf_init
#define	sd_pm_entry			ssd_pm_entry
#define	sd_pm_exit			ssd_pm_exit

#define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
#define	sd_pm_timeout_handler		ssd_pm_timeout_handler

#define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
#define	sdintr				ssdintr
#define	sd_start_cmds			ssd_start_cmds
#define	sd_send_scsi_cmd		ssd_send_scsi_cmd
#define	sd_bioclone_alloc		ssd_bioclone_alloc
#define	sd_bioclone_free		ssd_bioclone_free
#define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
#define	sd_shadow_buf_free		ssd_shadow_buf_free
#define	sd_print_transport_rejected_message	\
					ssd_print_transport_rejected_message
#define	sd_retry_command		ssd_retry_command
#define	sd_set_retry_bp			ssd_set_retry_bp
#define	sd_send_request_sense_command	ssd_send_request_sense_command
#define	sd_start_retry_command		ssd_start_retry_command
#define	sd_start_direct_priority_command	\
					ssd_start_direct_priority_command
#define	sd_return_failed_command	ssd_return_failed_command
#define	sd_return_failed_command_no_restart	\
					ssd_return_failed_command_no_restart
#define	sd_return_command		ssd_return_command
#define	sd_sync_with_callback		ssd_sync_with_callback
#define	sdrunout			ssdrunout
#define	sd_mark_rqs_busy		ssd_mark_rqs_busy
#define	sd_mark_rqs_idle		ssd_mark_rqs_idle
#define	sd_reduce_throttle		ssd_reduce_throttle
#define	sd_restore_throttle		ssd_restore_throttle
#define	sd_print_incomplete_msg		ssd_print_incomplete_msg
#define	sd_init_cdb_limits		ssd_init_cdb_limits
#define	sd_pkt_status_good		ssd_pkt_status_good
#define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
#define	sd_pkt_status_busy		ssd_pkt_status_busy
#define	sd_pkt_status_reservation_conflict	\
					ssd_pkt_status_reservation_conflict
#define	sd_pkt_status_qfull		ssd_pkt_status_qfull
#define	sd_handle_request_sense		ssd_handle_request_sense
#define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
#define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
#define	sd_validate_sense_data		ssd_validate_sense_data
#define	sd_decode_sense			ssd_decode_sense
#define	sd_print_sense_msg		ssd_print_sense_msg
#define	sd_extract_sense_info_descr	ssd_extract_sense_info_descr
#define	sd_sense_key_no_sense		ssd_sense_key_no_sense
#define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
#define	sd_sense_key_not_ready		ssd_sense_key_not_ready
#define	sd_sense_key_medium_or_hardware_error	\
					ssd_sense_key_medium_or_hardware_error
#define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
#define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
#define	sd_sense_key_fail_command	ssd_sense_key_fail_command
#define	sd_sense_key_blank_check	ssd_sense_key_blank_check
#define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
#define	sd_sense_key_default		ssd_sense_key_default
#define	sd_print_retry_msg		ssd_print_retry_msg
#define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
#define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
#define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
#define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
#define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
#define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
#define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
#define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
#define	sd_pkt_reason_default		ssd_pkt_reason_default
#define	sd_reset_target			ssd_reset_target
#define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
#define	sd_start_stop_unit_task		ssd_start_stop_unit_task
#define	sd_taskq_create			ssd_taskq_create
#define	sd_taskq_delete			ssd_taskq_delete
#define	sd_media_change_task		ssd_media_change_task
#define	sd_handle_mchange		ssd_handle_mchange
#define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
#define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
#define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
#define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
#define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
#define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
#define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
#define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
					ssd_send_scsi_PERSISTENT_RESERVE_IN
#define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
					ssd_send_scsi_PERSISTENT_RESERVE_OUT
#define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
#define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
#define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
#define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
#define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
#define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
#define	sd_alloc_rqs			ssd_alloc_rqs
#define	sd_free_rqs			ssd_free_rqs
#define	sd_dump_memory			ssd_dump_memory
#define	sd_uscsi_ioctl			ssd_uscsi_ioctl
#define	sd_get_media_info		ssd_get_media_info
#define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
#define	sd_dkio_get_geometry		ssd_dkio_get_geometry
#define	sd_dkio_set_geometry		ssd_dkio_set_geometry
#define	sd_dkio_get_partition		ssd_dkio_get_partition
#define	sd_dkio_set_partition		ssd_dkio_set_partition
#define	sd_dkio_partition		ssd_dkio_partition
#define	sd_dkio_get_vtoc		ssd_dkio_get_vtoc
#define	sd_dkio_get_efi			ssd_dkio_get_efi
#define	sd_build_user_vtoc		ssd_build_user_vtoc
#define	sd_dkio_set_vtoc		ssd_dkio_set_vtoc
#define	sd_dkio_set_efi			ssd_dkio_set_efi
#define	sd_build_label_vtoc		ssd_build_label_vtoc
#define	sd_write_label			ssd_write_label
#define	sd_clear_vtoc			ssd_clear_vtoc
#define	sd_clear_efi			ssd_clear_efi
#define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
#define	sd_setup_next_xfer		ssd_setup_next_xfer
#define	sd_dkio_get_temp		ssd_dkio_get_temp
#define	sd_dkio_get_mboot		ssd_dkio_get_mboot
#define	sd_dkio_set_mboot		ssd_dkio_set_mboot
#define	sd_setup_default_geometry	ssd_setup_default_geometry
#define	sd_update_fdisk_and_vtoc	ssd_update_fdisk_and_vtoc
#define	sd_check_mhd			ssd_check_mhd
#define	sd_mhd_watch_cb			ssd_mhd_watch_cb
#define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
#define	sd_sname			ssd_sname
#define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
#define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
#define	sd_take_ownership		ssd_take_ownership
#define	sd_reserve_release		ssd_reserve_release
#define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
#define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
#define	sd_persistent_reservation_in_read_keys	\
					ssd_persistent_reservation_in_read_keys
#define	sd_persistent_reservation_in_read_resv	\
					ssd_persistent_reservation_in_read_resv
#define	sd_mhdioc_takeown		ssd_mhdioc_takeown
#define	sd_mhdioc_failfast		ssd_mhdioc_failfast
#define	sd_mhdioc_release		ssd_mhdioc_release
#define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
#define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
#define	sd_mhdioc_inresv		ssd_mhdioc_inresv
#define	sr_change_blkmode		ssr_change_blkmode
#define	sr_change_speed			ssr_change_speed
#define	sr_atapi_change_speed		ssr_atapi_change_speed
#define	sr_pause_resume			ssr_pause_resume
#define	sr_play_msf			ssr_play_msf
#define	sr_play_trkind			ssr_play_trkind
#define	sr_read_all_subcodes		ssr_read_all_subcodes
#define	sr_read_subchannel		ssr_read_subchannel
#define	sr_read_tocentry		ssr_read_tocentry
#define	sr_read_tochdr			ssr_read_tochdr
#define	sr_read_cdda			ssr_read_cdda
#define	sr_read_cdxa			ssr_read_cdxa
#define	sr_read_mode1			ssr_read_mode1
#define	sr_read_mode2			ssr_read_mode2
#define	sr_read_cd_mode2		ssr_read_cd_mode2
#define	sr_sector_mode			ssr_sector_mode
#define	sr_eject			ssr_eject
#define	sr_ejected			ssr_ejected
#define	sr_check_wp			ssr_check_wp
#define	sd_check_media			ssd_check_media
#define	sd_media_watch_cb		ssd_media_watch_cb
#define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
#define	sr_volume_ctrl			ssr_volume_ctrl
#define	sr_read_sony_session_offset	ssr_read_sony_session_offset
#define	sd_log_page_supported		ssd_log_page_supported
#define	sd_check_for_writable_cd	ssd_check_for_writable_cd
#define	sd_wm_cache_constructor		ssd_wm_cache_constructor
#define	sd_wm_cache_destructor		ssd_wm_cache_destructor
#define	sd_range_lock			ssd_range_lock
#define	sd_get_range			ssd_get_range
#define	sd_free_inlist_wmap		ssd_free_inlist_wmap
#define	sd_range_unlock			ssd_range_unlock
#define	sd_read_modify_write_task	ssd_read_modify_write_task
#define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain
#define	sd_initpkt_map			ssd_initpkt_map
#define	sd_destroypkt_map		ssd_destroypkt_map
#define	sd_chain_type_map		ssd_chain_type_map
#define	sd_chain_index_map		ssd_chain_index_map

#define	sd_failfast_flushctl		ssd_failfast_flushctl
#define	sd_failfast_flushq		ssd_failfast_flushq
#define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback

#define	sd_is_lsi			ssd_is_lsi

#endif	/* #if (defined(__fibre)) */


int _init(void);
int _fini(void);
int _info(struct modinfo *modinfop);

/*PRINTFLIKE3*/
static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);

static int sdprobe(dev_info_t *devi);
static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result);
static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int mod_flags, char *name, caddr_t valuep, int *lengthp);

/*
 * Smart probe for parallel scsi
 */
static void sd_scsi_probe_cache_init(void);
static void sd_scsi_probe_cache_fini(void);
static void sd_scsi_clear_probe_cache(void);
static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());

static int	sd_spin_up_unit(struct sd_lun *un);
#ifdef _LP64
static void	sd_enable_descr_sense(struct sd_lun *un);
#endif /* _LP64 */
static void	sd_set_mmc_caps(struct sd_lun *un);

static void sd_read_unit_properties(struct sd_lun *un);
static int  sd_process_sdconf_file(struct sd_lun *un);
static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
    int *data_list, sd_tunables *values);
static void sd_process_sdconf_table(struct sd_lun *un);
static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
	int list_len, char *dataname_ptr);
static void sd_set_vers1_properties(struct sd_lun *un, int flags,
    sd_tunables *prop_list);
static int  sd_validate_geometry(struct sd_lun *un, int path_flag);

#if defined(_SUNOS_VTOC_16)
static void sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g);
#endif

static void sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
	int path_flag);
static int  sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize,
	int path_flag);
static void sd_get_physical_geometry(struct sd_lun *un,
	struct geom_cache *pgeom_p, int capacity, int lbasize, int path_flag);
static void sd_get_virtual_geometry(struct sd_lun *un, int capacity,
	int lbasize);
static int  sd_uselabel(struct sd_lun *un, struct dk_label *l, int path_flag);
static void sd_swap_efi_gpt(efi_gpt_t *);
static void sd_swap_efi_gpe(int nparts, efi_gpe_t *);
static int sd_validate_efi(efi_gpt_t *);
static int sd_use_efi(struct sd_lun *, int);
static void sd_build_default_label(struct sd_lun *un);

#if defined(_FIRMWARE_NEEDS_FDISK)
static int  sd_has_max_chs_vals(struct ipart *fdp);
#endif
static void sd_inq_fill(char *p, int l, char *s);


static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
    int reservation_flag);
static daddr_t  sd_get_devid_block(struct sd_lun *un);
static int  sd_get_devid(struct sd_lun *un);
static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
static ddi_devid_t sd_create_devid(struct sd_lun *un);
static int  sd_write_deviceid(struct sd_lun *un);
static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
static int  sd_check_vpd_page_support(struct sd_lun *un);

static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);

static int  sd_ddi_suspend(dev_info_t *devi);
static int  sd_ddi_pm_suspend(struct sd_lun *un);
static int  sd_ddi_resume(dev_info_t *devi);
static int  sd_ddi_pm_resume(struct sd_lun *un);
static int  sdpower(dev_info_t *devi, int component, int level);

static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int  sd_unit_attach(dev_info_t *devi);
static int  sd_unit_detach(dev_info_t *devi);

static int  sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi);
static void sd_create_errstats(struct sd_lun *un, int instance);
static void sd_set_errstats(struct sd_lun *un);
static void sd_set_pstats(struct sd_lun *un);

static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
static int  sd_send_polled_RQS(struct sd_lun *un);
static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);

#if (defined(__fibre))
/*
 * Event callbacks (photon)
 */
static void sd_init_event_callbacks(struct sd_lun *un);
static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
#endif


static int   sd_disable_caching(struct sd_lun *un);
static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
static dev_t sd_make_device(dev_info_t *devi);

static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
	uint64_t capacity);

/*
 * Driver entry point functions.
 */
static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
static int  sd_ready_and_valid(struct sd_lun *un);

static void sdmin(struct buf *bp);
static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);

static int sdstrategy(struct buf *bp);
static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * Function prototypes for layering functions in the iostart chain.
 */
static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);

/*
 * Function prototypes for layering functions in the iodone chain.
 */
static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);

/*
 * Prototypes for functions to support buf(9S) based IO.
 */
static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_buf(struct buf *);
static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
	struct buf *bp, int flags,
	int (*callback)(caddr_t), caddr_t callback_arg,
	diskaddr_t lba, uint32_t blockcount);
#if defined(__i386) || defined(__amd64)
static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
#endif /* defined(__i386) || defined(__amd64) */

/*
 * Prototypes for functions to support USCSI IO.
 */
static int sd_uscsi_strategy(struct buf *bp);
static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_uscsi(struct buf *);

static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	uchar_t chain_type, void *pktinfop);

static int  sd_pm_entry(struct sd_lun *un);
static void sd_pm_exit(struct sd_lun *un);

static void sd_pm_idletimeout_handler(void *arg);

/*
 * sd_core internal functions (used at the sd_core_io layer).
 */
static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
static void sdintr(struct scsi_pkt *pktp);
static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);

static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
	int path_flag);

static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
	daddr_t blkno, int (*func)(struct buf *));
static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
static void sd_bioclone_free(struct buf *bp);
static void sd_shadow_buf_free(struct buf *bp);

static void sd_print_transport_rejected_message(struct sd_lun *un,
	struct sd_xbuf *xp, int code);

static void sd_retry_command(struct sd_lun *un, struct buf *bp,
	int retry_check_flag,
	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
		int c),
	void *user_arg, int failure_code,  clock_t retry_delay,
	void (*statp)(kstat_io_t *));

static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
	clock_t retry_delay, void (*statp)(kstat_io_t *));

static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
	struct scsi_pkt *pktp);
static void sd_start_retry_command(void *arg);
static void sd_start_direct_priority_command(void *arg);
static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
	int errcode);
static void sd_return_failed_command_no_restart(struct sd_lun *un,
	struct buf *bp, int errcode);
static void sd_return_command(struct sd_lun *un, struct buf *bp);
static void sd_sync_with_callback(struct sd_lun *un);
static int sdrunout(caddr_t arg);

static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);

static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
static void sd_restore_throttle(void *arg);

static void sd_init_cdb_limits(struct sd_lun *un);

static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

/*
 * Error handling functions
 */
static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp);
static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int code);
static diskaddr_t sd_extract_sense_info_descr(
	struct scsi_descr_sense_hdr *sdsp);

static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_recoverable_error(struct sd_lun *un,
	uint8_t asc,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_not_ready(struct sd_lun *un,
	uint8_t asc, uint8_t ascq,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
	int sense_key, uint8_t asc,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t asc,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_default(struct sd_lun *un,
	int sense_key,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int flag);

static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);

static void sd_start_stop_unit_callback(void *arg);
static void sd_start_stop_unit_task(void *arg);

static void sd_taskq_create(void);
static void sd_taskq_delete(void);
static void sd_media_change_task(void *arg);

static int sd_handle_mchange(struct sd_lun *un);
static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
	int path_flag);
static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
	uchar_t usr_cmd, uchar_t *usr_bufp);
static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
	struct dk_callback *dkc);
static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
	uchar_t *bufaddr, uint_t buflen);
static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
	uchar_t *bufaddr, uint_t buflen, char feature);
static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
	size_t buflen, daddr_t start_block, int path_flag);
#define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
	path_flag)
#define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,\
	path_flag)
1403 
1404 static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
1405 	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1406 	uint16_t param_ptr, int path_flag);
1407 
1408 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1409 static void sd_free_rqs(struct sd_lun *un);
1410 
1411 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1412 	uchar_t *data, int len, int fmt);
1413 
1414 /*
1415  * Disk Ioctl Function Prototypes
1416  */
1417 static int sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag);
1418 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1419 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1420 static int sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag,
1421 	int geom_validated);
1422 static int sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag);
1423 static int sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag,
1424 	int geom_validated);
1425 static int sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag);
1426 static int sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag,
1427 	int geom_validated);
1428 static int sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag);
1429 static int sd_dkio_partition(dev_t dev, caddr_t arg, int flag);
1430 static void sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1431 static int sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag);
1432 static int sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag);
1433 static int sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1434 static int sd_write_label(dev_t dev);
1435 static int sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl);
1436 static void sd_clear_vtoc(struct sd_lun *un);
1437 static void sd_clear_efi(struct sd_lun *un);
1438 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1439 static int sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag);
1440 static int sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag);
1441 static void sd_setup_default_geometry(struct sd_lun *un);
1442 #if defined(__i386) || defined(__amd64)
1443 static int sd_update_fdisk_and_vtoc(struct sd_lun *un);
1444 #endif
1445 
1446 /*
1447  * Multi-host Ioctl Prototypes
1448  */
1449 static int sd_check_mhd(dev_t dev, int interval);
1450 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1451 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1452 static char *sd_sname(uchar_t status);
1453 static void sd_mhd_resvd_recover(void *arg);
1454 static void sd_resv_reclaim_thread();
1455 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1456 static int sd_reserve_release(dev_t dev, int cmd);
1457 static void sd_rmv_resv_reclaim_req(dev_t dev);
1458 static void sd_mhd_reset_notify_cb(caddr_t arg);
1459 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1460 	mhioc_inkeys_t *usrp, int flag);
1461 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1462 	mhioc_inresvs_t *usrp, int flag);
1463 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1464 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1465 static int sd_mhdioc_release(dev_t dev);
1466 static int sd_mhdioc_register_devid(dev_t dev);
1467 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1468 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1469 
1470 /*
1471  * SCSI removable prototypes
1472  */
1473 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1474 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1475 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1476 static int sr_pause_resume(dev_t dev, int mode);
1477 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1478 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1479 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1480 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1481 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1482 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1483 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1484 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1485 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1486 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1487 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1488 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1489 static int sr_eject(dev_t dev);
1490 static void sr_ejected(register struct sd_lun *un);
1491 static int sr_check_wp(dev_t dev);
1492 static int sd_check_media(dev_t dev, enum dkio_state state);
1493 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1494 static void sd_delayed_cv_broadcast(void *arg);
1495 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1496 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1497 
1498 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1499 
1500 /*
1501  * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1502  */
1503 static void sd_check_for_writable_cd(struct sd_lun *un);
1504 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1505 static void sd_wm_cache_destructor(void *wm, void *un);
1506 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1507 	daddr_t endb, ushort_t typ);
1508 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1509 	daddr_t endb);
1510 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1511 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1512 static void sd_read_modify_write_task(void * arg);
1513 static int
1514 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1515 	struct buf **bpp);
1516 
1517 
1518 /*
1519  * Function prototypes for failfast support.
1520  */
1521 static void sd_failfast_flushq(struct sd_lun *un);
1522 static int sd_failfast_flushq_callback(struct buf *bp);
1523 
1524 /*
1525  * Function prototypes to check for LSI devices
1526  */
1527 static void sd_is_lsi(struct sd_lun *un);
1528 
1529 /*
1530  * Function prototypes for x86 support
1531  */
1532 #if defined(__i386) || defined(__amd64)
1533 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1534 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1535 #endif
1536 
1537 /*
1538  * Constants for failfast support:
1539  *
1540  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1541  * failfast processing being performed.
1542  *
1543  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1544  * failfast processing on all bufs with B_FAILFAST set.
1545  */
1546 
1547 #define	SD_FAILFAST_INACTIVE		0
1548 #define	SD_FAILFAST_ACTIVE		1
1549 
1550 /*
1551  * Bitmask to control behavior of buf(9S) flushes when a transition to
1552  * the failfast state occurs. Optional bits include:
1553  *
1554  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1555  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1556  * be flushed.
1557  *
1558  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1559  * driver, in addition to the regular wait queue. This includes the xbuf
1560  * queues. When clear, only the driver's wait queue will be flushed.
1561  */
1562 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1563 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1564 
1565 /*
1566  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1567  * to flush all queues within the driver.
1568  */
1569 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
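/*
 * Illustrative sketch only (compiled out, not part of the driver): one
 * plausible way a flush routine could interpret the sd_failfast_flushctl
 * bits above.  The helper name sd_failfast_buf_flushable() is hypothetical;
 * the real policy lives in sd_failfast_flushq().
 */
#ifdef SD_FAILFAST_SKETCH
static int
sd_failfast_buf_flushable(struct buf *bp)
{
	/* With FLUSH_ALL_BUFS set, every queued buf is flushed. */
	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
		return (TRUE);
	}
	/* Otherwise only bufs that were marked B_FAILFAST are flushed. */
	return ((bp->b_flags & B_FAILFAST) ? TRUE : FALSE);
}
#endif	/* SD_FAILFAST_SKETCH */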
1570 
1571 
1572 /*
1573  * SD Testing Fault Injection
1574  */
1575 #ifdef SD_FAULT_INJECTION
1576 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1577 static void sd_faultinjection(struct scsi_pkt *pktp);
1578 static void sd_injection_log(char *buf, struct sd_lun *un);
1579 #endif
1580 
1581 /*
1582  * Device driver ops vector
1583  */
1584 static struct cb_ops sd_cb_ops = {
1585 	sdopen,			/* open */
1586 	sdclose,		/* close */
1587 	sdstrategy,		/* strategy */
1588 	nodev,			/* print */
1589 	sddump,			/* dump */
1590 	sdread,			/* read */
1591 	sdwrite,		/* write */
1592 	sdioctl,		/* ioctl */
1593 	nodev,			/* devmap */
1594 	nodev,			/* mmap */
1595 	nodev,			/* segmap */
1596 	nochpoll,		/* poll */
1597 	sd_prop_op,		/* cb_prop_op */
1598 	0,			/* streamtab  */
1599 	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1600 	CB_REV,			/* cb_rev */
1601 	sdaread, 		/* async I/O read entry point */
1602 	sdawrite		/* async I/O write entry point */
1603 };
1604 
1605 static struct dev_ops sd_ops = {
1606 	DEVO_REV,		/* devo_rev, */
1607 	0,			/* refcnt  */
1608 	sdinfo,			/* info */
1609 	nulldev,		/* identify */
1610 	sdprobe,		/* probe */
1611 	sdattach,		/* attach */
1612 	sddetach,		/* detach */
1613 	nodev,			/* reset */
1614 	&sd_cb_ops,		/* driver operations */
1615 	NULL,			/* bus operations */
1616 	sdpower			/* power */
1617 };
1618 
1619 
1620 /*
1621  * This is the loadable module wrapper.
1622  */
1623 #include <sys/modctl.h>
1624 
1625 static struct modldrv modldrv = {
1626 	&mod_driverops,		/* Type of module. This one is a driver */
1627 	SD_MODULE_NAME,		/* Module name. */
1628 	&sd_ops			/* driver ops */
1629 };
1630 
1631 
1632 static struct modlinkage modlinkage = {
1633 	MODREV_1,
1634 	&modldrv,
1635 	NULL
1636 };
1637 
1638 
1639 static struct scsi_asq_key_strings sd_additional_codes[] = {
1640 	0x81, 0, "Logical Unit is Reserved",
1641 	0x85, 0, "Audio Address Not Valid",
1642 	0xb6, 0, "Media Load Mechanism Failed",
1643 	0xB9, 0, "Audio Play Operation Aborted",
1644 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1645 	0x53, 2, "Medium removal prevented",
1646 	0x6f, 0, "Authentication failed during key exchange",
1647 	0x6f, 1, "Key not present",
1648 	0x6f, 2, "Key not established",
1649 	0x6f, 3, "Read without proper authentication",
1650 	0x6f, 4, "Mismatched region to this logical unit",
1651 	0x6f, 5, "Region reset count error",
1652 	0xffff, 0x0, NULL
1653 };
1654 
1655 
1656 /*
1657  * Struct for passing printing information for sense data messages
1658  */
1659 struct sd_sense_info {
1660 	int	ssi_severity;
1661 	int	ssi_pfa_flag;
1662 };
1663 
1664 /*
1665  * Table of function pointers for iostart-side routines. Separate "chains"
1666  * of layered function calls are formed by placing the function pointers
1667  * sequentially in the desired order. Functions are called according to an
1668  * incrementing table index ordering. The last function in each chain must
1669  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1670  * in the sd_iodone_chain[] array.
1671  *
1672  * Note: It may seem more natural to organize both the iostart and iodone
1673  * functions together, into an array of structures (or some similar
1674  * organization) with a common index, rather than two separate arrays which
1675  * must be maintained in synchronization. The purpose of this division is
1676  * to achieve improved performance: individual arrays allow for more
1677  * effective cache line utilization on certain platforms.
1678  */
1679 
1680 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1681 
1682 
1683 static sd_chain_t sd_iostart_chain[] = {
1684 
1685 	/* Chain for buf IO for disk drive targets (PM enabled) */
1686 	sd_mapblockaddr_iostart,	/* Index: 0 */
1687 	sd_pm_iostart,			/* Index: 1 */
1688 	sd_core_iostart,		/* Index: 2 */
1689 
1690 	/* Chain for buf IO for disk drive targets (PM disabled) */
1691 	sd_mapblockaddr_iostart,	/* Index: 3 */
1692 	sd_core_iostart,		/* Index: 4 */
1693 
1694 	/* Chain for buf IO for removable-media targets (PM enabled) */
1695 	sd_mapblockaddr_iostart,	/* Index: 5 */
1696 	sd_mapblocksize_iostart,	/* Index: 6 */
1697 	sd_pm_iostart,			/* Index: 7 */
1698 	sd_core_iostart,		/* Index: 8 */
1699 
1700 	/* Chain for buf IO for removable-media targets (PM disabled) */
1701 	sd_mapblockaddr_iostart,	/* Index: 9 */
1702 	sd_mapblocksize_iostart,	/* Index: 10 */
1703 	sd_core_iostart,		/* Index: 11 */
1704 
1705 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1706 	sd_mapblockaddr_iostart,	/* Index: 12 */
1707 	sd_checksum_iostart,		/* Index: 13 */
1708 	sd_pm_iostart,			/* Index: 14 */
1709 	sd_core_iostart,		/* Index: 15 */
1710 
1711 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1712 	sd_mapblockaddr_iostart,	/* Index: 16 */
1713 	sd_checksum_iostart,		/* Index: 17 */
1714 	sd_core_iostart,		/* Index: 18 */
1715 
1716 	/* Chain for USCSI commands (all targets) */
1717 	sd_pm_iostart,			/* Index: 19 */
1718 	sd_core_iostart,		/* Index: 20 */
1719 
1720 	/* Chain for checksumming USCSI commands (all targets) */
1721 	sd_checksum_uscsi_iostart,	/* Index: 21 */
1722 	sd_pm_iostart,			/* Index: 22 */
1723 	sd_core_iostart,		/* Index: 23 */
1724 
1725 	/* Chain for "direct" USCSI commands (all targets) */
1726 	sd_core_iostart,		/* Index: 24 */
1727 
1728 	/* Chain for "direct priority" USCSI commands (all targets) */
1729 	sd_core_iostart,		/* Index: 25 */
1730 };
1731 
1732 /*
1733  * Macros to locate the first function of each iostart chain in the
1734  * sd_iostart_chain[] array. These are located by the index in the array.
1735  */
1736 #define	SD_CHAIN_DISK_IOSTART			0
1737 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1738 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1739 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1740 #define	SD_CHAIN_CHKSUM_IOSTART			12
1741 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1742 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1743 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1744 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1745 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1746 
1747 
1748 /*
1749  * Table of function pointers for the iodone-side routines for the driver-
1750  * internal layering mechanism.  The calling sequence for iodone routines
1751  * uses a decrementing table index, so the last routine called in a chain
1752  * must be at the lowest array index location for that chain.  The last
1753  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1754  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1755  * of the functions in an iodone side chain must correspond to the ordering
1756  * of the iostart routines for that chain.  Note that there is no iodone
1757  * side routine that corresponds to sd_core_iostart(), so there is no
1758  * entry in the table for this.
1759  */
1760 
1761 static sd_chain_t sd_iodone_chain[] = {
1762 
1763 	/* Chain for buf IO for disk drive targets (PM enabled) */
1764 	sd_buf_iodone,			/* Index: 0 */
1765 	sd_mapblockaddr_iodone,		/* Index: 1 */
1766 	sd_pm_iodone,			/* Index: 2 */
1767 
1768 	/* Chain for buf IO for disk drive targets (PM disabled) */
1769 	sd_buf_iodone,			/* Index: 3 */
1770 	sd_mapblockaddr_iodone,		/* Index: 4 */
1771 
1772 	/* Chain for buf IO for removable-media targets (PM enabled) */
1773 	sd_buf_iodone,			/* Index: 5 */
1774 	sd_mapblockaddr_iodone,		/* Index: 6 */
1775 	sd_mapblocksize_iodone,		/* Index: 7 */
1776 	sd_pm_iodone,			/* Index: 8 */
1777 
1778 	/* Chain for buf IO for removable-media targets (PM disabled) */
1779 	sd_buf_iodone,			/* Index: 9 */
1780 	sd_mapblockaddr_iodone,		/* Index: 10 */
1781 	sd_mapblocksize_iodone,		/* Index: 11 */
1782 
1783 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1784 	sd_buf_iodone,			/* Index: 12 */
1785 	sd_mapblockaddr_iodone,		/* Index: 13 */
1786 	sd_checksum_iodone,		/* Index: 14 */
1787 	sd_pm_iodone,			/* Index: 15 */
1788 
1789 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1790 	sd_buf_iodone,			/* Index: 16 */
1791 	sd_mapblockaddr_iodone,		/* Index: 17 */
1792 	sd_checksum_iodone,		/* Index: 18 */
1793 
1794 	/* Chain for USCSI commands (non-checksum targets) */
1795 	sd_uscsi_iodone,		/* Index: 19 */
1796 	sd_pm_iodone,			/* Index: 20 */
1797 
1798 	/* Chain for USCSI commands (checksum targets) */
1799 	sd_uscsi_iodone,		/* Index: 21 */
1800 	sd_checksum_uscsi_iodone,	/* Index: 22 */
1801 	sd_pm_iodone,			/* Index: 23 */
1802 
1803 	/* Chain for "direct" USCSI commands (all targets) */
1804 	sd_uscsi_iodone,		/* Index: 24 */
1805 
1806 	/* Chain for "direct priority" USCSI commands (all targets) */
1807 	sd_uscsi_iodone,		/* Index: 25 */
1808 };
1809 
1810 
1811 /*
1812  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1813  * each iodone-side chain. These are located by the array index, but as the
1814  * iodone side functions are called in a decrementing-index order, the
1815  * highest index number in each chain must be specified (as these correspond
1816  * to the first function in the iodone chain that will be called by the core
1817  * at IO completion time).
1818  */
1819 
1820 #define	SD_CHAIN_DISK_IODONE			2
1821 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1822 #define	SD_CHAIN_RMMEDIA_IODONE			8
1823 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1824 #define	SD_CHAIN_CHKSUM_IODONE			15
1825 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1826 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1827 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		23
1828 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1829 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1830 
1831 
1832 
1833 
1834 /*
1835  * Array to map a layering chain index to the appropriate initpkt routine.
1836  * The redundant entries are present so that the index used for accessing
1837  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1838  * with this table as well.
1839  */
1840 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1841 
1842 static sd_initpkt_t	sd_initpkt_map[] = {
1843 
1844 	/* Chain for buf IO for disk drive targets (PM enabled) */
1845 	sd_initpkt_for_buf,		/* Index: 0 */
1846 	sd_initpkt_for_buf,		/* Index: 1 */
1847 	sd_initpkt_for_buf,		/* Index: 2 */
1848 
1849 	/* Chain for buf IO for disk drive targets (PM disabled) */
1850 	sd_initpkt_for_buf,		/* Index: 3 */
1851 	sd_initpkt_for_buf,		/* Index: 4 */
1852 
1853 	/* Chain for buf IO for removable-media targets (PM enabled) */
1854 	sd_initpkt_for_buf,		/* Index: 5 */
1855 	sd_initpkt_for_buf,		/* Index: 6 */
1856 	sd_initpkt_for_buf,		/* Index: 7 */
1857 	sd_initpkt_for_buf,		/* Index: 8 */
1858 
1859 	/* Chain for buf IO for removable-media targets (PM disabled) */
1860 	sd_initpkt_for_buf,		/* Index: 9 */
1861 	sd_initpkt_for_buf,		/* Index: 10 */
1862 	sd_initpkt_for_buf,		/* Index: 11 */
1863 
1864 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1865 	sd_initpkt_for_buf,		/* Index: 12 */
1866 	sd_initpkt_for_buf,		/* Index: 13 */
1867 	sd_initpkt_for_buf,		/* Index: 14 */
1868 	sd_initpkt_for_buf,		/* Index: 15 */
1869 
1870 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1871 	sd_initpkt_for_buf,		/* Index: 16 */
1872 	sd_initpkt_for_buf,		/* Index: 17 */
1873 	sd_initpkt_for_buf,		/* Index: 18 */
1874 
1875 	/* Chain for USCSI commands (non-checksum targets) */
1876 	sd_initpkt_for_uscsi,		/* Index: 19 */
1877 	sd_initpkt_for_uscsi,		/* Index: 20 */
1878 
1879 	/* Chain for USCSI commands (checksum targets) */
1880 	sd_initpkt_for_uscsi,		/* Index: 21 */
1881 	sd_initpkt_for_uscsi,		/* Index: 22 */
1882 	sd_initpkt_for_uscsi,		/* Index: 23 */
1883 
1884 	/* Chain for "direct" USCSI commands (all targets) */
1885 	sd_initpkt_for_uscsi,		/* Index: 24 */
1886 
1887 	/* Chain for "direct priority" USCSI commands (all targets) */
1888 	sd_initpkt_for_uscsi,		/* Index: 25 */
1889 
1890 };
1891 
1892 
1893 /*
1894  * Array to map a layering chain index to the appropriate destroypkt routine.
1895  * The redundant entries are present so that the index used for accessing
1896  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1897  * with this table as well.
1898  */
1899 typedef void (*sd_destroypkt_t)(struct buf *);
1900 
1901 static sd_destroypkt_t	sd_destroypkt_map[] = {
1902 
1903 	/* Chain for buf IO for disk drive targets (PM enabled) */
1904 	sd_destroypkt_for_buf,		/* Index: 0 */
1905 	sd_destroypkt_for_buf,		/* Index: 1 */
1906 	sd_destroypkt_for_buf,		/* Index: 2 */
1907 
1908 	/* Chain for buf IO for disk drive targets (PM disabled) */
1909 	sd_destroypkt_for_buf,		/* Index: 3 */
1910 	sd_destroypkt_for_buf,		/* Index: 4 */
1911 
1912 	/* Chain for buf IO for removable-media targets (PM enabled) */
1913 	sd_destroypkt_for_buf,		/* Index: 5 */
1914 	sd_destroypkt_for_buf,		/* Index: 6 */
1915 	sd_destroypkt_for_buf,		/* Index: 7 */
1916 	sd_destroypkt_for_buf,		/* Index: 8 */
1917 
1918 	/* Chain for buf IO for removable-media targets (PM disabled) */
1919 	sd_destroypkt_for_buf,		/* Index: 9 */
1920 	sd_destroypkt_for_buf,		/* Index: 10 */
1921 	sd_destroypkt_for_buf,		/* Index: 11 */
1922 
1923 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1924 	sd_destroypkt_for_buf,		/* Index: 12 */
1925 	sd_destroypkt_for_buf,		/* Index: 13 */
1926 	sd_destroypkt_for_buf,		/* Index: 14 */
1927 	sd_destroypkt_for_buf,		/* Index: 15 */
1928 
1929 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1930 	sd_destroypkt_for_buf,		/* Index: 16 */
1931 	sd_destroypkt_for_buf,		/* Index: 17 */
1932 	sd_destroypkt_for_buf,		/* Index: 18 */
1933 
1934 	/* Chain for USCSI commands (non-checksum targets) */
1935 	sd_destroypkt_for_uscsi,	/* Index: 19 */
1936 	sd_destroypkt_for_uscsi,	/* Index: 20 */
1937 
1938 	/* Chain for USCSI commands (checksum targets) */
1939 	sd_destroypkt_for_uscsi,	/* Index: 21 */
1940 	sd_destroypkt_for_uscsi,	/* Index: 22 */
1941 	sd_destroypkt_for_uscsi,	/* Index: 23 */
1942 
1943 	/* Chain for "direct" USCSI commands (all targets) */
1944 	sd_destroypkt_for_uscsi,	/* Index: 24 */
1945 
1946 	/* Chain for "direct priority" USCSI commands (all targets) */
1947 	sd_destroypkt_for_uscsi,	/* Index: 25 */
1948 
1949 };
1950 
1951 
1952 
1953 /*
1954  * Array to map a layering chain index to the appropriate chain "type".
1955  * The chain type indicates a specific property/usage of the chain.
1956  * The redundant entries are present so that the index used for accessing
1957  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1958  * with this table as well.
1959  */
1960 
1961 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
1962 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
1963 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
1964 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
1965 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
1966 						/* (for error recovery) */
1967 
1968 static int sd_chain_type_map[] = {
1969 
1970 	/* Chain for buf IO for disk drive targets (PM enabled) */
1971 	SD_CHAIN_BUFIO,			/* Index: 0 */
1972 	SD_CHAIN_BUFIO,			/* Index: 1 */
1973 	SD_CHAIN_BUFIO,			/* Index: 2 */
1974 
1975 	/* Chain for buf IO for disk drive targets (PM disabled) */
1976 	SD_CHAIN_BUFIO,			/* Index: 3 */
1977 	SD_CHAIN_BUFIO,			/* Index: 4 */
1978 
1979 	/* Chain for buf IO for removable-media targets (PM enabled) */
1980 	SD_CHAIN_BUFIO,			/* Index: 5 */
1981 	SD_CHAIN_BUFIO,			/* Index: 6 */
1982 	SD_CHAIN_BUFIO,			/* Index: 7 */
1983 	SD_CHAIN_BUFIO,			/* Index: 8 */
1984 
1985 	/* Chain for buf IO for removable-media targets (PM disabled) */
1986 	SD_CHAIN_BUFIO,			/* Index: 9 */
1987 	SD_CHAIN_BUFIO,			/* Index: 10 */
1988 	SD_CHAIN_BUFIO,			/* Index: 11 */
1989 
1990 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1991 	SD_CHAIN_BUFIO,			/* Index: 12 */
1992 	SD_CHAIN_BUFIO,			/* Index: 13 */
1993 	SD_CHAIN_BUFIO,			/* Index: 14 */
1994 	SD_CHAIN_BUFIO,			/* Index: 15 */
1995 
1996 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1997 	SD_CHAIN_BUFIO,			/* Index: 16 */
1998 	SD_CHAIN_BUFIO,			/* Index: 17 */
1999 	SD_CHAIN_BUFIO,			/* Index: 18 */
2000 
2001 	/* Chain for USCSI commands (non-checksum targets) */
2002 	SD_CHAIN_USCSI,			/* Index: 19 */
2003 	SD_CHAIN_USCSI,			/* Index: 20 */
2004 
2005 	/* Chain for USCSI commands (checksum targets) */
2006 	SD_CHAIN_USCSI,			/* Index: 21 */
2007 	SD_CHAIN_USCSI,			/* Index: 22 */
2008 	SD_CHAIN_USCSI,			/* Index: 23 */
2009 
2010 	/* Chain for "direct" USCSI commands (all targets) */
2011 	SD_CHAIN_DIRECT,		/* Index: 24 */
2012 
2013 	/* Chain for "direct priority" USCSI commands (all targets) */
2014 	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2015 };
2016 
2017 
2018 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2019 #define	SD_IS_BUFIO(xp)			\
2020 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2021 
2022 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2023 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2024 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2025 
2026 
2027 
2028 /*
2029  * Struct, array, and macros to map a specific chain to the appropriate
2030  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2031  *
2032  * The sd_chain_index_map[] array is used at attach time to set the various
2033  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2034  * chain to be used with the instance. This allows different instances to use
2035  * different chains for buf IO, uscsi IO, etc. Also, since the xb_chain_iostart
2036  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2037  * values at sd_xbuf init time, this allows (1) layering chains to be changed
2038  * dynamically and without the use of locking; and (2) a layer to update the
2039  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2040  * to allow for deferred processing of an IO within the same chain from a
2041  * different execution context.
2042  */
2043 
2044 struct sd_chain_index {
2045 	int	sci_iostart_index;
2046 	int	sci_iodone_index;
2047 };
2048 
2049 static struct sd_chain_index	sd_chain_index_map[] = {
2050 	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2051 	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2052 	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2053 	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2054 	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2055 	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2056 	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2057 	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2058 	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2059 	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2060 };
2061 
2062 
2063 /*
2064  * The following are indexes into the sd_chain_index_map[] array.
2065  */
2066 
2067 /* un->un_buf_chain_type must be set to one of these */
2068 #define	SD_CHAIN_INFO_DISK		0
2069 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2070 #define	SD_CHAIN_INFO_RMMEDIA		2
2071 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2072 #define	SD_CHAIN_INFO_CHKSUM		4
2073 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2074 
2075 /* un->un_uscsi_chain_type must be set to one of these */
2076 #define	SD_CHAIN_INFO_USCSI_CMD		6
2077 /* USCSI with PM disabled is the same as DIRECT */
2078 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2079 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2080 
2081 /* un->un_direct_chain_type must be set to one of these */
2082 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2083 
2084 /* un->un_priority_chain_type must be set to one of these */
2085 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
2086 
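/*
 * Illustrative sketch only (compiled out, not part of the driver): how the
 * chain "info" indexes above select an instance's layering chains at attach
 * time.  sd_sketch_select_chains() is hypothetical; it mirrors the kind of
 * assignment the attach-side code performs, after which the corresponding
 * sd_chain_index_map[] entry supplies the iostart/iodone indexes used to
 * initialize xb_chain_iostart and xb_chain_iodone in each sd_xbuf.
 */
#ifdef SD_CHAIN_SKETCH
static void
sd_sketch_select_chains(struct sd_lun *un, int removable)
{
	/* Pick the buf IO chain appropriate to the device type. */
	un->un_buf_chain_type =
	    removable ? SD_CHAIN_INFO_RMMEDIA : SD_CHAIN_INFO_DISK;

	/* The uscsi and recovery chains are selected the same way. */
	un->un_uscsi_chain_type    = SD_CHAIN_INFO_USCSI_CMD;
	un->un_direct_chain_type   = SD_CHAIN_INFO_DIRECT_CMD;
	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
}
#endif	/* SD_CHAIN_SKETCH */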
2087 /* size for devid inquiries */
2088 #define	MAX_INQUIRY_SIZE		0xF0
2089 
2090 /*
2091  * Macros used by functions to pass a given buf(9S) struct along to the
2092  * next function in the layering chain for further processing.
2093  *
2094  * In the following macros, passing more than three arguments to the called
2095  * routines causes the optimizer for the SPARC compiler to stop doing tail
2096  * call elimination, which results in significant performance degradation.
2097  */
2098 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2099 	((*(sd_iostart_chain[index]))(index, un, bp))
2100 
2101 #define	SD_BEGIN_IODONE(index, un, bp)	\
2102 	((*(sd_iodone_chain[index]))(index, un, bp))
2103 
2104 #define	SD_NEXT_IOSTART(index, un, bp)				\
2105 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2106 
2107 #define	SD_NEXT_IODONE(index, un, bp)				\
2108 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
2109 
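/*
 * Illustrative sketch only (compiled out, not part of the driver): the shape
 * of a typical middle-of-chain layer built on the macros above.  An
 * iostart-side routine does its work and passes the buf down the chain with
 * SD_NEXT_IOSTART (incrementing index); its iodone-side partner undoes that
 * work and passes the buf back up with SD_NEXT_IODONE (decrementing index).
 * sd_sketch_iostart()/sd_sketch_iodone() are hypothetical; real layers such
 * as sd_pm_iostart()/sd_pm_iodone() follow this same pattern.
 */
#ifdef SD_CHAIN_SKETCH
static void
sd_sketch_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	/* ...layer-specific setup on bp would be performed here... */
	SD_NEXT_IOSTART(index, un, bp);
}

static void
sd_sketch_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	/* ...layer-specific completion processing would go here... */
	SD_NEXT_IODONE(index, un, bp);
}
#endif	/* SD_CHAIN_SKETCH */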
2110 
2111 /*
2112  *    Function: _init
2113  *
2114  * Description: This is the driver _init(9E) entry point.
2115  *
2116  * Return Code: Returns the value from mod_install(9F) or
2117  *		ddi_soft_state_init(9F) as appropriate.
2118  *
2119  *     Context: Called when driver module loaded.
2120  */
2121 
2122 int
2123 _init(void)
2124 {
2125 	int	err;
2126 
2127 	/* establish driver name from module name */
2128 	sd_label = mod_modname(&modlinkage);
2129 
2130 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2131 		SD_MAXUNIT);
2132 
2133 	if (err != 0) {
2134 		return (err);
2135 	}
2136 
2137 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2138 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2139 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2140 
2141 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2142 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2143 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2144 
2145 	/*
2146 	 * it's ok to init here even for fibre devices
2147 	 */
2148 	sd_scsi_probe_cache_init();
2149 
2150 	/*
2151 	 * Creating taskq before mod_install ensures that all callers (threads)
2152 	 * that enter the module after a successful mod_install encounter
2153 	 * a valid taskq.
2154 	 */
2155 	sd_taskq_create();
2156 
2157 	err = mod_install(&modlinkage);
2158 	if (err != 0) {
2159 		/* delete taskq if install fails */
2160 		sd_taskq_delete();
2161 
2162 		mutex_destroy(&sd_detach_mutex);
2163 		mutex_destroy(&sd_log_mutex);
2164 		mutex_destroy(&sd_label_mutex);
2165 
2166 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2167 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2168 		cv_destroy(&sd_tr.srq_inprocess_cv);
2169 
2170 		sd_scsi_probe_cache_fini();
2171 
2172 		ddi_soft_state_fini(&sd_state);
2173 		return (err);
2174 	}
2175 
2176 	return (err);
2177 }
2178 
2179 
2180 /*
2181  *    Function: _fini
2182  *
2183  * Description: This is the driver _fini(9E) entry point.
2184  *
2185  * Return Code: Returns the value from mod_remove(9F)
2186  *
2187  *     Context: Called when driver module is unloaded.
2188  */
2189 
2190 int
2191 _fini(void)
2192 {
2193 	int err;
2194 
2195 	if ((err = mod_remove(&modlinkage)) != 0) {
2196 		return (err);
2197 	}
2198 
2199 	sd_taskq_delete();
2200 
2201 	mutex_destroy(&sd_detach_mutex);
2202 	mutex_destroy(&sd_log_mutex);
2203 	mutex_destroy(&sd_label_mutex);
2204 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2205 
2206 	sd_scsi_probe_cache_fini();
2207 
2208 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2209 	cv_destroy(&sd_tr.srq_inprocess_cv);
2210 
2211 	ddi_soft_state_fini(&sd_state);
2212 
2213 	return (err);
2214 }
2215 
2216 
2217 /*
2218  *    Function: _info
2219  *
2220  * Description: This is the driver _info(9E) entry point.
2221  *
2222  *   Arguments: modinfop - pointer to the driver modinfo structure
2223  *
2224  * Return Code: Returns the value from mod_info(9F).
2225  *
2226  *     Context: Kernel thread context
2227  */
2228 
2229 int
2230 _info(struct modinfo *modinfop)
2231 {
2232 	return (mod_info(&modlinkage, modinfop));
2233 }
2234 
2235 
2236 /*
2237  * The following routines implement the driver message logging facility.
2238  * They provide component- and level- based debug output filtering.
2239  * Output may also be restricted to messages for a single instance by
2240  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2241  * to NULL, then messages for all instances are printed.
2242  *
2243  * These routines have been cloned from each other due to the language
2244  * constraints of macros and variable argument list processing.
2245  */
2246 
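/*
 * Illustrative sketch only (compiled out, not part of the driver): typical
 * use of the debug macros backed by the routines below.  SD_ERROR, SD_INFO,
 * and SD_TRACE take a component, a soft state pointer, and a printf-style
 * format; output appears only when the component is enabled in
 * sd_component_mask and the matching level bit is set in sd_level_mask.
 * The surrounding function is hypothetical.
 */
#ifdef SD_LOG_SKETCH
static void
sd_sketch_log_usage(struct sd_lun *un)
{
	SD_TRACE(SD_LOG_COMMON, un, "sd_sketch_log_usage: entry\n");
	SD_INFO(SD_LOG_COMMON, un, "instance %p noted\n", (void *)un);
	SD_ERROR(SD_LOG_COMMON, un, "example error path taken\n");
}
#endif	/* SD_LOG_SKETCH */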
2247 
2248 /*
2249  *    Function: sd_log_err
2250  *
2251  * Description: This routine is called by the SD_ERROR macro for debug
2252  *		logging of error conditions.
2253  *
2254  *   Arguments: comp - driver component being logged
2255  *		un   - pointer to driver soft state (unit) structure
2256  *		fmt  - error string and format to be logged
2257  */
2258 
2259 static void
2260 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2261 {
2262 	va_list		ap;
2263 	dev_info_t	*dev;
2264 
2265 	ASSERT(un != NULL);
2266 	dev = SD_DEVINFO(un);
2267 	ASSERT(dev != NULL);
2268 
2269 	/*
2270 	 * Filter messages based on the global component and level masks.
2271 	 * Also print if un matches the value of sd_debug_un, or if
2272 	 * sd_debug_un is set to NULL.
2273 	 */
2274 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2275 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2276 		mutex_enter(&sd_log_mutex);
2277 		va_start(ap, fmt);
2278 		(void) vsprintf(sd_log_buf, fmt, ap);
2279 		va_end(ap);
2280 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2281 		mutex_exit(&sd_log_mutex);
2282 	}
2283 #ifdef SD_FAULT_INJECTION
2284 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2285 	if (un->sd_injection_mask & comp) {
2286 		mutex_enter(&sd_log_mutex);
2287 		va_start(ap, fmt);
2288 		(void) vsprintf(sd_log_buf, fmt, ap);
2289 		va_end(ap);
2290 		sd_injection_log(sd_log_buf, un);
2291 		mutex_exit(&sd_log_mutex);
2292 	}
2293 #endif
2294 }
2295 
2296 
2297 /*
2298  *    Function: sd_log_info
2299  *
2300  * Description: This routine is called by the SD_INFO macro for debug
2301  *		logging of general purpose informational conditions.
2302  *
2303  *   Arguments: component - driver component being logged
2304  *		un   - pointer to driver soft state (unit) structure
2305  *		fmt  - info string and format to be logged
2306  */
2307 
2308 static void
2309 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2310 {
2311 	va_list		ap;
2312 	dev_info_t	*dev;
2313 
2314 	ASSERT(un != NULL);
2315 	dev = SD_DEVINFO(un);
2316 	ASSERT(dev != NULL);
2317 
2318 	/*
2319 	 * Filter messages based on the global component and level masks.
2320 	 * Also print if un matches the value of sd_debug_un, or if
2321 	 * sd_debug_un is set to NULL.
2322 	 */
2323 	if ((sd_component_mask & component) &&
2324 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2325 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2326 		mutex_enter(&sd_log_mutex);
2327 		va_start(ap, fmt);
2328 		(void) vsprintf(sd_log_buf, fmt, ap);
2329 		va_end(ap);
2330 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2331 		mutex_exit(&sd_log_mutex);
2332 	}
2333 #ifdef SD_FAULT_INJECTION
2334 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2335 	if (un->sd_injection_mask & component) {
2336 		mutex_enter(&sd_log_mutex);
2337 		va_start(ap, fmt);
2338 		(void) vsprintf(sd_log_buf, fmt, ap);
2339 		va_end(ap);
2340 		sd_injection_log(sd_log_buf, un);
2341 		mutex_exit(&sd_log_mutex);
2342 	}
2343 #endif
2344 }
2345 
2346 
2347 /*
2348  *    Function: sd_log_trace
2349  *
2350  * Description: This routine is called by the SD_TRACE macro for debug
2351  *		logging of trace conditions (i.e. function entry/exit).
2352  *
2353  *   Arguments: component - driver component being logged
2354  *		un   - pointer to driver soft state (unit) structure
2355  *		fmt  - trace string and format to be logged
2356  */
2357 
2358 static void
2359 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2360 {
2361 	va_list		ap;
2362 	dev_info_t	*dev;
2363 
2364 	ASSERT(un != NULL);
2365 	dev = SD_DEVINFO(un);
2366 	ASSERT(dev != NULL);
2367 
2368 	/*
2369 	 * Filter messages based on the global component and level masks.
2370 	 * Also print if un matches the value of sd_debug_un, or if
2371 	 * sd_debug_un is set to NULL.
2372 	 */
2373 	if ((sd_component_mask & component) &&
2374 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2375 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2376 		mutex_enter(&sd_log_mutex);
2377 		va_start(ap, fmt);
2378 		(void) vsprintf(sd_log_buf, fmt, ap);
2379 		va_end(ap);
2380 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2381 		mutex_exit(&sd_log_mutex);
2382 	}
2383 #ifdef SD_FAULT_INJECTION
2384 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2385 	if (un->sd_injection_mask & component) {
2386 		mutex_enter(&sd_log_mutex);
2387 		va_start(ap, fmt);
2388 		(void) vsprintf(sd_log_buf, fmt, ap);
2389 		va_end(ap);
2390 		sd_injection_log(sd_log_buf, un);
2391 		mutex_exit(&sd_log_mutex);
2392 	}
2393 #endif
2394 }
2395 
2396 
2397 /*
2398  *    Function: sdprobe
2399  *
2400  * Description: This is the driver probe(9e) entry point function.
2401  *
2402  *   Arguments: devi - opaque device info handle
2403  *
2404  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2405  *              DDI_PROBE_FAILURE: If the probe failed.
2406  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2407  *				   but may be present in the future.
2408  */
2409 
2410 static int
2411 sdprobe(dev_info_t *devi)
2412 {
2413 	struct scsi_device	*devp;
2414 	int			rval;
2415 	int			instance;
2416 
2417 	/*
2418 	 * if it wasn't for pln, sdprobe could actually be nulldev
2419 	 * in the "__fibre" case.
2420 	 */
2421 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2422 		return (DDI_PROBE_DONTCARE);
2423 	}
2424 
2425 	devp = ddi_get_driver_private(devi);
2426 
2427 	if (devp == NULL) {
2428 		/* Oops... nexus driver is misconfigured... */
2429 		return (DDI_PROBE_FAILURE);
2430 	}
2431 
2432 	instance = ddi_get_instance(devi);
2433 
2434 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2435 		return (DDI_PROBE_PARTIAL);
2436 	}
2437 
2438 	/*
2439 	 * Call the SCSA utility probe routine to see if we actually
2440 	 * have a target at this SCSI nexus.
2441 	 */
2442 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2443 	case SCSIPROBE_EXISTS:
2444 		switch (devp->sd_inq->inq_dtype) {
2445 		case DTYPE_DIRECT:
2446 			rval = DDI_PROBE_SUCCESS;
2447 			break;
2448 		case DTYPE_RODIRECT:
2449 			/* CDs etc. Can be removable media */
2450 			rval = DDI_PROBE_SUCCESS;
2451 			break;
2452 		case DTYPE_OPTICAL:
2453 			/*
2454 			 * Rewritable optical drive HP115AA
2455 			 * Can also be removable media
2456 			 */
2457 
2458 			/*
2459 			 * Do not attempt to bind to DTYPE_OPTICAL if
2460 			 * pre-Solaris 9 SPARC sd behavior is required.
2461 			 *
2462 			 * If first time through and sd_dtype_optical_bind
2463 			 * has not been set in /etc/system, check properties.
2464 			 */
2465 
2466 			if (sd_dtype_optical_bind < 0) {
2467 				sd_dtype_optical_bind = ddi_prop_get_int(
2468 				    DDI_DEV_T_ANY, devi, 0,
2469 				    "optical-device-bind", 1);
2470 			}
2471 
2472 			if (sd_dtype_optical_bind == 0) {
2473 				rval = DDI_PROBE_FAILURE;
2474 			} else {
2475 				rval = DDI_PROBE_SUCCESS;
2476 			}
2477 			break;
2478 
2479 		case DTYPE_NOTPRESENT:
2480 		default:
2481 			rval = DDI_PROBE_FAILURE;
2482 			break;
2483 		}
2484 		break;
2485 	default:
2486 		rval = DDI_PROBE_PARTIAL;
2487 		break;
2488 	}
2489 
2490 	/*
2491 	 * This routine checks for resource allocation prior to freeing,
2492 	 * so it will take care of the "smart probing" case where a
2493 	 * scsi_probe() may or may not have been issued and will *not*
2494 	 * free previously-freed resources.
2495 	 */
2496 	scsi_unprobe(devp);
2497 	return (rval);
2498 }
2499 
2500 
2501 /*
2502  *    Function: sdinfo
2503  *
2504  * Description: This is the driver getinfo(9E) entry point function.
2505  *		Given the device number, return the devinfo pointer from
2506  *		the scsi_device structure or the instance number
2507  *		associated with the dev_t.
2508  *
2509  *   Arguments: dip     - pointer to device info structure
2510  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2511  *			  DDI_INFO_DEVT2INSTANCE)
2512  *		arg     - driver dev_t
2513  *		resultp - user buffer for request response
2514  *
2515  * Return Code: DDI_SUCCESS
2516  *              DDI_FAILURE
2517  */
2518 /* ARGSUSED */
2519 static int
2520 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2521 {
2522 	struct sd_lun	*un;
2523 	dev_t		dev;
2524 	int		instance;
2525 	int		error;
2526 
2527 	switch (infocmd) {
2528 	case DDI_INFO_DEVT2DEVINFO:
2529 		dev = (dev_t)arg;
2530 		instance = SDUNIT(dev);
2531 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2532 			return (DDI_FAILURE);
2533 		}
2534 		*result = (void *) SD_DEVINFO(un);
2535 		error = DDI_SUCCESS;
2536 		break;
2537 	case DDI_INFO_DEVT2INSTANCE:
2538 		dev = (dev_t)arg;
2539 		instance = SDUNIT(dev);
2540 		*result = (void *)(uintptr_t)instance;
2541 		error = DDI_SUCCESS;
2542 		break;
2543 	default:
2544 		error = DDI_FAILURE;
2545 	}
2546 	return (error);
2547 }
2548 
2549 /*
2550  *    Function: sd_prop_op
2551  *
2552  * Description: This is the driver prop_op(9E) entry point function.
2553  *		Return the number of blocks for the partition in question
2554  *		or forward the request to the property facilities.
2555  *
2556  *   Arguments: dev       - device number
2557  *		dip       - pointer to device info structure
2558  *		prop_op   - property operator
2559  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2560  *		name      - pointer to property name
2561  *		valuep    - pointer or address of the user buffer
2562  *		lengthp   - property length
2563  *
2564  * Return Code: DDI_PROP_SUCCESS
2565  *              DDI_PROP_NOT_FOUND
2566  *              DDI_PROP_UNDEFINED
2567  *              DDI_PROP_NO_MEMORY
2568  *              DDI_PROP_BUF_TOO_SMALL
2569  */
2570 
2571 static int
2572 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2573 	char *name, caddr_t valuep, int *lengthp)
2574 {
2575 	int		instance = ddi_get_instance(dip);
2576 	struct sd_lun	*un;
2577 	uint64_t	nblocks64;
2578 
2579 	/*
2580 	 * Our dynamic properties are all device specific and size oriented.
2581 	 * Requests issued under conditions where size is valid are passed
2582 	 * to ddi_prop_op_nblocks with the size information, otherwise the
2583 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2584 	 */
2585 	un = ddi_get_soft_state(sd_state, instance);
2586 	if ((dev == DDI_DEV_T_ANY) || (un == NULL) ||
2587 	    (un->un_f_geometry_is_valid == FALSE)) {
2588 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2589 		    name, valuep, lengthp));
2590 	} else {
2591 		/* get nblocks value */
2592 		ASSERT(!mutex_owned(SD_MUTEX(un)));
2593 		mutex_enter(SD_MUTEX(un));
2594 		nblocks64 = (ulong_t)un->un_map[SDPART(dev)].dkl_nblk;
2595 		mutex_exit(SD_MUTEX(un));
2596 
2597 		return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2598 		    name, valuep, lengthp, nblocks64));
2599 	}
2600 }
2601 
2602 /*
2603  * The following functions are for smart probing:
2604  * sd_scsi_probe_cache_init()
2605  * sd_scsi_probe_cache_fini()
2606  * sd_scsi_clear_probe_cache()
2607  * sd_scsi_probe_with_cache()
2608  */
2609 
2610 /*
2611  *    Function: sd_scsi_probe_cache_init
2612  *
2613  * Description: Initializes the probe response cache mutex and head pointer.
2614  *
2615  *     Context: Kernel thread context
2616  */
2617 
2618 static void
2619 sd_scsi_probe_cache_init(void)
2620 {
2621 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2622 	sd_scsi_probe_cache_head = NULL;
2623 }
2624 
2625 
2626 /*
2627  *    Function: sd_scsi_probe_cache_fini
2628  *
2629  * Description: Frees all resources associated with the probe response cache.
2630  *
2631  *     Context: Kernel thread context
2632  */
2633 
2634 static void
2635 sd_scsi_probe_cache_fini(void)
2636 {
2637 	struct sd_scsi_probe_cache *cp;
2638 	struct sd_scsi_probe_cache *ncp;
2639 
2640 	/* Clean up our smart probing linked list */
2641 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2642 		ncp = cp->next;
2643 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2644 	}
2645 	sd_scsi_probe_cache_head = NULL;
2646 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2647 }
2648 
2649 
2650 /*
2651  *    Function: sd_scsi_clear_probe_cache
2652  *
2653  * Description: This routine clears the probe response cache. This is
2654  *		done when open() returns ENXIO so that when deferred
2655  *		attach is attempted (possibly after a device has been
2656  *		turned on) we will retry the probe. Since we don't know
2657  *		which target we failed to open, we just clear the
2658  *		entire cache.
2659  *
2660  *     Context: Kernel thread context
2661  */
2662 
2663 static void
2664 sd_scsi_clear_probe_cache(void)
2665 {
2666 	struct sd_scsi_probe_cache	*cp;
2667 	int				i;
2668 
2669 	mutex_enter(&sd_scsi_probe_cache_mutex);
2670 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2671 		/*
2672 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2673 		 * force probing to be performed the next time
2674 		 * sd_scsi_probe_with_cache is called.
2675 		 */
2676 		for (i = 0; i < NTARGETS_WIDE; i++) {
2677 			cp->cache[i] = SCSIPROBE_EXISTS;
2678 		}
2679 	}
2680 	mutex_exit(&sd_scsi_probe_cache_mutex);
2681 }
2682 
2683 
2684 /*
2685  *    Function: sd_scsi_probe_with_cache
2686  *
2687  * Description: This routine implements support for a scsi device probe
2688  *		with cache. The driver maintains a cache of the target
2689  *		responses to scsi probes. If we get no response from a
2690  *		target during a probe inquiry, we remember that, and we
2691  *		avoid additional calls to scsi_probe on non-zero LUNs
2692  *		on the same target until the cache is cleared. By doing
2693  *		so we avoid the 1/4 sec selection timeout for nonzero
2694 	 *		LUNs.  LUN 0 of a target is always probed.
2695  *
2696  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2697  *              waitfn   - indicates what the allocator routines should
2698  *			   do when resources are not available. This value
2699  *			   is passed on to scsi_probe() when that routine
2700  *			   is called.
2701  *
2702  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2703  *		otherwise the value returned by scsi_probe(9F).
2704  *
2705  *     Context: Kernel thread context
2706  */
2707 
2708 static int
2709 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2710 {
2711 	struct sd_scsi_probe_cache	*cp;
2712 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2713 	int		lun, tgt;
2714 
2715 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2716 	    SCSI_ADDR_PROP_LUN, 0);
2717 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2718 	    SCSI_ADDR_PROP_TARGET, -1);
2719 
2720 	/* Make sure caching enabled and target in range */
2721 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2722 		/* do it the old way (no cache) */
2723 		return (scsi_probe(devp, waitfn));
2724 	}
2725 
2726 	mutex_enter(&sd_scsi_probe_cache_mutex);
2727 
2728 	/* Find the cache for this scsi bus instance */
2729 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2730 		if (cp->pdip == pdip) {
2731 			break;
2732 		}
2733 	}
2734 
2735 	/* If we can't find a cache for this pdip, create one */
2736 	if (cp == NULL) {
2737 		int i;
2738 
2739 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2740 		    KM_SLEEP);
2741 		cp->pdip = pdip;
2742 		cp->next = sd_scsi_probe_cache_head;
2743 		sd_scsi_probe_cache_head = cp;
2744 		for (i = 0; i < NTARGETS_WIDE; i++) {
2745 			cp->cache[i] = SCSIPROBE_EXISTS;
2746 		}
2747 	}
2748 
2749 	mutex_exit(&sd_scsi_probe_cache_mutex);
2750 
2751 	/* Recompute the cache for this target if LUN zero */
2752 	if (lun == 0) {
2753 		cp->cache[tgt] = SCSIPROBE_EXISTS;
2754 	}
2755 
2756 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2757 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2758 		return (SCSIPROBE_NORESP);
2759 	}
2760 
2761 	/* Do the actual probe; save & return the result */
2762 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2763 }
2764 
2765 
2766 /*
2767  *    Function: sd_spin_up_unit
2768  *
2769  * Description: Issues the following commands to spin up the device:
2770  *		START STOP UNIT, and INQUIRY.
2771  *
2772  *   Arguments: un - driver soft state (unit) structure
2773  *
2774  * Return Code: 0 - success
2775  *		EIO - failure
2776  *		EACCES - reservation conflict
2777  *
2778  *     Context: Kernel thread context
2779  */
2780 
2781 static int
2782 sd_spin_up_unit(struct sd_lun *un)
2783 {
2784 	size_t	resid		= 0;
2785 	int	has_conflict	= FALSE;
2786 	uchar_t *bufaddr;
2787 
2788 	ASSERT(un != NULL);
2789 
2790 	/*
2791 	 * Send a throwaway START UNIT command.
2792 	 *
2793 	 * If we fail on this, we don't care presently what precisely
2794 	 * is wrong.  EMC's arrays will also fail this with a check
2795 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2796 	 * we don't want to fail the attach because it may become
2797 	 * "active" later.
2798 	 */
2799 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2800 	    == EACCES)
2801 		has_conflict = TRUE;
2802 
2803 	/*
2804 	 * Send another INQUIRY command to the target. This is necessary for
2805 	 * non-removable media direct access devices because their INQUIRY data
2806 	 * may not be fully qualified until they are spun up (perhaps via the
2807 	 * START command above).  (Note: This seems to be needed for some
2808 	 * legacy devices only.)  The INQUIRY command should succeed even if a
2809 	 * Reservation Conflict is present.
2810 	 */
2811 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2812 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2813 		kmem_free(bufaddr, SUN_INQSIZE);
2814 		return (EIO);
2815 	}
2816 
2817 	/*
2818 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2819 	 * Note that this routine does not return a failure here even if the
2820 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2821 	 */
2822 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2823 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2824 	}
2825 
2826 	kmem_free(bufaddr, SUN_INQSIZE);
2827 
2828 	/* If we hit a reservation conflict above, tell the caller. */
2829 	if (has_conflict == TRUE) {
2830 		return (EACCES);
2831 	}
2832 
2833 	return (0);
2834 }
2835 
2836 #ifdef _LP64
2837 /*
2838  *    Function: sd_enable_descr_sense
2839  *
2840  * Description: This routine attempts to select descriptor sense format
2841  *		using the Control mode page.  Devices that support 64 bit
2842  *		LBAs (for >2TB luns) should also implement descriptor
2843  *		sense data so we will call this function whenever we see
2844  *		a lun larger than 2TB.  If for some reason the device
2845  *		supports 64 bit LBAs but doesn't support descriptor sense,
2846  *		presumably the mode select will fail.  Everything will
2847  *		continue to work normally except that we will not get
2848  *		complete sense data for commands that fail with an LBA
2849  *		larger than 32 bits.
2850  *
2851  *   Arguments: un - driver soft state (unit) structure
2852  *
2853  *     Context: Kernel thread context only
2854  */
2855 
2856 static void
2857 sd_enable_descr_sense(struct sd_lun *un)
2858 {
2859 	uchar_t			*header;
2860 	struct mode_control_scsi3 *ctrl_bufp;
2861 	size_t			buflen;
2862 	size_t			bd_len;
2863 
2864 	/*
2865 	 * Read MODE SENSE page 0xA, Control Mode Page
2866 	 */
2867 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
2868 	    sizeof (struct mode_control_scsi3);
2869 	header = kmem_zalloc(buflen, KM_SLEEP);
2870 	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
2871 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
2872 		SD_ERROR(SD_LOG_COMMON, un,
2873 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
2874 		goto eds_exit;
2875 	}
2876 
2877 	/*
2878 	 * Determine size of Block Descriptors in order to locate
2879 	 * the mode page data. ATAPI devices return 0, SCSI devices
2880 	 * should return MODE_BLK_DESC_LENGTH.
2881 	 */
2882 	bd_len = ((struct mode_header *)header)->bdesc_length;
2883 
2884 	ctrl_bufp = (struct mode_control_scsi3 *)
2885 	    (header + MODE_HEADER_LENGTH + bd_len);
2886 
2887 	/*
2888 	 * Clear PS bit for MODE SELECT
2889 	 */
2890 	ctrl_bufp->mode_page.ps = 0;
2891 
2892 	/*
2893 	 * Set D_SENSE to enable descriptor sense format.
2894 	 */
2895 	ctrl_bufp->d_sense = 1;
2896 
2897 	/*
2898 	 * Use MODE SELECT to commit the change to the D_SENSE bit
2899 	 */
2900 	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
2901 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
2902 		SD_INFO(SD_LOG_COMMON, un,
2903 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
2904 		goto eds_exit;
2905 	}
2906 
2907 eds_exit:
2908 	kmem_free(header, buflen);
2909 }
2910 #endif /* _LP64 */
2911 
2912 
2913 /*
2914  *    Function: sd_set_mmc_caps
2915  *
2916  * Description: This routine determines if the device is MMC compliant and if
2917  *		the device supports CDDA via a mode sense of the CDVD
2918  *		capabilities mode page. Also checks if the device is a
2919  *		dvdram writable device.
2920  *
2921  *   Arguments: un - driver soft state (unit) structure
2922  *
2923  *     Context: Kernel thread context only
2924  */
2925 
2926 static void
2927 sd_set_mmc_caps(struct sd_lun *un)
2928 {
2929 	struct mode_header_grp2		*sense_mhp;
2930 	uchar_t				*sense_page;
2931 	caddr_t				buf;
2932 	int				bd_len;
2933 	int				status;
2934 	struct uscsi_cmd		com;
2935 	int				rtn;
2936 	uchar_t				*out_data_rw, *out_data_hd;
2937 	uchar_t				*rqbuf_rw, *rqbuf_hd;
2938 
2939 	ASSERT(un != NULL);
2940 
2941 	/*
2942 	 * The flags which will be set in this function are - mmc compliant,
2943 	 * dvdram writable device, cdda support. Initialize them to FALSE
2944 	 * and if a capability is detected - it will be set to TRUE.
2945 	 */
2946 	un->un_f_mmc_cap = FALSE;
2947 	un->un_f_dvdram_writable_device = FALSE;
2948 	un->un_f_cfg_cdda = FALSE;
2949 
2950 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
2951 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
2952 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
2953 
2954 	if (status != 0) {
2955 		/* command failed; just return */
2956 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
2957 		return;
2958 	}
2959 	/*
2960 	 * If the mode sense request for the CDROM CAPABILITIES
2961 	 * page (0x2A) succeeds, the device is assumed to be MMC.
2962 	 */
2963 	un->un_f_mmc_cap = TRUE;
2964 
2965 	/* Get to the page data */
2966 	sense_mhp = (struct mode_header_grp2 *)buf;
2967 	bd_len = (sense_mhp->bdesc_length_hi << 8) |
2968 	    sense_mhp->bdesc_length_lo;
2969 	if (bd_len > MODE_BLK_DESC_LENGTH) {
2970 		/*
2971 		 * We did not get back the expected block descriptor
2972 		 * length so we cannot determine if the device supports
2973 		 * CDDA. However, we still indicate the device is MMC
2974 		 * according to the successful response to the page
2975 		 * 0x2A mode sense request.
2976 		 */
2977 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
2978 		    "sd_set_mmc_caps: Mode Sense returned "
2979 		    "invalid block descriptor length\n");
2980 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
2981 		return;
2982 	}
2983 
2984 	/* See if read CDDA is supported */
2985 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
2986 	    bd_len);
2987 	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
2988 
2989 	/* See if writing DVD RAM is supported. */
2990 	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
2991 	if (un->un_f_dvdram_writable_device == TRUE) {
2992 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
2993 		return;
2994 	}
2995 
2996 	/*
2997 	 * If the device presents DVD or CD capabilities in the mode
2998 	 * page, we can return here since an RRD will not have
2999 	 * these capabilities.
3000 	 */
3001 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3002 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3003 		return;
3004 	}
3005 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3006 
3007 	/*
3008 	 * If un->un_f_dvdram_writable_device is still FALSE,
3009 	 * check for a Removable Rigid Disk (RRD).  An RRD
3010 	 * device is identified by the features RANDOM_WRITABLE and
3011 	 * HARDWARE_DEFECT_MANAGEMENT.
3012 	 */
3013 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3014 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3015 
3016 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3017 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3018 	    RANDOM_WRITABLE);
3019 	if (rtn != 0) {
3020 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3021 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3022 		return;
3023 	}
3024 
3025 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3026 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3027 
3028 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3029 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3030 	    HARDWARE_DEFECT_MANAGEMENT);
3031 	if (rtn == 0) {
3032 		/*
3033 		 * We have good information, check for random writable
3034 		 * and hardware defect features.
3035 		 */
3036 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3037 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3038 			un->un_f_dvdram_writable_device = TRUE;
3039 		}
3040 	}
3041 
3042 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3043 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3044 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3045 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3046 }
3047 
3048 /*
3049  *    Function: sd_check_for_writable_cd
3050  *
3051  * Description: This routine determines if the media in the device is
3052  *		writable or not. It uses the GET CONFIGURATION command (0x46)
3053  *		to make this determination.
3054  *
3055  *   Arguments: un - driver soft state (unit) structure
3056  *
3057  *     Context: Never called at interrupt context.
3058  */
3059 
3060 static void
3061 sd_check_for_writable_cd(struct sd_lun *un)
3062 {
3063 	struct uscsi_cmd		com;
3064 	uchar_t				*out_data;
3065 	uchar_t				*rqbuf;
3066 	int				rtn;
3067 	uchar_t				*out_data_rw, *out_data_hd;
3068 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3069 	struct mode_header_grp2		*sense_mhp;
3070 	uchar_t				*sense_page;
3071 	caddr_t				buf;
3072 	int				bd_len;
3073 	int				status;
3074 
3075 	ASSERT(un != NULL);
3076 	ASSERT(mutex_owned(SD_MUTEX(un)));
3077 
3078 	/*
3079 	 * Initialize writable media to FALSE; it is set to TRUE only if
3080 	 * the configuration info tells us otherwise.
3081 	 */
3082 	un->un_f_mmc_writable_media = FALSE;
3083 	mutex_exit(SD_MUTEX(un));
3084 
3085 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3086 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3087 
3088 	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
3089 	    out_data, SD_PROFILE_HEADER_LEN);
3090 
3091 	mutex_enter(SD_MUTEX(un));
3092 	if (rtn == 0) {
3093 		/*
3094 		 * We have good information, check for writable DVD.
3095 		 */
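		/*
		 * Note: bytes 6 and 7 of the GET CONFIGURATION response
		 * header contain the current media profile (per MMC);
		 * profile 0x0012 is DVD-RAM, a writable profile, which is
		 * what the check below tests for.
		 */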
3096 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3097 			un->un_f_mmc_writable_media = TRUE;
3098 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3099 			kmem_free(rqbuf, SENSE_LENGTH);
3100 			return;
3101 		}
3102 	}
3103 
3104 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3105 	kmem_free(rqbuf, SENSE_LENGTH);
3106 
3107 	/*
3108 	 * Determine if this is a RRD type device.
3109 	 */
3110 	mutex_exit(SD_MUTEX(un));
3111 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3112 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3113 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3114 	mutex_enter(SD_MUTEX(un));
3115 	if (status != 0) {
3116 		/* command failed; just return */
3117 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3118 		return;
3119 	}
3120 
3121 	/* Get to the page data */
3122 	sense_mhp = (struct mode_header_grp2 *)buf;
3123 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3124 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3125 		/*
3126 		 * We did not get back the expected block descriptor length so
3127 		 * we cannot check the mode page.
3128 		 */
3129 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3130 		    "sd_check_for_writable_cd: Mode Sense returned "
3131 		    "invalid block descriptor length\n");
3132 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3133 		return;
3134 	}
3135 
3136 	/*
3137 	 * If the device presents DVD or CD capabilities in the mode
3138 	 * page, we can return here since a RRD device will not have
3139 	 * these capabilities.
3140 	 */
3141 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3142 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3143 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3144 		return;
3145 	}
3146 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3147 
3148 	/*
3149 	 * If un->un_f_mmc_writable_media is still FALSE,
3150 	 * check for RRD type media.  An RRD device is identified
3151 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3152 	 */
3153 	mutex_exit(SD_MUTEX(un));
3154 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3155 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3156 
3157 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3158 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3159 	    RANDOM_WRITABLE);
3160 	if (rtn != 0) {
3161 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3162 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3163 		mutex_enter(SD_MUTEX(un));
3164 		return;
3165 	}
3166 
3167 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3168 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3169 
3170 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3171 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3172 	    HARDWARE_DEFECT_MANAGEMENT);
3173 	mutex_enter(SD_MUTEX(un));
3174 	if (rtn == 0) {
3175 		/*
3176 		 * We have good information; check that the random writable
3177 		 * and hardware defect features are reported as current.
3178 		 */
3179 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3180 		    (out_data_rw[10] & 0x1) &&
3181 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3182 		    (out_data_hd[10] & 0x1)) {
3183 			un->un_f_mmc_writable_media = TRUE;
3184 		}
3185 	}
3186 
3187 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3188 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3189 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3190 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3191 }
3192 
3193 /*
3194  *    Function: sd_read_unit_properties
3195  *
3196  * Description: The following implements a property lookup mechanism.
3197  *		Properties for particular disks (keyed on vendor, model
3198  *		and rev numbers) are sought in the sd.conf file via
3199  *		sd_process_sdconf_file(), and if not found there, are
3200  *		looked for in a list hardcoded in this driver via
3201  *		sd_process_sdconf_table(). Once located, the properties
3202  *		are used to update the driver unit structure.
3203  *
3204  *   Arguments: un - driver soft state (unit) structure
3205  */
3206 
3207 static void
3208 sd_read_unit_properties(struct sd_lun *un)
3209 {
3210 	/*
3211 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3212 	 * the "sd-config-list" property (from the sd.conf file) or if
3213 	 * there was not a match for the inquiry vid/pid. If this event
3214 	 * occurs the static driver configuration table is searched for
3215 	 * a match.
3216 	 */
3217 	ASSERT(un != NULL);
3218 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3219 		sd_process_sdconf_table(un);
3220 	}
3221 
3222 	/* check for LSI device */
3223 	sd_is_lsi(un);
3224 
3225 	/*
3226 	 * Set this in sd.conf to 0 in order to disable kstats.  The default
3227 	 * is 1, so they are enabled by default.
3228 	 */
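	/*
	 * For example, a hypothetical sd.conf line disabling them:
	 *
	 *	enable-partition-kstats=0;
	 */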
3229 	un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
3230 	    SD_DEVINFO(un), DDI_PROP_DONTPASS, "enable-partition-kstats", 1));
3231 }
3232 
3233 
3234 /*
3235  *    Function: sd_process_sdconf_file
3236  *
3237  * Description: Use ddi_getlongprop to obtain the properties from the
3238  *		driver's config file (ie, sd.conf) and update the driver
3239  *		soft state structure accordingly.
3240  *
3241  *   Arguments: un - driver soft state (unit) structure
3242  *
3243  * Return Code: SD_SUCCESS - The properties were successfully set according
3244  *			     to the driver configuration file.
3245  *		SD_FAILURE - The driver config list was not obtained or
3246  *			     there was no vid/pid match. This indicates that
3247  *			     the static config table should be used.
3248  *
3249  * The config file has a property, "sd-config-list", which consists of
3250  * one or more duplets as follows:
3251  *
3252  *  sd-config-list=
3253  *	<duplet>,
3254  *	[<duplet>,]
3255  *	[<duplet>];
3256  *
3257  * The structure of each duplet is as follows:
3258  *
3259  *  <duplet>:= <vid+pid>,<data-property-name_list>
3260  *
3261  * The first entry of the duplet is the device ID string (the concatenated
3262  * vid & pid; not to be confused with a device_id).  This is defined in
3263  * the same way as in the sd_disk_table.
3264  *
3265  * The second part of the duplet is a string that identifies a
3266  * data-property-name-list. The data-property-name-list is defined as
3267  * follows:
3268  *
3269  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3270  *
3271  * The syntax of <data-property-name> depends on the <version> field.
3272  *
3273  * If version = SD_CONF_VERSION_1 we have the following syntax:
3274  *
3275  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3276  *
3277  * where the prop0 value will be used to set prop0 if bit0 set in the
3278  * flags, prop1 if bit1 set, etc. and N = SD_CONF_MAX_ITEMS -1
3279  *
3280  */
3281 
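/*
 * For illustration only, a hypothetical sd.conf fragment using this
 * syntax (the vid/pid and data-property-name are made-up examples):
 *
 *	sd-config-list= "ACME    SUPERDISK", "acme-disk-data";
 *	acme-disk-data= 1,0x1,32;
 *
 * Assuming SD_CONF_BSET_THROTTLE is bit0 (as its position in
 * sd_get_tunables_from_conf() below suggests), version 1 with flags
 * 0x1 would set the throttle (prop0) to 32.
 */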
3282 static int
3283 sd_process_sdconf_file(struct sd_lun *un)
3284 {
3285 	char	*config_list = NULL;
3286 	int	config_list_len;
3287 	int	len;
3288 	int	dupletlen = 0;
3289 	char	*vidptr;
3290 	int	vidlen;
3291 	char	*dnlist_ptr;
3292 	char	*dataname_ptr;
3293 	int	dnlist_len;
3294 	int	dataname_len;
3295 	int	*data_list;
3296 	int	data_list_len;
3297 	int	rval = SD_FAILURE;
3298 	int	i;
3299 
3300 	ASSERT(un != NULL);
3301 
3302 	/* Obtain the configuration list associated with the .conf file */
3303 	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
3304 	    sd_config_list, (caddr_t)&config_list, &config_list_len)
3305 	    != DDI_PROP_SUCCESS) {
3306 		return (SD_FAILURE);
3307 	}
3308 
3309 	/*
3310 	 * Compare vids in each duplet to the inquiry vid - if a match is
3311 	 * made, get the data value and update the soft state structure
3312 	 * accordingly.
3313 	 *
3314 	 * Note: This algorithm is complex and difficult to maintain. It should
3315 	 * be replaced with a more robust implementation.
3316 	 */
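	/*
	 * To illustrate the layout being parsed here (a made-up example):
	 * the property value is a sequence of NUL-separated strings, with
	 * each duplet contributing a vid/pid string and a name list:
	 *
	 *	"ACME    SUPERDISK\0acme-disk-data\0"
	 *
	 * vidptr steps from one duplet to the next; dupletlen accounts
	 * for the vid/pid string, the name list, and their NULs.
	 */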
3317 	for (len = config_list_len, vidptr = config_list; len > 0;
3318 	    vidptr += dupletlen, len -= dupletlen) {
3319 		/*
3320 		 * Note: The assumption here is that each vid entry is on
3321 		 * a unique line from its associated duplet.
3322 		 */
3323 		vidlen = dupletlen = (int)strlen(vidptr);
3324 		if ((vidlen == 0) ||
3325 		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
3326 			dupletlen++;
3327 			continue;
3328 		}
3329 
3330 		/*
3331 		 * dnlist contains 1 or more blank separated
3332 		 * data-property-name entries
3333 		 */
3334 		dnlist_ptr = vidptr + vidlen + 1;
3335 		dnlist_len = (int)strlen(dnlist_ptr);
3336 		dupletlen += dnlist_len + 2;
3337 
3338 		/*
3339 		 * Set a pointer for the first data-property-name
3340 		 * entry in the list
3341 		 */
3342 		dataname_ptr = dnlist_ptr;
3343 		dataname_len = 0;
3344 
3345 		/*
3346 		 * Loop through all data-property-name entries in the
3347 		 * data-property-name-list setting the properties for each.
3348 		 */
3349 		while (dataname_len < dnlist_len) {
3350 			int version;
3351 
3352 			/*
3353 			 * Determine the length of the current
3354 			 * data-property-name entry by indexing until a
3355 			 * blank or NUL is encountered. When a space is
3356 			 * encountered, reset it to NUL for compliance
3357 			 * with ddi_getlongprop().
3358 			 */
3359 			for (i = 0; ((dataname_ptr[i] != ' ') &&
3360 			    (dataname_ptr[i] != '\0')); i++) {
3361 				;
3362 			}
3363 
3364 			dataname_len += i;
3365 			/* If not null terminated, Make it so */
3366 			/* If not NUL terminated, make it so */
3367 				dataname_ptr[i] = '\0';
3368 			}
3369 			dataname_len++;
3370 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3371 			    "sd_process_sdconf_file: disk:%s, data:%s\n",
3372 			    vidptr, dataname_ptr);
3373 
3374 			/* Get the data list */
3375 			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
3376 			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
3377 			    != DDI_PROP_SUCCESS) {
3378 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3379 				    "sd_process_sdconf_file: data property (%s)"
3380 				    " has no value\n", dataname_ptr);
3381 				dataname_ptr = dnlist_ptr + dataname_len;
3382 				continue;
3383 			}
3384 
3385 			version = data_list[0];
3386 
3387 			if (version == SD_CONF_VERSION_1) {
3388 				sd_tunables values;
3389 
3390 				/* Set the properties */
3391 				if (sd_chk_vers1_data(un, data_list[1],
3392 				    &data_list[2], data_list_len, dataname_ptr)
3393 				    == SD_SUCCESS) {
3394 					sd_get_tunables_from_conf(un,
3395 					    data_list[1], &data_list[2],
3396 					    &values);
3397 					sd_set_vers1_properties(un,
3398 					    data_list[1], &values);
3399 					rval = SD_SUCCESS;
3400 				} else {
3401 					rval = SD_FAILURE;
3402 				}
3403 			} else {
3404 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3405 				    "data property %s version 0x%x is invalid.",
3406 				    dataname_ptr, version);
3407 				rval = SD_FAILURE;
3408 			}
3409 			kmem_free(data_list, data_list_len);
3410 			dataname_ptr = dnlist_ptr + dataname_len;
3411 		}
3412 	}
3413 
3414 	/* free up the memory allocated by ddi_getlongprop */
3415 	if (config_list) {
3416 		kmem_free(config_list, config_list_len);
3417 	}
3418 
3419 	return (rval);
3420 }
3421 
3422 /*
3423  *    Function: sd_get_tunables_from_conf()
3424  *
3425  *
3426  *    This function reads the data list from the sd.conf file and pulls
3427  *    the values that can have numeric values as arguments, placing
3428  *    the values in the appropriate sd_tunables member.
3429  *    Since the order of the data list members varies across platforms,
3430  *    this function reads them from the data list in a platform-specific
3431  *    order and places them into the correct sd_tunables member that is
3432  *    consistent across all platforms.
3433  */
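/*
 * For example (an assumed flags value): with flags = 0x5, bit0 and bit2
 * are set, so data_list[0] fills the member selected by the bit0 mask
 * (SD_CONF_BSET_THROTTLE) and data_list[2] fills the member selected by
 * whichever SD_CONF_BSET_* mask equals 0x4; data_list[1] is skipped
 * because bit1 is clear, and the loop stops once the mask exceeds flags.
 */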
3434 static void
3435 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3436     sd_tunables *values)
3437 {
3438 	int i;
3439 	int mask;
3440 
3441 	bzero(values, sizeof (sd_tunables));
3442 
3443 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3444 
3445 		mask = 1 << i;
3446 		if (mask > flags) {
3447 			break;
3448 		}
3449 
3450 		switch (mask & flags) {
3451 		case 0:	/* This mask bit not set in flags */
3452 			continue;
3453 		case SD_CONF_BSET_THROTTLE:
3454 			values->sdt_throttle = data_list[i];
3455 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3456 			    "sd_get_tunables_from_conf: throttle = %d\n",
3457 			    values->sdt_throttle);
3458 			break;
3459 		case SD_CONF_BSET_CTYPE:
3460 			values->sdt_ctype = data_list[i];
3461 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3462 			    "sd_get_tunables_from_conf: ctype = %d\n",
3463 			    values->sdt_ctype);
3464 			break;
3465 		case SD_CONF_BSET_NRR_COUNT:
3466 			values->sdt_not_rdy_retries = data_list[i];
3467 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3468 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3469 			    values->sdt_not_rdy_retries);
3470 			break;
3471 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3472 			values->sdt_busy_retries = data_list[i];
3473 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3474 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3475 			    values->sdt_busy_retries);
3476 			break;
3477 		case SD_CONF_BSET_RST_RETRIES:
3478 			values->sdt_reset_retries = data_list[i];
3479 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3480 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3481 			    values->sdt_reset_retries);
3482 			break;
3483 		case SD_CONF_BSET_RSV_REL_TIME:
3484 			values->sdt_reserv_rel_time = data_list[i];
3485 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3486 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3487 			    values->sdt_reserv_rel_time);
3488 			break;
3489 		case SD_CONF_BSET_MIN_THROTTLE:
3490 			values->sdt_min_throttle = data_list[i];
3491 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3492 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3493 			    values->sdt_min_throttle);
3494 			break;
3495 		case SD_CONF_BSET_DISKSORT_DISABLED:
3496 			values->sdt_disk_sort_dis = data_list[i];
3497 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3498 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3499 			    values->sdt_disk_sort_dis);
3500 			break;
3501 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3502 			values->sdt_lun_reset_enable = data_list[i];
3503 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3504 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3505 			    "\n", values->sdt_lun_reset_enable);
3506 			break;
3507 		}
3508 	}
3509 }
3510 
3511 /*
3512  *    Function: sd_process_sdconf_table
3513  *
3514  * Description: Search the static configuration table for a match on the
3515  *		inquiry vid/pid and update the driver soft state structure
3516  *		according to the table property values for the device.
3517  *
3518  *		The form of a configuration table entry is:
3519  *		  <vid+pid>,<flags>,<property-data>
3520  *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
3521  *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
3522  *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
3523  *
3524  *   Arguments: un - driver soft state (unit) structure
3525  */
3526 
3527 static void
3528 sd_process_sdconf_table(struct sd_lun *un)
3529 {
3530 	char	*id = NULL;
3531 	int	table_index;
3532 	int	idlen;
3533 
3534 	ASSERT(un != NULL);
3535 	for (table_index = 0; table_index < sd_disk_table_size;
3536 	    table_index++) {
3537 		id = sd_disk_table[table_index].device_id;
3538 		idlen = strlen(id);
3539 		if (idlen == 0) {
3540 			continue;
3541 		}
3542 
3543 		/*
3544 		 * The static configuration table currently does not
3545 		 * implement version 10 properties. Additionally,
3546 		 * multiple data-property-name entries are not
3547 		 * implemented in the static configuration table.
3548 		 */
3549 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3550 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3551 			    "sd_process_sdconf_table: disk %s\n", id);
3552 			sd_set_vers1_properties(un,
3553 			    sd_disk_table[table_index].flags,
3554 			    sd_disk_table[table_index].properties);
3555 			break;
3556 		}
3557 	}
3558 }
3559 
3560 
3561 /*
3562  *    Function: sd_sdconf_id_match
3563  *
3564  * Description: This local function implements a case insensitive vid/pid
3565  *		comparison as well as the boundary cases of wild card and
3566  *		multiple blanks.
3567  *
3568  *		Note: An implicit assumption made here is that the scsi
3569  *		inquiry structure will always keep the vid, pid and
3570  *		revision strings in consecutive sequence, so they can be
3571  *		read as a single string. If this assumption is not the
3572  *		case, a separate string, to be used for the check, needs
3573  *		to be built with these strings concatenated.
3574  *
3575  *   Arguments: un - driver soft state (unit) structure
3576  *		id - table or config file vid/pid
3577  *		idlen  - length of the vid/pid (bytes)
3578  *
3579  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3580  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3581  */
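/*
 * For illustration (made-up ids): "ACME    SUPERDISK" must match the
 * inquiry vid/pid bytes directly (ignoring case); " NEC CDROM " also
 * matches "NEC       CDROM" via sd_blank_cmp(); and "*SUPERDISK*"
 * matches any inquiry whose 16-byte pid field contains "SUPERDISK".
 */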
3582 
3583 static int
3584 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3585 {
3586 	struct scsi_inquiry	*sd_inq;
3587 	int 			rval = SD_SUCCESS;
3588 
3589 	ASSERT(un != NULL);
3590 	sd_inq = un->un_sd->sd_inq;
3591 	ASSERT(id != NULL);
3592 
3593 	/*
3594 	 * We use the inq_vid as a pointer to a buffer containing the
3595 	 * vid and pid and use the entire vid/pid length of the table
3596 	 * entry for the comparison. This works because the inq_pid
3597 	 * data member follows inq_vid in the scsi_inquiry structure.
3598 	 */
3599 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3600 		/*
3601 		 * The user id string is compared to the inquiry vid/pid
3602 		 * using a case insensitive comparison and ignoring
3603 		 * multiple spaces.
3604 		 */
3605 		rval = sd_blank_cmp(un, id, idlen);
3606 		if (rval != SD_SUCCESS) {
3607 			/*
3608 			 * User id strings that start and end with a "*"
3609 			 * are a special case. These do not have a
3610 			 * specific vendor, and the product string can
3611 			 * appear anywhere in the 16 byte PID portion of
3612 			 * the inquiry data. This is a simple strstr()
3613 			 * type search for the user id in the inquiry data.
3614 			 */
3615 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3616 				char	*pidptr = &id[1];
3617 				int	i;
3618 				int	j;
3619 				int	pidstrlen = idlen - 2;
3620 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3621 				    pidstrlen;
3622 
3623 				if (j < 0) {
3624 					return (SD_FAILURE);
3625 				}
3626 				for (i = 0; i <= j; i++) {
3627 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3628 					    pidptr, pidstrlen) == 0) {
3629 						rval = SD_SUCCESS;
3630 						break;
3631 					}
3632 				}
3633 			}
3634 		}
3635 	}
3636 	return (rval);
3637 }
3638 
3639 
3640 /*
3641  *    Function: sd_blank_cmp
3642  *
3643  * Description: If the id string starts and ends with a space, treat
3644  *		multiple consecutive spaces as equivalent to a single
3645  *		space. For example, this causes a sd_disk_table entry
3646  *		of " NEC CDROM " to match a device's id string of
3647  *		"NEC       CDROM".
3648  *
3649  *		Note: The success exit condition for this routine is if
3650  *		the table entry pointer points at '\0' and the count of
3651  *		remaining inquiry bytes is zero. This happens if the inquiry
3652  *		string returned by the device is padded with spaces to be
3653  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3654  *		SCSI spec states that the inquiry string is to be padded with
3655  *		spaces.
3656  *
3657  *   Arguments: un - driver soft state (unit) structure
3658  *		id - table or config file vid/pid
3659  *		idlen  - length of the vid/pid (bytes)
3660  *
3661  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3662  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3663  */
3664 
3665 static int
3666 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
3667 {
3668 	char		*p1;
3669 	char		*p2;
3670 	int		cnt;
3671 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
3672 	    sizeof (SD_INQUIRY(un)->inq_pid);
3673 
3674 	ASSERT(un != NULL);
3675 	p2 = un->un_sd->sd_inq->inq_vid;
3676 	ASSERT(id != NULL);
3677 	p1 = id;
3678 
3679 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
3680 		/*
3681 		 * Note: string p1 is terminated by a NUL but string p2
3682 		 * isn't.  The end of p2 is determined by cnt.
3683 		 */
3684 		for (;;) {
3685 			/* skip over any extra blanks in both strings */
3686 			while ((*p1 != '\0') && (*p1 == ' ')) {
3687 				p1++;
3688 			}
3689 			while ((cnt != 0) && (*p2 == ' ')) {
3690 				p2++;
3691 				cnt--;
3692 			}
3693 
3694 			/* compare the two strings */
3695 			if ((cnt == 0) ||
3696 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
3697 				break;
3698 			}
3699 			while ((cnt > 0) &&
3700 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
3701 				p1++;
3702 				p2++;
3703 				cnt--;
3704 			}
3705 		}
3706 	}
3707 
3708 	/* return SD_SUCCESS if both strings match */
3709 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
3710 }
3711 
3712 
3713 /*
3714  *    Function: sd_chk_vers1_data
3715  *
3716  * Description: Verify the version 1 device properties provided by the
3717  *		user via the configuration file
3718  *
3719  *   Arguments: un	     - driver soft state (unit) structure
3720  *		flags	     - integer mask indicating properties to be set
3721  *		prop_list    - integer list of property values
3722  *		list_len     - length of user provided data
3723  *
3724  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3725  *		SD_FAILURE - Indicates the user provided data is invalid
3726  */
3727 
3728 static int
3729 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3730     int list_len, char *dataname_ptr)
3731 {
3732 	int i;
3733 	int mask = 1;
3734 	int index = 0;
3735 
3736 	ASSERT(un != NULL);
3737 
3738 	/* Check for a NULL property name and list */
3739 	if (dataname_ptr == NULL) {
3740 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3741 		    "sd_chk_vers1_data: NULL data property name.");
3742 		return (SD_FAILURE);
3743 	}
3744 	if (prop_list == NULL) {
3745 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3746 		    "sd_chk_vers1_data: %s NULL data property list.",
3747 		    dataname_ptr);
3748 		return (SD_FAILURE);
3749 	}
3750 
3751 	/* Display a warning if undefined bits are set in the flags */
3752 	if (flags & ~SD_CONF_BIT_MASK) {
3753 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3754 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3755 		    "Properties not set.",
3756 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3757 		return (SD_FAILURE);
3758 	}
3759 
3760 	/*
3761 	 * Verify the length of the list by counting the bits set in the
3762 	 * flags and validating that the property list is long enough to
3763 	 * supply a value for each of those bits.
3764 	 */
3765 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3766 		if (flags & mask) {
3767 			index++;
3768 		}
3769 		mask <<= 1;	/* test each flag bit exactly once */
3770 	}
3771 	if ((list_len / sizeof (int)) < (index + 2)) {
3772 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3773 		    "sd_chk_vers1_data: "
3774 		    "Data property list %s size is incorrect. "
3775 		    "Properties not set.", dataname_ptr);
3776 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3777 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3778 		return (SD_FAILURE);
3779 	}
3780 	return (SD_SUCCESS);
3781 }
3782 
3783 
3784 /*
3785  *    Function: sd_set_vers1_properties
3786  *
3787  * Description: Set version 1 device properties based on a property list
3788  *		retrieved from the driver configuration file or static
3789  *		configuration table. Version 1 properties have the format:
3790  *
3791  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3792  *
3793  *		where the prop0 value will be used to set prop0 if bit0
3794  *		is set in the flags
3795  *
3796  *   Arguments: un	     - driver soft state (unit) structure
3797  *		flags	     - integer mask indicating properties to be set
3798  *		prop_list    - integer list of property values
3799  */
3800 
3801 static void
3802 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
3803 {
3804 	ASSERT(un != NULL);
3805 
3806 	/*
3807 	 * Set the flag to indicate cache is to be disabled. An attempt
3808 	 * to disable the cache via sd_disable_caching() will be made
3809 	 * later during attach once the basic initialization is complete.
3810 	 */
3811 	if (flags & SD_CONF_BSET_NOCACHE) {
3812 		un->un_f_opt_disable_cache = TRUE;
3813 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3814 		    "sd_set_vers1_properties: caching disabled flag set\n");
3815 	}
3816 
3817 	/* CD-specific configuration parameters */
3818 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
3819 		un->un_f_cfg_playmsf_bcd = TRUE;
3820 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3821 		    "sd_set_vers1_properties: playmsf_bcd set\n");
3822 	}
3823 	if (flags & SD_CONF_BSET_READSUB_BCD) {
3824 		un->un_f_cfg_readsub_bcd = TRUE;
3825 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3826 		    "sd_set_vers1_properties: readsub_bcd set\n");
3827 	}
3828 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
3829 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
3830 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3831 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
3832 	}
3833 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
3834 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
3835 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3836 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
3837 	}
3838 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
3839 		un->un_f_cfg_no_read_header = TRUE;
3840 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3841 		    "sd_set_vers1_properties: no_read_header set\n");
3842 	}
3843 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
3844 		un->un_f_cfg_read_cd_xd4 = TRUE;
3845 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3846 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
3847 	}
3848 
3849 	/* Support for devices which do not have valid/unique serial numbers */
3850 	if (flags & SD_CONF_BSET_FAB_DEVID) {
3851 		un->un_f_opt_fab_devid = TRUE;
3852 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3853 		    "sd_set_vers1_properties: fab_devid bit set\n");
3854 	}
3855 
3856 	/* Support for user throttle configuration */
3857 	if (flags & SD_CONF_BSET_THROTTLE) {
3858 		ASSERT(prop_list != NULL);
3859 		un->un_saved_throttle = un->un_throttle =
3860 		    prop_list->sdt_throttle;
3861 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3862 		    "sd_set_vers1_properties: throttle set to %d\n",
3863 		    prop_list->sdt_throttle);
3864 	}
3865 
3866 	/* Set the per disk retry count according to the conf file or table. */
3867 	if (flags & SD_CONF_BSET_NRR_COUNT) {
3868 		ASSERT(prop_list != NULL);
3869 		if (prop_list->sdt_not_rdy_retries) {
3870 			un->un_notready_retry_count =
3871 				prop_list->sdt_not_rdy_retries;
3872 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3873 			    "sd_set_vers1_properties: not ready retry count"
3874 			    " set to %d\n", un->un_notready_retry_count);
3875 		}
3876 	}
3877 
3878 	/* The controller type is reported for generic disk driver ioctls */
3879 	if (flags & SD_CONF_BSET_CTYPE) {
3880 		ASSERT(prop_list != NULL);
3881 		switch (prop_list->sdt_ctype) {
3882 		case CTYPE_CDROM:
3883 			un->un_ctype = prop_list->sdt_ctype;
3884 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3885 			    "sd_set_vers1_properties: ctype set to "
3886 			    "CTYPE_CDROM\n");
3887 			break;
3888 		case CTYPE_CCS:
3889 			un->un_ctype = prop_list->sdt_ctype;
3890 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3891 			    "sd_set_vers1_properties: ctype set to "
3892 			    "CTYPE_CCS\n");
3893 			break;
3894 		case CTYPE_ROD:		/* RW optical */
3895 			un->un_ctype = prop_list->sdt_ctype;
3896 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3897 			    "sd_set_vers1_properties: ctype set to "
3898 			    "CTYPE_ROD\n");
3899 			break;
3900 		default:
3901 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3902 			    "sd_set_vers1_properties: Could not set "
3903 			    "invalid ctype value (%d)",
3904 			    prop_list->sdt_ctype);
3905 		}
3906 	}
3907 
3908 	/* Purple failover timeout */
3909 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
3910 		ASSERT(prop_list != NULL);
3911 		un->un_busy_retry_count =
3912 			prop_list->sdt_busy_retries;
3913 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3914 		    "sd_set_vers1_properties: "
3915 		    "busy retry count set to %d\n",
3916 		    un->un_busy_retry_count);
3917 	}
3918 
3919 	/* Purple reset retry count */
3920 	if (flags & SD_CONF_BSET_RST_RETRIES) {
3921 		ASSERT(prop_list != NULL);
3922 		un->un_reset_retry_count =
3923 			prop_list->sdt_reset_retries;
3924 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3925 		    "sd_set_vers1_properties: "
3926 		    "reset retry count set to %d\n",
3927 		    un->un_reset_retry_count);
3928 	}
3929 
3930 	/* Purple reservation release timeout */
3931 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
3932 		ASSERT(prop_list != NULL);
3933 		un->un_reserve_release_time =
3934 			prop_list->sdt_reserv_rel_time;
3935 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3936 		    "sd_set_vers1_properties: "
3937 		    "reservation release timeout set to %d\n",
3938 		    un->un_reserve_release_time);
3939 	}
3940 
3941 	/*
3942 	 * Flag telling the driver to verify that no commands are pending
3943 	 * for a device before issuing a Test Unit Ready. This is a workaround
3944 	 * for a firmware bug in some Seagate eliteI drives.
3945 	 */
3946 	if (flags & SD_CONF_BSET_TUR_CHECK) {
3947 		un->un_f_cfg_tur_check = TRUE;
3948 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3949 		    "sd_set_vers1_properties: tur queue check set\n");
3950 	}
3951 
3952 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
3953 		un->un_min_throttle = prop_list->sdt_min_throttle;
3954 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3955 		    "sd_set_vers1_properties: min throttle set to %d\n",
3956 		    un->un_min_throttle);
3957 	}
3958 
3959 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
3960 		un->un_f_disksort_disabled =
3961 		    (prop_list->sdt_disk_sort_dis != 0) ?
3962 		    TRUE : FALSE;
3963 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3964 		    "sd_set_vers1_properties: disksort disabled "
3965 		    "flag set to %d\n",
3966 		    prop_list->sdt_disk_sort_dis);
3967 	}
3968 
3969 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
3970 		un->un_f_lun_reset_enabled =
3971 		    (prop_list->sdt_lun_reset_enable != 0) ?
3972 		    TRUE : FALSE;
3973 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3974 		    "sd_set_vers1_properties: lun reset enabled "
3975 		    "flag set to %d\n",
3976 		    prop_list->sdt_lun_reset_enable);
3977 	}
3978 
3979 	/*
3980 	 * Validate the throttle values.
3981 	 * If any of the numbers are invalid, set everything to defaults.
3982 	 */
3983 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
3984 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
3985 	    (un->un_min_throttle > un->un_throttle)) {
3986 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
3987 		un->un_min_throttle = sd_min_throttle;
3988 	}
3989 }
3990 
3991 /*
3992  *   Function: sd_is_lsi()
3993  *
3994  *   Description: Check for LSI devices by stepping through the static
3995  *	device table to match the vid/pid.
3996  *
3997  *   Args: un - ptr to sd_lun
3998  *
3999  *   Notes:  When creating a new LSI property, the new property must
4000  *		also be added to this function.
4001  */
4002 static void
4003 sd_is_lsi(struct sd_lun *un)
4004 {
4005 	char	*id = NULL;
4006 	int	table_index;
4007 	int	idlen;
4008 	void	*prop;
4009 
4010 	ASSERT(un != NULL);
4011 	for (table_index = 0; table_index < sd_disk_table_size;
4012 	    table_index++) {
4013 		id = sd_disk_table[table_index].device_id;
4014 		idlen = strlen(id);
4015 		if (idlen == 0) {
4016 			continue;
4017 		}
4018 
4019 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4020 			prop = sd_disk_table[table_index].properties;
4021 			if (prop == &lsi_properties ||
4022 			    prop == &lsi_oem_properties ||
4023 			    prop == &lsi_properties_scsi ||
4024 			    prop == &symbios_properties) {
4025 				un->un_f_cfg_is_lsi = TRUE;
4026 			}
4027 			break;
4028 		}
4029 	}
4030 }
4031 
4032 
4033 /*
4034  * The following routines support reading and interpretation of disk labels,
4035  * including Solaris BE (8-slice) vtoc's, Solaris LE (16-slice) vtoc's, and
4036  * fdisk tables.
4037  */
4038 
4039 /*
4040  *    Function: sd_validate_geometry
4041  *
4042  * Description: Read the label from the disk (if present). Update the unit's
4043  *		geometry and vtoc information from the data in the label.
4044  *		Verify that the label is valid.
4045  *
4046  *   Arguments: un - driver soft state (unit) structure
4047  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4048  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4049  *			to use the USCSI "direct" chain and bypass the normal
4050  *			command waitq.
4051  *
4052  * Return Code: 0 - Successful completion
4053  *		EINVAL  - Invalid value in un->un_tgt_blocksize or
4054  *			  un->un_blockcount; or label on disk is corrupted
4055  *			  or unreadable.
4056  *		EACCES  - Reservation conflict at the device.
4057  *		ENOMEM  - Resource allocation error
4058  *		ENOTSUP - geometry not applicable
4059  *
4060  *     Context: Kernel thread only (can sleep).
4061  */
4062 
4063 static int
4064 sd_validate_geometry(struct sd_lun *un, int path_flag)
4065 {
4066 	static	char		labelstring[128];
4067 	static	char		buf[256];
4068 	char	*label		= NULL;
4069 	int	label_error	= 0;
4070 	int	gvalid		= un->un_f_geometry_is_valid;
4071 	int	lbasize;
4072 	uint_t	capacity;
4073 	int	count;
4074 
4075 	ASSERT(un != NULL);
4076 	ASSERT(mutex_owned(SD_MUTEX(un)));
4077 
4078 	/*
4079 	 * If the required values are not valid, then try getting them
4080 	 * once via read capacity. If that fails, then fail this call.
4081 	 * This is necessary with the new mpxio failover behavior in
4082 	 * the T300 where we can get an attach for the inactive path
4083 	 * before the active path. The inactive path fails commands with
4084 	 * sense data of 02,04,88; this happens to the read capacity issued
4085 	 * before mpxio has sufficient knowledge to know whether it should
4086 	 * force a failover or not. (Which it won't do at attach anyhow.)
4087 	 * If the read capacity at attach time fails, un_tgt_blocksize and
4088 	 * un_blockcount won't be valid.
4089 	 */
4090 	if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4091 	    (un->un_f_blockcount_is_valid != TRUE)) {
4092 		uint64_t	cap;
4093 		uint32_t	lbasz;
4094 		int		rval;
4095 
4096 		mutex_exit(SD_MUTEX(un));
4097 		rval = sd_send_scsi_READ_CAPACITY(un, &cap,
4098 		    &lbasz, SD_PATH_DIRECT);
4099 		mutex_enter(SD_MUTEX(un));
4100 		if (rval == 0) {
4101 			/*
4102 			 * The following relies on
4103 			 * sd_send_scsi_READ_CAPACITY never
4104 			 * returning 0 for capacity and/or lbasize.
4105 			 */
4106 			sd_update_block_info(un, lbasz, cap);
4107 		}
4108 
4109 		if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4110 		    (un->un_f_blockcount_is_valid != TRUE)) {
4111 			return (EINVAL);
4112 		}
4113 	}
4114 
4115 	/*
4116 	 * Copy the lbasize and capacity so that if they're reset while we're
4117 	 * not holding the SD_MUTEX, we will continue to use valid values
4118 	 * after the SD_MUTEX is reacquired. (4119659)
4119 	 */
4120 	lbasize  = un->un_tgt_blocksize;
4121 	capacity = un->un_blockcount;
4122 
4123 #if defined(_SUNOS_VTOC_16)
4124 	/*
4125 	 * Set up the "whole disk" fdisk partition; this should always
4126 	 * exist, regardless of whether the disk contains an fdisk table
4127 	 * or vtoc.
4128 	 */
4129 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
4130 	un->un_map[P0_RAW_DISK].dkl_nblk  = capacity;
4131 #endif
4132 
4133 	/*
4134 	 * Refresh the logical and physical geometry caches.
4135 	 * (data from MODE SENSE format/rigid disk geometry pages,
4136 	 * and scsi_ifgetcap("geometry").
4137 	 */
4138 	sd_resync_geom_caches(un, capacity, lbasize, path_flag);
4139 
4140 	label_error = sd_use_efi(un, path_flag);
4141 	if (label_error == 0) {
4142 		/* found a valid EFI label */
4143 		SD_TRACE(SD_LOG_IO_PARTITION, un,
4144 			"sd_validate_geometry: found EFI label\n");
4145 		un->un_solaris_offset = 0;
4146 		un->un_solaris_size = capacity;
4147 		return (ENOTSUP);
4148 	}
4149 	if (un->un_blockcount > DK_MAX_BLOCKS) {
4150 		if (label_error == ESRCH) {
4151 			/*
4152 			 * they've configured a LUN over 1TB, but used
4153 			 * format.dat to restrict format's view of the
4154 			 * capacity to be under 1TB
4155 			 */
4156 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4157 "is >1TB and has a VTOC label: use format(1M) to either decrease the");
4158 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
4159 "size to be < 1TB or relabel the disk with an EFI label");
4160 		} else {
4161 			/* unlabeled disk over 1TB */
4162 			return (ENOTSUP);
4163 		}
4164 	}
4165 	label_error = 0;
4166 
4167 	/*
4168 	 * at this point it is either labeled with a VTOC or it is
4169 	 * under 1TB
4170 	 */
4171 
4172 	/*
4173 	 * Only DIRECT ACCESS devices will have Sun labels.
4174 	 * CD's supposedly have a Sun label, too
4175 	 * CDs supposedly have a Sun label, too.
4176 	if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT || ISREMOVABLE(un)) {
4177 		struct	dk_label *dkl;
4178 		offset_t dkl1;
4179 		offset_t label_addr, real_addr;
4180 		int	rval;
4181 		size_t	buffer_size;
4182 
4183 		/*
4184 		 * Note: This will set up un->un_solaris_size and
4185 		 * un->un_solaris_offset.
4186 		 */
4187 		switch (sd_read_fdisk(un, capacity, lbasize, path_flag)) {
4188 		case SD_CMD_RESERVATION_CONFLICT:
4189 			ASSERT(mutex_owned(SD_MUTEX(un)));
4190 			return (EACCES);
4191 		case SD_CMD_FAILURE:
4192 			ASSERT(mutex_owned(SD_MUTEX(un)));
4193 			return (ENOMEM);
4194 		}
4195 
4196 		if (un->un_solaris_size <= DK_LABEL_LOC) {
4197 			/*
4198 			 * Found fdisk table but no Solaris partition entry,
4199 			 * so don't call sd_uselabel() and don't create
4200 			 * a default label.
4201 			 */
4202 			label_error = 0;
4203 			un->un_f_geometry_is_valid = TRUE;
4204 			goto no_solaris_partition;
4205 		}
4206 		label_addr = (daddr_t)(un->un_solaris_offset + DK_LABEL_LOC);
4207 
4208 		/*
4209 		 * sys_blocksize != tgt_blocksize, need to re-adjust
4210 		 * blkno and save the index to beginning of dk_label
4211 		 */
4212 		real_addr = SD_SYS2TGTBLOCK(un, label_addr);
4213 		buffer_size = SD_REQBYTES2TGTBYTES(un,
4214 		    sizeof (struct dk_label));
4215 
4216 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_validate_geometry: "
4217 		    "label_addr: 0x%x allocation size: 0x%x\n",
4218 		    label_addr, buffer_size);
4219 		dkl = kmem_zalloc(buffer_size, KM_NOSLEEP);
4220 		if (dkl == NULL) {
4221 			return (ENOMEM);
4222 		}
4223 
4224 		mutex_exit(SD_MUTEX(un));
4225 		rval = sd_send_scsi_READ(un, dkl, buffer_size, real_addr,
4226 		    path_flag);
4227 		mutex_enter(SD_MUTEX(un));
4228 
4229 		switch (rval) {
4230 		case 0:
4231 			/*
4232 			 * sd_uselabel will establish that the geometry
4233 			 * is valid.
4234 			 * For sys_blocksize != tgt_blocksize, need
4235 			 * to index into the beginning of dk_label
4236 			 */
4237 			dkl1 = (daddr_t)dkl
4238 				+ SD_TGTBYTEOFFSET(un, label_addr, real_addr);
4239 			if (sd_uselabel(un, (struct dk_label *)(uintptr_t)dkl1,
4240 			    path_flag) != SD_LABEL_IS_VALID) {
4241 				label_error = EINVAL;
4242 			}
4243 			break;
4244 		case EACCES:
4245 			label_error = EACCES;
4246 			break;
4247 		default:
4248 			label_error = EINVAL;
4249 			break;
4250 		}
4251 
4252 		kmem_free(dkl, buffer_size);
4253 
4254 #if defined(_SUNOS_VTOC_8)
4255 		label = (char *)un->un_asciilabel;
4256 #elif defined(_SUNOS_VTOC_16)
4257 		label = (char *)un->un_vtoc.v_asciilabel;
4258 #else
4259 #error "No VTOC format defined."
4260 #endif
4261 	}
4262 
4263 	/*
4264 	 * If a valid label was not found, AND if no reservation conflict
4265 	 * was detected, then go ahead and create a default label (4069506).
4266 	 *
4267 	 * Note: currently, for VTOC_8 devices, the default label is created
4268 	 * for removables only.  For VTOC_16 devices, the default label will
4269 	 * be created for both removables and non-removables alike.
4270 	 * (see sd_build_default_label)
4271 	 */
4272 #if defined(_SUNOS_VTOC_8)
4273 	if (ISREMOVABLE(un) && (label_error != EACCES)) {
4274 #elif defined(_SUNOS_VTOC_16)
4275 	if (label_error != EACCES) {
4276 #endif
4277 		if (un->un_f_geometry_is_valid == FALSE) {
4278 			sd_build_default_label(un);
4279 		}
4280 		label_error = 0;
4281 	}
4282 
4283 no_solaris_partition:
4284 	if ((!ISREMOVABLE(un) ||
4285 	    (ISREMOVABLE(un) && un->un_mediastate == DKIO_EJECTED)) &&
4286 	    (un->un_state == SD_STATE_NORMAL && gvalid == FALSE)) {
4287 		/*
4288 		 * Print out a message indicating who and what we are.
4289 		 * We do this only when we happen to really validate the
4290 		 * geometry. We may call sd_validate_geometry() at other
4291 		 * times, e.g., ioctl()'s like Get VTOC in which case we
4292 		 * don't want to print the label.
4293 		 * If the geometry is valid, print the label string,
4294 		 * else print vendor and product info, if available
4295 		 */
4296 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
4297 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "?<%s>\n", label);
4298 		} else {
4299 			mutex_enter(&sd_label_mutex);
4300 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
4301 			    labelstring);
4302 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
4303 			    &labelstring[64]);
4304 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
4305 			    labelstring, &labelstring[64]);
4306 			if (un->un_f_blockcount_is_valid == TRUE) {
4307 				(void) sprintf(&buf[strlen(buf)],
4308 				    ", %llu %u byte blocks\n",
4309 				    (longlong_t)un->un_blockcount,
4310 				    un->un_tgt_blocksize);
4311 			} else {
4312 				(void) sprintf(&buf[strlen(buf)],
4313 				    ", (unknown capacity)\n");
4314 			}
4315 			SD_INFO(SD_LOG_ATTACH_DETACH, un, buf);
4316 			mutex_exit(&sd_label_mutex);
4317 		}
4318 	}
4319 
4320 #if defined(_SUNOS_VTOC_16)
4321 	/*
4322 	 * If we have valid geometry, set up the remaining fdisk partitions.
4323 	 * Note that dkl_cylno is not used for the fdisk map entries, so
4324 	 * we set it to an entirely bogus value.
4325 	 */
4326 	for (count = 0; count < FD_NUMPART; count++) {
4327 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
4328 		un->un_map[FDISK_P1 + count].dkl_nblk =
4329 		    un->un_fmap[count].fmap_nblk;
4330 
4331 		un->un_offset[FDISK_P1 + count] =
4332 		    un->un_fmap[count].fmap_start;
4333 	}
4334 #endif
4335 
4336 	for (count = 0; count < NDKMAP; count++) {
4337 #if defined(_SUNOS_VTOC_8)
4338 		struct dk_map *lp  = &un->un_map[count];
4339 		un->un_offset[count] =
4340 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
4341 #elif defined(_SUNOS_VTOC_16)
4342 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
4343 
4344 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
4345 #else
4346 #error "No VTOC format defined."
4347 #endif
4348 	}
4349 
4350 	return (label_error);
4351 }
4352 
4353 
4354 #if defined(_SUNOS_VTOC_16)
4355 /*
4356  * Macro: MAX_BLKS
4357  *
4358  *	This macro is used for table entries where we need to have the largest
4359  *	possible sector value for that head & SPT (sectors per track)
4360  *	combination.  Other entries for some smaller disk sizes are set by
4361  *	convention to match those used by X86 BIOS usage.
4362  */
4363 #define	MAX_BLKS(heads, spt)	UINT16_MAX * heads * spt, heads, spt
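/*
 * For example, MAX_BLKS(255, 63) expands to the initializer triple
 * "UINT16_MAX * 255 * 63, 255, 63", i.e. the largest capacity that
 * 65535 cylinders can address at 255 heads and 63 sectors per track.
 */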
4364 
4365 /*
4366  *    Function: sd_convert_geometry
4367  *
4368  * Description: Convert physical geometry into a dk_geom structure. In
4369  *		other words, make sure we don't wrap 16-bit values.
4370  *		e.g. converting from geom_cache to dk_geom
4371  *
4372  *     Context: Kernel thread only
4373  */
4374 static void
4375 sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g)
4376 {
4377 	int i;
4378 	static const struct chs_values {
4379 		uint_t max_cap;		/* Max Capacity for this HS. */
4380 		uint_t nhead;		/* Heads to use. */
4381 		uint_t nsect;		/* SPT to use. */
4382 	} CHS_values[] = {
4383 		{0x00200000,  64, 32},		/* 1GB or smaller disk. */
4384 		{0x01000000, 128, 32},		/* 8GB or smaller disk. */
4385 		{MAX_BLKS(255,  63)},		/* 502.02GB or smaller disk. */
4386 		{MAX_BLKS(255, 126)},		/* .98TB or smaller disk. */
4387 		{DK_MAX_BLOCKS, 255, 189}	/* Max size is just under 1TB */
4388 	};
4389 
4390 	/* Unlabeled SCSI floppy device */
4391 	if (capacity <= 0x1000) {
4392 		un_g->dkg_nhead = 2;
4393 		un_g->dkg_ncyl = 80;
4394 		un_g->dkg_nsect = capacity / (un_g->dkg_nhead * un_g->dkg_ncyl);
4395 		return;
4396 	}
4397 
4398 	/*
4399 	 * For all devices we calculate cylinders using the
4400 	 * heads and sectors we assign based on capacity of the
4401 	 * device.  The table is designed to be compatible with the
4402 	 * way other operating systems lay out fdisk tables for X86
4403 	 * and to ensure that the cylinders never exceed 65535 to
4404 	 * prevent problems with X86 ioctls that report geometry.
4405 	 * Other OSes that are not limited to 16 bits for cylinders
4406 	 * stop at 63 SPT, so we make do by using SPT values that are
4407 	 * multiples of 63.
4408 	 *
4409 	 * Note that capacities greater than or equal to 1TB will simply
4410 	 * get the largest geometry from the table. This should be okay
4411 	 * since disks this large shouldn't be using CHS values anyway.
4412 	 */
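	/*
	 * Worked example (an assumed capacity): a disk of 0x00800000
	 * blocks (4GB at 512 bytes/block) is larger than the 0x00200000
	 * entry but fits the 0x01000000 entry, so it gets 128 heads and
	 * 32 SPT; cylinders then follow from capacity / (nhead * nsect).
	 */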
4413 	for (i = 0; CHS_values[i].max_cap < capacity &&
4414 	    CHS_values[i].max_cap != DK_MAX_BLOCKS; i++)
4415 		;
4416 
4417 	un_g->dkg_nhead = CHS_values[i].nhead;
4418 	un_g->dkg_nsect = CHS_values[i].nsect;
4419 }
4420 #endif
4421 
4422 
4423 /*
4424  *    Function: sd_resync_geom_caches
4425  *
4426  * Description: (Re)initialize both geometry caches: the virtual geometry
4427  *		information is extracted from the HBA (the "geometry"
4428  *		capability), and the physical geometry cache data is
4429  *		generated by issuing MODE SENSE commands.
4430  *
4431  *   Arguments: un - driver soft state (unit) structure
4432  *		capacity - disk capacity in #blocks
4433  *		lbasize - disk block size in bytes
4434  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4435  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4436  *			to use the USCSI "direct" chain and bypass the normal
4437  *			command waitq.
4438  *
4439  *     Context: Kernel thread only (can sleep).
4440  */
4441 
4442 static void
4443 sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
4444 	int path_flag)
4445 {
4446 	struct 	geom_cache 	pgeom;
4447 	struct 	geom_cache	*pgeom_p = &pgeom;
4448 	int 	spc;
4449 	unsigned short nhead;
4450 	unsigned short nsect;
4451 
4452 	ASSERT(un != NULL);
4453 	ASSERT(mutex_owned(SD_MUTEX(un)));
4454 
4455 	/*
4456 	 * Ask the controller for its logical geometry.
4457 	 * Note: if the HBA does not support scsi_ifgetcap("geometry"),
4458 	 * then the lgeom cache will be invalid.
4459 	 */
4460 	sd_get_virtual_geometry(un, capacity, lbasize);
4461 
4462 	/*
4463 	 * Initialize the pgeom cache from lgeom, so that if MODE SENSE
4464 	 * doesn't work, DKIOCG_PHYSGEOM can return reasonable values.
4465 	 */
4466 	if (un->un_lgeom.g_nsect == 0 || un->un_lgeom.g_nhead == 0) {
4467 		/*
4468 		 * Note: Perhaps this needs to be more adaptive? The rationale
4469 		 * is that, if there's no HBA geometry from the HBA driver, any
4470 		 * guess is good, since this is the physical geometry. If MODE
4471 		 * SENSE fails this gives a max cylinder size for non-LBA access
4472 		 */
4473 		nhead = 255;
4474 		nsect = 63;
4475 	} else {
4476 		nhead = un->un_lgeom.g_nhead;
4477 		nsect = un->un_lgeom.g_nsect;
4478 	}
4479 
4480 	if (ISCD(un)) {
4481 		pgeom_p->g_nhead = 1;
4482 		pgeom_p->g_nsect = nsect * nhead;
4483 	} else {
4484 		pgeom_p->g_nhead = nhead;
4485 		pgeom_p->g_nsect = nsect;
4486 	}
4487 
4488 	spc = pgeom_p->g_nhead * pgeom_p->g_nsect;
4489 	pgeom_p->g_capacity = capacity;
4490 	pgeom_p->g_ncyl = pgeom_p->g_capacity / spc;
4491 	pgeom_p->g_acyl = 0;
4492 
4493 	/*
4494 	 * Retrieve fresh geometry data from the hardware, stash it
4495 	 * here temporarily before we rebuild the incore label.
4496 	 *
4497 	 * We want to use the MODE SENSE commands to derive the
4498 	 * physical geometry of the device, but if either command
4499 	 * fails, the logical geometry is used as the fallback for
4500 	 * disk label geometry.
4501 	 */
4502 	mutex_exit(SD_MUTEX(un));
4503 	sd_get_physical_geometry(un, pgeom_p, capacity, lbasize, path_flag);
4504 	mutex_enter(SD_MUTEX(un));
4505 
4506 	/*
4507 	 * Now update the real copy while holding the mutex. This
4508 	 * way the global copy is never in an inconsistent state.
4509 	 */
4510 	bcopy(pgeom_p, &un->un_pgeom,  sizeof (un->un_pgeom));
4511 
4512 	SD_INFO(SD_LOG_COMMON, un, "sd_resync_geom_caches: "
4513 	    "(cached from lgeom)\n");
4514 	SD_INFO(SD_LOG_COMMON, un,
4515 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4516 	    un->un_pgeom.g_ncyl, un->un_pgeom.g_acyl,
4517 	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
4518 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
4519 	    "intrlv: %d; rpm: %d\n", un->un_pgeom.g_secsize,
4520 	    un->un_pgeom.g_capacity, un->un_pgeom.g_intrlv,
4521 	    un->un_pgeom.g_rpm);
4522 }
4523 
4524 
4525 /*
4526  *    Function: sd_read_fdisk
4527  *
4528  * Description: Utility routine to read the fdisk table.
4529  *
4530  *   Arguments: un - driver soft state (unit) structure
4531  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4532  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4533  *			to use the USCSI "direct" chain and bypass the normal
4534  *			command waitq.
4535  *
4536  * Return Code: SD_CMD_SUCCESS
4537  *		SD_CMD_FAILURE
4538  *
4539  *     Context: Kernel thread only (can sleep).
4540  */
4541 /* ARGSUSED */
4542 static int
4543 sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize, int path_flag)
4544 {
4545 #if defined(_NO_FDISK_PRESENT)
4546 
4547 	un->un_solaris_offset = 0;
4548 	un->un_solaris_size = capacity;
4549 	bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4550 	return (SD_CMD_SUCCESS);
4551 
4552 #elif defined(_FIRMWARE_NEEDS_FDISK)
4553 
4554 	struct ipart	*fdp;
4555 	struct mboot	*mbp;
4556 	struct ipart	fdisk[FD_NUMPART];
4557 	int		i;
4558 	char		sigbuf[2];
4559 	caddr_t		bufp;
4560 	int		uidx;
4561 	int		rval;
4562 	int		lba = 0;
4563 	uint_t		solaris_offset;	/* offset to solaris part. */
4564 	daddr_t		solaris_size;	/* size of solaris partition */
4565 	uint32_t	blocksize;
4566 
4567 	ASSERT(un != NULL);
4568 	ASSERT(mutex_owned(SD_MUTEX(un)));
4569 	ASSERT(un->un_f_tgt_blocksize_is_valid == TRUE);
4570 
4571 	blocksize = un->un_tgt_blocksize;
4572 
4573 	/*
4574 	 * Start off assuming no fdisk table
4575 	 */
4576 	solaris_offset = 0;
4577 	solaris_size   = capacity;
4578 
4579 	mutex_exit(SD_MUTEX(un));
4580 	bufp = kmem_zalloc(blocksize, KM_SLEEP);
4581 	rval = sd_send_scsi_READ(un, bufp, blocksize, 0, path_flag);
4582 	mutex_enter(SD_MUTEX(un));
4583 
4584 	if (rval != 0) {
4585 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4586 		    "sd_read_fdisk: fdisk read err\n");
4587 		kmem_free(bufp, blocksize);
4588 		return (SD_CMD_FAILURE);
4589 	}
4590 
4591 	mbp = (struct mboot *)bufp;
4592 
4593 	/*
4594 	 * The fdisk table does not begin on a 4-byte boundary within the
4595 	 * master boot record, so we copy it to an aligned structure to avoid
4596 	 * alignment exceptions on some processors.
4597 	 */
4598 	bcopy(&mbp->parts[0], fdisk, sizeof (fdisk));
4599 
4600 	/*
4601 	 * Check for lba support before verifying sig; sig might not be
4602 	 * there, say on a blank disk, but the max_chs mark may still
4603 	 * be present.
4604 	 *
4605 	 * Note: LBA support and BEFs are an x86-only concept but this
4606 	 * code should work OK on SPARC as well.
4607 	 */
4608 
4609 	/*
4610 	 * First, check for lba-access-ok on root node (or prom root node)
4611 	 * if present there, don't need to search fdisk table.
4612 	 */
4613 	if (ddi_getprop(DDI_DEV_T_ANY, ddi_root_node(), 0,
4614 	    "lba-access-ok", 0) != 0) {
4615 		/* All drives do LBA; don't search fdisk table */
4616 		lba = 1;
4617 	} else {
4618 		/* Okay, look for mark in fdisk table */
4619 		for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
4620 			/* accumulate "lba" value from all partitions */
4621 			lba = (lba || sd_has_max_chs_vals(fdp));
4622 		}
4623 	}
4624 
4625 	if (lba != 0) {
4626 		dev_t dev = sd_make_device(SD_DEVINFO(un));
4627 
4628 		if (ddi_getprop(dev, SD_DEVINFO(un), DDI_PROP_DONTPASS,
4629 		    "lba-access-ok", 0) == 0) {
4630 			/* not found; create it */
4631 			if (ddi_prop_create(dev, SD_DEVINFO(un), 0,
4632 			    "lba-access-ok", (caddr_t)NULL, 0) !=
4633 			    DDI_PROP_SUCCESS) {
4634 				SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4635 				    "sd_read_fdisk: Can't create lba property "
4636 				    "for instance %d\n",
4637 				    ddi_get_instance(SD_DEVINFO(un)));
4638 			}
4639 		}
4640 	}
4641 
4642 	bcopy(&mbp->signature, sigbuf, sizeof (sigbuf));
4643 
4644 	/*
4645 	 * Endian-independent signature check
4646 	 */
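	/*
	 * Assuming MBB_MAGIC is the usual 0xAA55 boot signature, this
	 * requires sigbuf[0] == 0x55 and sigbuf[1] == 0xAA, matching the
	 * little-endian byte order of the on-disk master boot record
	 * regardless of host endianness.
	 */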
4647 	if (((sigbuf[1] & 0xFF) != ((MBB_MAGIC >> 8) & 0xFF)) ||
4648 	    (sigbuf[0] != (MBB_MAGIC & 0xFF))) {
4649 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4650 		    "sd_read_fdisk: no fdisk\n");
4651 		bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4652 		rval = SD_CMD_SUCCESS;
4653 		goto done;
4654 	}
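	/*
	 * Illustrative sketch (assuming the usual MBB_MAGIC of 0xAA55):
	 * because the two signature bytes are compared individually, the
	 * test above is independent of host byte order:
	 *
	 *	sigbuf[0] must be 0x55  (MBB_MAGIC & 0xFF)
	 *	sigbuf[1] must be 0xAA  ((MBB_MAGIC >> 8) & 0xFF)
	 *
	 * A 16-bit compare such as (*(ushort_t *)sigbuf == MBB_MAGIC)
	 * would pass on little-endian hosts but fail on SPARC.
	 */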
4655 
4656 #ifdef SDDEBUG
4657 	if (sd_level_mask & SD_LOGMASK_INFO) {
4658 		fdp = fdisk;
4659 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_read_fdisk:\n");
4660 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "         relsect    "
4661 		    "numsect         sysid       bootid\n");
4662 		for (i = 0; i < FD_NUMPART; i++, fdp++) {
4663 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4664 			    "    %d:  %8d   %8d     0x%08x     0x%08x\n",
4665 			    i, fdp->relsect, fdp->numsect,
4666 			    fdp->systid, fdp->bootid);
4667 		}
4668 	}
4669 #endif
4670 
4671 	/*
4672 	 * Try to find the unix partition
4673 	 */
4674 	uidx = -1;
4675 	solaris_offset = 0;
4676 	solaris_size   = 0;
4677 
4678 	for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
4679 		int	relsect;
4680 		int	numsect;
4681 
4682 		if (fdp->numsect == 0) {
4683 			un->un_fmap[i].fmap_start = 0;
4684 			un->un_fmap[i].fmap_nblk  = 0;
4685 			continue;
4686 		}
4687 
4688 		/*
4689 		 * Data in the fdisk table is little-endian.
4690 		 */
4691 		relsect = LE_32(fdp->relsect);
4692 		numsect = LE_32(fdp->numsect);
4693 
4694 		un->un_fmap[i].fmap_start = relsect;
4695 		un->un_fmap[i].fmap_nblk  = numsect;
4696 
4697 		if (fdp->systid != SUNIXOS &&
4698 		    fdp->systid != SUNIXOS2 &&
4699 		    fdp->systid != EFI_PMBR) {
4700 			continue;
4701 		}
4702 
4703 	/*
4704 	 * Use the last active Solaris partition id found
4705 	 * (there should be only one active partition id).
4706 	 *
4707 	 * If there is no active Solaris partition id,
4708 	 * use the first inactive Solaris partition id found.
4709 	 */
4710 		if ((uidx == -1) || (fdp->bootid == ACTIVE)) {
4711 			uidx = i;
4712 			solaris_offset = relsect;
4713 			solaris_size   = numsect;
4714 		}
4715 	}
4716 
4717 	SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk 0x%x 0x%lx\n",
4718 	    solaris_offset, solaris_size);
4719 
4720 	rval = SD_CMD_SUCCESS;
4721 
4722 done:
4723 
4724 	/*
4725 	 * Clear the VTOC info, only if the Solaris partition entry
4726 	 * has moved, changed size, been deleted, or if the size of
4727 	 * the partition is too small to even fit the label sector.
4728 	 */
4729 	if ((un->un_solaris_offset != solaris_offset) ||
4730 	    (un->un_solaris_size != solaris_size) ||
4731 	    solaris_size <= DK_LABEL_LOC) {
4732 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk moved 0x%x 0x%lx\n",
4733 		    solaris_offset, solaris_size);
4734 		bzero(&un->un_g, sizeof (struct dk_geom));
4735 		bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
4736 		bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
4737 		un->un_f_geometry_is_valid = FALSE;
4738 	}
4739 	un->un_solaris_offset = solaris_offset;
4740 	un->un_solaris_size = solaris_size;
4741 	kmem_free(bufp, blocksize);
4742 	return (rval);
4743 
4744 #else	/* #elif defined(_FIRMWARE_NEEDS_FDISK) */
4745 #error "fdisk table presence undetermined for this platform."
4746 #endif	/* #if defined(_NO_FDISK_PRESENT) */
4747 }
4748 
4749 
4750 /*
4751  *    Function: sd_get_physical_geometry
4752  *
4753  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4754  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4755  *		target, and use this information to initialize the physical
4756  *		geometry cache specified by pgeom_p.
4757  *
4758  *		MODE SENSE is an optional command, so failure in this case
4759  *		does not necessarily denote an error. We want to use the
4760  *		MODE SENSE commands to derive the physical geometry of the
4761  *		device, but if either command fails, the logical geometry is
4762  *		used as the fallback for disk label geometry.
4763  *
4764  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4765  *		have already been initialized for the current target and
4766  *		that the current values be passed as args so that we don't
4767  *		end up ever trying to use -1 as a valid value. This could
4768  *		happen if either value is reset while we're not holding
4769  *		the mutex.
4770  *
4771  *   Arguments: un - driver soft state (unit) structure
4772  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4773  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4774  *			to use the USCSI "direct" chain and bypass the normal
4775  *			command waitq.
4776  *
4777  *     Context: Kernel thread only (can sleep).
4778  */
4779 
4780 static void
4781 sd_get_physical_geometry(struct sd_lun *un, struct geom_cache *pgeom_p,
4782 	int capacity, int lbasize, int path_flag)
4783 {
4784 	struct	mode_format	*page3p;
4785 	struct	mode_geometry	*page4p;
4786 	struct	mode_header	*headerp;
4787 	int	sector_size;
4788 	int	nsect;
4789 	int	nhead;
4790 	int	ncyl;
4791 	int	intrlv;
4792 	int	spc;
4793 	int	modesense_capacity;
4794 	int	rpm;
4795 	int	bd_len;
4796 	int	mode_header_length;
4797 	uchar_t	*p3bufp;
4798 	uchar_t	*p4bufp;
4799 	int	cdbsize;
4800 
4801 	ASSERT(un != NULL);
4802 	ASSERT(!(mutex_owned(SD_MUTEX(un))));
4803 
4804 	if (un->un_f_blockcount_is_valid != TRUE) {
4805 		return;
4806 	}
4807 
4808 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
4809 		return;
4810 	}
4811 
4812 	if (lbasize == 0) {
4813 		if (ISCD(un)) {
4814 			lbasize = 2048;
4815 		} else {
4816 			lbasize = un->un_sys_blocksize;
4817 		}
4818 	}
4819 	pgeom_p->g_secsize = (unsigned short)lbasize;
4820 
4821 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4822 
4823 	/*
4824 	 * Retrieve MODE SENSE page 3 - Format Device Page
4825 	 */
4826 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4827 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
4828 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
4829 	    != 0) {
4830 		SD_ERROR(SD_LOG_COMMON, un,
4831 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4832 		goto page3_exit;
4833 	}
4834 
4835 	/*
4836 	 * Determine size of Block Descriptors in order to locate the mode
4837 	 * page data.  ATAPI devices return 0, SCSI devices should return
4838 	 * MODE_BLK_DESC_LENGTH.
4839 	 */
4840 	headerp = (struct mode_header *)p3bufp;
4841 	if (un->un_f_cfg_is_atapi == TRUE) {
4842 		struct mode_header_grp2 *mhp =
4843 		    (struct mode_header_grp2 *)headerp;
4844 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4845 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4846 	} else {
4847 		mode_header_length = MODE_HEADER_LENGTH;
4848 		bd_len = headerp->bdesc_length;
4849 	}
4850 
4851 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4852 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4853 		    "received unexpected bd_len of %d, page3\n", bd_len);
4854 		goto page3_exit;
4855 	}
4856 
4857 	page3p = (struct mode_format *)
4858 	    ((caddr_t)headerp + mode_header_length + bd_len);
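	/*
	 * Layout sketch of the buffer parsed here, per the SCSI MODE SENSE
	 * format (group 0, non-ATAPI case: MODE_HEADER_LENGTH is 4 and one
	 * block descriptor is MODE_BLK_DESC_LENGTH (8) bytes):
	 *
	 *	offset  0: mode header       (4 bytes)
	 *	offset  4: block descriptor  (bd_len bytes, 0 or 8)
	 *	offset 12: page 3 data       (struct mode_format)
	 *
	 * ATAPI devices return bd_len == 0, so their page data follows
	 * the (8-byte, group 2) header immediately.
	 */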
4859 
4860 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4861 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4862 		    "mode sense pg3 code mismatch %d\n",
4863 		    page3p->mode_page.code);
4864 		goto page3_exit;
4865 	}
4866 
4867 	/*
4868 	 * Use this physical geometry data only if BOTH MODE SENSE commands
4869 	 * complete successfully; otherwise, revert to the logical geometry.
4870 	 * So, we need to save everything in temporary variables.
4871 	 */
4872 	sector_size = BE_16(page3p->data_bytes_sect);
4873 
4874 	/*
4875 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
4876 	 */
4877 	if (sector_size == 0) {
4878 		sector_size = (ISCD(un)) ? 2048 : un->un_sys_blocksize;
4879 	} else {
4880 		sector_size &= ~(un->un_sys_blocksize - 1);
4881 	}
4882 
4883 	nsect  = BE_16(page3p->sect_track);
4884 	intrlv = BE_16(page3p->interleave);
4885 
4886 	SD_INFO(SD_LOG_COMMON, un,
4887 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
4888 	SD_INFO(SD_LOG_COMMON, un,
4889 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
4890 	    page3p->mode_page.code, nsect, sector_size);
4891 	SD_INFO(SD_LOG_COMMON, un,
4892 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
4893 	    BE_16(page3p->track_skew),
4894 	    BE_16(page3p->cylinder_skew));
4895 
4896 
4897 	/*
4898 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
4899 	 */
4900 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
4901 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
4902 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
4903 	    != 0) {
4904 		SD_ERROR(SD_LOG_COMMON, un,
4905 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
4906 		goto page4_exit;
4907 	}
4908 
4909 	/*
4910 	 * Determine size of Block Descriptors in order to locate the mode
4911 	 * page data.  ATAPI devices return 0, SCSI devices should return
4912 	 * MODE_BLK_DESC_LENGTH.
4913 	 */
4914 	headerp = (struct mode_header *)p4bufp;
4915 	if (un->un_f_cfg_is_atapi == TRUE) {
4916 		struct mode_header_grp2 *mhp =
4917 		    (struct mode_header_grp2 *)headerp;
4918 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4919 	} else {
4920 		bd_len = headerp->bdesc_length;
4921 	}
4922 
4923 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4924 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4925 		    "received unexpected bd_len of %d, page4\n", bd_len);
4926 		goto page4_exit;
4927 	}
4928 
4929 	page4p = (struct mode_geometry *)
4930 	    ((caddr_t)headerp + mode_header_length + bd_len);
4931 
4932 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
4933 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4934 		    "mode sense pg4 code mismatch %d\n",
4935 		    page4p->mode_page.code);
4936 		goto page4_exit;
4937 	}
4938 
4939 	/*
4940 	 * Stash the data now, after we know that both commands completed.
4941 	 */
4942 
4943 	mutex_enter(SD_MUTEX(un));
4944 
4945 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
4946 	spc   = nhead * nsect;
4947 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
4948 	rpm   = BE_16(page4p->rpm);
4949 
4950 	modesense_capacity = spc * ncyl;
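	/*
	 * Worked example (illustrative values): with nhead = 16 and
	 * nsect = 63, spc = 1008.  Cylinder bytes 0x01/0x86/0xA0 yield
	 * ncyl = (0x01 << 16) + (0x86 << 8) + 0xA0 = 100000, giving
	 * modesense_capacity = 1008 * 100000 = 100800000 blocks.
	 */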
4951 
4952 	SD_INFO(SD_LOG_COMMON, un,
4953 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
4954 	SD_INFO(SD_LOG_COMMON, un,
4955 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
4956 	SD_INFO(SD_LOG_COMMON, un,
4957 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
4958 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
4959 	    (void *)pgeom_p, capacity);
4960 
4961 	/*
4962 	 * Compensate if the drive's geometry is not rectangular, i.e.,
4963 	 * the product of C * H * S returned by MODE SENSE is >= the capacity
4964 	 * returned by READ CAPACITY. This is an idiosyncrasy of the original x86
4965 	 * disk subsystem.
4966 	 */
4967 	if (modesense_capacity >= capacity) {
4968 		SD_INFO(SD_LOG_COMMON, un,
4969 		    "sd_get_physical_geometry: adjusting acyl; "
4970 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
4971 		    (modesense_capacity - capacity + spc - 1) / spc);
4972 		if (sector_size != 0) {
4973 			/* 1243403: NEC D38x7 drives don't support sec size */
4974 			pgeom_p->g_secsize = (unsigned short)sector_size;
4975 		}
4976 		pgeom_p->g_nsect    = (unsigned short)nsect;
4977 		pgeom_p->g_nhead    = (unsigned short)nhead;
4978 		pgeom_p->g_capacity = capacity;
4979 		pgeom_p->g_acyl	    =
4980 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
4981 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
4982 	}
4983 
4984 	pgeom_p->g_rpm    = (unsigned short)rpm;
4985 	pgeom_p->g_intrlv = (unsigned short)intrlv;
4986 
4987 	SD_INFO(SD_LOG_COMMON, un,
4988 	    "sd_get_physical_geometry: mode sense geometry:\n");
4989 	SD_INFO(SD_LOG_COMMON, un,
4990 	    "   nsect: %d; sector size: %d; interlv: %d\n",
4991 	    nsect, sector_size, intrlv);
4992 	SD_INFO(SD_LOG_COMMON, un,
4993 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
4994 	    nhead, ncyl, rpm, modesense_capacity);
4995 	SD_INFO(SD_LOG_COMMON, un,
4996 	    "sd_get_physical_geometry: (cached)\n");
4997 	SD_INFO(SD_LOG_COMMON, un,
4998 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4999 	    un->un_pgeom.g_ncyl,  un->un_pgeom.g_acyl,
5000 	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
5001 	SD_INFO(SD_LOG_COMMON, un,
5002 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
5003 	    un->un_pgeom.g_secsize, un->un_pgeom.g_capacity,
5004 	    un->un_pgeom.g_intrlv, un->un_pgeom.g_rpm);
5005 
5006 	mutex_exit(SD_MUTEX(un));
5007 
5008 page4_exit:
5009 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
5010 page3_exit:
5011 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
5012 }
5013 
5014 
5015 /*
5016  *    Function: sd_get_virtual_geometry
5017  *
5018  * Description: Ask the controller to tell us about the target device.
5019  *
5020  *   Arguments: un - pointer to softstate
5021  *		capacity - disk capacity in #blocks
5022  *		lbasize - disk block size in bytes
5023  *
5024  *     Context: Kernel thread only
5025  */
5026 
5027 static void
5028 sd_get_virtual_geometry(struct sd_lun *un, int capacity, int lbasize)
5029 {
5030 	struct	geom_cache 	*lgeom_p = &un->un_lgeom;
5031 	uint_t	geombuf;
5032 	int	spc;
5033 
5034 	ASSERT(un != NULL);
5035 	ASSERT(mutex_owned(SD_MUTEX(un)));
5036 
5037 	mutex_exit(SD_MUTEX(un));
5038 
5039 	/* Set sector size, and total number of sectors */
5040 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
5041 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
5042 
5043 	/* Let the HBA tell us its geometry */
5044 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
5045 
5046 	mutex_enter(SD_MUTEX(un));
5047 
5048 	/* A value of -1 indicates an undefined "geometry" property */
5049 	if (geombuf == (-1)) {
5050 		return;
5051 	}
5052 
5053 	/* Initialize the logical geometry cache. */
5054 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
5055 	lgeom_p->g_nsect   = geombuf & 0xffff;
5056 	lgeom_p->g_secsize = un->un_sys_blocksize;
5057 
5058 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
5059 
5060 	/*
5061 	 * Note: The driver originally converted the capacity value from
5062 	 * target blocks to system blocks. However, the capacity value passed
5063 	 * to this routine is already in terms of system blocks (this scaling
5064 	 * is done when the READ CAPACITY command is issued and processed).
5065 	 * This 'error' may have gone undetected because the usage of g_ncyl
5066 	 * (which is based upon g_capacity) is very limited within the driver.
5067 	 */
5068 	lgeom_p->g_capacity = capacity;
5069 
5070 	/*
5071 	 * Set ncyl to zero if the HBA returned a zero nhead or nsect value;
5072 	 * the HBA may return zeroes if the device has been removed.
5073 	 */
5074 	if (spc == 0) {
5075 		lgeom_p->g_ncyl = 0;
5076 	} else {
5077 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
5078 	}
5079 	lgeom_p->g_acyl = 0;
5080 
5081 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
5082 	SD_INFO(SD_LOG_COMMON, un,
5083 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5084 	    un->un_lgeom.g_ncyl,  un->un_lgeom.g_acyl,
5085 	    un->un_lgeom.g_nhead, un->un_lgeom.g_nsect);
5086 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
5087 	    "intrlv: %d; rpm: %d\n", un->un_lgeom.g_secsize,
5088 	    un->un_lgeom.g_capacity, un->un_lgeom.g_intrlv, un->un_lgeom.g_rpm);
5089 }
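/*
 * Sketch of the "geometry" capability encoding consumed above (hypothetical
 * value): an HBA reporting 64 heads and 32 sectors/track returns
 * geombuf = (64 << 16) | 32 = 0x00400020.  For a 2097152-block device this
 * gives spc = 2048 and g_ncyl = 2097152 / 2048 = 1024.
 */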
5090 
5091 
5092 /*
5093  *    Function: sd_update_block_info
5094  *
5095  * Description: Save the target sector size (lbasize) and capacity in the
5096  *		soft state, marking each value as valid if it is non-zero.
5097  *
5098  *   Arguments: un: unit struct.
5099  *		lbasize: new target sector size
5100  *		capacity: new target capacity, ie. block count
5101  *
5102  *     Context: Kernel thread context
5103  */
5104 
5105 static void
5106 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
5107 {
5108 	if (lbasize != 0) {
5109 		un->un_tgt_blocksize = lbasize;
5110 		un->un_f_tgt_blocksize_is_valid	= TRUE;
5111 	}
5112 
5113 	if (capacity != 0) {
5114 		un->un_blockcount		= capacity;
5115 		un->un_f_blockcount_is_valid	= TRUE;
5116 	}
5117 }
5118 
5119 
5120 static void
5121 sd_swap_efi_gpt(efi_gpt_t *e)
5122 {
5123 	_NOTE(ASSUMING_PROTECTED(*e))
5124 	e->efi_gpt_Signature = LE_64(e->efi_gpt_Signature);
5125 	e->efi_gpt_Revision = LE_32(e->efi_gpt_Revision);
5126 	e->efi_gpt_HeaderSize = LE_32(e->efi_gpt_HeaderSize);
5127 	e->efi_gpt_HeaderCRC32 = LE_32(e->efi_gpt_HeaderCRC32);
5128 	e->efi_gpt_MyLBA = LE_64(e->efi_gpt_MyLBA);
5129 	e->efi_gpt_AlternateLBA = LE_64(e->efi_gpt_AlternateLBA);
5130 	e->efi_gpt_FirstUsableLBA = LE_64(e->efi_gpt_FirstUsableLBA);
5131 	e->efi_gpt_LastUsableLBA = LE_64(e->efi_gpt_LastUsableLBA);
5132 	UUID_LE_CONVERT(e->efi_gpt_DiskGUID, e->efi_gpt_DiskGUID);
5133 	e->efi_gpt_PartitionEntryLBA = LE_64(e->efi_gpt_PartitionEntryLBA);
5134 	e->efi_gpt_NumberOfPartitionEntries =
5135 	    LE_32(e->efi_gpt_NumberOfPartitionEntries);
5136 	e->efi_gpt_SizeOfPartitionEntry =
5137 	    LE_32(e->efi_gpt_SizeOfPartitionEntry);
5138 	e->efi_gpt_PartitionEntryArrayCRC32 =
5139 	    LE_32(e->efi_gpt_PartitionEntryArrayCRC32);
5140 }
5141 
5142 static void
5143 sd_swap_efi_gpe(int nparts, efi_gpe_t *p)
5144 {
5145 	int i;
5146 
5147 	_NOTE(ASSUMING_PROTECTED(*p))
5148 	for (i = 0; i < nparts; i++) {
5149 		UUID_LE_CONVERT(p[i].efi_gpe_PartitionTypeGUID,
5150 		    p[i].efi_gpe_PartitionTypeGUID);
5151 		p[i].efi_gpe_StartingLBA = LE_64(p[i].efi_gpe_StartingLBA);
5152 		p[i].efi_gpe_EndingLBA = LE_64(p[i].efi_gpe_EndingLBA);
5153 		/* PartitionAttrs */
5154 	}
5155 }
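/*
 * Note on the LE_32/LE_64/UUID_LE_CONVERT macros used by the two swap
 * routines above: EFI data is little-endian on disk, so these are no-ops
 * on x86 and byte swaps on big-endian hosts.  For example, on SPARC
 * LE_32(0x12345678) evaluates to 0x78563412.
 */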
5156 
5157 static int
5158 sd_validate_efi(efi_gpt_t *labp)
5159 {
5160 	if (labp->efi_gpt_Signature != EFI_SIGNATURE)
5161 		return (EINVAL);
5162 	/* the header must be at least 92 bytes in this version of the spec */
5163 	if (sizeof (efi_gpt_t) - sizeof (labp->efi_gpt_Reserved2) >
5164 	    labp->efi_gpt_HeaderSize)
5165 		return (EINVAL);
5166 	/* this should be 128 bytes */
5167 	if (labp->efi_gpt_SizeOfPartitionEntry != sizeof (efi_gpe_t))
5168 		return (EINVAL);
5169 	return (0);
5170 }
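/*
 * A minimal user-level analogue of the checks above (constants from the
 * EFI spec: the signature is the ASCII string "EFI PART" stored
 * little-endian, and a GPT partition entry is 128 bytes):
 *
 *	assert(gpt->efi_gpt_Signature == 0x5452415020494645ULL);
 *	assert(gpt->efi_gpt_HeaderSize >= 92);
 *	assert(gpt->efi_gpt_SizeOfPartitionEntry == 128);
 */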
5171 
5172 static int
5173 sd_use_efi(struct sd_lun *un, int path_flag)
5174 {
5175 	int		i;
5176 	int		rval = 0;
5177 	efi_gpe_t	*partitions;
5178 	uchar_t		*buf;
5179 	uint_t		lbasize;
5180 	uint64_t	cap = un->un_blockcount; /* re-read if backup GPT used */
5181 	uint_t		nparts;
5182 	diskaddr_t	gpe_lba;
5183 
5184 	ASSERT(mutex_owned(SD_MUTEX(un)));
5185 	lbasize = un->un_tgt_blocksize;
5186 
5187 	mutex_exit(SD_MUTEX(un));
5188 
5189 	buf = kmem_zalloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
5190 
5191 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
5192 		rval = EINVAL;
5193 		goto done_err;
5194 	}
5195 
5196 	rval = sd_send_scsi_READ(un, buf, lbasize, 0, path_flag);
5197 	if (rval) {
5198 		goto done_err;
5199 	}
5200 	if (((struct dk_label *)buf)->dkl_magic == DKL_MAGIC) {
5201 		/* not ours */
5202 		rval = ESRCH;
5203 		goto done_err;
5204 	}
5205 
5206 	rval = sd_send_scsi_READ(un, buf, lbasize, 1, path_flag);
5207 	if (rval) {
5208 		goto done_err;
5209 	}
5210 	sd_swap_efi_gpt((efi_gpt_t *)buf);
5211 
5212 	if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
5213 		/*
5214 		 * Couldn't read the primary, try the backup.  Our
5215 		 * capacity at this point could be based on CHS, so
5216 		 * check what the device reports.
5217 		 */
5218 		rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
5219 		    path_flag);
5220 		if (rval) {
5221 			goto done_err;
5222 		}
5223 		if ((rval = sd_send_scsi_READ(un, buf, lbasize,
5224 		    cap - 1, path_flag)) != 0) {
5225 			goto done_err;
5226 		}
5227 		sd_swap_efi_gpt((efi_gpt_t *)buf);
5228 		if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0)
5229 			goto done_err;
5230 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5231 		    "primary label corrupt; using backup\n");
5232 	}
5233 
5234 	nparts = ((efi_gpt_t *)buf)->efi_gpt_NumberOfPartitionEntries;
5235 	gpe_lba = ((efi_gpt_t *)buf)->efi_gpt_PartitionEntryLBA;
5236 
5237 	rval = sd_send_scsi_READ(un, buf, EFI_MIN_ARRAY_SIZE, gpe_lba,
5238 	    path_flag);
5239 	if (rval) {
5240 		goto done_err;
5241 	}
5242 	partitions = (efi_gpe_t *)buf;
5243 
5244 	if (nparts > MAXPART) {
5245 		nparts = MAXPART;
5246 	}
5247 	sd_swap_efi_gpe(nparts, partitions);
5248 
5249 	mutex_enter(SD_MUTEX(un));
5250 
5251 	/* Fill in partition table. */
5252 	for (i = 0; i < nparts; i++) {
5253 		if (partitions->efi_gpe_StartingLBA != 0 ||
5254 		    partitions->efi_gpe_EndingLBA != 0) {
5255 			un->un_map[i].dkl_cylno =
5256 			    partitions->efi_gpe_StartingLBA;
5257 			un->un_map[i].dkl_nblk =
5258 			    partitions->efi_gpe_EndingLBA -
5259 			    partitions->efi_gpe_StartingLBA + 1;
5260 			un->un_offset[i] =
5261 			    partitions->efi_gpe_StartingLBA;
5262 		}
5263 		if (i == WD_NODE) {
5264 			/*
5265 			 * minor number 7 corresponds to the whole disk
5266 			 */
5267 			un->un_map[i].dkl_cylno = 0;
5268 			un->un_map[i].dkl_nblk = un->un_blockcount;
5269 			un->un_offset[i] = 0;
5270 		}
5271 		partitions++;
5272 	}
5273 	un->un_solaris_offset = 0;
5274 	un->un_solaris_size = cap;
5275 	un->un_f_geometry_is_valid = TRUE;
5276 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5277 	return (0);
5278 
5279 done_err:
5280 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5281 	mutex_enter(SD_MUTEX(un));
5282 	/*
5283 	 * If we didn't find anything that could look like a VTOC and the
5284 	 * disk is over 1TB, we know there isn't a valid label.
5285 	 * Otherwise let sd_uselabel decide what to do.  We only
5286 	 * want to invalidate this if we're certain the label isn't
5287 	 * valid because sd_prop_op will now fail, which in turn
5288 	 * causes things like opens and stats on the partition to fail.
5289 	 */
5290 	if ((un->un_blockcount > DK_MAX_BLOCKS) && (rval != ESRCH)) {
5291 		un->un_f_geometry_is_valid = FALSE;
5292 	}
5293 	return (rval);
5294 }
5295 
5296 
5297 /*
5298  *    Function: sd_uselabel
5299  *
5300  * Description: Validate the disk label and update the relevant data (geometry,
5301  *		partition, vtoc, and capacity data) in the sd_lun struct.
5302  *		Marks the geometry of the unit as being valid.
5303  *
5304  *   Arguments: un: unit struct.
5305  *		dk_label: disk label
5306  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
5307  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
5308  *			to use the USCSI "direct" chain and bypass the normal
5309  *			command waitq.
5310  *
5311  * Return Code: SD_LABEL_IS_VALID: Label read from disk is OK; geometry,
5312  *		partition, vtoc, and capacity data are good.
5313  *
5314  *		SD_LABEL_IS_INVALID: Magic number or checksum error in the
5315  *		label; or computed capacity does not jibe with capacity
5316  *		reported from the READ CAPACITY command.
5317  *
5318  *     Context: Kernel thread only (can sleep).
5319  */
5320 
5321 static int
5322 sd_uselabel(struct sd_lun *un, struct dk_label *labp, int path_flag)
5323 {
5324 	short	*sp;
5325 	short	sum;
5326 	short	count;
5327 	int	label_error = SD_LABEL_IS_VALID;
5328 	int	i;
5329 	int	capacity;
5330 	int	part_end;
5331 	int	track_capacity;
5332 	int	err;
5333 #if defined(_SUNOS_VTOC_16)
5334 	struct	dkl_partition	*vpartp;
5335 #endif
5336 	ASSERT(un != NULL);
5337 	ASSERT(mutex_owned(SD_MUTEX(un)));
5338 
5339 	/* Validate the magic number of the label. */
5340 	if (labp->dkl_magic != DKL_MAGIC) {
5341 #if defined(__sparc)
5342 		if ((un->un_state == SD_STATE_NORMAL) &&
5343 		    !ISREMOVABLE(un)) {
5344 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5345 			    "Corrupt label; wrong magic number\n");
5346 		}
5347 #endif
5348 		return (SD_LABEL_IS_INVALID);
5349 	}
5350 
5351 	/* Validate the checksum of the label. */
5352 	sp  = (short *)labp;
5353 	sum = 0;
5354 	count = sizeof (struct dk_label) / sizeof (short);
5355 	while (count--)	 {
5356 		sum ^= *sp++;
5357 	}
5358 
5359 	if (sum != 0) {
5360 #if defined(_SUNOS_VTOC_16)
5361 		if (un->un_state == SD_STATE_NORMAL && !ISCD(un)) {
5362 #elif defined(_SUNOS_VTOC_8)
5363 		if (un->un_state == SD_STATE_NORMAL && !ISREMOVABLE(un)) {
5364 #endif
5365 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5366 			    "Corrupt label - label checksum failed\n");
5367 		}
5368 		return (SD_LABEL_IS_INVALID);
5369 	}
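	/*
	 * Sketch of why the XOR above works: dkl_cksum is written as the
	 * XOR of every other 16-bit word in the label, so XOR-ing all the
	 * words of a good label (checksum included) yields zero.  E.g.,
	 * for words 0x1234 and 0xABCD the stored checksum is 0xB9F9, and
	 * 0x1234 ^ 0xABCD ^ 0xB9F9 == 0.
	 */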
5370 
5371 
5372 	/*
5373 	 * Fill in geometry structure with data from label.
5374 	 */
5375 	bzero(&un->un_g, sizeof (struct dk_geom));
5376 	un->un_g.dkg_ncyl   = labp->dkl_ncyl;
5377 	un->un_g.dkg_acyl   = labp->dkl_acyl;
5378 	un->un_g.dkg_bcyl   = 0;
5379 	un->un_g.dkg_nhead  = labp->dkl_nhead;
5380 	un->un_g.dkg_nsect  = labp->dkl_nsect;
5381 	un->un_g.dkg_intrlv = labp->dkl_intrlv;
5382 
5383 #if defined(_SUNOS_VTOC_8)
5384 	un->un_g.dkg_gap1   = labp->dkl_gap1;
5385 	un->un_g.dkg_gap2   = labp->dkl_gap2;
5386 	un->un_g.dkg_bhead  = labp->dkl_bhead;
5387 #endif
5388 #if defined(_SUNOS_VTOC_16)
5389 	un->un_dkg_skew = labp->dkl_skew;
5390 #endif
5391 
5392 #if defined(__i386) || defined(__amd64)
5393 	un->un_g.dkg_apc = labp->dkl_apc;
5394 #endif
5395 
5396 	/*
5397 	 * Currently we rely on the values in the label being accurate. If
5398 	 * dkl_rpm or dkl_pcyl is zero in the label, use a default value.
5399 	 *
5400 	 * Note: In the future a MODE SENSE may be used to retrieve this data,
5401 	 * although this command is optional in SCSI-2.
5402 	 */
5403 	un->un_g.dkg_rpm  = (labp->dkl_rpm  != 0) ? labp->dkl_rpm  : 3600;
5404 	un->un_g.dkg_pcyl = (labp->dkl_pcyl != 0) ? labp->dkl_pcyl :
5405 	    (un->un_g.dkg_ncyl + un->un_g.dkg_acyl);
5406 
5407 	/*
5408 	 * The Read and Write reinstruct values may not be valid
5409 	 * for older disks.
5410 	 */
5411 	un->un_g.dkg_read_reinstruct  = labp->dkl_read_reinstruct;
5412 	un->un_g.dkg_write_reinstruct = labp->dkl_write_reinstruct;
5413 
5414 	/* Fill in partition table. */
5415 #if defined(_SUNOS_VTOC_8)
5416 	for (i = 0; i < NDKMAP; i++) {
5417 		un->un_map[i].dkl_cylno = labp->dkl_map[i].dkl_cylno;
5418 		un->un_map[i].dkl_nblk  = labp->dkl_map[i].dkl_nblk;
5419 	}
5420 #endif
5421 #if  defined(_SUNOS_VTOC_16)
5422 	vpartp		= labp->dkl_vtoc.v_part;
5423 	track_capacity	= labp->dkl_nhead * labp->dkl_nsect;
5424 
5425 	for (i = 0; i < NDKMAP; i++, vpartp++) {
5426 		un->un_map[i].dkl_cylno = vpartp->p_start / track_capacity;
5427 		un->un_map[i].dkl_nblk  = vpartp->p_size;
5428 	}
5429 #endif
5430 
5431 	/* Fill in VTOC Structure. */
5432 	bcopy(&labp->dkl_vtoc, &un->un_vtoc, sizeof (struct dk_vtoc));
5433 #if defined(_SUNOS_VTOC_8)
5434 	/*
5435 	 * The 8-slice vtoc does not include the ascii label; save it into
5436 	 * the device's soft state structure here.
5437 	 */
5438 	bcopy(labp->dkl_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
5439 #endif
5440 
5441 	/* Mark the geometry as valid. */
5442 	un->un_f_geometry_is_valid = TRUE;
5443 
5444 	/* Now look for a valid capacity. */
5445 	track_capacity	= (un->un_g.dkg_nhead * un->un_g.dkg_nsect);
5446 	capacity	= (un->un_g.dkg_ncyl  * track_capacity);
5447 
5448 	if (un->un_g.dkg_acyl) {
5449 #if defined(__i386) || defined(__amd64)
5450 		/* we may have > 1 alts cylinder */
5451 		capacity += (track_capacity * un->un_g.dkg_acyl);
5452 #else
5453 		capacity += track_capacity;
5454 #endif
5455 	}
5456 
5457 	/*
5458 	 * At this point, un->un_blockcount should contain valid data from
5459 	 * the READ CAPACITY command.
5460 	 */
5461 	if (un->un_f_blockcount_is_valid != TRUE) {
5462 		/*
5463 		 * We have a situation where the target didn't give us a good
5464 		 * READ CAPACITY value, yet there appears to be a valid label.
5465 		 * In this case, we'll fake the capacity.
5466 		 */
5467 		un->un_blockcount = capacity;
5468 		un->un_f_blockcount_is_valid = TRUE;
5469 		goto done;
5470 	}
5471 
5472 
5473 	if ((capacity <= un->un_blockcount) ||
5474 	    (un->un_state != SD_STATE_NORMAL)) {
5475 #if defined(_SUNOS_VTOC_8)
5476 		/*
5477 		 * We can't let this happen on drives that are subdivided
5478 		 * into logical disks (i.e., that have an fdisk table).
5479 		 * The un_blockcount field should always hold the full media
5480 		 * size in sectors, period.  This code would overwrite
5481 		 * un_blockcount with the size of the Solaris fdisk partition.
5482 		 */
5483 		SD_ERROR(SD_LOG_COMMON, un,
5484 		    "sd_uselabel: Label %d blocks; Drive %d blocks\n",
5485 		    capacity, un->un_blockcount);
5486 		un->un_blockcount = capacity;
5487 		un->un_f_blockcount_is_valid = TRUE;
5488 #endif	/* defined(_SUNOS_VTOC_8) */
5489 		goto done;
5490 	}
5491 
5492 	if (ISCD(un)) {
5493 		/* For CDROMs, we trust that the data in the label is OK. */
5494 #if defined(_SUNOS_VTOC_8)
5495 		for (i = 0; i < NDKMAP; i++) {
5496 			part_end = labp->dkl_nhead * labp->dkl_nsect *
5497 			    labp->dkl_map[i].dkl_cylno +
5498 			    labp->dkl_map[i].dkl_nblk  - 1;
5499 
5500 			if ((labp->dkl_map[i].dkl_nblk) &&
5501 			    (part_end > un->un_blockcount)) {
5502 				un->un_f_geometry_is_valid = FALSE;
5503 				break;
5504 			}
5505 		}
5506 #endif
5507 #if defined(_SUNOS_VTOC_16)
5508 		vpartp = &(labp->dkl_vtoc.v_part[0]);
5509 		for (i = 0; i < NDKMAP; i++, vpartp++) {
5510 			part_end = vpartp->p_start + vpartp->p_size;
5511 			if ((vpartp->p_size > 0) &&
5512 			    (part_end > un->un_blockcount)) {
5513 				un->un_f_geometry_is_valid = FALSE;
5514 				break;
5515 			}
5516 		}
5517 #endif
5518 	} else {
5519 		uint64_t t_capacity;
5520 		uint32_t t_lbasize;
5521 
5522 		mutex_exit(SD_MUTEX(un));
5523 		err = sd_send_scsi_READ_CAPACITY(un, &t_capacity, &t_lbasize,
5524 		    path_flag);
5525 		mutex_enter(SD_MUTEX(un));
5526 
5527 		if (err == 0) {
5528 			ASSERT(t_capacity <= DK_MAX_BLOCKS);
5529 			sd_update_block_info(un, t_lbasize, t_capacity);
5530 		}
5531 
5532 		if (capacity > un->un_blockcount) {
5533 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5534 			    "Corrupt label - bad geometry\n");
5535 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
5536 			    "Label says %u blocks; Drive says %llu blocks\n",
5537 			    capacity, (unsigned long long)un->un_blockcount);
5538 			un->un_f_geometry_is_valid = FALSE;
5539 			label_error = SD_LABEL_IS_INVALID;
5540 		}
5541 	}
5542 
5543 done:
5544 
5545 	SD_INFO(SD_LOG_COMMON, un, "sd_uselabel: (label geometry)\n");
5546 	SD_INFO(SD_LOG_COMMON, un,
5547 	    "   ncyl: %d; acyl: %d; nhead: %d; nsect: %d\n",
5548 	    un->un_g.dkg_ncyl,  un->un_g.dkg_acyl,
5549 	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
5550 	SD_INFO(SD_LOG_COMMON, un,
5551 	    "   lbasize: %d; capacity: %d; intrlv: %d; rpm: %d\n",
5552 	    un->un_tgt_blocksize, un->un_blockcount,
5553 	    un->un_g.dkg_intrlv, un->un_g.dkg_rpm);
5554 	SD_INFO(SD_LOG_COMMON, un, "   wrt_reinstr: %d; rd_reinstr: %d\n",
5555 	    un->un_g.dkg_write_reinstruct, un->un_g.dkg_read_reinstruct);
5556 
5557 	ASSERT(mutex_owned(SD_MUTEX(un)));
5558 
5559 	return (label_error);
5560 }
5561 
5562 
5563 /*
5564  *    Function: sd_build_default_label
5565  *
5566  * Description: Generate a default label for those devices that do not have
5567  *		one, e.g., new media, removable cartridges, etc..
5568  *
5569  *     Context: Kernel thread only
5570  */
5571 
5572 static void
5573 sd_build_default_label(struct sd_lun *un)
5574 {
5575 #if defined(_SUNOS_VTOC_16)
5576 	uint_t	phys_spc;
5577 	uint_t	disksize;
5578 	struct	dk_geom un_g;
5579 #endif
5580 
5581 	ASSERT(un != NULL);
5582 	ASSERT(mutex_owned(SD_MUTEX(un)));
5583 
5584 #if defined(_SUNOS_VTOC_8)
5585 	/*
5586 	 * Note: This is a legacy check for non-removable devices on VTOC_8
5587 	 * only. This may be a valid check for VTOC_16 as well.
5588 	 */
5589 	if (!ISREMOVABLE(un)) {
5590 		return;
5591 	}
5592 #endif
5593 
5594 	bzero(&un->un_g, sizeof (struct dk_geom));
5595 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5596 	bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
5597 
5598 #if defined(_SUNOS_VTOC_8)
5599 
5600 	/*
5601 	 * It's REMOVABLE media, therefore no label (on SPARC, anyway).
5602 	 * But it is still necessary to set up various geometry information,
5603 	 * and we are doing this here.
5604 	 */
5605 
5606 	/*
5607 	 * For the rpm, we use the minimum for the disk.  For the heads and
5608 	 * sectors per track: if the capacity is <= 1GB, use head = 64 and
5609 	 * sect = 32; else use head = 255 and sect = 63.  Note: the capacity
5610 	 * should equal the C*H*S product, so this causes some truncation of
5611 	 * size due to round-off errors.  For CD-ROMs this truncation can have
5612 	 * adverse side effects, so ncyl and nhead are returned as 1; nsect
5613 	 * would overflow for most CD-ROMs since it is a ushort. (4190569)
5614 	 */
5615 	if (ISCD(un)) {
5616 		/*
5617 		 * Preserve the old behavior for non-writable
5618 		 * media.  Since dkg_nsect is a ushort, it
5619 		 * would lose bits, as CD-ROMs have more than
5620 		 * 65536 sectors; recalculating the capacity
5621 		 * from it would make the capacity much too
5622 		 * small.  The dkg_* information is not used
5623 		 * for CD-ROMs, so that is OK.  But for
5624 		 * writable CDs we need this information to
5625 		 * be valid (for newfs, say), so we make
5626 		 * nsect and nhead > 1; that way nsect can
5627 		 * still stay within the ushort limit
5628 		 * without losing any bits.
5629 		 */
5630 		if (un->un_f_mmc_writable_media == TRUE) {
5631 			un->un_g.dkg_nhead = 64;
5632 			un->un_g.dkg_nsect = 32;
5633 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
5634 			un->un_blockcount = un->un_g.dkg_ncyl *
5635 			    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5636 		} else {
5637 			un->un_g.dkg_ncyl  = 1;
5638 			un->un_g.dkg_nhead = 1;
5639 			un->un_g.dkg_nsect = un->un_blockcount;
5640 		}
5641 	} else {
5642 		if (un->un_blockcount <= 0x1000) {
5643 			/* unlabeled SCSI floppy device */
5644 			un->un_g.dkg_nhead = 2;
5645 			un->un_g.dkg_ncyl = 80;
5646 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
5647 		} else if (un->un_blockcount <= 0x200000) {
5648 			un->un_g.dkg_nhead = 64;
5649 			un->un_g.dkg_nsect = 32;
5650 			un->un_g.dkg_ncyl  = un->un_blockcount / (64 * 32);
5651 		} else {
5652 			un->un_g.dkg_nhead = 255;
5653 			un->un_g.dkg_nsect = 63;
5654 			un->un_g.dkg_ncyl  = un->un_blockcount / (255 * 63);
5655 		}
5656 		un->un_blockcount =
5657 		    un->un_g.dkg_ncyl * un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5658 	}
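	/*
	 * Worked example (illustrative): a 4194304-block (2 GB) disk takes
	 * the 255-head/63-sector branch, so ncyl = 4194304 / 16065 = 261
	 * and un_blockcount is rounded down to 261 * 16065 = 4192965,
	 * showing the round-off truncation described above.
	 */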
5659 
5660 	un->un_g.dkg_acyl	= 0;
5661 	un->un_g.dkg_bcyl	= 0;
5662 	un->un_g.dkg_rpm	= 200;
5663 	un->un_asciilabel[0]	= '\0';
5664 	un->un_g.dkg_pcyl	= un->un_g.dkg_ncyl;
5665 
5666 	un->un_map[0].dkl_cylno = 0;
5667 	un->un_map[0].dkl_nblk  = un->un_blockcount;
5668 	un->un_map[2].dkl_cylno = 0;
5669 	un->un_map[2].dkl_nblk  = un->un_blockcount;
5670 
5671 #elif defined(_SUNOS_VTOC_16)
5672 
5673 	if (un->un_solaris_size == 0) {
5674 		/*
5675 		 * Got an fdisk table but no Solaris entry; therefore,
5676 		 * don't create a default label.
5677 		 */
5678 		un->un_f_geometry_is_valid = TRUE;
5679 		return;
5680 	}
5681 
5682 	/*
5683 	 * For CDs we continue to use the physical geometry to calculate
5684 	 * number of cylinders. All other devices must convert the
5685 	 * physical geometry (geom_cache) to values that will fit
5686 	 * in a dk_geom structure.
5687 	 */
5688 	if (ISCD(un)) {
5689 		phys_spc = un->un_pgeom.g_nhead * un->un_pgeom.g_nsect;
5690 	} else {
5691 		/* Convert physical geometry to disk geometry */
5692 		bzero(&un_g, sizeof (struct dk_geom));
5693 		sd_convert_geometry(un->un_blockcount, &un_g);
5694 		bcopy(&un_g, &un->un_g, sizeof (un->un_g));
5695 		phys_spc = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5696 	}
5697 
5698 	un->un_g.dkg_pcyl = un->un_solaris_size / phys_spc;
5699 	un->un_g.dkg_acyl = DK_ACYL;
5700 	un->un_g.dkg_ncyl = un->un_g.dkg_pcyl - DK_ACYL;
5701 	disksize = un->un_g.dkg_ncyl * phys_spc;
5702 
5703 	if (ISCD(un)) {
5704 		/*
5705 		 * CDs don't use the "heads * sectors * cyls" type of
5706 		 * geometry, but instead use the entire capacity of the media.
5707 		 */
5708 		disksize = un->un_solaris_size;
5709 		un->un_g.dkg_nhead = 1;
5710 		un->un_g.dkg_nsect = 1;
5711 		un->un_g.dkg_rpm =
5712 		    (un->un_pgeom.g_rpm == 0) ? 200 : un->un_pgeom.g_rpm;
5713 
5714 		un->un_vtoc.v_part[0].p_start = 0;
5715 		un->un_vtoc.v_part[0].p_size  = disksize;
5716 		un->un_vtoc.v_part[0].p_tag   = V_BACKUP;
5717 		un->un_vtoc.v_part[0].p_flag  = V_UNMNT;
5718 
5719 		un->un_map[0].dkl_cylno = 0;
5720 		un->un_map[0].dkl_nblk  = disksize;
5721 		un->un_offset[0] = 0;
5722 
5723 	} else {
5724 		/*
5725 		 * Hard disks and removable media cartridges
5726 		 */
5727 		un->un_g.dkg_rpm =
5728 		    (un->un_pgeom.g_rpm == 0) ? 3600: un->un_pgeom.g_rpm;
5729 		un->un_vtoc.v_sectorsz = un->un_sys_blocksize;
5730 
5731 		/* Add boot slice */
5732 		un->un_vtoc.v_part[8].p_start = 0;
5733 		un->un_vtoc.v_part[8].p_size  = phys_spc;
5734 		un->un_vtoc.v_part[8].p_tag   = V_BOOT;
5735 		un->un_vtoc.v_part[8].p_flag  = V_UNMNT;
5736 
5737 		un->un_map[8].dkl_cylno = 0;
5738 		un->un_map[8].dkl_nblk  = phys_spc;
5739 		un->un_offset[8] = 0;
5740 	}
5741 
5742 	un->un_g.dkg_apc = 0;
5743 	un->un_vtoc.v_nparts = V_NUMPAR;
5744 	un->un_vtoc.v_version = V_VERSION;
5745 
5746 	/* Add backup slice */
5747 	un->un_vtoc.v_part[2].p_start = 0;
5748 	un->un_vtoc.v_part[2].p_size  = disksize;
5749 	un->un_vtoc.v_part[2].p_tag   = V_BACKUP;
5750 	un->un_vtoc.v_part[2].p_flag  = V_UNMNT;
5751 
5752 	un->un_map[2].dkl_cylno = 0;
5753 	un->un_map[2].dkl_nblk  = disksize;
5754 	un->un_offset[2] = 0;
5755 
5756 	(void) sprintf(un->un_vtoc.v_asciilabel, "DEFAULT cyl %d alt %d"
5757 	    " hd %d sec %d", un->un_g.dkg_ncyl, un->un_g.dkg_acyl,
5758 	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
5759 
5760 #else
5761 #error "No VTOC format defined."
5762 #endif
5763 
5764 	un->un_g.dkg_read_reinstruct  = 0;
5765 	un->un_g.dkg_write_reinstruct = 0;
5766 
5767 	un->un_g.dkg_intrlv = 1;
5768 
5769 	un->un_vtoc.v_sanity  = VTOC_SANE;
5770 
5771 	un->un_f_geometry_is_valid = TRUE;
5772 
5773 	SD_INFO(SD_LOG_COMMON, un,
5774 	    "sd_build_default_label: Default label created: "
5775 	    "cyl: %d\tacyl: %d\tnhead: %d\tnsect: %d\tcap: %d\n",
5776 	    un->un_g.dkg_ncyl, un->un_g.dkg_acyl, un->un_g.dkg_nhead,
5777 	    un->un_g.dkg_nsect, un->un_blockcount);
5778 }
5779 
5780 
5781 #if defined(_FIRMWARE_NEEDS_FDISK)
5782 /*
5783  * Max CHS values, as they are encoded into bytes, for 1022/254/63
5784  */
5785 #define	LBA_MAX_SECT	(63 | ((1022 & 0x300) >> 2))
5786 #define	LBA_MAX_CYL	(1022 & 0xFF)
5787 #define	LBA_MAX_HEAD	(254)
5788 
5789 
5790 /*
5791  *    Function: sd_has_max_chs_vals
5792  *
5793  * Description: Return TRUE if Cylinder-Head-Sector values are all at maximum.
5794  *
5795  *   Arguments: fdp - ptr to CHS info
5796  *
5797  * Return Code: True or false
5798  *
5799  *     Context: Any.
5800  */
5801 
5802 static int
5803 sd_has_max_chs_vals(struct ipart *fdp)
5804 {
5805 	return ((fdp->begcyl  == LBA_MAX_CYL)	&&
5806 	    (fdp->beghead == LBA_MAX_HEAD)	&&
5807 	    (fdp->begsect == LBA_MAX_SECT)	&&
5808 	    (fdp->endcyl  == LBA_MAX_CYL)	&&
5809 	    (fdp->endhead == LBA_MAX_HEAD)	&&
5810 	    (fdp->endsect == LBA_MAX_SECT));
5811 }
5812 #endif
5813 
5814 
5815 /*
5816  *    Function: sd_inq_fill
5817  *
5818  * Description: Copy a piece of inquiry data into the destination buffer,
5819  *		replacing non-printable characters and stopping at the first
5820  *		space character after the beginning of the passed string.
5821  *
5822  *   Arguments: p - source string
5823  *		l - maximum length to copy
5824  *		s - destination string
5825  *
5826  *     Context: Any.
5827  */
5828 
5829 static void
5830 sd_inq_fill(char *p, int l, char *s)
5831 {
5832 	unsigned i = 0;
5833 	char c;
5834 
5835 	while (i++ < l) {
5836 		if ((c = *p++) < ' ' || c >= 0x7F) {
5837 			c = '*';
5838 		} else if (i != 1 && c == ' ') {
5839 			break;
5840 		}
5841 		*s++ = c;
5842 	}
5843 	*s++ = 0;
5844 }
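/*
 * Usage sketch (hypothetical inquiry data): given the 8-byte vendor field
 * "SEAGATE ", sd_inq_fill(p, 8, s) yields "SEAGATE"; the trailing space
 * terminates the copy, and any unprintable byte would appear as '*'.
 */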
5845 
5846 
5847 /*
5848  *    Function: sd_register_devid
5849  *
5850  * Description: This routine will obtain the device id information from the
5851  *		target, obtain the serial number, and register the device
5852  *		id with the ddi framework.
5853  *
5854  *   Arguments: devi - the system's dev_info_t for the device.
5855  *		un - driver soft state (unit) structure
5856  *		reservation_flag - indicates if a reservation conflict
5857  *		occurred during attach
5858  *
5859  *     Context: Kernel Thread
5860  */
5861 static void
5862 sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
5863 {
5864 	int		rval		= 0;
5865 	uchar_t		*inq80		= NULL;
5866 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
5867 	size_t		inq80_resid	= 0;
5868 	uchar_t		*inq83		= NULL;
5869 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
5870 	size_t		inq83_resid	= 0;
5871 
5872 	ASSERT(un != NULL);
5873 	ASSERT(mutex_owned(SD_MUTEX(un)));
5874 	ASSERT((SD_DEVINFO(un)) == devi);
5875 
5876 	/*
5877 	 * This is the case of antiquated Sun disk drives that have the
5878 	 * FAB_DEVID property set in the disk_table.  These drives
5879 	 * manage their devids by storing them in the last 2 available sectors
5880 	 * on the drive and have them fabricated by the ddi layer by calling
5881 	 * ddi_devid_init and passing the DEVID_FAB flag.
5882 	 */
5883 	if (un->un_f_opt_fab_devid == TRUE) {
5884 		/*
5885 		 * Depending on EINVAL isn't reliable, since a reserved disk
5886 		 * may result in invalid geometry, so check to make sure a
5887 		 * reservation conflict did not occur during attach.
5888 		 */
5889 		if ((sd_get_devid(un) == EINVAL) &&
5890 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
5891 			/*
5892 			 * The devid is invalid AND there is no reservation
5893 			 * conflict.  Fabricate a new devid.
5894 			 */
5895 			(void) sd_create_devid(un);
5896 		}
5897 
5898 		/* Register the devid if it exists */
5899 		if (un->un_devid != NULL) {
5900 			(void) ddi_devid_register(SD_DEVINFO(un),
5901 			    un->un_devid);
5902 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5903 			    "sd_register_devid: Devid Fabricated\n");
5904 		}
5905 		return;
5906 	}
5907 
5908 	/*
5909 	 * We check the availability of the World Wide Name (0x83) and Unit
5910 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
5911 	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
5912 	 * 0x83 is available, that is the best choice.  Our next choice is
5913 	 * 0x80.  If neither is available, we munge the devid from the device
5914 	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
5915 	 * to fabricate a devid for non-Sun qualified disks.
5916 	 */
5917 	if (sd_check_vpd_page_support(un) == 0) {
5918 		/* collect page 80 data if available */
5919 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
5920 
5921 			mutex_exit(SD_MUTEX(un));
5922 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
5923 			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
5924 			    0x01, 0x80, &inq80_resid);
5925 
5926 			if (rval != 0) {
5927 				kmem_free(inq80, inq80_len);
5928 				inq80 = NULL;
5929 				inq80_len = 0;
5930 			}
5931 			mutex_enter(SD_MUTEX(un));
5932 		}
5933 
5934 		/* collect page 83 data if available */
5935 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
5936 
5937 			mutex_exit(SD_MUTEX(un));
5938 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
5939 			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
5940 			    0x01, 0x83, &inq83_resid);
5941 
5942 			if (rval != 0) {
5943 				kmem_free(inq83, inq83_len);
5944 				inq83 = NULL;
5945 				inq83_len = 0;
5946 			}
5947 			mutex_enter(SD_MUTEX(un));
5948 		}
5949 	}
5950 
5951 	/* encode best devid possible based on data available */
5952 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
5953 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
5954 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
5955 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
5956 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
5957 
5958 		/* devid successfully encoded, register devid */
5959 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
5960 
5961 	} else {
5962 		 * Unable to encode a devid based on the data available.
5963 		 * This is not a Sun qualified disk.  Older Sun disk
5964 		 * drives that have the SD_FAB_DEVID property
5965 		 * set in the disk_table and non-Sun qualified
5966 		 * disks are treated in the same manner.  These
5967 		 * drives manage their devids by storing them in the
5968 		 * last 2 available sectors on the drive and
5969 		 * have them fabricated by the ddi layer by
5970 		 * calling ddi_devid_init and passing the
5971 		 * DEVID_FAB flag.
5972 		 *
5973 		 * Create a fabricated devid only if one does not
5974 		 * already exist.
5975 		 */
5976 		if (sd_get_devid(un) == EINVAL) {
5977 			(void) sd_create_devid(un);
5978 			un->un_f_opt_fab_devid = TRUE;
5979 		}
5980 
5981 		/* Register the devid if it exists */
5982 		if (un->un_devid != NULL) {
5983 			(void) ddi_devid_register(SD_DEVINFO(un),
5984 			    un->un_devid);
5985 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5986 			    "sd_register_devid: devid fabricated using "
5987 			    "ddi framework\n");
5988 		}
5989 	}
5990 
5991 	/* clean up resources */
5992 	if (inq80 != NULL) {
5993 		kmem_free(inq80, inq80_len);
5994 	}
5995 	if (inq83 != NULL) {
5996 		kmem_free(inq83, inq83_len);
5997 	}
5998 }
5999 
6000 static daddr_t
6001 sd_get_devid_block(struct sd_lun *un)
6002 {
6003 	daddr_t			spc, blk, head, cyl;
6004 
6005 	if (un->un_blockcount <= DK_MAX_BLOCKS) {
6006 		/* this geometry doesn't allow us to write a devid */
6007 		if (un->un_g.dkg_acyl < 2) {
6008 			return (-1);
6009 		}
6010 
6011 		/*
6012 		 * Subtracting 2 guarantees that the next-to-last cylinder
6013 		 * is used.
6014 		 */
6015 		cyl  = un->un_g.dkg_ncyl  + un->un_g.dkg_acyl - 2;
6016 		spc  = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
6017 		head = un->un_g.dkg_nhead - 1;
6018 		blk  = (cyl * (spc - un->un_g.dkg_apc)) +
6019 		    (head * un->un_g.dkg_nsect) + 1;
6020 	} else {
6021 		if (un->un_reserved != -1) {
6022 			blk = un->un_map[un->un_reserved].dkl_cylno + 1;
6023 		} else {
6024 			return (-1);
6025 		}
6026 	}
6027 	return (blk);
6028 }
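/*
 * Worked example for the small-disk branch above (illustrative geometry):
 * with ncyl = 1000, acyl = 2, nhead = 16, nsect = 63 and apc = 0, we get
 * cyl = 1000, spc = 1008 and head = 15, so
 * blk = 1000 * 1008 + 15 * 63 + 1 = 1008946, i.e., the second sector of
 * the last track of the next-to-last cylinder.
 */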
6029 
6030 /*
6031  *    Function: sd_get_devid
6032  *
6033  * Description: This routine will return 0 if a valid device id has been
6034  *		obtained from the target and stored in the soft state. If a
6035  *		valid device id has not been previously read and stored, a
6036  *		read attempt will be made.
6037  *
6038  *   Arguments: un - driver soft state (unit) structure
6039  *
6040  * Return Code: 0 if we successfully get the device id
6041  *
6042  *     Context: Kernel Thread
6043  */
6044 
6045 static int
6046 sd_get_devid(struct sd_lun *un)
6047 {
6048 	struct dk_devid		*dkdevid;
6049 	ddi_devid_t		tmpid;
6050 	uint_t			*ip;
6051 	size_t			sz;
6052 	daddr_t			blk;
6053 	int			status;
6054 	int			chksum;
6055 	int			i;
6056 	size_t			buffer_size;
6057 
6058 	ASSERT(un != NULL);
6059 	ASSERT(mutex_owned(SD_MUTEX(un)));
6060 
6061 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
6062 	    un);
6063 
6064 	if (un->un_devid != NULL) {
6065 		return (0);
6066 	}
6067 
6068 	blk = sd_get_devid_block(un);
6069 	if (blk < 0)
6070 		return (EINVAL);
6071 
6072 	/*
6073 	 * Read and verify the device id, stored in the reserved cylinders at
6074 	 * the end of the disk.  The backup label is on the odd sectors of the
6075 	 * last track of the last cylinder.  The device id is on the last
6076 	 * track of the next-to-last cylinder.
6077 	 */
6078 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
6079 	mutex_exit(SD_MUTEX(un));
6080 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
6081 	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
6082 	    SD_PATH_DIRECT);
6083 	if (status != 0) {
6084 		goto error;
6085 	}
6086 
6087 	/* Validate the revision */
6088 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
6089 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
6090 		status = EINVAL;
6091 		goto error;
6092 	}
6093 
6094 	/* Calculate the checksum */
6095 	chksum = 0;
6096 	ip = (uint_t *)dkdevid;
6097 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6098 	    i++) {
6099 		chksum ^= ip[i];
6100 	}
6101 
6102 	/* Compare the checksums */
6103 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
6104 		status = EINVAL;
6105 		goto error;
6106 	}
6107 
6108 	/* Validate the device id */
6109 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
6110 		status = EINVAL;
6111 		goto error;
6112 	}
6113 
6114 	/*
6115 	 * Store the device id in the driver soft state
6116 	 */
6117 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
6118 	tmpid = kmem_alloc(sz, KM_SLEEP);
6119 
6120 	mutex_enter(SD_MUTEX(un));
6121 
6122 	un->un_devid = tmpid;
6123 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
6124 
6125 	kmem_free(dkdevid, buffer_size);
6126 
6127 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
6128 
6129 	return (status);
6130 error:
6131 	mutex_enter(SD_MUTEX(un));
6132 	kmem_free(dkdevid, buffer_size);
6133 	return (status);
6134 }
6135 
6136 
6137 /*
6138  *    Function: sd_create_devid
6139  *
6140  * Description: This routine will fabricate the device id and write it
6141  *		to the disk.
6142  *
6143  *   Arguments: un - driver soft state (unit) structure
6144  *
6145  * Return Code: value of the fabricated device id
6146  *
6147  *     Context: Kernel Thread
6148  */
6149 
6150 static ddi_devid_t
6151 sd_create_devid(struct sd_lun *un)
6152 {
6153 	ASSERT(un != NULL);
6154 
6155 	/* Fabricate the devid */
6156 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
6157 	    == DDI_FAILURE) {
6158 		return (NULL);
6159 	}
6160 
6161 	/* Write the devid to disk */
6162 	if (sd_write_deviceid(un) != 0) {
6163 		ddi_devid_free(un->un_devid);
6164 		un->un_devid = NULL;
6165 	}
6166 
6167 	return (un->un_devid);
6168 }
6169 
6170 
6171 /*
6172  *    Function: sd_write_deviceid
6173  *
6174  * Description: This routine will write the device id to the disk
6175  *		reserved sector.
6176  *
6177  *   Arguments: un - driver soft state (unit) structure
6178  *
6179  * Return Code: -1 if the devid block cannot be determined; otherwise the
6180  *		value returned by sd_send_scsi_WRITE
6181  *
6182  *     Context: Kernel Thread
6183  */
6184 
6185 static int
6186 sd_write_deviceid(struct sd_lun *un)
6187 {
6188 	struct dk_devid		*dkdevid;
6189 	daddr_t			blk;
6190 	uint_t			*ip, chksum;
6191 	int			status;
6192 	int			i;
6193 
6194 	ASSERT(mutex_owned(SD_MUTEX(un)));
6195 
6196 	blk = sd_get_devid_block(un);
6197 	if (blk < 0)
6198 		return (-1);
6199 	mutex_exit(SD_MUTEX(un));
6200 
6201 	/* Allocate the buffer */
6202 	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
6203 
6204 	/* Fill in the revision */
6205 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
6206 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
6207 
6208 	/* Copy in the device id */
6209 	mutex_enter(SD_MUTEX(un));
6210 	bcopy(un->un_devid, &dkdevid->dkd_devid,
6211 	    ddi_devid_sizeof(un->un_devid));
6212 	mutex_exit(SD_MUTEX(un));
6213 
6214 	/* Calculate the checksum */
6215 	chksum = 0;
6216 	ip = (uint_t *)dkdevid;
6217 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6218 	    i++) {
6219 		chksum ^= ip[i];
6220 	}
6221 
6222 	/* Fill-in checksum */
6223 	DKD_FORMCHKSUM(chksum, dkdevid);
6224 
6225 	/* Write the reserved sector */
6226 	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
6227 	    SD_PATH_DIRECT);
6228 
6229 	kmem_free(dkdevid, un->un_sys_blocksize);
6230 
6231 	mutex_enter(SD_MUTEX(un));
6232 	return (status);
6233 }
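/*
 * The checksum scheme shared by sd_get_devid() and sd_write_deviceid():
 * the sector is treated as an array of uint_t words, all words except the
 * last are XOR-ed together, and the result is stored in the final word
 * (DKD_FORMCHKSUM).  The reader recomputes the same XOR and compares it
 * to DKD_GETCHKSUM, so any single-word corruption is caught.
 */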
6234 
6235 
6236 /*
6237  *    Function: sd_check_vpd_page_support
6238  *
6239  * Description: This routine sends an inquiry command with the EVPD bit set and
6240  *		a page code of 0x00 to the device. It is used to determine which
6241  *		vital product pages are available to find the devid. We are
6242  *		looking for pages 0x83 or 0x80.  If we return -1, the
6243  *		device does not support that command.
6244  *
6245  *   Arguments: un  - driver soft state (unit) structure
6246  *
6247  * Return Code: 0 - success; supported pages recorded in un_vpd_page_mask
6248  *		-1 - the device does not support VPD pages
6249  *
6250  *     Context: This routine can sleep.
6251  */
6252 
6253 static int
6254 sd_check_vpd_page_support(struct sd_lun *un)
6255 {
6256 	uchar_t	*page_list	= NULL;
6257 	uchar_t	page_length	= 0xff;	/* Use max possible length */
6258 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
6259 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
6260 	int    	rval		= 0;
6261 	int	counter;
6262 
6263 	ASSERT(un != NULL);
6264 	ASSERT(mutex_owned(SD_MUTEX(un)));
6265 
6266 	mutex_exit(SD_MUTEX(un));
6267 
6268 	/*
6269 	 * We'll set the page length to the maximum to save figuring it out
6270 	 * with an additional call.
6271 	 */
6272 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
6273 
6274 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
6275 	    page_code, NULL);
6276 
6277 	mutex_enter(SD_MUTEX(un));
6278 
6279 	/*
6280 	 * Now we must validate that the device accepted the command, as some
6281 	 * drives do not support it.  If the drive does support it, we will
6282 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
6283 	 * not, we return -1.
6284 	 */
6285 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
6286 		/* Loop to find one of the 2 pages we need */
6287 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
6288 
6289 		/*
6290 		 * Pages are returned in ascending order, and 0x83 is what we
6291 		 * are hoping for.
6292 		 */
6293 		while ((page_list[counter] <= 0x83) &&
6294 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
6295 		    VPD_HEAD_OFFSET))) {
6296 			/*
6297 			 * page_list[VPD_PAGE_LENGTH] (byte 3) holds the number
6298 			 * of supported page codes; the codes start at byte 4.
6299 			 */
6300 
6301 			switch (page_list[counter]) {
6302 			case 0x00:
6303 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
6304 				break;
6305 			case 0x80:
6306 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
6307 				break;
6308 			case 0x81:
6309 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
6310 				break;
6311 			case 0x82:
6312 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
6313 				break;
6314 			case 0x83:
6315 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
6316 				break;
6317 			}
6318 			counter++;
6319 		}
6320 
6321 	} else {
6322 		rval = -1;
6323 
6324 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6325 		    "sd_check_vpd_page_support: This drive does not implement "
6326 		    "VPD pages.\n");
6327 	}
6328 
6329 	kmem_free(page_list, page_length);
6330 
6331 	return (rval);
6332 }
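/*
 * Layout sketch of the page 0x00 response parsed above, per the SCSI spec:
 * byte 1 is the page code, byte 3 the number of supported page codes, and
 * the codes themselves follow in ascending order from byte 4.  A drive
 * supporting pages 0x00, 0x80 and 0x83 returns
 *
 *	00 00 00 03 00 80 83
 *
 * which sets SD_VPD_SUPPORTED_PG, SD_VPD_UNIT_SERIAL_PG and
 * SD_VPD_DEVID_WWN_PG in un_vpd_page_mask.
 */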
6333 
6334 
6335 /*
6336  *    Function: sd_setup_pm
6337  *
6338  * Description: Initialize Power Management on the device
6339  *
6340  *     Context: Kernel Thread
6341  */
6342 
6343 static void
6344 sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
6345 {
6346 	uint_t	log_page_size;
6347 	uchar_t	*log_page_data;
6348 	int	rval;
6349 
6350 	/*
6351 	 * Since we are called from attach, holding a mutex for
6352 	 * un is unnecessary. Because some of the routines called
6353 	 * from here require SD_MUTEX to not be held, assert this
6354 	 * right up front.
6355 	 */
6356 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6357 	/*
6358 	 * Since the sd device does not have the 'reg' property,
6359 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
6360 	 * The following code is to tell cpr that this device
6361 	 * DOES need to be suspended and resumed.
6362 	 */
6363 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
6364 	    "pm-hardware-state", "needs-suspend-resume");
6365 
6366 	/*
6367 	 * Check if HBA has set the "pm-capable" property.
6368 	 * If "pm-capable" exists and is non-zero then we can
6369 	 * power manage the device without checking the start/stop
6370 	 * cycle count log sense page.
6371 	 *
6372 	 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
6373 	 * then we should not power manage the device.
6374 	 *
6375 	 * If "pm-capable" doesn't exist then un->un_pm_capable_prop will
6376 	 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case, sd will
6377 	 * check the start/stop cycle count log sense page and power manage
6378 	 * the device if the cycle count limit has not been exceeded.
6379 	 */
6380 	un->un_pm_capable_prop =
6381 	    ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
6382 		"pm-capable", SD_PM_CAPABLE_UNDEFINED);
6383 	if (un->un_pm_capable_prop != SD_PM_CAPABLE_UNDEFINED) {
6384 		/*
6385 		 * pm-capable property exists.
6386 		 *
6387 		 * Convert "TRUE" values for un_pm_capable_prop to
6388 		 * SD_PM_CAPABLE_TRUE (1) to make it easier to check later.
6389 		 * "TRUE" values are any values except SD_PM_CAPABLE_FALSE (0)
6390 		 *  and SD_PM_CAPABLE_UNDEFINED (-1)
6391 		 */
6392 		if (un->un_pm_capable_prop != SD_PM_CAPABLE_FALSE) {
6393 			un->un_pm_capable_prop = SD_PM_CAPABLE_TRUE;
6394 		}
6395 
6396 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6397 		    "sd_unit_attach: un:0x%p pm-capable "
6398 		    "property set to %d.\n", un, un->un_pm_capable_prop);
6399 	}
6400 
6401 	/*
6402 	 * This complies with the new power management framework
6403 	 * for certain desktop machines. Create the pm_components
6404 	 * property as a string array property.
6405 	 *
6406 	 * If this is a removable device or if the pm-capable property
6407 	 * is SD_PM_CAPABLE_TRUE (1) then we should create the
6408 	 * pm_components property without checking for the existence of
6409 	 * the start-stop cycle counter log page.
6410 	 */
6411 	if (ISREMOVABLE(un) ||
6412 	    un->un_pm_capable_prop == SD_PM_CAPABLE_TRUE) {
6413 		/*
6414 		 * Not all devices have a motor, so try a START first.
6415 		 * Some devices may return ILLEGAL REQUEST, and some
6416 		 * will hang.
6417 		 */
6418 		un->un_f_start_stop_supported = TRUE;
6419 		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
6420 		    SD_PATH_DIRECT) != 0) {
6421 			un->un_f_start_stop_supported = FALSE;
6422 		}
6423 
6424 		/*
6425 		 * Create the pm properties anyway; otherwise the parent
6426 		 * can't go to sleep.
6427 		 */
6428 		(void) sd_create_pm_components(devi, un);
6429 		un->un_f_pm_is_enabled = TRUE;
6430 
6431 		/*
6432 		 * Need to create a zero length (Boolean) property
6433 		 * removable-media for the removable media devices.
6434 		 * Note that the return value is not checked: if the
6435 		 * property cannot be created, we do not want the attach
6436 		 * to fail altogether. This is consistent with other
6437 		 * property creation in attach.
6438 		 */
6439 		if (ISREMOVABLE(un)) {
6440 			(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
6441 			    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
6442 		}
6443 		return;
6444 	}
6445 
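	/*
	 * Note: sd_log_page_supported() returns 1 if the page is
	 * supported, 0 if the page is not listed, and -1 if the
	 * LOG SENSE command itself failed; the checks below rely
	 * on this convention.
	 */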
6446 	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
6447 
6448 #ifdef	SDDEBUG
6449 	if (sd_force_pm_supported) {
6450 		/* Force a successful result */
6451 		rval = 1;
6452 	}
6453 #endif
6454 
6455 	/*
6456 	 * If the start-stop cycle counter log page is not supported
6457 	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
6458 	 * then we should not create the pm_components property.
6459 	 */
6460 	if (rval == -1 || un->un_pm_capable_prop == SD_PM_CAPABLE_FALSE) {
6461 		/*
6462 		 * Error.
6463 		 * Reading log sense failed, most likely this is
6464 		 * an older drive that does not support log sense.
6465 		 * In this case auto-pm is not supported.
6466 		 */
6467 		un->un_power_level = SD_SPINDLE_ON;
6468 		un->un_f_pm_is_enabled = FALSE;
6469 
6470 	} else if (rval == 0) {
6471 		/*
6472 		 * Page not found.
6473 		 * The start/stop cycle counter is implemented as page
6474 		 * START_STOP_CYCLE_VU_PAGE (0x31) in older disks. For
6475 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
6476 		 */
6477 		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
6478 			/*
6479 			 * Page found, use this one.
6480 			 */
6481 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
6482 			un->un_f_pm_is_enabled = TRUE;
6483 		} else {
6484 			/*
6485 			 * Error or page not found.
6486 			 * Auto-pm is not supported for this device.
6487 			 */
6488 			un->un_power_level = SD_SPINDLE_ON;
6489 			un->un_f_pm_is_enabled = FALSE;
6490 		}
6491 	} else {
6492 		/*
6493 		 * Page found, use it.
6494 		 */
6495 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
6496 		un->un_f_pm_is_enabled = TRUE;
6497 	}
6498 
6499 
6500 	if (un->un_f_pm_is_enabled == TRUE) {
6501 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6502 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6503 
6504 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
6505 		    log_page_size, un->un_start_stop_cycle_page,
6506 		    0x01, 0, SD_PATH_DIRECT);
6507 #ifdef	SDDEBUG
6508 		if (sd_force_pm_supported) {
6509 			/* Force a successful result */
6510 			rval = 0;
6511 		}
6512 #endif
6513 
6514 		/*
6515 		 * If the LOG SENSE for the start/stop cycle counter page
6516 		 * succeeds, then power management is supported and we can
6517 		 * enable auto-pm.
6518 		 */
6519 		if (rval == 0)  {
6520 			(void) sd_create_pm_components(devi, un);
6521 		} else {
6522 			un->un_power_level = SD_SPINDLE_ON;
6523 			un->un_f_pm_is_enabled = FALSE;
6524 		}
6525 
6526 		kmem_free(log_page_data, log_page_size);
6527 	}
6528 }
6529 
6530 
6531 /*
6532  *    Function: sd_create_pm_components
6533  *
6534  * Description: Initialize PM property.
6535  *
6536  *     Context: Kernel thread context
6537  */
6538 
6539 static void
6540 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
6541 {
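	/*
	 * The "pm-components" property names a single power-manageable
	 * component, the spindle motor, with two power levels:
	 * 0 (off) and 1 (on).
	 */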
6542 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
6543 
6544 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6545 
6546 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6547 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
6548 		/*
6549 		 * When components are initially created they are idle;
6550 		 * power up any non-removables.
6551 		 * Note: the return value of pm_raise_power can't be used
6552 		 * for determining if PM should be enabled for this device.
6553 		 * Even if you check the return values and remove this
6554 		 * property created above, the PM framework will not honor the
6555 		 * change after the first call to pm_raise_power. Hence,
6556 		 * removal of that property does not help if pm_raise_power
6557 		 * fails. In the case of removable media, the start/stop
6558 		 * will fail if the media is not present.
6559 		 */
6560 		if ((!ISREMOVABLE(un)) && (pm_raise_power(SD_DEVINFO(un), 0,
6561 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
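			/*
			 * Lock ordering: SD_MUTEX is always acquired
			 * before un_pm_mutex.
			 */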
6562 			mutex_enter(SD_MUTEX(un));
6563 			un->un_power_level = SD_SPINDLE_ON;
6564 			mutex_enter(&un->un_pm_mutex);
6565 			/* Set to on and not busy. */
6566 			un->un_pm_count = 0;
6567 		} else {
6568 			mutex_enter(SD_MUTEX(un));
6569 			un->un_power_level = SD_SPINDLE_OFF;
6570 			mutex_enter(&un->un_pm_mutex);
6571 			/* Set to off. */
6572 			un->un_pm_count = -1;
6573 		}
6574 		mutex_exit(&un->un_pm_mutex);
6575 		mutex_exit(SD_MUTEX(un));
6576 	} else {
6577 		un->un_power_level = SD_SPINDLE_ON;
6578 		un->un_f_pm_is_enabled = FALSE;
6579 	}
6580 }
6581 
6582 
6583 /*
6584  *    Function: sd_ddi_suspend
6585  *
6586  * Description: Performs system power-down operations. This includes
6587  *		setting the drive state to indicate it is suspended so
6588  *		that no new commands will be accepted. Also, wait for
6589  *		all commands that are in transport or queued to a timer
6590  *		for retry to complete. All timeout threads are cancelled.
6591  *
6592  * Return Code: DDI_FAILURE or DDI_SUCCESS
6593  *
6594  *     Context: Kernel thread context
6595  */
6596 
6597 static int
6598 sd_ddi_suspend(dev_info_t *devi)
6599 {
6600 	struct	sd_lun	*un;
6601 	clock_t		wait_cmds_complete;
6602 
6603 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6604 	if (un == NULL) {
6605 		return (DDI_FAILURE);
6606 	}
6607 
6608 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
6609 
6610 	mutex_enter(SD_MUTEX(un));
6611 
6612 	/* Return success if the device is already suspended. */
6613 	if (un->un_state == SD_STATE_SUSPENDED) {
6614 		mutex_exit(SD_MUTEX(un));
6615 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6616 		    "device already suspended, exiting\n");
6617 		return (DDI_SUCCESS);
6618 	}
6619 
6620 	/* Return failure if the device is being used by HA */
6621 	if (un->un_resvd_status &
6622 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
6623 		mutex_exit(SD_MUTEX(un));
6624 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6625 		    "device in use by HA, exiting\n");
6626 		return (DDI_FAILURE);
6627 	}
6628 
6629 	/*
6630 	 * Return failure if the device is in a resource wait
6631 	 * or power changing state.
6632 	 */
6633 	if ((un->un_state == SD_STATE_RWAIT) ||
6634 	    (un->un_state == SD_STATE_PM_CHANGING)) {
6635 		mutex_exit(SD_MUTEX(un));
6636 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6637 		    "device in resource wait state, exiting\n");
6638 		return (DDI_FAILURE);
6639 	}
6640 
6641 
6642 	un->un_save_state = un->un_last_state;
6643 	New_state(un, SD_STATE_SUSPENDED);
6644 
6645 	/*
6646 	 * Wait for all commands that are in transport or queued to a timer
6647 	 * for retry to complete.
6648 	 *
6649 	 * While waiting, no new commands will be accepted or sent because of
6650 	 * the new state we set above.
6651 	 *
6652 	 * Wait till current operation has completed. If we are in the resource
6653 	 * wait state (with an intr outstanding) then we need to wait till the
6654 	 * intr completes and starts the next cmd. We want to wait for
6655 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
6656 	 */
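	/*
	 * sd_wait_cmds_complete is in seconds; drv_usectohz() converts
	 * the equivalent microseconds to ticks, yielding an absolute
	 * deadline for cv_timedwait() below.
	 */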
6657 	wait_cmds_complete = ddi_get_lbolt() +
6658 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
6659 
6660 	while (un->un_ncmds_in_transport != 0) {
6661 		/*
6662 		 * Fail if commands do not finish in the specified time.
6663 		 */
6664 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
6665 		    wait_cmds_complete) == -1) {
6666 			/*
6667 			 * Undo the state changes made above. Everything
6668 			 * must go back to its original value.
6669 			 */
6670 			Restore_state(un);
6671 			un->un_last_state = un->un_save_state;
6672 			/* Wake up any threads that might be waiting. */
6673 			cv_broadcast(&un->un_suspend_cv);
6674 			mutex_exit(SD_MUTEX(un));
6675 			SD_ERROR(SD_LOG_IO_PM, un,
6676 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
6677 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
6678 			return (DDI_FAILURE);
6679 		}
6680 	}
6681 
6682 	/*
6683 	 * Cancel SCSI watch thread and timeouts, if any are active
6684 	 */
6685 
6686 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
6687 		opaque_t temp_token = un->un_swr_token;
6688 		mutex_exit(SD_MUTEX(un));
6689 		scsi_watch_suspend(temp_token);
6690 		mutex_enter(SD_MUTEX(un));
6691 	}
6692 
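	/*
	 * For each pending timeout below, clear the saved id and drop
	 * SD_MUTEX before calling untimeout(9F): untimeout() waits for
	 * an in-flight handler to finish, and the handlers themselves
	 * take SD_MUTEX, so holding it across the call could deadlock.
	 */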
6693 	if (un->un_reset_throttle_timeid != NULL) {
6694 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
6695 		un->un_reset_throttle_timeid = NULL;
6696 		mutex_exit(SD_MUTEX(un));
6697 		(void) untimeout(temp_id);
6698 		mutex_enter(SD_MUTEX(un));
6699 	}
6700 
6701 	if (un->un_dcvb_timeid != NULL) {
6702 		timeout_id_t temp_id = un->un_dcvb_timeid;
6703 		un->un_dcvb_timeid = NULL;
6704 		mutex_exit(SD_MUTEX(un));
6705 		(void) untimeout(temp_id);
6706 		mutex_enter(SD_MUTEX(un));
6707 	}
6708 
6709 	mutex_enter(&un->un_pm_mutex);
6710 	if (un->un_pm_timeid != NULL) {
6711 		timeout_id_t temp_id = un->un_pm_timeid;
6712 		un->un_pm_timeid = NULL;
6713 		mutex_exit(&un->un_pm_mutex);
6714 		mutex_exit(SD_MUTEX(un));
6715 		(void) untimeout(temp_id);
6716 		mutex_enter(SD_MUTEX(un));
6717 	} else {
6718 		mutex_exit(&un->un_pm_mutex);
6719 	}
6720 
6721 	if (un->un_retry_timeid != NULL) {
6722 		timeout_id_t temp_id = un->un_retry_timeid;
6723 		un->un_retry_timeid = NULL;
6724 		mutex_exit(SD_MUTEX(un));
6725 		(void) untimeout(temp_id);
6726 		mutex_enter(SD_MUTEX(un));
6727 	}
6728 
6729 	if (un->un_direct_priority_timeid != NULL) {
6730 		timeout_id_t temp_id = un->un_direct_priority_timeid;
6731 		un->un_direct_priority_timeid = NULL;
6732 		mutex_exit(SD_MUTEX(un));
6733 		(void) untimeout(temp_id);
6734 		mutex_enter(SD_MUTEX(un));
6735 	}
6736 
6737 	if (un->un_f_is_fibre == TRUE) {
6738 		/*
6739 		 * Remove callbacks for insert and remove events
6740 		 */
6741 		if (un->un_insert_event != NULL) {
6742 			mutex_exit(SD_MUTEX(un));
6743 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
6744 			mutex_enter(SD_MUTEX(un));
6745 			un->un_insert_event = NULL;
6746 		}
6747 
6748 		if (un->un_remove_event != NULL) {
6749 			mutex_exit(SD_MUTEX(un));
6750 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
6751 			mutex_enter(SD_MUTEX(un));
6752 			un->un_remove_event = NULL;
6753 		}
6754 	}
6755 
6756 	mutex_exit(SD_MUTEX(un));
6757 
6758 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
6759 
6760 	return (DDI_SUCCESS);
6761 }
6762 
6763 
6764 /*
6765  *    Function: sd_ddi_pm_suspend
6766  *
6767  * Description: Set the drive state to low power.
6768  *		Someone else is required to actually change the drive
6769  *		power level.
6770  *
6771  *   Arguments: un - driver soft state (unit) structure
6772  *
6773  * Return Code: DDI_FAILURE or DDI_SUCCESS
6774  *
6775  *     Context: Kernel thread context
6776  */
6777 
6778 static int
6779 sd_ddi_pm_suspend(struct sd_lun *un)
6780 {
6781 	ASSERT(un != NULL);
6782 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
6783 
6784 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6785 	mutex_enter(SD_MUTEX(un));
6786 
6787 	/*
6788 	 * Exit if power management is not enabled for this device, or if
6789 	 * the device is being used by HA.
6790 	 */
6791 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
6792 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
6793 		mutex_exit(SD_MUTEX(un));
6794 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
6795 		return (DDI_SUCCESS);
6796 	}
6797 
6798 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
6799 	    un->un_ncmds_in_driver);
6800 
6801 	/*
6802 	 * See if the device is not busy, i.e.:
6803 	 *    - we have no commands in the driver for this device
6804 	 *    - not waiting for resources
6805 	 */
6806 	if ((un->un_ncmds_in_driver == 0) &&
6807 	    (un->un_state != SD_STATE_RWAIT)) {
6808 		/*
6809 		 * The device is not busy, so it is OK to go to low power state.
6810 		 * Indicate low power, but rely on someone else to actually
6811 		 * change it.
6812 		 */
6813 		mutex_enter(&un->un_pm_mutex);
6814 		un->un_pm_count = -1;
6815 		mutex_exit(&un->un_pm_mutex);
6816 		un->un_power_level = SD_SPINDLE_OFF;
6817 	}
6818 
6819 	mutex_exit(SD_MUTEX(un));
6820 
6821 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
6822 
6823 	return (DDI_SUCCESS);
6824 }
6825 
6826 
6827 /*
6828  *    Function: sd_ddi_resume
6829  *
6830  * Description: Performs system power-up operations.
6831  *
6832  * Return Code: DDI_SUCCESS
6833  *		DDI_FAILURE
6834  *
6835  *     Context: Kernel thread context
6836  */
6837 
6838 static int
6839 sd_ddi_resume(dev_info_t *devi)
6840 {
6841 	struct	sd_lun	*un;
6842 
6843 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6844 	if (un == NULL) {
6845 		return (DDI_FAILURE);
6846 	}
6847 
6848 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
6849 
6850 	mutex_enter(SD_MUTEX(un));
6851 	Restore_state(un);
6852 
6853 	/*
6854 	 * Restore the state which was saved to give
6855 	 * un_last_state the right value.
6856 	 */
6857 	un->un_last_state = un->un_save_state;
6858 	/*
6859 	 * Note: throttle comes back at full.
6860 	 * Also note: this MUST be done before calling pm_raise_power
6861 	 * otherwise the system can get hung in biowait. The scenario where
6862 	 * this'll happen is under cpr suspend. Writing of the system
6863 	 * state goes through sddump, which writes 0 to un_throttle. If
6864 	 * writing the system state then fails, example if the partition is
6865 	 * writing the system state then fails, for example if the partition is
6866 	 * from the saved value until after calling pm_raise_power then
6867 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
6868 	 * in biowait.
6869 	 */
6870 	un->un_throttle = un->un_saved_throttle;
6871 
6872 	/*
6873 	 * The chance of failure is very rare, as the only command issued in the
6874 	 * power entry point is START when transitioning from 0->1 or
6875 	 * unknown->1. Put the device in the SPINDLE ON state irrespective of
6876 	 * the state at which suspend was done. Ignore the return value, as the
6877 	 * resume should not be failed. In the case of removable media the
6878 	 * media need not be inserted, so there is a chance that raise power
6879 	 * will fail with media not present.
6880 	 */
6881 	if (!ISREMOVABLE(un)) {
6882 		mutex_exit(SD_MUTEX(un));
6883 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
6884 		mutex_enter(SD_MUTEX(un));
6885 	}
6886 
6887 	/*
6888 	 * Don't broadcast to the suspend cv and therefore possibly
6889 	 * start I/O until after power has been restored.
6890 	 */
6891 	cv_broadcast(&un->un_suspend_cv);
6892 	cv_broadcast(&un->un_state_cv);
6893 
6894 	/* restart thread */
6895 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
6896 		scsi_watch_resume(un->un_swr_token);
6897 	}
6898 
6899 #if (defined(__fibre))
6900 	if (un->un_f_is_fibre == TRUE) {
6901 		/*
6902 		 * Add callbacks for insert and remove events
6903 		 */
6904 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
6905 			sd_init_event_callbacks(un);
6906 		}
6907 	}
6908 #endif
6909 
6910 	/*
6911 	 * Transport any pending commands to the target.
6912 	 *
6913 	 * If this is a low-activity device commands in queue will have to wait
6914 	 * until new commands come in, which may take a while. Also, we
6915 	 * specifically don't check un_ncmds_in_transport because we know that
6916 	 * there really are no commands in progress after the unit was
6917 	 * suspended and we could have reached the throttle level, been
6918 	 * suspended, and have no new commands coming in for a while. Highly
6919 	 * unlikely, but so is the low-activity disk scenario.
6920 	 */
6921 	ddi_xbuf_dispatch(un->un_xbuf_attr);
6922 
6923 	sd_start_cmds(un, NULL);
6924 	mutex_exit(SD_MUTEX(un));
6925 
6926 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
6927 
6928 	return (DDI_SUCCESS);
6929 }
6930 
6931 
6932 /*
6933  *    Function: sd_ddi_pm_resume
6934  *
6935  * Description: Set the drive state to powered on.
6936  *		Someone else is required to actually change the drive
6937  *		power level.
6938  *
6939  *   Arguments: un - driver soft state (unit) structure
6940  *
6941  * Return Code: DDI_SUCCESS
6942  *
6943  *     Context: Kernel thread context
6944  */
6945 
6946 static int
6947 sd_ddi_pm_resume(struct sd_lun *un)
6948 {
6949 	ASSERT(un != NULL);
6950 
6951 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6952 	mutex_enter(SD_MUTEX(un));
6953 	un->un_power_level = SD_SPINDLE_ON;
6954 
6955 	ASSERT(!mutex_owned(&un->un_pm_mutex));
6956 	mutex_enter(&un->un_pm_mutex);
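	/*
	 * un_pm_count is -1 while the device is in low power; bumping
	 * it to zero marks the device as powered on and not busy.
	 */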
6957 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
6958 		un->un_pm_count++;
6959 		ASSERT(un->un_pm_count == 0);
6960 		/*
6961 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
6962 		 * un_suspend_cv is for a system resume, not a power management
6963 		 * device resume. (4297749)
6964 		 *	 cv_broadcast(&un->un_suspend_cv);
6965 		 */
6966 	}
6967 	mutex_exit(&un->un_pm_mutex);
6968 	mutex_exit(SD_MUTEX(un));
6969 
6970 	return (DDI_SUCCESS);
6971 }
6972 
6973 
6974 /*
6975  *    Function: sd_pm_idletimeout_handler
6976  *
6977  * Description: A timer routine that's active only while a device is busy.
6978  *		The purpose is to extend slightly the pm framework's busy
6979  *		view of the device to prevent busy/idle thrashing for
6980  *		back-to-back commands. Do this by comparing the current time
6981  *		to the time at which the last command completed and when the
6982  *		difference is greater than sd_pm_idletime, call
6983  *		pm_idle_component. In addition to indicating idle to the pm
6984  *		framework, update the chain type to again use the internal pm
6985  *		layers of the driver.
6986  *
6987  *   Arguments: arg - driver soft state (unit) structure
6988  *
6989  *     Context: Executes in a timeout(9F) thread context
6990  */
6991 
6992 static void
6993 sd_pm_idletimeout_handler(void *arg)
6994 {
6995 	struct sd_lun *un = arg;
6996 
6997 	time_t	now;
6998 
6999 	mutex_enter(&sd_detach_mutex);
7000 	if (un->un_detach_count != 0) {
7001 		/* Abort if the instance is detaching */
7002 		mutex_exit(&sd_detach_mutex);
7003 		return;
7004 	}
7005 	mutex_exit(&sd_detach_mutex);
7006 
7007 	now = ddi_get_time();
7008 	/*
7009 	 * Grab both mutexes, in the proper order, since we're accessing
7010 	 * both PM and softstate variables.
7011 	 */
7012 	mutex_enter(SD_MUTEX(un));
7013 	mutex_enter(&un->un_pm_mutex);
7014 	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
7015 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
7016 		/*
7017 		 * Update the chain types.
7018 		 * This takes effect on the next new command received.
7019 		 */
7020 		if (ISREMOVABLE(un)) {
7021 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7022 		} else {
7023 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7024 		}
7025 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7026 
7027 		SD_TRACE(SD_LOG_IO_PM, un,
7028 		    "sd_pm_idletimeout_handler: idling device\n");
7029 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7030 		un->un_pm_idle_timeid = NULL;
7031 	} else {
7032 		un->un_pm_idle_timeid =
7033 		    timeout(sd_pm_idletimeout_handler, un,
7034 		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
7035 	}
7036 	mutex_exit(&un->un_pm_mutex);
7037 	mutex_exit(SD_MUTEX(un));
7038 }
7039 
7040 
7041 /*
7042  *    Function: sd_pm_timeout_handler
7043  *
7044  * Description: Callback to tell framework we are idle.
7045  *
7046  *     Context: timeout(9f) thread context.
7047  */
7048 
7049 static void
7050 sd_pm_timeout_handler(void *arg)
7051 {
7052 	struct sd_lun *un = arg;
7053 
7054 	(void) pm_idle_component(SD_DEVINFO(un), 0);
7055 	mutex_enter(&un->un_pm_mutex);
7056 	un->un_pm_timeid = NULL;
7057 	mutex_exit(&un->un_pm_mutex);
7058 }
7059 
7060 
7061 /*
7062  *    Function: sdpower
7063  *
7064  * Description: PM entry point.
7065  *
7066  * Return Code: DDI_SUCCESS
7067  *		DDI_FAILURE
7068  *
7069  *     Context: Kernel thread context
7070  */
7071 
7072 static int
7073 sdpower(dev_info_t *devi, int component, int level)
7074 {
7075 	struct sd_lun	*un;
7076 	int		instance;
7077 	int		rval = DDI_SUCCESS;
7078 	uint_t		i, log_page_size, maxcycles, ncycles;
7079 	uchar_t		*log_page_data;
7080 	int		log_sense_page;
7081 	int		medium_present;
7082 	time_t		intvlp;
7083 	dev_t		dev;
7084 	struct pm_trans_data	sd_pm_tran_data;
7085 	uchar_t		save_state;
7086 	int		sval;
7087 	uchar_t		state_before_pm;
7088 	int		got_semaphore_here;
7089 
7090 	instance = ddi_get_instance(devi);
7091 
7092 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
7093 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
7094 	    component != 0) {
7095 		return (DDI_FAILURE);
7096 	}
7097 
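	/*
	 * Construct the dev_t for this unit; it is used below as the
	 * callback argument when resubmitting the media watch request.
	 */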
7098 	dev = sd_make_device(SD_DEVINFO(un));
7099 
7100 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
7101 
7102 	/*
7103 	 * Must synchronize power down with close.
7104 	 * Attempt to decrement/acquire the open/close semaphore,
7105 	 * but do NOT wait on it. If it's not greater than zero,
7106 	 * i.e. it can't be decremented without waiting, then
7107 	 * someone else, either open or close, already has it
7108 	 * and the try returns 0. Use that knowledge here to determine
7109 	 * if it's OK to change the device power level.
7110 	 * Also, only increment it on exit if it was decremented, i.e. gotten,
7111 	 * here.
7112 	 */
7113 	got_semaphore_here = sema_tryp(&un->un_semoclose);
7114 
7115 	mutex_enter(SD_MUTEX(un));
7116 
7117 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
7118 	    un->un_ncmds_in_driver);
7119 
7120 	/*
7121 	 * If un_ncmds_in_driver is non-zero, commands are already being
7122 	 * processed in the driver; if the semaphore was not obtained here,
7123 	 * an open or close is in progress. Either way, a concurrent request
7124 	 * to go to low power cannot be honored, so we need to return
7125 	 * failure.
7126 	 */
7127 	if ((level == SD_SPINDLE_OFF) &&
7128 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
7129 		mutex_exit(SD_MUTEX(un));
7130 
7131 		if (got_semaphore_here != 0) {
7132 			sema_v(&un->un_semoclose);
7133 		}
7134 		SD_TRACE(SD_LOG_IO_PM, un,
7135 		    "sdpower: exit, device has queued cmds.\n");
7136 		return (DDI_FAILURE);
7137 	}
7138 
7139 	/*
7140 	 * If the state is OFFLINE, the disk is completely dead. Changing the
7141 	 * power level requires sending commands to the device, which would
7142 	 * fail anyway, so return failure here.
7143 	 *
7144 	 * Power changes to a device that's OFFLINE or SUSPENDED
7145 	 * are not allowed.
7146 	 */
7147 	if ((un->un_state == SD_STATE_OFFLINE) ||
7148 	    (un->un_state == SD_STATE_SUSPENDED)) {
7149 		mutex_exit(SD_MUTEX(un));
7150 
7151 		if (got_semaphore_here != 0) {
7152 			sema_v(&un->un_semoclose);
7153 		}
7154 		SD_TRACE(SD_LOG_IO_PM, un,
7155 		    "sdpower: exit, device is off-line.\n");
7156 		return (DDI_FAILURE);
7157 	}
7158 
7159 	/*
7160 	 * Change the device's state to indicate its power level
7161 	 * is being changed. Do this to prevent a power off in the
7162 	 * middle of commands, which is especially bad on devices
7163 	 * that are really powered off instead of just spun down.
7164 	 */
7165 	state_before_pm = un->un_state;
7166 	un->un_state = SD_STATE_PM_CHANGING;
7167 
7168 	mutex_exit(SD_MUTEX(un));
7169 
7170 	/*
7171 	 * Bypass checking the log sense information for removables
7172 	 * and devices for which the HBA set the pm-capable property.
7173 	 * If un->un_pm_capable_prop is SD_PM_CAPABLE_UNDEFINED (-1)
7174 	 * then the HBA did not create the property.
7175 	 */
7176 	if ((level == SD_SPINDLE_OFF) && (!ISREMOVABLE(un)) &&
7177 	    un->un_pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
7178 		/*
7179 		 * Get the log sense information to determine whether the
7180 		 * power cycle counts have gone beyond the threshold.
7181 		 */
7182 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
7183 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
7184 
7185 		mutex_enter(SD_MUTEX(un));
7186 		log_sense_page = un->un_start_stop_cycle_page;
7187 		mutex_exit(SD_MUTEX(un));
7188 
7189 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
7190 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
7191 #ifdef	SDDEBUG
7192 		if (sd_force_pm_supported) {
7193 			/* Force a successful result */
7194 			rval = 0;
7195 		}
7196 #endif
7197 		if (rval != 0) {
7198 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
7199 			    "Log Sense Failed\n");
7200 			kmem_free(log_page_data, log_page_size);
7201 			/* Cannot support power management on those drives */
7202 
7203 			if (got_semaphore_here != 0) {
7204 				sema_v(&un->un_semoclose);
7205 			}
7206 			/*
7207 			 * On exit put the state back to its original value
7208 			 * and broadcast to anyone waiting for the power
7209 			 * change completion.
7210 			 */
7211 			mutex_enter(SD_MUTEX(un));
7212 			un->un_state = state_before_pm;
7213 			cv_broadcast(&un->un_suspend_cv);
7214 			mutex_exit(SD_MUTEX(un));
7215 			SD_TRACE(SD_LOG_IO_PM, un,
7216 			    "sdpower: exit, Log Sense Failed.\n");
7217 			return (DDI_FAILURE);
7218 		}
7219 
7220 		/*
7221 		 * From the page data - Convert the essential information to
7222 		 * pm_trans_data
7223 		 */
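		/*
		 * Per the start/stop cycle counter log page layout, the
		 * specified (maximum) cycle count over the device lifetime
		 * is the four bytes at offsets 0x1C-0x1F, the accumulated
		 * start-stop cycle count is at offsets 0x24-0x27, and the
		 * date-of-manufacture parameter data begins at offset 8.
		 */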
7224 		maxcycles =
7225 		    (log_page_data[0x1C] << 24) | (log_page_data[0x1D] << 16) |
7226 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
7227 
7228 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
7229 
7230 		ncycles =
7231 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
7232 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
7233 
7234 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
7235 
7236 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
7237 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
7238 			    log_page_data[8+i];
7239 		}
7240 
7241 		kmem_free(log_page_data, log_page_size);
7242 
7243 		/*
7244 		 * Call pm_trans_check routine to get the Ok from
7245 		 * the global policy
7246 		 */
7247 
7248 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
7249 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
7250 
7251 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
7252 #ifdef	SDDEBUG
7253 		if (sd_force_pm_supported) {
7254 			/* Force a successful result */
7255 			rval = 1;
7256 		}
7257 #endif
7258 		switch (rval) {
7259 		case 0:
7260 			/*
7261 			 * It is not OK to power cycle, or there was an error
7262 			 * in the parameters passed; pm_trans_check() returned
7263 			 * the advised time (intvlp) to wait before a power
7264 			 * cycle should be considered. We are supposed to
7265 			 * pretend to be busy so that the pm framework will not
7266 			 * call our power entry point. To do that, install a
7267 			 * timeout handler and wait for the recommended time to
7268 			 * elapse; then power management can be effective again.
7269 			 *
7270 			 * To effect this behavior, call pm_busy_component to
7271 			 * indicate to the framework this device is busy.
7272 			 * By not adjusting un_pm_count the rest of PM in
7273 			 * the driver will function normally, independently
7274 			 * of this; but because the framework is told the device
7275 			 * is busy it won't attempt powering down until it gets
7276 			 * a matching idle. The timeout handler sends this.
7277 			 * Note: sd_pm_entry can't be called here to do this
7278 			 * because sdpower may have been called as a result
7279 			 * of a call to pm_raise_power from within sd_pm_entry.
7280 			 *
7281 			 * If a timeout handler is already active then
7282 			 * don't install another.
7283 			 */
7284 			mutex_enter(&un->un_pm_mutex);
7285 			if (un->un_pm_timeid == NULL) {
7286 				un->un_pm_timeid =
7287 				    timeout(sd_pm_timeout_handler,
7288 				    un, intvlp * drv_usectohz(1000000));
7289 				mutex_exit(&un->un_pm_mutex);
7290 				(void) pm_busy_component(SD_DEVINFO(un), 0);
7291 			} else {
7292 				mutex_exit(&un->un_pm_mutex);
7293 			}
7294 			if (got_semaphore_here != 0) {
7295 				sema_v(&un->un_semoclose);
7296 			}
7297 			/*
7298 			 * On exit put the state back to its original value
7299 			 * and broadcast to anyone waiting for the power
7300 			 * change completion.
7301 			 */
7302 			mutex_enter(SD_MUTEX(un));
7303 			un->un_state = state_before_pm;
7304 			cv_broadcast(&un->un_suspend_cv);
7305 			mutex_exit(SD_MUTEX(un));
7306 
7307 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
7308 			    "trans check Failed, not ok to power cycle.\n");
7309 			return (DDI_FAILURE);
7310 
7311 		case -1:
7312 			if (got_semaphore_here != 0) {
7313 				sema_v(&un->un_semoclose);
7314 			}
7315 			/*
7316 			 * On exit put the state back to its original value
7317 			 * and broadcast to anyone waiting for the power
7318 			 * change completion.
7319 			 */
7320 			mutex_enter(SD_MUTEX(un));
7321 			un->un_state = state_before_pm;
7322 			cv_broadcast(&un->un_suspend_cv);
7323 			mutex_exit(SD_MUTEX(un));
7324 			SD_TRACE(SD_LOG_IO_PM, un,
7325 			    "sdpower: exit, trans check command Failed.\n");
7326 			return (DDI_FAILURE);
7327 		}
7328 	}
7329 
7330 	if (level == SD_SPINDLE_OFF) {
7331 		/*
7332 		 * Save the last state; if the STOP fails we need it
7333 		 * for restoring.
7334 		 */
7335 		mutex_enter(SD_MUTEX(un));
7336 		save_state = un->un_last_state;
7337 		/*
7338 		 * There must not be any cmds getting processed
7339 		 * in the driver when we get here. Power to the
7340 		 * device is potentially going off.
7341 		 */
7342 		ASSERT(un->un_ncmds_in_driver == 0);
7343 		mutex_exit(SD_MUTEX(un));
7344 
7345 		/*
7346 		 * For now, suspend the device completely before the
7347 		 * spindle is turned off.
7348 		 */
7349 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
7350 			if (got_semaphore_here != 0) {
7351 				sema_v(&un->un_semoclose);
7352 			}
7353 			/*
7354 			 * On exit put the state back to its original value
7355 			 * and broadcast to anyone waiting for the power
7356 			 * change completion.
7357 			 */
7358 			mutex_enter(SD_MUTEX(un));
7359 			un->un_state = state_before_pm;
7360 			cv_broadcast(&un->un_suspend_cv);
7361 			mutex_exit(SD_MUTEX(un));
7362 			SD_TRACE(SD_LOG_IO_PM, un,
7363 			    "sdpower: exit, PM suspend Failed.\n");
7364 			return (DDI_FAILURE);
7365 		}
7366 	}
7367 
7368 	/*
7369 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
7370 	 * close, or strategy. Dump no longer uses this routine; it uses its
7371 	 * own code so it can be done in polled mode.
7372 	 */
7373 
7374 	medium_present = TRUE;
7375 
7376 	/*
7377 	 * When powering up, issue a TUR in case the device is at unit
7378 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
7379 	 * a deadlock on un_pm_busy_cv will occur.
7380 	 */
7381 	if (level == SD_SPINDLE_ON) {
7382 		(void) sd_send_scsi_TEST_UNIT_READY(un,
7383 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
7384 	}
7385 
7386 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
7387 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
7388 
7389 	sval = sd_send_scsi_START_STOP_UNIT(un,
7390 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
7391 	    SD_PATH_DIRECT);
7392 	/* Command failed, check for media present. */
7393 	if ((sval == ENXIO) && ISREMOVABLE(un)) {
7394 		medium_present = FALSE;
7395 	}
7396 
7397 	/*
7398 	 * The conditions of interest here are:
7399 	 *   if a spindle off with media present fails,
7400 	 *	then restore the state and return an error.
7401 	 *   else if a spindle on fails,
7402 	 *	then return an error (there's no state to restore).
7403 	 * In all other cases we setup for the new state
7404 	 * and return success.
7405 	 */
7406 	switch (level) {
7407 	case SD_SPINDLE_OFF:
7408 		if ((medium_present == TRUE) && (sval != 0)) {
7409 			/* The stop command from above failed */
7410 			rval = DDI_FAILURE;
7411 			/*
7412 			 * The stop command failed, and we have media
7413 			 * present. Put the level back by calling
7414 			 * sd_ddi_pm_resume() and set the state back to
7415 			 * its previous value.
7416 			 */
7417 			(void) sd_ddi_pm_resume(un);
7418 			mutex_enter(SD_MUTEX(un));
7419 			un->un_last_state = save_state;
7420 			mutex_exit(SD_MUTEX(un));
7421 			break;
7422 		}
7423 		/*
7424 		 * The stop command from above succeeded.
7425 		 */
7426 		if (ISREMOVABLE(un)) {
7427 			/*
7428 			 * Terminate watch thread in case of removable media
7429 			 * devices going into low power state. This is as per
7430 			 * the requirements of pm framework, otherwise commands
7431 			 * will be generated for the device (through watch
7432 			 * thread), even when the device is in low power state.
7433 			 */
7434 			mutex_enter(SD_MUTEX(un));
7435 			un->un_f_watcht_stopped = FALSE;
7436 			if (un->un_swr_token != NULL) {
7437 				opaque_t temp_token = un->un_swr_token;
7438 				un->un_f_watcht_stopped = TRUE;
7439 				un->un_swr_token = NULL;
7440 				mutex_exit(SD_MUTEX(un));
7441 				(void) scsi_watch_request_terminate(temp_token,
7442 				    SCSI_WATCH_TERMINATE_WAIT);
7443 			} else {
7444 				mutex_exit(SD_MUTEX(un));
7445 			}
7446 		}
7447 		break;
7448 
7449 	default:	/* The level requested is spindle on... */
7450 		/*
7451 		 * Legacy behavior: return success on a failed spinup
7452 		 * if there is no media in the drive.
7453 		 * Do this by looking at medium_present here.
7454 		 */
7455 		if ((sval != 0) && medium_present) {
7456 			/* The start command from above failed */
7457 			rval = DDI_FAILURE;
7458 			break;
7459 		}
7460 		/*
7461 		 * The start command from above succeeded.
7462 		 * Resume the device now that we have
7463 		 * started the disk.
7464 		 */
7465 		(void) sd_ddi_pm_resume(un);
7466 
7467 		/*
7468 		 * Resume the watch thread since it was suspended
7469 		 * when the device went into low power mode.
7470 		 */
7471 		if (ISREMOVABLE(un)) {
7472 			mutex_enter(SD_MUTEX(un));
7473 			if (un->un_f_watcht_stopped == TRUE) {
7474 				opaque_t temp_token;
7475 
7476 				un->un_f_watcht_stopped = FALSE;
7477 				mutex_exit(SD_MUTEX(un));
7478 				temp_token = scsi_watch_request_submit(
7479 				    SD_SCSI_DEVP(un),
7480 				    sd_check_media_time,
7481 				    SENSE_LENGTH, sd_media_watch_cb,
7482 				    (caddr_t)dev);
7483 				mutex_enter(SD_MUTEX(un));
7484 				un->un_swr_token = temp_token;
7485 			}
7486 			mutex_exit(SD_MUTEX(un));
7487 		}
7488 	}
7489 	if (got_semaphore_here != 0) {
7490 		sema_v(&un->un_semoclose);
7491 	}
7492 	/*
7493 	 * On exit put the state back to its original value
7494 	 * and broadcast to anyone waiting for the power
7495 	 * change completion.
7496 	 */
7497 	mutex_enter(SD_MUTEX(un));
7498 	un->un_state = state_before_pm;
7499 	cv_broadcast(&un->un_suspend_cv);
7500 	mutex_exit(SD_MUTEX(un));
7501 
7502 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
7503 
7504 	return (rval);
7505 }
7506 
7507 
7508 
7509 /*
7510  *    Function: sdattach
7511  *
7512  * Description: Driver's attach(9e) entry point function.
7513  *
7514  *   Arguments: devi - opaque device info handle
7515  *		cmd  - attach  type
7516  *
7517  * Return Code: DDI_SUCCESS
7518  *		DDI_FAILURE
7519  *
7520  *     Context: Kernel thread context
7521  */
7522 
7523 static int
7524 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
7525 {
7526 	switch (cmd) {
7527 	case DDI_ATTACH:
7528 		return (sd_unit_attach(devi));
7529 	case DDI_RESUME:
7530 		return (sd_ddi_resume(devi));
7531 	default:
7532 		break;
7533 	}
7534 	return (DDI_FAILURE);
7535 }
7536 
7537 
7538 /*
7539  *    Function: sddetach
7540  *
7541  * Description: Driver's detach(9E) entry point function.
7542  *
7543  *   Arguments: devi - opaque device info handle
7544  *		cmd  - detach  type
7545  *
7546  * Return Code: DDI_SUCCESS
7547  *		DDI_FAILURE
7548  *
7549  *     Context: Kernel thread context
7550  */
7551 
7552 static int
7553 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
7554 {
7555 	switch (cmd) {
7556 	case DDI_DETACH:
7557 		return (sd_unit_detach(devi));
7558 	case DDI_SUSPEND:
7559 		return (sd_ddi_suspend(devi));
7560 	default:
7561 		break;
7562 	}
7563 	return (DDI_FAILURE);
7564 }
7565 
7566 
7567 /*
7568  *     Function: sd_sync_with_callback
7569  *
7570  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
7571  *		 state while the callback routine is active.
7572  *
7573  *    Arguments: un: softstate structure for the instance
7574  *
7575  *	Context: Kernel thread context
7576  */
7577 
7578 static void
7579 sd_sync_with_callback(struct sd_lun *un)
7580 {
7581 	ASSERT(un != NULL);
7582 
7583 	mutex_enter(SD_MUTEX(un));
7584 
7585 	ASSERT(un->un_in_callback >= 0);
7586 
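	/*
	 * Busy-wait, dropping SD_MUTEX on each pass so the callback
	 * routine can run and decrement un_in_callback.
	 */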
7587 	while (un->un_in_callback > 0) {
7588 		mutex_exit(SD_MUTEX(un));
7589 		delay(2);
7590 		mutex_enter(SD_MUTEX(un));
7591 	}
7592 
7593 	mutex_exit(SD_MUTEX(un));
7594 }
7595 
7596 /*
7597  *    Function: sd_unit_attach
7598  *
7599  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
7600  *		the soft state structure for the device and performs
7601  *		all necessary structure and device initializations.
7602  *
7603  *   Arguments: devi: the system's dev_info_t for the device.
7604  *
7605  * Return Code: DDI_SUCCESS if attach is successful.
7606  *		DDI_FAILURE if any part of the attach fails.
7607  *
7608  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
7609  *		Kernel thread context only.  Can sleep.
7610  */
7611 
7612 static int
7613 sd_unit_attach(dev_info_t *devi)
7614 {
7615 	struct	scsi_device	*devp;
7616 	struct	sd_lun		*un;
7617 	char			*variantp;
7618 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
7619 	int	instance;
7620 	int	rval;
7621 	int	wc_enabled;
7622 	uint64_t	capacity;
7623 	uint_t		lbasize;
7624 
7625 	/*
7626 	 * Retrieve the target driver's private data area. This was set
7627 	 * up by the HBA.
7628 	 */
7629 	devp = ddi_get_driver_private(devi);
7630 
7631 	/*
7632 	 * Since we have no idea what state things were left in by the last
7633 	 * user of the device, set up some 'default' settings, i.e. turn 'em
7634 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
7635 	 * Do this before the scsi_probe, which sends an inquiry.
7636 	 * This is a fix for bug (4430280).
7637 	 * Of special importance is wide-xfer. The drive could have been left
7638 	 * in wide transfer mode by the last driver to communicate with it,
7639 	 * this includes us. If that's the case, and if the following is not
7640 	 * setup properly or we don't re-negotiate with the drive prior to
7641 	 * transferring data to/from the drive, it causes bus parity errors,
7642 	 * data overruns, and unexpected interrupts. This first occurred when
7643 	 * the fix for bug (4378686) was made.
7644 	 */
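	/*
	 * Note: the final argument of 1 to scsi_ifsetcap() applies each
	 * capability change to this target only (see scsi_ifsetcap(9F)).
	 */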
7645 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
7646 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
7647 	(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
7648 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
7649 
7650 	/*
7651 	 * Use scsi_probe() to issue an INQUIRY command to the device.
7652 	 * This call will allocate and fill in the scsi_inquiry structure
7653 	 * and point the sd_inq member of the scsi_device structure to it.
7654 	 * If the attach succeeds, then this memory will not be de-allocated
7655 	 * (via scsi_unprobe()) until the instance is detached.
7656 	 */
7657 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
7658 		goto probe_failed;
7659 	}
7660 
7661 	/*
7662 	 * Check the device type as specified in the inquiry data and
7663 	 * claim it if it is of a type that we support.
7664 	 */
7665 	switch (devp->sd_inq->inq_dtype) {
7666 	case DTYPE_DIRECT:
7667 		break;
7668 	case DTYPE_RODIRECT:
7669 		break;
7670 	case DTYPE_OPTICAL:
7671 		break;
7672 	case DTYPE_NOTPRESENT:
7673 	default:
7674 		/* Unsupported device type; fail the attach. */
7675 		goto probe_failed;
7676 	}
7677 
7678 	/*
7679 	 * Allocate the soft state structure for this unit.
7680 	 *
7681 	 * We rely upon this memory being set to all zeroes by
7682 	 * ddi_soft_state_zalloc().  We assume that any member of the
7683 	 * soft state structure that is not explicitly initialized by
7684 	 * this routine will have a value of zero.
7685 	 */
7686 	instance = ddi_get_instance(devp->sd_dev);
7687 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
7688 		goto probe_failed;
7689 	}
7690 
7691 	/*
7692 	 * Retrieve a pointer to the newly-allocated soft state.
7693 	 *
7694 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
7695 	 * was successful, unless something has gone horribly wrong and the
7696 	 * ddi's soft state internals are corrupt (in which case it is
7697 	 * probably better to halt here than just fail the attach....)
7698 	 */
7699 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
7700 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
7701 		    instance);
7702 		/*NOTREACHED*/
7703 	}
7704 
7705 	/*
7706 	 * Link the back ptr of the driver soft state to the scsi_device
7707 	 * struct for this lun.
7708 	 * Save a pointer to the softstate in the driver-private area of
7709 	 * the scsi_device struct.
7710 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
7711 	 * we first set un->un_sd below.
7712 	 */
7713 	un->un_sd = devp;
7714 	devp->sd_private = (opaque_t)un;
7715 
7716 	/*
7717 	 * The following must be after devp is stored in the soft state struct.
7718 	 */
7719 #ifdef SDDEBUG
7720 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7721 	    "%s_unit_attach: un:0x%p instance:%d\n",
7722 	    ddi_driver_name(devi), un, instance);
7723 #endif
7724 
7725 	/*
7726 	 * Set up the device type and node type (for the minor nodes).
7727 	 * By default we assume that the device can at least support the
7728 	 * Common Command Set. Call it a CD-ROM if it reports itself
7729 	 * as a RODIRECT device.
7730 	 */
7731 	switch (devp->sd_inq->inq_dtype) {
7732 	case DTYPE_RODIRECT:
7733 		un->un_node_type = DDI_NT_CD_CHAN;
7734 		un->un_ctype	 = CTYPE_CDROM;
7735 		break;
7736 	case DTYPE_OPTICAL:
7737 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7738 		un->un_ctype	 = CTYPE_ROD;
7739 		break;
7740 	default:
7741 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7742 		un->un_ctype	 = CTYPE_CCS;
7743 		break;
7744 	}
7745 
7746 	/*
7747 	 * Try to read the interconnect type from the HBA.
7748 	 *
7749 	 * Note: This driver is currently compiled as two binaries, a parallel
7750 	 * scsi version (sd) and a fibre channel version (ssd). All functional
7751 	 * differences are determined at compile time. In the future a single
7752 	 * binary will be provided and the interconnect type will be used to
7753 	 * differentiate between fibre and parallel scsi behaviors. At that time
7754 	 * it will be necessary for all fibre channel HBAs to support this
7755 	 * property.
7756 	 *
7757 	 * Set un_f_is_fibre to TRUE (default fibre).
7758 	 */
7759 	un->un_f_is_fibre = TRUE;
7760 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7761 	case INTERCONNECT_SSA:
7762 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7763 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7764 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7765 		break;
7766 	case INTERCONNECT_PARALLEL:
7767 		un->un_f_is_fibre = FALSE;
7768 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7769 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7770 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7771 		break;
7772 	case INTERCONNECT_FIBRE:
7773 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7774 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7775 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7776 		break;
7777 	case INTERCONNECT_FABRIC:
7778 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7779 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7780 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7781 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7782 		break;
7783 	default:
7784 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
7785 		/*
7786 		 * The HBA does not support the "interconnect-type" property
7787 		 * (or did not provide a recognized type).
7788 		 *
7789 		 * Note: This will be obsoleted when a single fibre channel
7790 		 * and parallel scsi driver is delivered. In the meantime the
7791 		 * interconnect type will be set to the platform default. If that
7792 		 * type is not parallel SCSI, it means that we should be
7793 		 * assuming "ssd" semantics. However, here this also means that
7794 		 * the FC HBA is not supporting the "interconnect-type" property
7795 		 * like we expect it to, so log this occurrence.
7796 		 */
7797 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7798 		if (!SD_IS_PARALLEL_SCSI(un)) {
7799 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7800 			    "sd_unit_attach: un:0x%p Assuming "
7801 			    "INTERCONNECT_FIBRE\n", un);
7802 		} else {
7803 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7804 			    "sd_unit_attach: un:0x%p Assuming "
7805 			    "INTERCONNECT_PARALLEL\n", un);
7806 			un->un_f_is_fibre = FALSE;
7807 		}
7808 #else
7809 		/*
7810 		 * Note: This source will be implemented when a single fibre
7811 		 * channel and parallel scsi driver is delivered. The default
7812 		 * will be to assume that if a device does not support the
7813 		 * "interconnect-type" property it is a parallel SCSI HBA and
7814 		 * we will set the interconnect type for parallel scsi.
7815 		 */
7816 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7817 		un->un_f_is_fibre = FALSE;
7818 #endif
7819 		break;
7820 	}
7821 
7822 	if (un->un_f_is_fibre == TRUE) {
7823 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7824 			SCSI_VERSION_3) {
7825 			switch (un->un_interconnect_type) {
7826 			case SD_INTERCONNECT_FIBRE:
7827 			case SD_INTERCONNECT_SSA:
7828 				un->un_node_type = DDI_NT_BLOCK_WWN;
7829 				break;
7830 			default:
7831 				break;
7832 			}
7833 		}
7834 	}
7835 
7836 	/*
7837 	 * Initialize the Request Sense command for the target
7838 	 */
7839 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
7840 		goto alloc_rqs_failed;
7841 	}
7842 
7843 	/*
7844 	 * Set un_retry_count to SD_RETRY_COUNT; this is OK for SPARC,
7845 	 * which has separate binaries for sd and ssd.
7846 	 *
7847 	 * x86 has one binary, and un_retry_count is set based on the
7848 	 * interconnect type. The hardcoded values will go away when
7849 	 * SPARC uses one binary for sd and ssd; these hardcoded values
7850 	 * need to match SD_RETRY_COUNT in sddef.h.
7851 	 * The value used is based on interconnect type:
7852 	 * fibre = 3, parallel = 5.
7853 	 */
7854 #if defined(__i386) || defined(__amd64)
7855 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
7856 #else
7857 	un->un_retry_count = SD_RETRY_COUNT;
7858 #endif
7859 
7860 	/*
7861 	 * Set the per disk retry count to the default number of retries
7862 	 * for disks and CDROMs. This value can be overridden by the
7863 	 * disk property list or an entry in sd.conf.
7864 	 */
7865 	un->un_notready_retry_count =
7866 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
7867 			: DISK_NOT_READY_RETRY_COUNT(un);
7868 
7869 	/*
7870 	 * Set the busy retry count to the default value of un_retry_count.
7871 	 * This can be overridden by entries in sd.conf or the device
7872 	 * config table.
7873 	 */
7874 	un->un_busy_retry_count = un->un_retry_count;
7875 
7876 	/*
7877 	 * Init the reset threshold for retries.  This number determines
7878 	 * how many retries must be performed before a reset can be issued
7879 	 * (for certain error conditions). This can be overridden by entries
7880 	 * in sd.conf or the device config table.
7881 	 */
7882 	un->un_reset_retry_count = (un->un_retry_count / 2);
7883 
7884 	/*
7885 	 * Set the victim_retry_count to the default un_retry_count
7886 	 */
7887 	un->un_victim_retry_count = (2 * un->un_retry_count);
7888 
7889 	/*
7890 	 * Set the reservation release timeout to the default value of
7891 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
7892 	 * device config table.
7893 	 */
7894 	un->un_reserve_release_time = 5;
7895 
7896 	/*
7897 	 * Set up the default maximum transfer size. Note that this may
7898 	 * get updated later in the attach, when setting up default wide
7899 	 * operations for disks.
7900 	 */
7901 #if defined(__i386) || defined(__amd64)
7902 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
7903 #else
7904 	un->un_max_xfer_size = (uint_t)maxphys;
7905 #endif
7906 
7907 	/*
7908 	 * Get "allow bus device reset" property (defaults to "enabled" if
7909 	 * the property was not defined). This is to disable bus resets for
7910 	 * certain kinds of error recovery. Note: In the future when a run-time
7911 	 * fibre check is available the soft state flag should default to
7912 	 * enabled.
7913 	 */
7914 	if (un->un_f_is_fibre == TRUE) {
7915 		un->un_f_allow_bus_device_reset = TRUE;
7916 	} else {
7917 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7918 			"allow-bus-device-reset", 1) != 0) {
7919 			un->un_f_allow_bus_device_reset = TRUE;
7920 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7921 			"sd_unit_attach: un:0x%p Bus device reset enabled\n",
7922 				un);
7923 		} else {
7924 			un->un_f_allow_bus_device_reset = FALSE;
7925 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7926 			"sd_unit_attach: un:0x%p Bus device reset disabled\n",
7927 				un);
7928 		}
7929 	}
7930 
7931 	/*
7932 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
7933 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
7934 	 *
7935 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
7936 	 * property. The new "variant" property with a value of "atapi" has been
7937 	 * introduced so that future 'variants' of standard SCSI behavior (like
7938 	 * atapi) could be specified by the underlying HBA drivers by supplying
7939 	 * a new value for the "variant" property, instead of having to define a
7940 	 * new property.
7941 	 */
7942 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
7943 		un->un_f_cfg_is_atapi = TRUE;
7944 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7945 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
7946 	}
7947 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
7948 	    &variantp) == DDI_PROP_SUCCESS) {
7949 		if (strcmp(variantp, "atapi") == 0) {
7950 			un->un_f_cfg_is_atapi = TRUE;
7951 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7952 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
7953 		}
7954 		ddi_prop_free(variantp);
7955 	}
7956 
7957 	/*
7958 	 * Assume doorlock commands are supported. If not, the first
7959 	 * call to sd_send_scsi_DOORLOCK() will set to FALSE
7960 	 */
7961 	un->un_f_doorlock_supported = TRUE;
7962 
7963 	un->un_cmd_timeout	= SD_IO_TIME;
7964 
7965 	/* Info on current states, statuses, etc. (Updated frequently) */
7966 	un->un_state		= SD_STATE_NORMAL;
7967 	un->un_last_state	= SD_STATE_NORMAL;
7968 
7969 	/* Control & status info for command throttling */
7970 	un->un_throttle		= sd_max_throttle;
7971 	un->un_saved_throttle	= sd_max_throttle;
7972 	un->un_min_throttle	= sd_min_throttle;
7973 
7974 	if (un->un_f_is_fibre == TRUE) {
7975 		un->un_f_use_adaptive_throttle = TRUE;
7976 	} else {
7977 		un->un_f_use_adaptive_throttle = FALSE;
7978 	}
7979 
7980 	/* Removable media support. */
7981 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
7982 	un->un_mediastate		= DKIO_NONE;
7983 	un->un_specified_mediastate	= DKIO_NONE;
7984 
7985 	/* CVs for suspend/resume (PM or DR) */
7986 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
7987 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
7988 
7989 	/* Power management support. */
7990 	un->un_power_level = SD_SPINDLE_UNINIT;
7991 
7992 	/*
7993 	 * The open/close semaphore is used to serialize threads executing
7994 	 * in the driver's open & close entry point routines for a given
7995 	 * instance.
7996 	 */
7997 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
7998 
7999 	/*
8000 	 * The conf file entry and softstate variable is a forceful override,
8001 	 * meaning a non-zero value must be entered to change the default.
8002 	 */
8003 	un->un_f_disksort_disabled = FALSE;
8004 
8005 	/*
8006 	 * Retrieve the properties from the static driver table or the driver
8007 	 * configuration file (.conf) for this unit and update the soft state
8008 	 * for the device as needed for the indicated properties.
8009 	 * Note: the property configuration needs to occur here as some of the
8010 	 * following routines may have dependancies on soft state flags set
8011 	 * following routines may have dependencies on soft state flags set
8012 	 */
8013 	sd_read_unit_properties(un);
8014 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8015 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
8016 
8017 	/*
8018 	 * By default, we mark the capacity, lbasize, and geometry
8019 	 * as invalid. Only if we successfully read a valid capacity
8020 	 * will we update the un_blockcount and un_tgt_blocksize with the
8021 	 * valid values (the geometry will be validated later).
8022 	 */
8023 	un->un_f_blockcount_is_valid	= FALSE;
8024 	un->un_f_tgt_blocksize_is_valid	= FALSE;
8025 	un->un_f_geometry_is_valid	= FALSE;
8026 
8027 	/*
8028 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
8029 	 * otherwise.
8030 	 */
8031 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
8032 	un->un_blockcount = 0;
8033 
8034 	/*
8035 	 * Set up the per-instance info needed to determine the correct
8036 	 * CDBs and other info for issuing commands to the target.
8037 	 */
8038 	sd_init_cdb_limits(un);
8039 
8040 	/*
8041 	 * Set up the IO chains to use, based upon the target type.
8042 	 */
8043 	if (ISREMOVABLE(un)) {
8044 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
8045 	} else {
8046 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
8047 	}
8048 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
8049 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
8050 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
8051 
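	/*
	 * Create the xbuf attribute context through which an sd_xbuf is
	 * allocated for each buf(9S) passed to the driver; sd_xbuf_strategy
	 * is the dispatch routine, and the active/reserve limits bound the
	 * number of outstanding xbufs.
	 */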
8052 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
8053 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
8054 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
8055 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
8056 
8057 
8058 	if (ISCD(un)) {
8059 		un->un_additional_codes = sd_additional_codes;
8060 	} else {
8061 		un->un_additional_codes = NULL;
8062 	}
8063 
8064 	/*
8065 	 * Create the kstats here so they can be available for attach-time
8066 	 * routines that send commands to the unit (either polled or via
8067 	 * sd_send_scsi_cmd).
8068 	 *
8069 	 * Note: This is a critical sequence that needs to be maintained:
8070 	 *	1) Instantiate the kstats here, before any routines using the
8071 	 *	   iopath (i.e. sd_send_scsi_cmd).
8072 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8073 	 *	   stats (sd_set_pstats), following sd_validate_geometry(),
8074 	 *	   sd_register_devid(), and sd_disable_caching().
8075 	 */
8076 
8077 	un->un_stats = kstat_create(sd_label, instance,
8078 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
8079 	if (un->un_stats != NULL) {
8080 		un->un_stats->ks_lock = SD_MUTEX(un);
8081 		kstat_install(un->un_stats);
8082 	}
8083 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8084 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
8085 
8086 	sd_create_errstats(un, instance);
8087 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8088 	    "sd_unit_attach: un:0x%p errstats created\n", un);
8089 
8090 	/*
8091 	 * The following if/else code was relocated here from below as part
8092 	 * of the fix for bug (4430280). However with the default setup added
8093 	 * on entry to this routine, it's no longer absolutely necessary for
8094 	 * this to be before the call to sd_spin_up_unit.
8095 	 */
8096 	if (SD_IS_PARALLEL_SCSI(un)) {
8097 		/*
8098 		 * If SCSI-2 tagged queueing is supported by the target
8099 		 * and by the host adapter then we will enable it.
8100 		 */
8101 		un->un_tagflags = 0;
8102 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8103 		    (devp->sd_inq->inq_cmdque) &&
8104 		    (un->un_f_arq_enabled == TRUE)) {
8105 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
8106 			    1, 1) == 1) {
8107 				un->un_tagflags = FLAG_STAG;
8108 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8109 				    "sd_unit_attach: un:0x%p tag queueing "
8110 				    "enabled\n", un);
8111 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
8112 			    "untagged-qing", 0) == 1) {
8113 				un->un_f_opt_queueing = TRUE;
8114 				un->un_saved_throttle = un->un_throttle =
8115 				    min(un->un_throttle, 3);
8116 			} else {
8117 				un->un_f_opt_queueing = FALSE;
8118 				un->un_saved_throttle = un->un_throttle = 1;
8119 			}
8120 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
8121 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
8122 			/* The Host Adapter supports internal queueing. */
8123 			un->un_f_opt_queueing = TRUE;
8124 			un->un_saved_throttle = un->un_throttle =
8125 			    min(un->un_throttle, 3);
8126 		} else {
8127 			un->un_f_opt_queueing = FALSE;
8128 			un->un_saved_throttle = un->un_throttle = 1;
8129 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8130 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
8131 		}
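
		/*
		 * (The throttle of 3 used above when only untagged/internal
		 * queueing is available appears to be a conservative
		 * historical depth; with no queueing at all the throttle
		 * drops to 1, i.e. one outstanding command at a time.)
		 */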
8132 
8133 
8134 		/* Setup or tear down default wide operations for disks */
8135 
8136 		/*
8137 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
8138 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
8139 		 * system and be set to different values. In the future this
8140 		 * code may need to be updated when the ssd module is
8141 		 * obsoleted and removed from the system. (4299588)
8142 		 */
8143 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8144 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
8145 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8146 			    1, 1) == 1) {
8147 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8148 				    "sd_unit_attach: un:0x%p Wide Transfer "
8149 				    "enabled\n", un);
8150 			}
8151 
8152 			/*
8153 			 * If tagged queuing has also been enabled, then
8154 			 * enable large xfers
8155 			 */
8156 			if (un->un_saved_throttle == sd_max_throttle) {
8157 				un->un_max_xfer_size =
8158 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8159 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8160 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8161 				    "sd_unit_attach: un:0x%p max transfer "
8162 				    "size=0x%x\n", un, un->un_max_xfer_size);
8163 			}
8164 		} else {
8165 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8166 			    0, 1) == 1) {
8167 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8168 				    "sd_unit_attach: un:0x%p "
8169 				    "Wide Transfer disabled\n", un);
8170 			}
8171 		}
8172 	} else {
8173 		un->un_tagflags = FLAG_STAG;
8174 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
8175 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
8176 	}
8177 
8178 	/*
8179 	 * If this target supports LUN reset, try to enable it.
8180 	 */
8181 	if (un->un_f_lun_reset_enabled) {
8182 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
8183 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8184 			    "un:0x%p lun_reset capability set\n", un);
8185 		} else {
8186 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8187 			    "un:0x%p lun-reset capability not set\n", un);
8188 		}
8189 	}
8190 
8191 	/*
8192 	 * At this point in the attach, we have enough info in the
8193 	 * soft state to be able to issue commands to the target.
8194 	 *
8195 	 * All command paths used below MUST issue their commands as
8196 	 * SD_PATH_DIRECT. This is important as intermediate layers
8197 	 * are not all initialized yet (such as PM).
8198 	 */
8199 
8200 	/*
8201 	 * Send a TEST UNIT READY command to the device. This should clear
8202 	 * any outstanding UNIT ATTENTION that may be present.
8203 	 *
8204 	 * Note: Don't check for success, just track if there is a reservation,
8205 	 * Note: Don't check for success; just track if there is a reservation,
8206 	 * as this is a throw-away command to clear any unit attentions.
8207 	 * Note: This MUST be the first command issued to the target during
8208 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
8209 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
8210 	 * with attempts at spinning up a device with no media.
8211 	 */
8212 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
8213 		reservation_flag = SD_TARGET_IS_RESERVED;
8214 	}
8215 
8216 	/*
8217 	 * If the device is NOT a removable media device, attempt to spin
8218 	 * it up (using the START_STOP_UNIT command) and read its capacity
8219 	 * (using the READ CAPACITY command).  Note, however, that either
8220 	 * of these could fail and in some cases we would continue with
8221 	 * the attach despite the failure (see below).
8222 	 */
8223 	if (devp->sd_inq->inq_dtype == DTYPE_DIRECT && !ISREMOVABLE(un)) {
8224 		switch (sd_spin_up_unit(un)) {
8225 		case 0:
8226 			/*
8227 			 * Spin-up was successful; now try to read the
8228 			 * capacity.  If successful then save the results
8229 			 * and mark the capacity & lbasize as valid.
8230 			 */
8231 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8232 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
8233 
8234 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
8235 			    &lbasize, SD_PATH_DIRECT)) {
8236 			case 0: {
8237 				if (capacity > DK_MAX_BLOCKS) {
8238 #ifdef _LP64
8239 					/*
8240 					 * Enable descriptor format sense data
8241 					 * so that we can get 64 bit sense
8242 					 * data fields.
8243 					 */
8244 					sd_enable_descr_sense(un);
8245 #else
8246 					/* 32-bit kernels can't handle this */
8247 					scsi_log(SD_DEVINFO(un),
8248 					    sd_label, CE_WARN,
8249 					    "disk has %llu blocks, which "
8250 					    "is too large for a 32-bit "
8251 					    "kernel", capacity);
8252 					goto spinup_failed;
8253 #endif
8254 				}
8255 				/*
8256 				 * The following relies on
8257 				 * sd_send_scsi_READ_CAPACITY never
8258 				 * returning 0 for capacity and/or lbasize.
8259 				 */
8260 				sd_update_block_info(un, lbasize, capacity);
8261 
8262 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8263 				    "sd_unit_attach: un:0x%p capacity = %ld "
8264 				    "blocks; lbasize= %ld.\n", un,
8265 				    un->un_blockcount, un->un_tgt_blocksize);
8266 
8267 				break;
8268 			}
8269 			case EACCES:
8270 				/*
8271 				 * Should never get here if the spin-up
8272 				 * succeeded, but code it in anyway.
8273 				 * From here, just continue with the attach...
8274 				 */
8275 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8276 				    "sd_unit_attach: un:0x%p "
8277 				    "sd_send_scsi_READ_CAPACITY "
8278 				    "returned reservation conflict\n", un);
8279 				reservation_flag = SD_TARGET_IS_RESERVED;
8280 				break;
8281 			default:
8282 				/*
8283 				 * Likewise, should never get here if the
8284 				 * spin-up succeeded. Just continue with
8285 				 * the attach...
8286 				 */
8287 				break;
8288 			}
8289 			break;
8290 		case EACCES:
8291 			/*
8292 			 * Device is reserved by another host.  In this case
8293 			 * we could not spin it up or read the capacity, but
8294 			 * we continue with the attach anyway.
8295 			 */
8296 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8297 			    "sd_unit_attach: un:0x%p spin-up reservation "
8298 			    "conflict.\n", un);
8299 			reservation_flag = SD_TARGET_IS_RESERVED;
8300 			break;
8301 		default:
8302 			/* Fail the attach if the spin-up failed. */
8303 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8304 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
8305 			goto spinup_failed;
8306 		}
8307 	}
8308 
8309 	/*
8310 	 * Check to see if this is a MMC drive
8311 	 */
8312 	if (ISCD(un)) {
8313 		sd_set_mmc_caps(un);
8314 	}
8315 
8316 	/*
8317 	 * Create the minor nodes for the device.
8318 	 * Note: If we want to support fdisk on both sparc and intel, this will
8319 	 * have to separate out the notion that VTOC8 is always sparc, and
8320 	 * VTOC16 is always intel (tho these can be the defaults).  The vtoc
8321 	 * VTOC16 is always intel (though these can be the defaults).  The vtoc
8322 	 * partitioning will have to have been read & set up before we
8323 	 * create the minor nodes. (any other inits (such as kstats) that
8324 	 * also ought to be done before creating the minor nodes?) (Doesn't
8325 	 * setting up the minor nodes kind of imply that we're ready to
8326 	 * handle an open from userland?)
8327 	 */
8328 	if (sd_create_minor_nodes(un, devi) != DDI_SUCCESS) {
8329 		goto create_minor_nodes_failed;
8330 	}
8331 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8332 	    "sd_unit_attach: un:0x%p minor nodes created\n", un);
8333 
8334 	/*
8335 	 * Add a zero-length attribute to tell the world we support
8336 	 * kernel ioctls (for layered drivers)
8337 	 */
8338 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8339 	    DDI_KERNEL_IOCTL, NULL, 0);
8340 
8341 	/*
8342 	 * Add a boolean property to tell the world we support
8343 	 * the B_FAILFAST flag (for layered drivers)
8344 	 */
8345 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8346 	    "ddi-failfast-supported", NULL, 0);
8347 
8348 	/*
8349 	 * Initialize power management
8350 	 */
8351 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
8352 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
8353 	sd_setup_pm(un, devi);
8354 	if (un->un_f_pm_is_enabled == FALSE) {
8355 		/*
8356 		 * For performance, point to a jump table that does
8357 		 * not include pm.
8358 		 * The direct and priority chains don't change with PM.
8359 		 *
8360 		 * Note: this is currently done based on individual device
8361 		 * capabilities. When an interface for determining system
8362 		 * power enabled state becomes available, or when additional
8363 		 * layers are added to the command chain, these values will
8364 		 * have to be re-evaluated for correctness.
8365 		 */
8366 		if (ISREMOVABLE(un)) {
8367 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
8368 		} else {
8369 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
8370 		}
8371 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8372 	}
8373 
8374 	/*
8375 	 * This property is set to 0 by HA software to avoid retries
8376 	 * on a reserved disk. (The preferred property name is
8377 	 * "retry-on-reservation-conflict") (1189689)
8378 	 *
8379 	 * Note: The use of a global here can have unintended consequences. A
8380 	 * per-instance variable is preferable to match the capabilities of
8381 	 * different underlying HBAs. (4402600)
8382 	 */
8383 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
8384 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
8385 	    sd_retry_on_reservation_conflict);
8386 	if (sd_retry_on_reservation_conflict != 0) {
8387 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
8388 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
8389 		    sd_retry_on_reservation_conflict);
8390 	}
8391 
8392 	/* Set up options for QFULL handling. */
8393 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8394 	    "qfull-retries", -1)) != -1) {
8395 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
8396 		    rval, 1);
8397 	}
8398 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8399 	    "qfull-retry-interval", -1)) != -1) {
8400 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
8401 		    rval, 1);
8402 	}
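
	/*
	 * ("qfull-retries" and "qfull-retry-interval" are scsi_ifsetcap(9F)
	 * capabilities: they tell the HBA how many times, and at what
	 * interval, to retry a command that completes with QFULL status
	 * before failing it back to the target driver.)
	 */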
8403 
8404 	/*
8405 	 * This just prints a message that announces the existence of the
8406 	 * device. The message is always printed in the system logfile, but
8407 	 * only appears on the console if the system is booted with the
8408 	 * -v (verbose) argument.
8409 	 */
8410 	ddi_report_dev(devi);
8411 
8412 	/*
8413 	 * The framework calls driver attach routines single-threaded
8414 	 * for a given instance.  However we still acquire SD_MUTEX here
8415 	 * because it is required for calling the sd_validate_geometry()
8416 	 * and sd_register_devid() functions.
8417 	 */
8418 	mutex_enter(SD_MUTEX(un));
8419 	un->un_f_geometry_is_valid = FALSE;
8420 	un->un_mediastate = DKIO_NONE;
8421 	un->un_reserved = -1;
8422 	if (!ISREMOVABLE(un)) {
8423 		/*
8424 		 * Read and validate the device's geometry (ie, disk label)
8425 		 * A new unformatted drive will not have a valid geometry, but
8426 		 * the driver needs to successfully attach to this device so
8427 		 * the drive can be formatted via ioctls.
8428 		 */
8429 		if (((sd_validate_geometry(un, SD_PATH_DIRECT) ==
8430 		    ENOTSUP)) &&
8431 		    (un->un_blockcount < DK_MAX_BLOCKS)) {
8432 			/*
8433 			 * We found a small disk with an EFI label on it;
8434 			 * we need to fix up the minor nodes accordingly.
8435 			 */
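			/*
			 * (In the EFI minor-node layout the "h" slice is
			 * replaced by the reserved whole-disk node "wd";
			 * see sd_minor_data_efi below.)
			 */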
8436 			ddi_remove_minor_node(devi, "h");
8437 			ddi_remove_minor_node(devi, "h,raw");
8438 			(void) ddi_create_minor_node(devi, "wd",
8439 			    S_IFBLK,
8440 			    (instance << SDUNIT_SHIFT) | WD_NODE,
8441 			    un->un_node_type, NULL);
8442 			(void) ddi_create_minor_node(devi, "wd,raw",
8443 			    S_IFCHR,
8444 			    (instance << SDUNIT_SHIFT) | WD_NODE,
8445 			    un->un_node_type, NULL);
8446 		}
8447 	}
8448 
8449 	/*
8450 	 * Read and initialize the devid for the unit.
8451 	 */
8452 	ASSERT(un->un_errstats != NULL);
8453 	if (!ISREMOVABLE(un)) {
8454 		sd_register_devid(un, devi, reservation_flag);
8455 	}
8456 	mutex_exit(SD_MUTEX(un));
8457 
8458 #if (defined(__fibre))
8459 	/*
8460 	 * Register callbacks for fibre only.  You can't do this solely
8461 	 * on the basis of the devid_type because this is HBA-specific.
8462 	 * We need to query our hba capabilities to find out whether to
8463 	 * register or not.
8464 	 */
8465 	if (un->un_f_is_fibre) {
8466 	    if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
8467 		sd_init_event_callbacks(un);
8468 		SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8469 		    "sd_unit_attach: un:0x%p event callbacks inserted", un);
8470 	    }
8471 	}
8472 #endif
8473 
8474 	if (un->un_f_opt_disable_cache == TRUE) {
8475 		if (sd_disable_caching(un) != 0) {
8476 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8477 			    "sd_unit_attach: un:0x%p Could not disable "
8478 			    "caching", un);
8479 			goto devid_failed;
8480 		}
8481 	}
8482 
8483 	/*
8484 	 * NOTE: Since there is currently no mechanism to
8485 	 * change the state of the Write Cache Enable mode select,
8486 	 * this code just checks the value of the WCE bit
8487 	 * at device attach time.  If a mechanism
8488 	 * is added to the driver to change WCE, un_f_write_cache_enabled
8489 	 * must be updated appropriately.
8490 	 */
8491 	(void) sd_get_write_cache_enabled(un, &wc_enabled);
8492 	mutex_enter(SD_MUTEX(un));
8493 	un->un_f_write_cache_enabled = (wc_enabled != 0);
8494 	mutex_exit(SD_MUTEX(un));
8495 
8496 	/*
8497 	 * Set the pstat and error stat values here, so that data obtained
8498 	 * during the preceding attach-time routines is available.
8499 	 *
8500 	 * Note: This is a critical sequence that needs to be maintained:
8501 	 *	1) Instantiate the kstats before any routines using the iopath
8502 	 *	   (i.e. sd_send_scsi_cmd).
8503 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8504 	 *	   stats (sd_set_pstats) here, following sd_validate_geometry(),
8505 	 *	   sd_register_devid(), and sd_disable_caching().
8506 	 */
8507 	if (!ISREMOVABLE(un) && (un->un_f_pkstats_enabled == TRUE)) {
8508 		sd_set_pstats(un);
8509 		SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8510 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
8511 	}
8512 
8513 	sd_set_errstats(un);
8514 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8515 	    "sd_unit_attach: un:0x%p errstats set\n", un);
8516 
8517 	/*
8518 	 * Find out what type of reservation this disk supports.
8519 	 */
8520 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
8521 	case 0:
8522 		/*
8523 		 * SCSI-3 reservations are supported.
8524 		 */
8525 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8526 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8527 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
8528 		break;
8529 	case ENOTSUP:
8530 		/*
8531 		 * The PERSISTENT RESERVE IN command would not be recognized by
8532 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
8533 		 */
8534 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8535 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
8536 		un->un_reservation_type = SD_SCSI2_RESERVATION;
8537 		break;
8538 	default:
8539 		/*
8540 		 * default to SCSI-3 reservations
8541 		 */
8542 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8543 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
8544 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8545 		break;
8546 	}
8547 
8548 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8549 	    "sd_unit_attach: un:0x%p exit success\n", un);
8550 
8551 	return (DDI_SUCCESS);
8552 
8553 	/*
8554 	 * An error occurred during the attach; clean up & return failure.
8555 	 */
8556 
8557 devid_failed:
8558 
8559 setup_pm_failed:
8560 	ddi_remove_minor_node(devi, NULL);
8561 
8562 create_minor_nodes_failed:
8563 	/*
8564 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8565 	 */
8566 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8567 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8568 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8569 
8570 	if (un->un_f_is_fibre == FALSE) {
8571 	    (void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8572 	}
8573 
8574 spinup_failed:
8575 
8576 	mutex_enter(SD_MUTEX(un));
8577 
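	/*
	 * Note on the untimeout() pattern below: SD_MUTEX is dropped
	 * around each untimeout(9F) call because untimeout() can block
	 * until an in-flight callback completes, and the callbacks
	 * themselves acquire SD_MUTEX; holding the mutex across the
	 * call could therefore deadlock.
	 */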
8578 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
8579 	if (un->un_direct_priority_timeid != NULL) {
8580 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8581 		un->un_direct_priority_timeid = NULL;
8582 		mutex_exit(SD_MUTEX(un));
8583 		(void) untimeout(temp_id);
8584 		mutex_enter(SD_MUTEX(un));
8585 	}
8586 
8587 	/* Cancel any pending start/stop timeouts */
8588 	if (un->un_startstop_timeid != NULL) {
8589 		timeout_id_t temp_id = un->un_startstop_timeid;
8590 		un->un_startstop_timeid = NULL;
8591 		mutex_exit(SD_MUTEX(un));
8592 		(void) untimeout(temp_id);
8593 		mutex_enter(SD_MUTEX(un));
8594 	}
8595 
8596 	/* Cancel any pending reset-throttle timeouts */
8597 	if (un->un_reset_throttle_timeid != NULL) {
8598 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8599 		un->un_reset_throttle_timeid = NULL;
8600 		mutex_exit(SD_MUTEX(un));
8601 		(void) untimeout(temp_id);
8602 		mutex_enter(SD_MUTEX(un));
8603 	}
8604 
8605 	/* Cancel any pending retry timeouts */
8606 	if (un->un_retry_timeid != NULL) {
8607 		timeout_id_t temp_id = un->un_retry_timeid;
8608 		un->un_retry_timeid = NULL;
8609 		mutex_exit(SD_MUTEX(un));
8610 		(void) untimeout(temp_id);
8611 		mutex_enter(SD_MUTEX(un));
8612 	}
8613 
8614 	/* Cancel any pending delayed cv broadcast timeouts */
8615 	if (un->un_dcvb_timeid != NULL) {
8616 		timeout_id_t temp_id = un->un_dcvb_timeid;
8617 		un->un_dcvb_timeid = NULL;
8618 		mutex_exit(SD_MUTEX(un));
8619 		(void) untimeout(temp_id);
8620 		mutex_enter(SD_MUTEX(un));
8621 	}
8622 
8623 	mutex_exit(SD_MUTEX(un));
8624 
8625 	/* There should not be any in-progress I/O so ASSERT this check */
8626 	/* There should be no in-progress I/O, so ASSERT that here */
8627 	ASSERT(un->un_ncmds_in_driver == 0);
8628 
8629 	/* Do not free the softstate if the callback routine is active */
8630 	sd_sync_with_callback(un);
8631 
8632 	/*
8633 	 * Partition stats apparently are not used with removables. These would
8634 	 * not have been created during attach, so no need to clean them up...
8635 	 */
8636 	if (un->un_stats != NULL) {
8637 		kstat_delete(un->un_stats);
8638 		un->un_stats = NULL;
8639 	}
8640 	if (un->un_errstats != NULL) {
8641 		kstat_delete(un->un_errstats);
8642 		un->un_errstats = NULL;
8643 	}
8644 
8645 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8646 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8647 
8648 	ddi_prop_remove_all(devi);
8649 	sema_destroy(&un->un_semoclose);
8650 	cv_destroy(&un->un_state_cv);
8651 
8652 getrbuf_failed:
8653 
8654 	sd_free_rqs(un);
8655 
8656 alloc_rqs_failed:
8657 
8658 	devp->sd_private = NULL;
8659 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8660 
8661 get_softstate_failed:
8662 	/*
8663 	 * Note: the man pages are unclear as to whether or not doing a
8664 	 * ddi_soft_state_free(sd_state, instance) is the right way to
8665 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8666 	 * ddi_get_soft_state() fails.  The implication seems to be
8667 	 * that the get_soft_state cannot fail if the zalloc succeeds.
8668 	 */
8669 	ddi_soft_state_free(sd_state, instance);
8670 
8671 probe_failed:
8672 	scsi_unprobe(devp);
8673 #ifdef SDDEBUG
8674 	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
8675 	    (sd_level_mask & SD_LOGMASK_TRACE)) {
8676 		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
8677 		    (void *)un);
8678 	}
8679 #endif
8680 	return (DDI_FAILURE);
8681 }
8682 
8683 
8684 /*
8685  *    Function: sd_unit_detach
8686  *
8687  * Description: Performs DDI_DETACH processing for sddetach().
8688  *
8689  * Return Code: DDI_SUCCESS
8690  *		DDI_FAILURE
8691  *
8692  *     Context: Kernel thread context
8693  */
8694 
8695 static int
8696 sd_unit_detach(dev_info_t *devi)
8697 {
8698 	struct scsi_device	*devp;
8699 	struct sd_lun		*un;
8700 	int			i;
8701 	dev_t			dev;
8702 #if !(defined(__i386) || defined(__amd64)) && !defined(__fibre)
8703 	int			reset_retval;
8704 #endif
8705 	int			instance = ddi_get_instance(devi);
8706 
8707 	mutex_enter(&sd_detach_mutex);
8708 
8709 	/*
8710 	 * Fail the detach for any of the following:
8711 	 *  - Unable to get the sd_lun struct for the instance
8712 	 *  - A layered driver has an outstanding open on the instance
8713 	 *  - Another thread is already detaching this instance
8714 	 *  - Another thread is currently performing an open
8715 	 */
8716 	devp = ddi_get_driver_private(devi);
8717 	if ((devp == NULL) ||
8718 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
8719 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
8720 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
8721 		mutex_exit(&sd_detach_mutex);
8722 		return (DDI_FAILURE);
8723 	}
8724 
8725 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
8726 
8727 	/*
8728 	 * Mark this instance as currently in a detach, to inhibit any
8729 	 * opens from a layered driver.
8730 	 */
8731 	un->un_detach_count++;
8732 	mutex_exit(&sd_detach_mutex);
8733 
8734 	dev = sd_make_device(SD_DEVINFO(un));
8735 
8736 	_NOTE(COMPETING_THREADS_NOW);
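	/*
	 * (The _NOTE() annotations in this function are hints for static
	 * lock-analysis tools such as warlock; they compile to nothing.)
	 */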
8737 
8738 	mutex_enter(SD_MUTEX(un));
8739 
8740 	/*
8741 	 * Fail the detach if there are any outstanding layered
8742 	 * opens on this device.
8743 	 */
8744 	for (i = 0; i < NDKMAP; i++) {
8745 		if (un->un_ocmap.lyropen[i] != 0) {
8746 			goto err_notclosed;
8747 		}
8748 	}
8749 
8750 	/*
8751 	 * Verify there are NO outstanding commands issued to this device.
8752 	 * ie, un_ncmds_in_transport == 0.
8753 	 * It's possible to have outstanding commands through the physio
8754 	 * code path, even though everything's closed.
8755 	 */
8756 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
8757 	    (un->un_direct_priority_timeid != NULL) ||
8758 	    (un->un_state == SD_STATE_RWAIT)) {
8759 		mutex_exit(SD_MUTEX(un));
8760 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8761 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
8762 		goto err_stillbusy;
8763 	}
8764 
8765 	/*
8766 	 * If we have the device reserved, release the reservation.
8767 	 */
8768 	if ((un->un_resvd_status & SD_RESERVE) &&
8769 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
8770 		mutex_exit(SD_MUTEX(un));
8771 		/*
8772 		 * Note: sd_reserve_release sends a command to the device
8773 		 * via the sd_ioctlcmd() path, and can sleep.
8774 		 */
8775 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
8776 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8777 			    "sd_dr_detach: Cannot release reservation \n");
8778 		}
8779 	} else {
8780 		mutex_exit(SD_MUTEX(un));
8781 	}
8782 
8783 	/*
8784 	 * Untimeout any reserve recover, throttle reset, restart unit
8785 	 * and delayed broadcast timeout threads. Protect the timeout pointers
8786 	 * from getting nulled by their callback functions.
8787 	 */
8788 	mutex_enter(SD_MUTEX(un));
8789 	if (un->un_resvd_timeid != NULL) {
8790 		timeout_id_t temp_id = un->un_resvd_timeid;
8791 		un->un_resvd_timeid = NULL;
8792 		mutex_exit(SD_MUTEX(un));
8793 		(void) untimeout(temp_id);
8794 		mutex_enter(SD_MUTEX(un));
8795 	}
8796 
8797 	if (un->un_reset_throttle_timeid != NULL) {
8798 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8799 		un->un_reset_throttle_timeid = NULL;
8800 		mutex_exit(SD_MUTEX(un));
8801 		(void) untimeout(temp_id);
8802 		mutex_enter(SD_MUTEX(un));
8803 	}
8804 
8805 	if (un->un_startstop_timeid != NULL) {
8806 		timeout_id_t temp_id = un->un_startstop_timeid;
8807 		un->un_startstop_timeid = NULL;
8808 		mutex_exit(SD_MUTEX(un));
8809 		(void) untimeout(temp_id);
8810 		mutex_enter(SD_MUTEX(un));
8811 	}
8812 
8813 	if (un->un_dcvb_timeid != NULL) {
8814 		timeout_id_t temp_id = un->un_dcvb_timeid;
8815 		un->un_dcvb_timeid = NULL;
8816 		mutex_exit(SD_MUTEX(un));
8817 		(void) untimeout(temp_id);
8818 	} else {
8819 		mutex_exit(SD_MUTEX(un));
8820 	}
8821 
8822 	/* Remove any pending reservation reclaim requests for this device */
8823 	sd_rmv_resv_reclaim_req(dev);
8824 
8825 	mutex_enter(SD_MUTEX(un));
8826 
8827 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
8828 	if (un->un_direct_priority_timeid != NULL) {
8829 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8830 		un->un_direct_priority_timeid = NULL;
8831 		mutex_exit(SD_MUTEX(un));
8832 		(void) untimeout(temp_id);
8833 		mutex_enter(SD_MUTEX(un));
8834 	}
8835 
8836 	/* Cancel any active multi-host disk watch thread requests */
8837 	if (un->un_mhd_token != NULL) {
8838 		mutex_exit(SD_MUTEX(un));
8839 		 _NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
8840 		if (scsi_watch_request_terminate(un->un_mhd_token,
8841 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8842 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8843 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
8844 			/*
8845 			 * Note: We are returning here after having removed
8846 			 * some driver timeouts above. This is consistent with
8847 			 * the legacy implementation but perhaps the watch
8848 			 * terminate call should be made with the wait flag set.
8849 			 */
8850 			goto err_stillbusy;
8851 		}
8852 		mutex_enter(SD_MUTEX(un));
8853 		un->un_mhd_token = NULL;
8854 	}
8855 
8856 	if (un->un_swr_token != NULL) {
8857 		mutex_exit(SD_MUTEX(un));
8858 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
8859 		if (scsi_watch_request_terminate(un->un_swr_token,
8860 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8861 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8862 			    "sd_dr_detach: Cannot cancel swr watch request\n");
8863 			/*
8864 			 * Note: We are returning here after having removed
8865 			 * some driver timeouts above. This is consistent with
8866 			 * the legacy implementation but perhaps the watch
8867 			 * terminate call should be made with the wait flag set.
8868 			 */
8869 			goto err_stillbusy;
8870 		}
8871 		mutex_enter(SD_MUTEX(un));
8872 		un->un_swr_token = NULL;
8873 	}
8874 
8875 	mutex_exit(SD_MUTEX(un));
8876 
8877 	/*
8878 	 * Clear any scsi_reset_notifies. We clear the reset notifies
8879 	 * even if we have not registered one.
8880 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
8881 	 */
8882 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
8883 	    sd_mhd_reset_notify_cb, (caddr_t)un);
8884 
8885 
8886 
8887 #if defined(__i386) || defined(__amd64)
8888 	/*
8889 	 * Gratuitous bus resets sometimes cause an otherwise
8890 	 * okay ATA/ATAPI bus to hang. This is due to the lack of
8891 	 * a clear spec of how resets should be implemented by ATA
8892 	 * disk drives.
8893 	 */
8894 #elif !defined(__fibre)		/* "#else if" does NOT work! */
8895 	/*
8896 	 * Reset target/bus.
8897 	 *
8898 	 * Note: This is a legacy workaround for Elite III dual-port drives that
8899 	 * will not come online after an aborted detach and subsequent re-attach.
8900 	 * It should be removed when the Elite III FW is fixed, or the drives
8901 	 * are no longer supported.
8902 	 */
8903 	if (un->un_f_cfg_is_atapi == FALSE) {
8904 		reset_retval = 0;
8905 
8906 		/* If the device is in low power mode don't reset it */
8907 
8908 		mutex_enter(&un->un_pm_mutex);
8909 		if (!SD_DEVICE_IS_IN_LOW_POWER(un)) {
8910 			/*
8911 			 * First try a LUN reset if we can, then move on to a
8912 			 * target reset if needed; swat the bus as a last
8913 			 * resort.
8914 			 */
8915 			mutex_exit(&un->un_pm_mutex);
8916 			if (un->un_f_allow_bus_device_reset == TRUE) {
8917 				if (un->un_f_lun_reset_enabled == TRUE) {
8918 					reset_retval =
8919 					    scsi_reset(SD_ADDRESS(un),
8920 					    RESET_LUN);
8921 				}
8922 				if (reset_retval == 0) {
8923 					reset_retval =
8924 					    scsi_reset(SD_ADDRESS(un),
8925 					    RESET_TARGET);
8926 				}
8927 			}
8928 			if (reset_retval == 0) {
8929 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
8930 			}
8931 		} else {
8932 			mutex_exit(&un->un_pm_mutex);
8933 		}
8934 	}
8935 #endif
8936 
8937 	/*
8938 	 * Protect the timeout pointers from getting nulled by
8939 	 * their callback functions during the cancellation process.
8940 	 * In such a scenario untimeout can be invoked with a null value.
8941 	 */
8942 	_NOTE(NO_COMPETING_THREADS_NOW);
8943 
8944 	mutex_enter(&un->un_pm_mutex);
8945 	if (un->un_pm_idle_timeid != NULL) {
8946 		timeout_id_t temp_id = un->un_pm_idle_timeid;
8947 		un->un_pm_idle_timeid = NULL;
8948 		mutex_exit(&un->un_pm_mutex);
8949 
8950 		/*
8951 		 * Timeout is active; cancel it.
8952 		 * Note that it'll never be active on a device
8953 		 * that does not support PM; therefore we don't
8954 		 * have to check before calling pm_idle_component.
8955 		 */
8956 		(void) untimeout(temp_id);
8957 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8958 		mutex_enter(&un->un_pm_mutex);
8959 	}
8960 
8961 	/*
8962 	 * Check whether there is already a timeout scheduled for power
8963 	 * management. If so, don't lower the power here; that's the
8964 	 * timeout handler's job.
8965 	 */
8966 	if (un->un_pm_timeid != NULL) {
8967 		timeout_id_t temp_id = un->un_pm_timeid;
8968 		un->un_pm_timeid = NULL;
8969 		mutex_exit(&un->un_pm_mutex);
8970 		/*
8971 		 * Timeout is active; cancel it.
8972 		 * Note that it'll never be active on a device
8973 		 * that does not support PM; therefore we don't
8974 		 * have to check before calling pm_idle_component.
8975 		 */
8976 		(void) untimeout(temp_id);
8977 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8978 
8979 	} else {
8980 		mutex_exit(&un->un_pm_mutex);
8981 		if ((un->un_f_pm_is_enabled == TRUE) &&
8982 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
8983 		    DDI_SUCCESS)) {
8984 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8985 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
8986 			/*
8987 			 * Fix for bug: 4297749, item # 13
8988 			 * The above test now includes a check to see if PM is
8989 	 * supported by this device before calling
8990 			 * pm_lower_power().
8991 			 * Note, the following is not dead code. The call to
8992 			 * pm_lower_power above will generate a call back into
8993 			 * our sdpower routine which might result in a timeout
8994 			 * handler getting activated. Therefore the following
8995 			 * code is valid and necessary.
8996 			 */
8997 			mutex_enter(&un->un_pm_mutex);
8998 			if (un->un_pm_timeid != NULL) {
8999 				timeout_id_t temp_id = un->un_pm_timeid;
9000 				un->un_pm_timeid = NULL;
9001 				mutex_exit(&un->un_pm_mutex);
9002 				(void) untimeout(temp_id);
9003 				(void) pm_idle_component(SD_DEVINFO(un), 0);
9004 			} else {
9005 				mutex_exit(&un->un_pm_mutex);
9006 			}
9007 		}
9008 	}
9009 
9010 	/*
9011 	 * Cleanup from the scsi_ifsetcap() calls (437868)
9012 	 * Relocated here from above to be after the call to
9013 	 * pm_lower_power, which was getting errors.
9014 	 */
9015 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
9016 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
9017 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
9018 
9019 	if (un->un_f_is_fibre == FALSE) {
9020 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
9021 	}
9022 
9023 	/*
9024 	 * Remove any event callbacks, fibre only
9025 	 */
9026 	if (un->un_f_is_fibre == TRUE) {
9027 		if ((un->un_insert_event != NULL) &&
9028 			(ddi_remove_event_handler(un->un_insert_cb_id) !=
9029 				DDI_SUCCESS)) {
9030 			/*
9031 			 * Note: We are returning here after having done
9032 			 * substantial cleanup above. This is consistent
9033 			 * with the legacy implementation but this may not
9034 			 * be the right thing to do.
9035 			 */
9036 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9037 				"sd_dr_detach: Cannot cancel insert event\n");
9038 			goto err_remove_event;
9039 		}
9040 		un->un_insert_event = NULL;
9041 
9042 		if ((un->un_remove_event != NULL) &&
9043 			(ddi_remove_event_handler(un->un_remove_cb_id) !=
9044 				DDI_SUCCESS)) {
9045 			/*
9046 			 * Note: We are returning here after having done
9047 			 * substantial cleanup above. This is consistent
9048 			 * with the legacy implementation but this may not
9049 			 * be the right thing to do.
9050 			 */
9051 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9052 				"sd_dr_detach: Cannot cancel remove event\n");
9053 			goto err_remove_event;
9054 		}
9055 		un->un_remove_event = NULL;
9056 	}
9057 
9058 	/* Do not free the softstate if the callback routine is active */
9059 	sd_sync_with_callback(un);
9060 
9061 	/*
9062 	 * Hold the detach mutex here, to make sure that no other thread can
9063 	 * ever access a (partially) freed soft state structure.
9064 	 */
9065 	mutex_enter(&sd_detach_mutex);
9066 
9067 	/*
9068 	 * Clean up the soft state struct.
9069 	 * Cleanup is done in reverse order of allocs/inits.
9070 	 * At this point there should be no competing threads anymore.
9071 	 */
9072 
9073 	/* Unregister and free device id. */
9074 	ddi_devid_unregister(devi);
9075 	if (un->un_devid) {
9076 		ddi_devid_free(un->un_devid);
9077 		un->un_devid = NULL;
9078 	}
9079 
9080 	/*
9081 	 * Destroy wmap cache if it exists.
9082 	 */
9083 	if (un->un_wm_cache != NULL) {
9084 		kmem_cache_destroy(un->un_wm_cache);
9085 		un->un_wm_cache = NULL;
9086 	}
9087 
9088 	/* Remove minor nodes */
9089 	ddi_remove_minor_node(devi, NULL);
9090 
9091 	/*
9092 	 * kstat cleanup is done in detach for all device types (4363169).
9093 	 * We do not want to fail detach if the device kstats are not deleted
9094 	 * since there is confusion about the devo_refcnt for the device.
9095 	 * We just delete the kstats and let detach complete successfully.
9096 	 */
9097 	if (un->un_stats != NULL) {
9098 		kstat_delete(un->un_stats);
9099 		un->un_stats = NULL;
9100 	}
9101 	if (un->un_errstats != NULL) {
9102 		kstat_delete(un->un_errstats);
9103 		un->un_errstats = NULL;
9104 	}
9105 
9106 	/* Remove partition stats (not created for removables) */
9107 	if (!ISREMOVABLE(un)) {
9108 		for (i = 0; i < NSDMAP; i++) {
9109 			if (un->un_pstats[i] != NULL) {
9110 				kstat_delete(un->un_pstats[i]);
9111 				un->un_pstats[i] = NULL;
9112 			}
9113 		}
9114 	}
9115 
9116 	/* Remove xbuf registration */
9117 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
9118 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
9119 
9120 	/* Remove driver properties */
9121 	ddi_prop_remove_all(devi);
9122 
9123 	mutex_destroy(&un->un_pm_mutex);
9124 	cv_destroy(&un->un_pm_busy_cv);
9125 
9126 	/* Open/close semaphore */
9127 	sema_destroy(&un->un_semoclose);
9128 
9129 	/* Removable media condvar. */
9130 	cv_destroy(&un->un_state_cv);
9131 
9132 	/* Suspend/resume condvar. */
9133 	cv_destroy(&un->un_suspend_cv);
9134 	cv_destroy(&un->un_disk_busy_cv);
9135 
9136 	sd_free_rqs(un);
9137 
9138 	/* Free up soft state */
9139 	devp->sd_private = NULL;
9140 	bzero(un, sizeof (struct sd_lun));
9141 	ddi_soft_state_free(sd_state, instance);
9142 
9143 	mutex_exit(&sd_detach_mutex);
9144 
9145 	/* This frees up the INQUIRY data associated with the device. */
9146 	scsi_unprobe(devp);
9147 
9148 	return (DDI_SUCCESS);
9149 
9150 err_notclosed:
9151 	mutex_exit(SD_MUTEX(un));
9152 
9153 err_stillbusy:
9154 	_NOTE(NO_COMPETING_THREADS_NOW);
9155 
9156 err_remove_event:
9157 	mutex_enter(&sd_detach_mutex);
9158 	un->un_detach_count--;
9159 	mutex_exit(&sd_detach_mutex);
9160 
9161 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
9162 	return (DDI_FAILURE);
9163 }
9164 
9165 
9166 /*
9167  * Driver minor node structure and data table
9168  */
9169 struct driver_minor_data {
9170 	char	*name;
9171 	minor_t	minor;
9172 	int	type;
9173 };
9174 
9175 static struct driver_minor_data sd_minor_data[] = {
9176 	{"a", 0, S_IFBLK},
9177 	{"b", 1, S_IFBLK},
9178 	{"c", 2, S_IFBLK},
9179 	{"d", 3, S_IFBLK},
9180 	{"e", 4, S_IFBLK},
9181 	{"f", 5, S_IFBLK},
9182 	{"g", 6, S_IFBLK},
9183 	{"h", 7, S_IFBLK},
9184 #if defined(_SUNOS_VTOC_16)
9185 	{"i", 8, S_IFBLK},
9186 	{"j", 9, S_IFBLK},
9187 	{"k", 10, S_IFBLK},
9188 	{"l", 11, S_IFBLK},
9189 	{"m", 12, S_IFBLK},
9190 	{"n", 13, S_IFBLK},
9191 	{"o", 14, S_IFBLK},
9192 	{"p", 15, S_IFBLK},
9193 #endif			/* defined(_SUNOS_VTOC_16) */
9194 #if defined(_FIRMWARE_NEEDS_FDISK)
9195 	{"q", 16, S_IFBLK},
9196 	{"r", 17, S_IFBLK},
9197 	{"s", 18, S_IFBLK},
9198 	{"t", 19, S_IFBLK},
9199 	{"u", 20, S_IFBLK},
9200 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9201 	{"a,raw", 0, S_IFCHR},
9202 	{"b,raw", 1, S_IFCHR},
9203 	{"c,raw", 2, S_IFCHR},
9204 	{"d,raw", 3, S_IFCHR},
9205 	{"e,raw", 4, S_IFCHR},
9206 	{"f,raw", 5, S_IFCHR},
9207 	{"g,raw", 6, S_IFCHR},
9208 	{"h,raw", 7, S_IFCHR},
9209 #if defined(_SUNOS_VTOC_16)
9210 	{"i,raw", 8, S_IFCHR},
9211 	{"j,raw", 9, S_IFCHR},
9212 	{"k,raw", 10, S_IFCHR},
9213 	{"l,raw", 11, S_IFCHR},
9214 	{"m,raw", 12, S_IFCHR},
9215 	{"n,raw", 13, S_IFCHR},
9216 	{"o,raw", 14, S_IFCHR},
9217 	{"p,raw", 15, S_IFCHR},
9218 #endif			/* defined(_SUNOS_VTOC_16) */
9219 #if defined(_FIRMWARE_NEEDS_FDISK)
9220 	{"q,raw", 16, S_IFCHR},
9221 	{"r,raw", 17, S_IFCHR},
9222 	{"s,raw", 18, S_IFCHR},
9223 	{"t,raw", 19, S_IFCHR},
9224 	{"u,raw", 20, S_IFCHR},
9225 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9226 	{0}
9227 };
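
/*
 * (These letter names map to the slice device names seen under /dev/dsk
 * and /dev/rdsk: minor "a" of an instance becomes its s0 node, "b" its
 * s1 node, and so on; the "q".."u" minors back the p0..p4 fdisk nodes
 * on platforms where _FIRMWARE_NEEDS_FDISK is defined.)
 */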
9228 
9229 static struct driver_minor_data sd_minor_data_efi[] = {
9230 	{"a", 0, S_IFBLK},
9231 	{"b", 1, S_IFBLK},
9232 	{"c", 2, S_IFBLK},
9233 	{"d", 3, S_IFBLK},
9234 	{"e", 4, S_IFBLK},
9235 	{"f", 5, S_IFBLK},
9236 	{"g", 6, S_IFBLK},
9237 	{"wd", 7, S_IFBLK},
9238 #if defined(_FIRMWARE_NEEDS_FDISK)
9239 	{"q", 16, S_IFBLK},
9240 	{"r", 17, S_IFBLK},
9241 	{"s", 18, S_IFBLK},
9242 	{"t", 19, S_IFBLK},
9243 	{"u", 20, S_IFBLK},
9244 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9245 	{"a,raw", 0, S_IFCHR},
9246 	{"b,raw", 1, S_IFCHR},
9247 	{"c,raw", 2, S_IFCHR},
9248 	{"d,raw", 3, S_IFCHR},
9249 	{"e,raw", 4, S_IFCHR},
9250 	{"f,raw", 5, S_IFCHR},
9251 	{"g,raw", 6, S_IFCHR},
9252 	{"wd,raw", 7, S_IFCHR},
9253 #if defined(_FIRMWARE_NEEDS_FDISK)
9254 	{"q,raw", 16, S_IFCHR},
9255 	{"r,raw", 17, S_IFCHR},
9256 	{"s,raw", 18, S_IFCHR},
9257 	{"t,raw", 19, S_IFCHR},
9258 	{"u,raw", 20, S_IFCHR},
9259 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9260 	{0}
9261 };
9262 
9263 
9264 /*
9265  *    Function: sd_create_minor_nodes
9266  *
9267  * Description: Create the minor device nodes for the instance.
9268  *
9269  *   Arguments: un - driver soft state (unit) structure
9270  *		devi - pointer to device info structure
9271  *
9272  * Return Code: DDI_SUCCESS
9273  *		DDI_FAILURE
9274  *
9275  *     Context: Kernel thread context
9276  */
9277 
9278 static int
9279 sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi)
9280 {
9281 	struct driver_minor_data	*dmdp;
9282 	struct scsi_device		*devp;
9283 	int				instance;
9284 	char				name[48];
9285 
9286 	ASSERT(un != NULL);
9287 	devp = ddi_get_driver_private(devi);
9288 	instance = ddi_get_instance(devp->sd_dev);
9289 
9290 	/*
9291 	 * Create all the minor nodes for this target.
9292 	 */
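	/*
	 * Disks with more than DK_MAX_BLOCKS blocks cannot be described by
	 * a VTOC label and are labeled with EFI instead, so they get the
	 * EFI minor-node layout (which includes the "wd" whole-disk node).
	 */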
9293 	if (un->un_blockcount > DK_MAX_BLOCKS)
9294 		dmdp = sd_minor_data_efi;
9295 	else
9296 		dmdp = sd_minor_data;
9297 	while (dmdp->name != NULL) {
9298 
9299 		(void) sprintf(name, "%s", dmdp->name);
9300 
9301 		if (ddi_create_minor_node(devi, name, dmdp->type,
9302 		    (instance << SDUNIT_SHIFT) | dmdp->minor,
9303 		    un->un_node_type, NULL) == DDI_FAILURE) {
9304 			/*
9305 			 * Clean up any nodes that may have been created, in
9306 			 * case this fails in the middle of the loop.
9307 			 */
9308 			ddi_remove_minor_node(devi, NULL);
9309 			return (DDI_FAILURE);
9310 		}
9311 		dmdp++;
9312 	}
9313 
9314 	return (DDI_SUCCESS);
9315 }
9316 
9317 
9318 /*
9319  *    Function: sd_create_errstats
9320  *
9321  * Description: This routine instantiates the device error stats.
9322  *
9323  *		Note: During attach the stats are instantiated first so they are
9324  *		available for attach-time routines that utilize the driver
9325  *		iopath to send commands to the device. The stats are initialized
9326  *		separately so data obtained during some attach-time routines is
9327  *		available. (4362483)
9328  *
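 *		The resulting named kstats are what iostat(1M) -E reports
 *		for the device.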
9329  *   Arguments: un - driver soft state (unit) structure
9330  *		instance - driver instance
9331  *
9332  *     Context: Kernel thread context
9333  */
9334 
9335 static void
9336 sd_create_errstats(struct sd_lun *un, int instance)
9337 {
9338 	struct	sd_errstats	*stp;
9339 	char	kstatmodule_err[KSTAT_STRLEN];
9340 	char	kstatname[KSTAT_STRLEN];
9341 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
9342 
9343 	ASSERT(un != NULL);
9344 
9345 	if (un->un_errstats != NULL) {
9346 		return;
9347 	}
9348 
9349 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
9350 	    "%serr", sd_label);
9351 	(void) snprintf(kstatname, sizeof (kstatname),
9352 	    "%s%d,err", sd_label, instance);
9353 
9354 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
9355 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
9356 
9357 	if (un->un_errstats == NULL) {
9358 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9359 		    "sd_create_errstats: Failed kstat_create\n");
9360 		return;
9361 	}
9362 
9363 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9364 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
9365 	    KSTAT_DATA_UINT32);
9366 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
9367 	    KSTAT_DATA_UINT32);
9368 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
9369 	    KSTAT_DATA_UINT32);
9370 	kstat_named_init(&stp->sd_vid,		"Vendor",
9371 	    KSTAT_DATA_CHAR);
9372 	kstat_named_init(&stp->sd_pid,		"Product",
9373 	    KSTAT_DATA_CHAR);
9374 	kstat_named_init(&stp->sd_revision,	"Revision",
9375 	    KSTAT_DATA_CHAR);
9376 	kstat_named_init(&stp->sd_serial,	"Serial No",
9377 	    KSTAT_DATA_CHAR);
9378 	kstat_named_init(&stp->sd_capacity,	"Size",
9379 	    KSTAT_DATA_ULONGLONG);
9380 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
9381 	    KSTAT_DATA_UINT32);
9382 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
9383 	    KSTAT_DATA_UINT32);
9384 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
9385 	    KSTAT_DATA_UINT32);
9386 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
9387 	    KSTAT_DATA_UINT32);
9388 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
9389 	    KSTAT_DATA_UINT32);
9390 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
9391 	    KSTAT_DATA_UINT32);
9392 
9393 	un->un_errstats->ks_private = un;
9394 	un->un_errstats->ks_update  = nulldev;
9395 
9396 	kstat_install(un->un_errstats);
9397 }
9398 
9399 
9400 /*
9401  *    Function: sd_set_errstats
9402  *
9403  * Description: This routine sets the value of the vendor id, product id,
9404  *		revision, serial number, and capacity device error stats.
9405  *
9406  *		Note: During attach the stats are instantiated first so they are
9407  *		available for attach-time routines that utilize the driver
9408  *		iopath to send commands to the device. The stats are initialized
9409  *		separately so data obtained during some attach-time routines is
9410  *		available. (4362483)
9411  *
9412  *   Arguments: un - driver soft state (unit) structure
9413  *
9414  *     Context: Kernel thread context
9415  */
9416 
9417 static void
9418 sd_set_errstats(struct sd_lun *un)
9419 {
9420 	struct	sd_errstats	*stp;
9421 
9422 	ASSERT(un != NULL);
9423 	ASSERT(un->un_errstats != NULL);
9424 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9425 	ASSERT(stp != NULL);
9426 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
9427 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
9428 	(void) strncpy(stp->sd_revision.value.c,
9429 	    un->un_sd->sd_inq->inq_revision, 4);
9430 
9431 	/*
9432 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
9433 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
9434 	 * (4376302))
9435 	 */
9436 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
9437 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9438 		    sizeof (SD_INQUIRY(un)->inq_serial));
9439 	}
9440 
9441 	if (un->un_f_blockcount_is_valid != TRUE) {
9442 		/*
9443 		 * Set capacity error stat to 0 for no media. This ensures
9444 		 * a valid capacity is displayed in response to 'iostat -E'
9445 		 * when no media is present in the device.
9446 		 */
9447 		stp->sd_capacity.value.ui64 = 0;
9448 	} else {
9449 		/*
9450 		 * Multiply un_blockcount by un->un_sys_blocksize to get
9451 		 * capacity.
9452 		 *
9453 		 * Note: for non-512 blocksize devices "un_blockcount" has been
9454 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
9455 		 * (un_tgt_blocksize / un->un_sys_blocksize).
9456 		 */
9457 		stp->sd_capacity.value.ui64 = (uint64_t)
9458 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
9459 	}
9460 }
9461 
9462 
9463 /*
9464  *    Function: sd_set_pstats
9465  *
9466  * Description: This routine instantiates and initializes the partition
9467  *              stats for each partition with more than zero blocks.
9468  *		(4363169)
9469  *
9470  *   Arguments: un - driver soft state (unit) structure
9471  *
9472  *     Context: Kernel thread context
9473  */
9474 
9475 static void
9476 sd_set_pstats(struct sd_lun *un)
9477 {
9478 	char	kstatname[KSTAT_STRLEN];
9479 	int	instance;
9480 	int	i;
9481 
9482 	ASSERT(un != NULL);
9483 
9484 	instance = ddi_get_instance(SD_DEVINFO(un));
9485 
9486 	/* Note: x86: is this a VTOC8/VTOC16 difference? */
9487 	for (i = 0; i < NSDMAP; i++) {
9488 		if ((un->un_pstats[i] == NULL) &&
9489 		    (un->un_map[i].dkl_nblk != 0)) {
9490 			(void) snprintf(kstatname, sizeof (kstatname),
9491 			    "%s%d,%s", sd_label, instance,
9492 			    sd_minor_data[i].name);
9493 			un->un_pstats[i] = kstat_create(sd_label,
9494 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
9495 			    1, KSTAT_FLAG_PERSISTENT);
9496 			if (un->un_pstats[i] != NULL) {
9497 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
9498 				kstat_install(un->un_pstats[i]);
9499 			}
9500 		}
9501 	}
9502 }
9503 
9504 
9505 #if (defined(__fibre))
9506 /*
9507  *    Function: sd_init_event_callbacks
9508  *
9509  * Description: This routine initializes the insertion and removal event
9510  *		callbacks. (fibre only)
9511  *
9512  *   Arguments: un - driver soft state (unit) structure
9513  *
9514  *     Context: Kernel thread context
9515  */
9516 
9517 static void
9518 sd_init_event_callbacks(struct sd_lun *un)
9519 {
9520 	ASSERT(un != NULL);
9521 
9522 	if ((un->un_insert_event == NULL) &&
9523 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
9524 	    &un->un_insert_event) == DDI_SUCCESS)) {
9525 		/*
9526 		 * Add the callback for an insertion event
9527 		 */
9528 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9529 		    un->un_insert_event, sd_event_callback, (void *)un,
9530 		    &(un->un_insert_cb_id));
9531 	}
9532 
9533 	if ((un->un_remove_event == NULL) &&
9534 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
9535 	    &un->un_remove_event) == DDI_SUCCESS)) {
9536 		/*
9537 		 * Add the callback for a removal event
9538 		 */
9539 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9540 		    un->un_remove_event, sd_event_callback, (void *)un,
9541 		    &(un->un_remove_cb_id));
9542 	}
9543 }
9544 
9545 
9546 /*
9547  *    Function: sd_event_callback
9548  *
9549  * Description: This routine handles insert/remove events (photon). The
9550  *		state is changed to OFFLINE which can be used to suppress
9551  *		error msgs. (fibre only)
9552  *
9553  *   Arguments: un - driver soft state (unit) structure
9554  *
9555  *     Context: Callout thread context
9556  */
9557 /* ARGSUSED */
9558 static void
9559 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
9560     void *bus_impldata)
9561 {
9562 	struct sd_lun *un = (struct sd_lun *)arg;
9563 
9564 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
9565 	if (event == un->un_insert_event) {
9566 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
9567 		mutex_enter(SD_MUTEX(un));
9568 		if (un->un_state == SD_STATE_OFFLINE) {
9569 			if (un->un_last_state != SD_STATE_SUSPENDED) {
9570 				un->un_state = un->un_last_state;
9571 			} else {
9572 				/*
9573 				 * We have gone through SUSPEND/RESUME while
9574 				 * we were offline. Restore the last state
9575 				 */
9576 				un->un_state = un->un_save_state;
9577 			}
9578 		}
9579 		mutex_exit(SD_MUTEX(un));
9580 
9581 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
9582 	} else if (event == un->un_remove_event) {
9583 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
9584 		mutex_enter(SD_MUTEX(un));
9585 		/*
9586 		 * We need to handle an event callback that occurs during
9587 		 * the suspend operation, since we don't prevent it.
9588 		 */
9589 		if (un->un_state != SD_STATE_OFFLINE) {
9590 			if (un->un_state != SD_STATE_SUSPENDED) {
9591 				New_state(un, SD_STATE_OFFLINE);
9592 			} else {
9593 				un->un_last_state = SD_STATE_OFFLINE;
9594 			}
9595 		}
9596 		mutex_exit(SD_MUTEX(un));
9597 	} else {
9598 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
9599 		    "!Unknown event\n");
9600 	}
9601 
9602 }
9603 #endif
9604 
9605 
9606 /*
9607  *    Function: sd_disable_caching()
9608  *
9609  * Description: This routine is the driver entry point for disabling
9610  *		read and write caching by modifying the WCE (write cache
9611  *		enable) and RCD (read cache disable) bits of mode
9612  *		page 8 (MODEPAGE_CACHING).
9613  *
9614  *   Arguments: un - driver soft state (unit) structure
9615  *
9616  * Return Code: EIO
9617  *		code returned by sd_send_scsi_MODE_SENSE and
9618  *		sd_send_scsi_MODE_SELECT
9619  *
9620  *     Context: Kernel Thread
9621  */
9622 
9623 static int
9624 sd_disable_caching(struct sd_lun *un)
9625 {
9626 	struct mode_caching	*mode_caching_page;
9627 	uchar_t			*header;
9628 	size_t			buflen;
9629 	int			hdrlen;
9630 	int			bd_len;
9631 	int			rval = 0;
9632 
9633 	ASSERT(un != NULL);
9634 
9635 	/*
9636 	 * Do a test unit ready, otherwise a mode sense may not work if this
9637 	 * is the first command sent to the device after boot.
9638 	 */
9639 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9640 
9641 	if (un->un_f_cfg_is_atapi == TRUE) {
9642 		hdrlen = MODE_HEADER_LENGTH_GRP2;
9643 	} else {
9644 		hdrlen = MODE_HEADER_LENGTH;
9645 	}
9646 
9647 	/*
9648 	 * Allocate memory for the retrieved mode page and its headers.  Set
9649 	 * a pointer to the page itself.
9650 	 */
9651 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
9652 	header = kmem_zalloc(buflen, KM_SLEEP);
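
	/*
	 * The data returned by MODE SENSE is laid out as:
	 *
	 *	+------------------+ <- header
	 *	|   mode header    |    (hdrlen: group 0 or group 2 format)
	 *	+------------------+
	 *	| block descriptor |    (bd_len: 0 for ATAPI, else
	 *	+------------------+     MODE_BLK_DESC_LENGTH)
	 *	|   caching page   | <- header + hdrlen + bd_len
	 *	+------------------+
	 */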
9653 
9654 	/* Get the information from the device. */
9655 	if (un->un_f_cfg_is_atapi == TRUE) {
9656 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
9657 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9658 	} else {
9659 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
9660 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9661 	}
9662 	if (rval != 0) {
9663 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9664 		    "sd_disable_caching: Mode Sense Failed\n");
9665 		kmem_free(header, buflen);
9666 		return (rval);
9667 	}
9668 
9669 	/*
9670 	 * Determine size of Block Descriptors in order to locate
9671 	 * the mode page data. ATAPI devices return 0, SCSI devices
9672 	 * should return MODE_BLK_DESC_LENGTH.
9673 	 */
9674 	if (un->un_f_cfg_is_atapi == TRUE) {
9675 		struct mode_header_grp2	*mhp;
9676 		mhp	= (struct mode_header_grp2 *)header;
9677 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9678 	} else {
9679 		bd_len  = ((struct mode_header *)header)->bdesc_length;
9680 	}
9681 
9682 	if (bd_len > MODE_BLK_DESC_LENGTH) {
9683 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9684 		    "sd_disable_caching: Mode Sense returned invalid "
9685 		    "block descriptor length\n");
9686 		kmem_free(header, buflen);
9687 		return (EIO);
9688 	}
9689 
9690 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9691 
9692 	/* Check the relevant bits on successful mode sense. */
9693 	if ((mode_caching_page->wce) || !(mode_caching_page->rcd)) {
9694 		/*
9695 		 * Read or write caching is enabled.  Disable both of them.
9696 		 */
9697 		mode_caching_page->wce = 0;
9698 		mode_caching_page->rcd = 1;
9699 
9700 		/* Clear reserved bits before mode select. */
9701 		mode_caching_page->mode_page.ps = 0;
9702 
9703 		/*
9704 		 * Clear out mode header for mode select.
9705 		 * The rest of the retrieved page will be reused.
9706 		 */
9707 		bzero(header, hdrlen);
9708 
9709 		/* Change the cache page to disable all caching. */
9710 		if (un->un_f_cfg_is_atapi == TRUE) {
9711 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
9712 			    buflen, SD_SAVE_PAGE, SD_PATH_DIRECT);
9713 		} else {
9714 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
9715 			    buflen, SD_SAVE_PAGE, SD_PATH_DIRECT);
9716 		}
9717 	}
9718 
9719 	kmem_free(header, buflen);
9720 	return (rval);
9721 }
9722 
9723 
9724 /*
9725  *    Function: sd_get_write_cache_enabled()
9726  *
9727  * Description: This routine is the driver entry point for determining if
9728  *		write caching is enabled.  It examines the WCE (write cache
9729  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
9730  *
9731  *   Arguments: un - driver soft state (unit) structure
9732  *		is_enabled - pointer to int where write cache enabled state
9733  *			is returned (non-zero -> write cache enabled)
9734  *
9735  *
9736  * Return Code: EIO
9737  *		code returned by sd_send_scsi_MODE_SENSE
9738  *
9739  *     Context: Kernel Thread
9740  *
9741  * NOTE: If an ioctl is added to disable write cache, this sequence should
9742  * be followed so that no locking is required for accesses to
9743  * un->un_f_write_cache_enabled:
9744  * 	do mode select to clear wce
9745  * 	do synchronize cache to flush cache
9746  * 	set un->un_f_write_cache_enabled = FALSE
9747  *
9748  * Conversely, an ioctl to enable the write cache should be done
9749  * in this order:
9750  * 	set un->un_f_write_cache_enabled = TRUE
9751  * 	do mode select to set wce
9752  */
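
/*
 * A minimal sketch (not driver code) of the disable sequence described in
 * the NOTE above, assuming hypothetical helpers built from the existing
 * sd_send_scsi_MODE_SELECT() path:
 *
 *	if (sd_mode_select_clear_wce(un) == 0 &&
 *	    sd_flush_write_cache(un) == 0) {
 *		un->un_f_write_cache_enabled = FALSE;
 *	}
 *
 * where sd_mode_select_clear_wce() and sd_flush_write_cache() are
 * placeholders for the mode-select and synchronize-cache steps.
 */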
9753 
9754 static int
9755 sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
9756 {
9757 	struct mode_caching	*mode_caching_page;
9758 	uchar_t			*header;
9759 	size_t			buflen;
9760 	int			hdrlen;
9761 	int			bd_len;
9762 	int			rval = 0;
9763 
9764 	ASSERT(un != NULL);
9765 	ASSERT(is_enabled != NULL);
9766 
9767 	/* in case of error, flag as enabled */
9768 	*is_enabled = TRUE;
9769 
9770 	/*
9771 	 * Do a test unit ready, otherwise a mode sense may not work if this
9772 	 * is the first command sent to the device after boot.
9773 	 */
9774 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9775 
9776 	if (un->un_f_cfg_is_atapi == TRUE) {
9777 		hdrlen = MODE_HEADER_LENGTH_GRP2;
9778 	} else {
9779 		hdrlen = MODE_HEADER_LENGTH;
9780 	}
9781 
9782 	/*
9783 	 * Allocate memory for the retrieved mode page and its headers.  Set
9784 	 * a pointer to the page itself.
9785 	 */
9786 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
9787 	header = kmem_zalloc(buflen, KM_SLEEP);
9788 
9789 	/* Get the information from the device. */
9790 	if (un->un_f_cfg_is_atapi == TRUE) {
9791 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
9792 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9793 	} else {
9794 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
9795 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9796 	}
9797 	if (rval != 0) {
9798 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9799 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
9800 		kmem_free(header, buflen);
9801 		return (rval);
9802 	}
9803 
9804 	/*
9805 	 * Determine size of Block Descriptors in order to locate
9806 	 * the mode page data. ATAPI devices return 0, SCSI devices
9807 	 * should return MODE_BLK_DESC_LENGTH.
9808 	 */
9809 	if (un->un_f_cfg_is_atapi == TRUE) {
9810 		struct mode_header_grp2	*mhp;
9811 		mhp	= (struct mode_header_grp2 *)header;
9812 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9813 	} else {
9814 		bd_len  = ((struct mode_header *)header)->bdesc_length;
9815 	}
9816 
9817 	if (bd_len > MODE_BLK_DESC_LENGTH) {
9818 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9819 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
9820 		    "block descriptor length\n");
9821 		kmem_free(header, buflen);
9822 		return (EIO);
9823 	}
9824 
9825 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9826 	*is_enabled = mode_caching_page->wce;
9827 
9828 	kmem_free(header, buflen);
9829 	return (0);
9830 }
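
#ifdef notdef
/*
 * Illustrative sketch only: if a write cache toggle ioctl were added,
 * it could follow the lock-free ordering described in the NOTE above.
 * sd_set_wce() is a hypothetical helper wrapping a MODE SELECT of the
 * caching page; it does not exist in this driver.
 */
static int
sd_toggle_write_cache(struct sd_lun *un, int enable)
{
	int	rval;

	if (enable) {
		/* Set the flag first, then do the mode select to set wce. */
		un->un_f_write_cache_enabled = TRUE;
		rval = sd_set_wce(un, 1);
	} else {
		/*
		 * Do the mode select to clear wce, flush the cache, and
		 * only then clear the flag.
		 */
		if (((rval = sd_set_wce(un, 0)) == 0) &&
		    ((rval = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL)) == 0)) {
			un->un_f_write_cache_enabled = FALSE;
		}
	}
	return (rval);
}
#endif	/* notdef */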
9831 
9832 
9833 /*
9834  *    Function: sd_make_device
9835  *
9836  * Description: Utility routine to return the Solaris device number from
9837  *		the data in the device's dev_info structure.
9838  *
9839  * Return Code: The Solaris device number
9840  *
9841  *     Context: Any
9842  */
9843 
9844 static dev_t
9845 sd_make_device(dev_info_t *devi)
9846 {
9847 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
9848 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
9849 }
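
/*
 * Usage sketch: SDUNIT() and SDPART() perform the inverse mapping
 * (assuming SDPART() extracts the low partition bits of the minor
 * number), e.g.:
 *
 *	dev_t	dev = sd_make_device(devi);
 *
 *	SDUNIT(dev) recovers ddi_get_instance(devi), and SDPART(dev) is
 *	0 for the dev_t built above, since no partition bits are set.
 */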
9850 
9851 
9852 /*
9853  *    Function: sd_pm_entry
9854  *
9855  * Description: Called at the start of a new command to manage power
9856  *		and busy status of a device. This includes determining whether
9857  *		the current power state of the device is sufficient for
9858  *		performing the command or whether it must be changed.
9859  *		The PM framework is notified appropriately.
9860  *		Only with a return status of DDI_SUCCESS will the
9861  *		component be marked busy to the framework.
9862  *
9863  *		All callers of sd_pm_entry must check the return status
9864 	 *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
9865  *		of DDI_FAILURE indicates the device failed to power up.
9866  *		In this case un_pm_count has been adjusted so the result
9867 	 *		on exit is still powered down, i.e. count is less than 0.
9868  *		Calling sd_pm_exit with this count value hits an ASSERT.
9869  *
9870  * Return Code: DDI_SUCCESS or DDI_FAILURE
9871  *
9872  *     Context: Kernel thread context.
9873  */
9874 
9875 static int
9876 sd_pm_entry(struct sd_lun *un)
9877 {
9878 	int return_status = DDI_SUCCESS;
9879 
9880 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9881 	ASSERT(!mutex_owned(&un->un_pm_mutex));
9882 
9883 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
9884 
9885 	if (un->un_f_pm_is_enabled == FALSE) {
9886 		SD_TRACE(SD_LOG_IO_PM, un,
9887 		    "sd_pm_entry: exiting, PM not enabled\n");
9888 		return (return_status);
9889 	}
9890 
9891 	/*
9892 	 * Just increment a counter if PM is enabled. On the transition from
9893 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
9894 	 * the count with each IO and mark the device as idle when the count
9895 	 * hits 0.
9896 	 *
9897 	 * If the count is less than 0 the device is powered down. If a powered
9898 	 * down device is successfully powered up then the count must be
9899 	 * incremented to reflect the power up. Note that it'll get incremented
9900 	 * a second time to become busy.
9901 	 *
9902 	 * Because the following has the potential to change the device state
9903 	 * and must release the un_pm_mutex to do so, only one thread can be
9904 	 * allowed through at a time.
9905 	 */
9906 
9907 	mutex_enter(&un->un_pm_mutex);
9908 	while (un->un_pm_busy == TRUE) {
9909 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
9910 	}
9911 	un->un_pm_busy = TRUE;
9912 
9913 	if (un->un_pm_count < 1) {
9914 
9915 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
9916 
9917 		/*
9918 		 * Indicate we are now busy so the framework won't attempt to
9919 		 * power down the device. This call will only fail if either
9920 		 * we passed a bad component number or the device has no
9921 		 * components. Neither of these should ever happen.
9922 		 */
9923 		mutex_exit(&un->un_pm_mutex);
9924 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
9925 		ASSERT(return_status == DDI_SUCCESS);
9926 
9927 		mutex_enter(&un->un_pm_mutex);
9928 
9929 		if (un->un_pm_count < 0) {
9930 			mutex_exit(&un->un_pm_mutex);
9931 
9932 			SD_TRACE(SD_LOG_IO_PM, un,
9933 			    "sd_pm_entry: power up component\n");
9934 
9935 			/*
9936 			 * pm_raise_power will cause sdpower to be called
9937 			 * which brings the device power level to the
9938 			 * desired state, ON in this case. If successful,
9939 			 * un_pm_count and un_power_level will be updated
9940 			 * appropriately.
9941 			 */
9942 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
9943 			    SD_SPINDLE_ON);
9944 
9945 			mutex_enter(&un->un_pm_mutex);
9946 
9947 			if (return_status != DDI_SUCCESS) {
9948 				/*
9949 				 * Power up failed.
9950 				 * Idle the device and adjust the count
9951 				 * so the result on exit is that we're
9952 				 * still powered down, i.e. count is less than 0.
9953 				 */
9954 				SD_TRACE(SD_LOG_IO_PM, un,
9955 				    "sd_pm_entry: power up failed,"
9956 				    " idle the component\n");
9957 
9958 				(void) pm_idle_component(SD_DEVINFO(un), 0);
9959 				un->un_pm_count--;
9960 			} else {
9961 				/*
9962 				 * Device is powered up, verify the
9963 				 * count is non-negative.
9964 				 * This is debug only.
9965 				 */
9966 				ASSERT(un->un_pm_count == 0);
9967 			}
9968 		}
9969 
9970 		if (return_status == DDI_SUCCESS) {
9971 			/*
9972 			 * For performance, now that the device has been tagged
9973 			 * as busy, and it's known to be powered up, update the
9974 			 * chain types to use jump tables that do not include
9975 			 * pm. This significantly lowers the overhead and
9976 			 * therefore improves performance.
9977 			 */
9978 
9979 			mutex_exit(&un->un_pm_mutex);
9980 			mutex_enter(SD_MUTEX(un));
9981 			SD_TRACE(SD_LOG_IO_PM, un,
9982 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
9983 			    un->un_uscsi_chain_type);
9984 
9985 			if (ISREMOVABLE(un)) {
9986 				un->un_buf_chain_type =
9987 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
9988 			} else {
9989 				un->un_buf_chain_type =
9990 				    SD_CHAIN_INFO_DISK_NO_PM;
9991 			}
9992 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
9993 
9994 			SD_TRACE(SD_LOG_IO_PM, un,
9995 			    "             changed  uscsi_chain_type to   %d\n",
9996 			    un->un_uscsi_chain_type);
9997 			mutex_exit(SD_MUTEX(un));
9998 			mutex_enter(&un->un_pm_mutex);
9999 
10000 			if (un->un_pm_idle_timeid == NULL) {
10001 				/* 300 ms. */
10002 				un->un_pm_idle_timeid =
10003 				    timeout(sd_pm_idletimeout_handler, un,
10004 				    (drv_usectohz((clock_t)300000)));
10005 				/*
10006 				 * Include an extra call to busy which keeps the
10007 				 * device busy with respect to the PM layer
10008 				 * until the timer fires, at which time it'll
10009 				 * get the extra idle call.
10010 				 */
10011 				(void) pm_busy_component(SD_DEVINFO(un), 0);
10012 			}
10013 		}
10014 	}
10015 	un->un_pm_busy = FALSE;
10016 	/* Next... */
10017 	cv_signal(&un->un_pm_busy_cv);
10018 
10019 	un->un_pm_count++;
10020 
10021 	SD_TRACE(SD_LOG_IO_PM, un,
10022 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
10023 
10024 	mutex_exit(&un->un_pm_mutex);
10025 
10026 	return (return_status);
10027 }
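
/*
 * Typical caller pattern (mirrors sdopen()/sdclose() below); note that
 * sd_pm_exit() must NOT be called when sd_pm_entry() fails:
 *
 *	if (sd_pm_entry(un) == DDI_SUCCESS) {
 *		<issue commands that require a powered-up device>
 *		sd_pm_exit(un);
 *	} else {
 *		<fail the request, e.g. with EIO>
 *	}
 */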
10028 
10029 
10030 /*
10031  *    Function: sd_pm_exit
10032  *
10033  * Description: Called at the completion of a command to manage busy
10034  *		status for the device. If the device becomes idle the
10035  *		PM framework is notified.
10036  *
10037  *     Context: Kernel thread context
10038  */
10039 
10040 static void
10041 sd_pm_exit(struct sd_lun *un)
10042 {
10043 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10044 	ASSERT(!mutex_owned(&un->un_pm_mutex));
10045 
10046 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
10047 
10048 	/*
10049 	 * After attach the following flag is only read, so don't
10050 	 * take the penalty of acquiring a mutex for it.
10051 	 */
10052 	if (un->un_f_pm_is_enabled == TRUE) {
10053 
10054 		mutex_enter(&un->un_pm_mutex);
10055 		un->un_pm_count--;
10056 
10057 		SD_TRACE(SD_LOG_IO_PM, un,
10058 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
10059 
10060 		ASSERT(un->un_pm_count >= 0);
10061 		if (un->un_pm_count == 0) {
10062 			mutex_exit(&un->un_pm_mutex);
10063 
10064 			SD_TRACE(SD_LOG_IO_PM, un,
10065 			    "sd_pm_exit: idle component\n");
10066 
10067 			(void) pm_idle_component(SD_DEVINFO(un), 0);
10068 
10069 		} else {
10070 			mutex_exit(&un->un_pm_mutex);
10071 		}
10072 	}
10073 
10074 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
10075 }
10076 
10077 
10078 /*
10079  *    Function: sdopen
10080  *
10081  * Description: Driver's open(9e) entry point function.
10082  *
10083  *   Arguments: dev_p   - pointer to device number
10084  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
10085  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10086  *		cred_p  - user credential pointer
10087  *
10088  * Return Code: EINVAL
10089  *		ENXIO
10090  *		EIO
10091  *		EROFS
10092  *		EBUSY
10093  *
10094  *     Context: Kernel thread context
10095  */
10096 /* ARGSUSED */
10097 static int
10098 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
10099 {
10100 	struct sd_lun	*un;
10101 	int		nodelay;
10102 	int		part;
10103 	uint64_t	partmask;
10104 	int		instance;
10105 	dev_t		dev;
10106 	int		rval = EIO;
10107 
10108 	/* Validate the open type */
10109 	if (otyp >= OTYPCNT) {
10110 		return (EINVAL);
10111 	}
10112 
10113 	dev = *dev_p;
10114 	instance = SDUNIT(dev);
10115 	mutex_enter(&sd_detach_mutex);
10116 
10117 	/*
10118 	 * Fail the open if there is no softstate for the instance, or
10119 	 * if another thread somewhere is trying to detach the instance.
10120 	 */
10121 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
10122 	    (un->un_detach_count != 0)) {
10123 		mutex_exit(&sd_detach_mutex);
10124 		/*
10125 		 * The probe cache only needs to be cleared when open(9e) fails
10126 		 * with ENXIO (4238046).
10127 		 */
10128 		/*
10129 		 * Unconditionally clearing the probe cache is OK with
10130 		 * separate sd/ssd binaries; on the x86 platform, where both
10131 		 * parallel and fibre can be in one binary, it can be an
10132 		 * issue.
10133 		 */
10134 		sd_scsi_clear_probe_cache();
10135 		return (ENXIO);
10136 	}
10137 
10138 	/*
10139 	 * The un_layer_count is to prevent another thread in specfs from
10140 	 * trying to detach the instance, which can happen when we are
10141 	 * called from a higher-layer driver instead of thru specfs.
10142 	 * This will not be needed when DDI provides a layered driver
10143 	 * interface that allows specfs to know that an instance is in
10144 	 * use by a layered driver & should not be detached.
10145 	 *
10146 	 * Note: the semantics for layered driver opens are exactly one
10147 	 * close for every open.
10148 	 */
10149 	if (otyp == OTYP_LYR) {
10150 		un->un_layer_count++;
10151 	}
10152 
10153 	/*
10154 	 * Keep a count of the current # of opens in progress. This is because
10155 	 * some layered drivers try to call us as a regular open. This can
10156 	 * cause problems that we cannot prevent; however, by keeping this count
10157 	 * we can at least keep our open and detach routines from racing against
10158 	 * each other under such conditions.
10159 	 */
10160 	un->un_opens_in_progress++;
10161 	mutex_exit(&sd_detach_mutex);
10162 
10163 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
10164 	part	 = SDPART(dev);
10165 	partmask = 1 << part;
10166 
10167 	/*
10168 	 * We use a semaphore here in order to serialize
10169 	 * open and close requests on the device.
10170 	 */
10171 	sema_p(&un->un_semoclose);
10172 
10173 	mutex_enter(SD_MUTEX(un));
10174 
10175 	/*
10176 	 * All device accesses go thru sdstrategy() where we check
10177 	 * on suspend status but there could be a scsi_poll command,
10178 	 * which bypasses sdstrategy(), so we need to check pm
10179 	 * status.
10180 	 */
10181 
10182 	if (!nodelay) {
10183 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10184 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10185 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10186 		}
10187 
10188 		mutex_exit(SD_MUTEX(un));
10189 		if (sd_pm_entry(un) != DDI_SUCCESS) {
10190 			rval = EIO;
10191 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
10192 			    "sdopen: sd_pm_entry failed\n");
10193 			goto open_failed_with_pm;
10194 		}
10195 		mutex_enter(SD_MUTEX(un));
10196 	}
10197 
10198 	/* check for previous exclusive open */
10199 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
10200 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10201 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
10202 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
10203 
10204 	if (un->un_exclopen & (partmask)) {
10205 		goto excl_open_fail;
10206 	}
10207 
10208 	if (flag & FEXCL) {
10209 		int i;
10210 		if (un->un_ocmap.lyropen[part]) {
10211 			goto excl_open_fail;
10212 		}
10213 		for (i = 0; i < (OTYPCNT - 1); i++) {
10214 			if (un->un_ocmap.regopen[i] & (partmask)) {
10215 				goto excl_open_fail;
10216 			}
10217 		}
10218 	}
10219 
10220 	/*
10221 	 * Check the write permission if this is a removable media device,
10222 	 * NDELAY has not been set, and writable permission is requested.
10223 	 *
10224 	 * Note: If NDELAY was set and this is write-protected media the WRITE
10225 	 * attempt will fail with EIO as part of the I/O processing. This is a
10226 	 * more permissive implementation that allows the open to succeed and
10227 	 * WRITE attempts to fail when appropriate.
10228 	 */
10229 	if (ISREMOVABLE(un)) {
10230 		if ((flag & FWRITE) && (!nodelay)) {
10231 			mutex_exit(SD_MUTEX(un));
10232 			/*
10233 			 * Defer the check for write permission on a writable
10234 			 * DVD drive until sdstrategy; do not fail the open even
10235 			 * if FWRITE is set, since the device may be writable
10236 			 * depending upon the media, and the media can change
10237 			 * after the call to open().
10238 			 */
10239 			if (un->un_f_dvdram_writable_device == FALSE) {
10240 				if (ISCD(un) || sr_check_wp(dev)) {
10241 					rval = EROFS;
10242 					mutex_enter(SD_MUTEX(un));
10243 					SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10244 					    "write to cd or write protected media\n");
10245 					goto open_fail;
10246 				}
10247 			}
10248 			mutex_enter(SD_MUTEX(un));
10249 		}
10250 	}
10251 
10252 	/*
10253 	 * If opening in NDELAY/NONBLOCK mode, just return.
10254 	 * Check if disk is ready and has a valid geometry later.
10255 	 */
10256 	if (!nodelay) {
10257 		mutex_exit(SD_MUTEX(un));
10258 		rval = sd_ready_and_valid(un);
10259 		mutex_enter(SD_MUTEX(un));
10260 		/*
10261 		 * Fail if device is not ready or if the number of disk
10262 		 * blocks is zero or negative for non-CD devices.
10263 		 */
10264 		if ((rval != SD_READY_VALID) ||
10265 		    (!ISCD(un) && un->un_map[part].dkl_nblk <= 0)) {
10266 			if (ISREMOVABLE(un)) {
10267 				rval = ENXIO;
10268 			} else {
10269 				rval = EIO;
10270 			}
10271 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10272 			    "device not ready or invalid disk block value\n");
10273 			goto open_fail;
10274 		}
10275 #if defined(__i386) || defined(__amd64)
10276 	} else {
10277 		uchar_t *cp;
10278 		/*
10279 		 * x86 requires special nodelay handling, so that p0 is
10280 		 * always defined and accessible.
10281 		 * Invalidate geometry only if device is not already open.
10282 		 */
10283 		cp = &un->un_ocmap.chkd[0];
10284 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10285 			if (*cp != (uchar_t)0) {
10286 				break;
10287 			}
10288 			cp++;
10289 		}
10290 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10291 			un->un_f_geometry_is_valid = FALSE;
10292 		}
10293 
10294 #endif
10295 	}
10296 
10297 	if (otyp == OTYP_LYR) {
10298 		un->un_ocmap.lyropen[part]++;
10299 	} else {
10300 		un->un_ocmap.regopen[otyp] |= partmask;
10301 	}
10302 
10303 	/* Set up open and exclusive open flags */
10304 	if (flag & FEXCL) {
10305 		un->un_exclopen |= (partmask);
10306 	}
10307 
10308 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10309 	    "open of part %d type %d\n", part, otyp);
10310 
10311 	mutex_exit(SD_MUTEX(un));
10312 	if (!nodelay) {
10313 		sd_pm_exit(un);
10314 	}
10315 
10316 	sema_v(&un->un_semoclose);
10317 
10318 	mutex_enter(&sd_detach_mutex);
10319 	un->un_opens_in_progress--;
10320 	mutex_exit(&sd_detach_mutex);
10321 
10322 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
10323 	return (DDI_SUCCESS);
10324 
10325 excl_open_fail:
10326 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
10327 	rval = EBUSY;
10328 
10329 open_fail:
10330 	mutex_exit(SD_MUTEX(un));
10331 
10332 	/*
10333 	 * On a failed open we must exit the pm management.
10334 	 */
10335 	if (!nodelay) {
10336 		sd_pm_exit(un);
10337 	}
10338 open_failed_with_pm:
10339 	sema_v(&un->un_semoclose);
10340 
10341 	mutex_enter(&sd_detach_mutex);
10342 	un->un_opens_in_progress--;
10343 	if (otyp == OTYP_LYR) {
10344 		un->un_layer_count--;
10345 	}
10346 	mutex_exit(&sd_detach_mutex);
10347 
10348 	return (rval);
10349 }
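
/*
 * Illustrative example of the open bookkeeping above: a successful
 * OTYP_CHR open of partition 2 sets bit (1 << 2) in
 * un_ocmap.regopen[OTYP_CHR], while each OTYP_LYR open of partition 2
 * increments un_ocmap.lyropen[2].  A later FEXCL open of that
 * partition fails with EBUSY while either record is nonzero, and a
 * prior FEXCL open (recorded in un_exclopen) fails all later opens
 * of the partition.
 */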
10350 
10351 
10352 /*
10353  *    Function: sdclose
10354  *
10355  * Description: Driver's close(9e) entry point function.
10356  *
10357  *   Arguments: dev    - device number
10358  *		flag   - file status flag, informational only
10359  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10360  *		cred_p - user credential pointer
10361  *
10362  * Return Code: ENXIO
10363  *
10364  *     Context: Kernel thread context
10365  */
10366 /* ARGSUSED */
10367 static int
10368 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
10369 {
10370 	struct sd_lun	*un;
10371 	uchar_t		*cp;
10372 	int		part;
10373 	int		nodelay;
10374 	int		rval = 0;
10375 
10376 	/* Validate the open type */
10377 	if (otyp >= OTYPCNT) {
10378 		return (ENXIO);
10379 	}
10380 
10381 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10382 		return (ENXIO);
10383 	}
10384 
10385 	part = SDPART(dev);
10386 	nodelay = flag & (FNDELAY | FNONBLOCK);
10387 
10388 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10389 	    "sdclose: close of part %d type %d\n", part, otyp);
10390 
10391 	/*
10392 	 * We use a semaphore here in order to serialize
10393 	 * open and close requests on the device.
10394 	 */
10395 	sema_p(&un->un_semoclose);
10396 
10397 	mutex_enter(SD_MUTEX(un));
10398 
10399 	/* Don't proceed if power is being changed. */
10400 	while (un->un_state == SD_STATE_PM_CHANGING) {
10401 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10402 	}
10403 
10404 	if (un->un_exclopen & (1 << part)) {
10405 		un->un_exclopen &= ~(1 << part);
10406 	}
10407 
10408 	/* Update the open partition map */
10409 	if (otyp == OTYP_LYR) {
10410 		un->un_ocmap.lyropen[part] -= 1;
10411 	} else {
10412 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
10413 	}
10414 
10415 	cp = &un->un_ocmap.chkd[0];
10416 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10417 		if (*cp != (uchar_t)0) {
10418 			break;
10419 		}
10420 		cp++;
10421 	}
10422 
10423 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10424 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
10425 
10426 		/*
10427 		 * We avoid persistance upon the last close, and set
10428 		 * We avoid persistence upon the last close, and set
10429 		 */
10430 		un->un_throttle = un->un_saved_throttle;
10431 
10432 		if (un->un_state == SD_STATE_OFFLINE) {
10433 			if (un->un_f_is_fibre == FALSE) {
10434 				scsi_log(SD_DEVINFO(un), sd_label,
10435 				    CE_WARN, "offline\n");
10436 			}
10437 			un->un_f_geometry_is_valid = FALSE;
10438 
10439 		} else {
10440 			/*
10441 			 * Flush any outstanding writes in NVRAM cache.
10442 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
10443 			 * cmd, it may not work for non-Pluto devices.
10444 			 * SYNCHRONIZE CACHE is not required for removables,
10445 			 * except DVD-RAM drives.
10446 			 *
10447 			 * Also note: because SYNCHRONIZE CACHE is currently
10448 			 * the only command issued here that requires the
10449 			 * drive be powered up, only do the power up before
10450 			 * sending the Sync Cache command. If additional
10451 			 * commands are added which require a powered up
10452 			 * drive, the following sequence may have to change.
10453 			 *
10454 			 * And finally, note that parallel SCSI on SPARC
10455 			 * only issues a Sync Cache to DVD-RAM, a newly
10456 			 * supported device.
10457 			 */
10458 #if defined(__i386) || defined(__amd64)
10459 			if (!ISREMOVABLE(un) ||
10460 			    un->un_f_dvdram_writable_device == TRUE) {
10461 #else
10462 			if (un->un_f_dvdram_writable_device == TRUE) {
10463 #endif
10464 				mutex_exit(SD_MUTEX(un));
10465 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10466 					rval =
10467 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
10468 					    NULL);
10469 					/* ignore error if not supported */
10470 					if (rval == ENOTSUP) {
10471 						rval = 0;
10472 					} else if (rval != 0) {
10473 						rval = EIO;
10474 					}
10475 					sd_pm_exit(un);
10476 				} else {
10477 					rval = EIO;
10478 				}
10479 				mutex_enter(SD_MUTEX(un));
10480 			}
10481 
10482 			/*
10483 			 * For removable media devices, send an ALLOW MEDIA
10484 			 * REMOVAL command, but don't get upset if it fails.
10485 			 * Also invalidate the geometry. We need to raise
10486 			 * the power of the drive before we can call
10487 			 * sd_send_scsi_DOORLOCK()
10488 			 */
10489 			if (ISREMOVABLE(un)) {
10490 				mutex_exit(SD_MUTEX(un));
10491 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10492 					rval = sd_send_scsi_DOORLOCK(un,
10493 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
10494 
10495 					sd_pm_exit(un);
10496 					if (ISCD(un) && (rval != 0) &&
10497 					    (nodelay != 0)) {
10498 						rval = ENXIO;
10499 					}
10500 				} else {
10501 					rval = EIO;
10502 				}
10503 				mutex_enter(SD_MUTEX(un));
10504 
10505 				sr_ejected(un);
10506 				/*
10507 				 * Destroy the cache (if it exists) which was
10508 				 * allocated for the write maps since this is
10509 				 * the last close for this media.
10510 				 */
10511 				if (un->un_wm_cache) {
10512 					/*
10513 					 * Check if there are pending commands;
10514 					 * if there are, give a warning and
10515 					 * do not destroy the cache.
10516 					 */
10517 					if (un->un_ncmds_in_driver > 0) {
10518 						scsi_log(SD_DEVINFO(un),
10519 						    sd_label, CE_WARN,
10520 						    "Unable to clean up memory "
10521 						    "because of pending I/O\n");
10522 					} else {
10523 						kmem_cache_destroy(
10524 						    un->un_wm_cache);
10525 						un->un_wm_cache = NULL;
10526 					}
10527 				}
10528 			}
10529 		}
10530 	}
10531 
10532 	mutex_exit(SD_MUTEX(un));
10533 	sema_v(&un->un_semoclose);
10534 
10535 	if (otyp == OTYP_LYR) {
10536 		mutex_enter(&sd_detach_mutex);
10537 		/*
10538 		 * The detach routine may run when the layer count
10539 		 * drops to zero.
10540 		 */
10541 		un->un_layer_count--;
10542 		mutex_exit(&sd_detach_mutex);
10543 	}
10544 
10545 	return (rval);
10546 }
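
/*
 * Note on the chkd[] scan above: un_ocmap.chkd[] overlays the
 * lyropen[] counts and regopen[] masks (see the un_ocmap definition in
 * sddef.h), so finding every chkd[] byte zero means no partition is
 * open under any open type; that is what identifies the last close.
 */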
10547 
10548 
10549 /*
10550  *    Function: sd_ready_and_valid
10551  *
10552  * Description: Test if device is ready and has a valid geometry.
10553  *
10554  *   Arguments: un - driver soft state (unit) structure
10556  *
10557  * Return Code: SD_READY_VALID		ready and valid label
10558  *		SD_READY_NOT_VALID	ready, geom ops never applicable
10559  *		SD_NOT_READY_VALID	not ready, no label
10560  *
10561  *     Context: Never called at interrupt context.
10562  */
10563 
10564 static int
10565 sd_ready_and_valid(struct sd_lun *un)
10566 {
10567 	struct sd_errstats	*stp;
10568 	uint64_t		capacity;
10569 	uint_t			lbasize;
10570 	int			rval = SD_READY_VALID;
10571 	char			name_str[48];
10572 
10573 	ASSERT(un != NULL);
10574 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10575 
10576 	mutex_enter(SD_MUTEX(un));
10577 	if (ISREMOVABLE(un)) {
10578 		mutex_exit(SD_MUTEX(un));
10579 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
10580 			rval = SD_NOT_READY_VALID;
10581 			mutex_enter(SD_MUTEX(un));
10582 			goto done;
10583 		}
10584 
10585 		mutex_enter(SD_MUTEX(un));
10586 		if ((un->un_f_geometry_is_valid == FALSE) ||
10587 		    (un->un_f_blockcount_is_valid == FALSE) ||
10588 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
10589 
10590 			/* capacity has to be read every open. */
10591 			mutex_exit(SD_MUTEX(un));
10592 			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
10593 			    &lbasize, SD_PATH_DIRECT) != 0) {
10594 				mutex_enter(SD_MUTEX(un));
10595 				un->un_f_geometry_is_valid = FALSE;
10596 				rval = SD_NOT_READY_VALID;
10597 				goto done;
10598 			} else {
10599 				mutex_enter(SD_MUTEX(un));
10600 				sd_update_block_info(un, lbasize, capacity);
10601 			}
10602 		}
10603 
10604 		/*
10605 		 * If this is a non 512 block device, allocate space for
10606 		 * the wmap cache. This is being done here since every time
10607 		 * a media is changed this routine will be called and the
10608 		 * block size is a function of media rather than device.
10609 		 */
10610 		if (NOT_DEVBSIZE(un)) {
10611 			if (!(un->un_wm_cache)) {
10612 				(void) snprintf(name_str, sizeof (name_str),
10613 				    "%s%d_cache",
10614 				    ddi_driver_name(SD_DEVINFO(un)),
10615 				    ddi_get_instance(SD_DEVINFO(un)));
10616 				un->un_wm_cache = kmem_cache_create(
10617 				    name_str, sizeof (struct sd_w_map),
10618 				    8, sd_wm_cache_constructor,
10619 				    sd_wm_cache_destructor, NULL,
10620 				    (void *)un, NULL, 0);
10621 				if (!(un->un_wm_cache)) {
10622 					rval = ENOMEM;
10623 					goto done;
10624 				}
10625 			}
10626 		}
10627 
10628 		/*
10629 		 * Check if the media in the device is writable or not.
10630 		 */
10631 		if ((un->un_f_geometry_is_valid == FALSE) && ISCD(un)) {
10632 			sd_check_for_writable_cd(un);
10633 		}
10634 
10635 	} else {
10636 		/*
10637 		 * Do a test unit ready to clear any unit attention from non-cd
10638 		 * devices.
10639 		 */
10640 		mutex_exit(SD_MUTEX(un));
10641 		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
10642 		mutex_enter(SD_MUTEX(un));
10643 	}
10644 
10645 
10646 	if (un->un_state == SD_STATE_NORMAL) {
10647 		/*
10648 		 * If the target is not yet ready here (defined by a TUR
10649 		 * failure), invalidate the geometry and print an 'offline'
10650 		 * message. This is a legacy message, as the state of the
10651 		 * target is not actually changed to SD_STATE_OFFLINE.
10652 		 *
10653 		 * If the TUR fails for EACCES (Reservation Conflict), it
10654 		 * means there actually is nothing wrong with the target that
10655 		 * would require invalidating the geometry, so continue in
10656 		 * that case as if the TUR was successful.
10657 		 */
10658 		int err;
10659 
10660 		mutex_exit(SD_MUTEX(un));
10661 		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
10662 		mutex_enter(SD_MUTEX(un));
10663 
10664 		if ((err != 0) && (err != EACCES)) {
10665 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10666 			    "offline\n");
10667 			un->un_f_geometry_is_valid = FALSE;
10668 			rval = SD_NOT_READY_VALID;
10669 			goto done;
10670 		}
10671 	}
10672 
10673 	if (un->un_f_format_in_progress == FALSE) {
10674 		/*
10675 		 * Note: sd_validate_geometry may return TRUE, but that does
10676 		 * not necessarily mean un_f_geometry_is_valid == TRUE!
10677 		 */
10678 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
10679 		if (rval == ENOTSUP) {
10680 			if (un->un_f_geometry_is_valid == TRUE)
10681 				rval = 0;
10682 			else {
10683 				rval = SD_READY_NOT_VALID;
10684 				goto done;
10685 			}
10686 		}
10687 		if (rval != 0) {
10688 			/*
10689 			 * We don't check the validity of geometry for
10690 			 * CDROMs. Also we assume we have a good label
10691 			 * even if sd_validate_geometry returned ENOMEM.
10692 			 */
10693 			if (!ISCD(un) && rval != ENOMEM) {
10694 				rval = SD_NOT_READY_VALID;
10695 				goto done;
10696 			}
10697 		}
10698 	}
10699 
10700 #ifdef DOESNTWORK /* on eliteII, see 1118607 */
10701 	/*
10702 	 * check to see if this disk is write protected, if it is and we have
10703 	 * not set read-only, then fail
10704 	 */
10705 	if ((flag & FWRITE) && (sr_check_wp(dev))) {
10706 		New_state(un, SD_STATE_CLOSED);
10707 		goto done;
10708 	}
10709 #endif
10710 
10711 	/*
10712 	 * If this is a removable media device, try and send
10713 	 * a PREVENT MEDIA REMOVAL command, but don't get upset
10714 	 * if it fails. For a CD, however, it is an error
10715 	 */
10716 	if (ISREMOVABLE(un)) {
10717 		mutex_exit(SD_MUTEX(un));
10718 		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
10719 		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
10720 			rval = SD_NOT_READY_VALID;
10721 			mutex_enter(SD_MUTEX(un));
10722 			goto done;
10723 		}
10724 		mutex_enter(SD_MUTEX(un));
10725 	}
10726 
10727 	/* The state has changed, inform the media watch routines */
10728 	un->un_mediastate = DKIO_INSERTED;
10729 	cv_broadcast(&un->un_state_cv);
10730 	rval = SD_READY_VALID;
10731 
10732 done:
10733 
10734 	/*
10735 	 * Initialize the capacity kstat value if there was no media
10736 	 * previously (capacity kstat is 0) and media has now been
10737 	 * inserted (un_blockcount > 0).
10738 	 * This is a more generic way than checking for ISREMOVABLE.
10739 	 */
10740 	if (un->un_errstats != NULL) {
10741 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
10742 		if ((stp->sd_capacity.value.ui64 == 0) &&
10743 		    (un->un_f_blockcount_is_valid == TRUE)) {
10744 			stp->sd_capacity.value.ui64 =
10745 			    (uint64_t)((uint64_t)un->un_blockcount *
10746 			    un->un_sys_blocksize);
10747 		}
10748 	}
10749 
10750 	mutex_exit(SD_MUTEX(un));
10751 	return (rval);
10752 }
10753 
10754 
10755 /*
10756  *    Function: sdmin
10757  *
10758  * Description: Routine to limit the size of a data transfer. Used in
10759  *		conjunction with physio(9F).
10760  *
10761  *   Arguments: bp - pointer to the indicated buf(9S) struct.
10762  *
10763  *     Context: Kernel thread context.
10764  */
10765 
10766 static void
10767 sdmin(struct buf *bp)
10768 {
10769 	struct sd_lun	*un;
10770 	int		instance;
10771 
10772 	instance = SDUNIT(bp->b_edev);
10773 
10774 	un = ddi_get_soft_state(sd_state, instance);
10775 	ASSERT(un != NULL);
10776 
10777 	if (bp->b_bcount > un->un_max_xfer_size) {
10778 		bp->b_bcount = un->un_max_xfer_size;
10779 	}
10780 }
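
/*
 * Usage sketch: sdmin() is the minphys-style callback handed to
 * physio(9F)/aphysio(9F) by the read/write entry points below, e.g.:
 *
 *	err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
 *
 * physio calls it for each buf, clamping b_bcount to un_max_xfer_size
 * before sdstrategy() sees the request.
 */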
10781 
10782 
10783 /*
10784  *    Function: sdread
10785  *
10786  * Description: Driver's read(9e) entry point function.
10787  *
10788  *   Arguments: dev   - device number
10789  *		uio   - structure pointer describing where data is to be stored
10790  *			in user's space
10791  *		cred_p  - user credential pointer
10792  *
10793  * Return Code: ENXIO
10794  *		EIO
10795  *		EINVAL
10796  *		value returned by physio
10797  *
10798  *     Context: Kernel thread context.
10799  */
10800 /* ARGSUSED */
10801 static int
10802 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
10803 {
10804 	struct sd_lun	*un = NULL;
10805 	int		secmask;
10806 	int		err;
10807 
10808 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10809 		return (ENXIO);
10810 	}
10811 
10812 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10813 
10814 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
10815 		mutex_enter(SD_MUTEX(un));
10816 		/*
10817 		 * Because the call to sd_ready_and_valid will issue I/O we
10818 		 * must wait here if either the device is suspended or
10819 		 * if its power level is changing.
10820 		 */
10821 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10822 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10823 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10824 		}
10825 		un->un_ncmds_in_driver++;
10826 		mutex_exit(SD_MUTEX(un));
10827 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
10828 			mutex_enter(SD_MUTEX(un));
10829 			un->un_ncmds_in_driver--;
10830 			ASSERT(un->un_ncmds_in_driver >= 0);
10831 			mutex_exit(SD_MUTEX(un));
10832 			return (EIO);
10833 		}
10834 		mutex_enter(SD_MUTEX(un));
10835 		un->un_ncmds_in_driver--;
10836 		ASSERT(un->un_ncmds_in_driver >= 0);
10837 		mutex_exit(SD_MUTEX(un));
10838 	}
10839 
10840 	/*
10841 	 * Read requests are restricted to multiples of the system block size.
10842 	 */
10843 	secmask = un->un_sys_blocksize - 1;
10844 
10845 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10846 		SD_ERROR(SD_LOG_READ_WRITE, un,
10847 		    "sdread: file offset not modulo %d\n",
10848 		    un->un_sys_blocksize);
10849 		err = EINVAL;
10850 	} else if (uio->uio_iov->iov_len & (secmask)) {
10851 		SD_ERROR(SD_LOG_READ_WRITE, un,
10852 		    "sdread: transfer length not modulo %d\n",
10853 		    un->un_sys_blocksize);
10854 		err = EINVAL;
10855 	} else {
10856 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
10857 	}
10858 	return (err);
10859 }
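
/*
 * Alignment example for the secmask checks above, assuming the common
 * un_sys_blocksize of 512: secmask is 0x1FF, so a read at offset 1024
 * for 8192 bytes passes, while offset 1030 (or a length of 100 bytes)
 * fails with EINVAL before any I/O is attempted.
 */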
10860 
10861 
10862 /*
10863  *    Function: sdwrite
10864  *
10865  * Description: Driver's write(9e) entry point function.
10866  *
10867  *   Arguments: dev   - device number
10868  *		uio   - structure pointer describing where data is stored in
10869  *			user's space
10870  *		cred_p  - user credential pointer
10871  *
10872  * Return Code: ENXIO
10873  *		EIO
10874  *		EINVAL
10875  *		value returned by physio
10876  *
10877  *     Context: Kernel thread context.
10878  */
10879 /* ARGSUSED */
10880 static int
10881 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
10882 {
10883 	struct sd_lun	*un = NULL;
10884 	int		secmask;
10885 	int		err;
10886 
10887 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10888 		return (ENXIO);
10889 	}
10890 
10891 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10892 
10893 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
10894 		mutex_enter(SD_MUTEX(un));
10895 		/*
10896 		 * Because the call to sd_ready_and_valid will issue I/O we
10897 		 * must wait here if either the device is suspended or
10898 		 * if its power level is changing.
10899 		 */
10900 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10901 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10902 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10903 		}
10904 		un->un_ncmds_in_driver++;
10905 		mutex_exit(SD_MUTEX(un));
10906 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
10907 			mutex_enter(SD_MUTEX(un));
10908 			un->un_ncmds_in_driver--;
10909 			ASSERT(un->un_ncmds_in_driver >= 0);
10910 			mutex_exit(SD_MUTEX(un));
10911 			return (EIO);
10912 		}
10913 		mutex_enter(SD_MUTEX(un));
10914 		un->un_ncmds_in_driver--;
10915 		ASSERT(un->un_ncmds_in_driver >= 0);
10916 		mutex_exit(SD_MUTEX(un));
10917 	}
10918 
10919 	/*
10920 	 * Write requests are restricted to multiples of the system block size.
10921 	 */
10922 	secmask = un->un_sys_blocksize - 1;
10923 
10924 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10925 		SD_ERROR(SD_LOG_READ_WRITE, un,
10926 		    "sdwrite: file offset not modulo %d\n",
10927 		    un->un_sys_blocksize);
10928 		err = EINVAL;
10929 	} else if (uio->uio_iov->iov_len & (secmask)) {
10930 		SD_ERROR(SD_LOG_READ_WRITE, un,
10931 		    "sdwrite: transfer length not modulo %d\n",
10932 		    un->un_sys_blocksize);
10933 		err = EINVAL;
10934 	} else {
10935 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
10936 	}
10937 	return (err);
10938 }
10939 
10940 
10941 /*
10942  *    Function: sdaread
10943  *
10944  * Description: Driver's aread(9e) entry point function.
10945  *
10946  *   Arguments: dev   - device number
10947  *		aio   - structure pointer describing where data is to be stored
10948  *		cred_p  - user credential pointer
10949  *
10950  * Return Code: ENXIO
10951  *		EIO
10952  *		EINVAL
10953  *		value returned by aphysio
10954  *
10955  *     Context: Kernel thread context.
10956  */
10957 /* ARGSUSED */
10958 static int
10959 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
10960 {
10961 	struct sd_lun	*un = NULL;
10962 	struct uio	*uio = aio->aio_uio;
10963 	int		secmask;
10964 	int		err;
10965 
10966 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10967 		return (ENXIO);
10968 	}
10969 
10970 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10971 
10972 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
10973 		mutex_enter(SD_MUTEX(un));
10974 		/*
10975 		 * Because the call to sd_ready_and_valid will issue I/O we
10976 		 * must wait here if either the device is suspended or
10977 		 * if its power level is changing.
10978 		 */
10979 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10980 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10981 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10982 		}
10983 		un->un_ncmds_in_driver++;
10984 		mutex_exit(SD_MUTEX(un));
10985 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
10986 			mutex_enter(SD_MUTEX(un));
10987 			un->un_ncmds_in_driver--;
10988 			ASSERT(un->un_ncmds_in_driver >= 0);
10989 			mutex_exit(SD_MUTEX(un));
10990 			return (EIO);
10991 		}
10992 		mutex_enter(SD_MUTEX(un));
10993 		un->un_ncmds_in_driver--;
10994 		ASSERT(un->un_ncmds_in_driver >= 0);
10995 		mutex_exit(SD_MUTEX(un));
10996 	}
10997 
10998 	/*
10999 	 * Read requests are restricted to multiples of the system block size.
11000 	 */
11001 	secmask = un->un_sys_blocksize - 1;
11002 
11003 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11004 		SD_ERROR(SD_LOG_READ_WRITE, un,
11005 		    "sdaread: file offset not modulo %d\n",
11006 		    un->un_sys_blocksize);
11007 		err = EINVAL;
11008 	} else if (uio->uio_iov->iov_len & (secmask)) {
11009 		SD_ERROR(SD_LOG_READ_WRITE, un,
11010 		    "sdaread: transfer length not modulo %d\n",
11011 		    un->un_sys_blocksize);
11012 		err = EINVAL;
11013 	} else {
11014 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
11015 	}
11016 	return (err);
11017 }
11018 
11019 
11020 /*
11021  *    Function: sdawrite
11022  *
11023  * Description: Driver's awrite(9e) entry point function.
11024  *
11025  *   Arguments: dev   - device number
11026  *		aio   - structure pointer describing where data is stored
11027  *		cred_p  - user credential pointer
11028  *
11029  * Return Code: ENXIO
11030  *		EIO
11031  *		EINVAL
11032  *		value returned by aphysio
11033  *
11034  *     Context: Kernel thread context.
11035  */
11036 /* ARGSUSED */
11037 static int
11038 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11039 {
11040 	struct sd_lun	*un = NULL;
11041 	struct uio	*uio = aio->aio_uio;
11042 	int		secmask;
11043 	int		err;
11044 
11045 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11046 		return (ENXIO);
11047 	}
11048 
11049 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11050 
11051 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11052 		mutex_enter(SD_MUTEX(un));
11053 		/*
11054 		 * Because the call to sd_ready_and_valid will issue I/O we
11055 		 * must wait here if either the device is suspended or
11056 		 * if its power level is changing.
11057 		 */
11058 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11059 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11060 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11061 		}
11062 		un->un_ncmds_in_driver++;
11063 		mutex_exit(SD_MUTEX(un));
11064 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11065 			mutex_enter(SD_MUTEX(un));
11066 			un->un_ncmds_in_driver--;
11067 			ASSERT(un->un_ncmds_in_driver >= 0);
11068 			mutex_exit(SD_MUTEX(un));
11069 			return (EIO);
11070 		}
11071 		mutex_enter(SD_MUTEX(un));
11072 		un->un_ncmds_in_driver--;
11073 		ASSERT(un->un_ncmds_in_driver >= 0);
11074 		mutex_exit(SD_MUTEX(un));
11075 	}
11076 
11077 	/*
11078 	 * Write requests are restricted to multiples of the system block size.
11079 	 */
11080 	secmask = un->un_sys_blocksize - 1;
11081 
11082 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11083 		SD_ERROR(SD_LOG_READ_WRITE, un,
11084 		    "sdawrite: file offset not modulo %d\n",
11085 		    un->un_sys_blocksize);
11086 		err = EINVAL;
11087 	} else if (uio->uio_iov->iov_len & (secmask)) {
11088 		SD_ERROR(SD_LOG_READ_WRITE, un,
11089 		    "sdawrite: transfer length not modulo %d\n",
11090 		    un->un_sys_blocksize);
11091 		err = EINVAL;
11092 	} else {
11093 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
11094 	}
11095 	return (err);
11096 }
11097 
11098 
11099 
11100 
11101 
11102 /*
11103  * Driver IO processing follows the following sequence:
11104  *
11105  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
11106  *         |                |                     ^
11107  *         v                v                     |
11108  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
11109  *         |                |                     |                   |
11110  *         v                |                     |                   |
11111  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
11112  *         |                |                     ^                   ^
11113  *         v                v                     |                   |
11114  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
11115  *         |                |                     |                   |
11116  *     +---+                |                     +------------+      +-------+
11117  *     |                    |                                  |              |
11118  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11119  *     |                    v                                  |              |
11120  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
11121  *     |                    |                                  ^              |
11122  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11123  *     |                    v                                  |              |
11124  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
11125  *     |                    |                                  ^              |
11126  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11127  *     |                    v                                  |              |
11128  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
11129  *     |                    |                                  ^              |
11130  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
11131  *     |                    v                                  |              |
11132  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
11133  *     |                    |                                  ^              |
11134  *     |                    |                                  |              |
11135  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
11136  *                          |                           ^
11137  *                          v                           |
11138  *                   sd_core_iostart()                  |
11139  *                          |                           |
11140  *                          |                           +------>(*destroypkt)()
11141  *                          +-> sd_start_cmds() <-+     |           |
11142  *                          |                     |     |           v
11143  *                          |                     |     |  scsi_destroy_pkt(9F)
11144  *                          |                     |     |
11145  *                          +->(*initpkt)()       +- sdintr()
11146  *                          |  |                        |  |
11147  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
11148  *                          |  +-> scsi_setup_cdb(9F)   |
11149  *                          |                           |
11150  *                          +--> scsi_transport(9F)     |
11151  *                                     |                |
11152  *                                     +----> SCSA ---->+
11153  *
11154  *
11155  * This code is based upon the following presumptions:
11156  *
11157  *   - iostart and iodone functions operate on buf(9S) structures. These
11158  *     functions perform the necessary operations on the buf(9S) and pass
11159  *     them along to the next function in the chain by using the macros
11160  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
11161  *     (for iodone side functions).
11162  *
11163  *   - The iostart side functions may sleep. The iodone side functions
11164  *     are called under interrupt context and may NOT sleep. Therefore
11165  *     iodone side functions also may not call iostart side functions.
11166  *     (NOTE: iostart side functions should NOT sleep for memory, as
11167  *     this could result in deadlock.)
11168  *
11169  *   - An iostart side function may call its corresponding iodone side
11170  *     function directly (if necessary).
11171  *
11172  *   - In the event of an error, an iostart side function can return a buf(9S)
11173  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
11174  *     b_error in the usual way of course).
11175  *
11176  *   - The taskq mechanism may be used by the iodone side functions to dispatch
11177  *     requests to the iostart side functions.  The iostart side functions in
11178  *     this case would be called under the context of a taskq thread, so it's
11179  *     OK for them to block/sleep/spin in this case.
11180  *
11181  *   - iostart side functions may allocate "shadow" buf(9S) structs and
11182  *     pass them along to the next function in the chain.  The corresponding
11183  *     iodone side functions must coalesce the "shadow" bufs and return
11184  *     the "original" buf to the next higher layer.
11185  *
11186  *   - The b_private field of the buf(9S) struct holds a pointer to
11187  *     an sd_xbuf struct, which contains information needed to
11188  *     construct the scsi_pkt for the command.
11189  *
11190  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
11191  *     layer must acquire & release the SD_MUTEX(un) as needed.
11192  */
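
/*
 * Minimal iostart-side skeleton implied by the rules above (purely
 * illustrative; the real layers are sd_mapblockaddr_iostart() and
 * friends, and sd_example_iostart() does not exist in this driver):
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		if (<a fatal error is detected>) {
 *			bioerror(bp, EIO);	<sets B_ERROR and b_error>
 *			SD_BEGIN_IODONE(index, un, bp);
 *			return;
 *		}
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 */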
11193 
11194 
11195 /*
11196  * Create taskq for all targets in the system. This is created at
11197  * _init(9E) and destroyed at _fini(9E).
11198  *
11199  * Note: here we set the minalloc to a reasonably high number to ensure that
11200  * we will have an adequate supply of task entries available at interrupt time.
11201  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
11202  * sd_taskq_create().  Since we do not want to sleep for allocations at
11203  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
11204  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
11205  * requests at any one instant in time.
11206  */
11207 #define	SD_TASKQ_NUMTHREADS	8
11208 #define	SD_TASKQ_MINALLOC	256
11209 #define	SD_TASKQ_MAXALLOC	256
11210 
11211 static taskq_t	*sd_tq = NULL;
11212 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
11213 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
11214 
11215 /*
11216  * The following task queue is being created for the write part of
11217  * read-modify-write of non-512 block size devices.
11218  * Limit the number of threads to 1 for now. This number was chosen
11219  * considering that it currently applies only to DVD-RAM and MO drives,
11220  * for which performance is not the main criterion at this stage.
11221  * Note: whether a single taskq could be used instead needs to be explored.
11222  */
11223 #define	SD_WMR_TASKQ_NUMTHREADS	1
11224 static taskq_t	*sd_wmr_tq = NULL;
11225 
11226 /*
11227  *    Function: sd_taskq_create
11228  *
11229  * Description: Create taskq thread(s) and preallocate task entries
11230  *
11231  * Return Code: Returns a pointer to the allocated taskq_t.
11232  *
11233  *     Context: Can sleep. Requires blockable context.
11234  *
11235  *       Notes: - The taskq() facility currently is NOT part of the DDI.
11236  *		  (definitely NOT recommended for 3rd-party drivers!) :-)
11237  *		- taskq_create() will block for memory, also it will panic
11238  *		  if it cannot create the requested number of threads.
11239  *		- Currently taskq_create() creates threads that cannot be
11240  *		  swapped.
11241  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
11242  *		  supply of taskq entries at interrupt time (ie, so that we
11243  *		  do not have to sleep for memory)
11244  */
11245 
11246 static void
11247 sd_taskq_create(void)
11248 {
11249 	char	taskq_name[TASKQ_NAMELEN];
11250 
11251 	ASSERT(sd_tq == NULL);
11252 	ASSERT(sd_wmr_tq == NULL);
11253 
11254 	(void) snprintf(taskq_name, sizeof (taskq_name),
11255 	    "%s_drv_taskq", sd_label);
11256 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
11257 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11258 	    TASKQ_PREPOPULATE));
11259 
11260 	(void) snprintf(taskq_name, sizeof (taskq_name),
11261 	    "%s_rmw_taskq", sd_label);
11262 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
11263 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11264 	    TASKQ_PREPOPULATE));
11265 }
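
/*
 * Dispatch sketch (illustrative; sd_reinit_task() is a hypothetical
 * task function): because the queues are prepopulated and maxalloc is
 * capped, an iodone-side caller can dispatch without sleeping and
 * simply fail the command when no task entries remain:
 *
 *	if (taskq_dispatch(sd_tq, sd_reinit_task, un, KM_NOSLEEP) == 0) {
 *		<no task entries available; fail the command>
 *	}
 */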
11266 
11267 
11268 /*
11269  *    Function: sd_taskq_delete
11270  *
11271  * Description: Complementary cleanup routine for sd_taskq_create().
11272  *
11273  *     Context: Kernel thread context.
11274  */
11275 
11276 static void
11277 sd_taskq_delete(void)
11278 {
11279 	ASSERT(sd_tq != NULL);
11280 	ASSERT(sd_wmr_tq != NULL);
11281 	taskq_destroy(sd_tq);
11282 	taskq_destroy(sd_wmr_tq);
11283 	sd_tq = NULL;
11284 	sd_wmr_tq = NULL;
11285 }
11286 
11287 
11288 /*
11289  *    Function: sdstrategy
11290  *
11291  * Description: Driver's strategy (9E) entry point function.
11292  *
11293  *   Arguments: bp - pointer to buf(9S)
11294  *
11295  * Return Code: Always returns zero
11296  *
11297  *     Context: Kernel thread context.
11298  */
11299 
11300 static int
11301 sdstrategy(struct buf *bp)
11302 {
11303 	struct sd_lun *un;
11304 
11305 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11306 	if (un == NULL) {
11307 		bioerror(bp, EIO);
11308 		bp->b_resid = bp->b_bcount;
11309 		biodone(bp);
11310 		return (0);
11311 	}
11312 	/* As was done in the past, fail new cmds if the state is dumping. */
11313 	if (un->un_state == SD_STATE_DUMPING) {
11314 		bioerror(bp, ENXIO);
11315 		bp->b_resid = bp->b_bcount;
11316 		biodone(bp);
11317 		return (0);
11318 	}
11319 
11320 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11321 
11322 	/*
11323 	 * Commands may sneak in while we released the mutex in
11324 	 * DDI_SUSPEND, we should block new commands. However, old
11325 	 * commands that are still in the driver at this point should
11326 	 * still be allowed to drain.
11327 	 */
11328 	mutex_enter(SD_MUTEX(un));
11329 	/*
11330 	 * Must wait here if either the device is suspended or
11331 	 * if its power level is changing.
11332 	 */
11333 	while ((un->un_state == SD_STATE_SUSPENDED) ||
11334 	    (un->un_state == SD_STATE_PM_CHANGING)) {
11335 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11336 	}
11337 
11338 	un->un_ncmds_in_driver++;
11339 
11340 	/*
11341 	 * atapi: Since we are running the CD in PIO mode for now, we need to
11342 	 * call bp_mapin here to avoid bp_mapin being called in interrupt
11343 	 * context from the HBA's init_pkt routine.
11344 	 */
11345 	if (un->un_f_cfg_is_atapi == TRUE) {
11346 		mutex_exit(SD_MUTEX(un));
11347 		bp_mapin(bp);
11348 		mutex_enter(SD_MUTEX(un));
11349 	}
11350 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
11351 	    un->un_ncmds_in_driver);
11352 
11353 	mutex_exit(SD_MUTEX(un));
11354 
11355 	/*
11356 	 * This will (eventually) allocate the sd_xbuf area and
11357 	 * call sd_xbuf_strategy().  We just want to return the
11358 	 * result of ddi_xbuf_qstrategy so that we have an
11359 	 * optimized tail call which saves us a stack frame.
11360 	 */
11361 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
11362 }
11363 
11364 
11365 /*
11366  *    Function: sd_xbuf_strategy
11367  *
11368  * Description: Function for initiating IO operations via the
11369  *		ddi_xbuf_qstrategy() mechanism.
11370  *
11371  *     Context: Kernel thread context.
11372  */
11373 
11374 static void
11375 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11376 {
11377 	struct sd_lun *un = arg;
11378 
11379 	ASSERT(bp != NULL);
11380 	ASSERT(xp != NULL);
11381 	ASSERT(un != NULL);
11382 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11383 
11384 	/*
11385 	 * Initialize the fields in the xbuf and save a pointer to the
11386 	 * xbuf in bp->b_private.
11387 	 */
11388 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11389 
11390 	/* Send the buf down the iostart chain */
11391 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11392 }
11393 
11394 
11395 /*
11396  *    Function: sd_xbuf_init
11397  *
11398  * Description: Prepare the given sd_xbuf struct for use.
11399  *
11400  *   Arguments: un - ptr to softstate
11401  *		bp - ptr to associated buf(9S)
11402  *		xp - ptr to associated sd_xbuf
11403  *		chain_type - IO chain type to use:
11404  *			SD_CHAIN_NULL
11405  *			SD_CHAIN_BUFIO
11406  *			SD_CHAIN_USCSI
11407  *			SD_CHAIN_DIRECT
11408  *			SD_CHAIN_DIRECT_PRIORITY
11409  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11410  *			initialization; may be NULL if none.
11411  *
11412  *     Context: Kernel thread context
11413  */
11414 
11415 static void
11416 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
11417 	uchar_t chain_type, void *pktinfop)
11418 {
11419 	int index;
11420 
11421 	ASSERT(un != NULL);
11422 	ASSERT(bp != NULL);
11423 	ASSERT(xp != NULL);
11424 
11425 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
11426 	    bp, chain_type);
11427 
11428 	xp->xb_un	= un;
11429 	xp->xb_pktp	= NULL;
11430 	xp->xb_pktinfo	= pktinfop;
11431 	xp->xb_private	= bp->b_private;
11432 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
11433 
11434 	/*
11435 	 * Set up the iostart and iodone chain indexes in the xbuf, based
11436 	 * upon the specified chain type to use.
11437 	 */
11438 	switch (chain_type) {
11439 	case SD_CHAIN_NULL:
11440 		/*
11441 		 * Fall thru to just use the values for the buf type, even
11442 		 * tho for the NULL chain these values will never be used.
11443 		 */
11444 		/* FALLTHRU */
11445 	case SD_CHAIN_BUFIO:
11446 		index = un->un_buf_chain_type;
11447 		break;
11448 	case SD_CHAIN_USCSI:
11449 		index = un->un_uscsi_chain_type;
11450 		break;
11451 	case SD_CHAIN_DIRECT:
11452 		index = un->un_direct_chain_type;
11453 		break;
11454 	case SD_CHAIN_DIRECT_PRIORITY:
11455 		index = un->un_priority_chain_type;
11456 		break;
11457 	default:
11458 		/* We're really broken if we ever get here... */
11459 		panic("sd_xbuf_init: illegal chain type!");
11460 		/*NOTREACHED*/
11461 	}
11462 
11463 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
11464 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
11465 
11466 	/*
11467 	 * It might be a bit easier to simply bzero the entire xbuf above,
11468 	 * but it turns out that since we init a fair number of members anyway,
11469 	 * we save a fair number of cycles by doing explicit assignment of zero.
11470 	 */
11471 	xp->xb_pkt_flags	= 0;
11472 	xp->xb_dma_resid	= 0;
11473 	xp->xb_retry_count	= 0;
11474 	xp->xb_victim_retry_count = 0;
11475 	xp->xb_ua_retry_count	= 0;
11476 	xp->xb_sense_bp		= NULL;
11477 	xp->xb_sense_status	= 0;
11478 	xp->xb_sense_state	= 0;
11479 	xp->xb_sense_resid	= 0;
11480 
11481 	bp->b_private	= xp;
11482 	bp->b_flags	&= ~(B_DONE | B_ERROR);
11483 	bp->b_resid	= 0;
11484 	bp->av_forw	= NULL;
11485 	bp->av_back	= NULL;
11486 	bioerror(bp, 0);
11487 
11488 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
11489 }
11490 
11491 
11492 /*
11493  *    Function: sd_uscsi_strategy
11494  *
11495  * Description: Wrapper for calling into the USCSI chain via physio(9F)
11496  *
11497  *   Arguments: bp - buf struct ptr
11498  *
11499  * Return Code: Always returns 0
11500  *
11501  *     Context: Kernel thread context
11502  */
11503 
11504 static int
11505 sd_uscsi_strategy(struct buf *bp)
11506 {
11507 	struct sd_lun		*un;
11508 	struct sd_uscsi_info	*uip;
11509 	struct sd_xbuf		*xp;
11510 	uchar_t			chain_type;
11511 
11512 	ASSERT(bp != NULL);
11513 
11514 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11515 	if (un == NULL) {
11516 		bioerror(bp, EIO);
11517 		bp->b_resid = bp->b_bcount;
11518 		biodone(bp);
11519 		return (0);
11520 	}
11521 
11522 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11523 
11524 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
11525 
11526 	mutex_enter(SD_MUTEX(un));
11527 	/*
11528 	 * atapi: Since we are currently running the CD in PIO mode, we need
11529 	 * to call bp_mapin here to avoid having bp_mapin called in interrupt
11530 	 * context under the HBA's init_pkt routine.
11531 	 */
11532 	if (un->un_f_cfg_is_atapi == TRUE) {
11533 		mutex_exit(SD_MUTEX(un));
11534 		bp_mapin(bp);
11535 		mutex_enter(SD_MUTEX(un));
11536 	}
11537 	un->un_ncmds_in_driver++;
11538 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
11539 	    un->un_ncmds_in_driver);
11540 	mutex_exit(SD_MUTEX(un));
11541 
11542 	/*
11543 	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
11544 	 */
11545 	ASSERT(bp->b_private != NULL);
11546 	uip = (struct sd_uscsi_info *)bp->b_private;
11547 
11548 	switch (uip->ui_flags) {
11549 	case SD_PATH_DIRECT:
11550 		chain_type = SD_CHAIN_DIRECT;
11551 		break;
11552 	case SD_PATH_DIRECT_PRIORITY:
11553 		chain_type = SD_CHAIN_DIRECT_PRIORITY;
11554 		break;
11555 	default:
11556 		chain_type = SD_CHAIN_USCSI;
11557 		break;
11558 	}
11559 
11560 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
11561 	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
11562 
11563 	/* Use the index obtained within xbuf_init */
11564 	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
11565 
11566 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
11567 
11568 	return (0);
11569 }
11570 
11571 
11572 /*
11573  * These routines perform raw i/o operations.
11574  */
11575 /*ARGSUSED*/
11576 static void
11577 sduscsimin(struct buf *bp)
11578 {
11579 	/*
11580 	 * Do not break up the transfer, because the CDB count would then
11581 	 * be incorrect and data underruns would result (incomplete
11582 	 * read/writes, which would be retried and then failed; see
11583 	 * sdintr()).
11584 	 */
11585 }
11586 
11587 
11588 
11589 /*
11590  *    Function: sd_send_scsi_cmd
11591  *
11592  * Description: Runs a USCSI command for user (when called thru sdioctl),
11593  *		or for the driver
11594  *
11595  *   Arguments: dev - the dev_t for the device
11596  *		incmd - ptr to a valid uscsi_cmd struct
11597  *		cdbspace - UIO_USERSPACE or UIO_SYSSPACE
11598  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11599  *		rqbufspace - UIO_USERSPACE or UIO_SYSSPACE
11600  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11601  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11602  *			to use the USCSI "direct" chain and bypass the normal
11603  *			command waitq.
11604  *
11605  * Return Code: 0 -  successful completion of the given command
11606  *		EIO - scsi_reset() failed, or see biowait()/physio() codes.
11607  *		ENXIO  - soft state not found for specified dev
11608  *		EINVAL - invalid CDB length, or could not force async mode
11609  *		EFAULT - copyin/copyout error
11610  *		return code of biowait(9F) or physio(9F):
11611  *			EIO - IO error, caller may check incmd->uscsi_status
11612  *			ENXIO
11613  *			EACCES - reservation conflict
11614  *
11615  *     Context: Waits for command to complete. Can sleep.
11616  */
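
/*
 * Illustrative sketch (not part of the driver): an in-kernel caller
 * might issue a TEST UNIT READY through this routine roughly as
 * follows.  The six zero bytes of cdb[] form the standard TEST UNIT
 * READY CDB (opcode 0x00); the rest mirrors how this file issues its
 * own internal commands.
 *
 *	struct uscsi_cmd ucmd;
 *	char cdb[CDB_GROUP0] = { 0 };
 *
 *	bzero(&ucmd, sizeof (ucmd));
 *	ucmd.uscsi_cdb     = cdb;
 *	ucmd.uscsi_cdblen  = CDB_GROUP0;
 *	ucmd.uscsi_flags   = USCSI_SILENT;
 *	ucmd.uscsi_timeout = sd_io_time;
 *	(void) sd_send_scsi_cmd(dev, &ucmd, UIO_SYSSPACE, UIO_SYSSPACE,
 *	    UIO_SYSSPACE, SD_PATH_DIRECT);
 */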
11617 
11618 static int
11619 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
11620 	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
11621 	int path_flag)
11622 {
11623 	struct sd_uscsi_info	*uip;
11624 	struct uscsi_cmd	*uscmd;
11625 	struct sd_lun	*un;
11626 	struct buf	*bp;
11627 	int	rval;
11628 	int	flags;
11629 
11630 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11631 	if (un == NULL) {
11632 		return (ENXIO);
11633 	}
11634 
11635 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11636 
11637 #ifdef SDDEBUG
11638 	switch (dataspace) {
11639 	case UIO_USERSPACE:
11640 		SD_TRACE(SD_LOG_IO, un,
11641 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
11642 		break;
11643 	case UIO_SYSSPACE:
11644 		SD_TRACE(SD_LOG_IO, un,
11645 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
11646 		break;
11647 	default:
11648 		SD_TRACE(SD_LOG_IO, un,
11649 		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
11650 		break;
11651 	}
11652 #endif
11653 
11654 	/*
11655 	 * Perform resets directly; no need to generate a command to do it.
11656 	 */
11657 	if (incmd->uscsi_flags & (USCSI_RESET | USCSI_RESET_ALL)) {
11658 		flags = ((incmd->uscsi_flags & USCSI_RESET_ALL) != 0) ?
11659 		    RESET_ALL : RESET_TARGET;
11660 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: Issuing reset\n");
11661 		if (scsi_reset(SD_ADDRESS(un), flags) == 0) {
11662 			/* Reset attempt was unsuccessful */
11663 			SD_TRACE(SD_LOG_IO, un,
11664 			    "sd_send_scsi_cmd: reset: failure\n");
11665 			return (EIO);
11666 		}
11667 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: reset: success\n");
11668 		return (0);
11669 	}
11670 
11671 	/* Perfunctory sanity check... */
11672 	if (incmd->uscsi_cdblen <= 0) {
11673 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11674 		    "invalid uscsi_cdblen, returning EINVAL\n");
11675 		return (EINVAL);
11676 	}
11677 
11678 	/*
11679 	 * In order to not worry about where the uscsi structure came from
11680 	 * (or where the cdb it points to came from) we're going to make
11681 	 * kmem_alloc'd copies of them here. This will also allow reference
11682 	 * to the data they contain long after this process has gone to
11683 	 * sleep and its kernel stack has been unmapped, etc.
11684 	 *
11685 	 * First get some memory for the uscsi_cmd struct and copy the
11686 	 * contents of the given uscsi_cmd struct into it.
11687 	 */
11688 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
11689 	bcopy(incmd, uscmd, sizeof (struct uscsi_cmd));
11690 
11691 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: uscsi_cmd",
11692 	    (uchar_t *)uscmd, sizeof (struct uscsi_cmd), SD_LOG_HEX);
11693 
11694 	/*
11695 	 * Now get some space for the CDB, and copy the given CDB into
11696 	 * it. Use ddi_copyin() in case the data is in user space.
11697 	 */
11698 	uscmd->uscsi_cdb = kmem_zalloc((size_t)incmd->uscsi_cdblen, KM_SLEEP);
11699 	flags = (cdbspace == UIO_SYSSPACE) ? FKIOCTL : 0;
11700 	if (ddi_copyin(incmd->uscsi_cdb, uscmd->uscsi_cdb,
11701 	    (uint_t)incmd->uscsi_cdblen, flags) != 0) {
11702 		kmem_free(uscmd->uscsi_cdb, (size_t)incmd->uscsi_cdblen);
11703 		kmem_free(uscmd, sizeof (struct uscsi_cmd));
11704 		return (EFAULT);
11705 	}
11706 
11707 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: CDB",
11708 	    (uchar_t *)uscmd->uscsi_cdb, incmd->uscsi_cdblen, SD_LOG_HEX);
11709 
11710 	bp = getrbuf(KM_SLEEP);
11711 
11712 	/*
11713 	 * Allocate an sd_uscsi_info struct and fill it with the info
11714 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
11715 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
11716 	 * since we allocate the buf here in this function, we do not
11717 	 * need to preserve the prior contents of b_private.
11718 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
11719 	 */
11720 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
11721 	uip->ui_flags = path_flag;
11722 	uip->ui_cmdp  = uscmd;
11723 	bp->b_private = uip;
11724 
11725 	/*
11726 	 * Initialize Request Sense buffering, if requested.
11727 	 */
11728 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
11729 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
11730 		/*
11731 		 * Here uscmd->uscsi_rqbuf currently points to the caller's
11732 		 * buffer, but we replace this with a kernel buffer that
11733 		 * we allocate to use with the sense data. The sense data
11734 		 * (if present) gets copied into this new buffer before the
11735 		 * command is completed.  Then we copy the sense data from
11736 		 * our allocated buf into the caller's buffer below. Note
11737 		 * that incmd->uscsi_rqbuf and incmd->uscsi_rqlen are used
11738 		 * below to perform the copy back to the caller's buf.
11739 		 */
11740 		uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
11741 		if (rqbufspace == UIO_USERSPACE) {
11742 			uscmd->uscsi_rqlen   = SENSE_LENGTH;
11743 			uscmd->uscsi_rqresid = SENSE_LENGTH;
11744 		} else {
11745 			uchar_t rlen = min(SENSE_LENGTH, uscmd->uscsi_rqlen);
11746 			uscmd->uscsi_rqlen   = rlen;
11747 			uscmd->uscsi_rqresid = rlen;
11748 		}
11749 	} else {
11750 		uscmd->uscsi_rqbuf = NULL;
11751 		uscmd->uscsi_rqlen   = 0;
11752 		uscmd->uscsi_rqresid = 0;
11753 	}
11754 
11755 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: rqbuf:0x%p  rqlen:%d\n",
11756 	    uscmd->uscsi_rqbuf, uscmd->uscsi_rqlen);
11757 
11758 	if (un->un_f_is_fibre == FALSE) {
11759 		/*
11760 		 * Force asynchronous mode, if necessary.  Doing this here
11761 		 * has the unfortunate effect of running other queued
11762 		 * commands async also, but since the main purpose of this
11763 		 * capability is downloading new drive firmware, we can
11764 		 * probably live with it.
11765 		 */
11766 		if ((uscmd->uscsi_flags & USCSI_ASYNC) != 0) {
11767 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
11768 				== 1) {
11769 				if (scsi_ifsetcap(SD_ADDRESS(un),
11770 					    "synchronous", 0, 1) == 1) {
11771 					SD_TRACE(SD_LOG_IO, un,
11772 					"sd_send_scsi_cmd: forced async ok\n");
11773 				} else {
11774 					SD_TRACE(SD_LOG_IO, un,
11775 					    "sd_send_scsi_cmd: "
11776 					    "forced async failed\n");
11777 					rval = EINVAL;
11778 					goto done;
11779 				}
11780 			}
11781 		}
11782 
11783 		/*
11784 		 * Re-enable synchronous mode, if requested
11785 		 */
11786 		if (uscmd->uscsi_flags & USCSI_SYNC) {
11787 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
11788 				== 0) {
11789 				int i = scsi_ifsetcap(SD_ADDRESS(un),
11790 						"synchronous", 1, 1);
11791 				SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11792 					"re-enabled sync %s\n",
11793 					(i == 1) ? "ok" : "failed");
11794 			}
11795 		}
11796 	}
11797 
11798 	/*
11799 	 * Commands sent with priority are intended for error recovery
11800 	 * situations, and do not have retries performed.
11801 	 */
11802 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
11803 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
11804 	}
11805 
11806 	/*
11807 	 * If we're going to do actual I/O, let physio do all the right things
11808 	 */
11809 	if (uscmd->uscsi_buflen != 0) {
11810 		struct iovec	aiov;
11811 		struct uio	auio;
11812 		struct uio	*uio = &auio;
11813 
11814 		bzero(&auio, sizeof (struct uio));
11815 		bzero(&aiov, sizeof (struct iovec));
11816 		aiov.iov_base = uscmd->uscsi_bufaddr;
11817 		aiov.iov_len  = uscmd->uscsi_buflen;
11818 		uio->uio_iov  = &aiov;
11819 
11820 		uio->uio_iovcnt  = 1;
11821 		uio->uio_resid   = uscmd->uscsi_buflen;
11822 		uio->uio_segflg  = dataspace;
11823 
11824 		/*
11825 		 * physio() will block here until the command completes....
11826 		 */
11827 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling physio.\n");
11828 
11829 		rval = physio(sd_uscsi_strategy, bp, dev,
11830 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE),
11831 		    sduscsimin, uio);
11832 
11833 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11834 		    "returned from physio with 0x%x\n", rval);
11835 
11836 	} else {
11837 		/*
11838 		 * We have to mimic what physio would do here! Argh!
11839 		 */
11840 		bp->b_flags  = B_BUSY |
11841 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE);
11842 		bp->b_edev   = dev;
11843 		bp->b_dev    = cmpdev(dev);	/* maybe unnecessary? */
11844 		bp->b_bcount = 0;
11845 		bp->b_blkno  = 0;
11846 
11847 		SD_TRACE(SD_LOG_IO, un,
11848 		    "sd_send_scsi_cmd: calling sd_uscsi_strategy...\n");
11849 
11850 		(void) sd_uscsi_strategy(bp);
11851 
11852 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling biowait\n");
11853 
11854 		rval = biowait(bp);
11855 
11856 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11857 		    "returned from biowait with 0x%x\n", rval);
11858 	}
11859 
11860 done:
11861 
11862 #ifdef SDDEBUG
11863 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11864 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
11865 	    uscmd->uscsi_status, uscmd->uscsi_resid);
11866 	if (uscmd->uscsi_bufaddr != NULL) {
11867 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11868 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
11869 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
11870 		if (dataspace == UIO_SYSSPACE) {
11871 			SD_DUMP_MEMORY(un, SD_LOG_IO,
11872 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
11873 			    uscmd->uscsi_buflen, SD_LOG_HEX);
11874 		}
11875 	}
11876 #endif
11877 
11878 	/*
11879 	 * Get the status and residual to return to the caller.
11880 	 */
11881 	incmd->uscsi_status = uscmd->uscsi_status;
11882 	incmd->uscsi_resid  = uscmd->uscsi_resid;
11883 
11884 	/*
11885 	 * If the caller wants sense data, copy back whatever sense data
11886 	 * we may have gotten, and update the relevant rqsense info.
11887 	 */
11888 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
11889 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
11890 
11891 		int rqlen = uscmd->uscsi_rqlen - uscmd->uscsi_rqresid;
11892 		rqlen = min(((int)incmd->uscsi_rqlen), rqlen);
11893 
11894 		/* Update the Request Sense status and resid */
11895 		incmd->uscsi_rqresid  = incmd->uscsi_rqlen - rqlen;
11896 		incmd->uscsi_rqstatus = uscmd->uscsi_rqstatus;
11897 
11898 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11899 		    "uscsi_rqstatus: 0x%02x  uscsi_rqresid:0x%x\n",
11900 		    incmd->uscsi_rqstatus, incmd->uscsi_rqresid);
11901 
11902 		/* Copy out the sense data for user processes */
11903 		if ((incmd->uscsi_rqbuf != NULL) && (rqlen != 0)) {
11904 			int flags =
11905 			    (rqbufspace == UIO_USERSPACE) ? 0 : FKIOCTL;
11906 			if (ddi_copyout(uscmd->uscsi_rqbuf, incmd->uscsi_rqbuf,
11907 			    rqlen, flags) != 0) {
11908 				rval = EFAULT;
11909 			}
11910 			/*
11911 			 * Note: Can't touch incmd->uscsi_rqbuf so use
11912 			 * uscmd->uscsi_rqbuf instead. They're the same.
11913 			 */
11914 			SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11915 			    "incmd->uscsi_rqbuf: 0x%p  rqlen:%d\n",
11916 			    incmd->uscsi_rqbuf, rqlen);
11917 			SD_DUMP_MEMORY(un, SD_LOG_IO, "rq",
11918 			    (uchar_t *)uscmd->uscsi_rqbuf, rqlen, SD_LOG_HEX);
11919 		}
11920 	}
11921 
11922 	/*
11923 	 * Free allocated resources and return; mapout the buf in case it was
11924 	 * mapped in by a lower layer.
11925 	 */
11926 	bp_mapout(bp);
11927 	freerbuf(bp);
11928 	kmem_free(uip, sizeof (struct sd_uscsi_info));
11929 	if (uscmd->uscsi_rqbuf != NULL) {
11930 		kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
11931 	}
11932 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
11933 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
11934 
11935 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: exit\n");
11936 
11937 	return (rval);
11938 }
11939 
11940 
11941 /*
11942  *    Function: sd_buf_iodone
11943  *
11944  * Description: Frees the sd_xbuf & returns the buf to its originator.
11945  *
11946  *     Context: May be called from interrupt context.
11947  */
11948 /* ARGSUSED */
11949 static void
11950 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
11951 {
11952 	struct sd_xbuf *xp;
11953 
11954 	ASSERT(un != NULL);
11955 	ASSERT(bp != NULL);
11956 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11957 
11958 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
11959 
11960 	xp = SD_GET_XBUF(bp);
11961 	ASSERT(xp != NULL);
11962 
11963 	mutex_enter(SD_MUTEX(un));
11964 
11965 	/*
11966 	 * Grab the time when the cmd completed.
11967 	 * This is used for determining if the device has been
11968 	 * idle long enough to declare it idle to the PM framework.
11969 	 * This is for lowering the overhead, and therefore improving
11970 	 * performance per I/O operation.
11971 	 */
11972 	un->un_pm_idle_time = ddi_get_time();
11973 
11974 	un->un_ncmds_in_driver--;
11975 	ASSERT(un->un_ncmds_in_driver >= 0);
11976 	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
11977 	    un->un_ncmds_in_driver);
11978 
11979 	mutex_exit(SD_MUTEX(un));
11980 
11981 	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
11982 	biodone(bp);				/* bp is gone after this */
11983 
11984 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
11985 }
11986 
11987 
11988 /*
11989  *    Function: sd_uscsi_iodone
11990  *
11991  * Description: Frees the sd_xbuf & returns the buf to its originator.
11992  *
11993  *     Context: May be called from interrupt context.
11994  */
11995 /* ARGSUSED */
11996 static void
11997 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
11998 {
11999 	struct sd_xbuf *xp;
12000 
12001 	ASSERT(un != NULL);
12002 	ASSERT(bp != NULL);
12003 
12004 	xp = SD_GET_XBUF(bp);
12005 	ASSERT(xp != NULL);
12006 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12007 
12008 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
12009 
12010 	bp->b_private = xp->xb_private;
12011 
12012 	mutex_enter(SD_MUTEX(un));
12013 
12014 	/*
12015 	 * Grab the time when the cmd completed.
12016 	 * This is used for determining if the device has been
12017 	 * idle long enough to declare it idle to the PM framework.
12018 	 * This is for lowering the overhead, and therefore improving
12019 	 * performance per I/O operation.
12020 	 */
12021 	un->un_pm_idle_time = ddi_get_time();
12022 
12023 	un->un_ncmds_in_driver--;
12024 	ASSERT(un->un_ncmds_in_driver >= 0);
12025 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
12026 	    un->un_ncmds_in_driver);
12027 
12028 	mutex_exit(SD_MUTEX(un));
12029 
12030 	kmem_free(xp, sizeof (struct sd_xbuf));
12031 	biodone(bp);
12032 
12033 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
12034 }
12035 
12036 
12037 /*
12038  *    Function: sd_mapblockaddr_iostart
12039  *
12040  * Description: Verify the request lies within the partition limits for
12041  *		the indicated minor device.  Issue an "overrun" buf if the
12042  *		request would exceed the partition range.  Convert the
12043  *		partition-relative block address to an absolute address.
12044  *
12045  *     Context: Can sleep
12046  *
12047  *      Issues: This follows what the old code did, in terms of accessing
12048  *		some of the partition info in the unit struct without holding
12049  *		the mutex.  This is a general issue, if the partition info
12050  *		can be altered while IO is in progress... as soon as we send
12051  *		a buf, its partitioning can be invalid before it gets to the
12052  *		device.  Probably the right fix is to move partitioning out
12053  *		of the driver entirely.
12054  */
12055 
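
/*
 * Worked example (hypothetical numbers, for illustration only):
 * consider a partition with un_offset = 1000 and dkl_nblk = 100.
 * A buf targeting partition-relative block 95 with a b_bcount of
 * 10 system blocks asks for blocks 95..104, but only 95..99 exist,
 * so available_nblocks = 100 - 95 = 5 and an "overrun" buf is
 * cloned for the first 5 blocks; the 5 blocks not transferred show
 * up in b_resid at completion.  The address actually sent down the
 * chain is the absolute block 1000 + 95 = 1095.
 */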
12056 static void
12057 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
12058 {
12059 	daddr_t	nblocks;	/* #blocks in the given partition */
12060 	daddr_t	blocknum;	/* Block number specified by the buf */
12061 	size_t	requested_nblocks;
12062 	size_t	available_nblocks;
12063 	int	partition;
12064 	diskaddr_t	partition_offset;
12065 	struct sd_xbuf *xp;
12066 
12067 
12068 	ASSERT(un != NULL);
12069 	ASSERT(bp != NULL);
12070 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12071 
12072 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12073 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
12074 
12075 	xp = SD_GET_XBUF(bp);
12076 	ASSERT(xp != NULL);
12077 
12078 	/*
12079 	 * If the geometry is not indicated as valid, attempt to access
12080 	 * the unit & verify the geometry/label. This can be the case for
12081 	 * removable-media devices, or if the device was opened in
12082 	 * NDELAY/NONBLOCK mode.
12083 	 */
12084 	if ((un->un_f_geometry_is_valid != TRUE) &&
12085 	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
12086 		/*
12087 		 * For removable devices it is possible to start an I/O
12088 		 * without a media by opening the device in nodelay mode.
12089 		 * Also for writable CDs there can be many scenarios where
12090 	 * there is no geometry yet but the volume manager is trying to
12091 	 * issue a read() just because it can see the TOC on the CD. So
12092 		 * do not print a message for removables.
12093 		 */
12094 		if (!ISREMOVABLE(un)) {
12095 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12096 			    "i/o to invalid geometry\n");
12097 		}
12098 		bioerror(bp, EIO);
12099 		bp->b_resid = bp->b_bcount;
12100 		SD_BEGIN_IODONE(index, un, bp);
12101 		return;
12102 	}
12103 
12104 	partition = SDPART(bp->b_edev);
12105 
12106 	/* #blocks in partition */
12107 	nblocks = un->un_map[partition].dkl_nblk;
12108 
12109 	/* Use of a local variable potentially improves performance slightly */
12110 	partition_offset = un->un_offset[partition];
12111 
12112 	/*
12113 	 * blocknum is the starting block number of the request. At this
12114 	 * point it is still relative to the start of the minor device.
12115 	 */
12116 	blocknum = xp->xb_blkno;
12117 
12118 	/*
12119 	 * Legacy: If the starting block number is one past the last block
12120 	 * in the partition, do not set B_ERROR in the buf.
12121 	 */
12122 	if (blocknum == nblocks)  {
12123 		goto error_exit;
12124 	}
12125 
12126 	/*
12127 	 * Confirm that the first block of the request lies within the
12128 	 * partition limits. Also the requested number of bytes must be
12129 	 * a multiple of the system block size.
12130 	 */
12131 	if ((blocknum < 0) || (blocknum >= nblocks) ||
12132 	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
12133 		bp->b_flags |= B_ERROR;
12134 		goto error_exit;
12135 	}
12136 
12137 	/*
12138 	 * If the requested # blocks exceeds the available # blocks, that
12139 	 * is an overrun of the partition.
12140 	 */
12141 	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
12142 	available_nblocks = (size_t)(nblocks - blocknum);
12143 	ASSERT(nblocks >= blocknum);
12144 
12145 	if (requested_nblocks > available_nblocks) {
12146 		/*
12147 		 * Allocate an "overrun" buf to allow the request to proceed
12148 		 * for the amount of space available in the partition. The
12149 		 * amount not transferred will be added into the b_resid
12150 		 * when the operation is complete. The overrun buf
12151 		 * replaces the original buf here, and the original buf
12152 		 * is saved inside the overrun buf, for later use.
12153 		 */
12154 		size_t resid = SD_SYSBLOCKS2BYTES(un,
12155 		    (offset_t)(requested_nblocks - available_nblocks));
12156 		size_t count = bp->b_bcount - resid;
12157 		/*
12158 		 * Note: count is an unsigned entity, thus it can NEVER
12159 		 * be less than 0, so we ASSERT that the original values
12160 		 * are correct.
12161 		 */
12162 		ASSERT(bp->b_bcount >= resid);
12163 
12164 		bp = sd_bioclone_alloc(bp, count, blocknum,
12165 			(int (*)(struct buf *)) sd_mapblockaddr_iodone);
12166 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
12167 		ASSERT(xp != NULL);
12168 	}
12169 
12170 	/* At this point there should be no residual for this buf. */
12171 	ASSERT(bp->b_resid == 0);
12172 
12173 	/* Convert the block number to an absolute address. */
12174 	xp->xb_blkno += partition_offset;
12175 
12176 	SD_NEXT_IOSTART(index, un, bp);
12177 
12178 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12179 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
12180 
12181 	return;
12182 
12183 error_exit:
12184 	bp->b_resid = bp->b_bcount;
12185 	SD_BEGIN_IODONE(index, un, bp);
12186 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12187 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
12188 }
12189 
12190 
12191 /*
12192  *    Function: sd_mapblockaddr_iodone
12193  *
12194  * Description: Completion-side processing for partition management.
12195  *
12196  *     Context: May be called under interrupt context
12197  */
12198 
12199 static void
12200 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
12201 {
12202 	/* int	partition; */	/* Not used, see below. */
12203 	ASSERT(un != NULL);
12204 	ASSERT(bp != NULL);
12205 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12206 
12207 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12208 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
12209 
12210 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
12211 		/*
12212 		 * We have an "overrun" buf to deal with...
12213 		 */
12214 		struct sd_xbuf	*xp;
12215 		struct buf	*obp;	/* ptr to the original buf */
12216 
12217 		xp = SD_GET_XBUF(bp);
12218 		ASSERT(xp != NULL);
12219 
12220 		/* Retrieve the pointer to the original buf */
12221 		obp = (struct buf *)xp->xb_private;
12222 		ASSERT(obp != NULL);
12223 
12224 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
12225 		bioerror(obp, bp->b_error);
12226 
12227 		sd_bioclone_free(bp);
12228 
12229 		/*
12230 		 * Get back the original buf.
12231 		 * Note that since the restoration of xb_blkno below
12232 		 * was removed, the sd_xbuf is not needed.
12233 		 */
12234 		bp = obp;
12235 		/*
12236 		 * xp = SD_GET_XBUF(bp);
12237 		 * ASSERT(xp != NULL);
12238 		 */
12239 	}
12240 
12241 	/*
12242 	 * Convert xp->xb_blkno back to a minor-device relative value.
12243 	 * Note: this has been commented out, as it is not needed in the
12244 	 * current implementation of the driver (ie, since this function
12245 	 * is at the top of the layering chains, so the info will be
12246 	 * discarded) and it is in the "hot" IO path.
12247 	 *
12248 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
12249 	 * xp->xb_blkno -= un->un_offset[partition];
12250 	 */
12251 
12252 	SD_NEXT_IODONE(index, un, bp);
12253 
12254 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12255 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
12256 }
12257 
12258 
12259 /*
12260  *    Function: sd_mapblocksize_iostart
12261  *
12262  * Description: Convert between system block size (un->un_sys_blocksize)
12263  *		and target block size (un->un_tgt_blocksize).
12264  *
12265  *     Context: Can sleep to allocate resources.
12266  *
12267  * Assumptions: A higher layer has already performed any partition validation,
12268  *		and converted the xp->xb_blkno to an absolute value relative
12269  *		to the start of the device.
12270  *
12271  *		It is also assumed that the higher layer has implemented
12272  *		an "overrun" mechanism for the case where the request would
12273  *		read/write beyond the end of a partition.  In this case we
12274  *		assume (and ASSERT) that bp->b_resid == 0.
12275  *
12276  *		Note: The implementation for this routine assumes the target
12277  *		block size remains constant between allocation and transport.
12278  */
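
/*
 * Worked example (hypothetical numbers, for illustration only): with
 * un_sys_blocksize = 512 and un_tgt_blocksize = 2048 (a typical
 * CD-ROM), a request for system block 3 of length 1024 bytes gives
 * first_byte = 3 * 512 = 1536, start_block = 1536 / 2048 = 0,
 * end_block = (1536 + 1024 + 2047) / 2048 = 2, and request_bytes =
 * 2 * 2048 = 4096.  The request is unaligned (1536 % 2048 != 0), so
 * a 4096-byte shadow READ is issued, and mbs_copy_offset =
 * 1536 - (0 * 2048) = 1536 marks where the caller's data begins
 * within the shadow buffer.
 */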
12279 
12280 static void
12281 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
12282 {
12283 	struct sd_mapblocksize_info	*bsp;
12284 	struct sd_xbuf			*xp;
12285 	offset_t first_byte;
12286 	daddr_t	start_block, end_block;
12287 	daddr_t	request_bytes;
12288 	ushort_t is_aligned = FALSE;
12289 
12290 	ASSERT(un != NULL);
12291 	ASSERT(bp != NULL);
12292 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12293 	ASSERT(bp->b_resid == 0);
12294 
12295 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12296 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
12297 
12298 	/*
12299 	 * For a non-writable CD, a write request is an error
12300 	 */
12301 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
12302 	    (un->un_f_mmc_writable_media == FALSE)) {
12303 		bioerror(bp, EIO);
12304 		bp->b_resid = bp->b_bcount;
12305 		SD_BEGIN_IODONE(index, un, bp);
12306 		return;
12307 	}
12308 
12309 	/*
12310 	 * We do not need a shadow buf if the device is using
12311 	 * un->un_sys_blocksize as its block size or if bcount == 0.
12312 	 * In this case there is no layer-private data block allocated.
12313 	 */
12314 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12315 	    (bp->b_bcount == 0)) {
12316 		goto done;
12317 	}
12318 
12319 #if defined(__i386) || defined(__amd64)
12320 	/* We do not support non-block-aligned transfers for ROD devices */
12321 	ASSERT(!ISROD(un));
12322 #endif
12323 
12324 	xp = SD_GET_XBUF(bp);
12325 	ASSERT(xp != NULL);
12326 
12327 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12328 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
12329 	    un->un_tgt_blocksize, un->un_sys_blocksize);
12330 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12331 	    "request start block:0x%x\n", xp->xb_blkno);
12332 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12333 	    "request len:0x%x\n", bp->b_bcount);
12334 
12335 	/*
12336 	 * Allocate the layer-private data area for the mapblocksize layer.
12337 	 * Layers are allowed to use the xp_private member of the sd_xbuf
12338 	 * struct to store the pointer to their layer-private data block, but
12339 	 * each layer also has the responsibility of restoring the prior
12340 	 * contents of xb_private before returning the buf/xbuf to the
12341 	 * higher layer that sent it.
12342 	 *
12343 	 * Here we save the prior contents of xp->xb_private into the
12344 	 * bsp->mbs_oprivate field of our layer-private data area. This value
12345 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
12346 	 * the layer-private area and returning the buf/xbuf to the layer
12347 	 * that sent it.
12348 	 *
12349 	 * Note that here we use kmem_zalloc for the allocation as there are
12350 	 * parts of the mapblocksize code that expect certain fields to be
12351 	 * zero unless explicitly set to a required value.
12352 	 */
12353 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12354 	bsp->mbs_oprivate = xp->xb_private;
12355 	xp->xb_private = bsp;
12356 
12357 	/*
12358 	 * This treats the data on the disk (target) as an array of bytes.
12359 	 * first_byte is the byte offset, from the beginning of the device,
12360 	 * to the location of the request. This is converted from a
12361 	 * un->un_sys_blocksize block address to a byte offset, and then back
12362 	 * to a block address based upon a un->un_tgt_blocksize block size.
12363 	 *
12364 	 * xp->xb_blkno should be absolute upon entry into this function,
12365 	 * but it is based upon partitions that use the "system"
12366 	 * block size. It must be adjusted to reflect the block size of
12367 	 * the target.
12368 	 *
12369 	 * Note that end_block is actually the block that follows the last
12370 	 * block of the request, but that's what is needed for the computation.
12371 	 */
12372 	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12373 	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
12374 	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
12375 	    un->un_tgt_blocksize;
12376 
12377 	/* request_bytes is rounded up to a multiple of the target block size */
12378 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
12379 
12380 	/*
12381 	 * See if the starting address of the request and the request
12382 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
12383 	 * then we do not need to allocate a shadow buf to handle the request.
12384 	 */
12385 	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
12386 	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
12387 		is_aligned = TRUE;
12388 	}
12389 
12390 	if ((bp->b_flags & B_READ) == 0) {
12391 		/*
12392 		 * Lock the range for a write operation. An aligned request is
12393 		 * considered a simple write; otherwise the request must be a
12394 		 * read-modify-write.
12395 		 */
12396 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
12397 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
12398 	}
12399 
12400 	/*
12401 	 * Alloc a shadow buf if the request is not aligned. Also, this is
12402 	 * where the READ command is generated for a read-modify-write. (The
12403 	 * write phase is deferred until after the read completes.)
12404 	 */
12405 	if (is_aligned == FALSE) {
12406 
12407 		struct sd_mapblocksize_info	*shadow_bsp;
12408 		struct sd_xbuf	*shadow_xp;
12409 		struct buf	*shadow_bp;
12410 
12411 		/*
12412 		 * Allocate the shadow buf and its associated xbuf. Note that
12413 		 * after this call the xb_blkno value in both the original
12414 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
12415 		 * same: absolute (ie, relative to the start of the device), and
12416 		 * adjusted for the target block size. The b_blkno in the
12417 		 * shadow buf will also be set to this value. We should never
12418 		 * change b_blkno in the original bp however.
12419 		 *
12420 		 * Note also that the shadow buf will always need to be a
12421 		 * READ command, regardless of whether the incoming command
12422 		 * is a READ or a WRITE.
12423 		 */
12424 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
12425 		    xp->xb_blkno,
12426 		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
12427 
12428 		shadow_xp = SD_GET_XBUF(shadow_bp);
12429 
12430 		/*
12431 		 * Allocate the layer-private data for the shadow buf.
12432 		 * (No need to preserve xb_private in the shadow xbuf.)
12433 		 */
12434 		shadow_xp->xb_private = shadow_bsp =
12435 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12436 
12437 		/*
12438 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
12439 		 * to figure out where the start of the user data is (based upon
12440 		 * the system block size) in the data returned by the READ
12441 		 * command (which will be based upon the target blocksize). Note
12442 		 * that this is only really used if the request is unaligned.
12443 		 */
12444 		bsp->mbs_copy_offset = (ssize_t)(first_byte -
12445 		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
12446 		ASSERT((bsp->mbs_copy_offset >= 0) &&
12447 		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
12448 
12449 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
12450 
12451 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
12452 
12453 		/* Transfer the wmap (if any) to the shadow buf */
12454 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
12455 		bsp->mbs_wmp = NULL;
12456 
12457 		/*
12458 		 * The shadow buf goes on from here in place of the
12459 		 * original buf.
12460 		 */
12461 		shadow_bsp->mbs_orig_bp = bp;
12462 		bp = shadow_bp;
12463 	}
12464 
12465 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12466 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
12467 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12468 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
12469 	    request_bytes);
12470 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12471 	    "sd_mapblocksize_iostart: shadow buf:0x%p\n", bp);
12472 
12473 done:
12474 	SD_NEXT_IOSTART(index, un, bp);
12475 
12476 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12477 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
12478 }
12479 
12480 
12481 /*
12482  *    Function: sd_mapblocksize_iodone
12483  *
12484  * Description: Completion side processing for block-size mapping.
12485  *
12486  *     Context: May be called under interrupt context
12487  */
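
/*
 * Worked example (continuing the hypothetical 512/2048 case from
 * sd_mapblocksize_iostart above): if the 4096-byte shadow READ at
 * target block 0 completes with b_resid = 2048, then shadow_start =
 * 0, shadow_end = 0 + 4096 - 2048 = 2048, and request_end =
 * 0 + 1536 + 1024 = 2560.  Since shadow_end < request_end, the
 * shortfall is 2560 - 2048 = 512, so the original buf completes
 * with b_resid = 512 and only 1024 - 512 = 512 bytes are copied
 * back to it.
 */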
12488 
12489 static void
12490 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
12491 {
12492 	struct sd_mapblocksize_info	*bsp;
12493 	struct sd_xbuf	*xp;
12494 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
12495 	struct buf	*orig_bp;	/* ptr to the original buf */
12496 	offset_t	shadow_end;
12497 	offset_t	request_end;
12498 	offset_t	shadow_start;
12499 	ssize_t		copy_offset;
12500 	size_t		copy_length;
12501 	size_t		shortfall;
12502 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
12503 	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
12504 
12505 	ASSERT(un != NULL);
12506 	ASSERT(bp != NULL);
12507 
12508 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12509 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
12510 
12511 	/*
12512 	 * There is no shadow buf or layer-private data if the target is
12513 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
12514 	 */
12515 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12516 	    (bp->b_bcount == 0)) {
12517 		goto exit;
12518 	}
12519 
12520 	xp = SD_GET_XBUF(bp);
12521 	ASSERT(xp != NULL);
12522 
12523 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
12524 	bsp = xp->xb_private;
12525 
12526 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
12527 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
12528 
12529 	if (is_write) {
12530 		/*
12531 		 * For a WRITE request we must free up the block range that
12532 		 * we have locked up.  This holds regardless of whether this is
12533 		 * an aligned write request or a read-modify-write request.
12534 		 */
12535 		sd_range_unlock(un, bsp->mbs_wmp);
12536 		bsp->mbs_wmp = NULL;
12537 	}
12538 
12539 	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
12540 		/*
12541 		 * An aligned read or write command will have no shadow buf;
12542 		 * there is not much else to do with it.
12543 		 */
12544 		goto done;
12545 	}
12546 
12547 	orig_bp = bsp->mbs_orig_bp;
12548 	ASSERT(orig_bp != NULL);
12549 	orig_xp = SD_GET_XBUF(orig_bp);
12550 	ASSERT(orig_xp != NULL);
12551 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12552 
12553 	if (!is_write && has_wmap) {
12554 		/*
12555 		 * A READ with a wmap means this is the READ phase of a
12556 		 * read-modify-write. If an error occurred on the READ then
12557 		 * we do not proceed with the WRITE phase or copy any data.
12558 		 * Just release the write maps and return with an error.
12559 		 */
12560 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
12561 			orig_bp->b_resid = orig_bp->b_bcount;
12562 			bioerror(orig_bp, bp->b_error);
12563 			sd_range_unlock(un, bsp->mbs_wmp);
12564 			goto freebuf_done;
12565 		}
12566 	}
12567 
12568 	/*
12569 	 * Here is where we set up to copy the data from the shadow buf
12570 	 * into the space associated with the original buf.
12571 	 *
12572 	 * To deal with the conversion between block sizes, these
12573 	 * computations treat the data as an array of bytes, with the
12574 	 * first byte (byte 0) corresponding to the first byte in the
12575 	 * first block on the disk.
12576 	 */
12577 
12578 	/*
12579 	 * shadow_start and shadow_end indicate the location and extent of
12580 	 * the data returned with the shadow IO request.
12581 	 */
12582 	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12583 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
12584 
12585 	/*
12586 	 * copy_offset gives the offset (in bytes) from the start of the first
12587 	 * block of the READ request to the beginning of the data.  We retrieve
12588 	 * this value from the layer-private data area, where it was saved
12589 	 * by sd_mapblocksize_iostart(). copy_length gives the amount of
12590 	 * data to be copied (in bytes).
12591 	 */
12592 	copy_offset  = bsp->mbs_copy_offset;
12593 	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
12594 	copy_length  = orig_bp->b_bcount;
12595 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
12596 
12597 	/*
12598 	 * Set up the resid and error fields of orig_bp as appropriate.
12599 	 */
12600 	if (shadow_end >= request_end) {
12601 		/* We got all the requested data; set resid to zero */
12602 		orig_bp->b_resid = 0;
12603 	} else {
12604 		/*
12605 		 * We failed to get enough data to fully satisfy the original
12606 		 * request. Just copy back whatever data we got and set
12607 		 * up the residual and error code as required.
12608 		 *
12609 		 * 'shortfall' is the amount by which the data received with the
12610 		 * shadow buf has "fallen short" of the requested amount.
12611 		 */
12612 		shortfall = (size_t)(request_end - shadow_end);
12613 
12614 		if (shortfall > orig_bp->b_bcount) {
12615 			/*
12616 			 * We did not get enough data to even partially
12617 			 * fulfill the original request.  The residual is
12618 			 * equal to the amount requested.
12619 			 */
12620 			orig_bp->b_resid = orig_bp->b_bcount;
12621 		} else {
12622 			/*
12623 			 * We did not get all the data that we requested
12624 			 * from the device, but we will try to return what
12625 			 * portion we did get.
12626 			 */
12627 			orig_bp->b_resid = shortfall;
12628 		}
12629 		ASSERT(copy_length >= orig_bp->b_resid);
12630 		copy_length  -= orig_bp->b_resid;
12631 	}
12632 
12633 	/* Propagate the error code from the shadow buf to the original buf */
12634 	bioerror(orig_bp, bp->b_error);
12635 
12636 	if (is_write) {
12637 		goto freebuf_done;	/* No data copying for a WRITE */
12638 	}
12639 
12640 	if (has_wmap) {
12641 		/*
12642 		 * This is a READ command from the READ phase of a
12643 		 * read-modify-write request. We have to copy the data given
12644 		 * by the user OVER the data returned by the READ command,
12645 		 * then convert the command from a READ to a WRITE and send
12646 		 * it back to the target.
12647 		 */
12648 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
12649 		    copy_length);
12650 
12651 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
12652 
12653 		/*
12654 		 * Dispatch the WRITE command to the taskq thread, which
12655 		 * will in turn send the command to the target. When the
12656 		 * WRITE command completes, we (sd_mapblocksize_iodone())
12657 		 * will get called again as part of the iodone chain
12658 		 * processing for it. Note that we will still be dealing
12659 		 * with the shadow buf at that point.
12660 		 */
12661 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
12662 		    KM_NOSLEEP) != 0) {
12663 			/*
12664 			 * Dispatch was successful so we are done. Return
12665 			 * without going any higher up the iodone chain. Do
12666 			 * not free up any layer-private data until after the
12667 			 * WRITE completes.
12668 			 */
12669 			return;
12670 		}
12671 
12672 		/*
12673 		 * Dispatch of the WRITE command failed; set up the error
12674 		 * condition and send this IO back up the iodone chain.
12675 		 */
12676 		bioerror(orig_bp, EIO);
12677 		orig_bp->b_resid = orig_bp->b_bcount;
12678 
12679 	} else {
12680 		/*
12681 		 * This is a regular READ request (ie, not a RMW). Copy the
12682 		 * data from the shadow buf into the original buf. The
12683 		 * copy_offset compensates for any "misalignment" between the
12684 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
12685 		 * original buf (with its un->un_sys_blocksize blocks).
12686 		 */
12687 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
12688 		    copy_length);
12689 	}
12690 
12691 freebuf_done:
12692 
12693 	/*
12694 	 * At this point we still have both the shadow buf AND the original
12695 	 * buf to deal with, as well as the layer-private data area in each.
12696 	 * Local variables are as follows:
12697 	 *
12698 	 * bp -- points to shadow buf
12699 	 * xp -- points to xbuf of shadow buf
12700 	 * bsp -- points to layer-private data area of shadow buf
12701 	 * orig_bp -- points to original buf
12702 	 *
12703 	 * First free the shadow buf and its associated xbuf, then free the
12704 	 * layer-private data area from the shadow buf. There is no need to
12705 	 * restore xb_private in the shadow xbuf.
12706 	 */
12707 	sd_shadow_buf_free(bp);
12708 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12709 
12710 	/*
12711 	 * Now update the local variables to point to the original buf, xbuf,
12712 	 * and layer-private area.
12713 	 */
12714 	bp = orig_bp;
12715 	xp = SD_GET_XBUF(bp);
12716 	ASSERT(xp != NULL);
12717 	ASSERT(xp == orig_xp);
12718 	bsp = xp->xb_private;
12719 	ASSERT(bsp != NULL);
12720 
12721 done:
12722 	/*
12723 	 * Restore xb_private to whatever it was set to by the next higher
12724 	 * layer in the chain, then free the layer-private data area.
12725 	 */
12726 	xp->xb_private = bsp->mbs_oprivate;
12727 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12728 
12729 exit:
12730 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
12731 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
12732 
12733 	SD_NEXT_IODONE(index, un, bp);
12734 }
12735 
12736 
12737 /*
12738  *    Function: sd_checksum_iostart
12739  *
12740  * Description: A stub function for a layer that's currently not used.
12741  *		For now just a placeholder.
12742  *
12743  *     Context: Kernel thread context
12744  */
12745 
12746 static void
12747 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
12748 {
12749 	ASSERT(un != NULL);
12750 	ASSERT(bp != NULL);
12751 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12752 	SD_NEXT_IOSTART(index, un, bp);
12753 }
12754 
12755 
12756 /*
12757  *    Function: sd_checksum_iodone
12758  *
12759  * Description: A stub function for a layer that's currently not used.
12760  *		For now just a placeholder.
12761  *
12762  *     Context: May be called under interrupt context
12763  */
12764 
12765 static void
12766 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
12767 {
12768 	ASSERT(un != NULL);
12769 	ASSERT(bp != NULL);
12770 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12771 	SD_NEXT_IODONE(index, un, bp);
12772 }
12773 
12774 
12775 /*
12776  *    Function: sd_checksum_uscsi_iostart
12777  *
12778  * Description: A stub function for a layer that's currently not used.
12779  *		For now just a placeholder.
12780  *
12781  *     Context: Kernel thread context
12782  */
12783 
12784 static void
12785 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
12786 {
12787 	ASSERT(un != NULL);
12788 	ASSERT(bp != NULL);
12789 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12790 	SD_NEXT_IOSTART(index, un, bp);
12791 }
12792 
12793 
12794 /*
12795  *    Function: sd_checksum_uscsi_iodone
12796  *
12797  * Description: A stub function for a layer that's currently not used.
12798  *		For now just a placeholder.
12799  *
12800  *     Context: May be called under interrupt context
12801  */
12802 
12803 static void
12804 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12805 {
12806 	ASSERT(un != NULL);
12807 	ASSERT(bp != NULL);
12808 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12809 	SD_NEXT_IODONE(index, un, bp);
12810 }
12811 
12812 
12813 /*
12814  *    Function: sd_pm_iostart
12815  *
12816  * Description: iostart-side routine for power management.
12817  *
12818  *     Context: Kernel thread context
12819  */
12820 
12821 static void
12822 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
12823 {
12824 	ASSERT(un != NULL);
12825 	ASSERT(bp != NULL);
12826 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12827 	ASSERT(!mutex_owned(&un->un_pm_mutex));
12828 
12829 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
12830 
12831 	if (sd_pm_entry(un) != DDI_SUCCESS) {
12832 		/*
12833 		 * Set up to return the failed buf back up the 'iodone'
12834 		 * side of the calling chain.
12835 		 */
12836 		bioerror(bp, EIO);
12837 		bp->b_resid = bp->b_bcount;
12838 
12839 		SD_BEGIN_IODONE(index, un, bp);
12840 
12841 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12842 		return;
12843 	}
12844 
12845 	SD_NEXT_IOSTART(index, un, bp);
12846 
12847 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12848 }
12849 
12850 
12851 /*
12852  *    Function: sd_pm_iodone
12853  *
12854  * Description: iodone-side routine for power management.
12855  *
12856  *     Context: may be called from interrupt context
12857  */
12858 
12859 static void
12860 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
12861 {
12862 	ASSERT(un != NULL);
12863 	ASSERT(bp != NULL);
12864 	ASSERT(!mutex_owned(&un->un_pm_mutex));
12865 
12866 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
12867 
12868 	/*
12869 	 * After attach the following flag is only read, so don't
12870 	 * take the penalty of acquiring a mutex for it.
12871 	 */
12872 	if (un->un_f_pm_is_enabled == TRUE) {
12873 		sd_pm_exit(un);
12874 	}
12875 
12876 	SD_NEXT_IODONE(index, un, bp);
12877 
12878 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
12879 }
12880 
12881 
12882 /*
12883  *    Function: sd_core_iostart
12884  *
12885  * Description: Primary driver function for enqueuing buf(9S) structs from
12886  *		the system and initiating IO to the target device
12887  *
12888  *     Context: Kernel thread context. Can sleep.
12889  *
12890  * Assumptions:  - The given xp->xb_blkno is absolute
12891  *		   (ie, relative to the start of the device).
12892  *		 - The IO is to be done using the native blocksize of
12893  *		   the device, as specified in un->un_tgt_blocksize.
12894  */
12895 /* ARGSUSED */
12896 static void
12897 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
12898 {
12899 	struct sd_xbuf *xp;
12900 
12901 	ASSERT(un != NULL);
12902 	ASSERT(bp != NULL);
12903 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12904 	ASSERT(bp->b_resid == 0);
12905 
12906 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
12907 
12908 	xp = SD_GET_XBUF(bp);
12909 	ASSERT(xp != NULL);
12910 
12911 	mutex_enter(SD_MUTEX(un));
12912 
12913 	/*
12914 	 * If we are currently in the failfast state, fail any new IO
12915 	 * that has B_FAILFAST set, then return.
12916 	 */
12917 	if ((bp->b_flags & B_FAILFAST) &&
12918 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
12919 		mutex_exit(SD_MUTEX(un));
12920 		bioerror(bp, EIO);
12921 		bp->b_resid = bp->b_bcount;
12922 		SD_BEGIN_IODONE(index, un, bp);
12923 		return;
12924 	}
12925 
12926 	if (SD_IS_DIRECT_PRIORITY(xp)) {
12927 		/*
12928 		 * Priority command -- transport it immediately.
12929 		 *
12930 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
12931 		 * because all direct priority commands should be associated
12932 		 * with error recovery actions which we don't want to retry.
12933 		 */
12934 		sd_start_cmds(un, bp);
12935 	} else {
12936 		/*
12937 		 * Normal command -- add it to the wait queue, then start
12938 		 * transporting commands from the wait queue.
12939 		 */
12940 		sd_add_buf_to_waitq(un, bp);
12941 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
12942 		sd_start_cmds(un, NULL);
12943 	}
12944 
12945 	mutex_exit(SD_MUTEX(un));
12946 
12947 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
12948 }
12949 
12950 
12951 /*
12952  *    Function: sd_init_cdb_limits
12953  *
12954  * Description: This is to handle scsi_pkt initialization differences
12955  *		between the driver platforms.
12956  *
12957  *		Legacy behaviors:
12958  *
12959  *		If the block number or the sector count exceeds the
12960  *		capabilities of a Group 0 command, shift over to a
12961  *		Group 1 command. We don't blindly use Group 1
12962  *		commands because a) some drives (CDC Wren IVs) get a
12963  *		bit confused, and b) there is probably a fair amount
12964  *		of speed difference for a target to receive and decode
12965  *		a 10 byte command instead of a 6 byte command.
12966  *
12967  *		The xfer time difference of 6 vs 10 byte CDBs is
12968  *		still significant so this code is still worthwhile.
12969  *		10 byte CDBs are very inefficient with the fas HBA driver
12970  *		and older disks. Each CDB byte took 1 usec with some
12971  *		popular disks.
12972  *
12973  *     Context: Must be called at attach time
12974  */
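
/*
 * For reference, the standard SCSI addressing limits behind the
 * group choice (not specific to this driver):
 *
 *	Group 0 (6-byte CDB):   21-bit LBA, up to 256 blocks per cmd
 *	Group 1 (10-byte CDB):  32-bit LBA, up to 65535 blocks
 *	Group 5 (12-byte CDB):  32-bit LBA, 32-bit transfer length
 *	Group 4 (16-byte CDB):  64-bit LBA, 32-bit transfer length
 */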
12975 
12976 static void
12977 sd_init_cdb_limits(struct sd_lun *un)
12978 {
12979 	/*
12980 	 * Use CDB_GROUP1 commands for most devices except for
12981 	 * parallel SCSI fixed drives in which case we get better
12982 	 * performance using CDB_GROUP0 commands (where applicable).
12983 	 */
12984 	un->un_mincdb = SD_CDB_GROUP1;
12985 #if !defined(__fibre)
12986 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
12987 	    !ISREMOVABLE(un)) {
12988 		un->un_mincdb = SD_CDB_GROUP0;
12989 	}
12990 #endif
12991 
12992 	/*
12993 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
12994 	 * commands for fixed disks unless we are building for a 32 bit
12995 	 * kernel.
12996 	 */
12997 #ifdef _LP64
12998 	un->un_maxcdb = (ISREMOVABLE(un)) ? SD_CDB_GROUP5 : SD_CDB_GROUP4;
12999 #else
13000 	un->un_maxcdb = (ISREMOVABLE(un)) ? SD_CDB_GROUP5 : SD_CDB_GROUP1;
13001 #endif
13002 
13003 	/*
13004 	 * x86 systems require the PKT_DMA_PARTIAL flag
13005 	 */
13006 #if defined(__x86)
13007 	un->un_pkt_flags = PKT_DMA_PARTIAL;
13008 #else
13009 	un->un_pkt_flags = 0;
13010 #endif
13011 
13012 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13013 	    ? sizeof (struct scsi_arq_status) : 1);
13014 	un->un_cmd_timeout = (ushort_t)sd_io_time;
13015 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13016 }
13017 
13018 
13019 /*
13020  *    Function: sd_initpkt_for_buf
13021  *
13022  * Description: Allocate and initialize for transport a scsi_pkt struct,
13023  *		based upon the info specified in the given buf struct.
13024  *
13025  *		Assumes the xb_blkno in the request is absolute (ie,
13026  *		relative to the start of the device (NOT partition!).
13027  *		Also assumes that the request is using the native block
13028  *		size of the device (as returned by the READ CAPACITY
13029  *		command).
13030  *
13031  * Return Code: SD_PKT_ALLOC_SUCCESS
13032  *		SD_PKT_ALLOC_FAILURE
13033  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13034  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13035  *
13036  *     Context: Kernel thread and may be called from software interrupt context
13037  *		as part of a sdrunout callback. This function may not block or
13038  *		call routines that block
13039  */
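
/*
 * Illustrative caller-side handling (a sketch only; the actual
 * dispatch logic lives in sd_start_cmds()):
 *
 *	switch (sd_initpkt_for_buf(bp, &pktp)) {
 *	case SD_PKT_ALLOC_SUCCESS:
 *		(void) scsi_transport(pktp);
 *		break;
 *	default:
 *		(handle the failure; see below)
 *		break;
 *	}
 *
 * On SD_PKT_ALLOC_FAILURE the driver has been left in SD_STATE_RWAIT,
 * and the sdrunout() callback restarts command processing once the
 * needed resources become available.
 */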
13040 
13041 static int
13042 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
13043 {
13044 	struct sd_xbuf	*xp;
13045 	struct scsi_pkt *pktp = NULL;
13046 	struct sd_lun	*un;
13047 	size_t		blockcount;
13048 	daddr_t		startblock;
13049 	int		rval;
13050 	int		cmd_flags;
13051 
13052 	ASSERT(bp != NULL);
13053 	ASSERT(pktpp != NULL);
13054 	xp = SD_GET_XBUF(bp);
13055 	ASSERT(xp != NULL);
13056 	un = SD_GET_UN(bp);
13057 	ASSERT(un != NULL);
13058 	ASSERT(mutex_owned(SD_MUTEX(un)));
13059 	ASSERT(bp->b_resid == 0);
13060 
13061 	SD_TRACE(SD_LOG_IO_CORE, un,
13062 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
13063 
13064 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13065 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
13066 		/*
13067 		 * Already have a scsi_pkt -- just need DMA resources.
13068 		 * We must recompute the CDB in case the mapping returns
13069 		 * a nonzero pkt_resid.
13070 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
13071 		 * that is being retried, the unmap/remap of the DMA resources
13072 		 * will result in the entire transfer starting over again
13073 		 * from the very first block.
13074 		 */
13075 		ASSERT(xp->xb_pktp != NULL);
13076 		pktp = xp->xb_pktp;
13077 	} else {
13078 		pktp = NULL;
13079 	}
13080 #endif /* __i386 || __amd64 */
13081 
13082 	startblock = xp->xb_blkno;	/* Absolute block num. */
13083 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
13084 
13085 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13086 
13087 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
13088 
13089 #else
13090 
13091 	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
13092 
13093 #endif
13094 
13095 	/*
13096 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
13097 	 * call scsi_init_pkt, and build the CDB.
13098 	 */
13099 	rval = sd_setup_rw_pkt(un, &pktp, bp,
13100 	    cmd_flags, sdrunout, (caddr_t)un,
13101 	    startblock, blockcount);
13102 
13103 	if (rval == 0) {
13104 		/*
13105 		 * Success.
13106 		 *
13107 		 * If partial DMA is being used and required for this transfer,
13108 		 * set it up here.
13109 		 */
13110 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
13111 		    (pktp->pkt_resid != 0)) {
13112 
13113 			/*
			 * Save the pkt_resid for the next xfer; the CDB
			 * size must stay the same across all the transfers.
13116 			 */
13117 			xp->xb_dma_resid = pktp->pkt_resid;
13118 
13119 			/* rezero resid */
13120 			pktp->pkt_resid = 0;
13121 
13122 		} else {
13123 			xp->xb_dma_resid = 0;
13124 		}
13125 
13126 		pktp->pkt_flags = un->un_tagflags;
13127 		pktp->pkt_time  = un->un_cmd_timeout;
13128 		pktp->pkt_comp  = sdintr;
13129 
13130 		pktp->pkt_private = bp;
13131 		*pktpp = pktp;
13132 
13133 		SD_TRACE(SD_LOG_IO_CORE, un,
13134 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
13135 
13136 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13137 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
13138 #endif
13139 
13140 		return (SD_PKT_ALLOC_SUCCESS);
13141 
13142 	}
13143 
13144 	/*
13145 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
13146 	 * from sd_setup_rw_pkt.
13147 	 */
13148 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
13149 
13150 	if (rval == SD_PKT_ALLOC_FAILURE) {
13151 		*pktpp = NULL;
13152 		/*
13153 		 * Set the driver state to RWAIT to indicate the driver
13154 		 * is waiting on resource allocations. The driver will not
		 * suspend, pm_suspend, or detach while the state is RWAIT.
13156 		 */
13157 		New_state(un, SD_STATE_RWAIT);
13158 
13159 		SD_ERROR(SD_LOG_IO_CORE, un,
13160 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
13161 
13162 		if ((bp->b_flags & B_ERROR) != 0) {
13163 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13164 		}
13165 		return (SD_PKT_ALLOC_FAILURE);
13166 	} else {
13167 		/*
		 * SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13169 		 *
13170 		 * This should never happen.  Maybe someone messed with the
13171 		 * kernel's minphys?
13172 		 */
13173 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13174 		    "Request rejected: too large for CDB: "
13175 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
13176 		SD_ERROR(SD_LOG_IO_CORE, un,
13177 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
13178 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13179 
13180 	}
13181 }
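
/*
 * Illustrative caller sketch (not actual driver code): the return codes
 * above are consumed by sd_start_cmds() through the sd_initpkt_map[]
 * dispatch table, roughly as follows:
 *
 *	struct scsi_pkt *pktp;
 *
 *	switch (sd_initpkt_for_buf(bp, &pktp)) {
 *	case SD_PKT_ALLOC_SUCCESS:
 *		xp->xb_pktp = pktp;	(transport the command)
 *		break;
 *	case SD_PKT_ALLOC_FAILURE:
 *		(leave bp on the waitq; sdrunout will restart it)
 *		break;
 *	case SD_PKT_ALLOC_FAILURE_NO_DMA:
 *	case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
 *		(fail bp with EIO)
 *		break;
 *	}
 */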
13182 
13183 
13184 /*
13185  *    Function: sd_destroypkt_for_buf
13186  *
13187  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13188  *
13189  *     Context: Kernel thread or interrupt context
13190  */
13191 
13192 static void
13193 sd_destroypkt_for_buf(struct buf *bp)
13194 {
13195 	ASSERT(bp != NULL);
13196 	ASSERT(SD_GET_UN(bp) != NULL);
13197 
13198 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13199 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13200 
13201 	ASSERT(SD_GET_PKTP(bp) != NULL);
13202 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13203 
13204 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13205 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13206 }
13207 
13208 /*
13209  *    Function: sd_setup_rw_pkt
13210  *
13211  * Description: Determines appropriate CDB group for the requested LBA
13212  *		and transfer length, calls scsi_init_pkt, and builds
13213  *		the CDB.  Do not use for partial DMA transfers except
13214  *		for the initial transfer since the CDB size must
13215  *		remain constant.
13216  *
13217  *     Context: Kernel thread and may be called from software interrupt
13218  *		context as part of a sdrunout callback. This function may not
13219  *		block or call routines that block
13220  */
13221 
13222 
13223 int
13224 sd_setup_rw_pkt(struct sd_lun *un,
13225     struct scsi_pkt **pktpp, struct buf *bp, int flags,
13226     int (*callback)(caddr_t), caddr_t callback_arg,
13227     diskaddr_t lba, uint32_t blockcount)
13228 {
13229 	struct scsi_pkt *return_pktp;
13230 	union scsi_cdb *cdbp;
13231 	struct sd_cdbinfo *cp = NULL;
13232 	int i;
13233 
13234 	/*
13235 	 * See which size CDB to use, based upon the request.
13236 	 */
13237 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
13238 
13239 		/*
13240 		 * Check lba and block count against sd_cdbtab limits.
13241 		 * In the partial DMA case, we have to use the same size
13242 		 * CDB for all the transfers.  Check lba + blockcount
13243 		 * against the max LBA so we know that segment of the
13244 		 * transfer can use the CDB we select.
13245 		 */
13246 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
13247 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
13248 
13249 			/*
13250 			 * The command will fit into the CDB type
13251 			 * specified by sd_cdbtab[i].
13252 			 */
13253 			cp = sd_cdbtab + i;
13254 
13255 			/*
13256 			 * Call scsi_init_pkt so we can fill in the
13257 			 * CDB.
13258 			 */
13259 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
13260 			    bp, cp->sc_grpcode, un->un_status_len, 0,
13261 			    flags, callback, callback_arg);
13262 
13263 			if (return_pktp != NULL) {
13264 
13265 				/*
13266 				 * Return new value of pkt
13267 				 */
13268 				*pktpp = return_pktp;
13269 
13270 				/*
				 * To be safe, zero the CDB, ensuring there is
13272 				 * no leftover data from a previous command.
13273 				 */
13274 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
13275 
13276 				/*
13277 				 * Handle partial DMA mapping
13278 				 */
13279 				if (return_pktp->pkt_resid != 0) {
13280 
13281 					/*
13282 					 * Not going to xfer as many blocks as
13283 					 * originally expected
13284 					 */
13285 					blockcount -=
13286 					    SD_BYTES2TGTBLOCKS(un,
13287 						return_pktp->pkt_resid);
13288 				}
13289 
13290 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
13291 
13292 				/*
13293 				 * Set command byte based on the CDB
13294 				 * type we matched.
13295 				 */
13296 				cdbp->scc_cmd = cp->sc_grpmask |
13297 				    ((bp->b_flags & B_READ) ?
13298 					SCMD_READ : SCMD_WRITE);
13299 
13300 				SD_FILL_SCSI1_LUN(un, return_pktp);
13301 
13302 				/*
13303 				 * Fill in LBA and length
13304 				 */
13305 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
13306 				    (cp->sc_grpcode == CDB_GROUP4) ||
13307 				    (cp->sc_grpcode == CDB_GROUP0) ||
13308 				    (cp->sc_grpcode == CDB_GROUP5));
13309 
13310 				if (cp->sc_grpcode == CDB_GROUP1) {
13311 					FORMG1ADDR(cdbp, lba);
13312 					FORMG1COUNT(cdbp, blockcount);
13313 					return (0);
13314 				} else if (cp->sc_grpcode == CDB_GROUP4) {
13315 					FORMG4LONGADDR(cdbp, lba);
13316 					FORMG4COUNT(cdbp, blockcount);
13317 					return (0);
13318 				} else if (cp->sc_grpcode == CDB_GROUP0) {
13319 					FORMG0ADDR(cdbp, lba);
13320 					FORMG0COUNT(cdbp, blockcount);
13321 					return (0);
13322 				} else if (cp->sc_grpcode == CDB_GROUP5) {
13323 					FORMG5ADDR(cdbp, lba);
13324 					FORMG5COUNT(cdbp, blockcount);
13325 					return (0);
13326 				}
13327 
13328 				/*
13329 				 * It should be impossible to not match one
13330 				 * of the CDB types above, so we should never
13331 				 * reach this point.  Set the CDB command byte
13332 				 * to test-unit-ready to avoid writing
13333 				 * to somewhere we don't intend.
13334 				 */
13335 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
13336 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13337 			} else {
13338 				/*
13339 				 * Couldn't get scsi_pkt
13340 				 */
13341 				return (SD_PKT_ALLOC_FAILURE);
13342 			}
13343 		}
13344 	}
13345 
13346 	/*
13347 	 * None of the available CDB types were suitable.  This really
	 * should never happen: on a 64-bit system we support
	 * READ16/WRITE16, which hold an entire 64-bit disk address,
	 * and on a 32-bit system we refuse to bind to a device larger
	 * than 2TB, so addresses will never be larger than 32 bits.
13352 	 */
13353 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13354 }
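
/*
 * For reference, the sd_cdbtab[] entries scanned above impose roughly the
 * following limits (a sketch; the table definition elsewhere in this file
 * is authoritative):
 *
 *	group		CDB size	sc_maxlba	sc_maxlen
 *	CDB_GROUP0	 6 bytes	0x1FFFFF	0xFF
 *	CDB_GROUP1	10 bytes	0xFFFFFFFF	0xFFFF
 *	CDB_GROUP5	12 bytes	0xFFFFFFFF	0xFFFFFFFF
 *	CDB_GROUP4	16 bytes	(full 64-bit)	0xFFFFFFFF
 *
 * For example, a 256-block read at LBA 0x200000 exceeds both GROUP0
 * limits (21-bit address, 255-block count) and is therefore built as a
 * 10-byte GROUP1 READ(10) command.
 */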
13355 
13356 #if defined(__i386) || defined(__amd64)
13357 /*
13358  *    Function: sd_setup_next_rw_pkt
13359  *
13360  * Description: Setup packet for partial DMA transfers, except for the
13361  * 		initial transfer.  sd_setup_rw_pkt should be used for
13362  *		the initial transfer.
13363  *
13364  *     Context: Kernel thread and may be called from interrupt context.
13365  */
13366 
13367 int
13368 sd_setup_next_rw_pkt(struct sd_lun *un,
13369     struct scsi_pkt *pktp, struct buf *bp,
13370     diskaddr_t lba, uint32_t blockcount)
13371 {
13372 	uchar_t com;
13373 	union scsi_cdb *cdbp;
13374 	uchar_t cdb_group_id;
13375 
13376 	ASSERT(pktp != NULL);
13377 	ASSERT(pktp->pkt_cdbp != NULL);
13378 
13379 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
13380 	com = cdbp->scc_cmd;
13381 	cdb_group_id = CDB_GROUPID(com);
13382 
13383 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
13384 	    (cdb_group_id == CDB_GROUPID_1) ||
13385 	    (cdb_group_id == CDB_GROUPID_4) ||
13386 	    (cdb_group_id == CDB_GROUPID_5));
13387 
13388 	/*
13389 	 * Move pkt to the next portion of the xfer.
13390 	 * func is NULL_FUNC so we do not have to release
13391 	 * the disk mutex here.
13392 	 */
13393 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
13394 	    NULL_FUNC, NULL) == pktp) {
13395 		/* Success.  Handle partial DMA */
13396 		if (pktp->pkt_resid != 0) {
13397 			blockcount -=
13398 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
13399 		}
13400 
13401 		cdbp->scc_cmd = com;
13402 		SD_FILL_SCSI1_LUN(un, pktp);
13403 		if (cdb_group_id == CDB_GROUPID_1) {
13404 			FORMG1ADDR(cdbp, lba);
13405 			FORMG1COUNT(cdbp, blockcount);
13406 			return (0);
13407 		} else if (cdb_group_id == CDB_GROUPID_4) {
13408 			FORMG4LONGADDR(cdbp, lba);
13409 			FORMG4COUNT(cdbp, blockcount);
13410 			return (0);
13411 		} else if (cdb_group_id == CDB_GROUPID_0) {
13412 			FORMG0ADDR(cdbp, lba);
13413 			FORMG0COUNT(cdbp, blockcount);
13414 			return (0);
13415 		} else if (cdb_group_id == CDB_GROUPID_5) {
13416 			FORMG5ADDR(cdbp, lba);
13417 			FORMG5COUNT(cdbp, blockcount);
13418 			return (0);
13419 		}
13420 
13421 		/* Unreachable */
13422 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13423 	}
13424 
13425 	/*
13426 	 * Error setting up next portion of cmd transfer.
13427 	 * Something is definitely very wrong and this
13428 	 * should not happen.
13429 	 */
13430 	return (SD_PKT_ALLOC_FAILURE);
13431 }
13432 #endif /* defined(__i386) || defined(__amd64) */
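
/*
 * Illustrative partial-DMA sequence (a sketch; x86 only): the first
 * window is built by sd_setup_rw_pkt() and each later window by
 * sd_setup_next_rw_pkt(), with the LBA advanced past the blocks that
 * have already been transferred (blocks_done is an illustrative name):
 *
 *	(first window)
 *	sd_setup_rw_pkt(un, &pktp, bp, flags, sdrunout, (caddr_t)un,
 *	    lba, nblks);
 *
 *	(after each window completes, from sd_setup_next_xfer())
 *	lba += blocks_done;
 *	nblks -= blocks_done;
 *	sd_setup_next_rw_pkt(un, pktp, bp, lba, nblks);
 *
 * The CDB group may not change between windows, which is why the initial
 * CDB is selected against the full [lba, lba + blockcount) range.
 */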
13433 
13434 /*
13435  *    Function: sd_initpkt_for_uscsi
13436  *
13437  * Description: Allocate and initialize for transport a scsi_pkt struct,
13438  *		based upon the info specified in the given uscsi_cmd struct.
13439  *
13440  * Return Code: SD_PKT_ALLOC_SUCCESS
13441  *		SD_PKT_ALLOC_FAILURE
13442  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13443  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13444  *
13445  *     Context: Kernel thread and may be called from software interrupt context
13446  *		as part of a sdrunout callback. This function may not block or
13447  *		call routines that block
13448  */
13449 
13450 static int
13451 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
13452 {
13453 	struct uscsi_cmd *uscmd;
13454 	struct sd_xbuf	*xp;
13455 	struct scsi_pkt	*pktp;
13456 	struct sd_lun	*un;
13457 	uint32_t	flags = 0;
13458 
13459 	ASSERT(bp != NULL);
13460 	ASSERT(pktpp != NULL);
13461 	xp = SD_GET_XBUF(bp);
13462 	ASSERT(xp != NULL);
13463 	un = SD_GET_UN(bp);
13464 	ASSERT(un != NULL);
13465 	ASSERT(mutex_owned(SD_MUTEX(un)));
13466 
13467 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13468 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13469 	ASSERT(uscmd != NULL);
13470 
13471 	SD_TRACE(SD_LOG_IO_CORE, un,
13472 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
13473 
13474 	/*
13475 	 * Allocate the scsi_pkt for the command.
	 * Note: If the PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
	 *	 at scsi_init_pkt() time and will continue to use the
	 *	 same path as long as the same scsi_pkt is used without
	 *	 an intervening scsi_dmafree(). Since uscsi commands do
	 *	 not call scsi_dmafree() before retrying a failed command,
	 *	 PKT_DMA_PARTIAL must NOT be set, so that scsi_vhci can
	 *	 use another available path for the retry. Besides, uscsi
	 *	 commands do not allow DMA breakup, so there is no need
	 *	 to set PKT_DMA_PARTIAL in any case.
13485 	 */
13486 	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
13487 	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
13488 	    sizeof (struct scsi_arq_status), 0,
13489 	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
13490 	    sdrunout, (caddr_t)un);
13491 
13492 	if (pktp == NULL) {
13493 		*pktpp = NULL;
13494 		/*
13495 		 * Set the driver state to RWAIT to indicate the driver
13496 		 * is waiting on resource allocations. The driver will not
		 * suspend, pm_suspend, or detach while the state is RWAIT.
13498 		 */
13499 		New_state(un, SD_STATE_RWAIT);
13500 
13501 		SD_ERROR(SD_LOG_IO_CORE, un,
13502 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
13503 
13504 		if ((bp->b_flags & B_ERROR) != 0) {
13505 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13506 		}
13507 		return (SD_PKT_ALLOC_FAILURE);
13508 	}
13509 
13510 	/*
13511 	 * We do not do DMA breakup for USCSI commands, so return failure
13512 	 * here if all the needed DMA resources were not allocated.
13513 	 */
13514 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
13515 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
13516 		scsi_destroy_pkt(pktp);
13517 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
13518 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
13519 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
13520 	}
13521 
13522 	/* Init the cdb from the given uscsi struct */
13523 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
13524 	    uscmd->uscsi_cdb[0], 0, 0, 0);
13525 
13526 	SD_FILL_SCSI1_LUN(un, pktp);
13527 
13528 	/*
	 * Set up the optional USCSI flags. See the uscsi(7I) man page
13530 	 * for listing of the supported flags.
13531 	 */
13532 
13533 	if (uscmd->uscsi_flags & USCSI_SILENT) {
13534 		flags |= FLAG_SILENT;
13535 	}
13536 
13537 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
13538 		flags |= FLAG_DIAGNOSE;
13539 	}
13540 
13541 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
13542 		flags |= FLAG_ISOLATE;
13543 	}
13544 
13545 	if (un->un_f_is_fibre == FALSE) {
13546 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
13547 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
13548 		}
13549 	}
13550 
13551 	/*
13552 	 * Set the pkt flags here so we save time later.
13553 	 * Note: These flags are NOT in the uscsi man page!!!
13554 	 */
13555 	if (uscmd->uscsi_flags & USCSI_HEAD) {
13556 		flags |= FLAG_HEAD;
13557 	}
13558 
13559 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
13560 		flags |= FLAG_NOINTR;
13561 	}
13562 
13563 	/*
13564 	 * For tagged queueing, things get a bit complicated.
13565 	 * Check first for head of queue and last for ordered queue.
	 * If neither head nor ordered, use the default driver tag flags.
13567 	 */
13568 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
13569 		if (uscmd->uscsi_flags & USCSI_HTAG) {
13570 			flags |= FLAG_HTAG;
13571 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
13572 			flags |= FLAG_OTAG;
13573 		} else {
13574 			flags |= un->un_tagflags & FLAG_TAGMASK;
13575 		}
13576 	}
13577 
13578 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
13579 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
13580 	}
13581 
13582 	pktp->pkt_flags = flags;
13583 
13584 	/* Copy the caller's CDB into the pkt... */
13585 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
13586 
13587 	if (uscmd->uscsi_timeout == 0) {
13588 		pktp->pkt_time = un->un_uscsi_timeout;
13589 	} else {
13590 		pktp->pkt_time = uscmd->uscsi_timeout;
13591 	}
13592 
13593 	/* need it later to identify USCSI request in sdintr */
13594 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
13595 
13596 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
13597 
13598 	pktp->pkt_private = bp;
13599 	pktp->pkt_comp = sdintr;
13600 	*pktpp = pktp;
13601 
13602 	SD_TRACE(SD_LOG_IO_CORE, un,
13603 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
13604 
13605 	return (SD_PKT_ALLOC_SUCCESS);
13606 }
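
/*
 * For context, a request that arrives here typically originates from the
 * USCSICMD ioctl documented in uscsi(7I).  A minimal application-side
 * sketch (hypothetical code, error handling omitted):
 *
 *	struct uscsi_cmd ucmd;
 *	union scsi_cdb cdb;
 *
 *	bzero(&ucmd, sizeof (ucmd));
 *	bzero(&cdb, sizeof (cdb));
 *	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
 *	ucmd.uscsi_cdb = (caddr_t)&cdb;
 *	ucmd.uscsi_cdblen = CDB_GROUP0;
 *	ucmd.uscsi_flags = USCSI_SILENT;
 *	ucmd.uscsi_timeout = 30;
 *	(void) ioctl(fd, USCSICMD, &ucmd);
 *
 * A zero uscsi_timeout falls back to un_uscsi_timeout, as coded above.
 */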
13607 
13608 
13609 /*
13610  *    Function: sd_destroypkt_for_uscsi
13611  *
13612  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
 *		IOs. Also saves relevant info into the associated uscsi_cmd
13614  *		struct.
13615  *
13616  *     Context: May be called under interrupt context
13617  */
13618 
13619 static void
13620 sd_destroypkt_for_uscsi(struct buf *bp)
13621 {
13622 	struct uscsi_cmd *uscmd;
13623 	struct sd_xbuf	*xp;
13624 	struct scsi_pkt	*pktp;
13625 	struct sd_lun	*un;
13626 
13627 	ASSERT(bp != NULL);
13628 	xp = SD_GET_XBUF(bp);
13629 	ASSERT(xp != NULL);
13630 	un = SD_GET_UN(bp);
13631 	ASSERT(un != NULL);
13632 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13633 	pktp = SD_GET_PKTP(bp);
13634 	ASSERT(pktp != NULL);
13635 
13636 	SD_TRACE(SD_LOG_IO_CORE, un,
13637 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
13638 
13639 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13640 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13641 	ASSERT(uscmd != NULL);
13642 
13643 	/* Save the status and the residual into the uscsi_cmd struct */
13644 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
13645 	uscmd->uscsi_resid  = bp->b_resid;
13646 
13647 	/*
13648 	 * If enabled, copy any saved sense data into the area specified
13649 	 * by the uscsi command.
13650 	 */
13651 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
13652 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
13653 		/*
13654 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
13655 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
13656 		 */
13657 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
13658 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
13659 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
13660 	}
13661 
13662 	/* We are done with the scsi_pkt; free it now */
13663 	ASSERT(SD_GET_PKTP(bp) != NULL);
13664 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13665 
13666 	SD_TRACE(SD_LOG_IO_CORE, un,
13667 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
13668 }
13669 
13670 
13671 /*
13672  *    Function: sd_bioclone_alloc
13673  *
13674  * Description: Allocate a buf(9S) and init it as per the given buf
13675  *		and the various arguments.  The associated sd_xbuf
13676  *		struct is (nearly) duplicated.  The struct buf *bp
13677  *		argument is saved in new_xp->xb_private.
13678  *
 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
13680  *		datalen - size of data area for the shadow bp
13681  *		blkno - starting LBA
13682  *		func - function pointer for b_iodone in the shadow buf. (May
13683  *			be NULL if none.)
13684  *
 * Return Code: Pointer to the allocated buf(9S) struct
13686  *
13687  *     Context: Can sleep.
13688  */
13689 
13690 static struct buf *
13691 sd_bioclone_alloc(struct buf *bp, size_t datalen,
13692 	daddr_t blkno, int (*func)(struct buf *))
13693 {
13694 	struct	sd_lun	*un;
13695 	struct	sd_xbuf	*xp;
13696 	struct	sd_xbuf	*new_xp;
13697 	struct	buf	*new_bp;
13698 
13699 	ASSERT(bp != NULL);
13700 	xp = SD_GET_XBUF(bp);
13701 	ASSERT(xp != NULL);
13702 	un = SD_GET_UN(bp);
13703 	ASSERT(un != NULL);
13704 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13705 
13706 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
13707 	    NULL, KM_SLEEP);
13708 
13709 	new_bp->b_lblkno	= blkno;
13710 
13711 	/*
13712 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13713 	 * original xbuf into it.
13714 	 */
13715 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13716 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13717 
13718 	/*
13719 	 * The given bp is automatically saved in the xb_private member
13720 	 * of the new xbuf.  Callers are allowed to depend on this.
13721 	 */
13722 	new_xp->xb_private = bp;
13723 
13724 	new_bp->b_private  = new_xp;
13725 
13726 	return (new_bp);
13727 }
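
/*
 * Illustrative use (a sketch): a layer that clones a bp recovers the
 * original buf in its b_iodone callback through the xb_private linkage
 * established above (the callback name below is hypothetical):
 *
 *	static int
 *	sd_example_iodone(struct buf *shadow_bp)
 *	{
 *		struct sd_xbuf *xp = SD_GET_XBUF(shadow_bp);
 *		struct buf *orig_bp = xp->xb_private;
 *
 *		(propagate b_resid and b_error to orig_bp)
 *		sd_bioclone_free(shadow_bp);
 *		return (0);
 *	}
 */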
13728 
13729 /*
13730  *    Function: sd_shadow_buf_alloc
13731  *
13732  * Description: Allocate a buf(9S) and init it as per the given buf
13733  *		and the various arguments.  The associated sd_xbuf
13734  *		struct is (nearly) duplicated.  The struct buf *bp
13735  *		argument is saved in new_xp->xb_private.
13736  *
 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
13738  *		datalen - size of data area for the shadow bp
13739  *		bflags - B_READ or B_WRITE (pseudo flag)
13740  *		blkno - starting LBA
13741  *		func - function pointer for b_iodone in the shadow buf. (May
13742  *			be NULL if none.)
13743  *
 * Return Code: Pointer to the allocated buf(9S) struct
13745  *
13746  *     Context: Can sleep.
13747  */
13748 
13749 static struct buf *
13750 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
13751 	daddr_t blkno, int (*func)(struct buf *))
13752 {
13753 	struct	sd_lun	*un;
13754 	struct	sd_xbuf	*xp;
13755 	struct	sd_xbuf	*new_xp;
13756 	struct	buf	*new_bp;
13757 
13758 	ASSERT(bp != NULL);
13759 	xp = SD_GET_XBUF(bp);
13760 	ASSERT(xp != NULL);
13761 	un = SD_GET_UN(bp);
13762 	ASSERT(un != NULL);
13763 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13764 
13765 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
13766 		bp_mapin(bp);
13767 	}
13768 
13769 	bflags &= (B_READ | B_WRITE);
13770 #if defined(__i386) || defined(__amd64)
13771 	new_bp = getrbuf(KM_SLEEP);
13772 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
13773 	new_bp->b_bcount = datalen;
13774 	new_bp->b_flags	= bp->b_flags | bflags;
13775 #else
13776 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
13777 	    datalen, bflags, SLEEP_FUNC, NULL);
13778 #endif
13779 	new_bp->av_forw	= NULL;
13780 	new_bp->av_back	= NULL;
13781 	new_bp->b_dev	= bp->b_dev;
13782 	new_bp->b_blkno	= blkno;
13783 	new_bp->b_iodone = func;
13784 	new_bp->b_edev	= bp->b_edev;
13785 	new_bp->b_resid	= 0;
13786 
13787 	/* We need to preserve the B_FAILFAST flag */
13788 	if (bp->b_flags & B_FAILFAST) {
13789 		new_bp->b_flags |= B_FAILFAST;
13790 	}
13791 
13792 	/*
13793 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13794 	 * original xbuf into it.
13795 	 */
13796 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13797 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13798 
	/* Needed later to copy data between the shadow buf & original buf! */
13800 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
13801 
13802 	/*
13803 	 * The given bp is automatically saved in the xb_private member
13804 	 * of the new xbuf.  Callers are allowed to depend on this.
13805 	 */
13806 	new_xp->xb_private = bp;
13807 
13808 	new_bp->b_private  = new_xp;
13809 
13810 	return (new_bp);
13811 }
13812 
13813 /*
13814  *    Function: sd_bioclone_free
13815  *
 * Description: Deallocate a buf(9S) that was used for 'shadow' IO
 *		operations in the larger-than-partition case.
13818  *
13819  *     Context: May be called under interrupt context
13820  */
13821 
13822 static void
13823 sd_bioclone_free(struct buf *bp)
13824 {
13825 	struct sd_xbuf	*xp;
13826 
13827 	ASSERT(bp != NULL);
13828 	xp = SD_GET_XBUF(bp);
13829 	ASSERT(xp != NULL);
13830 
13831 	/*
	 * Call bp_mapout() before freeing the buf, in case a lower
	 * layer or HBA had done a bp_mapin(). We must do this here
	 * as we are the "originator" of the shadow buf.
13835 	 */
13836 	bp_mapout(bp);
13837 
13838 	/*
13839 	 * Null out b_iodone before freeing the bp, to ensure that the driver
13840 	 * never gets confused by a stale value in this field. (Just a little
13841 	 * extra defensiveness here.)
13842 	 */
13843 	bp->b_iodone = NULL;
13844 
13845 	freerbuf(bp);
13846 
13847 	kmem_free(xp, sizeof (struct sd_xbuf));
13848 }
13849 
13850 /*
13851  *    Function: sd_shadow_buf_free
13852  *
13853  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
13854  *
13855  *     Context: May be called under interrupt context
13856  */
13857 
13858 static void
13859 sd_shadow_buf_free(struct buf *bp)
13860 {
13861 	struct sd_xbuf	*xp;
13862 
13863 	ASSERT(bp != NULL);
13864 	xp = SD_GET_XBUF(bp);
13865 	ASSERT(xp != NULL);
13866 
13867 #if defined(__sparc)
13868 	/*
	 * Call bp_mapout() before freeing the buf, in case a lower
	 * layer or HBA had done a bp_mapin(). We must do this here
	 * as we are the "originator" of the shadow buf.
13872 	 */
13873 	bp_mapout(bp);
13874 #endif
13875 
13876 	/*
13877 	 * Null out b_iodone before freeing the bp, to ensure that the driver
13878 	 * never gets confused by a stale value in this field. (Just a little
13879 	 * extra defensiveness here.)
13880 	 */
13881 	bp->b_iodone = NULL;
13882 
13883 #if defined(__i386) || defined(__amd64)
13884 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
13885 	freerbuf(bp);
13886 #else
13887 	scsi_free_consistent_buf(bp);
13888 #endif
13889 
13890 	kmem_free(xp, sizeof (struct sd_xbuf));
13891 }
13892 
13893 
13894 /*
13895  *    Function: sd_print_transport_rejected_message
13896  *
13897  * Description: This implements the ludicrously complex rules for printing
13898  *		a "transport rejected" message.  This is to address the
13899  *		specific problem of having a flood of this error message
13900  *		produced when a failover occurs.
13901  *
13902  *     Context: Any.
13903  */
13904 
13905 static void
13906 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
13907 	int code)
13908 {
13909 	ASSERT(un != NULL);
13910 	ASSERT(mutex_owned(SD_MUTEX(un)));
13911 	ASSERT(xp != NULL);
13912 
13913 	/*
13914 	 * Print the "transport rejected" message under the following
13915 	 * conditions:
13916 	 *
13917 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
13918 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
13919 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
13920 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
13921 	 *   scsi_transport(9F) (which indicates that the target might have
	 *   gone off-line).  This uses the un->un_tran_fatal_count
	 *   counter, which is incremented whenever a TRAN_FATAL_ERROR is
	 *   received, and reset to zero whenever a TRAN_ACCEPT is returned
13925 	 *   from scsi_transport().
13926 	 *
13927 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
	 * the preceding cases in order for the message to be printed.
13929 	 */
13930 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
13931 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
13932 		    (code != TRAN_FATAL_ERROR) ||
13933 		    (un->un_tran_fatal_count == 1)) {
13934 			switch (code) {
13935 			case TRAN_BADPKT:
13936 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13937 				    "transport rejected bad packet\n");
13938 				break;
13939 			case TRAN_FATAL_ERROR:
13940 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13941 				    "transport rejected fatal error\n");
13942 				break;
13943 			default:
13944 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13945 				    "transport rejected (%d)\n", code);
13946 				break;
13947 			}
13948 		}
13949 	}
13950 }
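
/*
 * Worked example: during a failover, scsi_transport() may return
 * TRAN_FATAL_ERROR for dozens of consecutive commands.  Only the first
 * one (un_tran_fatal_count == 1) produces "transport rejected fatal
 * error"; the rest are suppressed until a TRAN_ACCEPT in sd_start_cmds()
 * resets the count.  Non-fatal codes such as TRAN_BADPKT are always
 * printed, as is everything when SD_LOGMASK_DIAG logging is enabled
 * (FLAG_SILENT packets excepted).
 */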
13951 
13952 
13953 /*
13954  *    Function: sd_add_buf_to_waitq
13955  *
13956  * Description: Add the given buf(9S) struct to the wait queue for the
13957  *		instance.  If sorting is enabled, then the buf is added
13958  *		to the queue via an elevator sort algorithm (a la
13959  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
13960  *		If sorting is not enabled, then the buf is just added
13961  *		to the end of the wait queue.
13962  *
13963  * Return Code: void
13964  *
13965  *     Context: Does not sleep/block, therefore technically can be called
13966  *		from any context.  However if sorting is enabled then the
 *		execution time is indeterminate, and may take a long time if
13968  *		the wait queue grows large.
13969  */
13970 
13971 static void
13972 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
13973 {
13974 	struct buf *ap;
13975 
13976 	ASSERT(bp != NULL);
13977 	ASSERT(un != NULL);
13978 	ASSERT(mutex_owned(SD_MUTEX(un)));
13979 
13980 	/* If the queue is empty, add the buf as the only entry & return. */
13981 	if (un->un_waitq_headp == NULL) {
13982 		ASSERT(un->un_waitq_tailp == NULL);
13983 		un->un_waitq_headp = un->un_waitq_tailp = bp;
13984 		bp->av_forw = NULL;
13985 		return;
13986 	}
13987 
13988 	ASSERT(un->un_waitq_tailp != NULL);
13989 
13990 	/*
13991 	 * If sorting is disabled, just add the buf to the tail end of
13992 	 * the wait queue and return.
13993 	 */
13994 	if (un->un_f_disksort_disabled) {
13995 		un->un_waitq_tailp->av_forw = bp;
13996 		un->un_waitq_tailp = bp;
13997 		bp->av_forw = NULL;
13998 		return;
13999 	}
14000 
14001 	/*
	 * Sort through the list of requests currently on the wait queue
14003 	 * and add the new buf request at the appropriate position.
14004 	 *
14005 	 * The un->un_waitq_headp is an activity chain pointer on which
14006 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
14007 	 * first queue holds those requests which are positioned after
14008 	 * the current SD_GET_BLKNO() (in the first request); the second holds
14009 	 * requests which came in after their SD_GET_BLKNO() number was passed.
14010 	 * Thus we implement a one way scan, retracting after reaching
14011 	 * the end of the drive to the first request on the second
14012 	 * queue, at which time it becomes the first queue.
14013 	 * A one-way scan is natural because of the way UNIX read-ahead
14014 	 * blocks are allocated.
14015 	 *
14016 	 * If we lie after the first request, then we must locate the
14017 	 * second request list and add ourselves to it.
14018 	 */
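	/*
	 * Worked example: with queued requests at blocks
	 * [100, 150, 200, 30, 60] (the inversion 200 -> 30 separates the
	 * two sweeps), a new request for block 40 sorts into the second
	 * sweep between 30 and 60, while one for block 160 sorts into the
	 * first sweep between 150 and 200.
	 */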
14019 	ap = un->un_waitq_headp;
14020 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
14021 		while (ap->av_forw != NULL) {
14022 			/*
14023 			 * Look for an "inversion" in the (normally
14024 			 * ascending) block numbers. This indicates
14025 			 * the start of the second request list.
14026 			 */
14027 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
14028 				/*
14029 				 * Search the second request list for the
14030 				 * first request at a larger block number.
14031 				 * We go before that; however if there is
14032 				 * no such request, we go at the end.
14033 				 */
14034 				do {
14035 					if (SD_GET_BLKNO(bp) <
14036 					    SD_GET_BLKNO(ap->av_forw)) {
14037 						goto insert;
14038 					}
14039 					ap = ap->av_forw;
14040 				} while (ap->av_forw != NULL);
14041 				goto insert;		/* after last */
14042 			}
14043 			ap = ap->av_forw;
14044 		}
14045 
14046 		/*
14047 		 * No inversions... we will go after the last, and
14048 		 * be the first request in the second request list.
14049 		 */
14050 		goto insert;
14051 	}
14052 
14053 	/*
14054 	 * Request is at/after the current request...
14055 	 * sort in the first request list.
14056 	 */
14057 	while (ap->av_forw != NULL) {
14058 		/*
14059 		 * We want to go after the current request (1) if
14060 		 * there is an inversion after it (i.e. it is the end
14061 		 * of the first request list), or (2) if the next
14062 		 * request is a larger block no. than our request.
14063 		 */
14064 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
14065 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
14066 			goto insert;
14067 		}
14068 		ap = ap->av_forw;
14069 	}
14070 
14071 	/*
14072 	 * Neither a second list nor a larger request, therefore
14073 	 * we go at the end of the first list (which is the same
	 * as the end of the whole shebang).
14075 	 */
14076 insert:
14077 	bp->av_forw = ap->av_forw;
14078 	ap->av_forw = bp;
14079 
14080 	/*
14081 	 * If we inserted onto the tail end of the waitq, make sure the
14082 	 * tail pointer is updated.
14083 	 */
14084 	if (ap == un->un_waitq_tailp) {
14085 		un->un_waitq_tailp = bp;
14086 	}
14087 }
14088 
14089 
14090 /*
14091  *    Function: sd_start_cmds
14092  *
14093  * Description: Remove and transport cmds from the driver queues.
14094  *
14095  *   Arguments: un - pointer to the unit (soft state) struct for the target.
14096  *
14097  *		immed_bp - ptr to a buf to be transported immediately. Only
14098  *		the immed_bp is transported; bufs on the waitq are not
14099  *		processed and the un_retry_bp is not checked.  If immed_bp is
14100  *		NULL, then normal queue processing is performed.
14101  *
14102  *     Context: May be called from kernel thread context, interrupt context,
14103  *		or runout callback context. This function may not block or
14104  *		call routines that block.
14105  */
14106 
14107 static void
14108 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
14109 {
14110 	struct	sd_xbuf	*xp;
14111 	struct	buf	*bp;
14112 	void	(*statp)(kstat_io_t *);
14113 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14114 	void	(*saved_statp)(kstat_io_t *);
14115 #endif
14116 	int	rval;
14117 
14118 	ASSERT(un != NULL);
14119 	ASSERT(mutex_owned(SD_MUTEX(un)));
14120 	ASSERT(un->un_ncmds_in_transport >= 0);
14121 	ASSERT(un->un_throttle >= 0);
14122 
14123 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
14124 
14125 	do {
14126 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14127 		saved_statp = NULL;
14128 #endif
14129 
14130 		/*
14131 		 * If we are syncing or dumping, fail the command to
14132 		 * avoid recursively calling back into scsi_transport().
14133 		 * The dump I/O itself uses a separate code path so this
14134 		 * only prevents non-dump I/O from being sent while dumping.
14135 		 * File system sync takes place before dumping begins.
14136 		 * During panic, filesystem I/O is allowed provided
14137 		 * un_in_callback is <= 1.  This is to prevent recursion
14138 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
14139 		 * sd_start_cmds and so on.  See panic.c for more information
14140 		 * about the states the system can be in during panic.
14141 		 */
14142 		if ((un->un_state == SD_STATE_DUMPING) ||
14143 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
14144 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14145 			    "sd_start_cmds: panicking\n");
14146 			goto exit;
14147 		}
14148 
14149 		if ((bp = immed_bp) != NULL) {
14150 			/*
14151 			 * We have a bp that must be transported immediately.
14152 			 * It's OK to transport the immed_bp here without doing
14153 			 * the throttle limit check because the immed_bp is
14154 			 * always used in a retry/recovery case. This means
14155 			 * that we know we are not at the throttle limit by
14156 			 * virtue of the fact that to get here we must have
14157 			 * already gotten a command back via sdintr(). This also
14158 			 * relies on (1) the command on un_retry_bp preventing
14159 			 * further commands from the waitq from being issued;
14160 			 * and (2) the code in sd_retry_command checking the
14161 			 * throttle limit before issuing a delayed or immediate
14162 			 * retry. This holds even if the throttle limit is
14163 			 * currently ratcheted down from its maximum value.
14164 			 */
14165 			statp = kstat_runq_enter;
14166 			if (bp == un->un_retry_bp) {
14167 				ASSERT((un->un_retry_statp == NULL) ||
14168 				    (un->un_retry_statp == kstat_waitq_enter) ||
14169 				    (un->un_retry_statp ==
14170 				    kstat_runq_back_to_waitq));
14171 				/*
14172 				 * If the waitq kstat was incremented when
14173 				 * sd_set_retry_bp() queued this bp for a retry,
14174 				 * then we must set up statp so that the waitq
14175 				 * count will get decremented correctly below.
14176 				 * Also we must clear un->un_retry_statp to
14177 				 * ensure that we do not act on a stale value
14178 				 * in this field.
14179 				 */
14180 				if ((un->un_retry_statp == kstat_waitq_enter) ||
14181 				    (un->un_retry_statp ==
14182 				    kstat_runq_back_to_waitq)) {
14183 					statp = kstat_waitq_to_runq;
14184 				}
14185 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14186 				saved_statp = un->un_retry_statp;
14187 #endif
14188 				un->un_retry_statp = NULL;
14189 
14190 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14191 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
14192 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
14193 				    un, un->un_retry_bp, un->un_throttle,
14194 				    un->un_ncmds_in_transport);
14195 			} else {
14196 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
14197 				    "processing priority bp:0x%p\n", bp);
14198 			}
14199 
14200 		} else if ((bp = un->un_waitq_headp) != NULL) {
14201 			/*
14202 			 * A command on the waitq is ready to go, but do not
14203 			 * send it if:
14204 			 *
14205 			 * (1) the throttle limit has been reached, or
14206 			 * (2) a retry is pending, or
14207 			 * (3) a START_STOP_UNIT callback pending, or
14208 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
14209 			 *	command is pending.
14210 			 *
14211 			 * For all of these conditions, IO processing will
14212 			 * restart after the condition is cleared.
14213 			 */
14214 			if (un->un_ncmds_in_transport >= un->un_throttle) {
14215 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14216 				    "sd_start_cmds: exiting, "
14217 				    "throttle limit reached!\n");
14218 				goto exit;
14219 			}
14220 			if (un->un_retry_bp != NULL) {
14221 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14222 				    "sd_start_cmds: exiting, retry pending!\n");
14223 				goto exit;
14224 			}
14225 			if (un->un_startstop_timeid != NULL) {
14226 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14227 				    "sd_start_cmds: exiting, "
14228 				    "START_STOP pending!\n");
14229 				goto exit;
14230 			}
14231 			if (un->un_direct_priority_timeid != NULL) {
14232 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14233 				    "sd_start_cmds: exiting, "
14234 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
14235 				goto exit;
14236 			}
14237 
14238 			/* Dequeue the command */
14239 			un->un_waitq_headp = bp->av_forw;
14240 			if (un->un_waitq_headp == NULL) {
14241 				un->un_waitq_tailp = NULL;
14242 			}
14243 			bp->av_forw = NULL;
14244 			statp = kstat_waitq_to_runq;
14245 			SD_TRACE(SD_LOG_IO_CORE, un,
14246 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
14247 
14248 		} else {
14249 			/* No work to do so bail out now */
14250 			SD_TRACE(SD_LOG_IO_CORE, un,
14251 			    "sd_start_cmds: no more work, exiting!\n");
14252 			goto exit;
14253 		}
14254 
14255 		/*
14256 		 * Reset the state to normal. This is the mechanism by which
14257 		 * the state transitions from either SD_STATE_RWAIT or
14258 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
		 * If the state is SD_STATE_PM_CHANGING then this command is
		 * part of the device power control and the state must
		 * not be put back to normal. Doing so would
		 * allow new commands to proceed when they shouldn't;
		 * the device may be going off.
14264 		 */
14265 		if ((un->un_state != SD_STATE_SUSPENDED) &&
14266 		    (un->un_state != SD_STATE_PM_CHANGING)) {
14267 			New_state(un, SD_STATE_NORMAL);
		}
14269 
14270 		xp = SD_GET_XBUF(bp);
14271 		ASSERT(xp != NULL);
14272 
14273 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14274 		/*
14275 		 * Allocate the scsi_pkt if we need one, or attach DMA
14276 		 * resources if we have a scsi_pkt that needs them. The
14277 		 * latter should only occur for commands that are being
14278 		 * retried.
14279 		 */
14280 		if ((xp->xb_pktp == NULL) ||
14281 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
14282 #else
14283 		if (xp->xb_pktp == NULL) {
14284 #endif
14285 			/*
14286 			 * There is no scsi_pkt allocated for this buf. Call
14287 			 * the initpkt function to allocate & init one.
14288 			 *
14289 			 * The scsi_init_pkt runout callback functionality is
14290 			 * implemented as follows:
14291 			 *
14292 			 * 1) The initpkt function always calls
14293 			 *    scsi_init_pkt(9F) with sdrunout specified as the
14294 			 *    callback routine.
14295 			 * 2) A successful packet allocation is initialized and
14296 			 *    the I/O is transported.
14297 			 * 3) The I/O associated with an allocation resource
14298 			 *    failure is left on its queue to be retried via
14299 			 *    runout or the next I/O.
14300 			 * 4) The I/O associated with a DMA error is removed
14301 			 *    from the queue and failed with EIO. Processing of
14302 			 *    the transport queues is also halted to be
14303 			 *    restarted via runout or the next I/O.
14304 			 * 5) The I/O associated with a CDB size or packet
14305 			 *    size error is removed from the queue and failed
14306 			 *    with EIO. Processing of the transport queues is
14307 			 *    continued.
14308 			 *
14309 			 * Note: there is no interface for canceling a runout
14310 			 * callback. To prevent the driver from detaching or
14311 			 * suspending while a runout is pending the driver
14312 			 * state is set to SD_STATE_RWAIT
14313 			 *
14314 			 * Note: using the scsi_init_pkt callback facility can
14315 			 * result in an I/O request persisting at the head of
14316 			 * the list which cannot be satisfied even after
14317 			 * multiple retries. In the future the driver may
14318 			 * implement some kind of maximum runout count before
14319 			 * failing an I/O.
14320 			 *
14321 			 * Note: the use of funcp below may seem superfluous,
14322 			 * but it helps warlock figure out the correct
14323 			 * initpkt function calls (see [s]sd.wlcmd).
14324 			 */
14325 			struct scsi_pkt	*pktp;
14326 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
14327 
14328 			ASSERT(bp != un->un_rqs_bp);
14329 
14330 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
14331 			switch ((*funcp)(bp, &pktp)) {
14332 			case  SD_PKT_ALLOC_SUCCESS:
14333 				xp->xb_pktp = pktp;
14334 				SD_TRACE(SD_LOG_IO_CORE, un,
14335 				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
14336 				    pktp);
14337 				goto got_pkt;
14338 
14339 			case SD_PKT_ALLOC_FAILURE:
14340 				/*
14341 				 * Temporary (hopefully) resource depletion.
14342 				 * Since retries and RQS commands always have a
14343 				 * scsi_pkt allocated, these cases should never
14344 				 * get here. So the only cases this needs to
14345 				 * handle is a bp from the waitq (which we put
14346 				 * back onto the waitq for sdrunout), or a bp
14347 				 * sent as an immed_bp (which we just fail).
14348 				 */
14349 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14350 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
14351 
14352 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14353 
14354 				if (bp == immed_bp) {
14355 					/*
14356 					 * If SD_XB_DMA_FREED is clear, then
14357 					 * this is a failure to allocate a
14358 					 * scsi_pkt, and we must fail the
14359 					 * command.
14360 					 */
14361 					if ((xp->xb_pkt_flags &
14362 					    SD_XB_DMA_FREED) == 0) {
14363 						break;
14364 					}
14365 
14366 					/*
14367 					 * If this immediate command is NOT our
14368 					 * un_retry_bp, then we must fail it.
14369 					 */
14370 					if (bp != un->un_retry_bp) {
14371 						break;
14372 					}
14373 
14374 					/*
14375 					 * We get here if this cmd is our
14376 					 * un_retry_bp that was DMAFREED, but
14377 					 * scsi_init_pkt() failed to reallocate
14378 					 * DMA resources when we attempted to
14379 					 * retry it. This can happen when an
14380 					 * mpxio failover is in progress, but
14381 					 * we don't want to just fail the
14382 					 * command in this case.
14383 					 *
14384 					 * Use timeout(9F) to restart it after
14385 					 * a 100ms delay.  We don't want to
14386 					 * let sdrunout() restart it, because
14387 					 * sdrunout() is just supposed to start
14388 					 * commands that are sitting on the
14389 					 * wait queue.  The un_retry_bp stays
14390 					 * set until the command completes, but
14391 					 * sdrunout can be called many times
14392 					 * before that happens.  Since sdrunout
14393 					 * cannot tell if the un_retry_bp is
14394 					 * already in the transport, it could
14395 					 * end up calling scsi_transport() for
14396 					 * the un_retry_bp multiple times.
14397 					 *
14398 					 * Also: don't schedule the callback
14399 					 * if some other callback is already
14400 					 * pending.
14401 					 */
14402 					if (un->un_retry_statp == NULL) {
14403 						/*
14404 						 * restore the kstat pointer to
14405 						 * keep kstat counts coherent
14406 						 * when we do retry the command.
14407 						 */
14408 						un->un_retry_statp =
14409 						    saved_statp;
14410 					}
14411 
14412 					if ((un->un_startstop_timeid == NULL) &&
14413 					    (un->un_retry_timeid == NULL) &&
14414 					    (un->un_direct_priority_timeid ==
14415 					    NULL)) {
14416 
14417 						un->un_retry_timeid =
14418 						    timeout(
14419 						    sd_start_retry_command,
14420 						    un, SD_RESTART_TIMEOUT);
14421 					}
14422 					goto exit;
14423 				}
14424 
14425 #else
14426 				if (bp == immed_bp) {
14427 					break;	/* Just fail the command */
14428 				}
14429 #endif
14430 
14431 				/* Add the buf back to the head of the waitq */
14432 				bp->av_forw = un->un_waitq_headp;
14433 				un->un_waitq_headp = bp;
14434 				if (un->un_waitq_tailp == NULL) {
14435 					un->un_waitq_tailp = bp;
14436 				}
14437 				goto exit;
14438 
14439 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
14440 				/*
14441 				 * HBA DMA resource failure. Fail the command
14442 				 * and continue processing of the queues.
14443 				 */
14444 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14445 				    "sd_start_cmds: "
14446 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
14447 				break;
14448 
14449 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
14450 				/*
14451 				 * Note:x86: Partial DMA mapping not supported
14452 				 * for USCSI commands, and all the needed DMA
14453 				 * resources were not allocated.
14454 				 */
14455 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14456 				    "sd_start_cmds: "
14457 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
14458 				break;
14459 
14460 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
14461 				/*
14462 				 * Note:x86: Request cannot fit into CDB based
14463 				 * on lba and len.
14464 				 */
14465 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14466 				    "sd_start_cmds: "
14467 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
14468 				break;
14469 
14470 			default:
14471 				/* Should NEVER get here! */
14472 				panic("scsi_initpkt error");
14473 				/*NOTREACHED*/
14474 			}
14475 
14476 			/*
14477 			 * Fatal error in allocating a scsi_pkt for this buf.
14478 			 * Update kstats & return the buf with an error code.
14479 			 * We must use sd_return_failed_command_no_restart() to
14480 			 * avoid a recursive call back into sd_start_cmds().
14481 			 * However this also means that we must keep processing
14482 			 * the waitq here in order to avoid stalling.
14483 			 */
14484 			if (statp == kstat_waitq_to_runq) {
14485 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
14486 			}
14487 			sd_return_failed_command_no_restart(un, bp, EIO);
14488 			if (bp == immed_bp) {
14489 				/* immed_bp is gone by now, so clear this */
14490 				immed_bp = NULL;
14491 			}
14492 			continue;
14493 		}
14494 got_pkt:
14495 		if (bp == immed_bp) {
14496 			/* goto the head of the class.... */
14497 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
14498 		}
14499 
14500 		un->un_ncmds_in_transport++;
14501 		SD_UPDATE_KSTATS(un, statp, bp);
14502 
14503 		/*
14504 		 * Call scsi_transport() to send the command to the target.
14505 		 * According to SCSA architecture, we must drop the mutex here
14506 		 * before calling scsi_transport() in order to avoid deadlock.
14507 		 * Note that the scsi_pkt's completion routine can be executed
14508 		 * (from interrupt context) even before the call to
14509 		 * scsi_transport() returns.
14510 		 */
14511 		SD_TRACE(SD_LOG_IO_CORE, un,
14512 		    "sd_start_cmds: calling scsi_transport()\n");
14513 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
14514 
14515 		mutex_exit(SD_MUTEX(un));
14516 		rval = scsi_transport(xp->xb_pktp);
14517 		mutex_enter(SD_MUTEX(un));
14518 
14519 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14520 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
14521 
14522 		switch (rval) {
14523 		case TRAN_ACCEPT:
14524 			/* Clear this with every pkt accepted by the HBA */
14525 			un->un_tran_fatal_count = 0;
14526 			break;	/* Success; try the next cmd (if any) */
14527 
14528 		case TRAN_BUSY:
14529 			un->un_ncmds_in_transport--;
14530 			ASSERT(un->un_ncmds_in_transport >= 0);
14531 
14532 			/*
			 * Don't retry request sense; the sense data
14534 			 * is lost when another request is sent.
14535 			 * Free up the rqs buf and retry
14536 			 * the original failed cmd.  Update kstat.
14537 			 */
14538 			if (bp == un->un_rqs_bp) {
14539 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14540 				bp = sd_mark_rqs_idle(un, xp);
14541 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
14542 					NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
14543 					kstat_waitq_enter);
14544 				goto exit;
14545 			}
14546 
14547 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14548 			/*
			 * Free the DMA resources for the scsi_pkt. This will
			 * allow mpxio to select another path the next time
			 * we call scsi_transport() with this scsi_pkt.
			 * See sdintr() for the rationale behind this.
14553 			 */
14554 			if ((un->un_f_is_fibre == TRUE) &&
14555 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14556 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
14557 				scsi_dmafree(xp->xb_pktp);
14558 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14559 			}
14560 #endif
14561 
14562 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
14563 				/*
14564 				 * Commands that are SD_PATH_DIRECT_PRIORITY
14565 				 * are for error recovery situations. These do
14566 				 * not use the normal command waitq, so if they
14567 				 * get a TRAN_BUSY we cannot put them back onto
14568 				 * the waitq for later retry. One possible
14569 				 * problem is that there could already be some
14570 				 * other command on un_retry_bp that is waiting
14571 				 * for this one to complete, so we would be
14572 				 * deadlocked if we put this command back onto
14573 				 * the waitq for later retry (since un_retry_bp
14574 				 * must complete before the driver gets back to
14575 				 * commands on the waitq).
14576 				 *
14577 				 * To avoid deadlock we must schedule a callback
14578 				 * that will restart this command after a set
14579 				 * interval.  This should keep retrying for as
14580 				 * long as the underlying transport keeps
14581 				 * returning TRAN_BUSY (just like for other
14582 				 * commands).  Use the same timeout interval as
14583 				 * for the ordinary TRAN_BUSY retry.
14584 				 */
14585 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14586 				    "sd_start_cmds: scsi_transport() returned "
14587 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
14588 
14589 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14590 				un->un_direct_priority_timeid =
14591 				    timeout(sd_start_direct_priority_command,
14592 				    bp, SD_BSY_TIMEOUT / 500);
14593 
14594 				goto exit;
14595 			}
14596 
14597 			/*
14598 			 * For TRAN_BUSY, we want to reduce the throttle value,
14599 			 * unless we are retrying a command.
14600 			 */
14601 			if (bp != un->un_retry_bp) {
14602 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
14603 			}
14604 
14605 			/*
14606 			 * Set up the bp to be tried again 10 ms later.
14607 			 * Note:x86: Is there a timeout value in the sd_lun
14608 			 * for this condition?
14609 			 */
14610 			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
14611 				kstat_runq_back_to_waitq);
14612 			goto exit;
14613 
14614 		case TRAN_FATAL_ERROR:
14615 			un->un_tran_fatal_count++;
14616 			/* FALLTHRU */
14617 
14618 		case TRAN_BADPKT:
14619 		default:
14620 			un->un_ncmds_in_transport--;
14621 			ASSERT(un->un_ncmds_in_transport >= 0);
14622 
14623 			/*
14624 			 * If this is our REQUEST SENSE command with a
14625 			 * transport error, we must get back the pointers
14626 			 * to the original buf, and mark the REQUEST
14627 			 * SENSE command as "available".
14628 			 */
14629 			if (bp == un->un_rqs_bp) {
14630 				bp = sd_mark_rqs_idle(un, xp);
14631 				xp = SD_GET_XBUF(bp);
14632 			} else {
14633 				/*
14634 				 * Legacy behavior: do not update transport
14635 				 * error count for request sense commands.
14636 				 */
14637 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
14638 			}
14639 
14640 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14641 			sd_print_transport_rejected_message(un, xp, rval);
14642 
14643 			/*
14644 			 * We must use sd_return_failed_command_no_restart() to
14645 			 * avoid a recursive call back into sd_start_cmds().
14646 			 * However this also means that we must keep processing
14647 			 * the waitq here in order to avoid stalling.
14648 			 */
14649 			sd_return_failed_command_no_restart(un, bp, EIO);
14650 
14651 			/*
14652 			 * Notify any threads waiting in sd_ddi_suspend() that
14653 			 * a command completion has occurred.
14654 			 */
14655 			if (un->un_state == SD_STATE_SUSPENDED) {
14656 				cv_broadcast(&un->un_disk_busy_cv);
14657 			}
14658 
14659 			if (bp == immed_bp) {
14660 				/* immed_bp is gone by now, so clear this */
14661 				immed_bp = NULL;
14662 			}
14663 			break;
14664 		}
14665 
14666 	} while (immed_bp == NULL);
14667 
14668 exit:
14669 	ASSERT(mutex_owned(SD_MUTEX(un)));
14670 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
14671 }
14672 
14673 
14674 /*
14675  *    Function: sd_return_command
14676  *
14677  * Description: Returns a command to its originator (with or without an
14678  *		error).  Also starts commands waiting to be transported
14679  *		to the target.
14680  *
14681  *     Context: May be called from interrupt, kernel, or timeout context
14682  */
14683 
14684 static void
14685 sd_return_command(struct sd_lun *un, struct buf *bp)
14686 {
14687 	struct sd_xbuf *xp;
14688 #if defined(__i386) || defined(__amd64)
14689 	struct scsi_pkt *pktp;
14690 #endif
14691 
14692 	ASSERT(bp != NULL);
14693 	ASSERT(un != NULL);
14694 	ASSERT(mutex_owned(SD_MUTEX(un)));
14695 	ASSERT(bp != un->un_rqs_bp);
14696 	xp = SD_GET_XBUF(bp);
14697 	ASSERT(xp != NULL);
14698 
14699 #if defined(__i386) || defined(__amd64)
14700 	pktp = SD_GET_PKTP(bp);
14701 #endif
14702 
14703 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
14704 
14705 #if defined(__i386) || defined(__amd64)
14706 	/*
14707 	 * Note:x86: check for the "sdrestart failed" case.
14708 	 */
14709 	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
14710 		(geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
14711 		(xp->xb_pktp->pkt_resid == 0)) {
14712 
14713 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
14714 			/*
14715 			 * Successfully set up next portion of cmd
14716 			 * transfer, try sending it
14717 			 */
14718 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
14719 			    NULL, NULL, 0, (clock_t)0, NULL);
14720 			sd_start_cmds(un, NULL);
14721 			return;	/* Note:x86: need a return here? */
14722 		}
14723 	}
14724 #endif
14725 
14726 	/*
14727 	 * If this is the failfast bp, clear it from un_failfast_bp. This
14728 	 * can happen if upon being re-tried the failfast bp either
14729 	 * succeeded or encountered another error (possibly even a different
14730 	 * error than the one that precipitated the failfast state, but in
14731 	 * that case it would have had to exhaust retries as well). Regardless,
	 * this should not occur while the instance is in the active
14733 	 * failfast state.
14734 	 */
14735 	if (bp == un->un_failfast_bp) {
14736 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
14737 		un->un_failfast_bp = NULL;
14738 	}
14739 
14740 	/*
14741 	 * Clear the failfast state upon successful completion of ANY cmd.
14742 	 */
14743 	if (bp->b_error == 0) {
14744 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
14745 	}
14746 
14747 	/*
14748 	 * This is used if the command was retried one or more times. Show that
14749 	 * we are done with it, and allow processing of the waitq to resume.
14750 	 */
14751 	if (bp == un->un_retry_bp) {
14752 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14753 		    "sd_return_command: un:0x%p: "
14754 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
14755 		un->un_retry_bp = NULL;
14756 		un->un_retry_statp = NULL;
14757 	}
14758 
14759 	SD_UPDATE_RDWR_STATS(un, bp);
14760 	SD_UPDATE_PARTITION_STATS(un, bp);
14761 
14762 	switch (un->un_state) {
14763 	case SD_STATE_SUSPENDED:
14764 		/*
14765 		 * Notify any threads waiting in sd_ddi_suspend() that
14766 		 * a command completion has occurred.
14767 		 */
14768 		cv_broadcast(&un->un_disk_busy_cv);
14769 		break;
14770 	default:
14771 		sd_start_cmds(un, NULL);
14772 		break;
14773 	}
14774 
14775 	/* Return this command up the iodone chain to its originator. */
14776 	mutex_exit(SD_MUTEX(un));
14777 
14778 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
14779 	xp->xb_pktp = NULL;
14780 
14781 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
14782 
14783 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14784 	mutex_enter(SD_MUTEX(un));
14785 
14786 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
14787 }
14788 
14789 
14790 /*
14791  *    Function: sd_return_failed_command
14792  *
14793  * Description: Command completion when an error occurred.
14794  *
14795  *     Context: May be called from interrupt context
14796  */
14797 
14798 static void
14799 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
14800 {
14801 	ASSERT(bp != NULL);
14802 	ASSERT(un != NULL);
14803 	ASSERT(mutex_owned(SD_MUTEX(un)));
14804 
14805 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14806 	    "sd_return_failed_command: entry\n");
14807 
14808 	/*
14809 	 * b_resid could already be nonzero due to a partial data
14810 	 * transfer, so do not change it here.
14811 	 */
14812 	SD_BIOERROR(bp, errcode);
14813 
14814 	sd_return_command(un, bp);
14815 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14816 	    "sd_return_failed_command: exit\n");
14817 }
14818 
14819 
14820 /*
14821  *    Function: sd_return_failed_command_no_restart
14822  *
14823  * Description: Same as sd_return_failed_command, but ensures that no
14824  *		call back into sd_start_cmds will be issued.
14825  *
14826  *     Context: May be called from interrupt context
14827  */
14828 
14829 static void
14830 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
14831 	int errcode)
14832 {
14833 	struct sd_xbuf *xp;
14834 
14835 	ASSERT(bp != NULL);
14836 	ASSERT(un != NULL);
14837 	ASSERT(mutex_owned(SD_MUTEX(un)));
14838 	xp = SD_GET_XBUF(bp);
14839 	ASSERT(xp != NULL);
14840 	ASSERT(errcode != 0);
14841 
14842 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14843 	    "sd_return_failed_command_no_restart: entry\n");
14844 
14845 	/*
14846 	 * b_resid could already be nonzero due to a partial data
14847 	 * transfer, so do not change it here.
14848 	 */
14849 	SD_BIOERROR(bp, errcode);
14850 
14851 	/*
14852 	 * If this is the failfast bp, clear it. This can happen if the
14853 	 * failfast bp encountered a fatal error when we attempted to
14854 	 * re-try it (such as a scsi_transport(9F) failure).  However
14855 	 * we should NOT be in an active failfast state if the failfast
14856 	 * bp is not NULL.
14857 	 */
14858 	if (bp == un->un_failfast_bp) {
14859 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
14860 		un->un_failfast_bp = NULL;
14861 	}
14862 
14863 	if (bp == un->un_retry_bp) {
14864 		/*
14865 		 * This command was retried one or more times. Show that we are
14866 		 * done with it, and allow processing of the waitq to resume.
14867 		 */
14868 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14869 		    "sd_return_failed_command_no_restart: "
14870 		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
14871 		un->un_retry_bp = NULL;
14872 		un->un_retry_statp = NULL;
14873 	}
14874 
14875 	SD_UPDATE_RDWR_STATS(un, bp);
14876 	SD_UPDATE_PARTITION_STATS(un, bp);
14877 
14878 	mutex_exit(SD_MUTEX(un));
14879 
14880 	if (xp->xb_pktp != NULL) {
14881 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
14882 		xp->xb_pktp = NULL;
14883 	}
14884 
14885 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
14886 
14887 	mutex_enter(SD_MUTEX(un));
14888 
14889 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14890 	    "sd_return_failed_command_no_restart: exit\n");
14891 }
14892 
14893 
14894 /*
14895  *    Function: sd_retry_command
14896  *
14897  * Description: queue up a command for retry, or (optionally) fail it
14898  *		if retry counts are exhausted.
14899  *
14900  *   Arguments: un - Pointer to the sd_lun struct for the target.
14901  *
14902  *		bp - Pointer to the buf for the command to be retried.
14903  *
14904  *		retry_check_flag - Flag to see which (if any) of the retry
14905  *		   counts should be decremented/checked. If the indicated
14906  *		   retry count is exhausted, then the command will not be
14907  *		   retried; it will be failed instead. This should use a
14908  *		   value equal to one of the following:
14909  *
14910  *			SD_RETRIES_NOCHECK
14911  *			SD_RETRIES_STANDARD
14912  *			SD_RETRIES_VICTIM
14913  *
14914  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
14915  *		   if the check should be made to see if FLAG_ISOLATE is set
14916  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
14917  *		   not retried, it is simply failed.
14918  *
14919  *		user_funcp - Ptr to function to call before dispatching the
14920  *		   command. May be NULL if no action needs to be performed.
14921  *		   (Primarily intended for printing messages.)
14922  *
14923  *		user_arg - Optional argument to be passed along to
14924  *		   the user_funcp call.
14925  *
14926  *		failure_code - errno return code to set in the bp if the
14927  *		   command is going to be failed.
14928  *
14929  *		retry_delay - Retry delay interval in (clock_t) units. May
14930  *		   be zero, which indicates that the command should be retried
14931  *		   immediately (ie, without an intervening delay).
14932  *
14933  *		statp - Ptr to kstat function to be updated if the command
14934  *		   is queued for a delayed retry. May be NULL if no kstat
14935  *		   update is desired.
14936  *
14937  *     Context: May be called from interrupt context.
14938  */
14939 
14940 static void
14941 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
14942 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
14943 	code), void *user_arg, int failure_code,  clock_t retry_delay,
14944 	void (*statp)(kstat_io_t *))
14945 {
14946 	struct sd_xbuf	*xp;
14947 	struct scsi_pkt	*pktp;
14948 
14949 	ASSERT(un != NULL);
14950 	ASSERT(mutex_owned(SD_MUTEX(un)));
14951 	ASSERT(bp != NULL);
14952 	xp = SD_GET_XBUF(bp);
14953 	ASSERT(xp != NULL);
14954 	pktp = SD_GET_PKTP(bp);
14955 	ASSERT(pktp != NULL);
14956 
14957 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14958 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
14959 
14960 	/*
14961 	 * If we are syncing or dumping, fail the command to avoid
14962 	 * recursively calling back into scsi_transport().
14963 	 */
14964 	if (ddi_in_panic()) {
14965 		goto fail_command_no_log;
14966 	}
14967 
14968 	/*
14969 	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
14970 	 * log an error and fail the command.
14971 	 */
14972 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
14973 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
14974 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
14975 		sd_dump_memory(un, SD_LOG_IO, "CDB",
14976 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
14977 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
14978 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
14979 		goto fail_command;
14980 	}
14981 
14982 	/*
14983 	 * If we are suspended or dumping, then put the command onto the head of the
14984 	 * wait queue since we don't want to start more commands.
14985 	 */
14986 	switch (un->un_state) {
14987 	case SD_STATE_SUSPENDED:
14988 	case SD_STATE_DUMPING:
14989 		bp->av_forw = un->un_waitq_headp;
14990 		un->un_waitq_headp = bp;
14991 		if (un->un_waitq_tailp == NULL) {
14992 			un->un_waitq_tailp = bp;
14993 		}
14994 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
14995 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
14996 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
14997 		return;
14998 	default:
14999 		break;
15000 	}
15001 
15002 	/*
15003 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
15004 	 * is set; if it is then we do not want to retry the command.
15005 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
15006 	 */
15007 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
15008 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
15009 			goto fail_command;
15010 		}
15011 	}
15012 
15013 
15014 	/*
15015 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
15016 	 * command timeout or a selection timeout has occurred. This means
15017 	 * that we were unable to establish any kind of communication with
15018 	 * the target, and subsequent retries and/or commands are likely
15019 	 * to encounter similar results and take a long time to complete.
15020 	 *
15021 	 * If this is a failfast error condition, we need to update the
15022 	 * failfast state, even if this bp does not have B_FAILFAST set.
15023 	 */
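	/*
	 * Failfast state transitions, summarizing the cases handled below:
	 *   inactive --(first failfast error)---> pending (un_failfast_bp set)
	 *   pending  --(same bp fails again)----> active  (queues flushed)
	 *   any state --(non-failfast error)----> inactive
	 */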
15024 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
15025 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
15026 			ASSERT(un->un_failfast_bp == NULL);
15027 			/*
15028 			 * If we are already in the active failfast state, and
15029 			 * another failfast error condition has been detected,
15030 			 * then fail this command if it has B_FAILFAST set.
15031 			 * If B_FAILFAST is clear, then maintain the legacy
15032 	 * behavior of retrying heroically, even though this will
15033 			 * take a lot more time to fail the command.
15034 			 */
15035 			if (bp->b_flags & B_FAILFAST) {
15036 				goto fail_command;
15037 			}
15038 		} else {
15039 			/*
15040 			 * We're not in the active failfast state, but we
15041 			 * have a failfast error condition, so we must begin
15042 			 * transition to the next state. We do this regardless
15043 			 * of whether or not this bp has B_FAILFAST set.
15044 			 */
15045 			if (un->un_failfast_bp == NULL) {
15046 				/*
15047 				 * This is the first bp to meet a failfast
15048 				 * condition so save it on un_failfast_bp &
15049 				 * do normal retry processing. Do not enter
15050 				 * active failfast state yet. This marks
15051 				 * entry into the "failfast pending" state.
15052 				 */
15053 				un->un_failfast_bp = bp;
15054 
15055 			} else if (un->un_failfast_bp == bp) {
15056 				/*
15057 				 * This is the second time *this* bp has
15058 				 * encountered a failfast error condition,
15059 				 * so enter active failfast state & flush
15060 				 * queues as appropriate.
15061 				 */
15062 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
15063 				un->un_failfast_bp = NULL;
15064 				sd_failfast_flushq(un);
15065 
15066 				/*
15067 				 * Fail this bp now if B_FAILFAST set;
15068 				 * otherwise continue with retries. (It would
15069 				 * be pretty ironic if this bp succeeded on a
15070 				 * subsequent retry after we just flushed all
15071 				 * the queues).
15072 				 */
15073 				if (bp->b_flags & B_FAILFAST) {
15074 					goto fail_command;
15075 				}
15076 
15077 #if !defined(lint) && !defined(__lint)
15078 			} else {
15079 				/*
15080 				 * If neither of the preceding conditionals
15081 				 * was true, it means that there is some
15082 				 * *other* bp that has met an initial failfast
15083 				 * condition and is currently either being
15084 				 * retried or is waiting to be retried. In
15085 				 * that case we should perform normal retry
15086 				 * processing on *this* bp, since there is a
15087 				 * chance that the current failfast condition
15088 				 * is transient and recoverable. If that does
15089 				 * not turn out to be the case, then retries
15090 				 * will be cleared when the wait queue is
15091 				 * flushed anyway.
15092 				 */
15093 #endif
15094 			}
15095 		}
15096 	} else {
15097 		/*
15098 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
15099 		 * likely were able to at least establish some level of
15100 		 * communication with the target and subsequent commands
15101 		 * and/or retries are likely to get through to the target.
15102 		 * In this case we want to be aggressive about clearing
15103 		 * the failfast state. Note that this does not affect
15104 		 * the "failfast pending" condition.
15105 		 */
15106 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15107 	}
15108 
15109 
15110 	/*
15111 	 * Check the specified retry count to see if we can still do
15112 	 * any retries with this pkt before we should fail it.
15113 	 */
15114 	switch (retry_check_flag & SD_RETRIES_MASK) {
15115 	case SD_RETRIES_VICTIM:
15116 		/*
15117 		 * Check the victim retry count. If exhausted, then fall
15118 		 * thru & check against the standard retry count.
15119 		 */
15120 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
15121 			/* Increment count & proceed with the retry */
15122 			xp->xb_victim_retry_count++;
15123 			break;
15124 		}
15125 		/* Victim retries exhausted, fall back to std. retries... */
15126 		/* FALLTHRU */
15127 
15128 	case SD_RETRIES_STANDARD:
15129 		if (xp->xb_retry_count >= un->un_retry_count) {
15130 			/* Retries exhausted, fail the command */
15131 			SD_TRACE(SD_LOG_IO_CORE, un,
15132 			    "sd_retry_command: retries exhausted!\n");
15133 			/*
15134 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
15135 			 * commands with nonzero pkt_resid.
15136 			 */
15137 			if ((pktp->pkt_reason == CMD_CMPLT) &&
15138 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
15139 			    (pktp->pkt_resid != 0)) {
15140 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
15141 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
15142 					SD_UPDATE_B_RESID(bp, pktp);
15143 				}
15144 			}
15145 			goto fail_command;
15146 		}
15147 		xp->xb_retry_count++;
15148 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15149 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15150 		break;
15151 
15152 	case SD_RETRIES_UA:
15153 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
15154 			/* Retries exhausted, fail the command */
15155 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15156 			    "Unit Attention retries exhausted. "
15157 			    "Check the target.\n");
15158 			goto fail_command;
15159 		}
15160 		xp->xb_ua_retry_count++;
15161 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15162 		    "sd_retry_command: retry count:%d\n",
15163 		    xp->xb_ua_retry_count);
15164 		break;
15165 
15166 	case SD_RETRIES_BUSY:
15167 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
15168 			/* Retries exhausted, fail the command */
15169 			SD_TRACE(SD_LOG_IO_CORE, un,
15170 			    "sd_retry_command: retries exhausted!\n");
15171 			goto fail_command;
15172 		}
15173 		xp->xb_retry_count++;
15174 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15175 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15176 		break;
15177 
15178 	case SD_RETRIES_NOCHECK:
15179 	default:
15180 		/* No retry count to check. Just proceed with the retry */
15181 		break;
15182 	}
15183 
15184 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15185 
15186 	/*
15187 	 * If we were given a zero timeout, we must attempt to retry the
15188 	 * command immediately (ie, without a delay).
15189 	 */
15190 	if (retry_delay == 0) {
15191 		/*
15192 		 * Check some limiting conditions to see if we can actually
15193 		 * do the immediate retry.  If we cannot, then we must
15194 		 * fall back to queueing up a delayed retry.
15195 		 */
15196 		if (un->un_ncmds_in_transport >= un->un_throttle) {
15197 			/*
15198 			 * We are at the throttle limit for the target,
15199 			 * fall back to delayed retry.
15200 			 */
15201 			retry_delay = SD_BSY_TIMEOUT;
15202 			statp = kstat_waitq_enter;
15203 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15204 			    "sd_retry_command: immed. retry hit "
15205 			    "throttle!\n");
15206 		} else {
15207 			/*
15208 			 * We're clear to proceed with the immediate retry.
15209 			 * First call the user-provided function (if any)
15210 			 */
15211 			if (user_funcp != NULL) {
15212 				(*user_funcp)(un, bp, user_arg,
15213 				    SD_IMMEDIATE_RETRY_ISSUED);
15214 			}
15215 
15216 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15217 			    "sd_retry_command: issuing immediate retry\n");
15218 
15219 			/*
15220 			 * Call sd_start_cmds() to transport the command to
15221 			 * the target.
15222 			 */
15223 			sd_start_cmds(un, bp);
15224 
15225 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15226 			    "sd_retry_command exit\n");
15227 			return;
15228 		}
15229 	}
15230 
15231 	/*
15232 	 * Set up to retry the command after a delay.
15233 	 * First call the user-provided function (if any)
15234 	 */
15235 	if (user_funcp != NULL) {
15236 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
15237 	}
15238 
15239 	sd_set_retry_bp(un, bp, retry_delay, statp);
15240 
15241 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15242 	return;
15243 
15244 fail_command:
15245 
15246 	if (user_funcp != NULL) {
15247 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
15248 	}
15249 
15250 fail_command_no_log:
15251 
15252 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15253 	    "sd_retry_command: returning failed command\n");
15254 
15255 	sd_return_failed_command(un, bp, failure_code);
15256 
15257 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15258 }
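
/*
 * Illustrative sketch (not part of the driver): a typical error-path
 * invocation of sd_retry_command() for a delayed, standard-count retry,
 * mirroring the callers elsewhere in this file. SD_MUTEX(un) must be
 * held:
 *
 *	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
 *	    sd_print_incomplete_msg, "read", EIO,
 *	    SD_BSY_TIMEOUT, kstat_waitq_enter);
 *
 * This decrements the standard retry budget, reports progress via the
 * supplied user_funcp, and fails the command with EIO once the retries
 * are exhausted.
 */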
15259 
15260 
15261 /*
15262  *    Function: sd_set_retry_bp
15263  *
15264  * Description: Set up the given bp for retry.
15265  *
15266  *   Arguments: un - ptr to associated softstate
15267  *		bp - ptr to buf(9S) for the command
15268  *		retry_delay - time interval before issuing retry (may be 0)
15269  *		statp - optional pointer to kstat function
15270  *
15271  *     Context: May be called under interrupt context
15272  */
15273 
15274 static void
15275 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
15276 	void (*statp)(kstat_io_t *))
15277 {
15278 	ASSERT(un != NULL);
15279 	ASSERT(mutex_owned(SD_MUTEX(un)));
15280 	ASSERT(bp != NULL);
15281 
15282 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15283 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
15284 
15285 	/*
15286 	 * Indicate that the command is being retried. This will not allow any
15287 	 * other commands on the wait queue to be transported to the target
15288 	 * until this command has been completed (success or failure). The
15289 	 * "retry command" is not transported to the target until the given
15290 	 * time delay expires, unless the user specified a 0 retry_delay.
15291 	 *
15292 	 * Note: the timeout(9F) callback routine is what actually calls
15293 	 * sd_start_cmds() to transport the command, with the exception of a
15294 	 * zero retry_delay. The only current implementor of a zero retry delay
15295 	 * is the case where a START_STOP_UNIT is sent to spin up a device.
15296 	 */
15297 	if (un->un_retry_bp == NULL) {
15298 		ASSERT(un->un_retry_statp == NULL);
15299 		un->un_retry_bp = bp;
15300 
15301 		/*
15302 		 * If the user has not specified a delay the command should
15303 		 * be queued and no timeout should be scheduled.
15304 		 */
15305 		if (retry_delay == 0) {
15306 			/*
15307 			 * Save the kstat pointer that will be used in the
15308 			 * call to SD_UPDATE_KSTATS() below, so that
15309 			 * sd_start_cmds() can correctly decrement the waitq
15310 			 * count when it is time to transport this command.
15311 			 */
15312 			un->un_retry_statp = statp;
15313 			goto done;
15314 		}
15315 	}
15316 
15317 	if (un->un_retry_bp == bp) {
15318 		/*
15319 		 * Save the kstat pointer that will be used in the call to
15320 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
15321 		 * correctly decrement the waitq count when it is time to
15322 		 * transport this command.
15323 		 */
15324 		un->un_retry_statp = statp;
15325 
15326 		/*
15327 		 * Schedule a timeout if:
15328 		 *   1) The user has specified a delay.
15329 		 *   2) There is not a START_STOP_UNIT callback pending.
15330 		 *
15331 		 * If no delay has been specified, then it is up to the caller
15332 		 * to ensure that IO processing continues without stalling.
15333 		 * Effectively, this means that the caller will issue the
15334 		 * required call to sd_start_cmds(). The START_STOP_UNIT
15335 		 * callback does this after the START STOP UNIT command has
15336 		 * completed. In either of these cases we should not schedule
15337 		 * a timeout callback here.  Also don't schedule the timeout if
15338 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
15339 		 */
15340 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
15341 		    (un->un_direct_priority_timeid == NULL)) {
15342 			un->un_retry_timeid =
15343 			    timeout(sd_start_retry_command, un, retry_delay);
15344 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15345 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
15346 			    " bp:0x%p un_retry_timeid:0x%p\n",
15347 			    un, bp, un->un_retry_timeid);
15348 		}
15349 	} else {
15350 		/*
15351 		 * We only get in here if there is already another command
15352 		 * waiting to be retried.  In this case, we just put the
15353 		 * given command onto the wait queue, so it can be transported
15354 		 * after the current retry command has completed.
15355 		 *
15356 		 * Also we have to make sure that if the command at the head
15357 		 * of the wait queue is the un_failfast_bp, that we do not
15358 		 * put ahead of it any other commands that are to be retried.
15359 		 */
15360 		if ((un->un_failfast_bp != NULL) &&
15361 		    (un->un_failfast_bp == un->un_waitq_headp)) {
15362 			/*
15363 			 * Enqueue this command AFTER the first command on
15364 			 * the wait queue (which is also un_failfast_bp).
15365 			 */
15366 			bp->av_forw = un->un_waitq_headp->av_forw;
15367 			un->un_waitq_headp->av_forw = bp;
15368 			if (un->un_waitq_headp == un->un_waitq_tailp) {
15369 				un->un_waitq_tailp = bp;
15370 			}
15371 		} else {
15372 			/* Enqueue this command at the head of the waitq. */
15373 			bp->av_forw = un->un_waitq_headp;
15374 			un->un_waitq_headp = bp;
15375 			if (un->un_waitq_tailp == NULL) {
15376 				un->un_waitq_tailp = bp;
15377 			}
15378 		}
15379 
15380 		if (statp == NULL) {
15381 			statp = kstat_waitq_enter;
15382 		}
15383 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15384 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
15385 	}
15386 
15387 done:
15388 	if (statp != NULL) {
15389 		SD_UPDATE_KSTATS(un, statp, bp);
15390 	}
15391 
15392 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15393 	    "sd_set_retry_bp: exit un:0x%p\n", un);
15394 }
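
/*
 * Illustrative sketch (not part of the driver): the zero-delay contract
 * of sd_set_retry_bp(). With retry_delay == 0 no timeout(9F) is
 * scheduled, so the caller must restart I/O itself later, e.g.:
 *
 *	sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
 *	...
 *	sd_start_cmds(un, un->un_retry_bp);
 *
 * This is the pattern used by the START STOP UNIT completion callback
 * mentioned in the comments above.
 */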
15395 
15396 
15397 /*
15398  *    Function: sd_start_retry_command
15399  *
15400  * Description: Start the command that has been waiting on the target's
15401  *		retry queue.  Called from timeout(9F) context after the
15402  *		retry delay interval has expired.
15403  *
15404  *   Arguments: arg - pointer to associated softstate for the device.
15405  *
15406  *     Context: timeout(9F) thread context.  May not sleep.
15407  */
15408 
15409 static void
15410 sd_start_retry_command(void *arg)
15411 {
15412 	struct sd_lun *un = arg;
15413 
15414 	ASSERT(un != NULL);
15415 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15416 
15417 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15418 	    "sd_start_retry_command: entry\n");
15419 
15420 	mutex_enter(SD_MUTEX(un));
15421 
15422 	un->un_retry_timeid = NULL;
15423 
15424 	if (un->un_retry_bp != NULL) {
15425 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15426 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
15427 		    un, un->un_retry_bp);
15428 		sd_start_cmds(un, un->un_retry_bp);
15429 	}
15430 
15431 	mutex_exit(SD_MUTEX(un));
15432 
15433 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15434 	    "sd_start_retry_command: exit\n");
15435 }
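
/*
 * Illustrative sketch (not part of the driver): how the callback above
 * is armed and disarmed. sd_set_retry_bp() schedules it via timeout(9F)
 * and records the id so it can later be cancelled, e.g.:
 *
 *	un->un_retry_timeid = timeout(sd_start_retry_command, un,
 *	    drv_usectohz(100000));		(about 100ms, illustrative)
 *	...
 *	if (un->un_retry_timeid != NULL) {
 *		(void) untimeout(un->un_retry_timeid);
 *		un->un_retry_timeid = NULL;
 *	}
 *
 * Note that untimeout(9F) waits for a running callback to finish, so it
 * must not be called while holding a mutex that the callback acquires.
 */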
15436 
15437 
15438 /*
15439  *    Function: sd_start_direct_priority_command
15440  *
15441  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
15442  *		received TRAN_BUSY when we called scsi_transport() to send it
15443  *		to the underlying HBA. This function is called from timeout(9F)
15444  *		context after the delay interval has expired.
15445  *
15446  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
15447  *
15448  *     Context: timeout(9F) thread context.  May not sleep.
15449  */
15450 
15451 static void
15452 sd_start_direct_priority_command(void *arg)
15453 {
15454 	struct buf	*priority_bp = arg;
15455 	struct sd_lun	*un;
15456 
15457 	ASSERT(priority_bp != NULL);
15458 	un = SD_GET_UN(priority_bp);
15459 	ASSERT(un != NULL);
15460 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15461 
15462 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15463 	    "sd_start_direct_priority_command: entry\n");
15464 
15465 	mutex_enter(SD_MUTEX(un));
15466 	un->un_direct_priority_timeid = NULL;
15467 	sd_start_cmds(un, priority_bp);
15468 	mutex_exit(SD_MUTEX(un));
15469 
15470 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15471 	    "sd_start_direct_priority_command: exit\n");
15472 }
15473 
15474 
15475 /*
15476  *    Function: sd_send_request_sense_command
15477  *
15478  * Description: Sends a REQUEST SENSE command to the target
15479  *
15480  *     Context: May be called from interrupt context.
15481  */
15482 
15483 static void
15484 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
15485 	struct scsi_pkt *pktp)
15486 {
15487 	ASSERT(bp != NULL);
15488 	ASSERT(un != NULL);
15489 	ASSERT(mutex_owned(SD_MUTEX(un)));
15490 
15491 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
15492 	    "entry: buf:0x%p\n", bp);
15493 
15494 	/*
15495 	 * If we are syncing or dumping, then fail the command to avoid a
15496 	 * recursive callback into scsi_transport(). Also fail the command
15497 	 * if we are suspended (legacy behavior).
15498 	 */
15499 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
15500 	    (un->un_state == SD_STATE_DUMPING)) {
15501 		sd_return_failed_command(un, bp, EIO);
15502 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15503 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
15504 		return;
15505 	}
15506 
15507 	/*
15508 	 * Retry the failed command and don't issue the request sense if:
15509 	 *    1) the sense buf is busy
15510 	 *    2) we have 1 or more outstanding commands on the target
15511 	 *    (the sense data will be cleared or invalidated anyway)
15512 	 *
15513 	 * Note: There could be an issue with not checking a retry limit here;
15514 	 * the problem is determining which retry limit to check.
15515 	 */
15516 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
15517 		/* Don't retry if the command is flagged as non-retryable */
15518 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15519 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15520 			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
15521 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15522 			    "sd_send_request_sense_command: "
15523 			    "at full throttle, retrying exit\n");
15524 		} else {
15525 			sd_return_failed_command(un, bp, EIO);
15526 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15527 			    "sd_send_request_sense_command: "
15528 			    "at full throttle, non-retryable exit\n");
15529 		}
15530 		return;
15531 	}
15532 
15533 	sd_mark_rqs_busy(un, bp);
15534 	sd_start_cmds(un, un->un_rqs_bp);
15535 
15536 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15537 	    "sd_send_request_sense_command: exit\n");
15538 }
15539 
15540 
15541 /*
15542  *    Function: sd_mark_rqs_busy
15543  *
15544  * Description: Indicate that the request sense bp for this instance is
15545  *		in use.
15546  *
15547  *     Context: May be called under interrupt context
15548  */
15549 
15550 static void
15551 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
15552 {
15553 	struct sd_xbuf	*sense_xp;
15554 
15555 	ASSERT(un != NULL);
15556 	ASSERT(bp != NULL);
15557 	ASSERT(mutex_owned(SD_MUTEX(un)));
15558 	ASSERT(un->un_sense_isbusy == 0);
15559 
15560 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
15561 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
15562 
15563 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
15564 	ASSERT(sense_xp != NULL);
15565 
15566 	SD_INFO(SD_LOG_IO, un,
15567 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
15568 
15569 	ASSERT(sense_xp->xb_pktp != NULL);
15570 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
15571 	    == (FLAG_SENSING | FLAG_HEAD));
15572 
15573 	un->un_sense_isbusy = 1;
15574 	un->un_rqs_bp->b_resid = 0;
15575 	sense_xp->xb_pktp->pkt_resid  = 0;
15576 	sense_xp->xb_pktp->pkt_reason = 0;
15577 
15578 	/* So we can get back the bp at interrupt time! */
15579 	sense_xp->xb_sense_bp = bp;
15580 
15581 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
15582 
15583 	/*
15584 	 * Mark this buf as awaiting sense data. (This is already set in
15585 	 * the pkt_flags for the RQS packet.)
15586 	 */
15587 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
15588 
15589 	sense_xp->xb_retry_count	= 0;
15590 	sense_xp->xb_victim_retry_count = 0;
15591 	sense_xp->xb_ua_retry_count	= 0;
15592 	sense_xp->xb_dma_resid  = 0;
15593 
15594 	/* Clean up the fields for auto-request sense */
15595 	sense_xp->xb_sense_status = 0;
15596 	sense_xp->xb_sense_state  = 0;
15597 	sense_xp->xb_sense_resid  = 0;
15598 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
15599 
15600 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
15601 }
15602 
15603 
15604 /*
15605  *    Function: sd_mark_rqs_idle
15606  *
15607  * Description: SD_MUTEX must be held continuously through this routine
15608  *		to prevent reuse of the rqs struct before the caller can
15609  *		complete its processing.
15610  *
15611  * Return Code: Pointer to the RQS buf
15612  *
15613  *     Context: May be called under interrupt context
15614  */
15615 
15616 static struct buf *
15617 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
15618 {
15619 	struct buf *bp;
15620 	ASSERT(un != NULL);
15621 	ASSERT(sense_xp != NULL);
15622 	ASSERT(mutex_owned(SD_MUTEX(un)));
15623 	ASSERT(un->un_sense_isbusy != 0);
15624 
15625 	un->un_sense_isbusy = 0;
15626 	bp = sense_xp->xb_sense_bp;
15627 	sense_xp->xb_sense_bp = NULL;
15628 
15629 	/* This pkt is no longer interested in getting sense data */
15630 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
15631 
15632 	return (bp);
15633 }
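
/*
 * Illustrative sketch (not part of the driver): the busy/idle pairing
 * for the per-instance request sense resources, under SD_MUTEX(un):
 *
 *	sd_mark_rqs_busy(un, bp);		(claim un_rqs_bp for bp)
 *	sd_start_cmds(un, un->un_rqs_bp);	(transport REQUEST SENSE)
 *	...					(later, at interrupt time)
 *	bp = sd_mark_rqs_idle(un, sense_xp);	(recover the original bp)
 */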
15634 
15635 
15636 
15637 /*
15638  *    Function: sd_alloc_rqs
15639  *
15640  * Description: Set up the unit to receive auto request sense data
15641  *
15642  * Return Code: DDI_SUCCESS or DDI_FAILURE
15643  *
15644  *     Context: Called under attach(9E) context
15645  */
15646 
15647 static int
15648 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
15649 {
15650 	struct sd_xbuf *xp;
15651 
15652 	ASSERT(un != NULL);
15653 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15654 	ASSERT(un->un_rqs_bp == NULL);
15655 	ASSERT(un->un_rqs_pktp == NULL);
15656 
15657 	/*
15658 	 * First allocate the required buf and scsi_pkt structs, then set up
15659 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
15660 	 */
15661 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
15662 	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
15663 	if (un->un_rqs_bp == NULL) {
15664 		return (DDI_FAILURE);
15665 	}
15666 
15667 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
15668 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
15669 
15670 	if (un->un_rqs_pktp == NULL) {
15671 		sd_free_rqs(un);
15672 		return (DDI_FAILURE);
15673 	}
15674 
15675 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
15676 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
15677 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
15678 
15679 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
15680 
15681 	/* Set up the other needed members in the ARQ scsi_pkt. */
15682 	un->un_rqs_pktp->pkt_comp   = sdintr;
15683 	un->un_rqs_pktp->pkt_time   = sd_io_time;
15684 	un->un_rqs_pktp->pkt_flags |=
15685 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
15686 
15687 	/*
15688 	 * Allocate & init the sd_xbuf struct for the RQS command. Do not
15689 	 * provide any initpkt or destroypkt routines, as we take care of
15690 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
15691 	 */
15692 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
15693 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
15694 	xp->xb_pktp = un->un_rqs_pktp;
15695 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
15696 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
15697 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
15698 
15699 	/*
15700 	 * Save the pointer to the request sense private bp so it can
15701 	 * be retrieved in sdintr.
15702 	 */
15703 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
15704 	ASSERT(un->un_rqs_bp->b_private == xp);
15705 
15706 	/*
15707 	 * See if the HBA supports auto-request sense for the specified
15708 	 * target/lun. If it does, then try to enable it (if not already
15709 	 * enabled).
15710 	 *
15711 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
15712 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
15713 	 * return success.  However, in both of these cases ARQ is always
15714 	 * enabled and scsi_ifgetcap will always return true. The best approach
15715 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
15716 	 *
15717 	 * The 3rd case is an HBA (adp) that always returns enabled on
15718 	 * scsi_ifgetcap even when ARQ is not enabled; the best approach
15719 	 * there is to issue a scsi_ifsetcap followed by a scsi_ifgetcap.
15720 	 * Note: this handling circumvents an Adaptec bug. (x86 only)
15721 	 */
15722 
15723 	if (un->un_f_is_fibre == TRUE) {
15724 		un->un_f_arq_enabled = TRUE;
15725 	} else {
15726 #if defined(__i386) || defined(__amd64)
15727 		/*
15728 		 * Circumvent the Adaptec bug, remove this code when
15729 		 * the bug is fixed
15730 		 */
15731 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
15732 #endif
15733 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
15734 		case 0:
15735 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15736 				"sd_alloc_rqs: HBA supports ARQ\n");
15737 			/*
15738 			 * ARQ is supported by this HBA but currently is not
15739 			 * enabled. Attempt to enable it and if successful then
15740 			 * mark this instance as ARQ enabled.
15741 			 */
15742 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
15743 				== 1) {
15744 				/* Successfully enabled ARQ in the HBA */
15745 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
15746 					"sd_alloc_rqs: ARQ enabled\n");
15747 				un->un_f_arq_enabled = TRUE;
15748 			} else {
15749 				/* Could not enable ARQ in the HBA */
15750 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
15751 				"sd_alloc_rqs: failed ARQ enable\n");
15752 				un->un_f_arq_enabled = FALSE;
15753 			}
15754 			break;
15755 		case 1:
15756 			/*
15757 			 * ARQ is supported by this HBA and is already enabled.
15758 			 * Just mark ARQ as enabled for this instance.
15759 			 */
15760 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15761 				"sd_alloc_rqs: ARQ already enabled\n");
15762 			un->un_f_arq_enabled = TRUE;
15763 			break;
15764 		default:
15765 			/*
15766 			 * ARQ is not supported by this HBA; disable it for this
15767 			 * instance.
15768 			 */
15769 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15770 				"sd_alloc_rqs: HBA does not support ARQ\n");
15771 			un->un_f_arq_enabled = FALSE;
15772 			break;
15773 		}
15774 	}
15775 
15776 	return (DDI_SUCCESS);
15777 }
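
/*
 * Illustrative sketch (not part of the driver): the pkt_private
 * round-trip established above. sdintr() recovers the buf that was
 * stored here when the REQUEST SENSE packet completes:
 *
 *	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;	(at attach time)
 *	...
 *	bp = (struct buf *)pktp->pkt_private;		(in sdintr)
 *	xp = SD_GET_XBUF(bp);
 */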
15778 
15779 
15780 /*
15781  *    Function: sd_free_rqs
15782  *
15783  * Description: Cleanup for the pre-instance RQS command.
15784  *
15785  *     Context: Kernel thread context
15786  */
15787 
15788 static void
15789 sd_free_rqs(struct sd_lun *un)
15790 {
15791 	ASSERT(un != NULL);
15792 
15793 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
15794 
15795 	/*
15796 	 * If consistent memory is bound to a scsi_pkt, the pkt
15797 	 * has to be destroyed *before* freeing the consistent memory.
15798 	 * Don't change the sequence of these operations.
15799 	 * scsi_destroy_pkt() might otherwise access memory that has
15800 	 * already been freed by scsi_free_consistent_buf().
15801 	 */
15802 	if (un->un_rqs_pktp != NULL) {
15803 		scsi_destroy_pkt(un->un_rqs_pktp);
15804 		un->un_rqs_pktp = NULL;
15805 	}
15806 
15807 	if (un->un_rqs_bp != NULL) {
15808 		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
15809 		scsi_free_consistent_buf(un->un_rqs_bp);
15810 		un->un_rqs_bp = NULL;
15811 	}
15812 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
15813 }
15814 
15815 
15816 
15817 /*
15818  *    Function: sd_reduce_throttle
15819  *
15820  * Description: Reduces the maximum # of outstanding commands on a
15821  *		target to the current number of outstanding commands.
15822  *		Queues a timeout(9F) callback to restore the limit
15823  *		after a specified interval has elapsed.
15824  *		Typically used when we get a TRAN_BUSY return code
15825  *		back from scsi_transport().
15826  *
15827  *   Arguments: un - ptr to the sd_lun softstate struct
15828  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
15829  *
15830  *     Context: May be called from interrupt context
15831  */
15832 
15833 static void
15834 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
15835 {
15836 	ASSERT(un != NULL);
15837 	ASSERT(mutex_owned(SD_MUTEX(un)));
15838 	ASSERT(un->un_ncmds_in_transport >= 0);
15839 
15840 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15841 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
15842 	    un, un->un_throttle, un->un_ncmds_in_transport);
15843 
15844 	if (un->un_throttle > 1) {
15845 		if (un->un_f_use_adaptive_throttle == TRUE) {
15846 			switch (throttle_type) {
15847 			case SD_THROTTLE_TRAN_BUSY:
15848 				if (un->un_busy_throttle == 0) {
15849 					un->un_busy_throttle = un->un_throttle;
15850 				}
15851 				break;
15852 			case SD_THROTTLE_QFULL:
15853 				un->un_busy_throttle = 0;
15854 				break;
15855 			default:
15856 				ASSERT(FALSE);
15857 			}
15858 
15859 			if (un->un_ncmds_in_transport > 0) {
15860 				un->un_throttle = un->un_ncmds_in_transport;
15861 			}
15862 
15863 		} else {
15864 			if (un->un_ncmds_in_transport == 0) {
15865 				un->un_throttle = 1;
15866 			} else {
15867 				un->un_throttle = un->un_ncmds_in_transport;
15868 			}
15869 		}
15870 	}
15871 
15872 	/* Reschedule the timeout if none is currently active */
15873 	if (un->un_reset_throttle_timeid == NULL) {
15874 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
15875 		    un, SD_THROTTLE_RESET_INTERVAL);
15876 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15877 		    "sd_reduce_throttle: timeout scheduled!\n");
15878 	}
15879 
15880 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15881 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
15882 }
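
/*
 * Illustrative sketch (not part of the driver): the typical caller
 * pattern in sd_start_cmds(). scsi_transport(9F) is called with
 * SD_MUTEX(un) dropped; on TRAN_BUSY the mutex is re-taken and the
 * throttle reduced before the command is requeued for a delayed retry:
 *
 *	mutex_exit(SD_MUTEX(un));
 *	rval = scsi_transport(xp->xb_pktp);
 *	mutex_enter(SD_MUTEX(un));
 *	if (rval == TRAN_BUSY) {
 *		sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
 *		(requeue bp and retry once the throttle is restored)
 *	}
 */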
15883 
15884 
15885 
15886 /*
15887  *    Function: sd_restore_throttle
15888  *
15889  * Description: Callback function for timeout(9F).  Resets the current
15890  *		value of un->un_throttle to its default.
15891  *
15892  *   Arguments: arg - pointer to associated softstate for the device.
15893  *
15894  *     Context: May be called from interrupt context
15895  */
15896 
15897 static void
15898 sd_restore_throttle(void *arg)
15899 {
15900 	struct sd_lun	*un = arg;
15901 
15902 	ASSERT(un != NULL);
15903 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15904 
15905 	mutex_enter(SD_MUTEX(un));
15906 
15907 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
15908 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
15909 
15910 	un->un_reset_throttle_timeid = NULL;
15911 
15912 	if (un->un_f_use_adaptive_throttle == TRUE) {
15913 		/*
15914 		 * If un_busy_throttle is nonzero, then it contains the
15915 		 * value that un_throttle was when we got a TRAN_BUSY back
15916 		 * from scsi_transport(). We want to revert back to this
15917 		 * value.
15918 		 *
15919 		 * In the QFULL case, the throttle limit will incrementally
15920 		 * increase until it reaches max throttle.
15921 		 */
15922 		if (un->un_busy_throttle > 0) {
15923 			un->un_throttle = un->un_busy_throttle;
15924 			un->un_busy_throttle = 0;
15925 		} else {
15926 			/*
15927 			 * Increase the throttle by 10% to open the gate
15928 			 * slowly; schedule another restore if the saved
15929 			 * throttle has not yet been reached.
15930 			 */
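			/*
			 * For example, with un_throttle == 8 the integer
			 * division 8 / 10 yields 0, so the increment is
			 * max(0, 1) == 1; with un_throttle == 30 it is 3.
			 */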
15931 			short throttle;
15932 			if (sd_qfull_throttle_enable) {
15933 				throttle = un->un_throttle +
15934 				    max((un->un_throttle / 10), 1);
15935 				un->un_throttle =
15936 				    (throttle < un->un_saved_throttle) ?
15937 				    throttle : un->un_saved_throttle;
15938 				if (un->un_throttle < un->un_saved_throttle) {
15939 					un->un_reset_throttle_timeid =
15940 					    timeout(sd_restore_throttle, un,
15941 					    SD_QFULL_THROTTLE_RESET_INTERVAL);
15942 				}
15943 			}
15944 		}
15945 
15946 		/*
15947 		 * If un_throttle has fallen below the low-water mark, we
15948 		 * restore the maximum value here (and allow it to ratchet
15949 		 * down again if necessary).
15950 		 */
15951 		if (un->un_throttle < un->un_min_throttle) {
15952 			un->un_throttle = un->un_saved_throttle;
15953 		}
15954 	} else {
15955 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
15956 		    "restoring limit from 0x%x to 0x%x\n",
15957 		    un->un_throttle, un->un_saved_throttle);
15958 		un->un_throttle = un->un_saved_throttle;
15959 	}
15960 
15961 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15962 	    "sd_restore_throttle: calling sd_start_cmds!\n");
15963 
15964 	sd_start_cmds(un, NULL);
15965 
15966 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15967 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
15968 	    un, un->un_throttle);
15969 
15970 	mutex_exit(SD_MUTEX(un));
15971 
15972 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
15973 }
15974 
15975 /*
15976  *    Function: sdrunout
15977  *
15978  * Description: Callback routine for scsi_init_pkt when a resource allocation
15979  *		fails.
15980  *
15981  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
15982  *		soft state instance.
15983  *
15984  * Return Code: The scsi_init_pkt routine allows for the callback function to
15985  *		return a 0 indicating the callback should be rescheduled or a 1
15986  *		indicating not to reschedule. This routine always returns 1
15987  *		because the driver always provides a callback function to
15988  *		scsi_init_pkt. This results in a callback always being scheduled
15989  *		(via the scsi_init_pkt callback implementation) if a resource
15990  *		failure occurs.
15991  *
15992  *     Context: This callback function may not block or call routines that block
15993  *
15994  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
15995  *		request persisting at the head of the list which cannot be
15996  *		satisfied even after multiple retries. In the future the driver
15997  *		may implement some type of maximum runout count before failing
15998  *		an I/O.
15999  */
16000 
16001 static int
16002 sdrunout(caddr_t arg)
16003 {
16004 	struct sd_lun	*un = (struct sd_lun *)arg;
16005 
16006 	ASSERT(un != NULL);
16007 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16008 
16009 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16010 
16011 	mutex_enter(SD_MUTEX(un));
16012 	sd_start_cmds(un, NULL);
16013 	mutex_exit(SD_MUTEX(un));
16014 	/*
16015 	 * This callback routine always returns 1 (i.e. do not reschedule)
16016 	 * because we always specify sdrunout as the callback handler for
16017 	 * scsi_init_pkt inside the call to sd_start_cmds.
16018 	 */
16019 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16020 	return (1);
16021 }
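
/*
 * Illustrative sketch (not part of the driver): how sdrunout is passed
 * as the resource callback to scsi_init_pkt(9F), per the comment above
 * regarding sd_start_cmds():
 *
 *	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, bp, CDB_GROUP1,
 *	    1, 0, 0, sdrunout, (caddr_t)un);
 *	if (pktp == NULL) {
 *		(allocation failed; sdrunout will be invoked when
 *		 resources become available and will restart the waitq
 *		 via sd_start_cmds())
 *	}
 */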
16022 
16023 
16024 /*
16025  *    Function: sdintr
16026  *
16027  * Description: Completion callback routine for scsi_pkt(9S) structs
16028  *		sent to the HBA driver via scsi_transport(9F).
16029  *
16030  *     Context: Interrupt context
16031  */
16032 
16033 static void
16034 sdintr(struct scsi_pkt *pktp)
16035 {
16036 	struct buf	*bp;
16037 	struct sd_xbuf	*xp;
16038 	struct sd_lun	*un;
16039 
16040 	ASSERT(pktp != NULL);
16041 	bp = (struct buf *)pktp->pkt_private;
16042 	ASSERT(bp != NULL);
16043 	xp = SD_GET_XBUF(bp);
16044 	ASSERT(xp != NULL);
16045 	ASSERT(xp->xb_pktp != NULL);
16046 	un = SD_GET_UN(bp);
16047 	ASSERT(un != NULL);
16048 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16049 
16050 #ifdef SD_FAULT_INJECTION
16051 
16052 	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
16053 	/* SD FaultInjection */
16054 	sd_faultinjection(pktp);
16055 
16056 #endif /* SD_FAULT_INJECTION */
16057 
16058 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
16059 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
16060 
16061 	mutex_enter(SD_MUTEX(un));
16062 
16063 	/* Reduce the count of the #commands currently in transport */
16064 	un->un_ncmds_in_transport--;
16065 	ASSERT(un->un_ncmds_in_transport >= 0);
16066 
16067 	/* Increment counter to indicate that the callback routine is active */
16068 	un->un_in_callback++;
16069 
16070 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
16071 
16072 #ifdef	SDDEBUG
16073 	if (bp == un->un_retry_bp) {
16074 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
16075 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
16076 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
16077 	}
16078 #endif
16079 
16080 	/*
16081 	 * If pkt_reason is CMD_DEV_GONE, just fail the command
16082 	 */
16083 	if (pktp->pkt_reason == CMD_DEV_GONE) {
16084 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16085 			    "Device is gone\n");
16086 		sd_return_failed_command(un, bp, EIO);
16087 		goto exit;
16088 	}
16089 
16090 	/*
16091 	 * First see if the pkt has auto-request sense data with it....
16092 	 * Look at the packet state first so we don't take a performance
16093 	 * hit looking at the arq enabled flag unless absolutely necessary.
16094 	 */
16095 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
16096 	    (un->un_f_arq_enabled == TRUE)) {
16097 		/*
16098 		 * The HBA did an auto request sense for this command so check
16099 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16100 		 * driver command that should not be retried.
16101 		 */
16102 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16103 			/*
16104 			 * Save the relevant sense info into the xp for the
16105 			 * original cmd.
16106 			 */
16107 			struct scsi_arq_status *asp;
16108 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16109 			xp->xb_sense_status =
16110 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
16111 			xp->xb_sense_state  = asp->sts_rqpkt_state;
16112 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16113 			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16114 			    min(sizeof (struct scsi_extended_sense),
16115 			    SENSE_LENGTH));
16116 
16117 			/* fail the command */
16118 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16119 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
16120 			sd_return_failed_command(un, bp, EIO);
16121 			goto exit;
16122 		}
16123 
16124 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16125 		/*
16126 		 * We want to either retry or fail this command, so free
16127 		 * the DMA resources here.  If we retry the command then
16128 		 * the DMA resources will be reallocated in sd_start_cmds().
16129 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
16130 		 * causes the *entire* transfer to start over again from the
16131 		 * beginning of the request, even for PARTIAL chunks that
16132 		 * have already transferred successfully.
16133 		 */
16134 		if ((un->un_f_is_fibre == TRUE) &&
16135 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16136 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16137 			scsi_dmafree(pktp);
16138 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16139 		}
16140 #endif
16141 
16142 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16143 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
16144 
16145 		sd_handle_auto_request_sense(un, bp, xp, pktp);
16146 		goto exit;
16147 	}
16148 
16149 	/* Next see if this is the REQUEST SENSE pkt for the instance */
16150 	if (pktp->pkt_flags & FLAG_SENSING)  {
16151 		/* This pktp is from the unit's REQUEST_SENSE command */
16152 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16153 		    "sdintr: sd_handle_request_sense\n");
16154 		sd_handle_request_sense(un, bp, xp, pktp);
16155 		goto exit;
16156 	}
16157 
16158 	/*
16159 	 * Check to see if the command successfully completed as requested;
16160 	 * this is the most common case (and also the hot performance path).
16161 	 *
16162 	 * Requirements for successful completion are:
16163 	 * pkt_reason is CMD_CMPLT and packet status is status good.
16164 	 * In addition:
16165 	 * - A residual of zero indicates successful completion no matter what
16166 	 *   the command is.
16167 	 * - If the residual is not zero and the command is not a read or
16168 	 *   write, then it's still defined as successful completion. In other
16169 	 *   words, if the command is a read or write the residual must be
16170 	 *   zero for successful completion.
16171 	 * - If the residual is not zero and the command is a read or
16172 	 *   write, and it's a USCSICMD, then it's still defined as
16173 	 *   successful completion.
16174 	 */
16175 	if ((pktp->pkt_reason == CMD_CMPLT) &&
16176 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
16177 
16178 		/*
16179 		 * Since this command is returned with a good status, we
16180 		 * can reset the count for Sonoma failover.
16181 		 */
16182 		un->un_sonoma_failure_count = 0;
16183 
16184 		/*
16185 		 * Return all USCSI commands on good status
16186 		 */
16187 		if (pktp->pkt_resid == 0) {
16188 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16189 			    "sdintr: returning command for resid == 0\n");
16190 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
16191 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
16192 			SD_UPDATE_B_RESID(bp, pktp);
16193 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16194 			    "sdintr: returning command for resid != 0\n");
16195 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
16196 			SD_UPDATE_B_RESID(bp, pktp);
16197 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16198 				"sdintr: returning uscsi command\n");
16199 		} else {
16200 			goto not_successful;
16201 		}
16202 		sd_return_command(un, bp);
16203 
16204 		/*
16205 		 * Decrement counter to indicate that the callback routine
16206 		 * is done.
16207 		 */
16208 		un->un_in_callback--;
16209 		ASSERT(un->un_in_callback >= 0);
16210 		mutex_exit(SD_MUTEX(un));
16211 
16212 		return;
16213 	}
16214 
16215 not_successful:
16216 
16217 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16218 	/*
16219 	 * The following is based upon knowledge of the underlying transport
16220 	 * and its use of DMA resources.  This code should be removed when
16221 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
16222 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
16223 	 * and sd_start_cmds().
16224 	 *
16225 	 * Free any DMA resources associated with this command if there
16226 	 * is a chance it could be retried or enqueued for later retry.
16227 	 * If we keep the DMA binding then mpxio cannot reissue the
16228 	 * command on another path whenever a path failure occurs.
16229 	 *
16230 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
16231 	 * causes the *entire* transfer to start over again from the
16232 	 * beginning of the request, even for PARTIAL chunks that
16233 	 * have already transferred successfully.
16234 	 *
16235 	 * This is only done for non-uscsi commands (and also skipped for the
16236 	 * driver's internal RQS command). Also just do this for Fibre Channel
16237 	 * devices as these are the only ones that support mpxio.
16238 	 */
16239 	if ((un->un_f_is_fibre == TRUE) &&
16240 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16241 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16242 		scsi_dmafree(pktp);
16243 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16244 	}
16245 #endif
16246 
16247 	/*
16248 	 * The command did not successfully complete as requested so check
16249 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16250 	 * driver command that should not be retried so just return. If
16251 	 * FLAG_DIAGNOSE is not set the error will be processed below.
16252 	 */
16253 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16254 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16255 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
16256 		/*
16257 		 * Issue a request sense if a check condition caused the error
16258 		 * (we handle the auto request sense case above), otherwise
16259 		 * just fail the command.
16260 		 */
16261 		if ((pktp->pkt_reason == CMD_CMPLT) &&
16262 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
16263 			sd_send_request_sense_command(un, bp, pktp);
16264 		} else {
16265 			sd_return_failed_command(un, bp, EIO);
16266 		}
16267 		goto exit;
16268 	}
16269 
16270 	/*
16271 	 * The command did not successfully complete as requested so process
16272 	 * the error, retry, and/or attempt recovery.
16273 	 */
16274 	switch (pktp->pkt_reason) {
16275 	case CMD_CMPLT:
16276 		switch (SD_GET_PKT_STATUS(pktp)) {
16277 		case STATUS_GOOD:
16278 			/*
16279 			 * The command completed successfully with a non-zero
16280 			 * residual
16281 			 */
16282 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16283 			    "sdintr: STATUS_GOOD \n");
16284 			sd_pkt_status_good(un, bp, xp, pktp);
16285 			break;
16286 
16287 		case STATUS_CHECK:
16288 		case STATUS_TERMINATED:
16289 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16290 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
16291 			sd_pkt_status_check_condition(un, bp, xp, pktp);
16292 			break;
16293 
16294 		case STATUS_BUSY:
16295 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16296 			    "sdintr: STATUS_BUSY\n");
16297 			sd_pkt_status_busy(un, bp, xp, pktp);
16298 			break;
16299 
16300 		case STATUS_RESERVATION_CONFLICT:
16301 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16302 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
16303 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16304 			break;
16305 
16306 		case STATUS_QFULL:
16307 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16308 			    "sdintr: STATUS_QFULL\n");
16309 			sd_pkt_status_qfull(un, bp, xp, pktp);
16310 			break;
16311 
16312 		case STATUS_MET:
16313 		case STATUS_INTERMEDIATE:
16314 		case STATUS_SCSI2:
16315 		case STATUS_INTERMEDIATE_MET:
16316 		case STATUS_ACA_ACTIVE:
16317 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16318 			    "Unexpected SCSI status received: 0x%x\n",
16319 			    SD_GET_PKT_STATUS(pktp));
16320 			sd_return_failed_command(un, bp, EIO);
16321 			break;
16322 
16323 		default:
16324 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16325 			    "Invalid SCSI status received: 0x%x\n",
16326 			    SD_GET_PKT_STATUS(pktp));
16327 			sd_return_failed_command(un, bp, EIO);
16328 			break;
16329 
16330 		}
16331 		break;
16332 
16333 	case CMD_INCOMPLETE:
16334 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16335 		    "sdintr:  CMD_INCOMPLETE\n");
16336 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
16337 		break;
16338 	case CMD_TRAN_ERR:
16339 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16340 		    "sdintr: CMD_TRAN_ERR\n");
16341 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
16342 		break;
16343 	case CMD_RESET:
16344 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16345 		    "sdintr: CMD_RESET \n");
16346 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
16347 		break;
16348 	case CMD_ABORTED:
16349 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16350 		    "sdintr: CMD_ABORTED \n");
16351 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
16352 		break;
16353 	case CMD_TIMEOUT:
16354 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16355 		    "sdintr: CMD_TIMEOUT\n");
16356 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
16357 		break;
16358 	case CMD_UNX_BUS_FREE:
16359 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16360 		    "sdintr: CMD_UNX_BUS_FREE \n");
16361 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
16362 		break;
16363 	case CMD_TAG_REJECT:
16364 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16365 		    "sdintr: CMD_TAG_REJECT\n");
16366 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
16367 		break;
16368 	default:
16369 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16370 		    "sdintr: default\n");
16371 		sd_pkt_reason_default(un, bp, xp, pktp);
16372 		break;
16373 	}
16374 
16375 exit:
16376 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
16377 
16378 	/* Decrement counter to indicate that the callback routine is done. */
16379 	un->un_in_callback--;
16380 	ASSERT(un->un_in_callback >= 0);
16381 
16382 	/*
16383 	 * At this point, the pkt has been dispatched, ie, it is either
16384 	 * being re-tried or has been returned to its caller and should
16385 	 * not be referenced.
16386 	 */
16387 
16388 	mutex_exit(SD_MUTEX(un));
16389 }
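
/*
 * The helpers below implement the per-disposition recovery actions
 * dispatched from sdintr() above: pkt_reason describes the transport-level
 * disposition of the command, and only for CMD_CMPLT is the SCSI status
 * byte (pkt_status) examined further.  Each helper is entered with
 * SD_MUTEX held and typically either queues a retry via sd_retry_command()
 * or completes the buf via sd_return_command()/sd_return_failed_command().
 */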
16390 
16391 
16392 /*
16393  *    Function: sd_print_incomplete_msg
16394  *
16395  * Description: Prints the error message for a CMD_INCOMPLETE error.
16396  *
16397  *   Arguments: un - ptr to associated softstate for the device.
16398  *		bp - ptr to the buf(9S) for the command.
16399  *		arg - message string ptr
16400  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
16401  *			or SD_NO_RETRY_ISSUED.
16402  *
16403  *     Context: May be called under interrupt context
16404  */
16405 
16406 static void
16407 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
16408 {
16409 	struct scsi_pkt	*pktp;
16410 	char	*msgp;
16411 	char	*cmdp = arg;
16412 
16413 	ASSERT(un != NULL);
16414 	ASSERT(mutex_owned(SD_MUTEX(un)));
16415 	ASSERT(bp != NULL);
16416 	ASSERT(arg != NULL);
16417 	pktp = SD_GET_PKTP(bp);
16418 	ASSERT(pktp != NULL);
16419 
16420 	switch (code) {
16421 	case SD_DELAYED_RETRY_ISSUED:
16422 	case SD_IMMEDIATE_RETRY_ISSUED:
16423 		msgp = "retrying";
16424 		break;
16425 	case SD_NO_RETRY_ISSUED:
16426 	default:
16427 		msgp = "giving up";
16428 		break;
16429 	}
16430 
16431 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16432 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16433 		    "incomplete %s- %s\n", cmdp, msgp);
16434 	}
16435 }
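
/*
 * Note: like the other message helpers in this file, the check above
 * honors FLAG_SILENT, which is set (e.g., for USCSI_SILENT requests) by
 * callers that want transport errors handled without console noise.
 */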
16436 
16437 
16438 
16439 /*
16440  *    Function: sd_pkt_status_good
16441  *
16442  * Description: Processing for a STATUS_GOOD code in pkt_status.
16443  *
16444  *     Context: May be called under interrupt context
16445  */
16446 
16447 static void
16448 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
16449 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16450 {
16451 	char	*cmdp;
16452 
16453 	ASSERT(un != NULL);
16454 	ASSERT(mutex_owned(SD_MUTEX(un)));
16455 	ASSERT(bp != NULL);
16456 	ASSERT(xp != NULL);
16457 	ASSERT(pktp != NULL);
16458 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
16459 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
16460 	ASSERT(pktp->pkt_resid != 0);
16461 
16462 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
16463 
16464 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16465 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
16466 	case SCMD_READ:
16467 		cmdp = "read";
16468 		break;
16469 	case SCMD_WRITE:
16470 		cmdp = "write";
16471 		break;
16472 	default:
16473 		SD_UPDATE_B_RESID(bp, pktp);
16474 		sd_return_command(un, bp);
16475 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16476 		return;
16477 	}
16478 
16479 	/*
16480 	 * See if we can retry the read/write, preferably immediately.
16481 	 * If retries are exhausted, then sd_retry_command() will update
16482 	 * the b_resid count.
16483 	 */
16484 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
16485 	    cmdp, EIO, (clock_t)0, NULL);
16486 
16487 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16488 }
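
/*
 * Note: as the logic above shows, STATUS_GOOD with a short residual is
 * treated as an error (and retried) only for media reads and writes; for
 * any other opcode the residual is simply reflected back to the caller
 * in b_resid.
 */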
16489 
16490 
16491 
16492 
16493 
16494 /*
16495  *    Function: sd_handle_request_sense
16496  *
16497  * Description: Processing for non-auto Request Sense command.
16498  *
16499  *   Arguments: un - ptr to associated softstate
16500  *		sense_bp - ptr to buf(9S) for the RQS command
16501  *		sense_xp - ptr to the sd_xbuf for the RQS command
16502  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
16503  *
16504  *     Context: May be called under interrupt context
16505  */
16506 
16507 static void
16508 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
16509 	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
16510 {
16511 	struct buf	*cmd_bp;	/* buf for the original command */
16512 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
16513 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
16514 
16515 	ASSERT(un != NULL);
16516 	ASSERT(mutex_owned(SD_MUTEX(un)));
16517 	ASSERT(sense_bp != NULL);
16518 	ASSERT(sense_xp != NULL);
16519 	ASSERT(sense_pktp != NULL);
16520 
16521 	/*
16522 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
16523 	 * RQS command and not the original command.
16524 	 */
16525 	ASSERT(sense_pktp == un->un_rqs_pktp);
16526 	ASSERT(sense_bp   == un->un_rqs_bp);
16527 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
16528 	    (FLAG_SENSING | FLAG_HEAD));
16529 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
16530 	    FLAG_SENSING) == FLAG_SENSING);
16531 
16532 	/* These are the bp, xp, and pktp for the original command */
16533 	cmd_bp = sense_xp->xb_sense_bp;
16534 	cmd_xp = SD_GET_XBUF(cmd_bp);
16535 	cmd_pktp = SD_GET_PKTP(cmd_bp);
16536 
16537 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
16538 		/*
16539 		 * The REQUEST SENSE command failed.  Release the REQUEST
16540 		 * SENSE command for re-use, get back the bp for the original
16541 		 * command, and attempt to re-try the original command if
16542 		 * FLAG_DIAGNOSE is not set in the original packet.
16543 		 */
16544 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
16545 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16546 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
16547 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
16548 			    NULL, NULL, EIO, (clock_t)0, NULL);
16549 			return;
16550 		}
16551 	}
16552 
16553 	/*
16554 	 * Save the relevant sense info into the xp for the original cmd.
16555 	 *
16556 	 * Note: if the request sense failed the state info will be zero
16557 	 * as set in sd_mark_rqs_busy()
16558 	 */
16559 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
16560 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
16561 	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
16562 	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);
16563 
16564 	/*
16565 	 *  Free up the RQS command....
16566 	 *  NOTE:
16567 	 *	Must do this BEFORE calling sd_validate_sense_data!
16568 	 *	sd_validate_sense_data may return the original command in
16569 	 *	which case the pkt will be freed and the flags can no
16570 	 *	longer be touched.
16571 	 *	SD_MUTEX is held through this process until the command
16572 	 *	is dispatched based upon the sense data, so there are
16573 	 *	no race conditions.
16574 	 */
16575 	(void) sd_mark_rqs_idle(un, sense_xp);
16576 
16577 	/*
16578 	 * For a retryable command see if we have valid sense data, if so then
16579 	 * turn it over to sd_decode_sense() to figure out the right course of
16580 	 * action. Just fail a non-retryable command.
16581 	 */
16582 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16583 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
16584 		    SD_SENSE_DATA_IS_VALID) {
16585 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
16586 		}
16587 	} else {
16588 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
16589 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
16590 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
16591 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
16592 		sd_return_failed_command(un, cmd_bp, EIO);
16593 	}
16594 }
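
/*
 * Note: the non-auto REQUEST SENSE path above uses the single RQS packet
 * that the driver pre-allocates per LUN (un_rqs_pktp/un_rqs_bp, as the
 * ASSERTs require), so sd_mark_rqs_idle() must release it before the
 * original command can be re-dispatched or another sense fetched.
 */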
16595 
16596 
16597 
16598 
16599 /*
16600  *    Function: sd_handle_auto_request_sense
16601  *
16602  * Description: Processing for auto-request sense information.
16603  *
16604  *   Arguments: un - ptr to associated softstate
16605  *		bp - ptr to buf(9S) for the command
16606  *		xp - ptr to the sd_xbuf for the command
16607  *		pktp - ptr to the scsi_pkt(9S) for the command
16608  *
16609  *     Context: May be called under interrupt context
16610  */
16611 
16612 static void
16613 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
16614 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16615 {
16616 	struct scsi_arq_status *asp;
16617 
16618 	ASSERT(un != NULL);
16619 	ASSERT(mutex_owned(SD_MUTEX(un)));
16620 	ASSERT(bp != NULL);
16621 	ASSERT(xp != NULL);
16622 	ASSERT(pktp != NULL);
16623 	ASSERT(pktp != un->un_rqs_pktp);
16624 	ASSERT(bp   != un->un_rqs_bp);
16625 
16626 	/*
16627 	 * For auto-request sense, we get a scsi_arq_status back from
16628 	 * the HBA, with the sense data in the sts_sensedata member.
16629 	 * The pkt_scbp of the packet points to this scsi_arq_status.
16630 	 */
16631 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16632 
16633 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
16634 		/*
16635 		 * The auto REQUEST SENSE failed; see if we can re-try
16636 		 * the original command.
16637 		 */
16638 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16639 		    "auto request sense failed (reason=%s)\n",
16640 		    scsi_rname(asp->sts_rqpkt_reason));
16641 
16642 		sd_reset_target(un, pktp);
16643 
16644 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16645 		    NULL, NULL, EIO, (clock_t)0, NULL);
16646 		return;
16647 	}
16648 
16649 	/* Save the relevant sense info into the xp for the original cmd. */
16650 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
16651 	xp->xb_sense_state  = asp->sts_rqpkt_state;
16652 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16653 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16654 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
16655 
16656 	/*
16657 	 * See if we have valid sense data, if so then turn it over to
16658 	 * sd_decode_sense() to figure out the right course of action.
16659 	 */
16660 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
16661 		sd_decode_sense(un, bp, xp, pktp);
16662 	}
16663 }
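
/*
 * For reference: struct scsi_arq_status (see <sys/scsi/impl/status.h>)
 * bundles the status of the original command with the status, reason,
 * state, and resid of the implicit REQUEST SENSE, followed by the sense
 * bytes themselves; the routine above consumes only the sts_rqpkt_*
 * members and sts_sensedata.
 */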
16664 
16665 
16666 /*
16667  *    Function: sd_print_sense_failed_msg
16668  *
16669  * Description: Print log message when RQS has failed.
16670  *
16671  *   Arguments: un - ptr to associated softstate
16672  *		bp - ptr to buf(9S) for the command
16673  *		arg - generic message string ptr
16674  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16675  *			or SD_NO_RETRY_ISSUED
16676  *
16677  *     Context: May be called from interrupt context
16678  */
16679 
16680 static void
16681 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
16682 	int code)
16683 {
16684 	char	*msgp = arg;
16685 
16686 	ASSERT(un != NULL);
16687 	ASSERT(mutex_owned(SD_MUTEX(un)));
16688 	ASSERT(bp != NULL);
16689 
16690 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
16691 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
16692 	}
16693 }
16694 
16695 
16696 /*
16697  *    Function: sd_validate_sense_data
16698  *
16699  * Description: Check the given sense data for validity.
16700  *		If the sense data is not valid, the command will
16701  *		be either failed or retried!
16702  *
16703  * Return Code: SD_SENSE_DATA_IS_INVALID
16704  *		SD_SENSE_DATA_IS_VALID
16705  *
16706  *     Context: May be called from interrupt context
16707  */
16708 
16709 static int
16710 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
16711 {
16712 	struct scsi_extended_sense *esp;
16713 	struct	scsi_pkt *pktp;
16714 	size_t	actual_len;
16715 	char	*msgp = NULL;
16716 
16717 	ASSERT(un != NULL);
16718 	ASSERT(mutex_owned(SD_MUTEX(un)));
16719 	ASSERT(bp != NULL);
16720 	ASSERT(bp != un->un_rqs_bp);
16721 	ASSERT(xp != NULL);
16722 
16723 	pktp = SD_GET_PKTP(bp);
16724 	ASSERT(pktp != NULL);
16725 
16726 	/*
16727 	 * Check the status of the RQS command (auto or manual).
16728 	 */
16729 	switch (xp->xb_sense_status & STATUS_MASK) {
16730 	case STATUS_GOOD:
16731 		break;
16732 
16733 	case STATUS_RESERVATION_CONFLICT:
16734 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16735 		return (SD_SENSE_DATA_IS_INVALID);
16736 
16737 	case STATUS_BUSY:
16738 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16739 		    "Busy Status on REQUEST SENSE\n");
16740 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
16741 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
16742 		return (SD_SENSE_DATA_IS_INVALID);
16743 
16744 	case STATUS_QFULL:
16745 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16746 		    "QFULL Status on REQUEST SENSE\n");
16747 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
16748 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
16749 		return (SD_SENSE_DATA_IS_INVALID);
16750 
16751 	case STATUS_CHECK:
16752 	case STATUS_TERMINATED:
16753 		msgp = "Check Condition on REQUEST SENSE\n";
16754 		goto sense_failed;
16755 
16756 	default:
16757 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
16758 		goto sense_failed;
16759 	}
16760 
16761 	/*
16762 	 * See if we got the minimum required amount of sense data.
16763 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
16764 	 * or less.
16765 	 */
16766 	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
16767 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
16768 	    (actual_len == 0)) {
16769 		msgp = "Request Sense couldn't get sense data\n";
16770 		goto sense_failed;
16771 	}
16772 
16773 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
16774 		msgp = "Not enough sense information\n";
16775 		goto sense_failed;
16776 	}
16777 
16778 	/*
16779 	 * We require the extended sense data
16780 	 */
16781 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
16782 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
16783 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16784 			static char tmp[8];
16785 			static char buf[148];
16786 			char *p = (char *)(xp->xb_sense_data);
16787 			int i;
16788 
16789 			mutex_enter(&sd_sense_mutex);
16790 			(void) strcpy(buf, "undecodable sense information:");
16791 			for (i = 0; i < actual_len; i++) {
16792 				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
16793 				(void) strcpy(&buf[strlen(buf)], tmp);
16794 			}
16795 			i = strlen(buf);
16796 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
16797 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, buf);
16798 			mutex_exit(&sd_sense_mutex);
16799 		}
16800 		/* Note: Legacy behavior, fail the command with no retry */
16801 		sd_return_failed_command(un, bp, EIO);
16802 		return (SD_SENSE_DATA_IS_INVALID);
16803 	}
16804 
16805 	/*
16806 	 * Check that es_code is valid (es_class concatenated with es_code
16807 	 * makes up the "response code" field).  es_class will always be 7, so
16808 	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code indicates the
16809 	 * sense data format.
16810 	 */
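	/*
	 * For reference (per SPC-3), the low nibble of the sense data
	 * response code maps to:
	 *	0x0	CODE_FMT_FIXED_CURRENT	 (fixed format, current error)
	 *	0x1	CODE_FMT_FIXED_DEFERRED	 (fixed format, deferred error)
	 *	0x2	CODE_FMT_DESCR_CURRENT	 (descriptor format, current)
	 *	0x3	CODE_FMT_DESCR_DEFERRED	 (descriptor format, deferred)
	 *	0xF	CODE_FMT_VENDOR_SPECIFIC (vendor specific)
	 */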
16811 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
16812 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
16813 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
16814 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
16815 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
16816 		goto sense_failed;
16817 	}
16818 
16819 	return (SD_SENSE_DATA_IS_VALID);
16820 
16821 sense_failed:
16822 	/*
16823 	 * If the request sense failed (for whatever reason), attempt
16824 	 * to retry the original command.
16825 	 */
16826 #if defined(__i386) || defined(__amd64)
16827 	/*
16828 	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in
16829 	 * sddef.h for the SPARC platform, while x86 uses one binary
16830 	 * for both SCSI and FC.
16831 	 * The SD_RETRY_DELAY value needs to be adjusted here
16832 	 * whenever SD_RETRY_DELAY changes in sddef.h.
16833 	 */
16834 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16835 	    sd_print_sense_failed_msg, msgp, EIO,
16836 	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
16837 #else
16838 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16839 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
16840 #endif
16841 
16842 	return (SD_SENSE_DATA_IS_INVALID);
16843 }
16844 
16845 
16846 
16847 /*
16848  *    Function: sd_decode_sense
16849  *
16850  * Description: Take recovery action(s) when SCSI Sense Data is received.
16851  *
16852  *     Context: Interrupt context.
16853  */
16854 
16855 static void
16856 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
16857 	struct scsi_pkt *pktp)
16858 {
16859 	struct scsi_extended_sense *esp;
16860 	struct scsi_descr_sense_hdr *sdsp;
16861 	uint8_t asc, ascq, sense_key;
16862 
16863 	ASSERT(un != NULL);
16864 	ASSERT(mutex_owned(SD_MUTEX(un)));
16865 	ASSERT(bp != NULL);
16866 	ASSERT(bp != un->un_rqs_bp);
16867 	ASSERT(xp != NULL);
16868 	ASSERT(pktp != NULL);
16869 
16870 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
16871 
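	/*
	 * The sense key, ASC, and ASCQ live at different offsets in the
	 * two formats: fixed format keeps them in bytes 2, 12, and 13
	 * (es_key/es_add_code/es_qual_code), while descriptor format
	 * keeps them in bytes 1, 2, and 3 of the header
	 * (ds_key/ds_add_code/ds_qual_code), hence the switch below.
	 */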
16872 	switch (esp->es_code) {
16873 	case CODE_FMT_DESCR_CURRENT:
16874 	case CODE_FMT_DESCR_DEFERRED:
16875 		sdsp = (struct scsi_descr_sense_hdr *)xp->xb_sense_data;
16876 		sense_key = sdsp->ds_key;
16877 		asc = sdsp->ds_add_code;
16878 		ascq = sdsp->ds_qual_code;
16879 		break;
16880 	case CODE_FMT_VENDOR_SPECIFIC:
16881 	case CODE_FMT_FIXED_CURRENT:
16882 	case CODE_FMT_FIXED_DEFERRED:
16883 	default:
16884 		sense_key = esp->es_key;
16885 		asc = esp->es_add_code;
16886 		ascq = esp->es_qual_code;
16887 		break;
16888 	}
16889 
16890 	switch (sense_key) {
16891 	case KEY_NO_SENSE:
16892 		sd_sense_key_no_sense(un, bp, xp, pktp);
16893 		break;
16894 	case KEY_RECOVERABLE_ERROR:
16895 		sd_sense_key_recoverable_error(un, asc, bp, xp, pktp);
16896 		break;
16897 	case KEY_NOT_READY:
16898 		sd_sense_key_not_ready(un, asc, ascq, bp, xp, pktp);
16899 		break;
16900 	case KEY_MEDIUM_ERROR:
16901 	case KEY_HARDWARE_ERROR:
16902 		sd_sense_key_medium_or_hardware_error(un,
16903 		    sense_key, asc, bp, xp, pktp);
16904 		break;
16905 	case KEY_ILLEGAL_REQUEST:
16906 		sd_sense_key_illegal_request(un, bp, xp, pktp);
16907 		break;
16908 	case KEY_UNIT_ATTENTION:
16909 		sd_sense_key_unit_attention(un, asc, bp, xp, pktp);
16910 		break;
16911 	case KEY_WRITE_PROTECT:
16912 	case KEY_VOLUME_OVERFLOW:
16913 	case KEY_MISCOMPARE:
16914 		sd_sense_key_fail_command(un, bp, xp, pktp);
16915 		break;
16916 	case KEY_BLANK_CHECK:
16917 		sd_sense_key_blank_check(un, bp, xp, pktp);
16918 		break;
16919 	case KEY_ABORTED_COMMAND:
16920 		sd_sense_key_aborted_command(un, bp, xp, pktp);
16921 		break;
16922 	case KEY_VENDOR_UNIQUE:
16923 	case KEY_COPY_ABORTED:
16924 	case KEY_EQUAL:
16925 	case KEY_RESERVED:
16926 	default:
16927 		sd_sense_key_default(un, sense_key, bp, xp, pktp);
16928 		break;
16929 	}
16930 }
16931 
16932 
16933 /*
16934  *    Function: sd_dump_memory
16935  *
16936  * Description: Debug logging routine to print the contents of a user provided
16937  *		buffer. The output of the buffer is broken up into 256 byte
16938  *		segments due to a size constraint of the scsi_log()
16939  *		implementation.
16940  *
16941  *   Arguments: un - ptr to softstate
16942  *		comp - component mask
16943  *		title - "title" string to precede data when printed
16944  *		data - ptr to data block to be printed
16945  *		len - size of data block to be printed
16946  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
16947  *
16948  *     Context: May be called from interrupt context
16949  */
16950 
16951 #define	SD_DUMP_MEMORY_BUF_SIZE	256
16952 
16953 static char *sd_dump_format_string[] = {
16954 		" 0x%02x",
16955 		" %c"
16956 };
16957 
16958 static void
16959 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
16960     int len, int fmt)
16961 {
16962 	int	i, j;
16963 	int	avail_count;
16964 	int	start_offset;
16965 	int	end_offset;
16966 	size_t	entry_len;
16967 	char	*bufp;
16968 	char	*local_buf;
16969 	char	*format_string;
16970 
16971 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
16972 
16973 	/*
16974 	 * In the debug version of the driver, this function is called from a
16975 	 * number of places which are NOPs in the release driver.
16976 	 * The debug driver therefore has additional methods of filtering
16977 	 * debug output.
16978 	 */
16979 #ifdef SDDEBUG
16980 	/*
16981 	 * In the debug version of the driver we can reduce the amount of debug
16982 	 * messages by setting sd_error_level to something other than
16983 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
16984 	 * sd_component_mask.
16985 	 */
16986 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
16987 	    (sd_error_level != SCSI_ERR_ALL)) {
16988 		return;
16989 	}
16990 	if (((sd_component_mask & comp) == 0) ||
16991 	    (sd_error_level != SCSI_ERR_ALL)) {
16992 		return;
16993 	}
16994 #else
16995 	if (sd_error_level != SCSI_ERR_ALL) {
16996 		return;
16997 	}
16998 #endif
16999 
17000 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
17001 	bufp = local_buf;
17002 	/*
17003 	 * Available length is the length of local_buf[], minus the
17004 	 * length of the title string, minus one for the ":", minus
17005 	 * one for the newline, minus one for the NULL terminator.
17006 	 * This gives the #bytes available for holding the printed
17007 	 * values from the given data buffer.
17008 	 */
17009 	if (fmt == SD_LOG_HEX) {
17010 		format_string = sd_dump_format_string[0];
17011 	} else /* SD_LOG_CHAR */ {
17012 		format_string = sd_dump_format_string[1];
17013 	}
17014 	/*
17015 	 * Available count is the number of elements from the given
17016 	 * data buffer that we can fit into the available length.
17017 	 * This is based upon the size of the format string used.
17018 	 * Format one entry and find its size.
17019 	 */
17020 	(void) sprintf(bufp, format_string, data[0]);
17021 	entry_len = strlen(bufp);
17022 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
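	/*
	 * For example, with the SD_LOG_HEX format " 0x%02x" each entry
	 * prints as five characters (e.g. " 0x5d"), so a title such as
	 * "Sense Data" (10 chars) yields (256 - 10 - 3) / 5 = 48 entries
	 * per scsi_log() line.
	 */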
17023 
17024 	j = 0;
17025 	while (j < len) {
17026 		bufp = local_buf;
17027 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
17028 		start_offset = j;
17029 
17030 		end_offset = start_offset + avail_count;
17031 
17032 		(void) sprintf(bufp, "%s:", title);
17033 		bufp += strlen(bufp);
17034 		for (i = start_offset; ((i < end_offset) && (j < len));
17035 		    i++, j++) {
17036 			(void) sprintf(bufp, format_string, data[i]);
17037 			bufp += entry_len;
17038 		}
17039 		(void) sprintf(bufp, "\n");
17040 
17041 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
17042 	}
17043 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
17044 }
17045 
17046 /*
17047  *    Function: sd_print_sense_msg
17048  *
17049  * Description: Log a message based upon the given sense data.
17050  *
17051  *   Arguments: un - ptr to associated softstate
17052  *		bp - ptr to buf(9S) for the command
17053  *		arg - ptr to associate sd_sense_info struct
17054  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17055  *			or SD_NO_RETRY_ISSUED
17056  *
17057  *     Context: May be called from interrupt context
17058  */
17059 
17060 static void
17061 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
17062 {
17063 	struct sd_xbuf	*xp;
17064 	struct scsi_pkt	*pktp;
17065 	struct scsi_extended_sense *sensep;
17066 	daddr_t request_blkno;
17067 	diskaddr_t err_blkno;
17068 	int severity;
17069 	int pfa_flag;
17070 	int fixed_format = TRUE;
17071 	extern struct scsi_key_strings scsi_cmds[];
17072 
17073 	ASSERT(un != NULL);
17074 	ASSERT(mutex_owned(SD_MUTEX(un)));
17075 	ASSERT(bp != NULL);
17076 	xp = SD_GET_XBUF(bp);
17077 	ASSERT(xp != NULL);
17078 	pktp = SD_GET_PKTP(bp);
17079 	ASSERT(pktp != NULL);
17080 	ASSERT(arg != NULL);
17081 
17082 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
17083 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
17084 
17085 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
17086 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
17087 		severity = SCSI_ERR_RETRYABLE;
17088 	}
17089 
17090 	/* Use absolute block number for the request block number */
17091 	request_blkno = xp->xb_blkno;
17092 
17093 	/*
17094 	 * Now try to get the error block number from the sense data
17095 	 */
17096 	sensep = (struct scsi_extended_sense *)xp->xb_sense_data;
17097 	switch (sensep->es_code) {
17098 	case CODE_FMT_DESCR_CURRENT:
17099 	case CODE_FMT_DESCR_DEFERRED:
17100 		err_blkno =
17101 		    sd_extract_sense_info_descr(
17102 			(struct scsi_descr_sense_hdr *)sensep);
17103 		fixed_format = FALSE;
17104 		break;
17105 	case CODE_FMT_FIXED_CURRENT:
17106 	case CODE_FMT_FIXED_DEFERRED:
17107 	case CODE_FMT_VENDOR_SPECIFIC:
17108 	default:
17109 		/*
17110 		 * With the es_valid bit set, we assume that the error
17111 		 * blkno is in the sense data.  Also, if xp->xb_blkno is
17112 		 * greater than 0xffffffff then the target *should* have used
17113 		 * a descriptor sense format (or it shouldn't have set
17114 		 * the es_valid bit), and we may as well ignore the
17115 		 * 32-bit value.
17116 		 */
17117 		if ((sensep->es_valid != 0) && (xp->xb_blkno <= 0xffffffff)) {
17118 			err_blkno = (diskaddr_t)
17119 			    ((sensep->es_info_1 << 24) |
17120 			    (sensep->es_info_2 << 16) |
17121 			    (sensep->es_info_3 << 8)  |
17122 			    (sensep->es_info_4));
17123 		} else {
17124 			err_blkno = (diskaddr_t)-1;
17125 		}
17126 		break;
17127 	}
17128 
17129 	if (err_blkno == (diskaddr_t)-1) {
17130 		/*
17131 		 * Without the es_valid bit set (for fixed format) or an
17132 		 * information descriptor (for descriptor format) we cannot
17133 		 * be certain of the error blkno, so just use the
17134 		 * request_blkno.
17135 		 */
17136 		err_blkno = (diskaddr_t)request_blkno;
17137 	} else {
17138 		/*
17139 		 * We retrieved the error block number from the information
17140 		 * portion of the sense data.
17141 		 *
17142 		 * For USCSI commands we are better off using the error
17143 		 * block no. as the requested block no. (This is the best
17144 		 * we can estimate.)
17145 		 */
17146 		if ((SD_IS_BUFIO(xp) == FALSE) &&
17147 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
17148 			request_blkno = err_blkno;
17149 		}
17150 	}
17151 
17152 	/*
17153 	 * The following will log the buffer contents for the release driver
17154 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
17155 	 * level is set to verbose.
17156 	 */
17157 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
17158 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
17159 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
17160 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
17161 
17162 	if (pfa_flag == FALSE) {
17163 		/* This is normally only set for USCSI */
17164 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
17165 			return;
17166 		}
17167 
17168 		if ((SD_IS_BUFIO(xp) == TRUE) &&
17169 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
17170 		    (severity < sd_error_level))) {
17171 			return;
17172 		}
17173 	}
17174 
17175 	/*
17176 	 * If the data is fixed format then check for Sonoma Failover,
17177 	 * and keep a count of how many failed I/O's.  We should not have
17178 	 * to worry about Sonoma returning descriptor format sense data,
17179 	 * and asc/ascq are in a different location in descriptor format.
17180 	 */
17181 	if (fixed_format &&
17182 	    (SD_IS_LSI(un)) && (sensep->es_key == KEY_ILLEGAL_REQUEST) &&
17183 	    (sensep->es_add_code == 0x94) && (sensep->es_qual_code == 0x01)) {
17184 		un->un_sonoma_failure_count++;
17185 		if (un->un_sonoma_failure_count > 1) {
17186 			return;
17187 		}
17188 	}
17189 
17190 	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
17191 	    request_blkno, err_blkno, scsi_cmds, sensep,
17192 	    un->un_additional_codes, NULL);
17193 }
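
/*
 * Note: scsi_vu_errmsg(9F) above formats the standard console error
 * report from the request and error block numbers, the decoded command
 * name (scsi_cmds), and the sense data, consulting un_additional_codes
 * for any vendor-unique ASC/ASCQ strings.
 */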
17194 
17195 /*
17196  *    Function: sd_extract_sense_info_descr
17197  *
17198  * Description: Retrieve "information" field from descriptor format
17199  *              sense data.  Iterates through each sense descriptor
17200  *              looking for the information descriptor and returns
17201  *              the information field from that descriptor.
17202  *
17203  *     Context: May be called from interrupt context
17204  */
17205 
17206 static diskaddr_t
17207 sd_extract_sense_info_descr(struct scsi_descr_sense_hdr *sdsp)
17208 {
17209 	diskaddr_t result;
17210 	uint8_t *descr_offset;
17211 	int valid_sense_length;
17212 	struct scsi_information_sense_descr *isd;
17213 
17214 	/*
17215 	 * Initialize result to -1 indicating there is no information
17216 	 * descriptor
17217 	 */
17218 	result = (diskaddr_t)-1;
17219 
17220 	/*
17221 	 * The first descriptor will immediately follow the header
17222 	 */
17223 	descr_offset = (uint8_t *)(sdsp+1); /* Pointer arithmetic */
17224 
17225 	/*
17226 	 * Calculate the amount of valid sense data
17227 	 */
17228 	valid_sense_length =
17229 	    min((sizeof (struct scsi_descr_sense_hdr) +
17230 	    sdsp->ds_addl_sense_length),
17231 	    SENSE_LENGTH);
17232 
17233 	/*
17234 	 * Iterate through the list of descriptors, stopping when we
17235 	 * run out of sense data
17236 	 */
17237 	while ((descr_offset + sizeof (struct scsi_information_sense_descr)) <=
17238 	    (uint8_t *)sdsp + valid_sense_length) {
17239 		/*
17240 		 * Check if this is an information descriptor.  We can
17241 		 * use the scsi_information_sense_descr structure as a
17242 		 * template since the first two fields are always the
17243 		 * same
17244 		 */
17245 		isd = (struct scsi_information_sense_descr *)descr_offset;
17246 		if (isd->isd_descr_type == DESCR_INFORMATION) {
17247 			/*
17248 			 * Found an information descriptor.  Copy the
17249 			 * information field.  There will only be one
17250 			 * information descriptor so we can stop looking.
17251 			 */
17252 			result =
17253 			    (((diskaddr_t)isd->isd_information[0] << 56) |
17254 				((diskaddr_t)isd->isd_information[1] << 48) |
17255 				((diskaddr_t)isd->isd_information[2] << 40) |
17256 				((diskaddr_t)isd->isd_information[3] << 32) |
17257 				((diskaddr_t)isd->isd_information[4] << 24) |
17258 				((diskaddr_t)isd->isd_information[5] << 16) |
17259 				((diskaddr_t)isd->isd_information[6] << 8)  |
17260 				((diskaddr_t)isd->isd_information[7]));
17261 			break;
17262 		}
17263 
17264 		/*
17265 		 * Get pointer to the next descriptor.  The "additional
17266 		 * length" field holds the length of the descriptor except
17267 		 * for the "type" and "additional length" fields, so
17268 		 * we need to add 2 to get the total length.
17269 		 */
17270 		descr_offset += (isd->isd_addl_length + 2);
17271 	}
17272 
17273 	return (result);
17274 }
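
/*
 * For reference (per SPC-3), descriptor format sense data is laid out
 * as follows:
 *
 *	byte 0		response code (0x72 current, 0x73 deferred)
 *	byte 1		sense key
 *	bytes 2-3	ASC / ASCQ
 *	byte 7		additional sense length
 *	bytes 8-n	sense data descriptors
 *
 * An information descriptor is 12 bytes: type 0x00 (DESCR_INFORMATION),
 * additional length 0x0a, and an 8-byte big-endian information field
 * (typically the failing LBA) in bytes 4-11, which is what the loop
 * above assembles into a diskaddr_t.
 */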
17275 
17276 /*
17277  *    Function: sd_sense_key_no_sense
17278  *
17279  * Description: Recovery action when sense data was not received.
17280  *
17281  *     Context: May be called from interrupt context
17282  */
17283 
17284 static void
17285 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
17286 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17287 {
17288 	struct sd_sense_info	si;
17289 
17290 	ASSERT(un != NULL);
17291 	ASSERT(mutex_owned(SD_MUTEX(un)));
17292 	ASSERT(bp != NULL);
17293 	ASSERT(xp != NULL);
17294 	ASSERT(pktp != NULL);
17295 
17296 	si.ssi_severity = SCSI_ERR_FATAL;
17297 	si.ssi_pfa_flag = FALSE;
17298 
17299 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17300 
17301 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17302 		&si, EIO, (clock_t)0, NULL);
17303 }
17304 
17305 
17306 /*
17307  *    Function: sd_sense_key_recoverable_error
17308  *
17309  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17310  *
17311  *     Context: May be called from interrupt context
17312  */
17313 
17314 static void
17315 sd_sense_key_recoverable_error(struct sd_lun *un,
17316 	uint8_t asc,
17317 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17318 {
17319 	struct sd_sense_info	si;
17320 
17321 	ASSERT(un != NULL);
17322 	ASSERT(mutex_owned(SD_MUTEX(un)));
17323 	ASSERT(bp != NULL);
17324 	ASSERT(xp != NULL);
17325 	ASSERT(pktp != NULL);
17326 
17327 	/*
17328 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17329 	 */
17330 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17331 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17332 		si.ssi_severity = SCSI_ERR_INFO;
17333 		si.ssi_pfa_flag = TRUE;
17334 	} else {
17335 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
17336 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
17337 		si.ssi_severity = SCSI_ERR_RECOVERED;
17338 		si.ssi_pfa_flag = FALSE;
17339 	}
17340 
17341 	if (pktp->pkt_resid == 0) {
17342 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17343 		sd_return_command(un, bp);
17344 		return;
17345 	}
17346 
17347 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17348 	    &si, EIO, (clock_t)0, NULL);
17349 }
17350 
17351 
17352 
17353 
17354 /*
17355  *    Function: sd_sense_key_not_ready
17356  *
17357  * Description: Recovery actions for a SCSI "Not Ready" sense key.
17358  *
17359  *     Context: May be called from interrupt context
17360  */
17361 
17362 static void
17363 sd_sense_key_not_ready(struct sd_lun *un,
17364 	uint8_t asc, uint8_t ascq,
17365 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17366 {
17367 	struct sd_sense_info	si;
17368 
17369 	ASSERT(un != NULL);
17370 	ASSERT(mutex_owned(SD_MUTEX(un)));
17371 	ASSERT(bp != NULL);
17372 	ASSERT(xp != NULL);
17373 	ASSERT(pktp != NULL);
17374 
17375 	si.ssi_severity = SCSI_ERR_FATAL;
17376 	si.ssi_pfa_flag = FALSE;
17377 
17378 	/*
17379 	 * Update error stats after first NOT READY error. Disks may have
17380 	 * been powered down and may need to be restarted.  For CDROMs,
17381 	 * report NOT READY errors only if media is present.
17382 	 */
17383 	if ((ISCD(un) && (un->un_f_geometry_is_valid == TRUE)) ||
17384 	    (xp->xb_retry_count > 0)) {
17385 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17386 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
17387 	}
17388 
17389 	/*
17390 	 * Just fail if the "not ready" retry limit has been reached.
17391 	 */
17392 	if (xp->xb_retry_count >= un->un_notready_retry_count) {
17393 		/* Special check for error message printing for removables. */
17394 		if ((ISREMOVABLE(un)) && (asc == 0x04) &&
17395 		    (ascq >= 0x04)) {
17396 			si.ssi_severity = SCSI_ERR_ALL;
17397 		}
17398 		goto fail_command;
17399 	}
17400 
17401 	/*
17402 	 * Check the ASC and ASCQ in the sense data as needed, to determine
17403 	 * what to do.
17404 	 */
17405 	switch (asc) {
17406 	case 0x04:	/* LOGICAL UNIT NOT READY */
17407 		/*
17408 		 * disk drives that don't spin up result in a very long delay
17409 		 * in format without warning messages. We will log a message
17410 		 * if the error level is set to verbose.
17411 		 */
17412 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17413 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17414 			    "logical unit not ready, resetting disk\n");
17415 		}
17416 
17417 		/*
17418 		 * There are different requirements for CDROMs and disks for
17419 		 * the number of retries.  If a CD-ROM is giving this, it is
17420 		 * probably reading TOC and is in the process of getting
17421 		 * ready, so we should keep on trying for a long time to make
17422 		 * sure that all types of media are taken into account (for
17423 		 * some media the drive takes a long time to read TOC).  For
17424 		 * disks we do not want to retry this too many times as this
17425 		 * can cause a long hang in format when the drive refuses to
17426 		 * spin up (a very common failure).
17427 		 */
17428 		switch (ascq) {
17429 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
17430 			/*
17431 			 * Disk drives frequently refuse to spin up which
17432 			 * results in a very long hang in format without
17433 			 * warning messages.
17434 			 *
17435 			 * Note: This code preserves the legacy behavior of
17436 			 * comparing xb_retry_count against zero for fibre
17437 			 * channel targets instead of comparing against the
17438 			 * un_reset_retry_count value.  The reason for this
17439 			 * discrepancy has been so utterly lost beneath the
17440 			 * Sands of Time that even Indiana Jones could not
17441 			 * find it.
17442 			 */
17443 			if (un->un_f_is_fibre == TRUE) {
17444 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17445 					(xp->xb_retry_count > 0)) &&
17446 					(un->un_startstop_timeid == NULL)) {
17447 					scsi_log(SD_DEVINFO(un), sd_label,
17448 					CE_WARN, "logical unit not ready, "
17449 					"resetting disk\n");
17450 					sd_reset_target(un, pktp);
17451 				}
17452 			} else {
17453 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17454 					(xp->xb_retry_count >
17455 					un->un_reset_retry_count)) &&
17456 					(un->un_startstop_timeid == NULL)) {
17457 					scsi_log(SD_DEVINFO(un), sd_label,
17458 					CE_WARN, "logical unit not ready, "
17459 					"resetting disk\n");
17460 					sd_reset_target(un, pktp);
17461 				}
17462 			}
17463 			break;
17464 
17465 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
17466 			/*
17467 			 * If the target is in the process of becoming
17468 			 * ready, just proceed with the retry. This can
17469 			 * happen with CD-ROMs that take a long time to
17470 			 * read TOC after a power cycle or reset.
17471 			 */
17472 			goto do_retry;
17473 
17474 		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
17475 			break;
17476 
17477 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
17478 			/*
17479 			 * Retries cannot help here so just fail right away.
17480 			 */
17481 			goto fail_command;
17482 
17483 		case 0x88:
17484 			/*
17485 			 * Vendor-unique code for T3/T4: it indicates a
17486 			 * path problem in a multipathed config, but as far as
17487 			 * the target driver is concerned it equates to a fatal
17488 			 * error, so we should just fail the command right away
17489 			 * (without printing anything to the console). If this
17490 			 * is not a T3/T4, fall thru to the default recovery
17491 			 * action.
17492 			 * T3/T4 is FC only, so there is no need to check is_fibre.
17493 			 */
17494 			if (SD_IS_T3(un) || SD_IS_T4(un)) {
17495 				sd_return_failed_command(un, bp, EIO);
17496 				return;
17497 			}
17498 			/* FALLTHRU */
17499 
17500 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
17501 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
17502 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
17503 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
17504 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
17505 		default:    /* Possible future codes in SCSI spec? */
17506 			/*
17507 			 * For removable-media devices, do not retry if
17508 			 * ASCQ > 2 as these result mostly from USCSI commands
17509 			 * on MMC devices issued to check status of an
17510 			 * operation initiated in immediate mode.  Also for
17511 			 * ASCQ >= 4 do not print console messages as these
17512 			 * mainly represent a user-initiated operation
17513 			 * instead of a system failure.
17514 			 */
17515 			if (ISREMOVABLE(un)) {
17516 				si.ssi_severity = SCSI_ERR_ALL;
17517 				goto fail_command;
17518 			}
17519 			break;
17520 		}
17521 
17522 		/*
17523 		 * As part of our recovery attempt for the NOT READY
17524 		 * condition, we issue a START STOP UNIT command. However
17525 		 * we want to wait for a short delay before attempting this
17526 		 * as there may still be more commands coming back from the
17527 		 * target with the check condition. To do this we use
17528 		 * timeout(9F) to call sd_start_stop_unit_callback() after
17529 		 * the delay interval expires. (sd_start_stop_unit_callback()
17530 		 * dispatches sd_start_stop_unit_task(), which will issue
17531 		 * the actual START STOP UNIT command. The delay interval
17532 		 * the actual START STOP UNIT command.) The delay interval
17533 		 * command that generated the NOT READY condition.
17534 		 *
17535 		 * Note that we could just dispatch sd_start_stop_unit_task()
17536 		 * from here and allow it to sleep for the delay interval,
17537 		 * but then we would be tying up the taskq thread
17538 		 * unnecessarily for the duration of the delay.
17539 		 *
17540 		 * Do not issue the START STOP UNIT if the current command
17541 		 * is already a START STOP UNIT.
17542 		 */
17543 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
17544 			break;
17545 		}
17546 
17547 		/*
17548 		 * Do not schedule the timeout if one is already pending.
17549 		 */
17550 		if (un->un_startstop_timeid != NULL) {
17551 			SD_INFO(SD_LOG_ERROR, un,
17552 			    "sd_sense_key_not_ready: restart already issued to"
17553 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
17554 			    ddi_get_instance(SD_DEVINFO(un)));
17555 			break;
17556 		}
17557 
17558 		/*
17559 		 * Schedule the START STOP UNIT command, then queue the command
17560 		 * for a retry.
17561 		 *
17562 		 * Note: A timeout is not scheduled for this retry because we
17563 		 * want the retry to be serial with the START_STOP_UNIT. The
17564 		 * retry will be started when the START_STOP_UNIT is completed
17565 		 * in sd_start_stop_unit_task.
17566 		 */
17567 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
17568 		    un, SD_BSY_TIMEOUT / 2);
17569 		xp->xb_retry_count++;
17570 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
17571 		return;
17572 
17573 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
17574 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17575 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17576 			    "unit does not respond to selection\n");
17577 		}
17578 		break;
17579 
17580 	case 0x3A:	/* MEDIUM NOT PRESENT */
17581 		if (sd_error_level >= SCSI_ERR_FATAL) {
17582 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17583 			    "Caddy not inserted in drive\n");
17584 		}
17585 
17586 		sr_ejected(un);
17587 		un->un_mediastate = DKIO_EJECTED;
17588 		/* The state has changed, inform the media watch routines */
17589 		cv_broadcast(&un->un_state_cv);
17590 		/* Just fail if no media is present in the drive. */
17591 		goto fail_command;
17592 
17593 	default:
17594 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17595 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
17596 			    "Unit not Ready. Additional sense code 0x%x\n",
17597 			    asc);
17598 		}
17599 		break;
17600 	}
17601 
17602 do_retry:
17603 
17604 	/*
17605 	 * Retry the command, as some targets may report NOT READY for
17606 	 * several seconds after being reset.
17607 	 */
17608 	xp->xb_retry_count++;
17609 	si.ssi_severity = SCSI_ERR_RETRYABLE;
17610 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17611 	    &si, EIO, SD_BSY_TIMEOUT, NULL);
17612 
17613 	return;
17614 
17615 fail_command:
17616 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17617 	sd_return_failed_command(un, bp, EIO);
17618 }
17619 
17620 
17621 
17622 /*
17623  *    Function: sd_sense_key_medium_or_hardware_error
17624  *
17625  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
17626  *		sense key.
17627  *
17628  *     Context: May be called from interrupt context
17629  */
17630 
17631 static void
17632 sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
17633 	int sense_key, uint8_t asc,
17634 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17635 {
17636 	struct sd_sense_info	si;
17637 
17638 	ASSERT(un != NULL);
17639 	ASSERT(mutex_owned(SD_MUTEX(un)));
17640 	ASSERT(bp != NULL);
17641 	ASSERT(xp != NULL);
17642 	ASSERT(pktp != NULL);
17643 
17644 	si.ssi_severity = SCSI_ERR_FATAL;
17645 	si.ssi_pfa_flag = FALSE;
17646 
17647 	if (sense_key == KEY_MEDIUM_ERROR) {
17648 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
17649 	}
17650 
17651 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17652 
17653 	if ((un->un_reset_retry_count != 0) &&
17654 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
17655 		mutex_exit(SD_MUTEX(un));
17656 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
17657 		if (un->un_f_allow_bus_device_reset == TRUE) {
17658 
17659 			boolean_t try_resetting_target = B_TRUE;
17660 
17661 			/*
17662 			 * We need to be able to handle specific ASC when we are
17663 			 * handling a KEY_HARDWARE_ERROR. In particular
17664 			 * taking the default action of resetting the target may
17665 			 * not be the appropriate way to attempt recovery.
17666 			 * Resetting a target because of a single LUN failure
17667 			 * victimizes all LUNs on that target.
17668 			 *
17669 			 * This is true for the LSI arrays, if an LSI
17670 			 * array controller returns an ASC of 0x84 (LUN Dead) we
17671 			 * should trust it.
17672 			 */
17673 
17674 			if (sense_key == KEY_HARDWARE_ERROR) {
17675 				switch (asc) {
17676 				case 0x84:
17677 					if (SD_IS_LSI(un)) {
17678 						try_resetting_target = B_FALSE;
17679 					}
17680 					break;
17681 				default:
17682 					break;
17683 				}
17684 			}
17685 
17686 			if (try_resetting_target == B_TRUE) {
17687 				int reset_retval = 0;
17688 				if (un->un_f_lun_reset_enabled == TRUE) {
17689 					SD_TRACE(SD_LOG_IO_CORE, un,
17690 					    "sd_sense_key_medium_or_hardware_"
17691 					    "error: issuing RESET_LUN\n");
17692 					reset_retval =
17693 					    scsi_reset(SD_ADDRESS(un),
17694 					    RESET_LUN);
17695 				}
17696 				if (reset_retval == 0) {
17697 					SD_TRACE(SD_LOG_IO_CORE, un,
17698 					    "sd_sense_key_medium_or_hardware_"
17699 					    "error: issuing RESET_TARGET\n");
17700 					(void) scsi_reset(SD_ADDRESS(un),
17701 					    RESET_TARGET);
17702 				}
17703 			}
17704 		}
17705 		mutex_enter(SD_MUTEX(un));
17706 	}
17707 
17708 	/*
17709 	 * This really ought to be a fatal error, but we will retry anyway
17710 	 * as some drives report this as a spurious error.
17711 	 */
17712 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17713 	    &si, EIO, (clock_t)0, NULL);
17714 }
17715 
17716 
17717 
17718 /*
17719  *    Function: sd_sense_key_illegal_request
17720  *
17721  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
17722  *
17723  *     Context: May be called from interrupt context
17724  */
17725 
17726 static void
17727 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
17728 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17729 {
17730 	struct sd_sense_info	si;
17731 
17732 	ASSERT(un != NULL);
17733 	ASSERT(mutex_owned(SD_MUTEX(un)));
17734 	ASSERT(bp != NULL);
17735 	ASSERT(xp != NULL);
17736 	ASSERT(pktp != NULL);
17737 
17738 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17739 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
17740 
17741 	si.ssi_severity = SCSI_ERR_INFO;
17742 	si.ssi_pfa_flag = FALSE;
17743 
17744 	/* Pointless to retry if the target thinks it's an illegal request */
17745 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17746 	sd_return_failed_command(un, bp, EIO);
17747 }
17748 
17749 
17750 
17751 
17752 /*
17753  *    Function: sd_sense_key_unit_attention
17754  *
17755  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
17756  *
17757  *     Context: May be called from interrupt context
17758  */
17759 
17760 static void
17761 sd_sense_key_unit_attention(struct sd_lun *un,
17762 	uint8_t asc,
17763 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17764 {
17765 	/*
17766 	 * For UNIT ATTENTION we allow retries for one minute. Devices
17767 	 * like Sonoma can return UNIT ATTENTION close to a minute
17768 	 * under certain conditions.
17769 	 */
17770 	int	retry_check_flag = SD_RETRIES_UA;
17771 	struct	sd_sense_info		si;
17772 
17773 	ASSERT(un != NULL);
17774 	ASSERT(mutex_owned(SD_MUTEX(un)));
17775 	ASSERT(bp != NULL);
17776 	ASSERT(xp != NULL);
17777 	ASSERT(pktp != NULL);
17778 
17779 	si.ssi_severity = SCSI_ERR_INFO;
17780 	si.ssi_pfa_flag = FALSE;
17781 
17782 
17783 	switch (asc) {
17784 	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
17785 		if (sd_report_pfa != 0) {
17786 			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17787 			si.ssi_pfa_flag = TRUE;
17788 			retry_check_flag = SD_RETRIES_STANDARD;
17789 			goto do_retry;
17790 		}
17791 		break;
17792 
17793 	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
17794 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
17795 			un->un_resvd_status |=
17796 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
17797 		}
17798 		/* FALLTHRU */
17799 
17800 	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
17801 		if (!ISREMOVABLE(un)) {
17802 			break;
17803 		}
17804 
17805 		/*
17806 		 * When we get a unit attention from a removable-media device,
17807 		 * it may be in a state that will take a long time to recover
17808 		 * (e.g., from a reset).  Since we are executing in interrupt
17809 		 * context here, we cannot wait around for the device to come
17810 		 * back. So hand this command off to sd_media_change_task()
17811 		 * for deferred processing under taskq thread context. (Note
17812 		 * that the command still may be failed if a problem is
17813 		 * encountered at a later time.)
17814 		 */
17815 		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
17816 		    KM_NOSLEEP) == 0) {
17817 			/*
17818 			 * Cannot dispatch the request so fail the command.
17819 			 */
17820 			SD_UPDATE_ERRSTATS(un, sd_harderrs);
17821 			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17822 			si.ssi_severity = SCSI_ERR_FATAL;
17823 			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17824 			sd_return_failed_command(un, bp, EIO);
17825 		}
17826 		/*
17827 		 * Either the command has been successfully dispatched to a
17828 		 * task Q for retrying, or the dispatch failed. In either case
17829 		 * do NOT retry again by calling sd_retry_command. This sets up
17830 		 * two retries of the same command and when one completes and
17831 		 * frees the resources the other will access freed memory,
17832 		 * a bad thing.
17833 		 */
17834 		return;
17835 
17836 	default:
17837 		break;
17838 	}
17839 
17840 	if (!ISREMOVABLE(un)) {
17841 		/*
17842 		 * Do not update these here for removables. For removables
17843 		 * these stats are updated (1) above if we failed to dispatch
17844 		 * sd_media_change_task(), or (2) sd_media_change_task() may
17845 		 * update these later if it encounters an error.
17846 		 */
17847 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17848 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17849 	}
17850 
17851 do_retry:
17852 	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
17853 	    EIO, SD_UA_RETRY_DELAY, NULL);
17854 }
17855 
17856 
17857 
17858 /*
17859  *    Function: sd_sense_key_fail_command
17860  *
17861  * Description: Used to fail a command when we don't like the sense key that
17862  *		was returned.
17863  *
17864  *     Context: May be called from interrupt context
17865  */
17866 
17867 static void
17868 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
17869 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17870 {
17871 	struct sd_sense_info	si;
17872 
17873 	ASSERT(un != NULL);
17874 	ASSERT(mutex_owned(SD_MUTEX(un)));
17875 	ASSERT(bp != NULL);
17876 	ASSERT(xp != NULL);
17877 	ASSERT(pktp != NULL);
17878 
17879 	si.ssi_severity = SCSI_ERR_FATAL;
17880 	si.ssi_pfa_flag = FALSE;
17881 
17882 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17883 	sd_return_failed_command(un, bp, EIO);
17884 }
17885 
17886 
17887 
17888 /*
17889  *    Function: sd_sense_key_blank_check
17890  *
17891  * Description: Recovery actions for a SCSI "Blank Check" sense key.
17892  *		Has no monetary connotation.
17893  *
17894  *     Context: May be called from interrupt context
17895  */
17896 
17897 static void
17898 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
17899 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17900 {
17901 	struct sd_sense_info	si;
17902 
17903 	ASSERT(un != NULL);
17904 	ASSERT(mutex_owned(SD_MUTEX(un)));
17905 	ASSERT(bp != NULL);
17906 	ASSERT(xp != NULL);
17907 	ASSERT(pktp != NULL);
17908 
17909 	/*
17910 	 * Blank check is not fatal for removable devices, therefore
17911 	 * it does not require a console message.
17912 	 */
17913 	si.ssi_severity = (ISREMOVABLE(un)) ? SCSI_ERR_ALL : SCSI_ERR_FATAL;
17914 	si.ssi_pfa_flag = FALSE;
17915 
17916 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17917 	sd_return_failed_command(un, bp, EIO);
17918 }
17919 
17920 
17921 
17922 
17923 /*
17924  *    Function: sd_sense_key_aborted_command
17925  *
17926  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
17927  *
17928  *     Context: May be called from interrupt context
17929  */
17930 
17931 static void
17932 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
17933 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17934 {
17935 	struct sd_sense_info	si;
17936 
17937 	ASSERT(un != NULL);
17938 	ASSERT(mutex_owned(SD_MUTEX(un)));
17939 	ASSERT(bp != NULL);
17940 	ASSERT(xp != NULL);
17941 	ASSERT(pktp != NULL);
17942 
17943 	si.ssi_severity = SCSI_ERR_FATAL;
17944 	si.ssi_pfa_flag = FALSE;
17945 
17946 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17947 
17948 	/*
17949 	 * This really ought to be a fatal error, but we will retry anyway
17950 	 * as some drives report this as a spurious error.
17951 	 */
17952 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17953 	    &si, EIO, (clock_t)0, NULL);
17954 }
17955 
17956 
17957 
17958 /*
17959  *    Function: sd_sense_key_default
17960  *
17961  * Description: Default recovery action for several SCSI sense keys (basically
17962  *		attempts a retry).
17963  *
17964  *     Context: May be called from interrupt context
17965  */
17966 
17967 static void
17968 sd_sense_key_default(struct sd_lun *un,
17969 	int sense_key,
17970 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17971 {
17972 	struct sd_sense_info	si;
17973 
17974 	ASSERT(un != NULL);
17975 	ASSERT(mutex_owned(SD_MUTEX(un)));
17976 	ASSERT(bp != NULL);
17977 	ASSERT(xp != NULL);
17978 	ASSERT(pktp != NULL);
17979 
17980 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17981 
17982 	/*
17983 	 * Undecoded sense key.	Attempt retries and hope that will fix
17984 	 * the problem.  Otherwise, we're dead.
17985 	 */
17986 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
17987 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17988 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
17989 	}
17990 
17991 	si.ssi_severity = SCSI_ERR_FATAL;
17992 	si.ssi_pfa_flag = FALSE;
17993 
17994 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17995 	    &si, EIO, (clock_t)0, NULL);
17996 }
17997 
17998 
17999 
18000 /*
18001  *    Function: sd_print_retry_msg
18002  *
18003  * Description: Print a message indicating the retry action being taken.
18004  *
18005  *   Arguments: un - ptr to associated softstate
18006  *		bp - ptr to buf(9S) for the command
18007  *		arg - not used.
18008  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18009  *			or SD_NO_RETRY_ISSUED
18010  *
18011  *     Context: May be called from interrupt context
18012  */
18013 /* ARGSUSED */
18014 static void
18015 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
18016 {
18017 	struct sd_xbuf	*xp;
18018 	struct scsi_pkt *pktp;
18019 	char *reasonp;
18020 	char *msgp;
18021 
18022 	ASSERT(un != NULL);
18023 	ASSERT(mutex_owned(SD_MUTEX(un)));
18024 	ASSERT(bp != NULL);
18025 	pktp = SD_GET_PKTP(bp);
18026 	ASSERT(pktp != NULL);
18027 	xp = SD_GET_XBUF(bp);
18028 	ASSERT(xp != NULL);
18029 
18030 	ASSERT(!mutex_owned(&un->un_pm_mutex));
18031 	mutex_enter(&un->un_pm_mutex);
18032 	if ((un->un_state == SD_STATE_SUSPENDED) ||
18033 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
18034 	    (pktp->pkt_flags & FLAG_SILENT)) {
18035 		mutex_exit(&un->un_pm_mutex);
18036 		goto update_pkt_reason;
18037 	}
18038 	mutex_exit(&un->un_pm_mutex);
18039 
18040 	/*
18041 	 * Suppress messages if they are all the same pkt_reason; with
18042 	 * TQ, many (up to 256) are returned with the same pkt_reason.
18043 	 * If we are in panic, then suppress the retry messages.
18044 	 */
18045 	switch (flag) {
18046 	case SD_NO_RETRY_ISSUED:
18047 		msgp = "giving up";
18048 		break;
18049 	case SD_IMMEDIATE_RETRY_ISSUED:
18050 	case SD_DELAYED_RETRY_ISSUED:
18051 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
18052 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
18053 		    (sd_error_level != SCSI_ERR_ALL))) {
18054 			return;
18055 		}
18056 		msgp = "retrying command";
18057 		break;
18058 	default:
18059 		goto update_pkt_reason;
18060 	}
18061 
18062 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
18063 	    scsi_rname(pktp->pkt_reason));
18064 
18065 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18066 	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
18067 
18068 update_pkt_reason:
18069 	/*
18070 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
18071 	 * This is to prevent multiple console messages for the same failure
18072 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
18073 	 * when the command is retried successfully because there still may be
18074 	 * more commands coming back with the same value of pktp->pkt_reason.
18075 	 */
18076 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
18077 		un->un_last_pkt_reason = pktp->pkt_reason;
18078 	}
18079 }
18080 
18081 
18082 /*
18083  *    Function: sd_print_cmd_incomplete_msg
18084  *
18085  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18086  *
18087  *   Arguments: un - ptr to associated softstate
18088  *		bp - ptr to buf(9S) for the command
18089  *		arg - passed to sd_print_retry_msg()
18090  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18091  *			or SD_NO_RETRY_ISSUED
18092  *
18093  *     Context: May be called from interrupt context
18094  */
18095 
18096 static void
18097 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
18098 	int code)
18099 {
18100 	dev_info_t	*dip;
18101 
18102 	ASSERT(un != NULL);
18103 	ASSERT(mutex_owned(SD_MUTEX(un)));
18104 	ASSERT(bp != NULL);
18105 
18106 	switch (code) {
18107 	case SD_NO_RETRY_ISSUED:
18108 		/* Command was failed. Someone turned off this target? */
18109 		if (un->un_state != SD_STATE_OFFLINE) {
18110 			/*
18111 			 * Suppress the message if we are detaching and
18112 			 * the device has been disconnected.
18113 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
18114 			 * private interface and not part of the DDI.
18115 			 */
18116 			dip = un->un_sd->sd_dev;
18117 			if (!(DEVI_IS_DETACHING(dip) &&
18118 			    DEVI_IS_DEVICE_REMOVED(dip))) {
18119 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18120 				    "disk not responding to selection\n");
18121 			}
18122 			New_state(un, SD_STATE_OFFLINE);
18123 		}
18124 		break;
18125 
18126 	case SD_DELAYED_RETRY_ISSUED:
18127 	case SD_IMMEDIATE_RETRY_ISSUED:
18128 	default:
18129 		/* Command was successfully queued for retry */
18130 		sd_print_retry_msg(un, bp, arg, code);
18131 		break;
18132 	}
18133 }
18134 
18135 
18136 /*
18137  *    Function: sd_pkt_reason_cmd_incomplete
18138  *
18139  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18140  *
18141  *     Context: May be called from interrupt context
18142  */
18143 
18144 static void
18145 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
18146 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18147 {
18148 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
18149 
18150 	ASSERT(un != NULL);
18151 	ASSERT(mutex_owned(SD_MUTEX(un)));
18152 	ASSERT(bp != NULL);
18153 	ASSERT(xp != NULL);
18154 	ASSERT(pktp != NULL);
18155 
18156 	/* Do not do a reset if selection did not complete */
18157 	/* Note: Should this not just check the bit? */
18158 	if (pktp->pkt_state != STATE_GOT_BUS) {
18159 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18160 		sd_reset_target(un, pktp);
18161 	}
18162 
18163 	/*
18164 	 * If the target was not successfully selected, then set
18165 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
18166 	 * with the target, and further retries and/or commands are
18167 	 * likely to take a long time.
18168 	 */
18169 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18170 		flag |= SD_RETRIES_FAILFAST;
18171 	}
18172 
18173 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18174 
18175 	sd_retry_command(un, bp, flag,
18176 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18177 }
18178 
18179 
18180 
18181 /*
18182  *    Function: sd_pkt_reason_cmd_tran_err
18183  *
18184  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18185  *
18186  *     Context: May be called from interrupt context
18187  */
18188 
18189 static void
18190 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18191 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18192 {
18193 	ASSERT(un != NULL);
18194 	ASSERT(mutex_owned(SD_MUTEX(un)));
18195 	ASSERT(bp != NULL);
18196 	ASSERT(xp != NULL);
18197 	ASSERT(pktp != NULL);
18198 
18199 	/*
18200 	 * Do not reset if we got a parity error, or if
18201 	 * selection did not complete.
18202 	 */
18203 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18204 	/* Note: Should this not just check the bit for pkt_state? */
18205 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
18206 	    (pktp->pkt_state != STATE_GOT_BUS)) {
18207 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18208 		sd_reset_target(un, pktp);
18209 	}
18210 
18211 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18212 
18213 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18214 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18215 }
18216 
18217 
18218 
18219 /*
18220  *    Function: sd_pkt_reason_cmd_reset
18221  *
18222  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18223  *
18224  *     Context: May be called from interrupt context
18225  */
18226 
18227 static void
18228 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
18229 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18230 {
18231 	ASSERT(un != NULL);
18232 	ASSERT(mutex_owned(SD_MUTEX(un)));
18233 	ASSERT(bp != NULL);
18234 	ASSERT(xp != NULL);
18235 	ASSERT(pktp != NULL);
18236 
18237 	/* The target may still be running the command, so try to reset. */
18238 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18239 	sd_reset_target(un, pktp);
18240 
18241 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18242 
18243 	/*
18244 	 * If pkt_reason is CMD_RESET chances are that this pkt got
18245 	 * reset because another target on this bus caused it. The target
18246 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18247 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18248 	 */
18249 
18250 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18251 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18252 }
18253 
18254 
18255 
18256 
18257 /*
18258  *    Function: sd_pkt_reason_cmd_aborted
18259  *
18260  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18261  *
18262  *     Context: May be called from interrupt context
18263  */
18264 
18265 static void
18266 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
18267 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18268 {
18269 	ASSERT(un != NULL);
18270 	ASSERT(mutex_owned(SD_MUTEX(un)));
18271 	ASSERT(bp != NULL);
18272 	ASSERT(xp != NULL);
18273 	ASSERT(pktp != NULL);
18274 
18275 	/* The target may still be running the command, so try to reset. */
18276 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18277 	sd_reset_target(un, pktp);
18278 
18279 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18280 
18281 	/*
18282 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
18283 	 * aborted because another target on this bus caused it. The target
18284 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18285 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18286 	 */
18287 
18288 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18289 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18290 }
18291 
18292 
18293 
18294 /*
18295  *    Function: sd_pkt_reason_cmd_timeout
18296  *
18297  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
18298  *
18299  *     Context: May be called from interrupt context
18300  */
18301 
18302 static void
18303 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
18304 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18305 {
18306 	ASSERT(un != NULL);
18307 	ASSERT(mutex_owned(SD_MUTEX(un)));
18308 	ASSERT(bp != NULL);
18309 	ASSERT(xp != NULL);
18310 	ASSERT(pktp != NULL);
18311 
18312 
18313 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18314 	sd_reset_target(un, pktp);
18315 
18316 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18317 
18318 	/*
18319 	 * A command timeout indicates that we could not establish
18320 	 * communication with the target, so set SD_RETRIES_FAILFAST
18321 	 * as further retries/commands are likely to take a long time.
18322 	 */
18323 	sd_retry_command(un, bp,
18324 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
18325 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18326 }
18327 
18328 
18329 
18330 /*
18331  *    Function: sd_pkt_reason_cmd_unx_bus_free
18332  *
18333  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
18334  *
18335  *     Context: May be called from interrupt context
18336  */
18337 
18338 static void
18339 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
18340 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18341 {
18342 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
18343 
18344 	ASSERT(un != NULL);
18345 	ASSERT(mutex_owned(SD_MUTEX(un)));
18346 	ASSERT(bp != NULL);
18347 	ASSERT(xp != NULL);
18348 	ASSERT(pktp != NULL);
18349 
18350 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18351 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18352 
18353 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
18354 	    sd_print_retry_msg : NULL;
18355 
18356 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18357 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18358 }
18359 
18360 
18361 /*
18362  *    Function: sd_pkt_reason_cmd_tag_reject
18363  *
18364  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
18365  *
18366  *     Context: May be called from interrupt context
18367  */
18368 
18369 static void
18370 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
18371 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18372 {
18373 	ASSERT(un != NULL);
18374 	ASSERT(mutex_owned(SD_MUTEX(un)));
18375 	ASSERT(bp != NULL);
18376 	ASSERT(xp != NULL);
18377 	ASSERT(pktp != NULL);
18378 
18379 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18380 	pktp->pkt_flags = 0;
18381 	un->un_tagflags = 0;
18382 	if (un->un_f_opt_queueing == TRUE) {
18383 		un->un_throttle = min(un->un_throttle, 3);
18384 	} else {
18385 		un->un_throttle = 1;
18386 	}
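	/*
	 * Editorial note (assumption, not from the original source): the
	 * mutex is dropped around scsi_ifsetcap(9F) because the HBA's
	 * tran_setcap(9E) entry point may take its own locks while tagged
	 * queuing is being disabled for this target.
	 */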
18387 	mutex_exit(SD_MUTEX(un));
18388 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
18389 	mutex_enter(SD_MUTEX(un));
18390 
18391 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18392 
18393 	/* Legacy behavior not to check retry counts here. */
18394 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
18395 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18396 }
18397 
18398 
18399 /*
18400  *    Function: sd_pkt_reason_default
18401  *
18402  * Description: Default recovery actions for SCSA pkt_reason values that
18403  *		do not have more explicit recovery actions.
18404  *
18405  *     Context: May be called from interrupt context
18406  */
18407 
18408 static void
18409 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
18410 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18411 {
18412 	ASSERT(un != NULL);
18413 	ASSERT(mutex_owned(SD_MUTEX(un)));
18414 	ASSERT(bp != NULL);
18415 	ASSERT(xp != NULL);
18416 	ASSERT(pktp != NULL);
18417 
18418 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18419 	sd_reset_target(un, pktp);
18420 
18421 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18422 
18423 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18424 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18425 }
18426 
18427 
18428 
18429 /*
18430  *    Function: sd_pkt_status_check_condition
18431  *
18432  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
18433  *
18434  *     Context: May be called from interrupt context
18435  */
18436 
18437 static void
18438 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
18439 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18440 {
18441 	ASSERT(un != NULL);
18442 	ASSERT(mutex_owned(SD_MUTEX(un)));
18443 	ASSERT(bp != NULL);
18444 	ASSERT(xp != NULL);
18445 	ASSERT(pktp != NULL);
18446 
18447 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
18448 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
18449 
18450 	/*
18451 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
18452 	 * command will be retried after the request sense). Otherwise, retry
18453 	 * the command. Note: we are issuing the request sense even though the
18454 	 * retry limit may have been reached for the failed command.
18455 	 */
18456 	if (un->un_f_arq_enabled == FALSE) {
18457 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18458 		    "no ARQ, sending request sense command\n");
18459 		sd_send_request_sense_command(un, bp, pktp);
18460 	} else {
18461 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18462 		    "ARQ, retrying request sense command\n");
18463 #if defined(__i386) || defined(__amd64)
18464 		/*
18465 		 * The SD_RETRY_DELAY value used here needs to be adjusted
18466 		 * whenever SD_RETRY_DELAY changes in sddef.h.
18467 		 */
18468 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, 0,
18469 		    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0,
18470 		    NULL);
18471 #else
18472 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
18473 		    0, SD_RETRY_DELAY, NULL);
18474 #endif
18475 	}
18476 
18477 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
18478 }
18479 
18480 
18481 /*
18482  *    Function: sd_pkt_status_busy
18483  *
18484  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
18485  *
18486  *     Context: May be called from interrupt context
18487  */
18488 
18489 static void
18490 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18491 	struct scsi_pkt *pktp)
18492 {
18493 	ASSERT(un != NULL);
18494 	ASSERT(mutex_owned(SD_MUTEX(un)));
18495 	ASSERT(bp != NULL);
18496 	ASSERT(xp != NULL);
18497 	ASSERT(pktp != NULL);
18498 
18499 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18500 	    "sd_pkt_status_busy: entry\n");
18501 
18502 	/* If retries are exhausted, just fail the command. */
18503 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
18504 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18505 		    "device busy too long\n");
18506 		sd_return_failed_command(un, bp, EIO);
18507 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18508 		    "sd_pkt_status_busy: exit\n");
18509 		return;
18510 	}
18511 	xp->xb_retry_count++;
18512 
18513 	/*
18514 	 * Try to reset the target. However, we do not want to perform
18515 	 * more than one reset if the device continues to fail. The reset
18516 	 * will be performed when the retry count reaches the reset
18517 	 * threshold.  This threshold should be set such that at least
18518 	 * one retry is issued before the reset is performed.
18519 	 */
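	/*
	 * For example (illustrative): if un_reset_retry_count is 0 or 1,
	 * the clamp below still defers the reset until the second retry,
	 * so at least one plain retry always precedes it.
	 */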
18520 	if (xp->xb_retry_count ==
18521 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
18522 		int rval = 0;
18523 		mutex_exit(SD_MUTEX(un));
18524 		if (un->un_f_allow_bus_device_reset == TRUE) {
18525 			/*
18526 			 * First try to reset the LUN; if we cannot then
18527 			 * try to reset the target.
18528 			 */
18529 			if (un->un_f_lun_reset_enabled == TRUE) {
18530 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18531 				    "sd_pkt_status_busy: RESET_LUN\n");
18532 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18533 			}
18534 			if (rval == 0) {
18535 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18536 				    "sd_pkt_status_busy: RESET_TARGET\n");
18537 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18538 			}
18539 		}
18540 		if (rval == 0) {
18541 			/*
18542 			 * If the RESET_LUN and/or RESET_TARGET failed,
18543 			 * try RESET_ALL
18544 			 */
18545 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18546 			    "sd_pkt_status_busy: RESET_ALL\n");
18547 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
18548 		}
18549 		mutex_enter(SD_MUTEX(un));
18550 		if (rval == 0) {
18551 			/*
18552 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
18553 			 * At this point we give up & fail the command.
18554 			 */
18555 			sd_return_failed_command(un, bp, EIO);
18556 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18557 			    "sd_pkt_status_busy: exit (failed cmd)\n");
18558 			return;
18559 		}
18560 	}
18561 
18562 	/*
18563 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
18564 	 * we have already checked the retry counts above.
18565 	 */
18566 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
18567 	    EIO, SD_BSY_TIMEOUT, NULL);
18568 
18569 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18570 	    "sd_pkt_status_busy: exit\n");
18571 }
18572 
18573 
18574 /*
18575  *    Function: sd_pkt_status_reservation_conflict
18576  *
18577  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
18578  *		command status.
18579  *
18580  *     Context: May be called from interrupt context
18581  */
18582 
18583 static void
18584 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
18585 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18586 {
18587 	ASSERT(un != NULL);
18588 	ASSERT(mutex_owned(SD_MUTEX(un)));
18589 	ASSERT(bp != NULL);
18590 	ASSERT(xp != NULL);
18591 	ASSERT(pktp != NULL);
18592 
18593 	/*
18594 	 * If the command was PERSISTENT_RESERVATION_[IN|OUT], the conflict
18595 	 * could be due to various reasons, such as incorrect keys or not
18596 	 * being registered or reserved. So we return EACCES to the caller.
18597 	 */
18598 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
18599 		int cmd = SD_GET_PKT_OPCODE(pktp);
18600 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
18601 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
18602 			sd_return_failed_command(un, bp, EACCES);
18603 			return;
18604 		}
18605 	}
18606 
18607 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
18608 
18609 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
18610 		if (sd_failfast_enable != 0) {
18611 			/* By definition, we must panic here.... */
18612 			panic("Reservation Conflict");
18613 			/*NOTREACHED*/
18614 		}
18615 		SD_ERROR(SD_LOG_IO, un,
18616 		    "sd_handle_resv_conflict: Disk Reserved\n");
18617 		sd_return_failed_command(un, bp, EACCES);
18618 		return;
18619 	}
18620 
18621 	/*
18622 	 * 1147670: retry only if sd_retry_on_reservation_conflict
18623 	 * property is set (default is 1). Retries will not succeed
18624 	 * on a disk reserved by another initiator. HA systems
18625 	 * may reset this via sd.conf to avoid these retries.
18626 	 *
18627 	 * Note: The legacy return code for this failure is EIO, however EACCES
18628 	 * seems more appropriate for a reservation conflict.
18629 	 */
18630 	if (sd_retry_on_reservation_conflict == 0) {
18631 		SD_ERROR(SD_LOG_IO, un,
18632 		    "sd_handle_resv_conflict: Device Reserved\n");
18633 		sd_return_failed_command(un, bp, EIO);
18634 		return;
18635 	}
18636 
18637 	/*
18638 	 * Retry the command if we can.
18639 	 *
18640 	 * Note: The legacy return code for this failure is EIO, however EACCES
18641 	 * seems more appropriate for a reservation conflict.
18642 	 */
18643 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18644 	    (clock_t)2, NULL);
18645 }
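
/*
 * Tuning note (editorial, not from the original source): since
 * sd_retry_on_reservation_conflict is a driver global, an HA
 * configuration could also disable these retries with the standard
 * /etc/system syntax for setting a module variable, e.g.:
 *
 *	set sd:sd_retry_on_reservation_conflict = 0
 *
 * This shows the mechanism only; the sd.conf property mentioned in the
 * comment above is the supported interface.
 */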
18646 
18647 
18648 
18649 /*
18650  *    Function: sd_pkt_status_qfull
18651  *
18652  * Description: Handle a QUEUE FULL condition from the target.  This can
18653  *		occur if the HBA does not handle the queue full condition.
18654  *		(Basically this means third-party HBAs as Sun HBAs will
18655  *		(Basically this means third-party HBAs, as Sun HBAs will
18656  *		some commands already in the transport, then the queue full
18657  *		has occurred because the queue for this nexus is actually
18658  *		full. If there are no commands in the transport, then the
18659  *		queue full is resulting from some other initiator or lun
18660  *		consuming all the resources at the target.
18661  *
18662  *     Context: May be called from interrupt context
18663  */
18664 
18665 static void
18666 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
18667 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18668 {
18669 	ASSERT(un != NULL);
18670 	ASSERT(mutex_owned(SD_MUTEX(un)));
18671 	ASSERT(bp != NULL);
18672 	ASSERT(xp != NULL);
18673 	ASSERT(pktp != NULL);
18674 
18675 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18676 	    "sd_pkt_status_qfull: entry\n");
18677 
18678 	/*
18679 	 * Just lower the QFULL throttle and retry the command.  Note that
18680 	 * we do not limit the number of retries here.
18681 	 */
18682 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
18683 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
18684 	    SD_RESTART_TIMEOUT, NULL);
18685 
18686 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18687 	    "sd_pkt_status_qfull: exit\n");
18688 }
18689 
18690 
18691 /*
18692  *    Function: sd_reset_target
18693  *
18694  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
18695  *		RESET_TARGET, or RESET_ALL.
18696  *
18697  *     Context: May be called under interrupt context.
18698  */
18699 
18700 static void
18701 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
18702 {
18703 	int rval = 0;
18704 
18705 	ASSERT(un != NULL);
18706 	ASSERT(mutex_owned(SD_MUTEX(un)));
18707 	ASSERT(pktp != NULL);
18708 
18709 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
18710 
18711 	/*
18712 	 * No need to reset if the transport layer has already done so.
18713 	 */
18714 	if ((pktp->pkt_statistics &
18715 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
18716 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18717 		    "sd_reset_target: no reset\n");
18718 		return;
18719 	}
18720 
18721 	mutex_exit(SD_MUTEX(un));
18722 
18723 	if (un->un_f_allow_bus_device_reset == TRUE) {
18724 		if (un->un_f_lun_reset_enabled == TRUE) {
18725 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18726 			    "sd_reset_target: RESET_LUN\n");
18727 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18728 		}
18729 		if (rval == 0) {
18730 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18731 			    "sd_reset_target: RESET_TARGET\n");
18732 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18733 		}
18734 	}
18735 
18736 	if (rval == 0) {
18737 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18738 		    "sd_reset_target: RESET_ALL\n");
18739 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
18740 	}
18741 
18742 	mutex_enter(SD_MUTEX(un));
18743 
18744 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
18745 }
18746 
18747 
18748 /*
18749  *    Function: sd_media_change_task
18750  *
18751  * Description: Recovery action for CDROM to become available.
18752  *
18753  *     Context: Executes in a taskq() thread context
18754  */
18755 
18756 static void
18757 sd_media_change_task(void *arg)
18758 {
18759 	struct	scsi_pkt	*pktp = arg;
18760 	struct	sd_lun		*un;
18761 	struct	buf		*bp;
18762 	struct	sd_xbuf		*xp;
18763 	int	err		= 0;
18764 	int	retry_count	= 0;
18765 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
18766 	struct	sd_sense_info	si;
18767 
18768 	ASSERT(pktp != NULL);
18769 	bp = (struct buf *)pktp->pkt_private;
18770 	ASSERT(bp != NULL);
18771 	xp = SD_GET_XBUF(bp);
18772 	ASSERT(xp != NULL);
18773 	un = SD_GET_UN(bp);
18774 	ASSERT(un != NULL);
18775 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18776 	ASSERT(ISREMOVABLE(un));
18777 
18778 	si.ssi_severity = SCSI_ERR_INFO;
18779 	si.ssi_pfa_flag = FALSE;
18780 
18781 	/*
18782 	 * When a reset is issued on a CDROM, it takes a long time to
18783 	 * recover. The first few attempts to read the capacity and other
18784 	 * things related to handling the unit attention fail (with an ASC
18785 	 * of 0x4 and an ASCQ of 0x1). In that case we want to do enough
18786 	 * retries, while limiting the retries in other cases of genuine
18787 	 * failure, such as no media in the drive.
18788 	 */
18789 	while (retry_count++ < retry_limit) {
18790 		if ((err = sd_handle_mchange(un)) == 0) {
18791 			break;
18792 		}
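		/*
		 * EAGAIN indicates the unit is still becoming ready
		 * (the ASC 0x4/ASCQ 0x1 case above): widen the retry
		 * limit from SD_UNIT_ATTENTION_RETRY/10 to the full
		 * SD_UNIT_ATTENTION_RETRY so a slow spin-up can complete.
		 */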
18793 		if (err == EAGAIN) {
18794 			retry_limit = SD_UNIT_ATTENTION_RETRY;
18795 		}
18796 		/* Sleep for 0.5 sec. & try again */
18797 		delay(drv_usectohz(500000));
18798 	}
18799 
18800 	/*
18801 	 * Dispatch (retry or fail) the original command here,
18802 	 * along with appropriate console messages....
18803 	 *
18804 	 * Must grab the mutex before calling sd_retry_command,
18805 	 * sd_print_sense_msg and sd_return_failed_command.
18806 	 */
18807 	mutex_enter(SD_MUTEX(un));
18808 	if (err != SD_CMD_SUCCESS) {
18809 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18810 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18811 		si.ssi_severity = SCSI_ERR_FATAL;
18812 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18813 		sd_return_failed_command(un, bp, EIO);
18814 	} else {
18815 		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
18816 		    &si, EIO, (clock_t)0, NULL);
18817 	}
18818 	mutex_exit(SD_MUTEX(un));
18819 }
18820 
18821 
18822 
18823 /*
18824  *    Function: sd_handle_mchange
18825  *
18826  * Description: Perform geometry validation & other recovery when CDROM
18827  *		has been removed from drive.
18828  *
18829  * Return Code: 0 for success
18830  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
18831  *		sd_send_scsi_READ_CAPACITY()
18832  *
18833  *     Context: Executes in a taskq() thread context
18834  */
18835 
18836 static int
18837 sd_handle_mchange(struct sd_lun *un)
18838 {
18839 	uint64_t	capacity;
18840 	uint32_t	lbasize;
18841 	int		rval;
18842 
18843 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18844 	ASSERT(ISREMOVABLE(un));
18845 
18846 	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
18847 	    SD_PATH_DIRECT_PRIORITY)) != 0) {
18848 		return (rval);
18849 	}
18850 
18851 	mutex_enter(SD_MUTEX(un));
18852 	sd_update_block_info(un, lbasize, capacity);
18853 
18854 	if (un->un_errstats != NULL) {
18855 		struct	sd_errstats *stp =
18856 		    (struct sd_errstats *)un->un_errstats->ks_data;
18857 		stp->sd_capacity.value.ui64 = (uint64_t)
18858 		    ((uint64_t)un->un_blockcount *
18859 		    (uint64_t)un->un_tgt_blocksize);
18860 	}
18861 
18862 	/*
18863 	 * Note: Maybe let the strategy/partitioning chain worry about getting
18864 	 * valid geometry.
18865 	 */
18866 	un->un_f_geometry_is_valid = FALSE;
18867 	(void) sd_validate_geometry(un, SD_PATH_DIRECT_PRIORITY);
18868 	if (un->un_f_geometry_is_valid == FALSE) {
18869 		mutex_exit(SD_MUTEX(un));
18870 		return (EIO);
18871 	}
18872 
18873 	mutex_exit(SD_MUTEX(un));
18874 
18875 	/*
18876 	 * Try to lock the door
18877 	 */
18878 	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
18879 	    SD_PATH_DIRECT_PRIORITY));
18880 }
18881 
18882 
18883 /*
18884  *    Function: sd_send_scsi_DOORLOCK
18885  *
18886  * Description: Issue the scsi DOOR LOCK command
18887  *
18888  *   Arguments: un    - pointer to driver soft state (unit) structure for
18889  *			this target.
18890  *		flag  - SD_REMOVAL_ALLOW
18891  *			SD_REMOVAL_PREVENT
18892  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18893  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18894  *			to use the USCSI "direct" chain and bypass the normal
18895  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
18896  *			command is issued as part of an error recovery action.
18897  *
18898  * Return Code: 0   - Success
18899  *		errno return code from sd_send_scsi_cmd()
18900  *
18901  *     Context: Can sleep.
18902  */
18903 
18904 static int
18905 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
18906 {
18907 	union scsi_cdb		cdb;
18908 	struct uscsi_cmd	ucmd_buf;
18909 	struct scsi_extended_sense	sense_buf;
18910 	int			status;
18911 
18912 	ASSERT(un != NULL);
18913 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18914 
18915 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
18916 
18917 	/* already determined doorlock is not supported, fake success */
18918 	if (un->un_f_doorlock_supported == FALSE) {
18919 		return (0);
18920 	}
18921 
18922 	bzero(&cdb, sizeof (cdb));
18923 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18924 
18925 	cdb.scc_cmd = SCMD_DOORLOCK;
18926 	cdb.cdb_opaque[4] = (uchar_t)flag;
18927 
18928 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18929 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
18930 	ucmd_buf.uscsi_bufaddr	= NULL;
18931 	ucmd_buf.uscsi_buflen	= 0;
18932 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18933 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
18934 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
18935 	ucmd_buf.uscsi_timeout	= 15;
18936 
18937 	SD_TRACE(SD_LOG_IO, un,
18938 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
18939 
18940 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
18941 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
18942 
18943 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
18944 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18945 	    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
18946 		/* fake success and skip subsequent doorlock commands */
18947 		un->un_f_doorlock_supported = FALSE;
18948 		return (0);
18949 	}
18950 
18951 	return (status);
18952 }
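
/*
 * Usage sketch (illustrative only, not part of the original driver):
 * a hypothetical caller preventing media removal while a removable
 * device is in use. Devices already known not to support DOORLOCK are
 * faked as success above.
 */
#if 0
	if (ISREMOVABLE(un)) {
		int rval;

		/* Must not hold SD_MUTEX across this call; it can sleep. */
		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
		    SD_PATH_DIRECT);
		if (rval != 0) {
			SD_ERROR(SD_LOG_IO, un,
			    "sd_send_scsi_DOORLOCK failed: %d\n", rval);
		}
	}
#endif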
18953 
18954 
18955 /*
18956  *    Function: sd_send_scsi_READ_CAPACITY
18957  *
18958  * Description: This routine uses the scsi READ CAPACITY command to determine
18959  *		the device capacity in number of blocks and the device native
18960  *		block size. If this function returns a failure, then the
18961  *		values in *capp and *lbap are undefined.  If the capacity
18962  *		returned is 0xffffffff then the lun is too large for a
18963  *		normal READ CAPACITY command and the results of a
18964  *		READ CAPACITY 16 will be used instead.
18965  *
18966  *   Arguments: un   - ptr to soft state struct for the target
18967  *		capp - ptr to unsigned 64-bit variable to receive the
18968  *			capacity value from the command.
18969  *		lbap - ptr to unsigned 32-bit variable to receive the
18970  *			block size value from the command
18971  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18972  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18973  *			to use the USCSI "direct" chain and bypass the normal
18974  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
18975  *			command is issued as part of an error recovery action.
18976  *
18977  * Return Code: 0   - Success
18978  *		EIO - IO error
18979  *		EACCES - Reservation conflict detected
18980  *		EAGAIN - Device is becoming ready
18981  *		errno return code from sd_send_scsi_cmd()
18982  *
18983  *     Context: Can sleep.  Blocks until command completes.
18984  */
18985 
18986 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
18987 
18988 static int
18989 sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
18990 	int path_flag)
18991 {
18992 	struct	scsi_extended_sense	sense_buf;
18993 	struct	uscsi_cmd	ucmd_buf;
18994 	union	scsi_cdb	cdb;
18995 	uint32_t		*capacity_buf;
18996 	uint64_t		capacity;
18997 	uint32_t		lbasize;
18998 	int			status;
18999 
19000 	ASSERT(un != NULL);
19001 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19002 	ASSERT(capp != NULL);
19003 	ASSERT(lbap != NULL);
19004 
19005 	SD_TRACE(SD_LOG_IO, un,
19006 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19007 
19008 	/*
19009 	 * First send a READ_CAPACITY command to the target.
19010 	 * (This command is mandatory under SCSI-2.)
19011 	 *
19012 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
19013 	 * Medium Indicator bit is cleared.  The address field must be
19014 	 * zero if the PMI bit is zero.
19015 	 */
19016 	bzero(&cdb, sizeof (cdb));
19017 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19018 
19019 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
19020 
19021 	cdb.scc_cmd = SCMD_READ_CAPACITY;
19022 
19023 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19024 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19025 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
19026 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
19027 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19028 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19029 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19030 	ucmd_buf.uscsi_timeout	= 60;
19031 
19032 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19033 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19034 
19035 	switch (status) {
19036 	case 0:
19037 		/* Return failure if we did not get valid capacity data. */
19038 		if (ucmd_buf.uscsi_resid != 0) {
19039 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19040 			return (EIO);
19041 		}
19042 
19043 		/*
19044 		 * Read capacity and block size from the READ CAPACITY 10 data.
19045 		 * This data may be adjusted later due to device specific
19046 		 * issues.
19047 		 *
19048 		 * According to the SCSI spec, the READ CAPACITY 10
19049 		 * command returns the following:
19050 		 *
19051 		 *  bytes 0-3: Maximum logical block address available.
19052 		 *		(MSB in byte:0 & LSB in byte:3)
19053 		 *
19054 		 *  bytes 4-7: Block length in bytes
19055 		 *		(MSB in byte:4 & LSB in byte:7)
19056 		 *
19057 		 */
19058 		capacity = BE_32(capacity_buf[0]);
19059 		lbasize = BE_32(capacity_buf[1]);
19060 
19061 		/*
19062 		 * Done with capacity_buf
19063 		 */
19064 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19065 
19066 		/*
19067 		 * if the reported capacity is set to all 0xf's, then
19068 		 * this disk is too large and requires SBC-2 commands.
19069 		 * Reissue the request using READ CAPACITY 16.
19070 		 */
19071 		if (capacity == 0xffffffff) {
19072 			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
19073 			    &lbasize, path_flag);
19074 			if (status != 0) {
19075 				return (status);
19076 			}
19077 		}
19078 		break;	/* Success! */
19079 	case EIO:
19080 		switch (ucmd_buf.uscsi_status) {
19081 		case STATUS_RESERVATION_CONFLICT:
19082 			status = EACCES;
19083 			break;
19084 		case STATUS_CHECK:
19085 			/*
19086 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19087 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19088 			 */
19089 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19090 			    (sense_buf.es_add_code  == 0x04) &&
19091 			    (sense_buf.es_qual_code == 0x01)) {
19092 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19093 				return (EAGAIN);
19094 			}
19095 			break;
19096 		default:
19097 			break;
19098 		}
19099 		/* FALLTHRU */
19100 	default:
19101 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19102 		return (status);
19103 	}
19104 
19105 	/*
19106 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
19107 	 * (2352 and 0 are common) so for these devices always force the value
19108 	 * to 2048 as required by the ATAPI specs.
19109 	 */
19110 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
19111 		lbasize = 2048;
19112 	}
19113 
19114 	/*
19115 	 * Get the maximum LBA value from the READ CAPACITY data.
19116 	 * Here we assume that the Partial Medium Indicator (PMI) bit
19117 	 * was cleared when issuing the command. This means that the LBA
19118 	 * returned from the device is the LBA of the last logical block
19119 	 * on the logical unit.  The actual logical block count will be
19120 	 * this value plus one.
19121 	 *
19122 	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
19123 	 * so scale the capacity value to reflect this.
19124 	 */
19125 	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
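	/*
	 * Worked example (hypothetical numbers): a device reporting a
	 * maximum LBA of 0x3FFFF with a 2048-byte lbasize, on a system
	 * with un_sys_blocksize of 512, yields
	 * (0x3FFFF + 1) * (2048 / 512) = 0x100000 system blocks.
	 */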
19126 
19127 #if defined(__i386) || defined(__amd64)
19128 	/*
19129 	 * On x86, compensate for off-by-1 error (number of sectors on
19130 	 * media)  (1175930)
19131 	 */
19132 	if (!ISREMOVABLE(un) && (lbasize == un->un_sys_blocksize)) {
19133 		capacity -= 1;
19134 	}
19135 #endif
19136 
19137 	/*
19138 	 * Copy the values from the READ CAPACITY command into the space
19139 	 * provided by the caller.
19140 	 */
19141 	*capp = capacity;
19142 	*lbap = lbasize;
19143 
19144 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
19145 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19146 
19147 	/*
19148 	 * Both the lbasize and capacity from the device must be nonzero,
19149 	 * otherwise we assume that the values are not valid and return
19150 	 * failure to the caller. (4203735)
19151 	 */
19152 	if ((capacity == 0) || (lbasize == 0)) {
19153 		return (EIO);
19154 	}
19155 
19156 	return (0);
19157 }
19158 
19159 /*
19160  *    Function: sd_send_scsi_READ_CAPACITY_16
19161  *
19162  * Description: This routine uses the scsi READ CAPACITY 16 command to
19163  *		determine the device capacity in number of blocks and the
19164  *		device native block size.  If this function returns a failure,
19165  *		then the values in *capp and *lbap are undefined.
19166  *		This routine should always be called by
19167  *		sd_send_scsi_READ_CAPACITY which will appy any device
19168  *		sd_send_scsi_READ_CAPACITY, which will apply any device
19169  *
19170  *   Arguments: un   - ptr to soft state struct for the target
19171  *		capp - ptr to unsigned 64-bit variable to receive the
19172  *			capacity value from the command.
19173  *		lbap - ptr to unsigned 32-bit variable to receive the
19174  *			block size value from the command
19175  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19176  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19177  *			to use the USCSI "direct" chain and bypass the normal
19178  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
19179  *			this command is issued as part of an error recovery
19180  *			action.
19181  *
19182  * Return Code: 0   - Success
19183  *		EIO - IO error
19184  *		EACCES - Reservation conflict detected
19185  *		EAGAIN - Device is becoming ready
19186  *		errno return code from sd_send_scsi_cmd()
19187  *
19188  *     Context: Can sleep.  Blocks until command completes.
19189  */
19190 
19191 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
19192 
19193 static int
19194 sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
19195 	uint32_t *lbap, int path_flag)
19196 {
19197 	struct	scsi_extended_sense	sense_buf;
19198 	struct	uscsi_cmd	ucmd_buf;
19199 	union	scsi_cdb	cdb;
19200 	uint64_t		*capacity16_buf;
19201 	uint64_t		capacity;
19202 	uint32_t		lbasize;
19203 	int			status;
19204 
19205 	ASSERT(un != NULL);
19206 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19207 	ASSERT(capp != NULL);
19208 	ASSERT(lbap != NULL);
19209 
19210 	SD_TRACE(SD_LOG_IO, un,
19211 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19212 
19213 	/*
19214 	 * First send a READ_CAPACITY_16 command to the target.
19215 	 *
19216 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
19217 	 * Medium Indicator bit is cleared.  The address field must be
19218 	 * zero if the PMI bit is zero.
19219 	 */
19220 	bzero(&cdb, sizeof (cdb));
19221 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19222 
19223 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
19224 
19225 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19226 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
19227 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
19228 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
19229 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19230 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19231 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19232 	ucmd_buf.uscsi_timeout	= 60;
19233 
19234 	/*
19235 	 * Read Capacity (16) is a Service Action In command.  One
19236 	 * command byte (0x9E) is overloaded for multiple operations,
19237 	 * with the second CDB byte specifying the desired operation
19238 	 */
19239 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
19240 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
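
	/*
	 * For reference (per the T10 SBC-2 definition, not restated
	 * elsewhere in this file), the resulting 16-byte CDB is:
	 *
	 *  byte 0:      0x9E (SERVICE ACTION IN)
	 *  byte 1:      0x10 (READ CAPACITY 16 service action)
	 *  bytes 2-9:   logical block address (zero, since PMI is zero)
	 *  bytes 10-13: allocation length (filled in below)
	 *  byte 14:     PMI bit (zero)
	 *  byte 15:     control
	 */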
19241 
19242 	/*
19243 	 * Fill in allocation length field
19244 	 */
19245 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
19246 
19247 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19248 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19249 
19250 	switch (status) {
19251 	case 0:
19252 		/* Return failure if we did not get valid capacity data. */
19253 		if (ucmd_buf.uscsi_resid > 20) {
19254 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19255 			return (EIO);
19256 		}
19257 
19258 		/*
19259 		 * Read capacity and block size from the READ CAPACITY 16 data.
19260 		 * This data may be adjusted later due to device specific
19261 		 * issues.
19262 		 *
19263 		 * According to the SCSI spec, the READ CAPACITY 16
19264 		 * command returns the following:
19265 		 *
19266 		 *  bytes 0-7: Maximum logical block address available.
19267 		 *		(MSB in byte:0 & LSB in byte:7)
19268 		 *
19269 		 *  bytes 8-11: Block length in bytes
19270 		 *		(MSB in byte:8 & LSB in byte:11)
19271 		 *
19272 		 */
19273 		capacity = BE_64(capacity16_buf[0]);
19274 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
19275 
19276 		/*
19277 		 * Done with capacity16_buf
19278 		 */
19279 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19280 
19281 		/*
19282 		 * if the reported capacity is set to all 0xf's, then
19283 		 * this disk is too large.  This could only happen with
19284 		 * a device that supports LBAs larger than 64 bits which
19285 		 * are not defined by any current T10 standards.
19286 		 */
19287 		if (capacity == 0xffffffffffffffff) {
19288 			return (EIO);
19289 		}
19290 		break;	/* Success! */
19291 	case EIO:
19292 		switch (ucmd_buf.uscsi_status) {
19293 		case STATUS_RESERVATION_CONFLICT:
19294 			status = EACCES;
19295 			break;
19296 		case STATUS_CHECK:
19297 			/*
19298 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19299 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19300 			 */
19301 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19302 			    (sense_buf.es_add_code  == 0x04) &&
19303 			    (sense_buf.es_qual_code == 0x01)) {
19304 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19305 				return (EAGAIN);
19306 			}
19307 			break;
19308 		default:
19309 			break;
19310 		}
19311 		/* FALLTHRU */
19312 	default:
19313 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19314 		return (status);
19315 	}
19316 
19317 	*capp = capacity;
19318 	*lbap = lbasize;
19319 
19320 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
19321 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19322 
19323 	return (0);
19324 }
19325 
19326 
19327 /*
19328  *    Function: sd_send_scsi_START_STOP_UNIT
19329  *
19330  * Description: Issue a scsi START STOP UNIT command to the target.
19331  *
19332  *   Arguments: un    - pointer to driver soft state (unit) structure for
19333  *			this target.
19334  *		flag  - SD_TARGET_START
19335  *			SD_TARGET_STOP
19336  *			SD_TARGET_EJECT
19337  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19338  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19339  *			to use the USCSI "direct" chain and bypass the normal
19340  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19341  *			command is issued as part of an error recovery action.
19342  *
19343  * Return Code: 0   - Success
19344  *		EIO - IO error
19345  *		EACCES - Reservation conflict detected
19346  *		ENXIO  - Not Ready, medium not present
19347  *		errno return code from sd_send_scsi_cmd()
19348  *
19349  *     Context: Can sleep.
19350  */
19351 
19352 static int
19353 sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
19354 {
19355 	struct	scsi_extended_sense	sense_buf;
19356 	union scsi_cdb		cdb;
19357 	struct uscsi_cmd	ucmd_buf;
19358 	int			status;
19359 
19360 	ASSERT(un != NULL);
19361 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19362 
19363 	SD_TRACE(SD_LOG_IO, un,
19364 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
19365 
19366 	if (ISREMOVABLE(un) &&
19367 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
19368 	    (un->un_f_start_stop_supported != TRUE)) {
19369 		return (0);
19370 	}
19371 
19372 	bzero(&cdb, sizeof (cdb));
19373 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19374 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19375 
19376 	cdb.scc_cmd = SCMD_START_STOP;
19377 	cdb.cdb_opaque[4] = (uchar_t)flag;
19378 
19379 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19380 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19381 	ucmd_buf.uscsi_bufaddr	= NULL;
19382 	ucmd_buf.uscsi_buflen	= 0;
19383 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19384 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19385 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19386 	ucmd_buf.uscsi_timeout	= 200;
19387 
19388 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19389 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19390 
19391 	switch (status) {
19392 	case 0:
19393 		break;	/* Success! */
19394 	case EIO:
19395 		switch (ucmd_buf.uscsi_status) {
19396 		case STATUS_RESERVATION_CONFLICT:
19397 			status = EACCES;
19398 			break;
19399 		case STATUS_CHECK:
19400 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
19401 				switch (sense_buf.es_key) {
19402 				case KEY_ILLEGAL_REQUEST:
19403 					status = ENOTSUP;
19404 					break;
19405 				case KEY_NOT_READY:
19406 					if (sense_buf.es_add_code == 0x3A) {
19407 						status = ENXIO;
19408 					}
19409 					break;
19410 				default:
19411 					break;
19412 				}
19413 			}
19414 			break;
19415 		default:
19416 			break;
19417 		}
19418 		break;
19419 	default:
19420 		break;
19421 	}
19422 
19423 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
19424 
19425 	return (status);
19426 }
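
/*
 * Usage sketch (illustrative only): a hypothetical caller ejecting the
 * medium via the USCSI "direct" chain, unlocking the door first with
 * sd_send_scsi_DOORLOCK().
 */
#if 0
	(void) sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
	    SD_PATH_DIRECT) == ENXIO) {
		/* Medium was not present to begin with. */
	}
#endif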
19427 
19428 
19429 /*
19430  *    Function: sd_start_stop_unit_callback
19431  *
19432  * Description: timeout(9F) callback to begin recovery process for a
19433  *		device that has spun down.
19434  *
19435  *   Arguments: arg - pointer to associated softstate struct.
19436  *
19437  *     Context: Executes in a timeout(9F) thread context
19438  */
19439 
19440 static void
19441 sd_start_stop_unit_callback(void *arg)
19442 {
19443 	struct sd_lun	*un = arg;
19444 	ASSERT(un != NULL);
19445 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19446 
19447 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
19448 
19449 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
19450 }
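
/*
 * Context note (editorial, not from the original source): timeout(9F)
 * callbacks execute in a context that must not block, which is why the
 * callback above only dispatches sd_start_stop_unit_task() to a taskq
 * thread; the task can then sleep in sd_send_scsi_START_STOP_UNIT().
 * Arming the callback elsewhere in the driver would look roughly like:
 *
 *	un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
 *	    un, delay_in_ticks);
 *
 * where delay_in_ticks is a placeholder for the driver's actual
 * recovery delay.
 */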
19451 
19452 
19453 /*
19454  *    Function: sd_start_stop_unit_task
19455  *
19456  * Description: Recovery procedure when a drive is spun down.
19457  *
19458  *   Arguments: arg - pointer to associated softstate struct.
19459  *
19460  *     Context: Executes in a taskq() thread context
19461  */
19462 
19463 static void
19464 sd_start_stop_unit_task(void *arg)
19465 {
19466 	struct sd_lun	*un = arg;
19467 
19468 	ASSERT(un != NULL);
19469 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19470 
19471 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
19472 
19473 	/*
19474 	 * Some unformatted drives report a not ready error; there is no
19475 	 * need to restart if a format has been initiated.
19476 	 */
19477 	mutex_enter(SD_MUTEX(un));
19478 	if (un->un_f_format_in_progress == TRUE) {
19479 		mutex_exit(SD_MUTEX(un));
19480 		return;
19481 	}
19482 	mutex_exit(SD_MUTEX(un));
19483 
19484 	/*
19485 	 * When a START STOP command is issued from here, it is part of a
19486 	 * failure recovery operation and must be issued before any other
19487 	 * commands, including any pending retries. Thus it must be sent
19488 	 * using SD_PATH_DIRECT_PRIORITY. Whether or not the spin up
19489 	 * succeeds, we will start I/O after the attempt.
19490 	 */
19491 	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
19492 	    SD_PATH_DIRECT_PRIORITY);
19493 
19494 	/*
19495 	 * The above call blocks until the START_STOP_UNIT command completes.
19496 	 * Now that it has completed, we must re-try the original IO that
19497 	 * received the NOT READY condition in the first place. There are
19498 	 * three possible conditions here:
19499 	 *
19500 	 *  (1) The original IO is on un_retry_bp.
19501 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
19502 	 *	is NULL.
19503 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
19504 	 *	points to some other, unrelated bp.
19505 	 *
19506 	 * For each case, we must call sd_start_cmds() with un_retry_bp
19507 	 * as the argument. If un_retry_bp is NULL, this will initiate
19508 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
19509 	 * then this will process the bp on un_retry_bp. That may or may not
19510 	 * be the original IO, but that does not matter: the important thing
19511 	 * is to keep the IO processing going at this point.
19512 	 *
19513 	 * Note: This is a very specific error recovery sequence associated
19514 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
19515 	 * serialize the I/O with completion of the spin-up.
19516 	 */
19517 	mutex_enter(SD_MUTEX(un));
19518 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19519 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
19520 	    un, un->un_retry_bp);
19521 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
19522 	sd_start_cmds(un, un->un_retry_bp);
19523 	mutex_exit(SD_MUTEX(un));
19524 
19525 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
19526 }
19527 
19528 
19529 /*
19530  *    Function: sd_send_scsi_INQUIRY
19531  *
19532  * Description: Issue the scsi INQUIRY command.
19533  *
19534  *   Arguments: un
19535  *		bufaddr
19536  *		buflen
19537  *		evpd
19538  *		page_code
19539  *		page_length
19540  *
19541  * Return Code: 0   - Success
19542  *		errno return code from sd_send_scsi_cmd()
19543  *
19544  *     Context: Can sleep. Does not return until command is completed.
19545  */
19546 
19547 static int
19548 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
19549 	uchar_t evpd, uchar_t page_code, size_t *residp)
19550 {
19551 	union scsi_cdb		cdb;
19552 	struct uscsi_cmd	ucmd_buf;
19553 	int			status;
19554 
19555 	ASSERT(un != NULL);
19556 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19557 	ASSERT(bufaddr != NULL);
19558 
19559 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
19560 
19561 	bzero(&cdb, sizeof (cdb));
19562 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19563 	bzero(bufaddr, buflen);
19564 
19565 	cdb.scc_cmd = SCMD_INQUIRY;
19566 	cdb.cdb_opaque[1] = evpd;
19567 	cdb.cdb_opaque[2] = page_code;
19568 	FORMG0COUNT(&cdb, buflen);
19569 
19570 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19571 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19572 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19573 	ucmd_buf.uscsi_buflen	= buflen;
19574 	ucmd_buf.uscsi_rqbuf	= NULL;
19575 	ucmd_buf.uscsi_rqlen	= 0;
19576 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
19577 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
19578 
19579 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19580 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_DIRECT);
19581 
19582 	if ((status == 0) && (residp != NULL)) {
19583 		*residp = ucmd_buf.uscsi_resid;
19584 	}
19585 
19586 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
19587 
19588 	return (status);
19589 }
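
/*
 * Usage sketch (illustrative only): a hypothetical caller fetching the
 * Unit Serial Number VPD page (EVPD bit set, page code 0x80 per SPC).
 */
#if 0
	uchar_t	inq80[0xFF];
	size_t	resid;

	if (sd_send_scsi_INQUIRY(un, inq80, sizeof (inq80), 0x01, 0x80,
	    &resid) == 0) {
		/* inq80[3] is the page length; the serial starts at [4]. */
	}
#endif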
19590 
19591 
19592 /*
19593  *    Function: sd_send_scsi_TEST_UNIT_READY
19594  *
19595  * Description: Issue the scsi TEST UNIT READY command.
19596  *		This routine can be told to set the flag USCSI_DIAGNOSE to
19597  *		prevent retrying failed commands. Use this when the intent
19598  *		is either to check for device readiness, to clear a Unit
19599  *		Attention, or to clear any outstanding sense data.
19600  *		However under specific conditions the expected behavior
19601  *		However, under specific conditions the expected behavior
19602  *		with caution.
19603  *
19604  *   Arguments: un
19605  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
19606  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
19607  *			0: don't check for media present, do retries on cmd.
19608  *
19609  * Return Code: 0   - Success
19610  *		EIO - IO error
19611  *		EACCES - Reservation conflict detected
19612  *		ENXIO  - Not Ready, medium not present
19613  *		errno return code from sd_send_scsi_cmd()
19614  *
19615  *     Context: Can sleep. Does not return until command is completed.
19616  */
19617 
19618 static int
19619 sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
19620 {
19621 	struct	scsi_extended_sense	sense_buf;
19622 	union scsi_cdb		cdb;
19623 	struct uscsi_cmd	ucmd_buf;
19624 	int			status;
19625 
19626 	ASSERT(un != NULL);
19627 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19628 
19629 	SD_TRACE(SD_LOG_IO, un,
19630 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
19631 
19632 	/*
19633 	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
19634 	 * timeouts when they receive a TUR and the queue is not empty. Check
19635 	 * the configuration flag set during attach (indicating the drive has
19636 	 * this firmware bug) and un_ncmds_in_transport before issuing the
19637 	 * TUR. If there are pending commands, return success; this is a
19638 	 * bit arbitrary, but it is OK for non-removables (i.e. the eliteI
19639 	 * disks) and non-clustering
19640 	 * configurations.
19641 	 */
19642 	if (un->un_f_cfg_tur_check == TRUE) {
19643 		mutex_enter(SD_MUTEX(un));
19644 		if (un->un_ncmds_in_transport != 0) {
19645 			mutex_exit(SD_MUTEX(un));
19646 			return (0);
19647 		}
19648 		mutex_exit(SD_MUTEX(un));
19649 	}
19650 
19651 	bzero(&cdb, sizeof (cdb));
19652 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19653 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19654 
19655 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
19656 
19657 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19658 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19659 	ucmd_buf.uscsi_bufaddr	= NULL;
19660 	ucmd_buf.uscsi_buflen	= 0;
19661 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19662 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19663 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19664 
19665 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
19666 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
19667 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
19668 	}
19669 	ucmd_buf.uscsi_timeout	= 60;
19670 
19671 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19672 	    UIO_SYSSPACE, UIO_SYSSPACE,
19673 	    ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT : SD_PATH_STANDARD));
19674 
19675 	switch (status) {
19676 	case 0:
19677 		break;	/* Success! */
19678 	case EIO:
19679 		switch (ucmd_buf.uscsi_status) {
19680 		case STATUS_RESERVATION_CONFLICT:
19681 			status = EACCES;
19682 			break;
19683 		case STATUS_CHECK:
19684 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
19685 				break;
19686 			}
19687 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19688 			    (sense_buf.es_key == KEY_NOT_READY) &&
19689 			    (sense_buf.es_add_code == 0x3A)) {
19690 				status = ENXIO;
19691 			}
19692 			break;
19693 		default:
19694 			break;
19695 		}
19696 		break;
19697 	default:
19698 		break;
19699 	}
19700 
19701 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
19702 
19703 	return (status);
19704 }
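
/*
 * Usage sketch (illustrative only): a hypothetical caller polling for
 * media without letting the USCSI layer retry the command.
 */
#if 0
	int err = sd_send_scsi_TEST_UNIT_READY(un,
	    SD_CHECK_FOR_MEDIA | SD_DONT_RETRY_TUR);
	if (err == ENXIO) {
		/* Medium is not present. */
	}
#endif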
19705 
19706 
19707 /*
19708  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
19709  *
19710  * Description: Issue the scsi PERSISTENT RESERVE IN command.
19711  *
19712  *   Arguments: un
19713  *
19714  * Return Code: 0   - Success
19715  *		EACCES
19716  *		ENOTSUP
19717  *		errno return code from sd_send_scsi_cmd()
19718  *
19719  *     Context: Can sleep. Does not return until command is completed.
19720  */
19721 
19722 static int
19723 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
19724 	uint16_t data_len, uchar_t *data_bufp)
19725 {
19726 	struct scsi_extended_sense	sense_buf;
19727 	union scsi_cdb		cdb;
19728 	struct uscsi_cmd	ucmd_buf;
19729 	int			status;
19730 	int			no_caller_buf = FALSE;
19731 
19732 	ASSERT(un != NULL);
19733 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19734 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
19735 
19736 	SD_TRACE(SD_LOG_IO, un,
19737 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
19738 
19739 	bzero(&cdb, sizeof (cdb));
19740 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19741 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19742 	if (data_bufp == NULL) {
19743 		/* Allocate a default buf if the caller did not give one */
19744 		ASSERT(data_len == 0);
19745 		data_len  = MHIOC_RESV_KEY_SIZE;
19746 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
19747 		no_caller_buf = TRUE;
19748 	}
19749 
19750 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
19751 	cdb.cdb_opaque[1] = usr_cmd;
19752 	FORMG1COUNT(&cdb, data_len);
19753 
19754 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19755 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19756 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
19757 	ucmd_buf.uscsi_buflen	= data_len;
19758 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19759 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19760 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19761 	ucmd_buf.uscsi_timeout	= 60;
19762 
19763 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19764 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
19765 
19766 	switch (status) {
19767 	case 0:
19768 		break;	/* Success! */
19769 	case EIO:
19770 		switch (ucmd_buf.uscsi_status) {
19771 		case STATUS_RESERVATION_CONFLICT:
19772 			status = EACCES;
19773 			break;
19774 		case STATUS_CHECK:
19775 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19776 			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
19777 				status = ENOTSUP;
19778 			}
19779 			break;
19780 		default:
19781 			break;
19782 		}
19783 		break;
19784 	default:
19785 		break;
19786 	}
19787 
19788 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
19789 
19790 	if (no_caller_buf == TRUE) {
19791 		kmem_free(data_bufp, data_len);
19792 	}
19793 
19794 	return (status);
19795 }
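
/*
 * Illustrative sketch (not part of the original source): fetching the
 * SCSI-3 READ KEYS data through the routine above. The helper name is
 * hypothetical; a real caller would size the buffer from the
 * generation/length header returned by a first READ KEYS pass.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_read_keys(struct sd_lun *un, uchar_t *keybuf, uint16_t keylen)
{
	/* EACCES: reservation conflict; ENOTSUP: PRIN not supported */
	return (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
	    keylen, keybuf));
}
#endif	/* SD_EXAMPLE_SKETCHES */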
19796 
19797 
19798 /*
19799  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
19800  *
19801  * Description: This routine handles the multi-host persistent reservation
19802  *		requests (MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
19803  *		MHIOCGRP_PREEMPTANDABORT, MHIOCGRP_REGISTERANDIGNOREKEY) by
19804  *		sending the SCSI-3 PROUT commands to the device.
19805  *
19806  *   Arguments: un  -   Pointer to soft state struct for the target.
19807  *		usr_cmd SCSI-3 reservation facility command (one of
19808  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
19809  *			SD_SCSI3_PREEMPTANDABORT, SD_SCSI3_REGISTERANDIGNOREKEY)
19810  *		usr_bufp - user-provided pointer to a register, reserve
19811  *			descriptor, or preempt-and-abort structure
19812  *			(mhioc_register_t, mhioc_resv_desc_t, mhioc_preemptandabort_t)
19813  *
19814  * Return Code: 0   - Success
19815  *		EACCES
19816  *		ENOTSUP
19817  *		errno return code from sd_send_scsi_cmd()
19818  *
19819  *     Context: Can sleep. Does not return until command is completed.
19820  */
19821 
19822 static int
19823 sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
19824 	uchar_t	*usr_bufp)
19825 {
19826 	struct scsi_extended_sense	sense_buf;
19827 	union scsi_cdb		cdb;
19828 	struct uscsi_cmd	ucmd_buf;
19829 	int			status;
19830 	uchar_t			data_len = sizeof (sd_prout_t);
19831 	sd_prout_t		*prp;
19832 
19833 	ASSERT(un != NULL);
19834 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19835 	ASSERT(data_len == 24);	/* required by scsi spec */
19836 
19837 	SD_TRACE(SD_LOG_IO, un,
19838 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
19839 
19840 	if (usr_bufp == NULL) {
19841 		return (EINVAL);
19842 	}
19843 
19844 	bzero(&cdb, sizeof (cdb));
19845 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19846 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19847 	prp = kmem_zalloc(data_len, KM_SLEEP);
19848 
19849 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
19850 	cdb.cdb_opaque[1] = usr_cmd;
19851 	FORMG1COUNT(&cdb, data_len);
19852 
19853 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19854 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19855 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
19856 	ucmd_buf.uscsi_buflen	= data_len;
19857 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19858 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19859 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
19860 	ucmd_buf.uscsi_timeout	= 60;
19861 
19862 	switch (usr_cmd) {
19863 	case SD_SCSI3_REGISTER: {
19864 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
19865 
19866 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
19867 		bcopy(ptr->newkey.key, prp->service_key,
19868 		    MHIOC_RESV_KEY_SIZE);
19869 		prp->aptpl = ptr->aptpl;
19870 		break;
19871 	}
19872 	case SD_SCSI3_RESERVE:
19873 	case SD_SCSI3_RELEASE: {
19874 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
19875 
19876 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
19877 		prp->scope_address = BE_32(ptr->scope_specific_addr);
19878 		cdb.cdb_opaque[2] = ptr->type;
19879 		break;
19880 	}
19881 	case SD_SCSI3_PREEMPTANDABORT: {
19882 		mhioc_preemptandabort_t *ptr =
19883 		    (mhioc_preemptandabort_t *)usr_bufp;
19884 
19885 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
19886 		bcopy(ptr->victim_key.key, prp->service_key,
19887 		    MHIOC_RESV_KEY_SIZE);
19888 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
19889 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
19890 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
19891 		break;
19892 	}
19893 	case SD_SCSI3_REGISTERANDIGNOREKEY:
19894 	{
19895 		mhioc_registerandignorekey_t *ptr;
19896 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
19897 		bcopy(ptr->newkey.key,
19898 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
19899 		prp->aptpl = ptr->aptpl;
19900 		break;
19901 	}
19902 	default:
19903 		ASSERT(FALSE);
19904 		break;
19905 	}
19906 
19907 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19908 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
19909 
19910 	switch (status) {
19911 	case 0:
19912 		break;	/* Success! */
19913 	case EIO:
19914 		switch (ucmd_buf.uscsi_status) {
19915 		case STATUS_RESERVATION_CONFLICT:
19916 			status = EACCES;
19917 			break;
19918 		case STATUS_CHECK:
19919 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19920 			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
19921 				status = ENOTSUP;
19922 			}
19923 			break;
19924 		default:
19925 			break;
19926 		}
19927 		break;
19928 	default:
19929 		break;
19930 	}
19931 
19932 	kmem_free(prp, data_len);
19933 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
19934 	return (status);
19935 }
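
/*
 * Illustrative sketch (not part of the original source): registering a
 * reservation key through the routine above, the same way the
 * MHIOCGRP_REGISTER ioctl path does. The all-zero old key registers
 * from an unregistered state; the helper name is hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_register_key(struct sd_lun *un, mhioc_resv_key_t *newkey)
{
	mhioc_register_t	reg;

	bzero(&reg, sizeof (reg));	/* oldkey is all zeros */
	bcopy(newkey->key, reg.newkey.key, MHIOC_RESV_KEY_SIZE);
	reg.aptpl = 0;			/* do not persist across power loss */

	return (sd_send_scsi_PERSISTENT_RESERVE_OUT(un, SD_SCSI3_REGISTER,
	    (uchar_t *)&reg));
}
#endif	/* SD_EXAMPLE_SKETCHES */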
19936 
19937 
19938 /*
19939  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
19940  *
19941  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
19942  *
19943  *   Arguments: un - pointer to the target's soft state struct
19944  *
19945  * Return Code: 0 - success
19946  *		errno-type error code
19947  *
19948  *     Context: kernel thread context only.
19949  */
19950 
19951 static int
19952 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
19953 {
19954 	struct sd_uscsi_info	*uip;
19955 	struct uscsi_cmd	*uscmd;
19956 	union scsi_cdb		*cdb;
19957 	struct buf		*bp;
19958 	int			rval = 0;
19959 
19960 	SD_TRACE(SD_LOG_IO, un,
19961 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
19962 
19963 	ASSERT(un != NULL);
19964 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19965 
19966 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
19967 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
19968 
19969 	/*
19970 	 * First get some memory for the uscsi_cmd struct and cdb
19971 	 * and initialize for SYNCHRONIZE_CACHE cmd.
19972 	 */
19973 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
19974 	uscmd->uscsi_cdblen = CDB_GROUP1;
19975 	uscmd->uscsi_cdb = (caddr_t)cdb;
19976 	uscmd->uscsi_bufaddr = NULL;
19977 	uscmd->uscsi_buflen = 0;
19978 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
19979 	uscmd->uscsi_rqlen = SENSE_LENGTH;
19980 	uscmd->uscsi_rqresid = SENSE_LENGTH;
19981 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
19982 	uscmd->uscsi_timeout = sd_io_time;
19983 
19984 	/*
19985 	 * Allocate an sd_uscsi_info struct and fill it with the info
19986 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
19987 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
19988 	 * since we allocate the buf here in this function, we do not
19989 	 * need to preserve the prior contents of b_private.
19990 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
19991 	 */
19992 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
19993 	uip->ui_flags = SD_PATH_DIRECT;
19994 	uip->ui_cmdp  = uscmd;
19995 
19996 	bp = getrbuf(KM_SLEEP);
19997 	bp->b_private = uip;
19998 
19999 	/*
20000 	 * Setup buffer to carry uscsi request.
20001 	 */
20002 	bp->b_flags  = B_BUSY;
20003 	bp->b_bcount = 0;
20004 	bp->b_blkno  = 0;
20005 
20006 	if (dkc != NULL) {
20007 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
20008 		uip->ui_dkc = *dkc;
20009 	}
20010 
20011 	bp->b_edev = SD_GET_DEV(un);
20012 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
20013 
20014 	(void) sd_uscsi_strategy(bp);
20015 
20016 	/*
20017 	 * If this is a synchronous request, wait for completion.
20018 	 * If async, just return and let the b_iodone callback
20019 	 * clean up.
20020 	 * NOTE: On return, u_ncmds_in_driver will be decremented,
20021 	 * but it was also incremented in sd_uscsi_strategy(), so
20022 	 * we should be ok.
20023 	 */
20024 	if (dkc == NULL) {
20025 		(void) biowait(bp);
20026 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
20027 	}
20028 
20029 	return (rval);
20030 }
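
/*
 * Illustrative sketch (not part of the original source): the two ways
 * to drive the routine above. With dkc == NULL the flush is
 * synchronous; with a dk_callback it returns at once and the status is
 * delivered to the callback at biodone time. Names are hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static void
sd_example_flush_done(void *cookie, int status)
{
	/* status is 0 on success, ENOTSUP, or an errno from the flush */
	if (status != 0) {
		cmn_err(CE_NOTE, "example cache flush failed: %d", status);
	}
}

static void
sd_example_flush(struct sd_lun *un)
{
	struct dk_callback	dkc;

	dkc.dkc_callback = sd_example_flush_done;
	dkc.dkc_cookie = NULL;

	/* Asynchronous: returns immediately, callback runs on completion */
	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, &dkc);

	/* Synchronous: blocks in biowait() until the flush completes */
	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
}
#endif	/* SD_EXAMPLE_SKETCHES */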
20031 
20032 
20033 static int
20034 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
20035 {
20036 	struct sd_uscsi_info *uip;
20037 	struct uscsi_cmd *uscmd;
20038 	struct scsi_extended_sense *sense_buf;
20039 	struct sd_lun *un;
20040 	int status;
20041 
20042 	uip = (struct sd_uscsi_info *)(bp->b_private);
20043 	ASSERT(uip != NULL);
20044 
20045 	uscmd = uip->ui_cmdp;
20046 	ASSERT(uscmd != NULL);
20047 
20048 	sense_buf = (struct scsi_extended_sense *)uscmd->uscsi_rqbuf;
20049 	ASSERT(sense_buf != NULL);
20050 
20051 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
20052 	ASSERT(un != NULL);
20053 
20054 	status = geterror(bp);
20055 	switch (status) {
20056 	case 0:
20057 		break;	/* Success! */
20058 	case EIO:
20059 		switch (uscmd->uscsi_status) {
20060 		case STATUS_RESERVATION_CONFLICT:
20061 			/* Ignore reservation conflict */
20062 			status = 0;
20063 			goto done;
20064 
20065 		case STATUS_CHECK:
20066 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
20067 			    (sense_buf->es_key == KEY_ILLEGAL_REQUEST)) {
20068 				/* Ignore Illegal Request error */
20069 				mutex_enter(SD_MUTEX(un));
20070 				un->un_f_sync_cache_unsupported = TRUE;
20071 				mutex_exit(SD_MUTEX(un));
20072 				status = ENOTSUP;
20073 				goto done;
20074 			}
20075 			break;
20076 		default:
20077 			break;
20078 		}
20079 		/* FALLTHRU */
20080 	default:
20081 		/* Ignore error if the media is not present */
20082 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
20083 			status = 0;
20084 			goto done;
20085 		}
20086 		/* If we reach this, we had an error */
20087 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
20088 		    "SYNCHRONIZE CACHE command failed (%d)\n", status);
20089 		break;
20090 	}
20091 
20092 done:
20093 	if (uip->ui_dkc.dkc_callback != NULL) {
20094 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
20095 	}
20096 
20097 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
20098 	freerbuf(bp);
20099 	kmem_free(uip, sizeof (struct sd_uscsi_info));
20100 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
20101 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
20102 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
20103 
20104 	return (status);
20105 }
20106 
20107 
20108 /*
20109  *    Function: sd_send_scsi_GET_CONFIGURATION
20110  *
20111  * Description: Issues the get configuration command to the device.
20112  *		Called from sd_check_for_writable_cd & sd_get_media_info.
20113  *		The caller must ensure that buflen = SD_PROFILE_HEADER_LEN.
20114  *   Arguments: un
20115  *		ucmdbuf
20116  *		rqbuf
20117  *		rqbuflen
20118  *		bufaddr
20119  *		buflen
20120  *
20121  * Return Code: 0   - Success
20122  *		errno return code from sd_send_scsi_cmd()
20123  *
20124  *     Context: Can sleep. Does not return until command is completed.
20125  *
20126  */
20127 
20128 static int
20129 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
20130 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen)
20131 {
20132 	char	cdb[CDB_GROUP1];
20133 	int	status;
20134 
20135 	ASSERT(un != NULL);
20136 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20137 	ASSERT(bufaddr != NULL);
20138 	ASSERT(ucmdbuf != NULL);
20139 	ASSERT(rqbuf != NULL);
20140 
20141 	SD_TRACE(SD_LOG_IO, un,
20142 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
20143 
20144 	bzero(cdb, sizeof (cdb));
20145 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20146 	bzero(rqbuf, rqbuflen);
20147 	bzero(bufaddr, buflen);
20148 
20149 	/*
20150 	 * Set up cdb field for the get configuration command.
20151 	 */
20152 	cdb[0] = SCMD_GET_CONFIGURATION;
20153 	cdb[1] = 0x02;  /* Requested Type */
20154 	cdb[8] = SD_PROFILE_HEADER_LEN;
20155 	ucmdbuf->uscsi_cdb = cdb;
20156 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20157 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20158 	ucmdbuf->uscsi_buflen = buflen;
20159 	ucmdbuf->uscsi_timeout = sd_io_time;
20160 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20161 	ucmdbuf->uscsi_rqlen = rqbuflen;
20162 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20163 
20164 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20165 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20166 
20167 	switch (status) {
20168 	case 0:
20169 		break;  /* Success! */
20170 	case EIO:
20171 		switch (ucmdbuf->uscsi_status) {
20172 		case STATUS_RESERVATION_CONFLICT:
20173 			status = EACCES;
20174 			break;
20175 		default:
20176 			break;
20177 		}
20178 		break;
20179 	default:
20180 		break;
20181 	}
20182 
20183 	if (status == 0) {
20184 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20185 		    "sd_send_scsi_GET_CONFIGURATION: data",
20186 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20187 	}
20188 
20189 	SD_TRACE(SD_LOG_IO, un,
20190 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
20191 
20192 	return (status);
20193 }
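
/*
 * Illustrative sketch (not part of the original source): extracting the
 * current profile from the SD_PROFILE_HEADER_LEN bytes returned by the
 * routine above. Per the MMC spec, bytes 6-7 of the feature header
 * hold the current profile; the helper name is hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static uint16_t
sd_example_current_profile(uchar_t *hdr)
{
	return ((uint16_t)((hdr[6] << 8) | hdr[7]));
}
#endif	/* SD_EXAMPLE_SKETCHES */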
20194 
20195 /*
20196  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
20197  *
20198  * Description: Issues the get configuration command to the device to
20199  *              retrieve a specific feature. Called from
20200  *		sd_check_for_writable_cd & sd_set_mmc_caps.
20201  *   Arguments: un
20202  *              ucmdbuf
20203  *              rqbuf
20204  *              rqbuflen
20205  *              bufaddr
20206  *              buflen
20207  *		feature
20208  *
20209  * Return Code: 0   - Success
20210  *              errno return code from sd_send_scsi_cmd()
20211  *
20212  *     Context: Can sleep. Does not return until command is completed.
20213  *
20214  */
20215 static int
20216 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
20217 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
20218 	uchar_t *bufaddr, uint_t buflen, char feature)
20219 {
20220 	char    cdb[CDB_GROUP1];
20221 	int	status;
20222 
20223 	ASSERT(un != NULL);
20224 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20225 	ASSERT(bufaddr != NULL);
20226 	ASSERT(ucmdbuf != NULL);
20227 	ASSERT(rqbuf != NULL);
20228 
20229 	SD_TRACE(SD_LOG_IO, un,
20230 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
20231 
20232 	bzero(cdb, sizeof (cdb));
20233 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20234 	bzero(rqbuf, rqbuflen);
20235 	bzero(bufaddr, buflen);
20236 
20237 	/*
20238 	 * Set up cdb field for the get configuration command.
20239 	 */
20240 	cdb[0] = SCMD_GET_CONFIGURATION;
20241 	cdb[1] = 0x02;  /* Requested Type */
20242 	cdb[3] = feature;
20243 	cdb[8] = buflen;
20244 	ucmdbuf->uscsi_cdb = cdb;
20245 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20246 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20247 	ucmdbuf->uscsi_buflen = buflen;
20248 	ucmdbuf->uscsi_timeout = sd_io_time;
20249 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20250 	ucmdbuf->uscsi_rqlen = rqbuflen;
20251 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20252 
20253 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20254 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20255 
20256 	switch (status) {
20257 	case 0:
20258 		break;  /* Success! */
20259 	case EIO:
20260 		switch (ucmdbuf->uscsi_status) {
20261 		case STATUS_RESERVATION_CONFLICT:
20262 			status = EACCES;
20263 			break;
20264 		default:
20265 			break;
20266 		}
20267 		break;
20268 	default:
20269 		break;
20270 	}
20271 
20272 	if (status == 0) {
20273 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20274 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
20275 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20276 	}
20277 
20278 	SD_TRACE(SD_LOG_IO, un,
20279 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
20280 
20281 	return (status);
20282 }
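
/*
 * Illustrative sketch (not part of the original source): wrapping the
 * routine above to query a single feature descriptor. The on-stack
 * uscsi_cmd and sense buffer mirror what the real callers pass in; the
 * helper name is hypothetical.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_query_feature(struct sd_lun *un, char feature, uchar_t *buf,
    uint_t buflen)
{
	struct uscsi_cmd	com;
	uchar_t			rqbuf[SENSE_LENGTH];

	return (sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf,
	    SENSE_LENGTH, buf, buflen, feature));
}
#endif	/* SD_EXAMPLE_SKETCHES */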
20283 
20284 
20285 /*
20286  *    Function: sd_send_scsi_MODE_SENSE
20287  *
20288  * Description: Utility function for issuing a scsi MODE SENSE command.
20289  *		Note: This routine uses a consistent implementation for Group0,
20290  *		Group1, and Group2 commands across all platforms. ATAPI devices
20291  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20292  *
20293  *   Arguments: un - pointer to the softstate struct for the target.
20294  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
20295  *			  CDB_GROUP[1|2] (10 byte).
20296  *		bufaddr - buffer for page data retrieved from the target.
20297  *		buflen - size of page to be retrieved.
20298  *		page_code - page code of data to be retrieved from the target.
20299  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20300  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20301  *			to use the USCSI "direct" chain and bypass the normal
20302  *			command waitq.
20303  *
20304  * Return Code: 0   - Success
20305  *		errno return code from sd_send_scsi_cmd()
20306  *
20307  *     Context: Can sleep. Does not return until command is completed.
20308  */
20309 
20310 static int
20311 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20312 	size_t buflen,  uchar_t page_code, int path_flag)
20313 {
20314 	struct	scsi_extended_sense	sense_buf;
20315 	union scsi_cdb		cdb;
20316 	struct uscsi_cmd	ucmd_buf;
20317 	int			status;
20318 
20319 	ASSERT(un != NULL);
20320 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20321 	ASSERT(bufaddr != NULL);
20322 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20323 	    (cdbsize == CDB_GROUP2));
20324 
20325 	SD_TRACE(SD_LOG_IO, un,
20326 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
20327 
20328 	bzero(&cdb, sizeof (cdb));
20329 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20330 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20331 	bzero(bufaddr, buflen);
20332 
20333 	if (cdbsize == CDB_GROUP0) {
20334 		cdb.scc_cmd = SCMD_MODE_SENSE;
20335 		cdb.cdb_opaque[2] = page_code;
20336 		FORMG0COUNT(&cdb, buflen);
20337 	} else {
20338 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
20339 		cdb.cdb_opaque[2] = page_code;
20340 		FORMG1COUNT(&cdb, buflen);
20341 	}
20342 
20343 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20344 
20345 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20346 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20347 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20348 	ucmd_buf.uscsi_buflen	= buflen;
20349 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20350 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20351 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20352 	ucmd_buf.uscsi_timeout	= 60;
20353 
20354 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20355 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20356 
20357 	switch (status) {
20358 	case 0:
20359 		break;	/* Success! */
20360 	case EIO:
20361 		switch (ucmd_buf.uscsi_status) {
20362 		case STATUS_RESERVATION_CONFLICT:
20363 			status = EACCES;
20364 			break;
20365 		default:
20366 			break;
20367 		}
20368 		break;
20369 	default:
20370 		break;
20371 	}
20372 
20373 	if (status == 0) {
20374 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
20375 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20376 	}
20377 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
20378 
20379 	return (status);
20380 }
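
/*
 * Illustrative sketch (not part of the original source): fetching the
 * caching mode page (page code 0x08) with a Group 0 CDB and stepping
 * over the mode parameter header to reach the page data. The sketch
 * assumes the target returns no block descriptors; the helper name and
 * page code literal are illustrative.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_get_caching_page(struct sd_lun *un, uchar_t *page, size_t pagelen)
{
	size_t	buflen = MODE_HEADER_LENGTH + pagelen;
	uchar_t	*buf = kmem_zalloc(buflen, KM_SLEEP);
	int	rval;

	rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, buf, buflen,
	    0x08, SD_PATH_DIRECT);
	if (rval == 0) {
		/* page data begins after the mode parameter header */
		bcopy(buf + MODE_HEADER_LENGTH, page, pagelen);
	}
	kmem_free(buf, buflen);
	return (rval);
}
#endif	/* SD_EXAMPLE_SKETCHES */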
20381 
20382 
20383 /*
20384  *    Function: sd_send_scsi_MODE_SELECT
20385  *
20386  * Description: Utility function for issuing a scsi MODE SELECT command.
20387  *		Note: This routine uses a consistent implementation for Group0,
20388  *		Group1, and Group2 commands across all platforms. ATAPI devices
20389  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20390  *
20391  *   Arguments: un - pointer to the softstate struct for the target.
20392  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
20393  *			  CDB_GROUP[1|2] (10 byte).
20394  *		bufaddr - buffer for page data retrieved from the target.
20395  *		buflen - size of page to be retrieved.
20396  *		save_page - boolean to determin if SP bit should be set.
20397  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20398  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20399  *			to use the USCSI "direct" chain and bypass the normal
20400  *			command waitq.
20401  *
20402  * Return Code: 0   - Success
20403  *		errno return code from sd_send_scsi_cmd()
20404  *
20405  *     Context: Can sleep. Does not return until command is completed.
20406  */
20407 
20408 static int
20409 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20410 	size_t buflen,  uchar_t save_page, int path_flag)
20411 {
20412 	struct	scsi_extended_sense	sense_buf;
20413 	union scsi_cdb		cdb;
20414 	struct uscsi_cmd	ucmd_buf;
20415 	int			status;
20416 
20417 	ASSERT(un != NULL);
20418 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20419 	ASSERT(bufaddr != NULL);
20420 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20421 	    (cdbsize == CDB_GROUP2));
20422 
20423 	SD_TRACE(SD_LOG_IO, un,
20424 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
20425 
20426 	bzero(&cdb, sizeof (cdb));
20427 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20428 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20429 
20430 	/* Set the PF bit for many third party drives */
20431 	cdb.cdb_opaque[1] = 0x10;
20432 
20433 	/* Set the savepage(SP) bit if given */
20434 	if (save_page == SD_SAVE_PAGE) {
20435 		cdb.cdb_opaque[1] |= 0x01;
20436 	}
20437 
20438 	if (cdbsize == CDB_GROUP0) {
20439 		cdb.scc_cmd = SCMD_MODE_SELECT;
20440 		FORMG0COUNT(&cdb, buflen);
20441 	} else {
20442 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
20443 		FORMG1COUNT(&cdb, buflen);
20444 	}
20445 
20446 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20447 
20448 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20449 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20450 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20451 	ucmd_buf.uscsi_buflen	= buflen;
20452 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20453 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20454 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20455 	ucmd_buf.uscsi_timeout	= 60;
20456 
20457 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20458 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20459 
20460 	switch (status) {
20461 	case 0:
20462 		break;	/* Success! */
20463 	case EIO:
20464 		switch (ucmd_buf.uscsi_status) {
20465 		case STATUS_RESERVATION_CONFLICT:
20466 			status = EACCES;
20467 			break;
20468 		default:
20469 			break;
20470 		}
20471 		break;
20472 	default:
20473 		break;
20474 	}
20475 
20476 	if (status == 0) {
20477 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
20478 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20479 	}
20480 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
20481 
20482 	return (status);
20483 }
20484 
20485 
20486 /*
20487  *    Function: sd_send_scsi_RDWR
20488  *
20489  * Description: Issue a scsi READ or WRITE command with the given parameters.
20490  *
20491  *   Arguments: un:      Pointer to the sd_lun struct for the target.
20492  *		cmd:	 SCMD_READ or SCMD_WRITE
20493  *		bufaddr: Address of caller's buffer to receive the RDWR data
20494  *		buflen:  Length of caller's buffer receive the RDWR data.
20495  *		start_block: Block number for the start of the RDWR operation.
20496  *			 (Assumes target-native block size.)
20497  *		residp:  Pointer to variable to receive the redisual of the
20498  *			 RDWR operation (may be NULL of no residual requested).
20499  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20500  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20501  *			to use the USCSI "direct" chain and bypass the normal
20502  *			command waitq.
20503  *
20504  * Return Code: 0   - Success
20505  *		errno return code from sd_send_scsi_cmd()
20506  *
20507  *     Context: Can sleep. Does not return until command is completed.
20508  */
20509 
20510 static int
20511 sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
20512 	size_t buflen, daddr_t start_block, int path_flag)
20513 {
20514 	struct	scsi_extended_sense	sense_buf;
20515 	union scsi_cdb		cdb;
20516 	struct uscsi_cmd	ucmd_buf;
20517 	uint32_t		block_count;
20518 	int			status;
20519 	int			cdbsize;
20520 	uchar_t			flag;
20521 
20522 	ASSERT(un != NULL);
20523 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20524 	ASSERT(bufaddr != NULL);
20525 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
20526 
20527 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
20528 
20529 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
20530 		return (EINVAL);
20531 	}
20532 
20533 	mutex_enter(SD_MUTEX(un));
20534 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
20535 	mutex_exit(SD_MUTEX(un));
20536 
20537 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
20538 
20539 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
20540 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
20541 	    bufaddr, buflen, start_block, block_count);
20542 
20543 	bzero(&cdb, sizeof (cdb));
20544 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20545 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20546 
20547 	/* Compute CDB size to use */
20548 	if (start_block > 0xffffffff)
20549 		cdbsize = CDB_GROUP4;
20550 	else if ((start_block & 0xFFE00000) ||
20551 	    (un->un_f_cfg_is_atapi == TRUE))
20552 		cdbsize = CDB_GROUP1;
20553 	else
20554 		cdbsize = CDB_GROUP0;
20555 
20556 	switch (cdbsize) {
20557 	case CDB_GROUP0:	/* 6-byte CDBs */
20558 		cdb.scc_cmd = cmd;
20559 		FORMG0ADDR(&cdb, start_block);
20560 		FORMG0COUNT(&cdb, block_count);
20561 		break;
20562 	case CDB_GROUP1:	/* 10-byte CDBs */
20563 		cdb.scc_cmd = cmd | SCMD_GROUP1;
20564 		FORMG1ADDR(&cdb, start_block);
20565 		FORMG1COUNT(&cdb, block_count);
20566 		break;
20567 	case CDB_GROUP4:	/* 16-byte CDBs */
20568 		cdb.scc_cmd = cmd | SCMD_GROUP4;
20569 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
20570 		FORMG4COUNT(&cdb, block_count);
20571 		break;
20572 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
20573 	default:
20574 		/* All others reserved */
20575 		return (EINVAL);
20576 	}
20577 
20578 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
20579 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20580 
20581 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20582 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20583 	ucmd_buf.uscsi_bufaddr	= bufaddr;
20584 	ucmd_buf.uscsi_buflen	= buflen;
20585 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20586 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20587 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
20588 	ucmd_buf.uscsi_timeout	= 60;
20589 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20590 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20591 	switch (status) {
20592 	case 0:
20593 		break;	/* Success! */
20594 	case EIO:
20595 		switch (ucmd_buf.uscsi_status) {
20596 		case STATUS_RESERVATION_CONFLICT:
20597 			status = EACCES;
20598 			break;
20599 		default:
20600 			break;
20601 		}
20602 		break;
20603 	default:
20604 		break;
20605 	}
20606 
20607 	if (status == 0) {
20608 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
20609 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20610 	}
20611 
20612 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
20613 
20614 	return (status);
20615 }
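
/*
 * Illustrative sketch (not part of the original source): reading one
 * target-native block (block 0) through the routine above. The helper
 * name is hypothetical; un_f_tgt_blocksize_is_valid must already be
 * TRUE or the routine fails with EINVAL.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_read_block0(struct sd_lun *un)
{
	uint32_t	bsize = un->un_tgt_blocksize;
	uchar_t		*buf = kmem_zalloc(bsize, KM_SLEEP);
	int		rval;

	rval = sd_send_scsi_RDWR(un, SCMD_READ, buf, bsize, (daddr_t)0,
	    SD_PATH_STANDARD);
	kmem_free(buf, bsize);
	return (rval);
}
#endif	/* SD_EXAMPLE_SKETCHES */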
20616 
20617 
20618 /*
20619  *    Function: sd_send_scsi_LOG_SENSE
20620  *
20621  * Description: Issue a scsi LOG_SENSE command with the given parameters.
20622  *
20623  *   Arguments: un - pointer to the sd_lun struct; bufaddr, buflen,
20624  *		page_code, page_control, param_ptr, path_flag - CDB fields.
20625  * Return Code: 0   - Success
20626  *		errno return code from sd_send_scsi_cmd()
20627  *
20628  *     Context: Can sleep. Does not return until command is completed.
20629  */
20630 
20631 static int
20632 sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
20633 	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
20634 	int path_flag)
20635 
20636 {
20637 	struct	scsi_extended_sense	sense_buf;
20638 	union scsi_cdb		cdb;
20639 	struct uscsi_cmd	ucmd_buf;
20640 	int			status;
20641 
20642 	ASSERT(un != NULL);
20643 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20644 
20645 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
20646 
20647 	bzero(&cdb, sizeof (cdb));
20648 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20649 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20650 
20651 	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
20652 	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
20653 	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
20654 	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
20655 	FORMG1COUNT(&cdb, buflen);
20656 
20657 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20658 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20659 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20660 	ucmd_buf.uscsi_buflen	= buflen;
20661 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20662 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20663 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20664 	ucmd_buf.uscsi_timeout	= 60;
20665 
20666 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20667 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20668 
20669 	switch (status) {
20670 	case 0:
20671 		break;
20672 	case EIO:
20673 		switch (ucmd_buf.uscsi_status) {
20674 		case STATUS_RESERVATION_CONFLICT:
20675 			status = EACCES;
20676 			break;
20677 		case STATUS_CHECK:
20678 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20679 			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST) &&
20680 			    (sense_buf.es_add_code == 0x24)) {
20681 				/*
20682 				 * ASC 0x24: INVALID FIELD IN CDB
20683 				 */
20684 				switch (page_code) {
20685 				case START_STOP_CYCLE_PAGE:
20686 					/*
20687 					 * The start stop cycle counter is
20688 					 * implemented as page 0x31 in earlier
20689 					 * generation disks. In new generation
20690 					 * disks the start stop cycle counter is
20691 					 * implemented as page 0xE. To properly
20692 					 * handle this case if an attempt for
20693 					 * log page 0xE is made and fails we
20694 					 * will try again using page 0x31.
20695 					 *
20696 					 * Network storage BU committed to
20697 					 * maintain the page 0x31 for this
20698 					 * purpose and will not have any other
20699 					 * page implemented with page code 0x31
20700 					 * until all disks transition to the
20701 					 * standard page.
20702 					 */
20703 					mutex_enter(SD_MUTEX(un));
20704 					un->un_start_stop_cycle_page =
20705 					    START_STOP_CYCLE_VU_PAGE;
20706 					cdb.cdb_opaque[2] =
20707 					    (char)(page_control << 6) |
20708 					    un->un_start_stop_cycle_page;
20709 					mutex_exit(SD_MUTEX(un));
20710 					status = sd_send_scsi_cmd(
20711 					    SD_GET_DEV(un), &ucmd_buf,
20712 					    UIO_SYSSPACE, UIO_SYSSPACE,
20713 					    UIO_SYSSPACE, path_flag);
20714 
20715 					break;
20716 				case TEMPERATURE_PAGE:
20717 					status = ENOTTY;
20718 					break;
20719 				default:
20720 					break;
20721 				}
20722 			}
20723 			break;
20724 		default:
20725 			break;
20726 		}
20727 		break;
20728 	default:
20729 		break;
20730 	}
20731 
20732 	if (status == 0) {
20733 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
20734 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20735 	}
20736 
20737 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
20738 
20739 	return (status);
20740 }
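
/*
 * Illustrative sketch (not part of the original source): reading the
 * temperature log page with cumulative values (page_control 1) and no
 * parameter offset, much as the DKIOCGTEMPERATURE path does. The
 * helper name and buffer sizing are illustrative.
 */
#ifdef SD_EXAMPLE_SKETCHES
static int
sd_example_read_temperature(struct sd_lun *un, uchar_t *buf, uint16_t buflen)
{
	/* ENOTTY is returned above if the device lacks the page */
	return (sd_send_scsi_LOG_SENSE(un, buf, buflen, TEMPERATURE_PAGE,
	    1, 0, SD_PATH_DIRECT));
}
#endif	/* SD_EXAMPLE_SKETCHES */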
20741 
20742 
20743 /*
20744  *    Function: sdioctl
20745  *
20746  * Description: Driver's ioctl(9e) entry point function.
20747  *
20748  *   Arguments: dev     - device number
20749  *		cmd     - ioctl operation to be performed
20750  *		arg     - user argument, contains data to be set or reference
20751  *			  parameter for get
20752  *		flag    - bit flag, indicating open settings, 32/64 bit type
20753  *		cred_p  - user credential pointer
20754  *		rval_p  - calling process return value (OPT)
20755  *
20756  * Return Code: EINVAL
20757  *		ENOTTY
20758  *		ENXIO
20759  *		EIO
20760  *		EFAULT
20761  *		ENOTSUP
20762  *		EPERM
20763  *
20764  *     Context: Called from the device switch at normal priority.
20765  */
20766 
20767 static int
20768 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
20769 {
20770 	struct sd_lun	*un = NULL;
20771 	int		geom_validated = FALSE;
20772 	int		err = 0;
20773 	int		i = 0;
20774 	cred_t		*cr;
20775 
20776 	/*
20777 	 * All device accesses go through sdstrategy, where we check the
20778 	 * suspend status.
20779 	 */
20780 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20781 		return (ENXIO);
20782 	}
20783 
20784 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20785 
20786 	/*
20787 	 * Moved this wait from sd_uscsi_strategy to here for
20788 	 * reasons of deadlock prevention. Internal driver commands,
20789 	 * specifically those to change a device's power level, result
20790 	 * in a call to sd_uscsi_strategy.
20791 	 */
20792 	mutex_enter(SD_MUTEX(un));
20793 	while ((un->un_state == SD_STATE_SUSPENDED) ||
20794 	    (un->un_state == SD_STATE_PM_CHANGING)) {
20795 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
20796 	}
20797 	/*
20798 	 * Twiddling the counter here protects commands from now
20799 	 * through to the top of sd_uscsi_strategy. Without the
20800 	 * counter increment, a power down, for example, could get in
20801 	 * after the above check for state is made and before
20802 	 * execution gets to the top of sd_uscsi_strategy.
20803 	 * That would cause problems.
20804 	 */
20805 	un->un_ncmds_in_driver++;
20806 
20807 	if ((un->un_f_geometry_is_valid == FALSE) &&
20808 	    (flag & (FNDELAY | FNONBLOCK))) {
20809 		switch (cmd) {
20810 		case CDROMPAUSE:
20811 		case CDROMRESUME:
20812 		case CDROMPLAYMSF:
20813 		case CDROMPLAYTRKIND:
20814 		case CDROMREADTOCHDR:
20815 		case CDROMREADTOCENTRY:
20816 		case CDROMSTOP:
20817 		case CDROMSTART:
20818 		case CDROMVOLCTRL:
20819 		case CDROMSUBCHNL:
20820 		case CDROMREADMODE2:
20821 		case CDROMREADMODE1:
20822 		case CDROMREADOFFSET:
20823 		case CDROMSBLKMODE:
20824 		case CDROMGBLKMODE:
20825 		case CDROMGDRVSPEED:
20826 		case CDROMSDRVSPEED:
20827 		case CDROMCDDA:
20828 		case CDROMCDXA:
20829 		case CDROMSUBCODE:
20830 			if (!ISCD(un)) {
20831 				un->un_ncmds_in_driver--;
20832 				ASSERT(un->un_ncmds_in_driver >= 0);
20833 				mutex_exit(SD_MUTEX(un));
20834 				return (ENOTTY);
20835 			}
20836 			break;
20837 		case FDEJECT:
20838 		case DKIOCEJECT:
20839 		case CDROMEJECT:
20840 			if (!ISREMOVABLE(un)) {
20841 				un->un_ncmds_in_driver--;
20842 				ASSERT(un->un_ncmds_in_driver >= 0);
20843 				mutex_exit(SD_MUTEX(un));
20844 				return (ENOTTY);
20845 			}
20846 			break;
20847 		case DKIOCSVTOC:
20848 		case DKIOCSETEFI:
20849 		case DKIOCSMBOOT:
20850 		case DKIOCFLUSHWRITECACHE:
20851 			mutex_exit(SD_MUTEX(un));
20852 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
20853 			if (err != 0) {
20854 				mutex_enter(SD_MUTEX(un));
20855 				un->un_ncmds_in_driver--;
20856 				ASSERT(un->un_ncmds_in_driver >= 0);
20857 				mutex_exit(SD_MUTEX(un));
20858 				return (EIO);
20859 			}
20860 			mutex_enter(SD_MUTEX(un));
20861 			/* FALLTHROUGH */
20862 		case DKIOCREMOVABLE:
20863 		case DKIOCINFO:
20864 		case DKIOCGMEDIAINFO:
20865 		case MHIOCENFAILFAST:
20866 		case MHIOCSTATUS:
20867 		case MHIOCTKOWN:
20868 		case MHIOCRELEASE:
20869 		case MHIOCGRP_INKEYS:
20870 		case MHIOCGRP_INRESV:
20871 		case MHIOCGRP_REGISTER:
20872 		case MHIOCGRP_RESERVE:
20873 		case MHIOCGRP_PREEMPTANDABORT:
20874 		case MHIOCGRP_REGISTERANDIGNOREKEY:
20875 		case CDROMCLOSETRAY:
20876 		case USCSICMD:
20877 			goto skip_ready_valid;
20878 		default:
20879 			break;
20880 		}
20881 
20882 		mutex_exit(SD_MUTEX(un));
20883 		err = sd_ready_and_valid(un);
20884 		mutex_enter(SD_MUTEX(un));
20885 		if (err == SD_READY_NOT_VALID) {
20886 			switch (cmd) {
20887 			case DKIOCGAPART:
20888 			case DKIOCGGEOM:
20889 			case DKIOCSGEOM:
20890 			case DKIOCGVTOC:
20891 			case DKIOCSVTOC:
20892 			case DKIOCSAPART:
20893 			case DKIOCG_PHYGEOM:
20894 			case DKIOCG_VIRTGEOM:
20895 				err = ENOTSUP;
20896 				un->un_ncmds_in_driver--;
20897 				ASSERT(un->un_ncmds_in_driver >= 0);
20898 				mutex_exit(SD_MUTEX(un));
20899 				return (err);
20900 			}
20901 		}
20902 		if (err != SD_READY_VALID) {
20903 			switch (cmd) {
20904 			case DKIOCSTATE:
20905 			case CDROMGDRVSPEED:
20906 			case CDROMSDRVSPEED:
20907 			case FDEJECT:	/* for eject command */
20908 			case DKIOCEJECT:
20909 			case CDROMEJECT:
20910 			case DKIOCGETEFI:
20911 			case DKIOCSGEOM:
20912 			case DKIOCREMOVABLE:
20913 			case DKIOCSAPART:
20914 			case DKIOCSETEFI:
20915 				break;
20916 			default:
20917 				if (ISREMOVABLE(un)) {
20918 					err = ENXIO;
20919 				} else {
20920 					/* Do not map EACCES to EIO */
20921 					if (err != EACCES)
20922 						err = EIO;
20923 				}
20924 				un->un_ncmds_in_driver--;
20925 				ASSERT(un->un_ncmds_in_driver >= 0);
20926 				mutex_exit(SD_MUTEX(un));
20927 				return (err);
20928 			}
20929 		}
20930 		geom_validated = TRUE;
20931 	}
20932 	if ((un->un_f_geometry_is_valid == TRUE) &&
20933 	    (un->un_solaris_size > 0)) {
20934 		/*
20935 		 * the "geometry_is_valid" flag could be true if we
20936 		 * have an fdisk table but no Solaris partition
20937 		 */
20938 		if (un->un_vtoc.v_sanity != VTOC_SANE) {
20939 			/* it is EFI, so return ENOTSUP for these */
20940 			switch (cmd) {
20941 			case DKIOCGAPART:
20942 			case DKIOCGGEOM:
20943 			case DKIOCGVTOC:
20944 			case DKIOCSVTOC:
20945 			case DKIOCSAPART:
20946 				err = ENOTSUP;
20947 				un->un_ncmds_in_driver--;
20948 				ASSERT(un->un_ncmds_in_driver >= 0);
20949 				mutex_exit(SD_MUTEX(un));
20950 				return (err);
20951 			}
20952 		}
20953 	}
20954 
20955 skip_ready_valid:
20956 	mutex_exit(SD_MUTEX(un));
20957 
20958 	switch (cmd) {
20959 	case DKIOCINFO:
20960 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
20961 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
20962 		break;
20963 
20964 	case DKIOCGMEDIAINFO:
20965 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
20966 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
20967 		break;
20968 
20969 	case DKIOCGGEOM:
20970 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGGEOM\n");
20971 		err = sd_dkio_get_geometry(dev, (caddr_t)arg, flag,
20972 		    geom_validated);
20973 		break;
20974 
20975 	case DKIOCSGEOM:
20976 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSGEOM\n");
20977 		err = sd_dkio_set_geometry(dev, (caddr_t)arg, flag);
20978 		break;
20979 
20980 	case DKIOCGAPART:
20981 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGAPART\n");
20982 		err = sd_dkio_get_partition(dev, (caddr_t)arg, flag,
20983 		    geom_validated);
20984 		break;
20985 
20986 	case DKIOCSAPART:
20987 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSAPART\n");
20988 		err = sd_dkio_set_partition(dev, (caddr_t)arg, flag);
20989 		break;
20990 
20991 	case DKIOCGVTOC:
20992 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGVTOC\n");
20993 		err = sd_dkio_get_vtoc(dev, (caddr_t)arg, flag,
20994 		    geom_validated);
20995 		break;
20996 
20997 	case DKIOCGETEFI:
20998 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGETEFI\n");
20999 		err = sd_dkio_get_efi(dev, (caddr_t)arg, flag);
21000 		break;
21001 
21002 	case DKIOCPARTITION:
21003 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTITION\n");
21004 		err = sd_dkio_partition(dev, (caddr_t)arg, flag);
21005 		break;
21006 
21007 	case DKIOCSVTOC:
21008 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSVTOC\n");
21009 		err = sd_dkio_set_vtoc(dev, (caddr_t)arg, flag);
21010 		break;
21011 
21012 	case DKIOCSETEFI:
21013 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSETEFI\n");
21014 		err = sd_dkio_set_efi(dev, (caddr_t)arg, flag);
21015 		break;
21016 
21017 	case DKIOCGMBOOT:
21018 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMBOOT\n");
21019 		err = sd_dkio_get_mboot(dev, (caddr_t)arg, flag);
21020 		break;
21021 
21022 	case DKIOCSMBOOT:
21023 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSMBOOT\n");
21024 		err = sd_dkio_set_mboot(dev, (caddr_t)arg, flag);
21025 		break;
21026 
21027 	case DKIOCLOCK:
21028 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
21029 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
21030 		    SD_PATH_STANDARD);
21031 		break;
21032 
21033 	case DKIOCUNLOCK:
21034 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
21035 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
21036 		    SD_PATH_STANDARD);
21037 		break;
21038 
21039 	case DKIOCSTATE: {
21040 		enum dkio_state		state;
21041 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
21042 
21043 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
21044 			err = EFAULT;
21045 		} else {
21046 			err = sd_check_media(dev, state);
21047 			if (err == 0) {
21048 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
21049 				    sizeof (int), flag) != 0)
21050 					err = EFAULT;
21051 			}
21052 		}
21053 		break;
21054 	}
21055 
21056 	case DKIOCREMOVABLE:
21057 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
21058 		if (ISREMOVABLE(un)) {
21059 			i = 1;
21060 		} else {
21061 			i = 0;
21062 		}
21063 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21064 			err = EFAULT;
21065 		} else {
21066 			err = 0;
21067 		}
21068 		break;
21069 
21070 	case DKIOCGTEMPERATURE:
21071 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
21072 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
21073 		break;
21074 
21075 	case MHIOCENFAILFAST:
21076 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
21077 		if ((err = drv_priv(cred_p)) == 0) {
21078 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
21079 		}
21080 		break;
21081 
21082 	case MHIOCTKOWN:
21083 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
21084 		if ((err = drv_priv(cred_p)) == 0) {
21085 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
21086 		}
21087 		break;
21088 
21089 	case MHIOCRELEASE:
21090 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
21091 		if ((err = drv_priv(cred_p)) == 0) {
21092 			err = sd_mhdioc_release(dev);
21093 		}
21094 		break;
21095 
21096 	case MHIOCSTATUS:
21097 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
21098 		if ((err = drv_priv(cred_p)) == 0) {
21099 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
21100 			case 0:
21101 				err = 0;
21102 				break;
21103 			case EACCES:
21104 				*rval_p = 1;
21105 				err = 0;
21106 				break;
21107 			default:
21108 				err = EIO;
21109 				break;
21110 			}
21111 		}
21112 		break;
21113 
21114 	case MHIOCQRESERVE:
21115 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
21116 		if ((err = drv_priv(cred_p)) == 0) {
21117 			err = sd_reserve_release(dev, SD_RESERVE);
21118 		}
21119 		break;
21120 
21121 	case MHIOCREREGISTERDEVID:
21122 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
21123 		if (drv_priv(cred_p) == EPERM) {
21124 			err = EPERM;
21125 		} else if (ISREMOVABLE(un) || ISCD(un)) {
21126 			err = ENOTTY;
21127 		} else {
21128 			err = sd_mhdioc_register_devid(dev);
21129 		}
21130 		break;
21131 
21132 	case MHIOCGRP_INKEYS:
21133 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
21134 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21135 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21136 				err = ENOTSUP;
21137 			} else {
21138 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
21139 				    flag);
21140 			}
21141 		}
21142 		break;
21143 
21144 	case MHIOCGRP_INRESV:
21145 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
21146 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21147 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21148 				err = ENOTSUP;
21149 			} else {
21150 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
21151 			}
21152 		}
21153 		break;
21154 
21155 	case MHIOCGRP_REGISTER:
21156 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
21157 		if ((err = drv_priv(cred_p)) != EPERM) {
21158 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21159 				err = ENOTSUP;
21160 			} else if (arg != NULL) {
21161 				mhioc_register_t reg;
21162 				if (ddi_copyin((void *)arg, &reg,
21163 				    sizeof (mhioc_register_t), flag) != 0) {
21164 					err = EFAULT;
21165 				} else {
21166 					err =
21167 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21168 					    un, SD_SCSI3_REGISTER,
21169 					    (uchar_t *)&reg);
21170 				}
21171 			}
21172 		}
21173 		break;
21174 
21175 	case MHIOCGRP_RESERVE:
21176 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
21177 		if ((err = drv_priv(cred_p)) != EPERM) {
21178 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21179 				err = ENOTSUP;
21180 			} else if (arg != NULL) {
21181 				mhioc_resv_desc_t resv_desc;
21182 				if (ddi_copyin((void *)arg, &resv_desc,
21183 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
21184 					err = EFAULT;
21185 				} else {
21186 					err =
21187 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21188 					    un, SD_SCSI3_RESERVE,
21189 					    (uchar_t *)&resv_desc);
21190 				}
21191 			}
21192 		}
21193 		break;
21194 
21195 	case MHIOCGRP_PREEMPTANDABORT:
21196 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
21197 		if ((err = drv_priv(cred_p)) != EPERM) {
21198 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21199 				err = ENOTSUP;
21200 			} else if (arg != NULL) {
21201 				mhioc_preemptandabort_t preempt_abort;
21202 				if (ddi_copyin((void *)arg, &preempt_abort,
21203 				    sizeof (mhioc_preemptandabort_t),
21204 				    flag) != 0) {
21205 					err = EFAULT;
21206 				} else {
21207 					err =
21208 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21209 					    un, SD_SCSI3_PREEMPTANDABORT,
21210 					    (uchar_t *)&preempt_abort);
21211 				}
21212 			}
21213 		}
21214 		break;
21215 
21216 	case MHIOCGRP_REGISTERANDIGNOREKEY:
21217 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
21218 		if ((err = drv_priv(cred_p)) != EPERM) {
21219 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21220 				err = ENOTSUP;
21221 			} else if (arg != NULL) {
21222 				mhioc_registerandignorekey_t r_and_i;
21223 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
21224 				    sizeof (mhioc_registerandignorekey_t),
21225 				    flag) != 0) {
21226 					err = EFAULT;
21227 				} else {
21228 					err =
21229 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21230 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
21231 					    (uchar_t *)&r_and_i);
21232 				}
21233 			}
21234 		}
21235 		break;
21236 
21237 	case USCSICMD:
21238 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
21239 		cr = ddi_get_cred();
21240 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
21241 			err = EPERM;
21242 		} else {
21243 			err = sd_uscsi_ioctl(dev, (caddr_t)arg, flag);
21244 		}
21245 		break;
21246 
21247 	case CDROMPAUSE:
21248 	case CDROMRESUME:
21249 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
21250 		if (!ISCD(un)) {
21251 			err = ENOTTY;
21252 		} else {
21253 			err = sr_pause_resume(dev, cmd);
21254 		}
21255 		break;
21256 
21257 	case CDROMPLAYMSF:
21258 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
21259 		if (!ISCD(un)) {
21260 			err = ENOTTY;
21261 		} else {
21262 			err = sr_play_msf(dev, (caddr_t)arg, flag);
21263 		}
21264 		break;
21265 
21266 	case CDROMPLAYTRKIND:
21267 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
21268 #if defined(__i386) || defined(__amd64)
21269 		/*
21270 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
21271 		 */
21272 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21273 #else
21274 		if (!ISCD(un)) {
21275 #endif
21276 			err = ENOTTY;
21277 		} else {
21278 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
21279 		}
21280 		break;
21281 
21282 	case CDROMREADTOCHDR:
21283 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
21284 		if (!ISCD(un)) {
21285 			err = ENOTTY;
21286 		} else {
21287 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
21288 		}
21289 		break;
21290 
21291 	case CDROMREADTOCENTRY:
21292 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
21293 		if (!ISCD(un)) {
21294 			err = ENOTTY;
21295 		} else {
21296 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
21297 		}
21298 		break;
21299 
21300 	case CDROMSTOP:
21301 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
21302 		if (!ISCD(un)) {
21303 			err = ENOTTY;
21304 		} else {
21305 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
21306 			    SD_PATH_STANDARD);
21307 		}
21308 		break;
21309 
21310 	case CDROMSTART:
21311 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
21312 		if (!ISCD(un)) {
21313 			err = ENOTTY;
21314 		} else {
21315 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
21316 			    SD_PATH_STANDARD);
21317 		}
21318 		break;
21319 
21320 	case CDROMCLOSETRAY:
21321 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
21322 		if (!ISCD(un)) {
21323 			err = ENOTTY;
21324 		} else {
21325 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
21326 			    SD_PATH_STANDARD);
21327 		}
21328 		break;
21329 
21330 	case FDEJECT:	/* for eject command */
21331 	case DKIOCEJECT:
21332 	case CDROMEJECT:
21333 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
21334 		if (!ISREMOVABLE(un)) {
21335 			err = ENOTTY;
21336 		} else {
21337 			err = sr_eject(dev);
21338 		}
21339 		break;
21340 
21341 	case CDROMVOLCTRL:
21342 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
21343 		if (!ISCD(un)) {
21344 			err = ENOTTY;
21345 		} else {
21346 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
21347 		}
21348 		break;
21349 
21350 	case CDROMSUBCHNL:
21351 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
21352 		if (!ISCD(un)) {
21353 			err = ENOTTY;
21354 		} else {
21355 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
21356 		}
21357 		break;
21358 
21359 	case CDROMREADMODE2:
21360 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
21361 		if (!ISCD(un)) {
21362 			err = ENOTTY;
21363 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21364 			/*
21365 			 * If the drive supports READ CD, use that instead of
21366 			 * switching the LBA size via a MODE SELECT
21367 			 * Block Descriptor
21368 			 */
21369 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
21370 		} else {
21371 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
21372 		}
21373 		break;
21374 
21375 	case CDROMREADMODE1:
21376 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
21377 		if (!ISCD(un)) {
21378 			err = ENOTTY;
21379 		} else {
21380 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
21381 		}
21382 		break;
21383 
21384 	case CDROMREADOFFSET:
21385 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
21386 		if (!ISCD(un)) {
21387 			err = ENOTTY;
21388 		} else {
21389 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
21390 			    flag);
21391 		}
21392 		break;
21393 
21394 	case CDROMSBLKMODE:
21395 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
21396 		/*
21397 		 * There is no means of changing the block size on ATAPI
21398 		 * drives, so return ENOTTY if the drive is ATAPI.
21399 		 */
21400 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21401 			err = ENOTTY;
21402 		} else if (un->un_f_mmc_cap == TRUE) {
21403 
21404 			/*
21405 			 * MMC Devices do not support changing the
21406 			 * logical block size
21407 			 *
21408 			 * Note: EINVAL is being returned instead of ENOTTY to
21409 			 * maintain consistency with the original mmc
21410 			 * driver update.
21411 			 */
21412 			err = EINVAL;
21413 		} else {
21414 			mutex_enter(SD_MUTEX(un));
21415 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
21416 			    (un->un_ncmds_in_transport > 0)) {
21417 				mutex_exit(SD_MUTEX(un));
21418 				err = EINVAL;
21419 			} else {
21420 				mutex_exit(SD_MUTEX(un));
21421 				err = sr_change_blkmode(dev, cmd, arg, flag);
21422 			}
21423 		}
21424 		break;
21425 
21426 	case CDROMGBLKMODE:
21427 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
21428 		if (!ISCD(un)) {
21429 			err = ENOTTY;
21430 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
21431 		    (un->un_f_blockcount_is_valid != FALSE)) {
21432 			/*
21433 			 * The drive is ATAPI, so return the target block
21434 			 * size; the block size cannot be changed on ATAPI
21435 			 * drives. Used primarily to detect whether an
21436 			 * ATAPI cdrom is present.
21437 			 */
21438 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
21439 			    sizeof (int), flag) != 0) {
21440 				err = EFAULT;
21441 			} else {
21442 				err = 0;
21443 			}
21444 
21445 		} else {
21446 			/*
21447 			 * Drive supports changing block sizes via a Mode
21448 			 * Select.
21449 			 */
21450 			err = sr_change_blkmode(dev, cmd, arg, flag);
21451 		}
21452 		break;
21453 
21454 	case CDROMGDRVSPEED:
21455 	case CDROMSDRVSPEED:
21456 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
21457 		if (!ISCD(un)) {
21458 			err = ENOTTY;
21459 		} else if (un->un_f_mmc_cap == TRUE) {
21460 			/*
21461 			 * Note: In the future the driver implementation
21462 			 * for getting and
21463 			 * setting cd speed should entail:
21464 			 * 1) If non-mmc try the Toshiba mode page
21465 			 *    (sr_change_speed)
21466 			 * 2) If mmc but no support for Real Time Streaming try
21467 			 *    the SET CD SPEED (0xBB) command
21468 			 *   (sr_atapi_change_speed)
21469 			 * 3) If mmc and support for Real Time Streaming
21470 			 *    try the GET PERFORMANCE and SET STREAMING
21471 			 *    commands (not yet implemented, 4380808)
21472 			 */
21473 			/*
21474 			 * As per recent MMC spec, CD-ROM speed is variable
21475 			 * and changes with LBA. Since there is no such
21476 			 * thing as a single drive speed now, fail this ioctl.
21477 			 *
21478 			 * Note: EINVAL is returned for consistency with the
21479 			 * original implementation, which included support for
21480 			 * getting the drive speed of mmc devices but not
21481 			 * setting the drive speed. Thus EINVAL would be
21482 			 * returned if a set request was made for an mmc device.
21483 			 * We no longer support get or set speed for
21484 			 * mmc but need to remain consistent with regard
21485 			 * to the error code returned.
21486 			 */
21487 			err = EINVAL;
21488 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21489 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
21490 		} else {
21491 			err = sr_change_speed(dev, cmd, arg, flag);
21492 		}
21493 		break;
21494 
21495 	case CDROMCDDA:
21496 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
21497 		if (!ISCD(un)) {
21498 			err = ENOTTY;
21499 		} else {
21500 			err = sr_read_cdda(dev, (void *)arg, flag);
21501 		}
21502 		break;
21503 
21504 	case CDROMCDXA:
21505 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
21506 		if (!ISCD(un)) {
21507 			err = ENOTTY;
21508 		} else {
21509 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
21510 		}
21511 		break;
21512 
21513 	case CDROMSUBCODE:
21514 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
21515 		if (!ISCD(un)) {
21516 			err = ENOTTY;
21517 		} else {
21518 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
21519 		}
21520 		break;
21521 
21522 	case DKIOCPARTINFO: {
21523 		/*
21524 		 * Return parameters describing the selected disk slice.
21525 		 * Note: this ioctl is for the intel platform only
21526 		 */
21527 #if defined(__i386) || defined(__amd64)
21528 		int part;
21529 
21530 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21531 		part = SDPART(dev);
21532 
21533 		/* don't check un_solaris_size for pN */
21534 		if (part < P0_RAW_DISK && un->un_solaris_size == 0) {
21535 			err = EIO;
21536 		} else {
21537 			struct part_info p;
21538 
21539 			p.p_start = (daddr_t)un->un_offset[part];
21540 			p.p_length = (int)un->un_map[part].dkl_nblk;
21541 #ifdef _MULTI_DATAMODEL
21542 			switch (ddi_model_convert_from(flag & FMODELS)) {
21543 			case DDI_MODEL_ILP32:
21544 			{
21545 				struct part_info32 p32;
21546 
21547 				p32.p_start = (daddr32_t)p.p_start;
21548 				p32.p_length = p.p_length;
21549 				if (ddi_copyout(&p32, (void *)arg,
21550 				    sizeof (p32), flag))
21551 					err = EFAULT;
21552 				break;
21553 			}
21554 
21555 			case DDI_MODEL_NONE:
21556 			{
21557 				if (ddi_copyout(&p, (void *)arg, sizeof (p),
21558 				    flag))
21559 					err = EFAULT;
21560 				break;
21561 			}
21562 			}
21563 #else /* ! _MULTI_DATAMODEL */
21564 			if (ddi_copyout(&p, (void *)arg, sizeof (p), flag))
21565 				err = EFAULT;
21566 #endif /* _MULTI_DATAMODEL */
21567 		}
21568 #else
21569 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21570 		err = ENOTTY;
21571 #endif
21572 		break;
21573 	}
21574 
21575 	case DKIOCG_PHYGEOM: {
21576 		/* Return the driver's notion of the media physical geometry */
21577 #if defined(__i386) || defined(__amd64)
21578 		struct dk_geom	disk_geom;
21579 		struct dk_geom	*dkgp = &disk_geom;
21580 
21581 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21582 		mutex_enter(SD_MUTEX(un));
21583 
21584 		if (un->un_g.dkg_nhead != 0 &&
21585 		    un->un_g.dkg_nsect != 0) {
21586 			/*
21587 			 * We succeeded in getting a geometry, but
21588 			 * right now it is being reported as just the
21589 			 * Solaris fdisk partition, just like for
21590 			 * DKIOCGGEOM. We need to change that to be
21591 			 * correct for the entire disk now.
21592 			 */
21593 			bcopy(&un->un_g, dkgp, sizeof (*dkgp));
21594 			dkgp->dkg_acyl = 0;
21595 			dkgp->dkg_ncyl = un->un_blockcount /
21596 			    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21597 		} else {
21598 			bzero(dkgp, sizeof (struct dk_geom));
21599 			/*
21600 			 * This disk does not have a Solaris VTOC
21601 			 * so we must present a physical geometry
21602 			 * that will remain consistent regardless
21603 			 * of how the disk is used. This will ensure
21604 			 * that the geometry does not change regardless
21605 			 * of the fdisk partition type (ie. EFI, FAT32,
21606 			 * Solaris, etc).
21607 			 */
21608 			if (ISCD(un)) {
21609 				dkgp->dkg_nhead = un->un_pgeom.g_nhead;
21610 				dkgp->dkg_nsect = un->un_pgeom.g_nsect;
21611 				dkgp->dkg_ncyl = un->un_pgeom.g_ncyl;
21612 				dkgp->dkg_acyl = un->un_pgeom.g_acyl;
21613 			} else {
21614 				sd_convert_geometry(un->un_blockcount, dkgp);
21615 				dkgp->dkg_acyl = 0;
21616 				dkgp->dkg_ncyl = un->un_blockcount /
21617 				    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21618 			}
21619 		}
21620 		dkgp->dkg_pcyl = dkgp->dkg_ncyl + dkgp->dkg_acyl;
21621 
21622 		if (ddi_copyout(dkgp, (void *)arg,
21623 		    sizeof (struct dk_geom), flag)) {
21624 			mutex_exit(SD_MUTEX(un));
21625 			err = EFAULT;
21626 		} else {
21627 			mutex_exit(SD_MUTEX(un));
21628 			err = 0;
21629 		}
21630 #else
21631 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21632 		err = ENOTTY;
21633 #endif
21634 		break;
21635 	}
21636 
21637 	case DKIOCG_VIRTGEOM: {
21638 		/* Return the driver's notion of the media's logical geometry */
21639 #if defined(__i386) || defined(__amd64)
21640 		struct dk_geom	disk_geom;
21641 		struct dk_geom	*dkgp = &disk_geom;
21642 
21643 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21644 		mutex_enter(SD_MUTEX(un));
21645 		/*
21646 		 * If there is no HBA geometry available, or
21647 		 * if the HBA returned us something that doesn't
21648 		 * really fit into an Int 13/function 8 geometry
21649 		 * result, just fail the ioctl.  See PSARC 1998/313.
21650 		 */
21651 		if (un->un_lgeom.g_nhead == 0 ||
21652 		    un->un_lgeom.g_nsect == 0 ||
21653 		    un->un_lgeom.g_ncyl > 1024) {
21654 			mutex_exit(SD_MUTEX(un));
21655 			err = EINVAL;
21656 		} else {
21657 			dkgp->dkg_ncyl	= un->un_lgeom.g_ncyl;
21658 			dkgp->dkg_acyl	= un->un_lgeom.g_acyl;
21659 			dkgp->dkg_pcyl	= dkgp->dkg_ncyl + dkgp->dkg_acyl;
21660 			dkgp->dkg_nhead	= un->un_lgeom.g_nhead;
21661 			dkgp->dkg_nsect	= un->un_lgeom.g_nsect;
21662 
21663 			if (ddi_copyout(dkgp, (void *)arg,
21664 			    sizeof (struct dk_geom), flag)) {
21665 				mutex_exit(SD_MUTEX(un));
21666 				err = EFAULT;
21667 			} else {
21668 				mutex_exit(SD_MUTEX(un));
21669 				err = 0;
21670 			}
21671 		}
21672 #else
21673 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21674 		err = ENOTTY;
21675 #endif
21676 		break;
21677 	}
21678 #ifdef SDDEBUG
21679 /* RESET/ABORTS testing ioctls */
21680 	case DKIOCRESET: {
21681 		int	reset_level;
21682 
21683 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
21684 			err = EFAULT;
21685 		} else {
21686 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
21687 			    "reset_level = 0x%x\n", reset_level);
21688 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
21689 				err = 0;
21690 			} else {
21691 				err = EIO;
21692 			}
21693 		}
21694 		break;
21695 	}
21696 
21697 	case DKIOCABORT:
21698 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
21699 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
21700 			err = 0;
21701 		} else {
21702 			err = EIO;
21703 		}
21704 		break;
21705 #endif
21706 
21707 #ifdef SD_FAULT_INJECTION
21708 /* SDIOC FaultInjection testing ioctls */
21709 	case SDIOCSTART:
21710 	case SDIOCSTOP:
21711 	case SDIOCINSERTPKT:
21712 	case SDIOCINSERTXB:
21713 	case SDIOCINSERTUN:
21714 	case SDIOCINSERTARQ:
21715 	case SDIOCPUSH:
21716 	case SDIOCRETRIEVE:
21717 	case SDIOCRUN:
21718 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl: "
21719 		    "SDIOC detected cmd:0x%X:\n", cmd);
21720 		/* call error generator */
21721 		sd_faultinjection_ioctl(cmd, arg, un);
21722 		err = 0;
21723 		break;
21724 
21725 #endif /* SD_FAULT_INJECTION */
21726 
21727 	case DKIOCFLUSHWRITECACHE:
21728 		{
21729 			struct dk_callback *dkc = (struct dk_callback *)arg;
21730 
21731 			mutex_enter(SD_MUTEX(un));
21732 			if (un->un_f_sync_cache_unsupported ||
21733 			    ! un->un_f_write_cache_enabled) {
21734 				err = un->un_f_sync_cache_unsupported ?
21735 					ENOTSUP : 0;
21736 				mutex_exit(SD_MUTEX(un));
21737 				if ((flag & FKIOCTL) && dkc != NULL &&
21738 				    dkc->dkc_callback != NULL) {
21739 					(*dkc->dkc_callback)(dkc->dkc_cookie,
21740 					    err);
21741 					/*
21742 					 * Did callback and reported error.
21743 					 * Since we did a callback, ioctl
21744 					 * should return 0.
21745 					 */
21746 					err = 0;
21747 				}
21748 				break;
21749 			}
21750 			mutex_exit(SD_MUTEX(un));
21751 
21752 			if ((flag & FKIOCTL) && dkc != NULL &&
21753 			    dkc->dkc_callback != NULL) {
21754 				/* async SYNC CACHE request */
21755 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
21756 			} else {
21757 				/* synchronous SYNC CACHE request */
21758 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
21759 			}
21760 		}
21761 		break;
21762 
21763 	default:
21764 		err = ENOTTY;
21765 		break;
21766 	}
21767 	mutex_enter(SD_MUTEX(un));
21768 	un->un_ncmds_in_driver--;
21769 	ASSERT(un->un_ncmds_in_driver >= 0);
21770 	mutex_exit(SD_MUTEX(un));
21771 
21772 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
21773 	return (err);
21774 }
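
/*
 * Sketch (hypothetical, kept under #if 0): a userland caller of the
 * DKIOCFLUSHWRITECACHE case above. Without FKIOCTL the dk_callback
 * argument is ignored, so a synchronous flush passes NULL; the
 * device path is an assumption for illustration only.
 */
#if 0
#include <sys/dkio.h>
#include <fcntl.h>
#include <stropts.h>
#include <unistd.h>

static int
flush_write_cache(void)
{
	int	fd = open("/dev/rdsk/c0t0d0s2", O_RDWR);	/* assumed path */
	int	rv;

	if (fd < 0)
		return (-1);
	/* NULL callback: returns only after SYNCHRONIZE CACHE completes */
	rv = ioctl(fd, DKIOCFLUSHWRITECACHE, NULL);
	(void) close(fd);
	return (rv);
}
#endif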
21775 
21776 
21777 /*
21778  *    Function: sd_uscsi_ioctl
21779  *
21780  * Description: This routine is the driver entry point for handling USCSI ioctl
21781  *		requests (USCSICMD).
21782  *
21783  *   Arguments: dev	- the device number
21784  *		arg	- user provided scsi command
21785  *		flag	- this argument is a pass through to ddi_copyxxx()
21786  *			  directly from the mode argument of ioctl().
21787  *
21788  * Return Code: code returned by sd_send_scsi_cmd
21789  *		ENXIO
21790  *		EFAULT
21791  *		EAGAIN
21792  */
21793 
21794 static int
21795 sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag)
21796 {
21797 #ifdef _MULTI_DATAMODEL
21798 	/*
21799 	 * For use when a 32 bit app makes a call into a
21800 	 * 64 bit ioctl
21801 	 */
21802 	struct uscsi_cmd32	uscsi_cmd_32_for_64;
21803 	struct uscsi_cmd32	*ucmd32 = &uscsi_cmd_32_for_64;
21804 	model_t			model;
21805 #endif /* _MULTI_DATAMODEL */
21806 	struct uscsi_cmd	*scmd = NULL;
21807 	struct sd_lun		*un = NULL;
21808 	enum uio_seg		uioseg;
21809 	char			cdb[CDB_GROUP0];
21810 	int			rval = 0;
21811 
21812 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21813 		return (ENXIO);
21814 	}
21815 
21816 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: entry: un:0x%p\n", un);
21817 
21818 	scmd = (struct uscsi_cmd *)
21819 	    kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
21820 
21821 #ifdef _MULTI_DATAMODEL
21822 	switch (model = ddi_model_convert_from(flag & FMODELS)) {
21823 	case DDI_MODEL_ILP32:
21824 	{
21825 		if (ddi_copyin((void *)arg, ucmd32, sizeof (*ucmd32), flag)) {
21826 			rval = EFAULT;
21827 			goto done;
21828 		}
21829 		/*
21830 		 * Convert the ILP32 uscsi data from the
21831 		 * application to LP64 for internal use.
21832 		 */
21833 		uscsi_cmd32touscsi_cmd(ucmd32, scmd);
21834 		break;
21835 	}
21836 	case DDI_MODEL_NONE:
21837 		if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
21838 			rval = EFAULT;
21839 			goto done;
21840 		}
21841 		break;
21842 	}
21843 #else /* ! _MULTI_DATAMODEL */
21844 	if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
21845 		rval = EFAULT;
21846 		goto done;
21847 	}
21848 #endif /* _MULTI_DATAMODEL */
21849 
21850 	scmd->uscsi_flags &= ~USCSI_NOINTR;
21851 	uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE;
21852 	if (un->un_f_format_in_progress == TRUE) {
21853 		rval = EAGAIN;
21854 		goto done;
21855 	}
21856 
21857 	/*
21858 	 * Gotta do the ddi_copyin() here on the uscsi_cdb so that
21859 	 * we will have a valid cdb[0] to test.
21860 	 */
21861 	if ((ddi_copyin(scmd->uscsi_cdb, cdb, CDB_GROUP0, flag) == 0) &&
21862 	    (cdb[0] == SCMD_FORMAT)) {
21863 		SD_TRACE(SD_LOG_IOCTL, un,
21864 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
21865 		mutex_enter(SD_MUTEX(un));
21866 		un->un_f_format_in_progress = TRUE;
21867 		mutex_exit(SD_MUTEX(un));
21868 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
21869 		    SD_PATH_STANDARD);
21870 		mutex_enter(SD_MUTEX(un));
21871 		un->un_f_format_in_progress = FALSE;
21872 		mutex_exit(SD_MUTEX(un));
21873 	} else {
21874 		SD_TRACE(SD_LOG_IOCTL, un,
21875 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
21876 		/*
21877 		 * It's OK to fall into here even if the ddi_copyin()
21878 		 * on the uscsi_cdb above fails, because sd_send_scsi_cmd()
21879 		 * does this same copyin and will return the EFAULT
21880 		 * if it fails.
21881 		 */
21882 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
21883 		    SD_PATH_STANDARD);
21884 	}
21885 #ifdef _MULTI_DATAMODEL
21886 	switch (model) {
21887 	case DDI_MODEL_ILP32:
21888 		/*
21889 		 * Convert back to ILP32 before copyout to the
21890 		 * application
21891 		 */
21892 		uscsi_cmdtouscsi_cmd32(scmd, ucmd32);
21893 		if (ddi_copyout(ucmd32, (void *)arg, sizeof (*ucmd32), flag)) {
21894 			if (rval == 0) {	/* don't mask an earlier error */
21895 				rval = EFAULT;
21896 			}
21897 		}
21898 		break;
21899 	case DDI_MODEL_NONE:
21900 		if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
21901 			if (rval == 0) {	/* don't mask an earlier error */
21902 				rval = EFAULT;
21903 			}
21904 		}
21905 		break;
21906 	}
21907 #else /* ! _MULTI_DATAMODEL */
21908 	if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
21909 		if (rval == 0) {	/* don't mask an earlier error */
21910 			rval = EFAULT;
21911 		}
21912 	}
21913 #endif /* _MULTI_DATAMODEL */
21914 done:
21915 	kmem_free(scmd, sizeof (struct uscsi_cmd));
21916 
21917 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: exit: un:0x%p\n", un);
21918 
21919 	return (rval);
21920 }
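
/*
 * Sketch (hypothetical, kept under #if 0): a userland USCSICMD
 * request of the kind sd_uscsi_ioctl() services above, issuing a
 * TEST UNIT READY. The descriptor fd is assumed to be open on a raw
 * sd device node.
 */
#if 0
#include <sys/types.h>
#include <sys/scsi/impl/uscsi.h>
#include <string.h>
#include <stropts.h>

static int
uscsi_tur(int fd)
{
	struct uscsi_cmd	ucmd;
	char			cdb[6];		/* group 0 CDB */

	(void) memset(&ucmd, 0, sizeof (ucmd));
	(void) memset(cdb, 0, sizeof (cdb));	/* 0x00 == TEST UNIT READY */
	ucmd.uscsi_cdb = cdb;
	ucmd.uscsi_cdblen = sizeof (cdb);
	ucmd.uscsi_timeout = 30;
	ucmd.uscsi_flags = USCSI_SILENT;
	return (ioctl(fd, USCSICMD, &ucmd));
}
#endif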
21921 
21922 
21923 /*
21924  *    Function: sd_dkio_ctrl_info
21925  *
21926  * Description: This routine is the driver entry point for handling controller
21927  *		information ioctl requests (DKIOCINFO).
21928  *
21929  *   Arguments: dev  - the device number
21930  *		arg  - pointer to user provided dk_cinfo structure
21931  *		       specifying the controller type and attributes.
21932  *		flag - this argument is a pass through to ddi_copyxxx()
21933  *		       directly from the mode argument of ioctl().
21934  *
21935  * Return Code: 0
21936  *		EFAULT
21937  *		ENXIO
21938  */
21939 
21940 static int
21941 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
21942 {
21943 	struct sd_lun	*un = NULL;
21944 	struct dk_cinfo	*info;
21945 	dev_info_t	*pdip;
21946 	int		lun, tgt;
21947 
21948 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21949 		return (ENXIO);
21950 	}
21951 
21952 	info = (struct dk_cinfo *)
21953 		kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
21954 
21955 	switch (un->un_ctype) {
21956 	case CTYPE_CDROM:
21957 		info->dki_ctype = DKC_CDROM;
21958 		break;
21959 	default:
21960 		info->dki_ctype = DKC_SCSI_CCS;
21961 		break;
21962 	}
21963 	pdip = ddi_get_parent(SD_DEVINFO(un));
21964 	info->dki_cnum = ddi_get_instance(pdip);
21965 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
21966 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
21967 	} else {
21968 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
21969 		    DK_DEVLEN - 1);
21970 	}
21971 
21972 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
21973 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
21974 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
21975 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
21976 
21977 	/* Unit Information */
21978 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
21979 	info->dki_slave = ((tgt << 3) | lun);
21980 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
21981 	    DK_DEVLEN - 1);
21982 	info->dki_flags = DKI_FMTVOL;
21983 	info->dki_partition = SDPART(dev);
21984 
21985 	/* Max Transfer size of this device in blocks */
21986 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
21987 	info->dki_addr = 0;
21988 	info->dki_space = 0;
21989 	info->dki_prio = 0;
21990 	info->dki_vec = 0;
21991 
21992 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
21993 		kmem_free(info, sizeof (struct dk_cinfo));
21994 		return (EFAULT);
21995 	} else {
21996 		kmem_free(info, sizeof (struct dk_cinfo));
21997 		return (0);
21998 	}
21999 }
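
/*
 * Sketch (hypothetical, kept under #if 0): the consumer side of the
 * DKIOCINFO data assembled by sd_dkio_ctrl_info() above; fd is
 * assumed open on an sd device node.
 */
#if 0
#include <sys/dkio.h>
#include <stdio.h>
#include <stropts.h>

static void
print_ctrl_info(int fd)
{
	struct dk_cinfo	ci;

	if (ioctl(fd, DKIOCINFO, &ci) == 0) {
		(void) printf("ctrl %s%d, unit %d, slice %d, "
		    "maxxfer %d blocks\n", ci.dki_cname, (int)ci.dki_cnum,
		    (int)ci.dki_unit, (int)ci.dki_partition,
		    (int)ci.dki_maxtransfer);
	}
}
#endif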
22000 
22001 
22002 /*
22003  *    Function: sd_get_media_info
22004  *
22005  * Description: This routine is the driver entry point for handling ioctl
22006  *		requests for the media type or command set profile used by the
22007  *		drive to operate on the media (DKIOCGMEDIAINFO).
22008  *
22009  *   Arguments: dev	- the device number
22010  *		arg	- pointer to user provided dk_minfo structure
22011  *			  specifying the media type, logical block size and
22012  *			  drive capacity.
22013  *		flag	- this argument is a pass through to ddi_copyxxx()
22014  *			  directly from the mode argument of ioctl().
22015  *
22016  * Return Code: 0
22017  *		EACCES
22018  *		EFAULT
22019  *		ENXIO
22020  *		EIO
22021  */
22022 
22023 static int
22024 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
22025 {
22026 	struct sd_lun		*un = NULL;
22027 	struct uscsi_cmd	com;
22028 	struct scsi_inquiry	*sinq;
22029 	struct dk_minfo		media_info;
22030 	u_longlong_t		media_capacity;
22031 	uint64_t		capacity;
22032 	uint_t			lbasize;
22033 	uchar_t			*out_data;
22034 	uchar_t			*rqbuf;
22035 	int			rval = 0;
22036 	int			rtn;
22037 
22038 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
22039 	    (un->un_state == SD_STATE_OFFLINE)) {
22040 		return (ENXIO);
22041 	}
22042 
22043 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
22044 
22045 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
22046 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
22047 
22048 	/* Issue a TUR to determine if the drive is ready with media present */
22049 	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
22050 	if (rval == ENXIO) {
22051 		goto done;
22052 	}
22053 
22054 	/* Now get configuration data */
22055 	if (ISCD(un)) {
22056 		media_info.dki_media_type = DK_CDROM;
22057 
22058 		/* Issue SCMD_GET_CONFIGURATION to MMC devices only */
22059 		if (un->un_f_mmc_cap == TRUE) {
22060 			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
22061 				SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN);
22062 
22063 			if (rtn) {
22064 				/*
22065 				 * Failed for other than an illegal request
22066 				 * or command not supported
22067 				 */
22068 				if ((com.uscsi_status == STATUS_CHECK) &&
22069 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
22070 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
22071 					    (rqbuf[12] != 0x20)) {
22072 						rval = EIO;
22073 						goto done;
22074 					}
22075 				}
22076 			} else {
22077 				/*
22078 				 * The GET CONFIGURATION command succeeded
22079 				 * so set the media type according to the
22080 				 * returned data
22081 				 */
22082 				media_info.dki_media_type = out_data[6];
22083 				media_info.dki_media_type <<= 8;
22084 				media_info.dki_media_type |= out_data[7];
22085 			}
22086 		}
22087 	} else {
22088 		/*
22089 		 * The profile list is not available, so we attempt to identify
22090 		 * the media type based on the inquiry data
22091 		 */
22092 		sinq = un->un_sd->sd_inq;
22093 		if (sinq->inq_qual == 0) {
22094 			/* This is a direct access device */
22095 			media_info.dki_media_type = DK_FIXED_DISK;
22096 
22097 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
22098 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
22099 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
22100 					media_info.dki_media_type = DK_ZIP;
22101 				} else if (
22102 				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
22103 					media_info.dki_media_type = DK_JAZ;
22104 				}
22105 			}
22106 		} else {
22107 			/* Not a CD or direct access so return unknown media */
22108 			media_info.dki_media_type = DK_UNKNOWN;
22109 		}
22110 	}
22111 
22112 	/* Now read the capacity so we can provide the lbasize and capacity */
22113 	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
22114 	    SD_PATH_DIRECT)) {
22115 	case 0:
22116 		break;
22117 	case EACCES:
22118 		rval = EACCES;
22119 		goto done;
22120 	default:
22121 		rval = EIO;
22122 		goto done;
22123 	}
22124 
22125 	media_info.dki_lbsize = lbasize;
22126 	media_capacity = capacity;
22127 
22128 	/*
22129 	 * sd_send_scsi_READ_CAPACITY() reports capacity in
22130 	 * un->un_sys_blocksize chunks, so convert it into chunks of
22131 	 * lbasize (the media block size) for dki_capacity.
22132 	 */
22133 	media_capacity *= un->un_sys_blocksize;
22134 	media_capacity /= lbasize;
22135 	media_info.dki_capacity = media_capacity;
22136 
22137 	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
22138 		rval = EFAULT;
22139 		/* Use a goto so any code added below later is skipped on error */
22140 		goto done;
22141 	}
22142 done:
22143 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
22144 	kmem_free(rqbuf, SENSE_LENGTH);
22145 	return (rval);
22146 }
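
/*
 * Sketch (hypothetical, kept under #if 0): fetching the dk_minfo
 * that sd_get_media_info() assembles above; fd is assumed open on a
 * raw device node.
 */
#if 0
#include <sys/dkio.h>
#include <stdio.h>
#include <stropts.h>

static void
print_media_info(int fd)
{
	struct dk_minfo	mi;

	if (ioctl(fd, DKIOCGMEDIAINFO, &mi) == 0) {
		(void) printf("media type 0x%x, lbsize %u, capacity %llu\n",
		    mi.dki_media_type, mi.dki_lbsize,
		    (unsigned long long)mi.dki_capacity);
	}
}
#endif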
22147 
22148 
22149 /*
22150  *    Function: sd_dkio_get_geometry
22151  *
22152  * Description: This routine is the driver entry point for handling user
22153  *		requests to get the device geometry (DKIOCGGEOM).
22154  *
22155  *   Arguments: dev  - the device number
22156  *		arg  - pointer to user provided dk_geom structure specifying
22157  *			the controller's notion of the current geometry.
22158  *		flag - this argument is a pass through to ddi_copyxxx()
22159  *		       directly from the mode argument of ioctl().
22160  *		geom_validated - flag indicating if the device geometry has been
22161  *				 previously validated in the sdioctl routine.
22162  *
22163  * Return Code: 0
22164  *		EFAULT
22165  *		ENXIO
22166  *		EIO
22167  */
22168 
22169 static int
22170 sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag, int geom_validated)
22171 {
22172 	struct sd_lun	*un = NULL;
22173 	struct dk_geom	*tmp_geom = NULL;
22174 	int		rval = 0;
22175 
22176 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22177 		return (ENXIO);
22178 	}
22179 
22180 #if defined(__i386) || defined(__amd64)
22181 	if (un->un_solaris_size == 0) {
22182 		return (EIO);
22183 	}
22184 #endif
22185 	if (geom_validated == FALSE) {
22186 		/*
22187 		 * sd_validate_geometry does not spin a disk up
22188 		 * if it was spun down. We need to make sure it
22189 		 * is ready.
22190 		 */
22191 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22192 			return (rval);
22193 		}
22194 		mutex_enter(SD_MUTEX(un));
22195 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
22196 		mutex_exit(SD_MUTEX(un));
22197 	}
22198 	if (rval)
22199 		return (rval);
22200 
22201 	/*
22202 	 * Make a local copy of the soft state geometry to avoid some potential
22203 	 * race conditions associated with holding the mutex and updating the
22204 	 * write_reinstruct value
22205 	 */
22206 	tmp_geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22207 	mutex_enter(SD_MUTEX(un));
22208 	bcopy(&un->un_g, tmp_geom, sizeof (struct dk_geom));
22209 	mutex_exit(SD_MUTEX(un));
22210 
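	/*
	 * Derive a default when the label carries none: the number of
	 * sectors that pass the head in sd_rot_delay (assumed msec),
	 * i.e. nsect (sectors/rev) * rpm (rev/min) / 60000 (msec/min)
	 * yields sectors per millisecond, scaled by sd_rot_delay.
	 */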
22211 	if (tmp_geom->dkg_write_reinstruct == 0) {
22212 		tmp_geom->dkg_write_reinstruct =
22213 		    (int)((int)(tmp_geom->dkg_nsect * tmp_geom->dkg_rpm *
22214 		    sd_rot_delay) / (int)60000);
22215 	}
22216 
22217 	rval = ddi_copyout(tmp_geom, (void *)arg, sizeof (struct dk_geom),
22218 	    flag);
22219 	if (rval != 0) {
22220 		rval = EFAULT;
22221 	}
22222 
22223 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22224 	return (rval);
22226 }
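
/*
 * Sketch (hypothetical, kept under #if 0): a DKIOCGGEOM consumer,
 * deriving the slice-visible capacity from the geometry returned by
 * sd_dkio_get_geometry() above.
 */
#if 0
#include <sys/dkio.h>
#include <stdio.h>
#include <stropts.h>

static void
print_geometry(int fd)
{
	struct dk_geom	g;

	if (ioctl(fd, DKIOCGGEOM, &g) == 0) {
		(void) printf("%d cyl x %d head x %d sect = %lu blocks\n",
		    (int)g.dkg_ncyl, (int)g.dkg_nhead, (int)g.dkg_nsect,
		    (unsigned long)g.dkg_ncyl * g.dkg_nhead * g.dkg_nsect);
	}
}
#endif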
22227 
22228 
22229 /*
22230  *    Function: sd_dkio_set_geometry
22231  *
22232  * Description: This routine is the driver entry point for handling user
22233  *		requests to set the device geometry (DKIOCSGEOM). The actual
22234  *		device geometry is not updated, just the driver "notion" of it.
22235  *
22236  *   Arguments: dev  - the device number
22237  *		arg  - pointer to user provided dk_geom structure used to set
22238  *			the controller's notion of the current geometry.
22239  *		flag - this argument is a pass through to ddi_copyxxx()
22240  *		       directly from the mode argument of ioctl().
22241  *
22242  * Return Code: 0
22243  *		EFAULT
22244  *		ENXIO
22245  *		EIO
22246  */
22247 
22248 static int
22249 sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag)
22250 {
22251 	struct sd_lun	*un = NULL;
22252 	struct dk_geom	*tmp_geom;
22253 	struct dk_map	*lp;
22254 	int		rval = 0;
22255 	int		i;
22256 
22257 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22258 		return (ENXIO);
22259 	}
22260 
22261 #if defined(__i386) || defined(__amd64)
22262 	if (un->un_solaris_size == 0) {
22263 		return (EIO);
22264 	}
22265 #endif
22266 	/*
22267 	 * We need to copy the user specified geometry into local
22268 	 * storage and then update the softstate. We don't want to hold
22269 	 * the mutex and copyin directly from the user to the soft state
22270 	 */
22271 	tmp_geom = (struct dk_geom *)
22272 	    kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22273 	rval = ddi_copyin(arg, tmp_geom, sizeof (struct dk_geom), flag);
22274 	if (rval != 0) {
22275 		kmem_free(tmp_geom, sizeof (struct dk_geom));
22276 		return (EFAULT);
22277 	}
22278 
22279 	mutex_enter(SD_MUTEX(un));
22280 	bcopy(tmp_geom, &un->un_g, sizeof (struct dk_geom));
22281 	for (i = 0; i < NDKMAP; i++) {
22282 		lp  = &un->un_map[i];
22283 		un->un_offset[i] =
22284 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22285 #if defined(__i386) || defined(__amd64)
22286 		un->un_offset[i] += un->un_solaris_offset;
22287 #endif
22288 	}
22289 	un->un_f_geometry_is_valid = FALSE;
22290 	mutex_exit(SD_MUTEX(un));
22291 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22292 
22293 	return (rval);
22294 }
22295 
22296 
22297 /*
22298  *    Function: sd_dkio_get_partition
22299  *
22300  * Description: This routine is the driver entry point for handling user
22301  *		requests to get the partition table (DKIOCGAPART).
22302  *
22303  *   Arguments: dev  - the device number
22304  *		arg  - pointer to user provided dk_allmap structure specifying
22305  *			the controller's notion of the current partition table.
22306  *		flag - this argument is a pass through to ddi_copyxxx()
22307  *		       directly from the mode argument of ioctl().
22308  *		geom_validated - flag indicating if the device geometry has been
22309  *				 previously validated in the sdioctl routine.
22310  *
22311  * Return Code: 0
22312  *		EFAULT
22313  *		ENXIO
22314  *		EIO
22315  */
22316 
22317 static int
22318 sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag, int geom_validated)
22319 {
22320 	struct sd_lun	*un = NULL;
22321 	int		rval = 0;
22322 	int		size;
22323 
22324 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22325 		return (ENXIO);
22326 	}
22327 
22328 #if defined(__i386) || defined(__amd64)
22329 	if (un->un_solaris_size == 0) {
22330 		return (EIO);
22331 	}
22332 #endif
22333 	/*
22334 	 * Make sure the geometry is valid before getting the partition
22335 	 * information.
22336 	 */
22337 	mutex_enter(SD_MUTEX(un));
22338 	if (geom_validated == FALSE) {
22339 		/*
22340 		 * sd_validate_geometry does not spin a disk up
22341 		 * if it was spun down. We need to make sure it
22342 		 * is ready before validating the geometry.
22343 		 */
22344 		mutex_exit(SD_MUTEX(un));
22345 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22346 			return (rval);
22347 		}
22348 		mutex_enter(SD_MUTEX(un));
22349 
22350 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22351 			mutex_exit(SD_MUTEX(un));
22352 			return (rval);
22353 		}
22354 	}
22355 	mutex_exit(SD_MUTEX(un));
22356 
22357 #ifdef _MULTI_DATAMODEL
22358 	switch (ddi_model_convert_from(flag & FMODELS)) {
22359 	case DDI_MODEL_ILP32: {
22360 		struct dk_map32 dk_map32[NDKMAP];
22361 		int		i;
22362 
22363 		for (i = 0; i < NDKMAP; i++) {
22364 			dk_map32[i].dkl_cylno = un->un_map[i].dkl_cylno;
22365 			dk_map32[i].dkl_nblk  = un->un_map[i].dkl_nblk;
22366 		}
22367 		size = NDKMAP * sizeof (struct dk_map32);
22368 		rval = ddi_copyout(dk_map32, (void *)arg, size, flag);
22369 		if (rval != 0) {
22370 			rval = EFAULT;
22371 		}
22372 		break;
22373 	}
22374 	case DDI_MODEL_NONE:
22375 		size = NDKMAP * sizeof (struct dk_map);
22376 		rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22377 		if (rval != 0) {
22378 			rval = EFAULT;
22379 		}
22380 		break;
22381 	}
22382 #else /* ! _MULTI_DATAMODEL */
22383 	size = NDKMAP * sizeof (struct dk_map);
22384 	rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22385 	if (rval != 0) {
22386 		rval = EFAULT;
22387 	}
22388 #endif /* _MULTI_DATAMODEL */
22389 	return (rval);
22390 }
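
/*
 * Sketch (hypothetical, kept under #if 0): reading the partition map
 * that sd_dkio_get_partition() exports above; DKIOCGAPART fills a
 * struct dk_allmap, i.e. NDKMAP dk_map entries.
 */
#if 0
#include <sys/dklabel.h>
#include <sys/dkio.h>
#include <stdio.h>
#include <stropts.h>

static void
print_partition_map(int fd)
{
	struct dk_allmap	m;
	int			i;

	if (ioctl(fd, DKIOCGAPART, &m) != 0)
		return;
	for (i = 0; i < NDKMAP; i++) {
		(void) printf("slice %d: start cyl %ld, %ld blocks\n", i,
		    (long)m.dka_map[i].dkl_cylno, (long)m.dka_map[i].dkl_nblk);
	}
}
#endif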
22391 
22392 
22393 /*
22394  *    Function: sd_dkio_set_partition
22395  *
22396  * Description: This routine is the driver entry point for handling user
22397  *		requests to set the partition table (DKIOCSAPART). The actual
22398  *		device partition is not updated.
22399  *
22400  *   Arguments: dev  - the device number
22401  *		arg  - pointer to user provided dk_allmap structure used to set
22402  *			the controller's notion of the partition table.
22403  *		flag - this argument is a pass through to ddi_copyxxx()
22404  *		       directly from the mode argument of ioctl().
22405  *
22406  * Return Code: 0
22407  *		EINVAL
22408  *		EFAULT
22409  *		ENXIO
22410  *		EIO
22411  */
22412 
22413 static int
22414 sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag)
22415 {
22416 	struct sd_lun	*un = NULL;
22417 	struct dk_map	dk_map[NDKMAP];
22418 	struct dk_map	*lp;
22419 	int		rval = 0;
22420 	int		size;
22421 	int		i;
22422 #if defined(_SUNOS_VTOC_16)
22423 	struct dkl_partition	*vp;
22424 #endif
22425 
22426 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22427 		return (ENXIO);
22428 	}
22429 
22430 	/*
22431 	 * Set the map for all logical partitions.  We hold SD_MUTEX
22432 	 * across the update so that the map is never observed half
22433 	 * updated.
22434 	 */
22435 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_solaris_size))
22436 	mutex_enter(SD_MUTEX(un));
22437 	if (un->un_blockcount > DK_MAX_BLOCKS) {
22438 		mutex_exit(SD_MUTEX(un));
22439 		return (ENOTSUP);
22440 	}
22441 	mutex_exit(SD_MUTEX(un));
22442 	if (un->un_solaris_size == 0) {
22443 		return (EIO);
22444 	}
22445 
22446 #ifdef _MULTI_DATAMODEL
22447 	switch (ddi_model_convert_from(flag & FMODELS)) {
22448 	case DDI_MODEL_ILP32: {
22449 		struct dk_map32 dk_map32[NDKMAP];
22450 
22451 		size = NDKMAP * sizeof (struct dk_map32);
22452 		rval = ddi_copyin((void *)arg, dk_map32, size, flag);
22453 		if (rval != 0) {
22454 			return (EFAULT);
22455 		}
22456 		for (i = 0; i < NDKMAP; i++) {
22457 			dk_map[i].dkl_cylno = dk_map32[i].dkl_cylno;
22458 			dk_map[i].dkl_nblk  = dk_map32[i].dkl_nblk;
22459 		}
22460 		break;
22461 	}
22462 	case DDI_MODEL_NONE:
22463 		size = NDKMAP * sizeof (struct dk_map);
22464 		rval = ddi_copyin((void *)arg, dk_map, size, flag);
22465 		if (rval != 0) {
22466 			return (EFAULT);
22467 		}
22468 		break;
22469 	}
22470 #else /* ! _MULTI_DATAMODEL */
22471 	size = NDKMAP * sizeof (struct dk_map);
22472 	rval = ddi_copyin((void *)arg, dk_map, size, flag);
22473 	if (rval != 0) {
22474 		return (EFAULT);
22475 	}
22476 #endif /* _MULTI_DATAMODEL */
22477 
22478 	mutex_enter(SD_MUTEX(un));
22479 	/* Note: The size used in this bcopy is set based upon the data model */
22480 	bcopy(dk_map, un->un_map, size);
22481 #if defined(_SUNOS_VTOC_16)
22482 	vp = (struct dkl_partition *)&(un->un_vtoc);
22483 #endif	/* defined(_SUNOS_VTOC_16) */
22484 	for (i = 0; i < NDKMAP; i++) {
22485 		lp  = &un->un_map[i];
22486 		un->un_offset[i] =
22487 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22488 #if defined(_SUNOS_VTOC_16)
22489 		vp->p_start = un->un_offset[i];
22490 		vp->p_size = lp->dkl_nblk;
22491 		vp++;
22492 #endif	/* defined(_SUNOS_VTOC_16) */
22493 #if defined(__i386) || defined(__amd64)
22494 		un->un_offset[i] += un->un_solaris_offset;
22495 #endif
22496 	}
22497 	mutex_exit(SD_MUTEX(un));
22498 	return (rval);
22499 }
22500 
22501 
22502 /*
22503  *    Function: sd_dkio_get_vtoc
22504  *
22505  * Description: This routine is the driver entry point for handling user
22506  *		requests to get the current volume table of contents
22507  *		(DKIOCGVTOC).
22508  *
22509  *   Arguments: dev  - the device number
22510  *		arg  - pointer to user provided vtoc structure specifying
22511  *			the current vtoc.
22512  *		flag - this argument is a pass through to ddi_copyxxx()
22513  *		       directly from the mode argument of ioctl().
22514  *		geom_validated - flag indicating if the device geometry has been
22515  *				 previously validated in the sdioctl routine.
22516  *
22517  * Return Code: 0
22518  *		EFAULT
22519  *		ENXIO
22520  *		EIO
22521  */
22522 
22523 static int
22524 sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag, int geom_validated)
22525 {
22526 	struct sd_lun	*un = NULL;
22527 #if defined(_SUNOS_VTOC_8)
22528 	struct vtoc	user_vtoc;
22529 #endif	/* defined(_SUNOS_VTOC_8) */
22530 	int		rval = 0;
22531 
22532 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22533 		return (ENXIO);
22534 	}
22535 
22536 	mutex_enter(SD_MUTEX(un));
22537 	if (geom_validated == FALSE) {
22538 		/*
22539 		 * sd_validate_geometry does not spin a disk up
22540 		 * if it was spun down. We need to make sure it
22541 		 * is ready.
22542 		 */
22543 		mutex_exit(SD_MUTEX(un));
22544 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22545 			return (rval);
22546 		}
22547 		mutex_enter(SD_MUTEX(un));
22548 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22549 			mutex_exit(SD_MUTEX(un));
22550 			return (rval);
22551 		}
22552 	}
22553 
22554 #if defined(_SUNOS_VTOC_8)
22555 	sd_build_user_vtoc(un, &user_vtoc);
22556 	mutex_exit(SD_MUTEX(un));
22557 
22558 #ifdef _MULTI_DATAMODEL
22559 	switch (ddi_model_convert_from(flag & FMODELS)) {
22560 	case DDI_MODEL_ILP32: {
22561 		struct vtoc32 user_vtoc32;
22562 
22563 		vtoctovtoc32(user_vtoc, user_vtoc32);
22564 		if (ddi_copyout(&user_vtoc32, (void *)arg,
22565 		    sizeof (struct vtoc32), flag)) {
22566 			return (EFAULT);
22567 		}
22568 		break;
22569 	}
22570 
22571 	case DDI_MODEL_NONE:
22572 		if (ddi_copyout(&user_vtoc, (void *)arg,
22573 		    sizeof (struct vtoc), flag)) {
22574 			return (EFAULT);
22575 		}
22576 		break;
22577 	}
22578 #else /* ! _MULTI_DATAMODEL */
22579 	if (ddi_copyout(&user_vtoc, (void *)arg, sizeof (struct vtoc), flag)) {
22580 		return (EFAULT);
22581 	}
22582 #endif /* _MULTI_DATAMODEL */
22583 
22584 #elif defined(_SUNOS_VTOC_16)
22585 	mutex_exit(SD_MUTEX(un));
22586 
22587 #ifdef _MULTI_DATAMODEL
22588 	/*
22589 	 * The un_vtoc structure is a "struct dk_vtoc"  which is always
22590 	 * 32-bit to maintain compatibility with existing on-disk
22591 	 * structures.  Thus, we need to convert the structure when copying
22592 	 * it out to a datamodel-dependent "struct vtoc" in a 64-bit
22593 	 * program.  If the target is a 32-bit program, then no conversion
22594 	 * is necessary.
22595 	 */
22596 	/* LINTED: logical expression always true: op "||" */
22597 	ASSERT(sizeof (un->un_vtoc) == sizeof (struct vtoc32));
22598 	switch (ddi_model_convert_from(flag & FMODELS)) {
22599 	case DDI_MODEL_ILP32:
22600 		if (ddi_copyout(&(un->un_vtoc), (void *)arg,
22601 		    sizeof (un->un_vtoc), flag)) {
22602 			return (EFAULT);
22603 		}
22604 		break;
22605 
22606 	case DDI_MODEL_NONE: {
22607 		struct vtoc user_vtoc;
22608 
22609 		vtoc32tovtoc(un->un_vtoc, user_vtoc);
22610 		if (ddi_copyout(&user_vtoc, (void *)arg,
22611 		    sizeof (struct vtoc), flag)) {
22612 			return (EFAULT);
22613 		}
22614 		break;
22615 	}
22616 	}
22617 #else /* ! _MULTI_DATAMODEL */
22618 	if (ddi_copyout(&(un->un_vtoc), (void *)arg, sizeof (un->un_vtoc),
22619 	    flag)) {
22620 		return (EFAULT);
22621 	}
22622 #endif /* _MULTI_DATAMODEL */
22623 #else
22624 #error "No VTOC format defined."
22625 #endif
22626 
22627 	return (rval);
22628 }
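
/*
 * Sketch (hypothetical, kept under #if 0): a DKIOCGVTOC consumer for
 * the vtoc copied out above; prints each non-empty slice.
 */
#if 0
#include <sys/vtoc.h>
#include <sys/dkio.h>
#include <stdio.h>
#include <stropts.h>

static void
print_vtoc(int fd)
{
	struct vtoc	vt;
	int		i;

	if (ioctl(fd, DKIOCGVTOC, &vt) != 0 || vt.v_sanity != VTOC_SANE)
		return;
	for (i = 0; i < (int)vt.v_nparts; i++) {
		if (vt.v_part[i].p_size == 0)
			continue;
		(void) printf("slice %d: tag %d start %ld size %ld\n", i,
		    (int)vt.v_part[i].p_tag, (long)vt.v_part[i].p_start,
		    (long)vt.v_part[i].p_size);
	}
}
#endif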
22629 
22630 static int
22631 sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag)
22632 {
22633 	struct sd_lun	*un = NULL;
22634 	dk_efi_t	user_efi;
22635 	int		rval = 0;
22636 	void		*buffer;
22637 
22638 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
22639 		return (ENXIO);
22640 
22641 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
22642 		return (EFAULT);
22643 
22644 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
22645 
22646 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
22647 	    (user_efi.dki_length > un->un_max_xfer_size))
22648 		return (EINVAL);
22649 
22650 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
22651 	rval = sd_send_scsi_READ(un, buffer, user_efi.dki_length,
22652 	    user_efi.dki_lba, SD_PATH_DIRECT);
22653 	if (rval == 0 && ddi_copyout(buffer, user_efi.dki_data,
22654 	    user_efi.dki_length, flag) != 0)
22655 		rval = EFAULT;
22656 
22657 	kmem_free(buffer, user_efi.dki_length);
22658 	return (rval);
22659 }
22660 
22661 /*
22662  *    Function: sd_build_user_vtoc
22663  *
22664  * Description: This routine populates a pass by reference variable with the
22665  *		current volume table of contents.
22666  *
22667  *   Arguments: un - driver soft state (unit) structure
22668  *		user_vtoc - pointer to vtoc structure to be populated
22669  */
22670 
22671 static void
22672 sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
22673 {
22674 	struct dk_map2		*lpart;
22675 	struct dk_map		*lmap;
22676 	struct partition	*vpart;
22677 	int			nblks;
22678 	int			i;
22679 
22680 	ASSERT(mutex_owned(SD_MUTEX(un)));
22681 
22682 	/*
22683 	 * Return vtoc structure fields in the provided VTOC area, addressed
22684 	 * by *vtoc.
22685 	 */
22686 	bzero(user_vtoc, sizeof (struct vtoc));
22687 	user_vtoc->v_bootinfo[0] = un->un_vtoc.v_bootinfo[0];
22688 	user_vtoc->v_bootinfo[1] = un->un_vtoc.v_bootinfo[1];
22689 	user_vtoc->v_bootinfo[2] = un->un_vtoc.v_bootinfo[2];
22690 	user_vtoc->v_sanity	= VTOC_SANE;
22691 	user_vtoc->v_version	= un->un_vtoc.v_version;
22692 	bcopy(un->un_vtoc.v_volume, user_vtoc->v_volume, LEN_DKL_VVOL);
22693 	user_vtoc->v_sectorsz = un->un_sys_blocksize;
22694 	user_vtoc->v_nparts = un->un_vtoc.v_nparts;
22695 	bcopy(un->un_vtoc.v_reserved, user_vtoc->v_reserved,
22696 	    sizeof (un->un_vtoc.v_reserved));
22697 	/*
22698 	 * Convert partitioning information.
22699 	 *
22700 	 * Note the conversion from starting cylinder number
22701 	 * to starting sector number.
22702 	 */
22703 	lmap = un->un_map;
22704 	lpart = (struct dk_map2 *)un->un_vtoc.v_part;
22705 	vpart = user_vtoc->v_part;
22706 
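	/*
	 * nblks is the number of sectors per cylinder (sectors per
	 * track times tracks per cylinder); dkl_cylno * nblks below
	 * converts a slice's starting cylinder to a starting sector.
	 */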
22707 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
22708 
22709 	for (i = 0; i < V_NUMPAR; i++) {
22710 		vpart->p_tag	= lpart->p_tag;
22711 		vpart->p_flag	= lpart->p_flag;
22712 		vpart->p_start	= lmap->dkl_cylno * nblks;
22713 		vpart->p_size	= lmap->dkl_nblk;
22714 		lmap++;
22715 		lpart++;
22716 		vpart++;
22717 
22718 		/* (4364927) */
22719 		user_vtoc->timestamp[i] = (time_t)un->un_vtoc.v_timestamp[i];
22720 	}
22721 
22722 	bcopy(un->un_asciilabel, user_vtoc->v_asciilabel, LEN_DKL_ASCII);
22723 }
22724 
22725 static int
22726 sd_dkio_partition(dev_t dev, caddr_t arg, int flag)
22727 {
22728 	struct sd_lun		*un = NULL;
22729 	struct partition64	p64;
22730 	int			rval = 0;
22731 	uint_t			nparts;
22732 	efi_gpe_t		*partitions;
22733 	efi_gpt_t		*buffer;
22734 	diskaddr_t		gpe_lba;
22735 
22736 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22737 		return (ENXIO);
22738 	}
22739 
22740 	if (ddi_copyin((const void *)arg, &p64,
22741 	    sizeof (struct partition64), flag)) {
22742 		return (EFAULT);
22743 	}
22744 
22745 	buffer = kmem_alloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
22746 	rval = sd_send_scsi_READ(un, buffer, DEV_BSIZE,
22747 		1, SD_PATH_DIRECT);
22748 	if (rval != 0)
22749 		goto done_error;
22750 
22751 	sd_swap_efi_gpt(buffer);
22752 
22753 	if ((rval = sd_validate_efi(buffer)) != 0)
22754 		goto done_error;
22755 
22756 	nparts = buffer->efi_gpt_NumberOfPartitionEntries;
22757 	gpe_lba = buffer->efi_gpt_PartitionEntryLBA;
22758 	if (p64.p_partno > nparts) {
22759 		/* couldn't find it */
22760 		rval = ESRCH;
22761 		goto done_error;
22762 	}
22763 	/*
22764 	 * If the requested entry lies beyond the normal 16K entry
22765 	 * array just read, adjust the LBA to read from accordingly.
22766 	 */
22767 	gpe_lba += p64.p_partno / sizeof (efi_gpe_t);
22768 	rval = sd_send_scsi_READ(un, buffer, EFI_MIN_ARRAY_SIZE,
22769 			gpe_lba, SD_PATH_DIRECT);
22770 	if (rval) {
22771 		goto done_error;
22772 	}
22773 	partitions = (efi_gpe_t *)buffer;
22774 
22775 	sd_swap_efi_gpe(nparts, partitions);
22776 
22777 	partitions += p64.p_partno;
22778 	bcopy(&partitions->efi_gpe_PartitionTypeGUID, &p64.p_type,
22779 	    sizeof (struct uuid));
22780 	p64.p_start = partitions->efi_gpe_StartingLBA;
22781 	p64.p_size = partitions->efi_gpe_EndingLBA -
22782 			p64.p_start + 1;
22783 
22784 	if (ddi_copyout(&p64, (void *)arg, sizeof (struct partition64), flag))
22785 		rval = EFAULT;
22786 
22787 done_error:
22788 	kmem_free(buffer, EFI_MIN_ARRAY_SIZE);
22789 	return (rval);
22790 }
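
/*
 * Sketch (hypothetical, kept under #if 0): querying one EFI
 * partition entry through the sd_dkio_partition() path above.
 */
#if 0
#include <sys/types.h>
#include <sys/uuid.h>
#include <sys/dkio.h>
#include <stdio.h>
#include <string.h>
#include <stropts.h>

static void
print_efi_partition(int fd, uint_t partno)
{
	struct partition64	p;

	(void) memset(&p, 0, sizeof (p));
	p.p_partno = partno;
	if (ioctl(fd, DKIOCPARTITION, &p) == 0) {
		(void) printf("part %u: start %llu, size %llu\n", partno,
		    (unsigned long long)p.p_start,
		    (unsigned long long)p.p_size);
	}
}
#endif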
22791 
22792 
22793 /*
22794  *    Function: sd_dkio_set_vtoc
22795  *
22796  * Description: This routine is the driver entry point for handling user
22797  *		requests to set the current volume table of contents
22798  *		(DKIOCSVTOC).
22799  *
22800  *   Arguments: dev  - the device number
22801  *		arg  - pointer to user provided vtoc structure used to set the
22802  *			current vtoc.
22803  *		flag - this argument is a pass through to ddi_copyxxx()
22804  *		       directly from the mode argument of ioctl().
22805  *
22806  * Return Code: 0
22807  *		EFAULT
22808  *		ENXIO
22809  *		EINVAL
22810  *		ENOTSUP
22811  */
22812 
22813 static int
22814 sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag)
22815 {
22816 	struct sd_lun	*un = NULL;
22817 	struct vtoc	user_vtoc;
22818 	int		rval = 0;
22819 
22820 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22821 		return (ENXIO);
22822 	}
22823 
22824 #if defined(__i386) || defined(__amd64)
22825 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
22826 		return (EINVAL);
22827 	}
22828 #endif
22829 
22830 #ifdef _MULTI_DATAMODEL
22831 	switch (ddi_model_convert_from(flag & FMODELS)) {
22832 	case DDI_MODEL_ILP32: {
22833 		struct vtoc32 user_vtoc32;
22834 
22835 		if (ddi_copyin((const void *)arg, &user_vtoc32,
22836 		    sizeof (struct vtoc32), flag)) {
22837 			return (EFAULT);
22838 		}
22839 		vtoc32tovtoc(user_vtoc32, user_vtoc);
22840 		break;
22841 	}
22842 
22843 	case DDI_MODEL_NONE:
22844 		if (ddi_copyin((const void *)arg, &user_vtoc,
22845 		    sizeof (struct vtoc), flag)) {
22846 			return (EFAULT);
22847 		}
22848 		break;
22849 	}
22850 #else /* ! _MULTI_DATAMODEL */
22851 	if (ddi_copyin((const void *)arg, &user_vtoc,
22852 	    sizeof (struct vtoc), flag)) {
22853 		return (EFAULT);
22854 	}
22855 #endif /* _MULTI_DATAMODEL */
22856 
22857 	mutex_enter(SD_MUTEX(un));
22858 	if (un->un_blockcount > DK_MAX_BLOCKS) {
22859 		mutex_exit(SD_MUTEX(un));
22860 		return (ENOTSUP);
22861 	}
22862 	if (un->un_g.dkg_ncyl == 0) {
22863 		mutex_exit(SD_MUTEX(un));
22864 		return (EINVAL);
22865 	}
22866 
22867 	mutex_exit(SD_MUTEX(un));
22868 	sd_clear_efi(un);
22869 	ddi_remove_minor_node(SD_DEVINFO(un), "wd");
22870 	ddi_remove_minor_node(SD_DEVINFO(un), "wd,raw");
22871 	(void) ddi_create_minor_node(SD_DEVINFO(un), "h",
22872 	    S_IFBLK, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
22873 	    un->un_node_type, NULL);
22874 	(void) ddi_create_minor_node(SD_DEVINFO(un), "h,raw",
22875 	    S_IFCHR, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
22876 	    un->un_node_type, NULL);
22877 	mutex_enter(SD_MUTEX(un));
22878 
22879 	if ((rval = sd_build_label_vtoc(un, &user_vtoc)) == 0) {
22880 		if ((rval = sd_write_label(dev)) == 0) {
22881 			if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT))
22882 			    != 0) {
22883 				SD_ERROR(SD_LOG_IOCTL_DKIO, un,
22884 				    "sd_dkio_set_vtoc: "
22885 				    "Failed validate geometry\n");
22886 			}
22887 		}
22888 	}
22889 
22890 	/*
22891 	 * Even if sd_build_label_vtoc() or sd_write_label() failed above,
22892 	 * write the devid anyway; it cannot hurt. Also preserve the device
22893 	 * id by writing to the disk acyl for the case where a devid has
22894 	 * been fabricated.
22895 	 */
22896 	if (!ISREMOVABLE(un) && !ISCD(un) &&
22897 	    (un->un_f_opt_fab_devid == TRUE)) {
22898 		if (un->un_devid == NULL) {
22899 			sd_register_devid(un, SD_DEVINFO(un),
22900 			    SD_TARGET_IS_UNRESERVED);
22901 		} else {
22902 			/*
22903 			 * The device id for this disk has been
22904 			 * fabricated. Fabricated device id's are
22905 			 * managed by storing them in the last 2
22906 			 * available sectors on the drive. The device
22907 			 * id must be preserved by writing it back out
22908 			 * to this location.
22909 			 */
22910 			if (sd_write_deviceid(un) != 0) {
22911 				ddi_devid_free(un->un_devid);
22912 				un->un_devid = NULL;
22913 			}
22914 		}
22915 	}
22916 	mutex_exit(SD_MUTEX(un));
22917 	return (rval);
22918 }
22919 
22920 
22921 /*
22922  *    Function: sd_build_label_vtoc
22923  *
22924  * Description: This routine updates the driver soft state current volume table
22925  *		of contents based on a user specified vtoc.
22926  *
22927  *   Arguments: un - driver soft state (unit) structure
22928  *		user_vtoc - pointer to vtoc structure specifying vtoc to be used
22929  *			    to update the driver soft state.
22930  *
22931  * Return Code: 0
22932  *		EINVAL
22933  */
22934 
22935 static int
22936 sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
22937 {
22938 	struct dk_map		*lmap;
22939 	struct partition	*vpart;
22940 	int			nblks;
22941 #if defined(_SUNOS_VTOC_8)
22942 	int			ncyl;
22943 	struct dk_map2		*lpart;
22944 #endif	/* defined(_SUNOS_VTOC_8) */
22945 	int			i;
22946 
22947 	ASSERT(mutex_owned(SD_MUTEX(un)));
22948 
22949 	/* Sanity-check the vtoc */
22950 	if (user_vtoc->v_sanity != VTOC_SANE ||
22951 	    user_vtoc->v_sectorsz != un->un_sys_blocksize ||
22952 	    user_vtoc->v_nparts != V_NUMPAR) {
22953 		return (EINVAL);
22954 	}
22955 
22956 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
22957 	if (nblks == 0) {
22958 		return (EINVAL);
22959 	}
22960 
22961 #if defined(_SUNOS_VTOC_8)
22962 	vpart = user_vtoc->v_part;
22963 	for (i = 0; i < V_NUMPAR; i++) {
22964 		if ((vpart->p_start % nblks) != 0) {
22965 			return (EINVAL);
22966 		}
22967 		ncyl = vpart->p_start / nblks;
22968 		ncyl += vpart->p_size / nblks;
22969 		if ((vpart->p_size % nblks) != 0) {
22970 			ncyl++;
22971 		}
22972 		if (ncyl > (int)un->un_g.dkg_ncyl) {
22973 			return (EINVAL);
22974 		}
22975 		vpart++;
22976 	}
22977 #endif	/* defined(_SUNOS_VTOC_8) */
22978 
22979 	/* Put appropriate vtoc structure fields into the disk label */
22980 #if defined(_SUNOS_VTOC_16)
22981 	/*
22982 	 * The vtoc is always a 32bit data structure to maintain the
22983 	 * on-disk format. Convert "in place" instead of bcopying it.
22984 	 */
22985 	vtoctovtoc32((*user_vtoc), (*((struct vtoc32 *)&(un->un_vtoc))));
22986 
22987 	/*
22988 	 * in the 16-slice vtoc, starting sectors are expressed in
22989 	 * numbers *relative* to the start of the Solaris fdisk partition.
22990 	 */
22991 	lmap = un->un_map;
22992 	vpart = user_vtoc->v_part;
22993 
22994 	for (i = 0; i < (int)user_vtoc->v_nparts; i++, lmap++, vpart++) {
22995 		lmap->dkl_cylno = vpart->p_start / nblks;
22996 		lmap->dkl_nblk = vpart->p_size;
22997 	}
22998 
22999 #elif defined(_SUNOS_VTOC_8)
23000 
23001 	un->un_vtoc.v_bootinfo[0] = (uint32_t)user_vtoc->v_bootinfo[0];
23002 	un->un_vtoc.v_bootinfo[1] = (uint32_t)user_vtoc->v_bootinfo[1];
23003 	un->un_vtoc.v_bootinfo[2] = (uint32_t)user_vtoc->v_bootinfo[2];
23004 
23005 	un->un_vtoc.v_sanity = (uint32_t)user_vtoc->v_sanity;
23006 	un->un_vtoc.v_version = (uint32_t)user_vtoc->v_version;
23007 
23008 	bcopy(user_vtoc->v_volume, un->un_vtoc.v_volume, LEN_DKL_VVOL);
23009 
23010 	un->un_vtoc.v_nparts = user_vtoc->v_nparts;
23011 
23012 	bcopy(user_vtoc->v_reserved, un->un_vtoc.v_reserved,
23013 	    sizeof (un->un_vtoc.v_reserved));
23014 
23015 	/*
23016 	 * Note the conversion from starting sector number
23017 	 * to starting cylinder number.
23018 	 * Return error if division results in a remainder.
23019 	 */
23020 	lmap = un->un_map;
23021 	lpart = un->un_vtoc.v_part;
23022 	vpart = user_vtoc->v_part;
23023 
23024 	for (i = 0; i < (int)user_vtoc->v_nparts; i++) {
23025 		lpart->p_tag  = vpart->p_tag;
23026 		lpart->p_flag = vpart->p_flag;
23027 		lmap->dkl_cylno = vpart->p_start / nblks;
23028 		lmap->dkl_nblk = vpart->p_size;
23029 
23030 		lmap++;
23031 		lpart++;
23032 		vpart++;
23033 
23034 		/* (4387723) */
23035 #ifdef _LP64
23036 		if (user_vtoc->timestamp[i] > TIME32_MAX) {
23037 			un->un_vtoc.v_timestamp[i] = TIME32_MAX;
23038 		} else {
23039 			un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23040 		}
23041 #else
23042 		un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23043 #endif
23044 	}
23045 
23046 	bcopy(user_vtoc->v_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
23047 #else
23048 #error "No VTOC format defined."
23049 #endif
23050 	return (0);
23051 }
23052 
23053 /*
23054  *    Function: sd_clear_efi
23055  *
23056  * Description: This routine clears all EFI labels.
23057  *
23058  *   Arguments: un - driver soft state (unit) structure
23059  *
23060  * Return Code: void
23061  */
23062 
23063 static void
23064 sd_clear_efi(struct sd_lun *un)
23065 {
23066 	efi_gpt_t	*gpt;
23067 	uint_t		lbasize;
23068 	uint64_t	cap;
23069 	int rval;
23070 
23071 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23072 
23073 	gpt = kmem_alloc(sizeof (efi_gpt_t), KM_SLEEP);
23074 
23075 	if (sd_send_scsi_READ(un, gpt, DEV_BSIZE, 1, SD_PATH_DIRECT) != 0) {
23076 		goto done;
23077 	}
23078 
23079 	sd_swap_efi_gpt(gpt);
23080 	rval = sd_validate_efi(gpt);
23081 	if (rval == 0) {
23082 		/* clear primary */
23083 		bzero(gpt, sizeof (efi_gpt_t));
23084 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE, 1,
23085 			SD_PATH_DIRECT))) {
23086 			SD_INFO(SD_LOG_IO_PARTITION, un,
23087 				"sd_clear_efi: clear primary label failed\n");
23088 		}
23089 	}
23090 	/* Now check and clear the backup label at the last LBA (cap - 1) */
23091 	rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
23092 	    SD_PATH_DIRECT);
23093 	if (rval) {
23094 		goto done;
23095 	}
23096 	if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
23097 	    cap - 1, SD_PATH_DIRECT)) != 0) {
23098 		goto done;
23099 	}
23100 	sd_swap_efi_gpt(gpt);
23101 	rval = sd_validate_efi(gpt);
23102 	if (rval == 0) {
23103 		/* clear backup */
23104 		SD_TRACE(SD_LOG_IOCTL, un, "sd_clear_efi clear backup@%lu\n",
23105 			cap-1);
23106 		bzero(gpt, sizeof (efi_gpt_t));
23107 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
23108 		    cap-1, SD_PATH_DIRECT))) {
23109 			SD_INFO(SD_LOG_IO_PARTITION, un,
23110 				"sd_clear_efi: clear backup label failed\n");
23111 		}
23112 	}
23113 
23114 done:
23115 	kmem_free(gpt, sizeof (efi_gpt_t));
23116 }
23117 
23118 /*
23119  *    Function: sd_set_vtoc
23120  *
23121  * Description: This routine writes the disk label to its primary and
23122  *		backup on-disk locations.
23123  *   Arguments: un  - driver soft state (unit) structure
23124  *		dkl - the label data to be written
23125  *
23126  * Return Code: 0, or the error code from sd_send_scsi_READ/WRITE
23127  */
23128 
23129 static int
23130 sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl)
23131 {
23132 	void			*shadow_buf;
23133 	uint_t			label_addr;
23134 	int			sec;
23135 	int			blk;
23136 	int			head;
23137 	int			cyl;
23138 	int			rval;
23139 
23140 #if defined(__i386) || defined(__amd64)
23141 	label_addr = un->un_solaris_offset + DK_LABEL_LOC;
23142 #else
23143 	/* Write the primary label at block 0 of the solaris partition. */
23144 	label_addr = 0;
23145 #endif
23146 
23147 	if (NOT_DEVBSIZE(un)) {
23148 		shadow_buf = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
23149 		/*
23150 		 * Read the target's first block.
23151 		 */
23152 		if ((rval = sd_send_scsi_READ(un, shadow_buf,
23153 		    un->un_tgt_blocksize, label_addr,
23154 		    SD_PATH_STANDARD)) != 0) {
23155 			goto exit;
23156 		}
23157 		/*
23158 		 * Copy the contents of the label into the shadow buffer
23159 		 * which is of the size of target block size.
23160 		 */
23161 		bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23162 	}
23163 
23164 	/* Write the primary label */
23165 	if (NOT_DEVBSIZE(un)) {
23166 		rval = sd_send_scsi_WRITE(un, shadow_buf, un->un_tgt_blocksize,
23167 		    label_addr, SD_PATH_STANDARD);
23168 	} else {
23169 		rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23170 		    label_addr, SD_PATH_STANDARD);
23171 	}
23172 	if (rval != 0) {
23173 		goto exit;	/* free shadow_buf (if allocated) first */
23174 	}
23175 
23176 	/*
23177 	 * Calculate where the backup labels go.  They are always on
23178 	 * the last alternate cylinder, but some older drives put them
23179 	 * on head 2 instead of the last head.	They are always on the
23180 	 * first 5 odd sectors of the appropriate track.
23181 	 *
23182 	 * We have no choice at this point, but to believe that the
23183 	 * disk label is valid.	 Use the geometry of the disk
23184 	 * as described in the label.
23185 	 */
23186 	cyl  = dkl->dkl_ncyl  + dkl->dkl_acyl - 1;
23187 	head = dkl->dkl_nhead - 1;
23188 
23189 	/*
23190 	 * Write and verify the backup labels. Make sure we don't try to
23191 	 * write past the last cylinder.
23192 	 */
23193 	for (sec = 1; ((sec < 5 * 2 + 1) && (sec < dkl->dkl_nsect)); sec += 2) {
23194 		blk = (daddr_t)(
23195 		    (cyl * ((dkl->dkl_nhead * dkl->dkl_nsect) - dkl->dkl_apc)) +
23196 		    (head * dkl->dkl_nsect) + sec);
23197 #if defined(__i386) || defined(__amd64)
23198 		blk += un->un_solaris_offset;
23199 #endif
23200 		if (NOT_DEVBSIZE(un)) {
23201 			uint64_t	tblk;
23202 			/*
23203 			 * Need to read the block first for read modify write.
23204 			 */
23205 			tblk = (uint64_t)blk;
23206 			blk = (int)((tblk * un->un_sys_blocksize) /
23207 			    un->un_tgt_blocksize);
23208 			if ((rval = sd_send_scsi_READ(un, shadow_buf,
23209 			    un->un_tgt_blocksize, blk,
23210 			    SD_PATH_STANDARD)) != 0) {
23211 				goto exit;
23212 			}
23213 			/*
23214 			 * Modify the shadow buffer with the label.
23215 			 */
23216 			bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23217 			rval = sd_send_scsi_WRITE(un, shadow_buf,
23218 			    un->un_tgt_blocksize, blk, SD_PATH_STANDARD);
23219 		} else {
23220 			rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23221 			    blk, SD_PATH_STANDARD);
23222 			SD_INFO(SD_LOG_IO_PARTITION, un,
23223 			    "sd_set_vtoc: wrote backup label %d\n", blk);
23224 		}
23225 		if (rval != 0) {
23226 			goto exit;
23227 		}
23228 	}
23229 exit:
23230 	if (NOT_DEVBSIZE(un)) {
23231 		kmem_free(shadow_buf, un->un_tgt_blocksize);
23232 	}
23233 	return (rval);
23234 }
23235 
23236 /*
23237  *    Function: sd_clear_vtoc
23238  *
23239  * Description: This routine clears out the VTOC labels.
23240  *
23241  *   Arguments: un - driver soft state (unit) structure
23242  *
23243  * Return: void
23244  */
23245 
23246 static void
23247 sd_clear_vtoc(struct sd_lun *un)
23248 {
23249 	struct dk_label		*dkl;
23250 
23251 	mutex_exit(SD_MUTEX(un));
23252 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23253 	mutex_enter(SD_MUTEX(un));
23254 	/*
23255 	 * sd_set_vtoc uses these fields in order to figure out
23256 	 * where to overwrite the backup labels
23257 	 */
23258 	dkl->dkl_apc    = un->un_g.dkg_apc;
23259 	dkl->dkl_ncyl   = un->un_g.dkg_ncyl;
23260 	dkl->dkl_acyl   = un->un_g.dkg_acyl;
23261 	dkl->dkl_nhead  = un->un_g.dkg_nhead;
23262 	dkl->dkl_nsect  = un->un_g.dkg_nsect;
23263 	mutex_exit(SD_MUTEX(un));
23264 	(void) sd_set_vtoc(un, dkl);
23265 	kmem_free(dkl, sizeof (struct dk_label));
23266 
23267 	mutex_enter(SD_MUTEX(un));
23268 }
23269 
23270 /*
23271  *    Function: sd_write_label
23272  *
23273  * Description: This routine will validate and write the driver soft state vtoc
23274  *		contents to the device.
23275  *
23276  *   Arguments: dev - the device number
23277  *
23278  * Return Code: the code returned by sd_send_scsi_cmd()
23279  *		0
23280  *		EINVAL
23281  *		ENXIO
23282  *		ENOMEM
23283  */
23284 
23285 static int
23286 sd_write_label(dev_t dev)
23287 {
23288 	struct sd_lun		*un;
23289 	struct dk_label		*dkl;
23290 	short			sum;
23291 	short			*sp;
23292 	int			i;
23293 	int			rval;
23294 
23295 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23296 	    (un->un_state == SD_STATE_OFFLINE)) {
23297 		return (ENXIO);
23298 	}
23299 	ASSERT(mutex_owned(SD_MUTEX(un)));
23300 	mutex_exit(SD_MUTEX(un));
23301 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23302 	mutex_enter(SD_MUTEX(un));
23303 
23304 	bcopy(&un->un_vtoc, &dkl->dkl_vtoc, sizeof (struct dk_vtoc));
23305 	dkl->dkl_rpm	= un->un_g.dkg_rpm;
23306 	dkl->dkl_pcyl	= un->un_g.dkg_pcyl;
23307 	dkl->dkl_apc	= un->un_g.dkg_apc;
23308 	dkl->dkl_intrlv = un->un_g.dkg_intrlv;
23309 	dkl->dkl_ncyl	= un->un_g.dkg_ncyl;
23310 	dkl->dkl_acyl	= un->un_g.dkg_acyl;
23311 	dkl->dkl_nhead	= un->un_g.dkg_nhead;
23312 	dkl->dkl_nsect	= un->un_g.dkg_nsect;
23313 
23314 #if defined(_SUNOS_VTOC_8)
23315 	dkl->dkl_obs1	= un->un_g.dkg_obs1;
23316 	dkl->dkl_obs2	= un->un_g.dkg_obs2;
23317 	dkl->dkl_obs3	= un->un_g.dkg_obs3;
23318 	for (i = 0; i < NDKMAP; i++) {
23319 		dkl->dkl_map[i].dkl_cylno = un->un_map[i].dkl_cylno;
23320 		dkl->dkl_map[i].dkl_nblk  = un->un_map[i].dkl_nblk;
23321 	}
23322 	bcopy(un->un_asciilabel, dkl->dkl_asciilabel, LEN_DKL_ASCII);
23323 #elif defined(_SUNOS_VTOC_16)
23324 	dkl->dkl_skew	= un->un_dkg_skew;
23325 #else
23326 #error "No VTOC format defined."
23327 #endif
23328 
23329 	dkl->dkl_magic			= DKL_MAGIC;
23330 	dkl->dkl_write_reinstruct	= un->un_g.dkg_write_reinstruct;
23331 	dkl->dkl_read_reinstruct	= un->un_g.dkg_read_reinstruct;
23332 
23333 	/* Construct checksum for the new disk label */
23334 	sum = 0;
23335 	sp = (short *)dkl;
23336 	i = sizeof (struct dk_label) / sizeof (short);
23337 	while (i--) {
23338 		sum ^= *sp++;
23339 	}
23340 	dkl->dkl_cksum = sum;
23341 
23342 	mutex_exit(SD_MUTEX(un));
23343 
23344 	rval = sd_set_vtoc(un, dkl);
23346 	kmem_free(dkl, sizeof (struct dk_label));
23347 	mutex_enter(SD_MUTEX(un));
23348 	return (rval);
23349 }
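
/*
 * Sketch (hypothetical, kept under #if 0): the complement of the
 * checksum loop above. Because dkl_cksum is the XOR of every other
 * 16-bit word in the label, XOR-ing all words of a valid label,
 * checksum included, yields zero.
 */
#if 0
#include <sys/dklabel.h>

static int
label_cksum_ok(const struct dk_label *dkl)
{
	const short	*sp = (const short *)dkl;
	short		sum = 0;
	int		i = sizeof (struct dk_label) / sizeof (short);

	while (i--) {
		sum ^= *sp++;
	}
	return (sum == 0);	/* nonzero XOR => corrupt label */
}
#endif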
23350 
23351 static int
23352 sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag)
23353 {
23354 	struct sd_lun	*un = NULL;
23355 	dk_efi_t	user_efi;
23356 	int		rval = 0;
23357 	void		*buffer;
23358 
23359 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
23360 		return (ENXIO);
23361 
23362 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
23363 		return (EFAULT);
23364 
23365 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
23366 
23367 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
23368 	    (user_efi.dki_length > un->un_max_xfer_size))
23369 		return (EINVAL);
23370 
23371 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
23372 	if (ddi_copyin(user_efi.dki_data, buffer, user_efi.dki_length, flag)) {
23373 		rval = EFAULT;
23374 	} else {
23375 		/*
23376 		 * let's clear the vtoc labels and clear the softstate
23377 		 * vtoc.
23378 		 */
23379 		mutex_enter(SD_MUTEX(un));
23380 		if (un->un_vtoc.v_sanity == VTOC_SANE) {
23381 			SD_TRACE(SD_LOG_IO_PARTITION, un,
23382 				"sd_dkio_set_efi: CLEAR VTOC\n");
23383 			sd_clear_vtoc(un);
23384 			bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
23385 			mutex_exit(SD_MUTEX(un));
23386 			ddi_remove_minor_node(SD_DEVINFO(un), "h");
23387 			ddi_remove_minor_node(SD_DEVINFO(un), "h,raw");
23388 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd",
23389 			    S_IFBLK,
23390 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23391 			    un->un_node_type, NULL);
23392 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd,raw",
23393 			    S_IFCHR,
23394 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23395 			    un->un_node_type, NULL);
23396 		} else
23397 			mutex_exit(SD_MUTEX(un));
23398 		rval = sd_send_scsi_WRITE(un, buffer, user_efi.dki_length,
23399 		    user_efi.dki_lba, SD_PATH_DIRECT);
23400 		if (rval == 0) {
23401 			mutex_enter(SD_MUTEX(un));
23402 			un->un_f_geometry_is_valid = FALSE;
23403 			mutex_exit(SD_MUTEX(un));
23404 		}
23405 	}
23406 	kmem_free(buffer, user_efi.dki_length);
23407 	return (rval);
23408 }
23409 
23410 /*
23411  *    Function: sd_dkio_get_mboot
23412  *
23413  * Description: This routine is the driver entry point for handling user
23414  *		requests to get the current device mboot (DKIOCGMBOOT)
23415  *
23416  *   Arguments: dev  - the device number
23417  *		arg  - pointer to a user provided mboot structure in which
23418  *			the current mboot is returned.
23419  *		flag - this argument is a pass through to ddi_copyxxx()
23420  *		       directly from the mode argument of ioctl().
23421  *
23422  * Return Code: 0
23423  *		EINVAL
23424  *		EFAULT
23425  *		ENXIO
23426  */
23427 
23428 static int
23429 sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag)
23430 {
23431 	struct sd_lun	*un;
23432 	struct mboot	*mboot;
23433 	int		rval;
23434 	size_t		buffer_size;
23435 
23436 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23437 	    (un->un_state == SD_STATE_OFFLINE)) {
23438 		return (ENXIO);
23439 	}
23440 
23441 #if defined(_SUNOS_VTOC_8)
23442 	if ((!ISREMOVABLE(un)) || (arg == NULL)) {
23443 #elif defined(_SUNOS_VTOC_16)
23444 	if (arg == NULL) {
23445 #endif
23446 		return (EINVAL);
23447 	}
23448 
23449 	/*
23450 	 * Read the mboot block, located at absolute block 0 on the target.
23451 	 */
23452 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct mboot));
23453 
23454 	SD_TRACE(SD_LOG_IO_PARTITION, un,
23455 	    "sd_dkio_get_mboot: allocation size: 0x%x\n", buffer_size);
23456 
23457 	mboot = kmem_zalloc(buffer_size, KM_SLEEP);
23458 	if ((rval = sd_send_scsi_READ(un, mboot, buffer_size, 0,
23459 	    SD_PATH_STANDARD)) == 0) {
23460 		if (ddi_copyout(mboot, (void *)arg,
23461 		    sizeof (struct mboot), flag) != 0) {
23462 			rval = EFAULT;
23463 		}
23464 	}
23465 	kmem_free(mboot, buffer_size);
23466 	return (rval);
23467 }
23468 
23469 
23470 /*
23471  *    Function: sd_dkio_set_mboot
23472  *
23473  * Description: This routine is the driver entry point for handling user
23474  *		requests to validate and set the device master boot
23475  *		(DKIOCSMBOOT).
23476  *
23477  *   Arguments: dev  - the device number
23478  *		arg  - pointer to user provided mboot structure used to set the
23479  *			master boot.
23480  *		flag - this argument is a pass through to ddi_copyxxx()
23481  *		       directly from the mode argument of ioctl().
23482  *
23483  * Return Code: 0
23484  *		EINVAL
23485  *		EFAULT
23486  *		ENXIO
23487  */
23488 
23489 static int
23490 sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag)
23491 {
23492 	struct sd_lun	*un = NULL;
23493 	struct mboot	*mboot = NULL;
23494 	int		rval;
23495 	ushort_t	magic;
23496 
23497 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23498 		return (ENXIO);
23499 	}
23500 
23501 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23502 
23503 #if defined(_SUNOS_VTOC_8)
23504 	if (!ISREMOVABLE(un)) {
23505 		return (EINVAL);
23506 	}
23507 #endif
23508 
23509 	if (arg == NULL) {
23510 		return (EINVAL);
23511 	}
23512 
23513 	mboot = kmem_zalloc(sizeof (struct mboot), KM_SLEEP);
23514 
23515 	if (ddi_copyin((const void *)arg, mboot,
23516 	    sizeof (struct mboot), flag) != 0) {
23517 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23518 		return (EFAULT);
23519 	}
23520 
23521 	/* Is this really a master boot record? */
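	/*
	 * The signature is stored little-endian on the medium, so the
	 * LE_16 conversion keeps this check correct on big-endian
	 * (e.g. SPARC) hosts as well.
	 */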
23522 	magic = LE_16(mboot->signature);
23523 	if (magic != MBB_MAGIC) {
23524 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23525 		return (EINVAL);
23526 	}
23527 
23528 	rval = sd_send_scsi_WRITE(un, mboot, un->un_sys_blocksize, 0,
23529 	    SD_PATH_STANDARD);
23530 
23531 	mutex_enter(SD_MUTEX(un));
23532 #if defined(__i386) || defined(__amd64)
23533 	if (rval == 0) {
23534 		/*
23535 		 * mboot has been written successfully.
23536 		 * update the fdisk and vtoc tables in memory
23537 		 */
23538 		rval = sd_update_fdisk_and_vtoc(un);
23539 		if ((un->un_f_geometry_is_valid == FALSE) || (rval != 0)) {
23540 			mutex_exit(SD_MUTEX(un));
23541 			kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23542 			return (rval);
23543 		}
23544 	}
23545 
23546 	/*
23547 	 * Even if the mboot write fails, write the devid anyway; it can
23548 	 * do no harm. Also preserve the device id by writing it to the
23549 	 * disk acyl for the case where a devid has been fabricated.
23550 	 */
23551 	if (!ISREMOVABLE(un) && !ISCD(un) &&
23552 	    (un->un_f_opt_fab_devid == TRUE)) {
23553 		if (un->un_devid == NULL) {
23554 			sd_register_devid(un, SD_DEVINFO(un),
23555 			    SD_TARGET_IS_UNRESERVED);
23556 		} else {
23557 			/*
23558 			 * The device id for this disk has been
23559 			 * fabricated. Fabricated device id's are
23560 			 * managed by storing them in the last 2
23561 			 * available sectors on the drive. The device
23562 			 * id must be preserved by writing it back out
23563 			 * to this location.
23564 			 */
23565 			if (sd_write_deviceid(un) != 0) {
23566 				ddi_devid_free(un->un_devid);
23567 				un->un_devid = NULL;
23568 			}
23569 		}
23570 	}
23571 #else
23572 	if (rval == 0) {
23573 		/*
23574 		 * mboot has been written successfully.
23575 		 * set up the default geometry and VTOC
23576 		 */
23577 		if (un->un_blockcount <= DK_MAX_BLOCKS)
23578 			sd_setup_default_geometry(un);
23579 	}
23580 #endif
23581 	mutex_exit(SD_MUTEX(un));
23582 	kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23583 	return (rval);
23584 }
23585 
23586 
23587 /*
23588  *    Function: sd_setup_default_geometry
23589  *
23590  * Description: This local utility routine sets the default geometry as part of
23591  *		setting the device mboot.
23592  *
23593  *   Arguments: un - driver soft state (unit) structure
23594  *
23595  * Note: This may be redundant with sd_build_default_label.
23596  */
23597 
23598 static void
23599 sd_setup_default_geometry(struct sd_lun *un)
23600 {
23601 	/* zero out the soft state geometry and partition table. */
23602 	bzero(&un->un_g, sizeof (struct dk_geom));
23603 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
23604 	bzero(un->un_map, NDKMAP * (sizeof (struct dk_map)));
23605 	un->un_asciilabel[0] = '\0';
23606 
23607 	/*
23608 	 * For the rpm, we use the minimum for the disk.
23609 	 * For the heads, cylinders, and sectors per track:
23610 	 * if the capacity is <= 1GB, use head = 64, sect = 32;
23611 	 * else use head = 255, sect = 63.
23612 	 * Note: the capacity should equal the C*H*S product, so
23613 	 * the reported size is truncated slightly by round-off
23614 	 * error. For CD-ROMs this truncation can have adverse
23615 	 * side effects, so ncyl and nhead are returned as 1.
23616 	 * The nsect value would overflow for most CD-ROMs, as
23617 	 * nsect is of type ushort.
23618 	 */
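	/*
	 * Illustrative arithmetic: a disk of 8388608 blocks (4GB at
	 * 512 bytes per block) gets nhead = 255, nsect = 63, and
	 * ncyl = 8388608 / (255 * 63) = 522; un_blockcount is then
	 * truncated to 522 * 255 * 63 = 8385930 blocks so that the
	 * capacity equals the C*H*S product.
	 */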
23619 	if (ISCD(un)) {
23620 		un->un_g.dkg_ncyl = 1;
23621 		un->un_g.dkg_nhead = 1;
23622 		un->un_g.dkg_nsect = un->un_blockcount;
23623 	} else {
23624 		if (un->un_blockcount <= 0x1000) {
23625 			/* Needed for unlabeled SCSI floppies. */
23626 			un->un_g.dkg_nhead = 2;
23627 			un->un_g.dkg_ncyl = 80;
23628 			un->un_g.dkg_pcyl = 80;
23629 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
23630 		} else if (un->un_blockcount <= 0x200000) {
23631 			un->un_g.dkg_nhead = 64;
23632 			un->un_g.dkg_nsect = 32;
23633 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
23634 		} else {
23635 			un->un_g.dkg_nhead = 255;
23636 			un->un_g.dkg_nsect = 63;
23637 			un->un_g.dkg_ncyl = un->un_blockcount / (255 * 63);
23638 		}
23639 		un->un_blockcount = un->un_g.dkg_ncyl *
23640 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
23641 	}
23642 	un->un_g.dkg_acyl = 0;
23643 	un->un_g.dkg_bcyl = 0;
23644 	un->un_g.dkg_intrlv = 1;
23645 	un->un_g.dkg_rpm = 200;
23646 	un->un_g.dkg_read_reinstruct = 0;
23647 	un->un_g.dkg_write_reinstruct = 0;
23648 	if (un->un_g.dkg_pcyl == 0) {
23649 		un->un_g.dkg_pcyl = un->un_g.dkg_ncyl + un->un_g.dkg_acyl;
23650 	}
23651 
23652 	un->un_map['a'-'a'].dkl_cylno = 0;
23653 	un->un_map['a'-'a'].dkl_nblk = un->un_blockcount;
23654 	un->un_map['c'-'a'].dkl_cylno = 0;
23655 	un->un_map['c'-'a'].dkl_nblk = un->un_blockcount;
23656 	un->un_f_geometry_is_valid = FALSE;
23657 }
23658 
23659 
23660 #if defined(__i386) || defined(__amd64)
23661 /*
23662  *    Function: sd_update_fdisk_and_vtoc
23663  *
23664  * Description: This local utility routine updates the device fdisk and vtoc
23665  *		as part of setting the device mboot.
23666  *
23667  *   Arguments: un - driver soft state (unit) structure
23668  *
23669  * Return Code: 0 for success or errno-type return code.
23670  *
23671  *    Note: x86: This looks like a duplicate of sd_validate_geometry(),
23672  *		but these did exist separately in the x86 sd.c!
23673  */
23674 
23675 static int
23676 sd_update_fdisk_and_vtoc(struct sd_lun *un)
23677 {
23678 	static char	labelstring[128];
23679 	static char	buf[256];
23680 	char		*label = 0;
23681 	int		count;
23682 	int		label_rc = 0;
23683 	int		gvalid = un->un_f_geometry_is_valid;
23684 	int		fdisk_rval;
23685 	int		lbasize;
23686 	int		capacity;
23687 
23688 	ASSERT(mutex_owned(SD_MUTEX(un)));
23689 
23690 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
23691 		return (EINVAL);
23692 	}
23693 
23694 	if (un->un_f_blockcount_is_valid == FALSE) {
23695 		return (EINVAL);
23696 	}
23697 
23698 #if defined(_SUNOS_VTOC_16)
23699 	/*
23700 	 * Set up the "whole disk" fdisk partition; this should always
23701 	 * exist, regardless of whether the disk contains an fdisk table
23702 	 * or vtoc.
23703 	 */
23704 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
23705 	un->un_map[P0_RAW_DISK].dkl_nblk = un->un_blockcount;
23706 #endif	/* defined(_SUNOS_VTOC_16) */
23707 
23708 	/*
23709 	 * copy the lbasize and capacity so that if they're
23710 	 * reset while we're not holding the SD_MUTEX(un), we will
23711 	 * continue to use valid values after the SD_MUTEX(un) is
23712 	 * reacquired.
23713 	 */
23714 	lbasize  = un->un_tgt_blocksize;
23715 	capacity = un->un_blockcount;
23716 
23717 	/*
23718 	 * refresh the logical and physical geometry caches.
23719 	 * (data from mode sense format/rigid disk geometry pages,
23720 	 * and scsi_ifgetcap("geometry").
23721 	 */
23722 	sd_resync_geom_caches(un, capacity, lbasize, SD_PATH_DIRECT);
23723 
23724 	/*
23725 	 * Only DIRECT ACCESS devices will have Sun labels.
23726 	 * CD's supposedly have a Sun label, too
23727 	 * CDs supposedly have a Sun label, too.
23728 	if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT || ISREMOVABLE(un)) {
23729 		fdisk_rval = sd_read_fdisk(un, capacity, lbasize,
23730 		    SD_PATH_DIRECT);
23731 		if (fdisk_rval == SD_CMD_FAILURE) {
23732 			ASSERT(mutex_owned(SD_MUTEX(un)));
23733 			return (EIO);
23734 		}
23735 
23736 		if (fdisk_rval == SD_CMD_RESERVATION_CONFLICT) {
23737 			ASSERT(mutex_owned(SD_MUTEX(un)));
23738 			return (EACCES);
23739 		}
23740 
23741 		if (un->un_solaris_size <= DK_LABEL_LOC) {
23742 			/*
23743 			 * Found fdisk table but no Solaris partition entry,
23744 			 * so don't call sd_uselabel() and don't create
23745 			 * a default label.
23746 			 */
23747 			label_rc = 0;
23748 			un->un_f_geometry_is_valid = TRUE;
23749 			goto no_solaris_partition;
23750 		}
23751 
23752 #if defined(_SUNOS_VTOC_8)
23753 		label = (char *)un->un_asciilabel;
23754 #elif defined(_SUNOS_VTOC_16)
23755 		label = (char *)un->un_vtoc.v_asciilabel;
23756 #else
23757 #error "No VTOC format defined."
23758 #endif
23759 	} else if (capacity < 0) {
23760 		ASSERT(mutex_owned(SD_MUTEX(un)));
23761 		return (EINVAL);
23762 	}
23763 
23764 	/*
23765 	 * For removable media, we reach here only if we have found a
23766 	 * Solaris partition.
23767 	 * If un_f_geometry_is_valid is FALSE, it indicates that the
23768 	 * Solaris partition has changed from the previous one, hence
23769 	 * we will set up a default VTOC in this case.
23770 	 */
23771 	if (un->un_f_geometry_is_valid == FALSE) {
23772 		sd_build_default_label(un);
23773 		label_rc = 0;
23774 	}
23775 
23776 no_solaris_partition:
23777 	if ((!ISREMOVABLE(un) ||
23778 	    (ISREMOVABLE(un) && un->un_mediastate == DKIO_EJECTED)) &&
23779 	    (un->un_state == SD_STATE_NORMAL && gvalid == FALSE)) {
23780 		/*
23781 		 * Print out a message indicating who and what we are.
23782 		 * We do this only when we happen to really validate the
23783 		 * geometry; we may call sd_validate_geometry() at other
23784 		 * times as well (e.g. for ioctl()s like Get VTOC), in
23785 		 * which case we don't want to print the label.
23786 		 * If the geometry is valid, print the label string;
23787 		 * else print vendor and product info, if available.
23788 		 */
23789 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
23790 			SD_INFO(SD_LOG_IOCTL_DKIO, un, "?<%s>\n", label);
23791 		} else {
23792 			mutex_enter(&sd_label_mutex);
23793 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
23794 			    labelstring);
23795 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
23796 			    &labelstring[64]);
23797 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
23798 			    labelstring, &labelstring[64]);
23799 			if (un->un_f_blockcount_is_valid == TRUE) {
23800 				(void) sprintf(&buf[strlen(buf)],
23801 				    ", %" PRIu64 " %u byte blocks\n",
23802 				    un->un_blockcount,
23803 				    un->un_tgt_blocksize);
23804 			} else {
23805 				(void) sprintf(&buf[strlen(buf)],
23806 				    ", (unknown capacity)\n");
23807 			}
23808 			SD_INFO(SD_LOG_IOCTL_DKIO, un, buf);
23809 			mutex_exit(&sd_label_mutex);
23810 		}
23811 	}
23812 
23813 #if defined(_SUNOS_VTOC_16)
23814 	/*
23815 	 * If we have valid geometry, set up the remaining fdisk partitions.
23816 	 * Note that dkl_cylno is not used for the fdisk map entries, so
23817 	 * we set it to an entirely bogus value.
23818 	 */
23819 	for (count = 0; count < FD_NUMPART; count++) {
23820 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
23821 		un->un_map[FDISK_P1 + count].dkl_nblk =
23822 		    un->un_fmap[count].fmap_nblk;
23823 		un->un_offset[FDISK_P1 + count] =
23824 		    un->un_fmap[count].fmap_start;
23825 	}
23826 #endif
23827 
23828 	for (count = 0; count < NDKMAP; count++) {
23829 #if defined(_SUNOS_VTOC_8)
23830 		struct dk_map *lp  = &un->un_map[count];
23831 		un->un_offset[count] =
23832 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
23833 #elif defined(_SUNOS_VTOC_16)
23834 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
23835 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
23836 #else
23837 #error "No VTOC format defined."
23838 #endif
23839 	}
23840 
23841 	ASSERT(mutex_owned(SD_MUTEX(un)));
23842 	return (label_rc);
23843 }
23844 #endif
23845 
23846 
23847 /*
23848  *    Function: sd_check_media
23849  *
23850  * Description: This utility routine implements the functionality for the
23851  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
23852  *		driver state changes from that specified by the user
23853  *		(inserted or ejected). For example, if the user specifies
23854  *		DKIO_EJECTED and the current media state is inserted this
23855  *		routine will immediately return DKIO_INSERTED. However, if the
23856  *		current media state is not inserted the user thread will be
23857  *		blocked until the drive state changes. If DKIO_NONE is specified
23858  *		the user thread will block until a drive state change occurs.
23859  *
23860  *   Arguments: dev  - the device number
23861  *		state  - the media state specified by the user; this routine
23862  *			blocks until the drive state differs from it.
23863  *
23864  * Return Code: ENXIO
23865  *		EIO
23866  *		EAGAIN
23867  *		EINTR
23868  */
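/*
 * Illustrative sketch (not part of the driver): a userland consumer of
 * DKIOCSTATE typically loops, handing back the state it last observed;
 * the device path below is an assumption for illustration only.
 *
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY | O_NDELAY);
 *	enum dkio_state state = DKIO_NONE;
 *
 *	while (ioctl(fd, DKIOCSTATE, &state) == 0) {
 *		if (state == DKIO_INSERTED) {
 *			(react to media insertion)
 *		} else if (state == DKIO_EJECTED) {
 *			(react to media removal)
 *		}
 *	}
 */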
23869 
23870 static int
23871 sd_check_media(dev_t dev, enum dkio_state state)
23872 {
23873 	struct sd_lun		*un = NULL;
23874 	enum dkio_state		prev_state;
23875 	opaque_t		token = NULL;
23876 	int			rval = 0;
23877 
23878 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23879 		return (ENXIO);
23880 	}
23881 
23882 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
23883 
23884 	mutex_enter(SD_MUTEX(un));
23885 
23886 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
23887 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
23888 
23889 	prev_state = un->un_mediastate;
23890 
23891 	/* is there anything to do? */
23892 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
23893 		/*
23894 		 * submit the request to the scsi_watch service;
23895 		 * scsi_media_watch_cb() does the real work
23896 		 */
23897 		mutex_exit(SD_MUTEX(un));
23898 
23899 		/*
23900 		 * This change handles the case where a scsi watch request is
23901 		 * added to a device that is powered down. To accomplish this
23902 		 * we power up the device before adding the scsi watch request,
23903 		 * since the scsi watch sends a TUR directly to the device
23904 		 * which the device cannot handle if it is powered down.
23905 		 */
23906 		if (sd_pm_entry(un) != DDI_SUCCESS) {
23907 			mutex_enter(SD_MUTEX(un));
23908 			goto done;
23909 		}
23910 
23911 		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
23912 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
23913 		    (caddr_t)dev);
23914 
23915 		sd_pm_exit(un);
23916 
23917 		mutex_enter(SD_MUTEX(un));
23918 		if (token == NULL) {
23919 			rval = EAGAIN;
23920 			goto done;
23921 		}
23922 
23923 		/*
23924 		 * This is a special case IOCTL that doesn't return
23925 		 * until the media state changes. Routine sdpower
23926 		 * knows about and handles this so don't count it
23927 		 * as an active cmd in the driver, which would
23928 		 * keep the device busy to the pm framework.
23929 		 * If the count isn't decremented the device can't
23930 		 * be powered down.
23931 		 */
23932 		un->un_ncmds_in_driver--;
23933 		ASSERT(un->un_ncmds_in_driver >= 0);
23934 
23935 		/*
23936 		 * if a prior request had been made, this will be the same
23937 		 * token, as scsi_watch was designed that way.
23938 		 */
23939 		un->un_swr_token = token;
23940 		un->un_specified_mediastate = state;
23941 
23942 		/*
23943 		 * Now wait for the media state to change. We will not be
23944 		 * signalled until mediastate != state, but it is still
23945 		 * better to test for this condition, since there is a
23946 		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED.
23947 		 */
23948 		SD_TRACE(SD_LOG_COMMON, un,
23949 		    "sd_check_media: waiting for media state change\n");
23950 		while (un->un_mediastate == state) {
23951 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
23952 				SD_TRACE(SD_LOG_COMMON, un,
23953 				    "sd_check_media: waiting for media state "
23954 				    "was interrupted\n");
23955 				un->un_ncmds_in_driver++;
23956 				rval = EINTR;
23957 				goto done;
23958 			}
23959 			SD_TRACE(SD_LOG_COMMON, un,
23960 			    "sd_check_media: received signal, state=%x\n",
23961 			    un->un_mediastate);
23962 		}
23963 		/*
23964 		 * Inc the counter to indicate the device once again
23965 		 * has an active outstanding cmd.
23966 		 */
23967 		un->un_ncmds_in_driver++;
23968 	}
23969 
23970 	/* invalidate geometry */
23971 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
23972 		sr_ejected(un);
23973 	}
23974 
23975 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
23976 		uint64_t	capacity;
23977 		uint_t		lbasize;
23978 
23979 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
23980 		mutex_exit(SD_MUTEX(un));
23981 		/*
23982 		 * Since the following routines use SD_PATH_DIRECT, we must
23983 		 * call PM directly before the upcoming disk accesses. This
23984 		 * may cause the disk to be powered up and spun up.
23985 		 */
23986 
23987 		if (sd_pm_entry(un) == DDI_SUCCESS) {
23988 			rval = sd_send_scsi_READ_CAPACITY(un,
23989 			    &capacity,
23990 			    &lbasize, SD_PATH_DIRECT);
23991 			if (rval != 0) {
23992 				sd_pm_exit(un);
23993 				mutex_enter(SD_MUTEX(un));
23994 				goto done;
23995 			}
23996 		} else {
23997 			rval = EIO;
23998 			mutex_enter(SD_MUTEX(un));
23999 			goto done;
24000 		}
24001 		mutex_enter(SD_MUTEX(un));
24002 
24003 		sd_update_block_info(un, lbasize, capacity);
24004 
24005 		un->un_f_geometry_is_valid	= FALSE;
24006 		(void) sd_validate_geometry(un, SD_PATH_DIRECT);
24007 
24008 		mutex_exit(SD_MUTEX(un));
24009 		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
24010 		    SD_PATH_DIRECT);
24011 		sd_pm_exit(un);
24012 
24013 		mutex_enter(SD_MUTEX(un));
24014 	}
24015 done:
24016 	un->un_f_watcht_stopped = FALSE;
24017 	if (un->un_swr_token) {
24018 		/*
24019 		 * Use of this local token and the mutex ensures that we avoid
24020 		 * some race conditions associated with terminating the
24021 		 * scsi watch.
24022 		 */
24023 		token = un->un_swr_token;
24024 		un->un_swr_token = (opaque_t)NULL;
24025 		mutex_exit(SD_MUTEX(un));
24026 		(void) scsi_watch_request_terminate(token,
24027 		    SCSI_WATCH_TERMINATE_WAIT);
24028 		mutex_enter(SD_MUTEX(un));
24029 	}
24030 
24031 	/*
24032 	 * Update the capacity kstat value, if there was no media
24033 	 * previously (capacity kstat is 0) and media has now been
24034 	 * inserted (un_f_blockcount_is_valid == TRUE).
24035 	 * This is a more generic way than checking for ISREMOVABLE.
24036 	 */
24037 	if (un->un_errstats) {
24038 		struct sd_errstats	*stp = NULL;
24039 
24040 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
24041 		if ((stp->sd_capacity.value.ui64 == 0) &&
24042 		    (un->un_f_blockcount_is_valid == TRUE)) {
24043 			stp->sd_capacity.value.ui64 =
24044 			    (uint64_t)((uint64_t)un->un_blockcount *
24045 			    un->un_sys_blocksize);
24046 		}
24047 	}
24048 	mutex_exit(SD_MUTEX(un));
24049 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
24050 	return (rval);
24051 }
24052 
24053 
24054 /*
24055  *    Function: sd_delayed_cv_broadcast
24056  *
24057  * Description: Delayed cv_broadcast to allow the target to recover from media
24058  *		insertion.
24059  *
24060  *   Arguments: arg - driver soft state (unit) structure
24061  */
24062 
24063 static void
24064 sd_delayed_cv_broadcast(void *arg)
24065 {
24066 	struct sd_lun *un = arg;
24067 
24068 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
24069 
24070 	mutex_enter(SD_MUTEX(un));
24071 	un->un_dcvb_timeid = NULL;
24072 	cv_broadcast(&un->un_state_cv);
24073 	mutex_exit(SD_MUTEX(un));
24074 }
24075 
24076 
24077 /*
24078  *    Function: sd_media_watch_cb
24079  *
24080  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
24081  *		routine processes the TUR sense data and updates the driver
24082  *		state if a transition has occurred. The user thread
24083  *		(sd_check_media) is then signalled.
24084  *
24085  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24086  *			among multiple watches that share this callback function
24087  *		resultp - scsi watch facility result packet containing scsi
24088  *			  packet, status byte and sense data
24089  *
24090  * Return Code: 0 for success, -1 for failure
24091  */
24092 
24093 static int
24094 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
24095 {
24096 	struct sd_lun			*un;
24097 	struct scsi_status		*statusp = resultp->statusp;
24098 	struct scsi_extended_sense	*sensep = resultp->sensep;
24099 	enum dkio_state			state = DKIO_NONE;
24100 	dev_t				dev = (dev_t)arg;
24101 	uchar_t				actual_sense_length;
24102 
24103 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24104 		return (-1);
24105 	}
24106 	actual_sense_length = resultp->actual_sense_length;
24107 
24108 	mutex_enter(SD_MUTEX(un));
24109 	SD_TRACE(SD_LOG_COMMON, un,
24110 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
24111 	    *((char *)statusp), (void *)sensep, actual_sense_length);
24112 
24113 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
24114 		un->un_mediastate = DKIO_DEV_GONE;
24115 		cv_broadcast(&un->un_state_cv);
24116 		mutex_exit(SD_MUTEX(un));
24117 
24118 		return (0);
24119 	}
24120 
24121 	/*
24122 	 * If there was a check condition, then sensep points to valid sense data.
24123 	 * If the status was not a check condition but a reservation or busy
24124 	 * status, then the new state is DKIO_NONE.
24125 	 */
24126 	if (sensep != NULL) {
24127 		SD_INFO(SD_LOG_COMMON, un,
24128 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
24129 		    sensep->es_key, sensep->es_add_code, sensep->es_qual_code);
24130 		/* This routine only uses up to 13 bytes of sense data. */
24131 		if (actual_sense_length >= 13) {
24132 			if (sensep->es_key == KEY_UNIT_ATTENTION) {
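				/*
				 * ASC 0x28 is "not ready to ready
				 * change, medium may have changed".
				 */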
24133 				if (sensep->es_add_code == 0x28) {
24134 					state = DKIO_INSERTED;
24135 				}
24136 			} else {
24137 				/*
24138 				 * Sense data of 02/04/02 means that
24139 				 * the host should send a start
24140 				 * command. Explicitly leave the media
24141 				 * state as is (inserted), since the
24142 				 * media is inserted and the host has
24143 				 * stopped the device for PM reasons.
24144 				 * The next true read/write to this
24145 				 * media will bring the device to the
24146 				 * right state for media access.
24147 				 */
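				/*
				 * 02/3a means "medium not present";
				 * treat it as an eject.
				 */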
24148 				if ((sensep->es_key == KEY_NOT_READY) &&
24149 				    (sensep->es_add_code == 0x3a)) {
24150 					state = DKIO_EJECTED;
24151 				}
24152 
24153 				/*
24154 				 * If the drive is busy with an operation
24155 				 * or long write, keep the media in an
24156 				 * inserted state.
24157 				 */
24158 
24159 				if ((sensep->es_key == KEY_NOT_READY) &&
24160 				    (sensep->es_add_code == 0x04) &&
24161 				    ((sensep->es_qual_code == 0x02) ||
24162 				    (sensep->es_qual_code == 0x07) ||
24163 				    (sensep->es_qual_code == 0x08))) {
24164 					state = DKIO_INSERTED;
24165 				}
24166 			}
24167 		}
24168 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
24169 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
24170 		state = DKIO_INSERTED;
24171 	}
24172 
24173 	SD_TRACE(SD_LOG_COMMON, un,
24174 	    "sd_media_watch_cb: state=%x, specified=%x\n",
24175 	    state, un->un_specified_mediastate);
24176 
24177 	/*
24178 	 * now signal the waiting thread if this is *not* the specified state;
24179 	 * delay the signal if the state is DKIO_INSERTED to allow the target
24180 	 * to recover
24181 	 */
24182 	if (state != un->un_specified_mediastate) {
24183 		un->un_mediastate = state;
24184 		if (state == DKIO_INSERTED) {
24185 			/*
24186 			 * delay the signal to give the drive a chance
24187 			 * to do what it apparently needs to do
24188 			 */
24189 			SD_TRACE(SD_LOG_COMMON, un,
24190 			    "sd_media_watch_cb: delayed cv_broadcast\n");
24191 			if (un->un_dcvb_timeid == NULL) {
24192 				un->un_dcvb_timeid =
24193 				    timeout(sd_delayed_cv_broadcast, un,
24194 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
24195 			}
24196 		} else {
24197 			SD_TRACE(SD_LOG_COMMON, un,
24198 			    "sd_media_watch_cb: immediate cv_broadcast\n");
24199 			cv_broadcast(&un->un_state_cv);
24200 		}
24201 	}
24202 	mutex_exit(SD_MUTEX(un));
24203 	return (0);
24204 }
24205 
24206 
24207 /*
24208  *    Function: sd_dkio_get_temp
24209  *
24210  * Description: This routine is the driver entry point for handling ioctl
24211  *		requests to get the disk temperature.
24212  *
24213  *   Arguments: dev  - the device number
24214  *		arg  - pointer to user provided dk_temperature structure.
24215  *		flag - this argument is a pass through to ddi_copyxxx()
24216  *		       directly from the mode argument of ioctl().
24217  *
24218  * Return Code: 0
24219  *		EFAULT
24220  *		ENXIO
24221  *		EAGAIN
24222  */
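/*
 * Illustrative sketch (not part of the driver): a userland caller might
 * fetch the temperature as follows, given an open file descriptor fd for
 * the raw device.
 *
 *	struct dk_temperature dkt;
 *
 *	bzero(&dkt, sizeof (dkt));
 *	dkt.dkt_flags = DKT_BYPASS_PM;	(optional: don't spin up the disk)
 *	if (ioctl(fd, DKIOCGTEMP, &dkt) == 0 &&
 *	    dkt.dkt_cur_temp != DKT_INVALID_TEMP) {
 *		(dkt.dkt_cur_temp is the current temperature in Celsius)
 *	}
 */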
24223 
24224 static int
24225 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
24226 {
24227 	struct sd_lun		*un = NULL;
24228 	struct dk_temperature	*dktemp = NULL;
24229 	uchar_t			*temperature_page;
24230 	int			rval = 0;
24231 	int			path_flag = SD_PATH_STANDARD;
24232 
24233 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24234 		return (ENXIO);
24235 	}
24236 
24237 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
24238 
24239 	/* copyin the disk temp argument to get the user flags */
24240 	if (ddi_copyin((void *)arg, dktemp,
24241 	    sizeof (struct dk_temperature), flag) != 0) {
24242 		rval = EFAULT;
24243 		goto done;
24244 	}
24245 
24246 	/* Initialize the temperature to invalid. */
24247 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24248 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24249 
24250 	/*
24251 	 * Note: Investigate removing the "bypass pm" semantic.
24252 	 * Can we just bypass PM always?
24253 	 */
24254 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
24255 		path_flag = SD_PATH_DIRECT;
24256 		ASSERT(!mutex_owned(&un->un_pm_mutex));
24257 		mutex_enter(&un->un_pm_mutex);
24258 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
24259 			/*
24260 			 * If DKT_BYPASS_PM is set, and the drive happens to be
24261 			 * in low power mode, we cannot wake it up; we need
24262 			 * to return EAGAIN.
24263 			 */
24264 			mutex_exit(&un->un_pm_mutex);
24265 			rval = EAGAIN;
24266 			goto done;
24267 		} else {
24268 			/*
24269 			 * Indicate to PM the device is busy. This is required
24270 			 * to avoid a race - i.e. the ioctl is issuing a
24271 			 * command and the pm framework brings down the device
24272 			 * to low power mode (possible power cut-off on some
24273 			 * platforms).
24274 			 */
24275 			mutex_exit(&un->un_pm_mutex);
24276 			if (sd_pm_entry(un) != DDI_SUCCESS) {
24277 				rval = EAGAIN;
24278 				goto done;
24279 			}
24280 		}
24281 	}
24282 
24283 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
24284 
24285 	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
24286 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
24287 		goto done2;
24288 	}
24289 
24290 	/*
24291 	 * For the current temperature verify that the parameter length is 0x02
24292 	 * and the parameter code is 0x00
24293 	 */
24294 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
24295 	    (temperature_page[5] == 0x00)) {
24296 		if (temperature_page[9] == 0xFF) {
24297 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24298 		} else {
24299 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
24300 		}
24301 	}
24302 
24303 	/*
24304 	 * For the reference temperature verify that the parameter
24305 	 * length is 0x02 and the parameter code is 0x01
24306 	 */
24307 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
24308 	    (temperature_page[11] == 0x01)) {
24309 		if (temperature_page[15] == 0xFF) {
24310 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24311 		} else {
24312 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
24313 		}
24314 	}
24315 
24316 	/* Do the copyout regardless of the temperature commands status. */
24317 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
24318 	    flag) != 0) {
24319 		rval = EFAULT;
24320 	}
24321 
24322 done2:
24323 	if (path_flag == SD_PATH_DIRECT) {
24324 		sd_pm_exit(un);
24325 	}
24326 
24327 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
24328 done:
24329 	if (dktemp != NULL) {
24330 		kmem_free(dktemp, sizeof (struct dk_temperature));
24331 	}
24332 
24333 	return (rval);
24334 }
24335 
24336 
24337 /*
24338  *    Function: sd_log_page_supported
24339  *
24340  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
24341  *		supported log pages.
24342  *
24343  *   Arguments: un - driver soft state (unit) structure
24344  *		log_page - the log page code to search for
24345  *
24346  * Return Code: -1 - on error (log sense is optional and may not be supported).
24347  *		0  - log page not found.
24348  *		1  - log page found.
24349  */
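/*
 * Layout of the page 0x00 (supported pages) response parsed below:
 * byte 0 is the page code, bytes 2-3 are the page length (only the
 * low byte is used here), and the supported page codes follow, one
 * per byte, starting at byte 4.
 */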
24350 
24351 static int
24352 sd_log_page_supported(struct sd_lun *un, int log_page)
24353 {
24354 	uchar_t *log_page_data;
24355 	int	i;
24356 	int	match = 0;
24357 	int	log_size;
24358 
24359 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
24360 
24361 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
24362 	    SD_PATH_DIRECT) != 0) {
24363 		SD_ERROR(SD_LOG_COMMON, un,
24364 		    "sd_log_page_supported: failed log page retrieval\n");
24365 		kmem_free(log_page_data, 0xFF);
24366 		return (-1);
24367 	}
24368 	log_size = log_page_data[3];
24369 
24370 	/*
24371 	 * The list of supported log pages starts at the fourth byte. Check
24372 	 * until we run out of log pages or a match is found.
24373 	 */
24374 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
24375 		if (log_page_data[i] == log_page) {
24376 			match++;
24377 		}
24378 	}
24379 	kmem_free(log_page_data, 0xFF);
24380 	return (match);
24381 }
24382 
24383 
24384 /*
24385  *    Function: sd_mhdioc_failfast
24386  *
24387  * Description: This routine is the driver entry point for handling ioctl
24388  *		requests to enable/disable the multihost failfast option.
24389  *		(MHIOCENFAILFAST)
24390  *
24391  *   Arguments: dev	- the device number
24392  *		arg	- user specified probing interval.
24393  *		flag	- this argument is a pass through to ddi_copyxxx()
24394  *			  directly from the mode argument of ioctl().
24395  *
24396  * Return Code: 0
24397  *		EFAULT
24398  *		ENXIO
24399  */
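/*
 * Illustrative sketch (not part of the driver): a cluster agent would
 * typically enable failfast with a probe interval in milliseconds, given
 * an open file descriptor fd for the device.
 *
 *	int mh_time = 1000;	(probe once per second)
 *	(void) ioctl(fd, MHIOCENFAILFAST, &mh_time);
 *
 * Passing 0 disables failfast; INT_MAX sets the failfast flag without
 * starting the periodic watch (the SCSI-3 PGR usage noted below).
 */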
24400 
24401 static int
24402 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
24403 {
24404 	struct sd_lun	*un = NULL;
24405 	int		mh_time;
24406 	int		rval = 0;
24407 
24408 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24409 		return (ENXIO);
24410 	}
24411 
24412 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
24413 		return (EFAULT);
24414 
24415 	if (mh_time) {
24416 		mutex_enter(SD_MUTEX(un));
24417 		un->un_resvd_status |= SD_FAILFAST;
24418 		mutex_exit(SD_MUTEX(un));
24419 		/*
24420 		 * If mh_time is INT_MAX, then this ioctl is being used for
24421 		 * SCSI-3 PGR purposes, and we don't need to spawn a watch thread.
24422 		 */
24423 		if (mh_time != INT_MAX) {
24424 			rval = sd_check_mhd(dev, mh_time);
24425 		}
24426 	} else {
24427 		(void) sd_check_mhd(dev, 0);
24428 		mutex_enter(SD_MUTEX(un));
24429 		un->un_resvd_status &= ~SD_FAILFAST;
24430 		mutex_exit(SD_MUTEX(un));
24431 	}
24432 	return (rval);
24433 }
24434 
24435 
24436 /*
24437  *    Function: sd_mhdioc_takeown
24438  *
24439  * Description: This routine is the driver entry point for handling ioctl
24440  *		requests to forcefully acquire exclusive access rights to the
24441  *		multihost disk (MHIOCTKOWN).
24442  *
24443  *   Arguments: dev	- the device number
24444  *		arg	- user provided structure specifying the delay
24445  *			  parameters in milliseconds
24446  *		flag	- this argument is a pass through to ddi_copyxxx()
24447  *			  directly from the mode argument of ioctl().
24448  *
24449  * Return Code: 0
24450  *		EFAULT
24451  *		ENXIO
24452  */
24453 
24454 static int
24455 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
24456 {
24457 	struct sd_lun		*un = NULL;
24458 	struct mhioctkown	*tkown = NULL;
24459 	int			rval = 0;
24460 
24461 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24462 		return (ENXIO);
24463 	}
24464 
24465 	if (arg != NULL) {
24466 		tkown = (struct mhioctkown *)
24467 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
24468 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
24469 		if (rval != 0) {
24470 			rval = EFAULT;
24471 			goto error;
24472 		}
24473 	}
24474 
24475 	rval = sd_take_ownership(dev, tkown);
24476 	mutex_enter(SD_MUTEX(un));
24477 	if (rval == 0) {
24478 		un->un_resvd_status |= SD_RESERVE;
24479 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
24480 			sd_reinstate_resv_delay =
24481 			    tkown->reinstate_resv_delay * 1000;
24482 		} else {
24483 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
24484 		}
24485 		/*
24486 		 * Give precedence here to the scsi_watch interval set by
24487 		 * the MHIOCENFAILFAST ioctl.
24488 		 */
24489 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
24490 			mutex_exit(SD_MUTEX(un));
24491 			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
24492 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
24493 			    "sd_mhdioc_takeown : %d\n",
24494 			    sd_reinstate_resv_delay);
24495 		} else {
24496 			mutex_exit(SD_MUTEX(un));
24497 		}
24498 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
24499 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24500 	} else {
24501 		un->un_resvd_status &= ~SD_RESERVE;
24502 		mutex_exit(SD_MUTEX(un));
24503 	}
24504 
24505 error:
24506 	if (tkown != NULL) {
24507 		kmem_free(tkown, sizeof (struct mhioctkown));
24508 	}
24509 	return (rval);
24510 }
24511 
24512 
24513 /*
24514  *    Function: sd_mhdioc_release
24515  *
24516  * Description: This routine is the driver entry point for handling ioctl
24517  *		requests to release exclusive access rights to the multihost
24518  *		disk (MHIOCRELEASE).
24519  *
24520  *   Arguments: dev	- the device number
24521  *
24522  * Return Code: 0
24523  *		ENXIO
24524  */
24525 
24526 static int
24527 sd_mhdioc_release(dev_t dev)
24528 {
24529 	struct sd_lun		*un = NULL;
24530 	timeout_id_t		resvd_timeid_save;
24531 	int			resvd_status_save;
24532 	int			rval = 0;
24533 
24534 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24535 		return (ENXIO);
24536 	}
24537 
24538 	mutex_enter(SD_MUTEX(un));
24539 	resvd_status_save = un->un_resvd_status;
24540 	un->un_resvd_status &=
24541 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
24542 	if (un->un_resvd_timeid) {
24543 		resvd_timeid_save = un->un_resvd_timeid;
24544 		un->un_resvd_timeid = NULL;
24545 		mutex_exit(SD_MUTEX(un));
24546 		(void) untimeout(resvd_timeid_save);
24547 	} else {
24548 		mutex_exit(SD_MUTEX(un));
24549 	}
24550 
24551 	/*
24552 	 * destroy any pending timeout thread that may be attempting to
24553 	 * reinstate reservation on this device.
24554 	 */
24555 	sd_rmv_resv_reclaim_req(dev);
24556 
24557 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
24558 		mutex_enter(SD_MUTEX(un));
24559 		if ((un->un_mhd_token) &&
24560 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
24561 			mutex_exit(SD_MUTEX(un));
24562 			(void) sd_check_mhd(dev, 0);
24563 		} else {
24564 			mutex_exit(SD_MUTEX(un));
24565 		}
24566 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
24567 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24568 	} else {
24569 		/*
24570 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
24571 		 */
24572 		mutex_enter(SD_MUTEX(un));
24573 		un->un_resvd_status = resvd_status_save;
24574 		mutex_exit(SD_MUTEX(un));
24575 	}
24576 	return (rval);
24577 }
24578 
24579 
24580 /*
24581  *    Function: sd_mhdioc_register_devid
24582  *
24583  * Description: This routine is the driver entry point for handling ioctl
24584  *		requests to register the device id (MHIOCREREGISTERDEVID).
24585  *
24586  *		Note: The implementation for this ioctl has been updated to
24587  *		be consistent with the original PSARC case (1999/357)
24588  *		(4375899, 4241671, 4220005)
24589  *
24590  *   Arguments: dev	- the device number
24591  *
24592  * Return Code: 0
24593  *		ENXIO
24594  */
24595 
24596 static int
24597 sd_mhdioc_register_devid(dev_t dev)
24598 {
24599 	struct sd_lun	*un = NULL;
24600 	int		rval = 0;
24601 
24602 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24603 		return (ENXIO);
24604 	}
24605 
24606 	ASSERT(!mutex_owned(SD_MUTEX(un)));
24607 
24608 	mutex_enter(SD_MUTEX(un));
24609 
24610 	/* If a devid already exists, de-register it */
24611 	if (un->un_devid != NULL) {
24612 		ddi_devid_unregister(SD_DEVINFO(un));
24613 		/*
24614 		 * After unregistering the devid, we need to free its memory.
24615 		 */
24616 		ddi_devid_free(un->un_devid);
24617 		un->un_devid = NULL;
24618 	}
24619 
24620 	/* Check for reservation conflict */
24621 	mutex_exit(SD_MUTEX(un));
24622 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
24623 	mutex_enter(SD_MUTEX(un));
24624 
24625 	switch (rval) {
24626 	case 0:
24627 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
24628 		break;
24629 	case EACCES:
24630 		break;
24631 	default:
24632 		rval = EIO;
24633 	}
24634 
24635 	mutex_exit(SD_MUTEX(un));
24636 	return (rval);
24637 }
24638 
24639 
24640 /*
24641  *    Function: sd_mhdioc_inkeys
24642  *
24643  * Description: This routine is the driver entry point for handling ioctl
24644  *		requests to issue the SCSI-3 Persistent In Read Keys command
24645  *		to the device (MHIOCGRP_INKEYS).
24646  *
24647  *   Arguments: dev	- the device number
24648  *		arg	- user provided in_keys structure
24649  *		flag	- this argument is a pass through to ddi_copyxxx()
24650  *			  directly from the mode argument of ioctl().
24651  *
24652  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
24653  *		ENXIO
24654  *		EFAULT
24655  */
24656 
24657 static int
24658 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
24659 {
24660 	struct sd_lun		*un;
24661 	mhioc_inkeys_t		inkeys;
24662 	int			rval = 0;
24663 
24664 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24665 		return (ENXIO);
24666 	}
24667 
24668 #ifdef _MULTI_DATAMODEL
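	/*
	 * For 32-bit callers, mhioc_inkeys contains a 32-bit pointer to
	 * the key list, so convert the structure to and from the native
	 * form around the IN READ KEYS call.
	 */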
24669 	switch (ddi_model_convert_from(flag & FMODELS)) {
24670 	case DDI_MODEL_ILP32: {
24671 		struct mhioc_inkeys32	inkeys32;
24672 
24673 		if (ddi_copyin(arg, &inkeys32,
24674 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
24675 			return (EFAULT);
24676 		}
24677 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
24678 		if ((rval = sd_persistent_reservation_in_read_keys(un,
24679 		    &inkeys, flag)) != 0) {
24680 			return (rval);
24681 		}
24682 		inkeys32.generation = inkeys.generation;
24683 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
24684 		    flag) != 0) {
24685 			return (EFAULT);
24686 		}
24687 		break;
24688 	}
24689 	case DDI_MODEL_NONE:
24690 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
24691 		    flag) != 0) {
24692 			return (EFAULT);
24693 		}
24694 		if ((rval = sd_persistent_reservation_in_read_keys(un,
24695 		    &inkeys, flag)) != 0) {
24696 			return (rval);
24697 		}
24698 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
24699 		    flag) != 0) {
24700 			return (EFAULT);
24701 		}
24702 		break;
24703 	}
24704 
24705 #else /* ! _MULTI_DATAMODEL */
24706 
24707 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
24708 		return (EFAULT);
24709 	}
24710 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
24711 	if (rval != 0) {
24712 		return (rval);
24713 	}
24714 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
24715 		return (EFAULT);
24716 	}
24717 
24718 #endif /* _MULTI_DATAMODEL */
24719 
24720 	return (rval);
24721 }
24722 
24723 
24724 /*
24725  *    Function: sd_mhdioc_inresv
24726  *
24727  * Description: This routine is the driver entry point for handling ioctl
24728  *		requests to issue the SCSI-3 Persistent In Read Reservations
24729  *		command to the device (MHIOCGRP_INRESV).
24730  *
24731  *   Arguments: dev	- the device number
24732  *		arg	- user provided in_resv structure
24733  *		flag	- this argument is a pass through to ddi_copyxxx()
24734  *			  directly from the mode argument of ioctl().
24735  *
24736  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
24737  *		ENXIO
24738  *		EFAULT
24739  */
24740 
24741 static int
24742 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
24743 {
24744 	struct sd_lun		*un;
24745 	mhioc_inresvs_t		inresvs;
24746 	int			rval = 0;
24747 
24748 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24749 		return (ENXIO);
24750 	}
24751 
24752 #ifdef _MULTI_DATAMODEL
24753 
24754 	switch (ddi_model_convert_from(flag & FMODELS)) {
24755 	case DDI_MODEL_ILP32: {
24756 		struct mhioc_inresvs32	inresvs32;
24757 
24758 		if (ddi_copyin(arg, &inresvs32,
24759 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
24760 			return (EFAULT);
24761 		}
24762 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
24763 		if ((rval = sd_persistent_reservation_in_read_resv(un,
24764 		    &inresvs, flag)) != 0) {
24765 			return (rval);
24766 		}
24767 		inresvs32.generation = inresvs.generation;
24768 		if (ddi_copyout(&inresvs32, arg,
24769 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
24770 			return (EFAULT);
24771 		}
24772 		break;
24773 	}
24774 	case DDI_MODEL_NONE:
24775 		if (ddi_copyin(arg, &inresvs,
24776 		    sizeof (mhioc_inresvs_t), flag) != 0) {
24777 			return (EFAULT);
24778 		}
24779 		if ((rval = sd_persistent_reservation_in_read_resv(un,
24780 		    &inresvs, flag)) != 0) {
24781 			return (rval);
24782 		}
24783 		if (ddi_copyout(&inresvs, arg,
24784 		    sizeof (mhioc_inresvs_t), flag) != 0) {
24785 			return (EFAULT);
24786 		}
24787 		break;
24788 	}
24789 
24790 #else /* ! _MULTI_DATAMODEL */
24791 
24792 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
24793 		return (EFAULT);
24794 	}
24795 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
24796 	if (rval != 0) {
24797 		return (rval);
24798 	}
24799 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
24800 		return (EFAULT);
24801 	}
24802 
24803 #endif /* _MULTI_DATAMODEL */
24804 
24805 	return (rval);
24806 }
24807 
24808 
24809 /*
24810  * The following routines support the clustering functionality described below
24811  * and implement lost reservation reclaim functionality.
24812  *
24813  * Clustering
24814  * ----------
24815  * The clustering code uses two different, independent forms of SCSI
24816  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
24817  * Persistent Group Reservations. For any particular disk, it will use either
24818  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
24819  *
24820  * SCSI-2
24821  * The cluster software takes ownership of a multi-hosted disk by issuing the
24822  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
24823  * MHIOCRELEASE ioctl. Closely related is the MHIOCENFAILFAST ioctl: a cluster,
24824  * just after taking ownership of the disk with the MHIOCTKOWN ioctl, then issues
24825  * the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the driver. The
24826  * meaning of failfast is that if the driver (on this host) ever encounters the
24827  * scsi error return code RESERVATION_CONFLICT from the device, it should
24828  * immediately panic the host. The motivation for this ioctl is that if this
24829  * host does encounter reservation conflict, the underlying cause is that some
24830  * other host of the cluster has decided that this host is no longer in the
24831  * cluster and has seized control of the disks for itself. Since this host is no
24832  * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
24833  * does two things:
24834  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
24835  *      error to panic the host
24836  *      (b) it sets up a periodic timer to test whether this host still has
24837  *      "access" (in that no other host has reserved the device):  if the
24838  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
24839  *      purpose of that periodic timer is to handle scenarios where the host is
24840  *      otherwise temporarily quiescent, temporarily doing no real i/o.
24841  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
24842  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
24843  * the device itself.
24844  *
24845  * SCSI-3 PGR
24846  * A direct semantic implementation of the SCSI-3 Persistent Reservation
24847  * facility is supported through the shared multihost disk ioctls
24848  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
24849  * MHIOCGRP_PREEMPTANDABORT)
24850  *
24851  * Reservation Reclaim:
24852  * --------------------
24853  * To support the lost reservation reclaim operations, this driver creates a
24854  * single thread to handle reinstating reservations on all devices that have
24855  * lost them. sd_resv_reclaim_requests are logged for all devices that have
24856  * LOST RESERVATIONS when the scsi watch facility calls back sd_mhd_watch_cb,
24857  * and the reservation reclaim thread loops through the requests to regain
24858  * the lost reservations.
24859  */
24860 
24861 /*
24862  *    Function: sd_check_mhd()
24863  *
24864  * Description: This function sets up and submits a scsi watch request or
24865  *		terminates an existing watch request. This routine is used in
24866  *		support of reservation reclaim.
24867  *
24868  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
24869  *			 among multiple watches that share the callback function
24870  *		interval - the number of milliseconds specifying the watch
24871  *			   interval for issuing TEST UNIT READY commands. If
24872  *			   set to 0 the watch should be terminated. If the
24873  *			   interval is set to 0 and if the device is required
24874  *			   to hold reservation while disabling failfast, the
24875  *			   watch is restarted with an interval of
24876  *			   reinstate_resv_delay.
24877  *
24878  * Return Code: 0	   - Successful submit/terminate of scsi watch request
24879  *		ENXIO      - Indicates an invalid device was specified
24880  *		EAGAIN     - Unable to submit the scsi watch request
24881  */
24882 
24883 static int
24884 sd_check_mhd(dev_t dev, int interval)
24885 {
24886 	struct sd_lun	*un;
24887 	opaque_t	token;
24888 
24889 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24890 		return (ENXIO);
24891 	}
24892 
24893 	/* is this a watch termination request? */
24894 	if (interval == 0) {
24895 		mutex_enter(SD_MUTEX(un));
24896 		/* if there is an existing watch task then terminate it */
24897 		if (un->un_mhd_token) {
24898 			token = un->un_mhd_token;
24899 			un->un_mhd_token = NULL;
24900 			mutex_exit(SD_MUTEX(un));
24901 			(void) scsi_watch_request_terminate(token,
24902 			    SCSI_WATCH_TERMINATE_WAIT);
24903 			mutex_enter(SD_MUTEX(un));
24904 		} else {
24905 			mutex_exit(SD_MUTEX(un));
24906 			/*
24907 			 * Note: If we return here we don't check for the
24908 			 * failfast case. This is the original legacy
24909 			 * implementation but perhaps we should be checking
24910 			 * the failfast case.
24911 			 */
24912 			return (0);
24913 		}
24914 		/*
24915 		 * If the device is required to hold reservation while
24916 		 * disabling failfast, we need to restart the scsi_watch
24917 		 * routine with an interval of reinstate_resv_delay.
24918 		 */
24919 		if (un->un_resvd_status & SD_RESERVE) {
24920 			interval = sd_reinstate_resv_delay/1000;
24921 		} else {
24922 			/* no failfast so bail */
24923 			mutex_exit(SD_MUTEX(un));
24924 			return (0);
24925 		}
24926 		mutex_exit(SD_MUTEX(un));
24927 	}
24928 
24929 	/*
24930 	 * adjust minimum time interval to 1 second,
24931 	 * and convert from msecs to usecs
24932 	 */
24933 	if (interval > 0 && interval < 1000) {
24934 		interval = 1000;
24935 	}
24936 	interval *= 1000;
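	/* e.g. an interval of 500 is raised to 1000 ms = 1000000 us */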
24937 
24938 	/*
24939 	 * submit the request to the scsi_watch service
24940 	 */
24941 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
24942 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
24943 	if (token == NULL) {
24944 		return (EAGAIN);
24945 	}
24946 
24947 	/*
24948 	 * save token for termination later on
24949 	 */
24950 	mutex_enter(SD_MUTEX(un));
24951 	un->un_mhd_token = token;
24952 	mutex_exit(SD_MUTEX(un));
24953 	return (0);
24954 }
24955 
24956 
24957 /*
24958  *    Function: sd_mhd_watch_cb()
24959  *
24960  * Description: This function is the call back function used by the scsi watch
24961  *		facility. The scsi watch facility sends the "Test Unit Ready"
24962  *		and processes the status. If applicable (i.e. a "Unit Attention"
24963  *		status and automatic "Request Sense" not used) the scsi watch
24964  *		facility will send a "Request Sense" and retrieve the sense data
24965  *		to be passed to this callback function. In either case (the
24966  *		automatic "Request Sense" or the facility submitting one), this
24967  *		callback is passed the status and sense data.
24968  *
24969  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24970  *			among multiple watches that share this callback function
24971  *		resultp - scsi watch facility result packet containing scsi
24972  *			  packet, status byte and sense data
24973  *
24974  * Return Code: 0 - continue the watch task
24975  *		non-zero - terminate the watch task
24976  */
24977 
24978 static int
24979 sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
24980 {
24981 	struct sd_lun			*un;
24982 	struct scsi_status		*statusp;
24983 	struct scsi_extended_sense	*sensep;
24984 	struct scsi_pkt			*pkt;
24985 	uchar_t				actual_sense_length;
24986 	dev_t				dev = (dev_t)arg;
24987 
24988 	ASSERT(resultp != NULL);
24989 	statusp			= resultp->statusp;
24990 	sensep			= resultp->sensep;
24991 	pkt			= resultp->pkt;
24992 	actual_sense_length	= resultp->actual_sense_length;
24993 
24994 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24995 		return (ENXIO);
24996 	}
24997 
24998 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
24999 	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
25000 	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
25001 
25002 	/* Begin processing of the status and/or sense data */
25003 	if (pkt->pkt_reason != CMD_CMPLT) {
25004 		/* Handle the incomplete packet */
25005 		sd_mhd_watch_incomplete(un, pkt);
25006 		return (0);
25007 	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
25008 		if (*((unsigned char *)statusp)
25009 		    == STATUS_RESERVATION_CONFLICT) {
25010 			/*
25011 			 * Handle a reservation conflict by panicking if
25012 			 * configured for failfast or by logging the conflict
25013 			 * and updating the reservation status
25014 			 */
25015 			mutex_enter(SD_MUTEX(un));
25016 			if ((un->un_resvd_status & SD_FAILFAST) &&
25017 			    (sd_failfast_enable)) {
25018 				panic("Reservation Conflict");
25019 				/*NOTREACHED*/
25020 			}
25021 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25022 			    "sd_mhd_watch_cb: Reservation Conflict\n");
25023 			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
25024 			mutex_exit(SD_MUTEX(un));
25025 		}
25026 	}
25027 
25028 	if (sensep != NULL) {
25029 		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
25030 			mutex_enter(SD_MUTEX(un));
25031 			if ((sensep->es_add_code == SD_SCSI_RESET_SENSE_CODE) &&
25032 			    (un->un_resvd_status & SD_RESERVE)) {
25033 				/*
25034 				 * The additional sense code indicates a power
25035 				 * on or bus device reset has occurred; update
25036 				 * the reservation status.
25037 				 */
25038 				un->un_resvd_status |=
25039 				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25040 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25041 				    "sd_mhd_watch_cb: Lost Reservation\n");
25042 			}
25043 		} else {
25044 			return (0);
25045 		}
25046 	} else {
25047 		mutex_enter(SD_MUTEX(un));
25048 	}
25049 
25050 	if ((un->un_resvd_status & SD_RESERVE) &&
25051 	    (un->un_resvd_status & SD_LOST_RESERVE)) {
25052 		if (un->un_resvd_status & SD_WANT_RESERVE) {
25053 			/*
25054 			 * A reset occurred in between the last probe and this
25055 			 * one so if a timeout is pending cancel it.
25056 			 */
25057 			if (un->un_resvd_timeid) {
25058 				timeout_id_t temp_id = un->un_resvd_timeid;
25059 				un->un_resvd_timeid = NULL;
25060 				mutex_exit(SD_MUTEX(un));
25061 				(void) untimeout(temp_id);
25062 				mutex_enter(SD_MUTEX(un));
25063 			}
25064 			un->un_resvd_status &= ~SD_WANT_RESERVE;
25065 		}
25066 		if (un->un_resvd_timeid == 0) {
25067 			/* Schedule a timeout to handle the lost reservation */
25068 			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
25069 			    (void *)dev,
25070 			    drv_usectohz(sd_reinstate_resv_delay));
25071 		}
25072 	}
25073 	mutex_exit(SD_MUTEX(un));
25074 	return (0);
25075 }
25076 
25077 
25078 /*
25079  *    Function: sd_mhd_watch_incomplete()
25080  *
25081  * Description: This function is used to find out why a scsi pkt sent by the
25082  *		scsi watch facility was not completed. Under some scenarios this
25083  *		routine simply returns; otherwise it issues a LUN, target, or
25084  *		bus reset to see if the drive is still online.
25085  *
25086  *   Arguments: un  - driver soft state (unit) structure
25087  *		pkt - incomplete scsi pkt
25088  */
25089 
25090 static void
25091 sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
25092 {
25093 	int	be_chatty;
25094 	int	perr;
25095 
25096 	ASSERT(pkt != NULL);
25097 	ASSERT(un != NULL);
25098 	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
25099 	perr		= (pkt->pkt_statistics & STAT_PERR);
25100 
25101 	mutex_enter(SD_MUTEX(un));
25102 	if (un->un_state == SD_STATE_DUMPING) {
25103 		mutex_exit(SD_MUTEX(un));
25104 		return;
25105 	}
25106 
25107 	switch (pkt->pkt_reason) {
25108 	case CMD_UNX_BUS_FREE:
25109 		/*
25110 		 * If we had a parity error that caused the target to drop BSY*,
25111 		 * don't be chatty about it.
25112 		 */
25113 		if (perr && be_chatty) {
25114 			be_chatty = 0;
25115 		}
25116 		break;
25117 	case CMD_TAG_REJECT:
25118 		/*
25119 		 * The SCSI-2 spec states that a tag reject will be sent by the
25120 		 * target if tagged queuing is not supported. A tag reject may
25121 		 * also be sent during certain initialization periods or to
25122 		 * control internal resources. For the latter case the target
25123 		 * may also return Queue Full.
25124 		 *
25125 		 * If this driver receives a tag reject from a target that is
25126 		 * going through an init period or controlling internal
25127 		 * resources, tagged queuing will be disabled. This is less
25128 		 * than optimal behavior, but the driver is unable to determine
25129 		 * the target state and assumes tagged queuing is not supported.
25130 		 */
25131 		pkt->pkt_flags = 0;
25132 		un->un_tagflags = 0;
25133 
25134 		if (un->un_f_opt_queueing == TRUE) {
25135 			un->un_throttle = min(un->un_throttle, 3);
25136 		} else {
25137 			un->un_throttle = 1;
25138 		}
25139 		mutex_exit(SD_MUTEX(un));
25140 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
25141 		mutex_enter(SD_MUTEX(un));
25142 		break;
25143 	case CMD_INCOMPLETE:
25144 		/*
25145 		 * The transport stopped in an abnormal state; fall through and
25146 		 * reset the target and/or bus, unless selection did not complete
25147 		 * (indicated by STATE_GOT_BUS), in which case we don't want to
25148 		 * go through a target/bus reset.
25149 		 */
25150 		if (pkt->pkt_state == STATE_GOT_BUS) {
25151 			break;
25152 		}
25153 		/*FALLTHROUGH*/
25154 
25155 	case CMD_TIMEOUT:
25156 	default:
25157 		/*
25158 		 * The lun may still be running the command, so a lun reset
25159 		 * should be attempted. If the lun reset fails or cannot be
25160 		 * issued, then try a target reset. Lastly, try a bus reset.
25161 		 */
25162 		if ((pkt->pkt_statistics &
25163 		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
25164 			int reset_retval = 0;
25165 			mutex_exit(SD_MUTEX(un));
25166 			if (un->un_f_allow_bus_device_reset == TRUE) {
25167 				if (un->un_f_lun_reset_enabled == TRUE) {
25168 					reset_retval =
25169 					    scsi_reset(SD_ADDRESS(un),
25170 					    RESET_LUN);
25171 				}
25172 				if (reset_retval == 0) {
25173 					reset_retval =
25174 					    scsi_reset(SD_ADDRESS(un),
25175 					    RESET_TARGET);
25176 				}
25177 			}
25178 			if (reset_retval == 0) {
25179 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
25180 			}
25181 			mutex_enter(SD_MUTEX(un));
25182 		}
25183 		break;
25184 	}
25185 
25186 	/* A device/bus reset has occurred; update the reservation status. */
25187 	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
25188 	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
25189 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25190 			un->un_resvd_status |=
25191 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25192 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25193 			    "sd_mhd_watch_incomplete: Lost Reservation\n");
25194 		}
25195 	}
25196 
25197 	/*
25198 	 * The disk has been turned off; update the device state.
25199 	 *
25200 	 * Note: Should we be offlining the disk here?
25201 	 */
25202 	if (pkt->pkt_state == STATE_GOT_BUS) {
25203 		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
25204 		    "Disk not responding to selection\n");
25205 		if (un->un_state != SD_STATE_OFFLINE) {
25206 			New_state(un, SD_STATE_OFFLINE);
25207 		}
25208 	} else if (be_chatty) {
25209 		/*
25210 		 * suppress messages if they are all the same pkt reason;
25211 		 * with TQ, many (up to 256) are returned with the same
25212 		 * pkt_reason
25213 		 */
25214 		if (pkt->pkt_reason != un->un_last_pkt_reason) {
25215 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25216 			    "sd_mhd_watch_incomplete: "
25217 			    "SCSI transport failed: reason '%s'\n",
25218 			    scsi_rname(pkt->pkt_reason));
25219 		}
25220 	}
25221 	un->un_last_pkt_reason = pkt->pkt_reason;
25222 	mutex_exit(SD_MUTEX(un));
25223 }
25224 
25225 
25226 /*
25227  *    Function: sd_sname()
25228  *
25229  * Description: This is a simple little routine to return a string containing
25230  *		a printable description of a command status byte, for use in
25231  *		logging.
25232  *
25233  *   Arguments: status - the status byte
25234  *
25235  * Return Code: char * - string containing status description.
25236  */
25237 
25238 static char *
25239 sd_sname(uchar_t status)
25240 {
25241 	switch (status & STATUS_MASK) {
25242 	case STATUS_GOOD:
25243 		return ("good status");
25244 	case STATUS_CHECK:
25245 		return ("check condition");
25246 	case STATUS_MET:
25247 		return ("condition met");
25248 	case STATUS_BUSY:
25249 		return ("busy");
25250 	case STATUS_INTERMEDIATE:
25251 		return ("intermediate");
25252 	case STATUS_INTERMEDIATE_MET:
25253 		return ("intermediate - condition met");
25254 	case STATUS_RESERVATION_CONFLICT:
25255 		return ("reservation_conflict");
25256 	case STATUS_TERMINATED:
25257 		return ("command terminated");
25258 	case STATUS_QFULL:
25259 		return ("queue full");
25260 	default:
25261 		return ("<unknown status>");
25262 	}
25263 }
25264 
25265 
25266 /*
25267  *    Function: sd_mhd_resvd_recover()
25268  *
25269  * Description: This function adds a reservation entry to the
25270  *		sd_resv_reclaim_request list and signals the reservation
25271  *		reclaim thread that there is work pending. If the reservation
25272  *		reclaim thread has not been previously created this function
25273  *		will kick it off.
25274  *
25275  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25276  *			among multiple watches that share this callback function
25277  *
25278  *     Context: This routine is called by timeout() and is run in interrupt
25279  *		context. It must not sleep or call other functions which may
25280  *		sleep.
25281  */
25282 
25283 static void
25284 sd_mhd_resvd_recover(void *arg)
25285 {
25286 	dev_t			dev = (dev_t)arg;
25287 	struct sd_lun		*un;
25288 	struct sd_thr_request	*sd_treq = NULL;
25289 	struct sd_thr_request	*sd_cur = NULL;
25290 	struct sd_thr_request	*sd_prev = NULL;
25291 	int			already_there = 0;
25292 
25293 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25294 		return;
25295 	}
25296 
25297 	mutex_enter(SD_MUTEX(un));
25298 	un->un_resvd_timeid = NULL;
25299 	if (un->un_resvd_status & SD_WANT_RESERVE) {
25300 		/*
25301 		 * There was a reset, so don't issue the reserve; allow the
25302 		 * sd_mhd_watch_cb callback function to notice this and
25303 		 * reschedule the timeout for reservation.
25304 		 */
25305 		mutex_exit(SD_MUTEX(un));
25306 		return;
25307 	}
25308 	mutex_exit(SD_MUTEX(un));
25309 
25310 	/*
25311 	 * Add this device to the sd_resv_reclaim_request list and the
25312 	 * sd_resv_reclaim_thread should take care of the rest.
25313 	 *
25314 	 * Note: We can't sleep in this context so if the memory allocation
25315 	 * fails allow the sd_mhd_watch_cb callback function to notice this and
25316 	 * reschedule the timeout for reservation.  (4378460)
25317 	 */
25318 	sd_treq = (struct sd_thr_request *)
25319 	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
25320 	if (sd_treq == NULL) {
25321 		return;
25322 	}
25323 
25324 	sd_treq->sd_thr_req_next = NULL;
25325 	sd_treq->dev = dev;
25326 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25327 	if (sd_tr.srq_thr_req_head == NULL) {
25328 		sd_tr.srq_thr_req_head = sd_treq;
25329 	} else {
25330 		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
25331 		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
25332 			if (sd_cur->dev == dev) {
25333 				/*
25334 				 * Already in the queue, so don't log
25335 				 * another request for the device.
25336 				 */
25337 				already_there = 1;
25338 				break;
25339 			}
25340 			sd_prev = sd_cur;
25341 		}
25342 		if (!already_there) {
25343 			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
25344 			    "logging request for %lx\n", dev);
25345 			sd_prev->sd_thr_req_next = sd_treq;
25346 		} else {
25347 			kmem_free(sd_treq, sizeof (struct sd_thr_request));
25348 		}
25349 	}
25350 
25351 	/*
25352 	 * Create a kernel thread to do the reservation reclaim and free up
25353 	 * this thread. We cannot block in this (timeout) context while the
25354 	 * reservation reclaim is in progress.
25355 	 */
25356 	if (sd_tr.srq_resv_reclaim_thread == NULL)
25357 		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
25358 		    sd_resv_reclaim_thread, NULL,
25359 		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
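	/*
	 * Note: the reclaim thread runs at v.v_maxsyspri - 2, near the
	 * maximum system priority, presumably so reservation recovery is
	 * not starved by ordinary kernel activity.
	 */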
25360 
25361 	/* Tell the reservation reclaim thread that it has work to do */
25362 	cv_signal(&sd_tr.srq_resv_reclaim_cv);
25363 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25364 }
25365 
25366 /*
25367  *    Function: sd_resv_reclaim_thread()
25368  *
25369  * Description: This function implements the reservation reclaim operations.
25370  *
25371  *   Arguments: none; work requests are taken from the global
25372  *		      sd_resv_reclaim_request list (sd_tr).
25373  */
25374 
25375 static void
25376 sd_resv_reclaim_thread()
25377 {
25378 	struct sd_lun		*un;
25379 	struct sd_thr_request	*sd_mhreq;
25380 
25381 	/* Wait for work */
25382 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25383 	if (sd_tr.srq_thr_req_head == NULL) {
25384 		cv_wait(&sd_tr.srq_resv_reclaim_cv,
25385 		    &sd_tr.srq_resv_reclaim_mutex);
25386 	}
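	/*
	 * Note: a spurious wakeup with an empty request list is benign;
	 * the while loop below is skipped, this thread cleans up and
	 * exits, and sd_mhd_resvd_recover() will create a fresh thread
	 * when the next request is queued.
	 */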
25387 
25388 	/* Loop while we have work */
25389 	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
25390 		un = ddi_get_soft_state(sd_state,
25391 		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
25392 		if (un == NULL) {
25393 			/*
25394 			 * softstate structure is NULL so just
25395 			 * dequeue the request and continue
25396 			 */
25397 			sd_tr.srq_thr_req_head =
25398 			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25399 			kmem_free(sd_tr.srq_thr_cur_req,
25400 			    sizeof (struct sd_thr_request));
25401 			continue;
25402 		}
25403 
25404 		/* dequeue the request */
25405 		sd_mhreq = sd_tr.srq_thr_cur_req;
25406 		sd_tr.srq_thr_req_head =
25407 		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25408 		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25409 
25410 		/*
25411 		 * Reclaim reservation only if SD_RESERVE is still set. There
25412 		 * may have been a call to MHIOCRELEASE before we got here.
25413 		 */
25414 		mutex_enter(SD_MUTEX(un));
25415 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25416 			/*
25417 			 * Note: The SD_LOST_RESERVE flag is cleared before
25418 			 * reclaiming the reservation. If this were done after the
25419 			 * call to sd_reserve_release(), a reservation loss in the
25420 			 * window between pkt completion of the reserve cmd and
25421 			 * the mutex_enter below might not be recognized.
25422 			 */
25423 			un->un_resvd_status &= ~SD_LOST_RESERVE;
25424 			mutex_exit(SD_MUTEX(un));
25425 
25426 			if (sd_reserve_release(sd_mhreq->dev,
25427 			    SD_RESERVE) == 0) {
25428 				mutex_enter(SD_MUTEX(un));
25429 				un->un_resvd_status |= SD_RESERVE;
25430 				mutex_exit(SD_MUTEX(un));
25431 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25432 				    "sd_resv_reclaim_thread: "
25433 				    "Reservation Recovered\n");
25434 			} else {
25435 				mutex_enter(SD_MUTEX(un));
25436 				un->un_resvd_status |= SD_LOST_RESERVE;
25437 				mutex_exit(SD_MUTEX(un));
25438 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25439 				    "sd_resv_reclaim_thread: Failed "
25440 				    "Reservation Recovery\n");
25441 			}
25442 		} else {
25443 			mutex_exit(SD_MUTEX(un));
25444 		}
25445 		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25446 		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
25447 		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25448 		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
25449 		/*
25450 		 * wakeup the destroy thread if anyone is waiting on
25451 		 * us to complete.
25452 		 */
25453 		cv_signal(&sd_tr.srq_inprocess_cv);
25454 		SD_TRACE(SD_LOG_IOCTL_MHD, un,
25455 		    "sd_resv_reclaim_thread: cv_signalling current request \n");
25456 	}
25457 
25458 	/*
25459 	 * Clean up the sd_tr structure now that this thread is exiting.
25460 	 */
25461 	ASSERT(sd_tr.srq_thr_req_head == NULL);
25462 	ASSERT(sd_tr.srq_thr_cur_req == NULL);
25463 	sd_tr.srq_resv_reclaim_thread = NULL;
25464 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25465 	thread_exit();
25466 }
25467 
25468 
25469 /*
25470  *    Function: sd_rmv_resv_reclaim_req()
25471  *
25472  * Description: This function removes any pending reservation reclaim requests
25473  *		for the specified device.
25474  *
25475  *   Arguments: dev - the device 'dev_t'
25476  */
25477 
25478 static void
25479 sd_rmv_resv_reclaim_req(dev_t dev)
25480 {
25481 	struct sd_thr_request *sd_mhreq;
25482 	struct sd_thr_request *sd_prev;
25483 
25484 	/* Remove a reservation reclaim request from the list */
25485 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25486 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
25487 		/*
25488 		 * We are attempting to reinstate reservation for
25489 		 * this device. We wait for sd_reserve_release()
25490 		 * to return before we return.
25491 		 */
25492 		cv_wait(&sd_tr.srq_inprocess_cv,
25493 		    &sd_tr.srq_resv_reclaim_mutex);
25494 	} else {
25495 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
25496 		if (sd_mhreq && sd_mhreq->dev == dev) {
25497 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
25498 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25499 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25500 			return;
25501 		}
25502 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
25503 			if (sd_mhreq && sd_mhreq->dev == dev) {
25504 				break;
25505 			}
25506 			sd_prev = sd_mhreq;
25507 		}
25508 		if (sd_mhreq != NULL) {
25509 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
25510 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25511 		}
25512 	}
25513 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25514 }
25515 
25516 
25517 /*
25518  *    Function: sd_mhd_reset_notify_cb()
25519  *
25520  * Description: This is a call back function for scsi_reset_notify. This
25521  *		function updates the softstate reserved status and logs the
25522  *		reset. The driver scsi watch facility callback function
25523  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
25524  *		will reclaim the reservation.
25525  *
25526  *   Arguments: arg  - driver soft state (unit) structure
25527  */
25528 
25529 static void
25530 sd_mhd_reset_notify_cb(caddr_t arg)
25531 {
25532 	struct sd_lun *un = (struct sd_lun *)arg;
25533 
25534 	mutex_enter(SD_MUTEX(un));
25535 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25536 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
25537 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25538 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
25539 	}
25540 	mutex_exit(SD_MUTEX(un));
25541 }
25542 
25543 
25544 /*
25545  *    Function: sd_take_ownership()
25546  *
25547  * Description: This routine implements an algorithm to achieve a stable
25548  *		reservation on disks which don't implement priority reserve,
25549  *		and makes sure that other hosts' re-reservation attempts fail.
25550  *		The algorithm consists of a loop that keeps issuing RESERVE
25551  *		for some period of time (min_ownership_delay, default 6 seconds).
25552  *		During that loop, it looks to see if there has been a bus device
25553  *		reset or bus reset (both of which cause an existing reservation
25554  *		to be lost). If the reservation is lost, RESERVE is issued until
25555  *		a period of min_ownership_delay with no resets has gone by, or
25556  *		until max_ownership_delay has expired. This loop ensures that
25557  *		the host really did manage to reserve the device, in spite of
25558  *		resets. The looping for min_ownership_delay (default six
25559  *		seconds) is important to early generation clustering products,
25560  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
25561  *		MHIOCENFAILFAST periodic timer of two seconds. By having
25562  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
25563  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
25564  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
25565  *		have already noticed, via the MHIOCENFAILFAST polling, that it
25566  *		no longer "owns" the disk and will have panicked itself.  Thus,
25567  *		the host issuing the MHIOCTKOWN is assured (with timing
25568  *		dependencies) that by the time it actually starts to use the
25569  *		disk for real work, the old owner is no longer accessing it.
25570  *
25571  *		min_ownership_delay is the minimum amount of time for which the
25572  *		disk must be reserved continuously devoid of resets before the
25573  *		MHIOCTKOWN ioctl will return success.
25574  *
25575  *		max_ownership_delay is the maximum amount of time within which
25576  *		take ownership must succeed, or the ioctl fails with an error.
25577  *
25578  *   Arguments: dev - the device 'dev_t'
25579  *		*p  - struct containing timing info.
25580  *
25581  * Return Code: 0 for success or error code
25582  */
25583 
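/*
 * Worked example (a sketch, not normative): with the defaults below the
 * loop polls every 0.5 sec, so a stable reservation requires roughly
 * twelve consecutive conflict-free RESERVE probes (reservation_count
 * must also reach 4), while max_ownership_delay caps the whole attempt
 * at about 30 sec (60 probes) before failing with EACCES. A hypothetical
 * userland caller might look like:
 *
 *	struct mhioctkown t;
 *	bzero(&t, sizeof (t));		(zero fields select the defaults)
 *	t.min_ownership_delay = 6000;	(msec)
 *	t.max_ownership_delay = 30000;	(msec)
 *	if (ioctl(fd, MHIOCTKOWN, &t) != 0)
 *		... take ownership failed ...
 */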
25584 static int
25585 sd_take_ownership(dev_t dev, struct mhioctkown *p)
25586 {
25587 	struct sd_lun	*un;
25588 	int		rval;
25589 	int		err;
25590 	int		reservation_count   = 0;
25591 	int		min_ownership_delay =  6000000; /* in usec */
25592 	int		max_ownership_delay = 30000000; /* in usec */
25593 	clock_t		start_time;	/* starting time of this algorithm */
25594 	clock_t		end_time;	/* time limit for giving up */
25595 	clock_t		ownership_time;	/* time limit for stable ownership */
25596 	clock_t		current_time;
25597 	clock_t		previous_current_time;
25598 
25599 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25600 		return (ENXIO);
25601 	}
25602 
25603 	/*
25604 	 * Attempt a device reservation. A priority reservation is requested.
25605 	 */
25606 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
25607 	    != SD_SUCCESS) {
25608 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
25609 		    "sd_take_ownership: return(1)=%d\n", rval);
25610 		return (rval);
25611 	}
25612 
25613 	/* Update the softstate reserved status to indicate the reservation */
25614 	mutex_enter(SD_MUTEX(un));
25615 	un->un_resvd_status |= SD_RESERVE;
25616 	un->un_resvd_status &=
25617 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
25618 	mutex_exit(SD_MUTEX(un));
25619 
25620 	if (p != NULL) {
25621 		if (p->min_ownership_delay != 0) {
25622 			min_ownership_delay = p->min_ownership_delay * 1000;
25623 		}
25624 		if (p->max_ownership_delay != 0) {
25625 			max_ownership_delay = p->max_ownership_delay * 1000;
25626 		}
25627 	}
25628 	SD_INFO(SD_LOG_IOCTL_MHD, un,
25629 	    "sd_take_ownership: min, max delays: %d, %d\n",
25630 	    min_ownership_delay, max_ownership_delay);
25631 
25632 	start_time = ddi_get_lbolt();
25633 	current_time	= start_time;
25634 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
25635 	end_time	= start_time + drv_usectohz(max_ownership_delay);
25636 
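	/*
	 * Note: the loop conditions use signed differences of lbolt values
	 * (e.g. "current_time - end_time < 0") rather than direct
	 * comparisons, presumably so they stay correct if the lbolt
	 * counter wraps.
	 */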
25637 	while (current_time - end_time < 0) {
25638 		delay(drv_usectohz(500000));
25639 
25640 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
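			/*
			 * The first RESERVE failed; retry once before giving
			 * up and mapping the failure to EACCES or EIO below.
			 */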
25641 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
25642 				mutex_enter(SD_MUTEX(un));
25643 				rval = (un->un_resvd_status &
25644 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
25645 				mutex_exit(SD_MUTEX(un));
25646 				break;
25647 			}
25648 		}
25649 		previous_current_time = current_time;
25650 		current_time = ddi_get_lbolt();
25651 		mutex_enter(SD_MUTEX(un));
25652 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
25653 			ownership_time = ddi_get_lbolt() +
25654 			    drv_usectohz(min_ownership_delay);
25655 			reservation_count = 0;
25656 		} else {
25657 			reservation_count++;
25658 		}
25659 		un->un_resvd_status |= SD_RESERVE;
25660 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
25661 		mutex_exit(SD_MUTEX(un));
25662 
25663 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25664 		    "sd_take_ownership: ticks for loop iteration=%ld, "
25665 		    "reservation=%s\n", (current_time - previous_current_time),
25666 		    reservation_count ? "ok" : "reclaimed");
25667 
25668 		if (current_time - ownership_time >= 0 &&
25669 		    reservation_count >= 4) {
25670 			rval = 0; /* Achieved a stable ownership */
25671 			break;
25672 		}
25673 		if (current_time - end_time >= 0) {
25674 			rval = EACCES; /* No ownership in max possible time */
25675 			break;
25676 		}
25677 	}
25678 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
25679 	    "sd_take_ownership: return(2)=%d\n", rval);
25680 	return (rval);
25681 }
25682 
25683 
25684 /*
25685  *    Function: sd_reserve_release()
25686  *
25687  * Description: This function builds and sends scsi RESERVE, RELEASE, and
25688  *		PRIORITY RESERVE commands based on a user specified command type
25689  *
25690  *   Arguments: dev - the device 'dev_t'
25691  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
25692  *		      SD_RESERVE, SD_RELEASE
25693  *
25694  * Return Code: 0 or Error Code
25695  */
25696 
25697 static int
25698 sd_reserve_release(dev_t dev, int cmd)
25699 {
25700 	struct uscsi_cmd	*com = NULL;
25701 	struct sd_lun		*un = NULL;
25702 	char			cdb[CDB_GROUP0];
25703 	int			rval;
25704 
25705 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
25706 	    (cmd == SD_PRIORITY_RESERVE));
25707 
25708 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25709 		return (ENXIO);
25710 	}
25711 
25712 	/* instantiate and initialize the command and cdb */
25713 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25714 	bzero(cdb, CDB_GROUP0);
25715 	com->uscsi_flags   = USCSI_SILENT;
25716 	com->uscsi_timeout = un->un_reserve_release_time;
25717 	com->uscsi_cdblen  = CDB_GROUP0;
25718 	com->uscsi_cdb	   = cdb;
25719 	if (cmd == SD_RELEASE) {
25720 		cdb[0] = SCMD_RELEASE;
25721 	} else {
25722 		cdb[0] = SCMD_RESERVE;
25723 	}
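	/*
	 * Note that SD_PRIORITY_RESERVE also builds a plain SCMD_RESERVE
	 * CDB here; the "priority" behavior comes from the reset-and-retry
	 * logic below when the first attempt gets a reservation conflict.
	 */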
25724 
25725 	/* Send the command. */
25726 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
25727 	    UIO_SYSSPACE, SD_PATH_STANDARD);
25728 
25729 	/*
25730 	 * "break" a reservation that is held by another host, by issuing a
25731 	 * reset if priority reserve is desired, and we could not get the
25732 	 * device.
25733 	 */
25734 	if ((cmd == SD_PRIORITY_RESERVE) &&
25735 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
25736 		/*
25737 		 * First try to reset the LUN. If we cannot, then try a target
25738 		 * reset, followed by a bus reset if the target reset fails.
25739 		 */
25740 		int reset_retval = 0;
25741 		if (un->un_f_lun_reset_enabled == TRUE) {
25742 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
25743 		}
25744 		if (reset_retval == 0) {
25745 			/* The LUN reset either failed or was not issued */
25746 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
25747 		}
25748 		if ((reset_retval == 0) &&
25749 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
25750 			rval = EIO;
25751 			kmem_free(com, sizeof (*com));
25752 			return (rval);
25753 		}
25754 
25755 		bzero(com, sizeof (struct uscsi_cmd));
25756 		com->uscsi_flags   = USCSI_SILENT;
25757 		com->uscsi_cdb	   = cdb;
25758 		com->uscsi_cdblen  = CDB_GROUP0;
25759 		com->uscsi_timeout = 5;
25760 
25761 		/*
25762 		 * Reissue the last reserve command, this time without request
25763 		 * sense.  Assume that it is just a regular reserve command.
25764 		 */
25765 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
25766 		    UIO_SYSSPACE, SD_PATH_STANDARD);
25767 	}
25768 
25769 	/* Return an error if still getting a reservation conflict. */
25770 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
25771 		rval = EACCES;
25772 	}
25773 
25774 	kmem_free(com, sizeof (*com));
25775 	return (rval);
25776 }
25777 
25778 
25779 #define	SD_NDUMP_RETRIES	12
25780 /*
25781  *	System Crash Dump routine
25782  */
25783 
25784 static int
25785 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
25786 {
25787 	int		instance;
25788 	int		partition;
25789 	int		i;
25790 	int		err;
25791 	struct sd_lun	*un;
25792 	struct dk_map	*lp;
25793 	struct scsi_pkt *wr_pktp;
25794 	struct buf	*wr_bp;
25795 	struct buf	wr_buf;
25796 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
25797 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
25798 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
25799 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
25800 	size_t		io_start_offset;
25801 	int		doing_rmw = FALSE;
25802 	int		rval;
25803 #if defined(__i386) || defined(__amd64)
25804 	ssize_t dma_resid;
25805 	daddr_t oblkno;
25806 #endif
25807 
25808 	instance = SDUNIT(dev);
25809 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
25810 	    (!un->un_f_geometry_is_valid) || ISCD(un)) {
25811 		return (ENXIO);
25812 	}
25813 
25814 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
25815 
25816 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
25817 
25818 	partition = SDPART(dev);
25819 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
25820 
25821 	/* Validate the blocks to dump against the partition size. */
25822 	lp = &un->un_map[partition];
25823 	if ((blkno + nblk) > lp->dkl_nblk) {
25824 		SD_TRACE(SD_LOG_DUMP, un,
25825 		    "sddump: dump range larger than partition: "
25826 		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
25827 		    blkno, nblk, lp->dkl_nblk);
25828 		return (EINVAL);
25829 	}
25830 
25831 	mutex_enter(&un->un_pm_mutex);
25832 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
25833 		struct scsi_pkt *start_pktp;
25834 
25835 		mutex_exit(&un->un_pm_mutex);
25836 
25837 		/*
25838 		 * Use the PM framework to power on the HBA first.
25839 		 */
25840 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
25841 
25842 		/*
25843 		 * Dump no longer uses sdpower to power on a device; the
25844 		 * sequence is in-line here so it can be done in polled mode.
25845 		 */
25846 
25847 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
25848 
25849 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
25850 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
25851 
25852 		if (start_pktp == NULL) {
25853 			/* We were not given a SCSI packet, fail. */
25854 			return (EIO);
25855 		}
25856 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
25857 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
25858 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
25859 		start_pktp->pkt_flags = FLAG_NOINTR;
25860 
25861 		mutex_enter(SD_MUTEX(un));
25862 		SD_FILL_SCSI1_LUN(un, start_pktp);
25863 		mutex_exit(SD_MUTEX(un));
25864 		/*
25865 		 * Scsi_poll returns 0 (success) if the command completes and
25866 		 * the status block is STATUS_GOOD.
25867 		 */
25868 		if (sd_scsi_poll(un, start_pktp) != 0) {
25869 			scsi_destroy_pkt(start_pktp);
25870 			return (EIO);
25871 		}
25872 		scsi_destroy_pkt(start_pktp);
25873 		(void) sd_ddi_pm_resume(un);
25874 	} else {
25875 		mutex_exit(&un->un_pm_mutex);
25876 	}
25877 
25878 	mutex_enter(SD_MUTEX(un));
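	/*
	 * Zero the throttle, presumably to keep the normal I/O start path
	 * from issuing commands while the dump proceeds in polled mode.
	 */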
25879 	un->un_throttle = 0;
25880 
25881 	/*
25882 	 * The first time through, reset the specific target device.
25883 	 * However, when cpr calls sddump we know that sd is in a good
25884 	 * state, so no bus reset is required.
25885 	 * Clear sense data via a Request Sense cmd.
25886 	 * In sddump we don't care about allow_bus_device_reset anymore.
25887 	 */
25888 
25889 	if ((un->un_state != SD_STATE_SUSPENDED) &&
25890 	    (un->un_state != SD_STATE_DUMPING)) {
25891 
25892 		New_state(un, SD_STATE_DUMPING);
25893 
25894 		if (un->un_f_is_fibre == FALSE) {
25895 			mutex_exit(SD_MUTEX(un));
25896 			/*
25897 			 * Attempt a bus reset for parallel scsi.
25898 			 *
25899 			 * Note: A bus reset is required because on some host
25900 			 * systems (i.e. E420R) a bus device reset is
25901 			 * insufficient to reset the state of the target.
25902 			 *
25903 			 * Note: Don't issue the reset for fibre-channel,
25904 			 * because this tends to hang the bus (loop) for
25905 			 * too long while everyone is logging out and in
25906 			 * and the deadman timer for dumping will fire
25907 			 * before the dump is complete.
25908 			 */
25909 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
25910 				mutex_enter(SD_MUTEX(un));
25911 				Restore_state(un);
25912 				mutex_exit(SD_MUTEX(un));
25913 				return (EIO);
25914 			}
25915 
25916 			/* Delay to give the device some recovery time. */
25917 			drv_usecwait(10000);
25918 
25919 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
25920 				SD_INFO(SD_LOG_DUMP, un,
25921 					"sddump: sd_send_polled_RQS failed\n");
25922 			}
25923 			mutex_enter(SD_MUTEX(un));
25924 		}
25925 	}
25926 
25927 	/*
25928 	 * Convert the partition-relative block number to a
25929 	 * disk physical block number.
25930 	 */
25931 	blkno += un->un_offset[partition];
25932 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
25933 
25934 
25935 	/*
25936 	 * Check if the device has a non-512 block size.
25937 	 */
25938 	wr_bp = NULL;
25939 	if (NOT_DEVBSIZE(un)) {
25940 		tgt_byte_offset = blkno * un->un_sys_blocksize;
25941 		tgt_byte_count = nblk * un->un_sys_blocksize;
25942 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
25943 		    (tgt_byte_count % un->un_tgt_blocksize)) {
25944 			doing_rmw = TRUE;
25945 			/*
25946 			 * Calculate the block number and number of blocks
25947 			 * in terms of the media block size.
25948 			 */
25949 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
25950 			tgt_nblk =
25951 			    ((tgt_byte_offset + tgt_byte_count +
25952 				(un->un_tgt_blocksize - 1)) /
25953 				un->un_tgt_blocksize) - tgt_blkno;
25954 
25955 			/*
25956 			 * Invoke the routine which is going to do the read
25957 			 * part of the read-modify-write.
25958 			 * Note that this routine returns a pointer to
25959 			 * a valid bp in wr_bp.
25960 			 */
25961 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
25962 			    &wr_bp);
25963 			if (err) {
25964 				mutex_exit(SD_MUTEX(un));
25965 				return (err);
25966 			}
25967 			/*
25968 			 * The offset is calculated as:
25969 			 * (original block # * system block size) -
25970 			 * (new block # * target block size)
25971 			 */
25972 			io_start_offset =
25973 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
25974 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
25975 
25976 			ASSERT((io_start_offset >= 0) &&
25977 			    (io_start_offset < un->un_tgt_blocksize));
25978 			/*
25979 			 * Do the modify portion of read modify write.
25980 			 */
25981 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
25982 			    (size_t)nblk * un->un_sys_blocksize);
25983 		} else {
25984 			doing_rmw = FALSE;
25985 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
25986 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
25987 		}
25988 
25989 		/* Convert blkno and nblk to target blocks */
25990 		blkno = tgt_blkno;
25991 		nblk = tgt_nblk;
25992 	} else {
25993 		wr_bp = &wr_buf;
25994 		bzero(wr_bp, sizeof (struct buf));
25995 		wr_bp->b_flags		= B_BUSY;
25996 		wr_bp->b_un.b_addr	= addr;
25997 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
25998 		wr_bp->b_resid		= 0;
25999 	}
26000 
26001 	mutex_exit(SD_MUTEX(un));
26002 
26003 	/*
26004 	 * Obtain a SCSI packet for the write command.
26005 	 * It should be safe to call the allocator here without
26006 	 * worrying about being locked for DVMA mapping because
26007 	 * the address we're passed is already a DVMA mapping
26008 	 *
26009 	 * We are also not going to worry about semaphore ownership
26010 	 * in the dump buffer. Dumping is single threaded at present.
26011 	 */
26012 
26013 	wr_pktp = NULL;
26014 
26015 #if defined(__i386) || defined(__amd64)
26016 	dma_resid = wr_bp->b_bcount;
26017 	oblkno = blkno;
26018 	while (dma_resid != 0) {
26019 #endif
26020 
26021 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26022 		wr_bp->b_flags &= ~B_ERROR;
26023 
26024 #if defined(__i386) || defined(__amd64)
26025 		blkno = oblkno +
26026 			((wr_bp->b_bcount - dma_resid) /
26027 			    un->un_tgt_blocksize);
26028 		nblk = dma_resid / un->un_tgt_blocksize;
26029 
26030 		if (wr_pktp) {
26031 			/* Partial DMA transfers after initial transfer */
26032 			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
26033 			    blkno, nblk);
26034 		} else {
26035 			/* Initial transfer */
26036 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26037 			    un->un_pkt_flags, NULL_FUNC, NULL,
26038 			    blkno, nblk);
26039 		}
26040 #else
26041 		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26042 		    0, NULL_FUNC, NULL, blkno, nblk);
26043 #endif
26044 
26045 		if (rval == 0) {
26046 			/* We were given a SCSI packet, continue. */
26047 			break;
26048 		}
26049 
26050 		if (i == 0) {
26051 			if (wr_bp->b_flags & B_ERROR) {
26052 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26053 				    "no resources for dumping; "
26054 				    "error code: 0x%x, retrying",
26055 				    geterror(wr_bp));
26056 			} else {
26057 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26058 				    "no resources for dumping; retrying");
26059 			}
26060 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
26061 			if (wr_bp->b_flags & B_ERROR) {
26062 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26063 				    "no resources for dumping; error code: "
26064 				    "0x%x, retrying\n", geterror(wr_bp));
26065 			}
26066 		} else {
26067 			if (wr_bp->b_flags & B_ERROR) {
26068 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26069 				    "no resources for dumping; "
26070 				    "error code: 0x%x, retries failed, "
26071 				    "giving up.\n", geterror(wr_bp));
26072 			} else {
26073 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26074 				    "no resources for dumping; "
26075 				    "retries failed, giving up.\n");
26076 			}
26077 			mutex_enter(SD_MUTEX(un));
26078 			Restore_state(un);
26079 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
26080 				mutex_exit(SD_MUTEX(un));
26081 				scsi_free_consistent_buf(wr_bp);
26082 			} else {
26083 				mutex_exit(SD_MUTEX(un));
26084 			}
26085 			return (EIO);
26086 		}
26087 		drv_usecwait(10000);
26088 	}
26089 
26090 #if defined(__i386) || defined(__amd64)
26091 	/*
26092 	 * Save the residual from the PARTIAL_DMA transfer.
26093 	 */
26094 	dma_resid = wr_pktp->pkt_resid;
26095 	if (dma_resid != 0)
26096 		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
26097 	wr_pktp->pkt_resid = 0;
26098 #endif
26099 
26100 	/* SunBug 1222170 */
26101 	wr_pktp->pkt_flags = FLAG_NOINTR;
26102 
26103 	err = EIO;
26104 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26105 
26106 		/*
26107 		 * Scsi_poll returns 0 (success) if the command completes and
26108 		 * the status block is STATUS_GOOD.  We should only check
26109 		 * errors if this condition is not true.  Even then we should
26110 		 * send our own request sense packet only if we have a check
26111 		 * condition and auto request sense has not been performed by
26112 		 * the hba.
26113 		 */
26114 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
26115 
26116 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
26117 		    (wr_pktp->pkt_resid == 0)) {
26118 			err = SD_SUCCESS;
26119 			break;
26120 		}
26121 
26122 		/*
26123 		 * Check CMD_DEV_GONE first; give up if the device is gone.
26124 		 */
26125 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
26126 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26127 			    "Device is gone\n");
26128 			break;
26129 		}
26130 
26131 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
26132 			SD_INFO(SD_LOG_DUMP, un,
26133 			    "sddump: write failed with CHECK, try # %d\n", i);
26134 			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
26135 				(void) sd_send_polled_RQS(un);
26136 			}
26137 
26138 			continue;
26139 		}
26140 
26141 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
26142 			int reset_retval = 0;
26143 
26144 			SD_INFO(SD_LOG_DUMP, un,
26145 			    "sddump: write failed with BUSY, try # %d\n", i);
26146 
26147 			if (un->un_f_lun_reset_enabled == TRUE) {
26148 				reset_retval = scsi_reset(SD_ADDRESS(un),
26149 				    RESET_LUN);
26150 			}
26151 			if (reset_retval == 0) {
26152 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26153 			}
26154 			(void) sd_send_polled_RQS(un);
26155 
26156 		} else {
26157 			SD_INFO(SD_LOG_DUMP, un,
26158 			    "sddump: write failed with 0x%x, try # %d\n",
26159 			    SD_GET_PKT_STATUS(wr_pktp), i);
26160 			mutex_enter(SD_MUTEX(un));
26161 			sd_reset_target(un, wr_pktp);
26162 			mutex_exit(SD_MUTEX(un));
26163 		}
26164 
26165 		/*
26166 		 * If we are not getting anywhere with lun/target resets,
26167 		 * let's reset the bus.
26168 		 */
26169 		if (i == SD_NDUMP_RETRIES/2) {
26170 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26171 			(void) sd_send_polled_RQS(un);
26172 		}
26173 
26174 	}
26175 #if defined(__i386) || defined(__amd64)
26176 	}	/* dma_resid */
26177 #endif
26178 
26179 	scsi_destroy_pkt(wr_pktp);
26180 	mutex_enter(SD_MUTEX(un));
26181 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
26182 		mutex_exit(SD_MUTEX(un));
26183 		scsi_free_consistent_buf(wr_bp);
26184 	} else {
26185 		mutex_exit(SD_MUTEX(un));
26186 	}
26187 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
26188 	return (err);
26189 }
26190 
26191 /*
26192  *    Function: sd_scsi_poll()
26193  *
26194  * Description: This is a wrapper for the scsi_poll call.
26195  *
26196  *   Arguments: sd_lun - The unit structure
26197  *              scsi_pkt - The scsi packet being sent to the device.
26198  *
26199  * Return Code: 0 - Command completed successfully with good status
26200  *             -1 - Command failed.  This could indicate a check condition
26201  *                  or other status value requiring recovery action.
26202  *
26203  */
26204 
26205 static int
26206 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
26207 {
26208 	int status;
26209 
26210 	ASSERT(un != NULL);
26211 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26212 	ASSERT(pktp != NULL);
26213 
26214 	status = SD_SUCCESS;
26215 
26216 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
26217 		pktp->pkt_flags |= un->un_tagflags;
26218 		pktp->pkt_flags &= ~FLAG_NODISCON;
26219 	}
26220 
26221 	status = sd_ddi_scsi_poll(pktp);
26222 	/*
26223 	 * Scsi_poll returns 0 (success) if the command completes and the
26224 	 * status block is STATUS_GOOD.  We should only check errors if this
26225 	 * condition is not true.  Even then we should send our own request
26226 	 * sense packet only if we have a check condition and auto
26227 	 * request sense has not been performed by the hba.
26228 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
26229 	 */
26230 	if ((status != SD_SUCCESS) &&
26231 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
26232 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
26233 	    (pktp->pkt_reason != CMD_DEV_GONE))
26234 		(void) sd_send_polled_RQS(un);
26235 
26236 	return (status);
26237 }
26238 
26239 /*
26240  *    Function: sd_send_polled_RQS()
26241  *
26242  * Description: This sends the request sense command to a device.
26243  *
26244  *   Arguments: sd_lun - The unit structure
26245  *
26246  * Return Code: 0 - Command completed successfully with good status
26247  *             -1 - Command failed.
26248  *
26249  */
26250 
26251 static int
26252 sd_send_polled_RQS(struct sd_lun *un)
26253 {
26254 	int	ret_val;
26255 	struct	scsi_pkt	*rqs_pktp;
26256 	struct	buf		*rqs_bp;
26257 
26258 	ASSERT(un != NULL);
26259 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26260 
26261 	ret_val = SD_SUCCESS;
26262 
26263 	rqs_pktp = un->un_rqs_pktp;
26264 	rqs_bp	 = un->un_rqs_bp;
26265 
26266 	mutex_enter(SD_MUTEX(un));
26267 
26268 	if (un->un_sense_isbusy) {
26269 		ret_val = SD_FAILURE;
26270 		mutex_exit(SD_MUTEX(un));
26271 		return (ret_val);
26272 	}
26273 
26274 	/*
26275 	 * The request sense buffer (and packet) is not in use, so set
26276 	 * un_sense_isbusy and send our packet.
26277 	 */
26278 	un->un_sense_isbusy 	= 1;
26279 	rqs_pktp->pkt_resid  	= 0;
26280 	rqs_pktp->pkt_reason 	= 0;
26281 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
26282 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
26283 
26284 	mutex_exit(SD_MUTEX(un));
26285 
26286 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
26287 	    " 0x%p\n", rqs_bp->b_un.b_addr);
26288 
26289 	/*
26290 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
26291 	 * axle - it has a call into us!
26292 	 */
26293 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
26294 		SD_INFO(SD_LOG_COMMON, un,
26295 		    "sd_send_polled_RQS: RQS failed\n");
26296 	}
26297 
26298 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
26299 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
26300 
26301 	mutex_enter(SD_MUTEX(un));
26302 	un->un_sense_isbusy = 0;
26303 	mutex_exit(SD_MUTEX(un));
26304 
26305 	return (ret_val);
26306 }
26307 
26308 /*
26309  * Defines needed for localized version of the scsi_poll routine.
26310  */
26311 #define	SD_CSEC		10000			/* usecs */
26312 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
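/*
 * SD_CSEC is the basic polling interval (10000 usec, i.e. one 10-msec
 * "centisecond" tick); SD_SEC_TO_CSEC (100) converts seconds of timeout
 * budget into a count of those intervals.
 */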
26313 
26314 
26315 /*
26316  *    Function: sd_ddi_scsi_poll()
26317  *
26318  * Description: Localized version of the scsi_poll routine.  The purpose is to
26319  *		send a scsi_pkt to a device as a polled command.  This version
26320  *		is to ensure more robust handling of transport errors.
26321  *		Specifically, this routine cures the not-ready-to-ready
26322  *		transition for power up and reset of Sonoma arrays.  This can
26323  *		take up to 45 seconds for power-on and 20 seconds for reset
26324  *		of a Sonoma lun.
26325  *
26326  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
26327  *
26328  * Return Code: 0 - Command completed successfully with good status
26329  *             -1 - Command failed.
26330  *
26331  */
26332 
26333 static int
26334 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
26335 {
26336 	int busy_count;
26337 	int timeout;
26338 	int rval = SD_FAILURE;
26339 	int savef;
26340 	struct scsi_extended_sense *sensep;
26341 	long savet;
26342 	void (*savec)();
26343 	/*
26344 	 * The following is defined in machdep.c and is used in determining if
26345 	 * the scsi transport system will do polled I/O instead of interrupt
26346 	 * I/O when called from xx_dump().
26347 	 */
26348 	extern int do_polled_io;
26349 
26350 	/*
26351 	 * Save the old flags in the pkt so they can be restored at the end.
26352 	 */
26353 	savef = pkt->pkt_flags;
26354 	savec = pkt->pkt_comp;
26355 	savet = pkt->pkt_time;
26356 
26357 	pkt->pkt_flags |= FLAG_NOINTR;
26358 
26359 	/*
26360 	 * XXX there is nothing in the SCSA spec that states that we should not
26361 	 * do a callback for polled cmds; however, removing this will break sd
26362 	 * and probably other target drivers
26363 	 */
26364 	pkt->pkt_comp = NULL;
26365 
26366 	/*
26367 	 * We don't like a polled command without a timeout;
26368 	 * 60 seconds seems long enough.
26369 	 */
26370 	if (pkt->pkt_time == 0) {
26371 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
26372 	}
26373 
26374 	/*
26375 	 * Send polled cmd.
26376 	 *
26377 	 * We do some error recovery for various errors.  Tran_busy,
26378 	 * queue full, and non-dispatched commands are retried every 10 msec,
26379 	 * as they are typically transient failures.  Busy status and Not
26380 	 * Ready are retried every second, as these statuses take a while to
26381 	 * change.  Unit Attention is retried up to pkt_time (60) times
26382 	 * with no delay.
26383 	 */
26384 	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;
26385 
26386 	for (busy_count = 0; busy_count < timeout; busy_count++) {
26387 		int rc;
26388 		int poll_delay;
26389 
26390 		/*
26391 		 * Initialize pkt status variables.
26392 		 */
26393 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
26394 
26395 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
26396 			if (rc != TRAN_BUSY) {
26397 				/* Transport failed - give up. */
26398 				break;
26399 			} else {
26400 				/* Transport busy - try again. */
26401 				poll_delay = 1 * SD_CSEC; /* 10 msec */
26402 			}
26403 		} else {
26404 			/*
26405 			 * Transport accepted - check pkt status.
26406 			 */
26407 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
26408 			if (pkt->pkt_reason == CMD_CMPLT &&
26409 			    rc == STATUS_CHECK &&
26410 			    pkt->pkt_state & STATE_ARQ_DONE) {
26411 				struct scsi_arq_status *arqstat =
26412 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
26413 
26414 				sensep = &arqstat->sts_sensedata;
26415 			} else {
26416 				sensep = NULL;
26417 			}
26418 
26419 			if ((pkt->pkt_reason == CMD_CMPLT) &&
26420 			    (rc == STATUS_GOOD)) {
26421 				/* No error - we're done */
26422 				rval = SD_SUCCESS;
26423 				break;
26424 
26425 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
26426 				/* Lost connection - give up */
26427 				break;
26428 
26429 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
26430 			    (pkt->pkt_state == 0)) {
26431 				/* Pkt not dispatched - try again. */
26432 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26433 
26434 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26435 			    (rc == STATUS_QFULL)) {
26436 				/* Queue full - try again. */
26437 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26438 
26439 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26440 			    (rc == STATUS_BUSY)) {
26441 				/* Busy - try again. */
26442 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
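				/*
				 * The 1-sec delay below consumes a full
				 * second of the timeout budget, so advance
				 * busy_count by SD_SEC_TO_CSEC - 1 extra
				 * intervals to keep pkt_time honest.
				 */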
26443 				busy_count += (SD_SEC_TO_CSEC - 1);
26444 
26445 			} else if ((sensep != NULL) &&
26446 			    (sensep->es_key == KEY_UNIT_ATTENTION)) {
26447 				/* Unit Attention - try again */
26448 				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 */
26449 				continue;
26450 
26451 			} else if ((sensep != NULL) &&
26452 			    (sensep->es_key == KEY_NOT_READY) &&
26453 			    (sensep->es_add_code == 0x04) &&
26454 			    (sensep->es_qual_code == 0x01)) {
26455 				/* Not ready -> ready - try again. */
26456 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
26457 				busy_count += (SD_SEC_TO_CSEC - 1);
26458 
26459 			} else {
26460 				/* BAD status - give up. */
26461 				break;
26462 			}
26463 		}
26464 
26465 		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
26466 		    !do_polled_io) {
26467 			delay(drv_usectohz(poll_delay));
26468 		} else {
26469 			/* Busy-wait during cpr_dump or in interrupt threads. */
26470 			drv_usecwait(poll_delay);
26471 		}
26472 	}
26473 
26474 	pkt->pkt_flags = savef;
26475 	pkt->pkt_comp = savec;
26476 	pkt->pkt_time = savet;
26477 	return (rval);
26478 }
26479 
26480 
26481 /*
26482  *    Function: sd_persistent_reservation_in_read_keys
26483  *
26484  * Description: This routine is the driver entry point for handling CD-ROM
26485  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
26486  *		by sending the SCSI-3 PRIN commands to the device.
26487  *		Processes the read keys command response by copying the
26488  *		reservation key information into the user provided buffer.
26489  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
26490  *
26491  *   Arguments: un   -  Pointer to soft state struct for the target.
26492  *		usrp -	user provided pointer to multihost Persistent In Read
26493  *			Keys structure (mhioc_inkeys_t)
26494  *		flag -	this argument is a pass through to ddi_copyxxx()
26495  *			directly from the mode argument of ioctl().
26496  *
26497  * Return Code: 0   - Success
26498  *		EACCES
26499  *		ENOTSUP
26500  *		errno return code from sd_send_scsi_cmd()
26501  *
26502  *     Context: Can sleep. Does not return until command is completed.
26503  */
26504 
26505 static int
26506 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
26507     mhioc_inkeys_t *usrp, int flag)
26508 {
26509 #ifdef _MULTI_DATAMODEL
26510 	struct mhioc_key_list32	li32;
26511 #endif
26512 	sd_prin_readkeys_t	*in;
26513 	mhioc_inkeys_t		*ptr;
26514 	mhioc_key_list_t	li;
26515 	uchar_t			*data_bufp;
26516 	int 			data_len;
26517 	int			rval;
26518 	size_t			copysz;
26519 
26520 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
26521 		return (EINVAL);
26522 	}
26523 	bzero(&li, sizeof (mhioc_key_list_t));
26524 
26525 	/*
26526 	 * Get the listsize from user
26527 	 */
26528 #ifdef _MULTI_DATAMODEL
26529 
26530 	switch (ddi_model_convert_from(flag & FMODELS)) {
26531 	case DDI_MODEL_ILP32:
26532 		copysz = sizeof (struct mhioc_key_list32);
26533 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
26534 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26535 			    "sd_persistent_reservation_in_read_keys: "
26536 			    "failed ddi_copyin: mhioc_key_list32_t\n");
26537 			rval = EFAULT;
26538 			goto done;
26539 		}
26540 		li.listsize = li32.listsize;
26541 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
26542 		break;
26543 
26544 	case DDI_MODEL_NONE:
26545 		copysz = sizeof (mhioc_key_list_t);
26546 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26547 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26548 			    "sd_persistent_reservation_in_read_keys: "
26549 			    "failed ddi_copyin: mhioc_key_list_t\n");
26550 			rval = EFAULT;
26551 			goto done;
26552 		}
26553 		break;
26554 	}
26555 
26556 #else /* ! _MULTI_DATAMODEL */
26557 	copysz = sizeof (mhioc_key_list_t);
26558 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26559 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26560 		    "sd_persistent_reservation_in_read_keys: "
26561 		    "failed ddi_copyin: mhioc_key_list_t\n");
26562 		rval = EFAULT;
26563 		goto done;
26564 	}
26565 #endif
26566 
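	/*
	 * Size the PRIN READ KEYS buffer: room for listsize keys plus the
	 * response header. The subtraction assumes sd_prin_readkeys_t ends
	 * with a caddr_t-sized placeholder for the key list itself.
	 */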
26567 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
26568 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
26569 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26570 
26571 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
26572 	    data_len, data_bufp)) != 0) {
26573 		goto done;
26574 	}
26575 	in = (sd_prin_readkeys_t *)data_bufp;
26576 	ptr->generation = BE_32(in->generation);
26577 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
26578 
26579 	/*
26580 	 * Return the min(listsize, listlen) keys
26581 	 */
26582 #ifdef _MULTI_DATAMODEL
26583 
26584 	switch (ddi_model_convert_from(flag & FMODELS)) {
26585 	case DDI_MODEL_ILP32:
26586 		li32.listlen = li.listlen;
26587 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
26588 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26589 			    "sd_persistent_reservation_in_read_keys: "
26590 			    "failed ddi_copyout: mhioc_key_list32_t\n");
26591 			rval = EFAULT;
26592 			goto done;
26593 		}
26594 		break;
26595 
26596 	case DDI_MODEL_NONE:
26597 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26598 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26599 			    "sd_persistent_reservation_in_read_keys: "
26600 			    "failed ddi_copyout: mhioc_key_list_t\n");
26601 			rval = EFAULT;
26602 			goto done;
26603 		}
26604 		break;
26605 	}
26606 
26607 #else /* ! _MULTI_DATAMODEL */
26608 
26609 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26610 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26611 		    "sd_persistent_reservation_in_read_keys: "
26612 		    "failed ddi_copyout: mhioc_key_list_t\n");
26613 		rval = EFAULT;
26614 		goto done;
26615 	}
26616 
26617 #endif /* _MULTI_DATAMODEL */
26618 
26619 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
26620 	    li.listsize * MHIOC_RESV_KEY_SIZE);
26621 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
26622 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26623 		    "sd_persistent_reservation_in_read_keys: "
26624 		    "failed ddi_copyout: keylist\n");
26625 		rval = EFAULT;
26626 	}
26627 done:
26628 	kmem_free(data_bufp, data_len);
26629 	return (rval);
26630 }
26631 
26632 
26633 /*
26634  *    Function: sd_persistent_reservation_in_read_resv
26635  *
26636  * Description: This routine is the driver entry point for handling CD-ROM
26637  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
26638  *		by sending the SCSI-3 PRIN commands to the device.
26639  *		Process the read persistent reservations command response by
26640  *		copying the reservation information into the user provided
26641  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
26642  *
26643  *   Arguments: un   -  Pointer to soft state struct for the target.
26644  *		usrp -	user provided pointer to multihost Persistent In Read
26645  *			Keys structure (mhioc_inkeys_t)
26646  *		flag -	this argument is a pass through to ddi_copyxxx()
26647  *			directly from the mode argument of ioctl().
26648  *
26649  * Return Code: 0   - Success
26650  *		EACCES
26651  *		ENOTSUP
26652  *		errno return code from sd_send_scsi_cmd()
26653  *
26654  *     Context: Can sleep. Does not return until command is completed.
26655  */
26656 
26657 static int
26658 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
26659     mhioc_inresvs_t *usrp, int flag)
26660 {
26661 #ifdef _MULTI_DATAMODEL
26662 	struct mhioc_resv_desc_list32 resvlist32;
26663 #endif
26664 	sd_prin_readresv_t	*in;
26665 	mhioc_inresvs_t		*ptr;
26666 	sd_readresv_desc_t	*readresv_ptr;
26667 	mhioc_resv_desc_list_t	resvlist;
26668 	mhioc_resv_desc_t 	resvdesc;
26669 	uchar_t			*data_bufp;
26670 	int 			data_len;
26671 	int			rval;
26672 	int			i;
26673 	size_t			copysz;
26674 	mhioc_resv_desc_t	*bufp;
26675 
26676 	if ((ptr = usrp) == NULL) {
26677 		return (EINVAL);
26678 	}
26679 
26680 	/*
26681 	 * Get the listsize from user
26682 	 */
26683 #ifdef _MULTI_DATAMODEL
26684 	switch (ddi_model_convert_from(flag & FMODELS)) {
26685 	case DDI_MODEL_ILP32:
26686 		copysz = sizeof (struct mhioc_resv_desc_list32);
26687 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
26688 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26689 			    "sd_persistent_reservation_in_read_resv: "
26690 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26691 			rval = EFAULT;
26692 			goto done;
26693 		}
26694 		resvlist.listsize = resvlist32.listsize;
26695 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
26696 		break;
26697 
26698 	case DDI_MODEL_NONE:
26699 		copysz = sizeof (mhioc_resv_desc_list_t);
26700 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
26701 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26702 			    "sd_persistent_reservation_in_read_resv: "
26703 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26704 			rval = EFAULT;
26705 			goto done;
26706 		}
26707 		break;
26708 	}
26709 #else /* ! _MULTI_DATAMODEL */
26710 	copysz = sizeof (mhioc_resv_desc_list_t);
26711 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
26712 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26713 		    "sd_persistent_reservation_in_read_resv: "
26714 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26715 		rval = EFAULT;
26716 		goto done;
26717 	}
26718 #endif /* ! _MULTI_DATAMODEL */
26719 
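	/*
	 * As in the read-keys path above, size the buffer for listsize
	 * descriptors plus the response header, assuming the trailing
	 * caddr_t in sd_prin_readresv_t is a placeholder for the list.
	 */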
26720 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
26721 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
26722 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26723 
26724 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
26725 	    data_len, data_bufp)) != 0) {
26726 		goto done;
26727 	}
26728 	in = (sd_prin_readresv_t *)data_bufp;
26729 	ptr->generation = BE_32(in->generation);
26730 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
26731 
26732 	/*
26733 	 * Return the min(listsize, listlen) reservation descriptors.
26734 	 */
26735 #ifdef _MULTI_DATAMODEL
26736 
26737 	switch (ddi_model_convert_from(flag & FMODELS)) {
26738 	case DDI_MODEL_ILP32:
26739 		resvlist32.listlen = resvlist.listlen;
26740 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
26741 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26742 			    "sd_persistent_reservation_in_read_resv: "
26743 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26744 			rval = EFAULT;
26745 			goto done;
26746 		}
26747 		break;
26748 
26749 	case DDI_MODEL_NONE:
26750 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
26751 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26752 			    "sd_persistent_reservation_in_read_resv: "
26753 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26754 			rval = EFAULT;
26755 			goto done;
26756 		}
26757 		break;
26758 	}
26759 
26760 #else /* ! _MULTI_DATAMODEL */
26761 
26762 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
26763 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26764 		    "sd_persistent_reservation_in_read_resv: "
26765 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26766 		rval = EFAULT;
26767 		goto done;
26768 	}
26769 
26770 #endif /* ! _MULTI_DATAMODEL */
26771 
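	/*
	 * Walk the returned descriptors, converting each scope-specific
	 * address from big-endian wire format before copying it out.
	 */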
26772 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
26773 	bufp = resvlist.list;
26774 	copysz = sizeof (mhioc_resv_desc_t);
26775 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
26776 	    i++, readresv_ptr++, bufp++) {
26777 
26778 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
26779 		    MHIOC_RESV_KEY_SIZE);
26780 		resvdesc.type  = readresv_ptr->type;
26781 		resvdesc.scope = readresv_ptr->scope;
26782 		resvdesc.scope_specific_addr =
26783 		    BE_32(readresv_ptr->scope_specific_addr);
26784 
26785 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
26786 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26787 			    "sd_persistent_reservation_in_read_resv: "
26788 			    "failed ddi_copyout: resvlist\n");
26789 			rval = EFAULT;
26790 			goto done;
26791 		}
26792 	}
26793 done:
26794 	kmem_free(data_bufp, data_len);
26795 	return (rval);
26796 }
26797 
26798 
26799 /*
26800  *    Function: sr_change_blkmode()
26801  *
26802  * Description: This routine is the driver entry point for handling CD-ROM
26803  *		block mode ioctl requests. Support for returning and changing
26804  *		the current block size in use by the device is implemented. The
26805  *		LBA size is changed via a MODE SELECT Block Descriptor.
26806  *
26807  *		This routine issues a mode sense with an allocation length of
26808  *		12 bytes for the mode page header and a single block descriptor.
26809  *
26810  *   Arguments: dev - the device 'dev_t'
26811  *		cmd - the request type; one of CDROMGBLKMODE (get) or
26812  *		      CDROMSBLKMODE (set)
26813  *		data - current block size or requested block size
26814  *		flag - this argument is a pass through to ddi_copyxxx() directly
26815  *		       from the mode argument of ioctl().
26816  *
26817  * Return Code: the code returned by sd_send_scsi_cmd()
26818  *		EINVAL if invalid arguments are provided
26819  *		EFAULT if ddi_copyxxx() fails
26820  *		ENXIO if ddi_get_soft_state() fails
26821  *		EIO if invalid mode sense block descriptor length
26822  *
26823  */
26824 
26825 static int
26826 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
26827 {
26828 	struct sd_lun			*un = NULL;
26829 	struct mode_header		*sense_mhp, *select_mhp;
26830 	struct block_descriptor		*sense_desc, *select_desc;
26831 	int				current_bsize;
26832 	int				rval = EINVAL;
26833 	uchar_t				*sense = NULL;
26834 	uchar_t				*select = NULL;
26835 
26836 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
26837 
26838 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26839 		return (ENXIO);
26840 	}
26841 
26842 	/*
26843 	 * The block length is changed via the Mode Select block descriptor;
26844 	 * the "Read/Write Error Recovery" mode page (0x1) contents are not
26845 	 * actually required by this routine. Therefore the mode sense
26846 	 * allocation length is specified to be the length of a mode page
26847 	 * header plus a block descriptor.
26848 	 */
26849 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
26850 
26851 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
26852 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
26853 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26854 		    "sr_change_blkmode: Mode Sense Failed\n");
26855 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26856 		return (rval);
26857 	}
26858 
26859 	/* Check the block descriptor len to handle only 1 block descriptor */
26860 	sense_mhp = (struct mode_header *)sense;
26861 	if ((sense_mhp->bdesc_length == 0) ||
26862 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
26863 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26864 		    "sr_change_blkmode: Mode Sense returned invalid block"
26865 		    " descriptor length\n");
26866 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26867 		return (EIO);
26868 	}
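	/* The block size is a 24-bit big-endian value in the descriptor. */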
26869 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
26870 	current_bsize = ((sense_desc->blksize_hi << 16) |
26871 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
26872 
26873 	/* Process command */
26874 	switch (cmd) {
26875 	case CDROMGBLKMODE:
26876 		/* Return the block size obtained during the mode sense */
26877 		if (ddi_copyout(&current_bsize, (void *)data,
26878 		    sizeof (int), flag) != 0)
26879 			rval = EFAULT;
26880 		break;
26881 	case CDROMSBLKMODE:
26882 		/* Validate the requested block size */
26883 		switch (data) {
26884 		case CDROM_BLK_512:
26885 		case CDROM_BLK_1024:
26886 		case CDROM_BLK_2048:
26887 		case CDROM_BLK_2056:
26888 		case CDROM_BLK_2336:
26889 		case CDROM_BLK_2340:
26890 		case CDROM_BLK_2352:
26891 		case CDROM_BLK_2368:
26892 		case CDROM_BLK_2448:
26893 		case CDROM_BLK_2646:
26894 		case CDROM_BLK_2647:
26895 			break;
26896 		default:
26897 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26898 			    "sr_change_blkmode: "
26899 			    "Block Size '%ld' Not Supported\n", data);
26900 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26901 			return (EINVAL);
26902 		}
26903 
26904 		/*
26905 		 * The current block size matches the requested block size so
26906 		 * there is no need to send the mode select to change the size
26907 		 */
26908 		if (current_bsize == data) {
26909 			break;
26910 		}
26911 
26912 		/* Build the select data for the requested block size */
26913 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
26914 		select_mhp = (struct mode_header *)select;
26915 		select_desc =
26916 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
26917 		/*
26918 		 * The LBA size is changed via the block descriptor, so the
26919 		 * descriptor is built according to the user data
26920 		 */
26921 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
26922 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
26923 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
26924 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
26925 
26926 		/* Send the mode select for the requested block size */
26927 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
26928 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
26929 		    SD_PATH_STANDARD)) != 0) {
26930 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26931 			    "sr_change_blkmode: Mode Select Failed\n");
26932 			/*
26933 			 * The mode select failed for the requested block size,
26934 			 * so reset the data for the original block size and
26935 			 * send it to the target. The error is indicated by the
26936 			 * return value for the failed mode select.
26937 			 */
26938 			select_desc->blksize_hi  = sense_desc->blksize_hi;
26939 			select_desc->blksize_mid = sense_desc->blksize_mid;
26940 			select_desc->blksize_lo  = sense_desc->blksize_lo;
26941 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
26942 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
26943 			    SD_PATH_STANDARD);
26944 		} else {
26945 			ASSERT(!mutex_owned(SD_MUTEX(un)));
26946 			mutex_enter(SD_MUTEX(un));
26947 			sd_update_block_info(un, (uint32_t)data, 0);
26948 
26949 			mutex_exit(SD_MUTEX(un));
26950 		}
26951 		break;
26952 	default:
26953 		/* should not reach here, but check anyway */
26954 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26955 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
26956 		rval = EINVAL;
26957 		break;
26958 	}
26959 
26960 	if (select) {
26961 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
26962 	}
26963 	if (sense) {
26964 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26965 	}
26966 	return (rval);
26967 }
26968 
26969 
26970 /*
26971  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
26972  * implement driver support for getting and setting the CD speed. The command
26973  * set used will be based on the device type. If the device has not been
26974  * identified as MMC the Toshiba vendor specific mode page will be used. If
26975  * the device is MMC but does not support the Real Time Streaming feature
26976  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
26977  * be used to read the speed.
26978  */
26979 
26980 /*
26981  *    Function: sr_change_speed()
26982  *
26983  * Description: This routine is the driver entry point for handling CD-ROM
26984  *		drive speed ioctl requests for devices supporting the Toshiba
26985  *		vendor specific drive speed mode page. Support for returning
26986  *		and changing the current drive speed in use by the device is
26987  *		implemented.
26988  *
26989  *   Arguments: dev - the device 'dev_t'
26990  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
26991  *		      CDROMSDRVSPEED (set)
26992  *		data - current drive speed or requested drive speed
26993  *		flag - this argument is a pass through to ddi_copyxxx() directly
26994  *		       from the mode argument of ioctl().
26995  *
26996  * Return Code: the code returned by sd_send_scsi_cmd()
26997  *		EINVAL if invalid arguments are provided
26998  *		EFAULT if ddi_copyxxx() fails
26999  *		ENXIO if ddi_get_soft_state() fails
27000  *		EIO if invalid mode sense block descriptor length
27001  */
27002 
27003 static int
27004 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27005 {
27006 	struct sd_lun			*un = NULL;
27007 	struct mode_header		*sense_mhp, *select_mhp;
27008 	struct mode_speed		*sense_page, *select_page;
27009 	int				current_speed;
27010 	int				rval = EINVAL;
27011 	int				bd_len;
27012 	uchar_t				*sense = NULL;
27013 	uchar_t				*select = NULL;
27014 
27015 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27016 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27017 		return (ENXIO);
27018 	}
27019 
27020 	/*
27021 	 * Note: The drive speed is being modified here according to a Toshiba
27022 	 * vendor specific mode page (0x31).
27023 	 */
27024 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27025 
27026 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27027 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
27028 	    SD_PATH_STANDARD)) != 0) {
27029 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27030 		    "sr_change_speed: Mode Sense Failed\n");
27031 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27032 		return (rval);
27033 	}
27034 	sense_mhp  = (struct mode_header *)sense;
27035 
27036 	/* Check the block descriptor len to handle only 1 block descriptor */
27037 	bd_len = sense_mhp->bdesc_length;
27038 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27039 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27040 		    "sr_change_speed: Mode Sense returned invalid block "
27041 		    "descriptor length\n");
27042 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27043 		return (EIO);
27044 	}
27045 
27046 	sense_page = (struct mode_speed *)
27047 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
27048 	current_speed = sense_page->speed;
27049 
27050 	/* Process command */
27051 	switch (cmd) {
27052 	case CDROMGDRVSPEED:
27053 		/* Return the drive speed obtained during the mode sense */
27054 		if (current_speed == 0x2) {
27055 			current_speed = CDROM_TWELVE_SPEED;
27056 		}
27057 		if (ddi_copyout(&current_speed, (void *)data,
27058 		    sizeof (int), flag) != 0) {
27059 			rval = EFAULT;
27060 		}
27061 		break;
27062 	case CDROMSDRVSPEED:
27063 		/* Validate the requested drive speed */
27064 		switch ((uchar_t)data) {
27065 		case CDROM_TWELVE_SPEED:
27066 			data = 0x2;
27067 			/*FALLTHROUGH*/
27068 		case CDROM_NORMAL_SPEED:
27069 		case CDROM_DOUBLE_SPEED:
27070 		case CDROM_QUAD_SPEED:
27071 		case CDROM_MAXIMUM_SPEED:
27072 			break;
27073 		default:
27074 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27075 			    "sr_change_speed: "
27076 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
27077 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27078 			return (EINVAL);
27079 		}
27080 
27081 		/*
27082 		 * The current drive speed matches the requested drive speed so
27083 		 * there is no need to send the mode select to change the speed
27084 		 */
27085 		if (current_speed == data) {
27086 			break;
27087 		}
27088 
27089 		/* Build the select data for the requested drive speed */
27090 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27091 		select_mhp = (struct mode_header *)select;
27092 		select_mhp->bdesc_length = 0;
27093 		select_page =
27094 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27097 		select_page->mode_page.code = CDROM_MODE_SPEED;
27098 		select_page->mode_page.length = 2;
27099 		select_page->speed = (uchar_t)data;
27100 
27101 		/* Send the mode select for the requested drive speed */
27102 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27103 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27104 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
27105 			/*
27106 			 * The mode select failed for the requested drive speed,
27107 			 * so reset the data for the original drive speed and
27108 			 * send it to the target. The error is indicated by the
27109 			 * return value for the failed mode select.
27110 			 */
27111 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27112 			    "sr_change_speed: Mode Select Failed\n");
27113 			select_page->speed = sense_page->speed;
27114 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27115 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27116 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27117 		}
27118 		break;
27119 	default:
27120 		/* should not reach here, but check anyway */
27121 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27122 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
27123 		rval = EINVAL;
27124 		break;
27125 	}
27126 
27127 	if (select) {
27128 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
27129 	}
27130 	if (sense) {
27131 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27132 	}
27133 
27134 	return (rval);
27135 }
27136 
27137 
27138 /*
27139  *    Function: sr_atapi_change_speed()
27140  *
27141  * Description: This routine is the driver entry point for handling CD-ROM
27142  *		drive speed ioctl requests for MMC devices that do not support
27143  *		the Real Time Streaming feature (0x107).
27144  *
27145  *		Note: This routine will use the SET SPEED command which may not
27146  *		be supported by all devices.
27147  *
27148  *   Arguments: dev	- the device 'dev_t'
27149  *		cmd	- the request type; one of CDROMGDRVSPEED (get) or
27150  *			  CDROMSDRVSPEED (set)
27151  *		data	- current drive speed or requested drive speed
27152  *		flag	- this argument is a pass through to ddi_copyxxx()
27153  *			  directly from the mode argument of ioctl().
27154  *
27155  * Return Code: the code returned by sd_send_scsi_cmd()
27156  *		EINVAL if invalid arguments are provided
27157  *		EFAULT if ddi_copyxxx() fails
27158  *		ENXIO if ddi_get_soft_state() fails
27159  *		EIO if invalid mode sense block descriptor length
27160  */
27161 
27162 static int
27163 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27164 {
27165 	struct sd_lun			*un;
27166 	struct uscsi_cmd		*com = NULL;
27167 	struct mode_header_grp2		*sense_mhp;
27168 	uchar_t				*sense_page;
27169 	uchar_t				*sense = NULL;
27170 	char				cdb[CDB_GROUP5];
27171 	int				bd_len;
27172 	int				current_speed = 0;
27173 	int				max_speed = 0;
27174 	int				rval;
27175 
27176 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27177 
27178 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27179 		return (ENXIO);
27180 	}
27181 
27182 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
27183 
27184 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
27185 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
27186 	    SD_PATH_STANDARD)) != 0) {
27187 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27188 		    "sr_atapi_change_speed: Mode Sense Failed\n");
27189 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27190 		return (rval);
27191 	}
27192 
27193 	/* Check the block descriptor len to handle only 1 block descriptor */
27194 	sense_mhp = (struct mode_header_grp2 *)sense;
27195 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
27196 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27197 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27198 		    "sr_atapi_change_speed: Mode Sense returned invalid "
27199 		    "block descriptor length\n");
27200 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27201 		return (EIO);
27202 	}
27203 
27204 	/* Calculate the current and maximum drive speeds */
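	/* Both values in mode page 0x2A are expressed in KB/sec. */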
27205 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
27206 	current_speed = (sense_page[14] << 8) | sense_page[15];
27207 	max_speed = (sense_page[8] << 8) | sense_page[9];
27208 
27209 	/* Process the command */
27210 	switch (cmd) {
27211 	case CDROMGDRVSPEED:
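		/* Report the speed as a multiple of the 1X (SD_SPEED_1X) rate */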
27212 		current_speed /= SD_SPEED_1X;
27213 		if (ddi_copyout(&current_speed, (void *)data,
27214 		    sizeof (int), flag) != 0)
27215 			rval = EFAULT;
27216 		break;
27217 	case CDROMSDRVSPEED:
27218 		/* Convert the speed code to KB/sec */
27219 		switch ((uchar_t)data) {
27220 		case CDROM_NORMAL_SPEED:
27221 			current_speed = SD_SPEED_1X;
27222 			break;
27223 		case CDROM_DOUBLE_SPEED:
27224 			current_speed = 2 * SD_SPEED_1X;
27225 			break;
27226 		case CDROM_QUAD_SPEED:
27227 			current_speed = 4 * SD_SPEED_1X;
27228 			break;
27229 		case CDROM_TWELVE_SPEED:
27230 			current_speed = 12 * SD_SPEED_1X;
27231 			break;
27232 		case CDROM_MAXIMUM_SPEED:
27233 			current_speed = 0xffff;
27234 			break;
27235 		default:
27236 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27237 			    "sr_atapi_change_speed: invalid drive speed %d\n",
27238 			    (uchar_t)data);
27239 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27240 			return (EINVAL);
27241 		}
27242 
27243 		/* Check the request against the drive's max speed. */
27244 		if (current_speed != 0xffff) {
27245 			if (current_speed > max_speed) {
27246 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27247 				return (EINVAL);
27248 			}
27249 		}
27250 
27251 		/*
27252 		 * Build and send the SET SPEED command
27253 		 *
27254 		 * Note: The SET SPEED (0xBB) command used in this routine is
27255 		 * obsolete per the SCSI MMC spec but still supported in the
27256 		 * MT FUJI vendor spec. Most equipment adheres to MT FUJI,
27257 		 * so the command is still implemented in this routine.
27258 		 */
27259 		bzero(cdb, sizeof (cdb));
27260 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
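		/* Bytes 2-3 carry the requested read speed in KB/sec (big-endian) */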
27261 		cdb[2] = (uchar_t)(current_speed >> 8);
27262 		cdb[3] = (uchar_t)current_speed;
27263 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27264 		com->uscsi_cdb	   = (caddr_t)cdb;
27265 		com->uscsi_cdblen  = CDB_GROUP5;
27266 		com->uscsi_bufaddr = NULL;
27267 		com->uscsi_buflen  = 0;
27268 		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
27269 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, 0,
27270 		    UIO_SYSSPACE, SD_PATH_STANDARD);
27271 		break;
27272 	default:
27273 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27274 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
27275 		rval = EINVAL;
27276 	}
27277 
27278 	if (sense) {
27279 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27280 	}
27281 	if (com) {
27282 		kmem_free(com, sizeof (*com));
27283 	}
27284 	return (rval);
27285 }
27286 
27287 
27288 /*
27289  *    Function: sr_pause_resume()
27290  *
27291  * Description: This routine is the driver entry point for handling CD-ROM
27292  *		pause/resume ioctl requests. This only affects the audio play
27293  *		operation.
27294  *
27295  *   Arguments: dev - the device 'dev_t'
27296  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
27297  *		      for setting the resume bit of the cdb.
27298  *
27299  * Return Code: the code returned by sd_send_scsi_cmd()
27300  *		EINVAL if invalid mode specified
27301  *
27302  */
27303 
27304 static int
27305 sr_pause_resume(dev_t dev, int cmd)
27306 {
27307 	struct sd_lun		*un;
27308 	struct uscsi_cmd	*com;
27309 	char			cdb[CDB_GROUP1];
27310 	int			rval;
27311 
27312 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27313 		return (ENXIO);
27314 	}
27315 
27316 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27317 	bzero(cdb, CDB_GROUP1);
27318 	cdb[0] = SCMD_PAUSE_RESUME;
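	/* Byte 8, bit 0 is the Resume bit: 1 resumes play, 0 pauses it */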
27319 	switch (cmd) {
27320 	case CDROMRESUME:
27321 		cdb[8] = 1;
27322 		break;
27323 	case CDROMPAUSE:
27324 		cdb[8] = 0;
27325 		break;
27326 	default:
27327 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
27328 		    " Command '%x' Not Supported\n", cmd);
27329 		rval = EINVAL;
27330 		goto done;
27331 	}
27332 
27333 	com->uscsi_cdb    = cdb;
27334 	com->uscsi_cdblen = CDB_GROUP1;
27335 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27336 
27337 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27338 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27339 
27340 done:
27341 	kmem_free(com, sizeof (*com));
27342 	return (rval);
27343 }
27344 
27345 
27346 /*
27347  *    Function: sr_play_msf()
27348  *
27349  * Description: This routine is the driver entry point for handling CD-ROM
27350  *		ioctl requests to output the audio signals at the specified
27351  *		starting address and continue the audio play until the specified
27352  *		ending address (CDROMPLAYMSF). The address is in Minute Second
27353  *		Frame (MSF) format.
27354  *
27355  *   Arguments: dev	- the device 'dev_t'
27356  *		data	- pointer to user provided audio msf structure,
27357  *		          specifying start/end addresses.
27358  *		flag	- this argument is a pass through to ddi_copyxxx()
27359  *		          directly from the mode argument of ioctl().
27360  *
27361  * Return Code: the code returned by sd_send_scsi_cmd()
27362  *		EFAULT if ddi_copyxxx() fails
27363  *		ENXIO if ddi_get_soft_state() fails
27364  *		EINVAL if data pointer is NULL
27365  */
27366 
27367 static int
27368 sr_play_msf(dev_t dev, caddr_t data, int flag)
27369 {
27370 	struct sd_lun		*un;
27371 	struct uscsi_cmd	*com;
27372 	struct cdrom_msf	msf_struct;
27373 	struct cdrom_msf	*msf = &msf_struct;
27374 	char			cdb[CDB_GROUP1];
27375 	int			rval;
27376 
27377 	if (data == NULL) {
27378 		return (EINVAL);
27379 	}
27380 
27381 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27382 		return (ENXIO);
27383 	}
27384 
27385 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
27386 		return (EFAULT);
27387 	}
27388 
27389 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27390 	bzero(cdb, CDB_GROUP1);
27391 	cdb[0] = SCMD_PLAYAUDIO_MSF;
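	/* Some legacy drives expect the MSF address fields in BCD */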
27392 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
27393 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
27394 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
27395 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
27396 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
27397 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
27398 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
27399 	} else {
27400 		cdb[3] = msf->cdmsf_min0;
27401 		cdb[4] = msf->cdmsf_sec0;
27402 		cdb[5] = msf->cdmsf_frame0;
27403 		cdb[6] = msf->cdmsf_min1;
27404 		cdb[7] = msf->cdmsf_sec1;
27405 		cdb[8] = msf->cdmsf_frame1;
27406 	}
27407 	com->uscsi_cdb    = cdb;
27408 	com->uscsi_cdblen = CDB_GROUP1;
27409 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27410 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27411 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27412 	kmem_free(com, sizeof (*com));
27413 	return (rval);
27414 }
27415 
27416 
27417 /*
27418  *    Function: sr_play_trkind()
27419  *
27420  * Description: This routine is the driver entry point for handling CD-ROM
27421  *		ioctl requests to output the audio signals at the specified
27422  *		starting address and continue the audio play until the specified
27423  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
27424  *		format.
27425  *
27426  *   Arguments: dev	- the device 'dev_t'
27427  *		data	- pointer to user provided audio track/index structure,
27428  *		          specifying start/end addresses.
27429  *		flag	- this argument is a pass through to ddi_copyxxx()
27430  *		          directly from the mode argument of ioctl().
27431  *
27432  * Return Code: the code returned by sd_send_scsi_cmd()
27433  *		EFAULT if ddi_copyxxx() fails
27434  *		ENXIO if ddi_get_soft_state() fails
27435  *		EINVAL if data pointer is NULL
27436  */
27437 
27438 static int
27439 sr_play_trkind(dev_t dev, caddr_t data, int flag)
27440 {
27441 	struct cdrom_ti		ti_struct;
27442 	struct cdrom_ti		*ti = &ti_struct;
27443 	struct uscsi_cmd	*com = NULL;
27444 	char			cdb[CDB_GROUP1];
27445 	int			rval;
27446 
27447 	if (data == NULL) {
27448 		return (EINVAL);
27449 	}
27450 
27451 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
27452 		return (EFAULT);
27453 	}
27454 
27455 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27456 	bzero(cdb, CDB_GROUP1);
27457 	cdb[0] = SCMD_PLAYAUDIO_TI;
27458 	cdb[4] = ti->cdti_trk0;
27459 	cdb[5] = ti->cdti_ind0;
27460 	cdb[7] = ti->cdti_trk1;
27461 	cdb[8] = ti->cdti_ind1;
27462 	com->uscsi_cdb    = cdb;
27463 	com->uscsi_cdblen = CDB_GROUP1;
27464 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27465 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27466 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27467 	kmem_free(com, sizeof (*com));
27468 	return (rval);
27469 }
27470 
27471 
27472 /*
27473  *    Function: sr_read_all_subcodes()
27474  *
27475  * Description: This routine is the driver entry point for handling CD-ROM
27476  *		ioctl requests to return raw subcode data while the target is
27477  *		playing audio (CDROMSUBCODE).
27478  *
27479  *   Arguments: dev	- the device 'dev_t'
27480  *		data	- pointer to user provided cdrom subcode structure,
27481  *		          specifying the transfer length and address.
27482  *		flag	- this argument is a pass through to ddi_copyxxx()
27483  *		          directly from the mode argument of ioctl().
27484  *
27485  * Return Code: the code returned by sd_send_scsi_cmd()
27486  *		EFAULT if ddi_copyxxx() fails
27487  *		ENXIO if ddi_get_soft_state() fails
27488  *		EINVAL if data pointer is NULL
27489  */
27490 
27491 static int
27492 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
27493 {
27494 	struct sd_lun		*un = NULL;
27495 	struct uscsi_cmd	*com = NULL;
27496 	struct cdrom_subcode	*subcode = NULL;
27497 	int			rval;
27498 	size_t			buflen;
27499 	char			cdb[CDB_GROUP5];
27500 
27501 #ifdef _MULTI_DATAMODEL
27502 	/* To support ILP32 applications in an LP64 world */
27503 	struct cdrom_subcode32		cdrom_subcode32;
27504 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
27505 #endif
27506 	if (data == NULL) {
27507 		return (EINVAL);
27508 	}
27509 
27510 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27511 		return (ENXIO);
27512 	}
27513 
27514 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
27515 
27516 #ifdef _MULTI_DATAMODEL
27517 	switch (ddi_model_convert_from(flag & FMODELS)) {
27518 	case DDI_MODEL_ILP32:
27519 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
27520 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27521 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27522 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27523 			return (EFAULT);
27524 		}
27525 		/* Convert the ILP32 uscsi data from the application to LP64 */
27526 		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
27527 		break;
27528 	case DDI_MODEL_NONE:
27529 		if (ddi_copyin(data, subcode,
27530 		    sizeof (struct cdrom_subcode), flag)) {
27531 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27532 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27533 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27534 			return (EFAULT);
27535 		}
27536 		break;
27537 	}
27538 #else /* ! _MULTI_DATAMODEL */
27539 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
27540 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27541 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
27542 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27543 		return (EFAULT);
27544 	}
27545 #endif /* _MULTI_DATAMODEL */
27546 
27547 	/*
27548 	 * MMC-2 allows at most a 3-byte (24-bit) transfer length, so
27549 	 * reject a request whose length does not fit in 24 bits.
27550 	 */
27551 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
27552 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27553 		    "sr_read_all_subcodes: "
27554 		    "cdrom transfer length too large: %d (limit %d)\n",
27555 		    subcode->cdsc_length, 0xFFFFFF);
27556 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27557 		return (EINVAL);
27558 	}
27559 
27560 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
27561 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27562 	bzero(cdb, CDB_GROUP5);
27563 
27564 	if (un->un_f_mmc_cap == TRUE) {
27565 		cdb[0] = (char)SCMD_READ_CD;
27566 		cdb[2] = (char)0xff;
27567 		cdb[3] = (char)0xff;
27568 		cdb[4] = (char)0xff;
27569 		cdb[5] = (char)0xff;
27570 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
27571 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
27572 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
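		/* Sub-channel selection 001b in byte 10 requests raw P-W data */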
27573 		cdb[10] = 1;
27574 	} else {
27575 		/*
27576 		 * Note: A vendor specific command (0xDF) is being used here to
27577 		 * request a read of all subcodes.
27578 		 */
27579 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
27580 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
27581 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
27582 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
27583 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
27584 	}
27585 	com->uscsi_cdb	   = cdb;
27586 	com->uscsi_cdblen  = CDB_GROUP5;
27587 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
27588 	com->uscsi_buflen  = buflen;
27589 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27590 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
27591 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27592 	kmem_free(subcode, sizeof (struct cdrom_subcode));
27593 	kmem_free(com, sizeof (*com));
27594 	return (rval);
27595 }
27596 
27597 
27598 /*
27599  *    Function: sr_read_subchannel()
27600  *
27601  * Description: This routine is the driver entry point for handling CD-ROM
27602  *		ioctl requests to return the Q sub-channel data of the CD
27603  *		current position block (CDROMSUBCHNL). The data includes the
27604  *		track number, index number, absolute CD-ROM address (LBA or MSF
27605  *		format per the user), track relative CD-ROM address (LBA or MSF
27606  *		format per the user), control data and audio status.
27607  *
27608  *   Arguments: dev	- the device 'dev_t'
27609  *		data	- pointer to user provided cdrom sub-channel structure
27610  *		flag	- this argument is a pass through to ddi_copyxxx()
27611  *		          directly from the mode argument of ioctl().
27612  *
27613  * Return Code: the code returned by sd_send_scsi_cmd()
27614  *		EFAULT if ddi_copyxxx() fails
27615  *		ENXIO if ddi_get_soft_state() fails
27616  *		EINVAL if data pointer is NULL
27617  */
27618 
27619 static int
27620 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
27621 {
27622 	struct sd_lun		*un;
27623 	struct uscsi_cmd	*com;
27624 	struct cdrom_subchnl	subchannel;
27625 	struct cdrom_subchnl	*subchnl = &subchannel;
27626 	char			cdb[CDB_GROUP1];
27627 	caddr_t			buffer;
27628 	int			rval;
27629 
27630 	if (data == NULL) {
27631 		return (EINVAL);
27632 	}
27633 
27634 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27635 	    (un->un_state == SD_STATE_OFFLINE)) {
27636 		return (ENXIO);
27637 	}
27638 
27639 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
27640 		return (EFAULT);
27641 	}
27642 
27643 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
27644 	bzero(cdb, CDB_GROUP1);
27645 	cdb[0] = SCMD_READ_SUBCHANNEL;
27646 	/* Set the MSF bit based on the user requested address format */
27647 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
27648 	/*
27649 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
27650 	 * returned
27651 	 */
27652 	cdb[2] = 0x40;
27653 	/*
27654 	 * Set byte 3 to specify the return data format. A value of 0x01
27655 	 * indicates that the CD-ROM current position should be returned.
27656 	 */
27657 	cdb[3] = 0x01;
27658 	cdb[8] = 0x10;
27659 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27660 	com->uscsi_cdb	   = cdb;
27661 	com->uscsi_cdblen  = CDB_GROUP1;
27662 	com->uscsi_bufaddr = buffer;
27663 	com->uscsi_buflen  = 16;
27664 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27665 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27666 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27667 	if (rval != 0) {
27668 		kmem_free(buffer, 16);
27669 		kmem_free(com, sizeof (*com));
27670 		return (rval);
27671 	}
27672 
27673 	/* Process the returned Q sub-channel data */
27674 	subchnl->cdsc_audiostatus = buffer[1];
27675 	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
27676 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
27677 	subchnl->cdsc_trk	= buffer[6];
27678 	subchnl->cdsc_ind	= buffer[7];
27679 	if (subchnl->cdsc_format & CDROM_LBA) {
27680 		subchnl->cdsc_absaddr.lba =
27681 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
27682 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
27683 		subchnl->cdsc_reladdr.lba =
27684 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
27685 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
27686 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
27687 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
27688 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
27689 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
27690 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
27691 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
27692 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
27693 	} else {
27694 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
27695 		subchnl->cdsc_absaddr.msf.second = buffer[10];
27696 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
27697 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
27698 		subchnl->cdsc_reladdr.msf.second = buffer[14];
27699 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
27700 	}
27701 	kmem_free(buffer, 16);
27702 	kmem_free(com, sizeof (*com));
27703 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
27704 	    != 0) {
27705 		return (EFAULT);
27706 	}
27707 	return (rval);
27708 }
27709 
27710 
27711 /*
27712  *    Function: sr_read_tocentry()
27713  *
27714  * Description: This routine is the driver entry point for handling CD-ROM
27715  *		ioctl requests to read from the Table of Contents (TOC)
27716  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
27717  *		fields, the starting address (LBA or MSF format per the user)
27718  *		and the data mode if the user specified track is a data track.
27719  *
27720  *		Note: The READ HEADER (0x44) command used in this routine is
27721  *		obsolete per the SCSI MMC spec but still supported in the
27722  *		MT FUJI vendor spec. Most equipment adheres to MT FUJI,
27723  *		so the command is still implemented in this routine.
27724  *
27725  *   Arguments: dev	- the device 'dev_t'
27726  *		data	- pointer to user provided toc entry structure,
27727  *			  specifying the track # and the address format
27728  *			  (LBA or MSF).
27729  *		flag	- this argument is a pass through to ddi_copyxxx()
27730  *		          directly from the mode argument of ioctl().
27731  *
27732  * Return Code: the code returned by sd_send_scsi_cmd()
27733  *		EFAULT if ddi_copyxxx() fails
27734  *		ENXIO if ddi_get_soft_state() fails
27735  *		EINVAL if data pointer is NULL
27736  */
27737 
27738 static int
27739 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
27740 {
27741 	struct sd_lun		*un = NULL;
27742 	struct uscsi_cmd	*com;
27743 	struct cdrom_tocentry	toc_entry;
27744 	struct cdrom_tocentry	*entry = &toc_entry;
27745 	caddr_t			buffer;
27746 	int			rval;
27747 	char			cdb[CDB_GROUP1];
27748 
27749 	if (data == NULL) {
27750 		return (EINVAL);
27751 	}
27752 
27753 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27754 	    (un->un_state == SD_STATE_OFFLINE)) {
27755 		return (ENXIO);
27756 	}
27757 
27758 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
27759 		return (EFAULT);
27760 	}
27761 
27762 	/* Validate the requested track and address format */
27763 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
27764 		return (EINVAL);
27765 	}
27766 
27767 	if (entry->cdte_track == 0) {
27768 		return (EINVAL);
27769 	}
27770 
27771 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
27772 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27773 	bzero(cdb, CDB_GROUP1);
27774 
27775 	cdb[0] = SCMD_READ_TOC;
27776 	/* Set the MSF bit based on the user requested address format  */
27777 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
27778 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
27779 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
27780 	} else {
27781 		cdb[6] = entry->cdte_track;
27782 	}
27783 
27784 	/*
27785 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
27786 	 * (4 byte TOC response header + 8 byte track descriptor)
27787 	 */
27788 	cdb[8] = 12;
27789 	com->uscsi_cdb	   = cdb;
27790 	com->uscsi_cdblen  = CDB_GROUP1;
27791 	com->uscsi_bufaddr = buffer;
27792 	com->uscsi_buflen  = 0x0C;
27793 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
27794 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27795 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27796 	if (rval != 0) {
27797 		kmem_free(buffer, 12);
27798 		kmem_free(com, sizeof (*com));
27799 		return (rval);
27800 	}
27801 
27802 	/* Process the toc entry */
27803 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
27804 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
27805 	if (entry->cdte_format & CDROM_LBA) {
27806 		entry->cdte_addr.lba =
27807 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
27808 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
27809 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
27810 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
27811 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
27812 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
27813 		/*
27814 		 * Send a READ TOC command using the LBA address format to get
27815 		 * the LBA for the track requested so it can be used in the
27816 		 * READ HEADER request
27817 		 *
27818 		 * Note: The MSF bit of the READ HEADER command specifies the
27819 		 * output format. The block address specified in that command
27820 		 * must be in LBA format.
27821 		 */
27822 		cdb[1] = 0;
27823 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27824 		    UIO_SYSSPACE, SD_PATH_STANDARD);
27825 		if (rval != 0) {
27826 			kmem_free(buffer, 12);
27827 			kmem_free(com, sizeof (*com));
27828 			return (rval);
27829 		}
27830 	} else {
27831 		entry->cdte_addr.msf.minute	= buffer[9];
27832 		entry->cdte_addr.msf.second	= buffer[10];
27833 		entry->cdte_addr.msf.frame	= buffer[11];
27834 		/*
27835 		 * Send a READ TOC command using the LBA address format to get
27836 		 * the LBA for the track requested so it can be used in the
27837 		 * READ HEADER request
27838 		 *
27839 		 * Note: The MSF bit of the READ HEADER command specifies the
27840 		 * output format. The block address specified in that command
27841 		 * must be in LBA format.
27842 		 */
27843 		cdb[1] = 0;
27844 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27845 		    UIO_SYSSPACE, SD_PATH_STANDARD);
27846 		if (rval != 0) {
27847 			kmem_free(buffer, 12);
27848 			kmem_free(com, sizeof (*com));
27849 			return (rval);
27850 		}
27851 	}
27852 
27853 	/*
27854 	 * Build and send the READ HEADER command to determine the data mode of
27855 	 * the user specified track.
27856 	 */
27857 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
27858 	    (entry->cdte_track != CDROM_LEADOUT)) {
27859 		bzero(cdb, CDB_GROUP1);
27860 		cdb[0] = SCMD_READ_HEADER;
27861 		cdb[2] = buffer[8];
27862 		cdb[3] = buffer[9];
27863 		cdb[4] = buffer[10];
27864 		cdb[5] = buffer[11];
27865 		cdb[8] = 0x08;
27866 		com->uscsi_buflen = 0x08;
27867 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27868 		    UIO_SYSSPACE, SD_PATH_STANDARD);
27869 		if (rval == 0) {
27870 			entry->cdte_datamode = buffer[0];
27871 		} else {
27872 			/*
27873 			 * The READ HEADER command failed. Since it is
27874 			 * obsolete in the MMC spec, it is better to return
27875 			 * -1 for an invalid track so that we can still
27876 			 * receive the rest of the TOC data.
27877 			 */
27878 			entry->cdte_datamode = (uchar_t)-1;
27879 		}
27880 	} else {
27881 		entry->cdte_datamode = (uchar_t)-1;
27882 	}
27883 
27884 	kmem_free(buffer, 12);
27885 	kmem_free(com, sizeof (*com));
27886 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
27887 		return (EFAULT);
27888 
27889 	return (rval);
27890 }
27891 
27892 
27893 /*
27894  *    Function: sr_read_tochdr()
27895  *
27896  * Description: This routine is the driver entry point for handling CD-ROM
27897  *		ioctl requests to read the Table of Contents (TOC) header
27898  *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
27899  *		and ending track numbers.
27900  *
27901  *   Arguments: dev	- the device 'dev_t'
27902  *		data	- pointer to user provided toc header structure,
27903  *			  specifying the starting and ending track numbers.
27904  *		flag	- this argument is a pass through to ddi_copyxxx()
27905  *			  directly from the mode argument of ioctl().
27906  *
27907  * Return Code: the code returned by sd_send_scsi_cmd()
27908  *		EFAULT if ddi_copyxxx() fails
27909  *		ENXIO if ddi_get_soft_state() fails
27910  *		EINVAL if data pointer is NULL
27911  */
27912 
27913 static int
27914 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
27915 {
27916 	struct sd_lun		*un;
27917 	struct uscsi_cmd	*com;
27918 	struct cdrom_tochdr	toc_header;
27919 	struct cdrom_tochdr	*hdr = &toc_header;
27920 	char			cdb[CDB_GROUP1];
27921 	int			rval;
27922 	caddr_t			buffer;
27923 
27924 	if (data == NULL) {
27925 		return (EINVAL);
27926 	}
27927 
27928 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27929 	    (un->un_state == SD_STATE_OFFLINE)) {
27930 		return (ENXIO);
27931 	}
27932 
27933 	buffer = kmem_zalloc(4, KM_SLEEP);
27934 	bzero(cdb, CDB_GROUP1);
27935 	cdb[0] = SCMD_READ_TOC;
27936 	/*
27937 	 * Specifying a track number of 0x00 in the READ TOC command indicates
27938 	 * that the TOC header should be returned
27939 	 */
27940 	cdb[6] = 0x00;
27941 	/*
27942 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
27943 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
27944 	 */
27945 	cdb[8] = 0x04;
27946 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27947 	com->uscsi_cdb	   = cdb;
27948 	com->uscsi_cdblen  = CDB_GROUP1;
27949 	com->uscsi_bufaddr = buffer;
27950 	com->uscsi_buflen  = 0x04;
27951 	com->uscsi_timeout = 300;
27952 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27953 
27954 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27955 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27956 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
27957 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
27958 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
27959 	} else {
27960 		hdr->cdth_trk0 = buffer[2];
27961 		hdr->cdth_trk1 = buffer[3];
27962 	}
27963 	kmem_free(buffer, 4);
27964 	kmem_free(com, sizeof (*com));
27965 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
27966 		return (EFAULT);
27967 	}
27968 	return (rval);
27969 }
27970 
27971 
27972 /*
27973  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
27974  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
27975  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
27976  * digital audio and extended architecture digital audio. These modes are
27977  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
27978  * MMC specs.
27979  *
27980  * In addition to support for the various data formats these routines also
27981  * include support for devices that implement only the direct access READ
27982  * commands (0x08, 0x28), devices that implement the READ_CD commands
27983  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
27984  * READ CDXA commands (0xD8, 0xDB)
27985  */
27986 
27987 /*
27988  *    Function: sr_read_mode1()
27989  *
27990  * Description: This routine is the driver entry point for handling CD-ROM
27991  *		ioctl read mode1 requests (CDROMREADMODE1).
27992  *
27993  *   Arguments: dev	- the device 'dev_t'
27994  *		data	- pointer to user provided cd read structure specifying
27995  *			  the lba buffer address and length.
27996  *		flag	- this argument is a pass through to ddi_copyxxx()
27997  *			  directly from the mode argument of ioctl().
27998  *
27999  * Return Code: the code returned by sd_send_scsi_cmd()
28000  *		EFAULT if ddi_copyxxx() fails
28001  *		ENXIO if ddi_get_soft_state() fails
28002  *		EINVAL if data pointer is NULL
28003  */
28004 
28005 static int
28006 sr_read_mode1(dev_t dev, caddr_t data, int flag)
28007 {
28008 	struct sd_lun		*un;
28009 	struct cdrom_read	mode1_struct;
28010 	struct cdrom_read	*mode1 = &mode1_struct;
28011 	int			rval;
28012 #ifdef _MULTI_DATAMODEL
28013 	/* To support ILP32 applications in an LP64 world */
28014 	struct cdrom_read32	cdrom_read32;
28015 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28016 #endif /* _MULTI_DATAMODEL */
28017 
28018 	if (data == NULL) {
28019 		return (EINVAL);
28020 	}
28021 
28022 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28023 	    (un->un_state == SD_STATE_OFFLINE)) {
28024 		return (ENXIO);
28025 	}
28026 
28027 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28028 	    "sr_read_mode1: entry: un:0x%p\n", un);
28029 
28030 #ifdef _MULTI_DATAMODEL
28031 	switch (ddi_model_convert_from(flag & FMODELS)) {
28032 	case DDI_MODEL_ILP32:
28033 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28034 			return (EFAULT);
28035 		}
28036 		/* Convert the ILP32 uscsi data from the application to LP64 */
28037 		cdrom_read32tocdrom_read(cdrd32, mode1);
28038 		break;
28039 	case DDI_MODEL_NONE:
28040 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28041 			return (EFAULT);
28042 		}
28043 	}
28044 #else /* ! _MULTI_DATAMODEL */
28045 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28046 		return (EFAULT);
28047 	}
28048 #endif /* _MULTI_DATAMODEL */
28049 
28050 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
28051 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
28052 
28053 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28054 	    "sr_read_mode1: exit: un:0x%p\n", un);
28055 
28056 	return (rval);
28057 }
28058 
28059 
28060 /*
28061  *    Function: sr_read_cd_mode2()
28062  *
28063  * Description: This routine is the driver entry point for handling CD-ROM
28064  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28065  *		support the READ CD (0xBE) command or the 1st generation
28066  *		READ CD (0xD4) command.
28067  *
28068  *   Arguments: dev	- the device 'dev_t'
28069  *		data	- pointer to user provided cd read structure specifying
28070  *			  the lba buffer address and length.
28071  *		flag	- this argument is a pass through to ddi_copyxxx()
28072  *			  directly from the mode argument of ioctl().
28073  *
28074  * Return Code: the code returned by sd_send_scsi_cmd()
28075  *		EFAULT if ddi_copyxxx() fails
28076  *		ENXIO if ddi_get_soft_state() fails
28077  *		EINVAL if data pointer is NULL
28078  */
28079 
28080 static int
28081 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
28082 {
28083 	struct sd_lun		*un;
28084 	struct uscsi_cmd	*com;
28085 	struct cdrom_read	mode2_struct;
28086 	struct cdrom_read	*mode2 = &mode2_struct;
28087 	uchar_t			cdb[CDB_GROUP5];
28088 	int			nblocks;
28089 	int			rval;
28090 #ifdef _MULTI_DATAMODEL
28091 	/* To support ILP32 applications in an LP64 world */
28092 	struct cdrom_read32	cdrom_read32;
28093 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28094 #endif /* _MULTI_DATAMODEL */
28095 
28096 	if (data == NULL) {
28097 		return (EINVAL);
28098 	}
28099 
28100 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28101 	    (un->un_state == SD_STATE_OFFLINE)) {
28102 		return (ENXIO);
28103 	}
28104 
28105 #ifdef _MULTI_DATAMODEL
28106 	switch (ddi_model_convert_from(flag & FMODELS)) {
28107 	case DDI_MODEL_ILP32:
28108 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28109 			return (EFAULT);
28110 		}
28111 		/* Convert the ILP32 uscsi data from the application to LP64 */
28112 		cdrom_read32tocdrom_read(cdrd32, mode2);
28113 		break;
28114 	case DDI_MODEL_NONE:
28115 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28116 			return (EFAULT);
28117 		}
28118 		break;
28119 	}
28120 
28121 #else /* ! _MULTI_DATAMODEL */
28122 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28123 		return (EFAULT);
28124 	}
28125 #endif /* _MULTI_DATAMODEL */
28126 
28127 	bzero(cdb, sizeof (cdb));
28128 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
28129 		/* Read command supported by 1st generation atapi drives */
28130 		cdb[0] = SCMD_READ_CDD4;
28131 	} else {
28132 		/* Universal CD Access Command */
28133 		cdb[0] = SCMD_READ_CD;
28134 	}
28135 
28136 	/*
28137 	 * Set expected sector type to: 2336 byte, Mode 2 Yellow Book
28138 	 */
28139 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
28140 
28141 	/* set the start address */
28142 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0xFF);
28143 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0xFF);
28144 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28145 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
28146 
28147 	/* set the transfer length */
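	/* (each Yellow Book Mode 2 sector carries 2336 bytes of user data) */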
28148 	nblocks = mode2->cdread_buflen / 2336;
28149 	cdb[6] = (uchar_t)(nblocks >> 16);
28150 	cdb[7] = (uchar_t)(nblocks >> 8);
28151 	cdb[8] = (uchar_t)nblocks;
28152 
28153 	/* set the filter bits */
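	/* (return only the user data portion of each sector) */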
28154 	cdb[9] = CDROM_READ_CD_USERDATA;
28155 
28156 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28157 	com->uscsi_cdb = (caddr_t)cdb;
28158 	com->uscsi_cdblen = sizeof (cdb);
28159 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28160 	com->uscsi_buflen = mode2->cdread_buflen;
28161 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28162 
28163 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28164 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28165 	kmem_free(com, sizeof (*com));
28166 	return (rval);
28167 }
28168 
28169 
28170 /*
28171  *    Function: sr_read_mode2()
28172  *
28173  * Description: This routine is the driver entry point for handling CD-ROM
28174  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28175  *		do not support the READ CD (0xBE) command.
28176  *
28177  *   Arguments: dev	- the device 'dev_t'
28178  *		data	- pointer to user provided cd read structure specifying
28179  *			  the lba buffer address and length.
28180  *		flag	- this argument is a pass through to ddi_copyxxx()
28181  *			  directly from the mode argument of ioctl().
28182  *
28183  * Return Code: the code returned by sd_send_scsi_cmd()
28184  *		EFAULT if ddi_copyxxx() fails
28185  *		ENXIO if ddi_get_soft_state() fails
28186  *		EINVAL if data pointer is NULL
28187  *		EIO if fail to reset block size
28188  *		EAGAIN if commands are in progress in the driver
28189  */
28190 
28191 static int
28192 sr_read_mode2(dev_t dev, caddr_t data, int flag)
28193 {
28194 	struct sd_lun		*un;
28195 	struct cdrom_read	mode2_struct;
28196 	struct cdrom_read	*mode2 = &mode2_struct;
28197 	int			rval;
28198 	uint32_t		restore_blksize;
28199 	struct uscsi_cmd	*com;
28200 	uchar_t			cdb[CDB_GROUP0];
28201 	int			nblocks;
28202 
28203 #ifdef _MULTI_DATAMODEL
28204 	/* To support ILP32 applications in an LP64 world */
28205 	struct cdrom_read32	cdrom_read32;
28206 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28207 #endif /* _MULTI_DATAMODEL */
28208 
28209 	if (data == NULL) {
28210 		return (EINVAL);
28211 	}
28212 
28213 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28214 	    (un->un_state == SD_STATE_OFFLINE)) {
28215 		return (ENXIO);
28216 	}
28217 
28218 	/*
28219 	 * Because this routine will update the device and driver block size
28220 	 * being used, we want to make sure there are no commands in progress.
28221 	 * If commands are in progress the user will have to try again.
28222 	 *
28223 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
28224 	 * in sdioctl to protect commands from sdioctl through to the top of
28225 	 * sd_uscsi_strategy. See sdioctl for details.
28226 	 */
28227 	mutex_enter(SD_MUTEX(un));
28228 	if (un->un_ncmds_in_driver != 1) {
28229 		mutex_exit(SD_MUTEX(un));
28230 		return (EAGAIN);
28231 	}
28232 	mutex_exit(SD_MUTEX(un));
28233 
28234 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28235 	    "sr_read_mode2: entry: un:0x%p\n", un);
28236 
28237 #ifdef _MULTI_DATAMODEL
28238 	switch (ddi_model_convert_from(flag & FMODELS)) {
28239 	case DDI_MODEL_ILP32:
28240 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28241 			return (EFAULT);
28242 		}
28243 		/* Convert the ILP32 uscsi data from the application to LP64 */
28244 		cdrom_read32tocdrom_read(cdrd32, mode2);
28245 		break;
28246 	case DDI_MODEL_NONE:
28247 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28248 			return (EFAULT);
28249 		}
28250 		break;
28251 	}
28252 #else /* ! _MULTI_DATAMODEL */
28253 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
28254 		return (EFAULT);
28255 	}
28256 #endif /* _MULTI_DATAMODEL */
28257 
28258 	/* Store the current target block size for restoration later */
28259 	restore_blksize = un->un_tgt_blocksize;
28260 
28261 	/* Change the device and soft state target block size to 2336 */
28262 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
28263 		rval = EIO;
28264 		goto done;
28265 	}
28266 
28267 
28268 	bzero(cdb, sizeof (cdb));
28269 
28270 	/* set READ operation */
28271 	cdb[0] = SCMD_READ;
28272 
28273 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
28274 	mode2->cdread_lba >>= 2;
28275 
28276 	/* set the start address */
28277 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0x1F);
28278 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28279 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
28280 
28281 	/* set the transfer length */
28282 	nblocks = mode2->cdread_buflen / 2336;
28283 	cdb[4] = (uchar_t)nblocks & 0xFF;
28284 
28285 	/* build command */
28286 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28287 	com->uscsi_cdb = (caddr_t)cdb;
28288 	com->uscsi_cdblen = sizeof (cdb);
28289 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28290 	com->uscsi_buflen = mode2->cdread_buflen;
28291 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28292 
28293 	/*
28294 	 * Issue SCSI command with user space address for read buffer.
28295 	 *
28296 	 * This sends the command through main channel in the driver.
28297 	 *
28298 	 * Since this is accessed via an IOCTL call, we go through the
28299 	 * standard path, so that if the device was powered down, then
28300 	 * it would be 'awakened' to handle the command.
28301 	 */
28302 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28303 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28304 
28305 	kmem_free(com, sizeof (*com));
28306 
28307 	/* Restore the device and soft state target block size */
28308 	if (sr_sector_mode(dev, restore_blksize) != 0) {
28309 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28310 		    "can't switch back to mode 1\n");
28311 		/*
28312 		 * If the READ command succeeded we still need to report an
28313 		 * error because we failed to restore the block size.
28314 		 */
28315 		if (rval == 0) {
28316 			rval = EIO;
28317 		}
28318 	}
28319 
28320 done:
28321 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28322 	    "sr_read_mode2: exit: un:0x%p\n", un);
28323 
28324 	return (rval);
28325 }
28326 
28327 
28328 /*
28329  *    Function: sr_sector_mode()
28330  *
28331  * Description: This utility function is used by sr_read_mode2 to set the
28332  *		target block size based on the user specified size. This is a
28333  *		legacy implementation based upon a vendor specific mode page.
28334  *
28335  *   Arguments: dev	- the device 'dev_t'
28336  *		blksize	- the block size being set; either 2336 or 512.
28337  *
28338  * Return Code: the code returned by sd_send_scsi_MODE_SENSE() or
28339  *		sd_send_scsi_MODE_SELECT()
28340  *		ENXIO if ddi_get_soft_state() fails
28343  */
28344 
28345 static int
28346 sr_sector_mode(dev_t dev, uint32_t blksize)
28347 {
28348 	struct sd_lun	*un;
28349 	uchar_t		*sense;
28350 	uchar_t		*select;
28351 	int		rval;
28352 
28353 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28354 	    (un->un_state == SD_STATE_OFFLINE)) {
28355 		return (ENXIO);
28356 	}
28357 
28358 	sense = kmem_zalloc(20, KM_SLEEP);
28359 
28360 	/* Note: This is a vendor specific mode page (0x81) */
28361 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
28362 	    SD_PATH_STANDARD)) != 0) {
28363 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28364 		    "sr_sector_mode: Mode Sense failed\n");
28365 		kmem_free(sense, 20);
28366 		return (rval);
28367 	}
28368 	select = kmem_zalloc(20, KM_SLEEP);
28369 	select[3] = 0x08;
28370 	select[10] = ((blksize >> 8) & 0xff);
28371 	select[11] = (blksize & 0xff);
28372 	select[12] = 0x01;
28373 	select[13] = 0x06;
28374 	select[14] = sense[14];
28375 	select[15] = sense[15];
28376 	if (blksize == SD_MODE2_BLKSIZE) {
28377 		select[14] |= 0x01;
28378 	}
28379 
28380 	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
28381 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
28382 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28383 		    "sr_sector_mode: Mode Select failed\n");
28384 	} else {
28385 		/*
28386 		 * Only update the softstate block size if we successfully
28387 		 * changed the device block mode.
28388 		 */
28389 		mutex_enter(SD_MUTEX(un));
28390 		sd_update_block_info(un, blksize, 0);
28391 		mutex_exit(SD_MUTEX(un));
28392 	}
28393 	kmem_free(sense, 20);
28394 	kmem_free(select, 20);
28395 	return (rval);
28396 }
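
/*
 * For reference, the layout of the 20-byte mode select buffer built
 * above (page 0x81 is vendor specific, so this describes this driver's
 * usage rather than a standard):
 *
 *	byte  3		block descriptor length (0x08)
 *	bytes 10-11	block size, big-endian (2336 or 512)
 *	byte  12	page code (0x01)
 *	byte  13	page length (0x06)
 *	bytes 14-15	carried over from the mode sense data; bit 0 of
 *			byte 14 is set when selecting 2336-byte blocks
 */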
28397 
28398 
28399 /*
28400  *    Function: sr_read_cdda()
28401  *
28402  * Description: This routine is the driver entry point for handling CD-ROM
28403  *		ioctl requests to return CD-DA or subcode data (CDROMCDDA). If
28404  *		the target supports CDDA, these requests are handled via a
28405  *		vendor-specific command (0xD8). If the target does not support
28406  *		CDDA, they are handled via the READ CD command (0xBE).
28407  *
28408  *   Arguments: dev	- the device 'dev_t'
28409  *		data	- pointer to user provided CD-DA structure specifying
28410  *			  the track starting address, transfer length, and
28411  *			  subcode options.
28412  *		flag	- this argument is a pass through to ddi_copyxxx()
28413  *			  directly from the mode argument of ioctl().
28414  *
28415  * Return Code: the code returned by sd_send_scsi_cmd()
28416  *		EFAULT if ddi_copyxxx() fails
28417  *		ENXIO if fail ddi_get_soft_state
28418  *		EINVAL if invalid arguments are provided
28419  *		ENOTTY if an unsupported subcode is requested via READ CD
28420  */
28421 
28422 static int
28423 sr_read_cdda(dev_t dev, caddr_t data, int flag)
28424 {
28425 	struct sd_lun			*un;
28426 	struct uscsi_cmd		*com;
28427 	struct cdrom_cdda		*cdda;
28428 	int				rval;
28429 	size_t				buflen;
28430 	char				cdb[CDB_GROUP5];
28431 
28432 #ifdef _MULTI_DATAMODEL
28433 	/* To support ILP32 applications in an LP64 world */
28434 	struct cdrom_cdda32	cdrom_cdda32;
28435 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
28436 #endif /* _MULTI_DATAMODEL */
28437 
28438 	if (data == NULL) {
28439 		return (EINVAL);
28440 	}
28441 
28442 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28443 		return (ENXIO);
28444 	}
28445 
28446 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
28447 
28448 #ifdef _MULTI_DATAMODEL
28449 	switch (ddi_model_convert_from(flag & FMODELS)) {
28450 	case DDI_MODEL_ILP32:
28451 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
28452 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28453 			    "sr_read_cdda: ddi_copyin Failed\n");
28454 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28455 			return (EFAULT);
28456 		}
28457 		/* Convert the ILP32 cdrom_cdda data from the app to LP64 */
28458 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
28459 		break;
28460 	case DDI_MODEL_NONE:
28461 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28462 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28463 			    "sr_read_cdda: ddi_copyin Failed\n");
28464 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28465 			return (EFAULT);
28466 		}
28467 		break;
28468 	}
28469 #else /* ! _MULTI_DATAMODEL */
28470 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28471 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28472 		    "sr_read_cdda: ddi_copyin Failed\n");
28473 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28474 		return (EFAULT);
28475 	}
28476 #endif /* _MULTI_DATAMODEL */
28477 
28478 	/*
28479 	 * Since MMC-2 allows at most 3 bytes for the transfer length,
28480 	 * check that the requested length fits in 3 bytes.
28481 	 */
28482 	if ((cdda->cdda_length & 0xFF000000) != 0) {
28483 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
28484 		    "cdrom transfer length too large: %d (limit %d)\n",
28485 		    cdda->cdda_length, 0xFFFFFF);
28486 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28487 		return (EINVAL);
28488 	}
28489 
28490 	switch (cdda->cdda_subcode) {
28491 	case CDROM_DA_NO_SUBCODE:
28492 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
28493 		break;
28494 	case CDROM_DA_SUBQ:
28495 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
28496 		break;
28497 	case CDROM_DA_ALL_SUBCODE:
28498 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
28499 		break;
28500 	case CDROM_DA_SUBCODE_ONLY:
28501 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
28502 		break;
28503 	default:
28504 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28505 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
28506 		    cdda->cdda_subcode);
28507 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28508 		return (EINVAL);
28509 	}
28510 
28511 	/* Build and send the command */
28512 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28513 	bzero(cdb, CDB_GROUP5);
28514 
28515 	if (un->un_f_cfg_cdda == TRUE) {
28516 		cdb[0] = (char)SCMD_READ_CD;
28517 		cdb[1] = 0x04;
28518 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28519 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28520 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28521 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28522 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28523 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28524 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
28525 		cdb[9] = 0x10;
28526 		switch (cdda->cdda_subcode) {
28527 		case CDROM_DA_NO_SUBCODE:
28528 			cdb[10] = 0x0;
28529 			break;
28530 		case CDROM_DA_SUBQ:
28531 			cdb[10] = 0x2;
28532 			break;
28533 		case CDROM_DA_ALL_SUBCODE:
28534 			cdb[10] = 0x1;
28535 			break;
28536 		case CDROM_DA_SUBCODE_ONLY:
28537 			/* FALLTHROUGH */
28538 		default:
28539 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28540 			kmem_free(com, sizeof (*com));
28541 			return (ENOTTY);
28542 		}
28543 	} else {
28544 		cdb[0] = (char)SCMD_READ_CDDA;
28545 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28546 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28547 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28548 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28549 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
28550 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28551 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28552 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
28553 		cdb[10] = cdda->cdda_subcode;
28554 	}
28555 
28556 	com->uscsi_cdb = cdb;
28557 	com->uscsi_cdblen = CDB_GROUP5;
28558 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
28559 	com->uscsi_buflen = buflen;
28560 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28561 
28562 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28563 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28564 
28565 	kmem_free(cdda, sizeof (struct cdrom_cdda));
28566 	kmem_free(com, sizeof (*com));
28567 	return (rval);
28568 }
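
/*
 * A minimal user-space sketch (not part of the driver) of the CDROMCDDA
 * ioctl: read one audio frame plus its Q subcode.  The device path is
 * an assumption; error handling is omitted.
 *
 *	unsigned char frame[CDROM_BLK_2368];
 *	struct cdrom_cdda cdda;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *	cdda.cdda_addr = 0;			(starting LBA)
 *	cdda.cdda_length = 1;			(number of frames)
 *	cdda.cdda_data = (caddr_t)frame;
 *	cdda.cdda_subcode = CDROM_DA_SUBQ;	(2352 audio + 16 Q bytes)
 *	(void) ioctl(fd, CDROMCDDA, &cdda);
 */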
28569 
28570 
28571 /*
28572  *    Function: sr_read_cdxa()
28573  *
28574  * Description: This routine is the driver entry point for handling CD-ROM
28575  *		ioctl requests to return CD-XA (Extended Architecture) data.
28576  *		(CDROMCDXA).
28577  *
28578  *   Arguments: dev	- the device 'dev_t'
28579  *		data	- pointer to user provided CD-XA structure specifying
28580  *			  the data starting address, transfer length, and format
28581  *		flag	- this argument is a pass through to ddi_copyxxx()
28582  *			  directly from the mode argument of ioctl().
28583  *
28584  * Return Code: the code returned by sd_send_scsi_cmd()
28585  *		EFAULT if ddi_copyxxx() fails
28586  *		ENXIO if fail ddi_get_soft_state
28587  *		EINVAL if data pointer is NULL
28588  */
28589 
28590 static int
28591 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
28592 {
28593 	struct sd_lun		*un;
28594 	struct uscsi_cmd	*com;
28595 	struct cdrom_cdxa	*cdxa;
28596 	int			rval;
28597 	size_t			buflen;
28598 	char			cdb[CDB_GROUP5];
28599 	uchar_t			read_flags;
28600 
28601 #ifdef _MULTI_DATAMODEL
28602 	/* To support ILP32 applications in an LP64 world */
28603 	struct cdrom_cdxa32		cdrom_cdxa32;
28604 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
28605 #endif /* _MULTI_DATAMODEL */
28606 
28607 	if (data == NULL) {
28608 		return (EINVAL);
28609 	}
28610 
28611 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28612 		return (ENXIO);
28613 	}
28614 
28615 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
28616 
28617 #ifdef _MULTI_DATAMODEL
28618 	switch (ddi_model_convert_from(flag & FMODELS)) {
28619 	case DDI_MODEL_ILP32:
28620 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
28621 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28622 			return (EFAULT);
28623 		}
28624 		/*
28625 		 * Convert the ILP32 cdrom_cdxa data from the
28626 		 * application to LP64 for internal use.
28627 		 */
28628 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
28629 		break;
28630 	case DDI_MODEL_NONE:
28631 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
28632 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28633 			return (EFAULT);
28634 		}
28635 		break;
28636 	}
28637 #else /* ! _MULTI_DATAMODEL */
28638 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
28639 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28640 		return (EFAULT);
28641 	}
28642 #endif /* _MULTI_DATAMODEL */
28643 
28644 	/*
28645 	 * Since MMC-2 allows at most 3 bytes for the transfer length,
28646 	 * check that the requested length fits in 3 bytes.
28647 	 */
28648 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
28649 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
28650 		    "cdrom transfer length too large: %d (limit %d)\n",
28651 		    cdxa->cdxa_length, 0xFFFFFF);
28652 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28653 		return (EINVAL);
28654 	}
28655 
28656 	switch (cdxa->cdxa_format) {
28657 	case CDROM_XA_DATA:
28658 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
28659 		read_flags = 0x10;
28660 		break;
28661 	case CDROM_XA_SECTOR_DATA:
28662 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
28663 		read_flags = 0xf8;
28664 		break;
28665 	case CDROM_XA_DATA_W_ERROR:
28666 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
28667 		read_flags = 0xfc;
28668 		break;
28669 	default:
28670 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28671 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
28672 		    cdxa->cdxa_format);
28673 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28674 		return (EINVAL);
28675 	}
28676 
28677 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28678 	bzero(cdb, CDB_GROUP5);
28679 	if (un->un_f_mmc_cap == TRUE) {
28680 		cdb[0] = (char)SCMD_READ_CD;
28681 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
28682 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
28683 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
28684 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
28685 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
28686 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
28687 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
28688 		cdb[9] = (char)read_flags;
28689 	} else {
28690 		/*
28691 		 * Note: A vendor-specific command (0xDB) is being used here to
28692 		 * request a read of all subcodes.
28693 		 */
28694 		cdb[0] = (char)SCMD_READ_CDXA;
28695 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
28696 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
28697 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
28698 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
28699 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
28700 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
28701 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
28702 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
28703 		cdb[10] = cdxa->cdxa_format;
28704 	}
28705 	com->uscsi_cdb	   = cdb;
28706 	com->uscsi_cdblen  = CDB_GROUP5;
28707 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
28708 	com->uscsi_buflen  = buflen;
28709 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28710 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28711 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28712 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28713 	kmem_free(com, sizeof (*com));
28714 	return (rval);
28715 }
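
/*
 * A minimal user-space sketch (not part of the driver) of the CDROMCDXA
 * ioctl: read one 2048-byte XA data block.  The device path is an
 * assumption; error handling is omitted.
 *
 *	unsigned char blk[CDROM_BLK_2048];
 *	struct cdrom_cdxa cdxa;
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *	cdxa.cdxa_addr = 0;
 *	cdxa.cdxa_length = 1;
 *	cdxa.cdxa_data = (caddr_t)blk;
 *	cdxa.cdxa_format = CDROM_XA_DATA;
 *	(void) ioctl(fd, CDROMCDXA, &cdxa);
 */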
28716 
28717 
28718 /*
28719  *    Function: sr_eject()
28720  *
28721  * Description: This routine is the driver entry point for handling CD-ROM
28722  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
28723  *
28724  *   Arguments: dev	- the device 'dev_t'
28725  *
28726  * Return Code: the code returned by sd_send_scsi_cmd()
28727  */
28728 
28729 static int
28730 sr_eject(dev_t dev)
28731 {
28732 	struct sd_lun	*un;
28733 	int		rval;
28734 
28735 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28736 	    (un->un_state == SD_STATE_OFFLINE)) {
28737 		return (ENXIO);
28738 	}
28739 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
28740 	    SD_PATH_STANDARD)) != 0) {
28741 		return (rval);
28742 	}
28743 
28744 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
28745 	    SD_PATH_STANDARD);
28746 
28747 	if (rval == 0) {
28748 		mutex_enter(SD_MUTEX(un));
28749 		sr_ejected(un);
28750 		un->un_mediastate = DKIO_EJECTED;
28751 		cv_broadcast(&un->un_state_cv);
28752 		mutex_exit(SD_MUTEX(un));
28753 	}
28754 	return (rval);
28755 }
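
/*
 * A user-space sketch (not part of the driver): CDROMEJECT takes no
 * argument, so ejecting the media is a one-liner once the device is
 * open:
 *
 *	(void) ioctl(fd, CDROMEJECT, 0);
 */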
28756 
28757 
28758 /*
28759  *    Function: sr_ejected()
28760  *
28761  * Description: This routine updates the soft state structure to invalidate the
28762  *		geometry information after the media has been ejected or a
28763  *		media eject has been detected.
28764  *
28765  *   Arguments: un - driver soft state (unit) structure
28766  */
28767 
28768 static void
28769 sr_ejected(struct sd_lun *un)
28770 {
28771 	struct sd_errstats *stp;
28772 
28773 	ASSERT(un != NULL);
28774 	ASSERT(mutex_owned(SD_MUTEX(un)));
28775 
28776 	un->un_f_blockcount_is_valid	= FALSE;
28777 	un->un_f_tgt_blocksize_is_valid	= FALSE;
28778 	un->un_f_geometry_is_valid	= FALSE;
28779 
28780 	if (un->un_errstats != NULL) {
28781 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
28782 		stp->sd_capacity.value.ui64 = 0;
28783 	}
28784 }
28785 
28786 
28787 /*
28788  *    Function: sr_check_wp()
28789  *
28790  * Description: This routine checks the write protection of a removable media
28791  *		disk via the write protect bit of the Mode Page Header device
28792  *		specific field.  This routine uses sd_send_scsi_MODE_SENSE()
28793  *		on the error recovery mode page for all device types.
28795  *
28796  *   Arguments: dev		- the device 'dev_t'
28797  *
28798  * Return Code: int indicating if the device is write protected (1) or not (0)
28799  *
28800  *     Context: Kernel thread.
28801  *
28802  */
28803 
28804 static int
28805 sr_check_wp(dev_t dev)
28806 {
28807 	struct sd_lun	*un;
28808 	uchar_t		device_specific;
28809 	uchar_t		*sense;
28810 	int		hdrlen;
28811 	int		rval;
28812 	int		retry_flag = FALSE;
28813 
28814 	/*
28815 	 * Note: The return codes for this routine should be reworked to
28816 	 * properly handle the case of a NULL softstate.
28817 	 */
28818 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28819 		return (FALSE);
28820 	}
28821 
28822 	if (un->un_f_cfg_is_atapi == TRUE) {
28823 		retry_flag = TRUE;
28824 	}
28825 
28826 retry:
28827 	if (un->un_f_cfg_is_atapi == TRUE) {
28828 		/*
28829 		 * The mode page contents are not required; set the allocation
28830 		 * length for the mode page header only
28831 		 */
28832 		hdrlen = MODE_HEADER_LENGTH_GRP2;
28833 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
28834 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
28835 		    MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
28836 		device_specific =
28837 		    ((struct mode_header_grp2 *)sense)->device_specific;
28838 	} else {
28839 		hdrlen = MODE_HEADER_LENGTH;
28840 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
28841 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
28842 		    MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
28843 		device_specific =
28844 		    ((struct mode_header *)sense)->device_specific;
28845 	}
28846 
28847 	if (rval != 0) {
28848 		if ((un->un_f_cfg_is_atapi == TRUE) && (retry_flag)) {
28849 			/*
28850 			 * An ATAPI Zip drive has been observed to report a
28851 			 * check condition on the first attempt, with sense
28852 			 * data indicating a power-on or bus/device reset.
28853 			 * Hence, on failure, retry at least once for ATAPI
28854 			 * devices.
28855 			 */
28856 			retry_flag = FALSE;
28857 			kmem_free(sense, hdrlen);
28858 			goto retry;
28859 		} else {
28860 			/*
28861 			 * Write protect mode sense failed; not all disks
28862 			 * understand this query. Return FALSE, assuming that
28863 			 * these devices are writable.
28864 			 */
28865 			rval = FALSE;
28866 		}
28867 	} else {
28868 		if (device_specific & WRITE_PROTECT) {
28869 			rval = TRUE;
28870 		} else {
28871 			rval = FALSE;
28872 		}
28873 	}
28874 	kmem_free(sense, hdrlen);
28875 	return (rval);
28876 }
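
/*
 * For reference, the write-protect indication tested above is bit 7
 * (0x80) of the device-specific byte in the mode parameter header, as
 * defined by SCSI for direct-access devices; WRITE_PROTECT is expected
 * to carry that mask.
 */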
28877 
28878 
28879 /*
28880  *    Function: sr_volume_ctrl()
28881  *
28882  * Description: This routine is the driver entry point for handling CD-ROM
28883  *		audio output volume ioctl requests. (CDROMVOLCTRL)
28884  *
28885  *   Arguments: dev	- the device 'dev_t'
28886  *		data	- pointer to user audio volume control structure
28887  *		flag	- this argument is a pass through to ddi_copyxxx()
28888  *			  directly from the mode argument of ioctl().
28889  *
28890  * Return Code: the code returned by sd_send_scsi_cmd()
28891  *		EFAULT if ddi_copyxxx() fails
28892  *		ENXIO if fail ddi_get_soft_state
28893  *		EINVAL if data pointer is NULL
28894  *
28895  */
28896 
28897 static int
28898 sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
28899 {
28900 	struct sd_lun		*un;
28901 	struct cdrom_volctrl    volume;
28902 	struct cdrom_volctrl    *vol = &volume;
28903 	uchar_t			*sense_page;
28904 	uchar_t			*select_page;
28905 	uchar_t			*sense;
28906 	uchar_t			*select;
28907 	int			sense_buflen;
28908 	int			select_buflen;
28909 	int			rval;
28910 
28911 	if (data == NULL) {
28912 		return (EINVAL);
28913 	}
28914 
28915 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28916 	    (un->un_state == SD_STATE_OFFLINE)) {
28917 		return (ENXIO);
28918 	}
28919 
28920 	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
28921 		return (EFAULT);
28922 	}
28923 
28924 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
28925 		struct mode_header_grp2		*sense_mhp;
28926 		struct mode_header_grp2		*select_mhp;
28927 		int				bd_len;
28928 
28929 		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
28930 		select_buflen = MODE_HEADER_LENGTH_GRP2 +
28931 		    MODEPAGE_AUDIO_CTRL_LEN;
28932 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
28933 		select = kmem_zalloc(select_buflen, KM_SLEEP);
28934 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
28935 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
28936 		    SD_PATH_STANDARD)) != 0) {
28937 			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28938 			    "sr_volume_ctrl: Mode Sense Failed\n");
28939 			kmem_free(sense, sense_buflen);
28940 			kmem_free(select, select_buflen);
28941 			return (rval);
28942 		}
28943 		sense_mhp = (struct mode_header_grp2 *)sense;
28944 		select_mhp = (struct mode_header_grp2 *)select;
28945 		bd_len = (sense_mhp->bdesc_length_hi << 8) |
28946 		    sense_mhp->bdesc_length_lo;
28947 		if (bd_len > MODE_BLK_DESC_LENGTH) {
28948 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28949 			    "sr_volume_ctrl: Mode Sense returned invalid "
28950 			    "block descriptor length\n");
28951 			kmem_free(sense, sense_buflen);
28952 			kmem_free(select, select_buflen);
28953 			return (EIO);
28954 		}
28955 		sense_page = (uchar_t *)
28956 		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
28957 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
28958 		select_mhp->length_msb = 0;
28959 		select_mhp->length_lsb = 0;
28960 		select_mhp->bdesc_length_hi = 0;
28961 		select_mhp->bdesc_length_lo = 0;
28962 	} else {
28963 		struct mode_header		*sense_mhp, *select_mhp;
28964 
28965 		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
28966 		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
28967 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
28968 		select = kmem_zalloc(select_buflen, KM_SLEEP);
28969 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
28970 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
28971 		    SD_PATH_STANDARD)) != 0) {
28972 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28973 			    "sr_volume_ctrl: Mode Sense Failed\n");
28974 			kmem_free(sense, sense_buflen);
28975 			kmem_free(select, select_buflen);
28976 			return (rval);
28977 		}
28978 		sense_mhp  = (struct mode_header *)sense;
28979 		select_mhp = (struct mode_header *)select;
28980 		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
28981 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28982 			    "sr_volume_ctrl: Mode Sense returned invalid "
28983 			    "block descriptor length\n");
28984 			kmem_free(sense, sense_buflen);
28985 			kmem_free(select, select_buflen);
28986 			return (EIO);
28987 		}
28988 		sense_page = (uchar_t *)
28989 		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
28990 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
28991 		select_mhp->length = 0;
28992 		select_mhp->bdesc_length = 0;
28993 	}
28994 	/*
28995 	 * Note: An audio control data structure could be created and overlaid
28996 	 * on the following in place of the array-indexing method implemented.
28997 	 */
28998 
28999 	/* Build the select data for the user volume data */
29000 	select_page[0] = MODEPAGE_AUDIO_CTRL;
29001 	select_page[1] = 0xE;
29002 	/* Set the immediate bit */
29003 	select_page[2] = 0x04;
29004 	/* Zero out reserved fields */
29005 	select_page[3] = 0x00;
29006 	select_page[4] = 0x00;
29007 	/* Return sense data for fields not to be modified */
29008 	select_page[5] = sense_page[5];
29009 	select_page[6] = sense_page[6];
29010 	select_page[7] = sense_page[7];
29011 	/* Set the user specified volume levels for channel 0 and 1 */
29012 	select_page[8] = 0x01;
29013 	select_page[9] = vol->channel0;
29014 	select_page[10] = 0x02;
29015 	select_page[11] = vol->channel1;
29016 	/* Channels 2 and 3 are currently unsupported; return the sense data */
29017 	select_page[12] = sense_page[12];
29018 	select_page[13] = sense_page[13];
29019 	select_page[14] = sense_page[14];
29020 	select_page[15] = sense_page[15];
29021 
29022 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29023 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
29024 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29025 	} else {
29026 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
29027 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29028 	}
29029 
29030 	kmem_free(sense, sense_buflen);
29031 	kmem_free(select, select_buflen);
29032 	return (rval);
29033 }
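
/*
 * A minimal user-space sketch (not part of the driver) of the
 * CDROMVOLCTRL ioctl: set both supported channels to full volume.
 * Channels 2 and 3 are ignored by the handler above.
 *
 *	struct cdrom_volctrl vol;
 *	vol.channel0 = 0xFF;
 *	vol.channel1 = 0xFF;
 *	vol.channel2 = vol.channel3 = 0;
 *	(void) ioctl(fd, CDROMVOLCTRL, &vol);
 */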
29034 
29035 
29036 /*
29037  *    Function: sr_read_sony_session_offset()
29038  *
29039  * Description: This routine is the driver entry point for handling CD-ROM
29040  *		ioctl requests for session offset information. (CDROMREADOFFSET)
29041  *		The address of the first track in the last session of a
29042  *		multi-session CD-ROM is returned
29043  *
29044  *		Note: This routine uses a vendor specific key value in the
29045  *		command control field without implementing any vendor check here
29046  *		or in the ioctl routine.
29047  *
29048  *   Arguments: dev	- the device 'dev_t'
29049  *		data	- pointer to an int to hold the requested address
29050  *		flag	- this argument is a pass through to ddi_copyxxx()
29051  *			  directly from the mode argument of ioctl().
29052  *
29053  * Return Code: the code returned by sd_send_scsi_cmd()
29054  *		EFAULT if ddi_copyxxx() fails
29055  *		ENXIO if fail ddi_get_soft_state
29056  *		EINVAL if data pointer is NULL
29057  */
29058 
29059 static int
29060 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
29061 {
29062 	struct sd_lun		*un;
29063 	struct uscsi_cmd	*com;
29064 	caddr_t			buffer;
29065 	char			cdb[CDB_GROUP1];
29066 	int			session_offset = 0;
29067 	int			rval;
29068 
29069 	if (data == NULL) {
29070 		return (EINVAL);
29071 	}
29072 
29073 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29074 	    (un->un_state == SD_STATE_OFFLINE)) {
29075 		return (ENXIO);
29076 	}
29077 
29078 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
29079 	bzero(cdb, CDB_GROUP1);
29080 	cdb[0] = SCMD_READ_TOC;
29081 	/*
29082 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
29083 	 * (4 byte TOC response header + 8 byte response data)
29084 	 */
29085 	cdb[8] = SONY_SESSION_OFFSET_LEN;
29086 	/* Byte 9 is the control byte. A vendor specific value is used */
29087 	cdb[9] = SONY_SESSION_OFFSET_KEY;
29088 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29089 	com->uscsi_cdb = cdb;
29090 	com->uscsi_cdblen = CDB_GROUP1;
29091 	com->uscsi_bufaddr = buffer;
29092 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
29093 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29094 
29095 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
29096 	    UIO_SYSSPACE, SD_PATH_STANDARD);
29097 	if (rval != 0) {
29098 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29099 		kmem_free(com, sizeof (*com));
29100 		return (rval);
29101 	}
29102 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
29103 		session_offset =
29104 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
29105 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
29106 		/*
29107 		 * The offset is returned in current lbasize blocks. Convert
29108 		 * it to 2K blocks before returning it to the user.
29109 		 */
29110 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
29111 			session_offset >>= 2;
29112 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
29113 			session_offset >>= 1;
29114 		}
29115 	}
29116 
29117 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
29118 		rval = EFAULT;
29119 	}
29120 
29121 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29122 	kmem_free(com, sizeof (*com));
29123 	return (rval);
29124 }
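
/*
 * A minimal user-space sketch (not part of the driver) of the
 * CDROMREADOFFSET ioctl; the returned address is in 2K blocks, per the
 * conversion above:
 *
 *	int offset;
 *	if (ioctl(fd, CDROMREADOFFSET, &offset) == 0)
 *		(void) printf("last session starts at 2K block %d\n",
 *		    offset);
 */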
29125 
29126 
29127 /*
29128  *    Function: sd_wm_cache_constructor()
29129  *
29130  * Description: Cache Constructor for the wmap cache for the read/modify/write
29131  * 		devices.
29132  *
29133  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29134  *		un	- sd_lun structure for the device.
29135  *		flags	- the km flags passed to the constructor
29136  *
29137  * Return Code: 0 on success.
29138  *		-1 on failure.
29139  */
29140 
29141 /*ARGSUSED*/
29142 static int
29143 sd_wm_cache_constructor(void *wm, void *un, int flags)
29144 {
29145 	bzero(wm, sizeof (struct sd_w_map));
29146 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
29147 	return (0);
29148 }
29149 
29150 
29151 /*
29152  *    Function: sd_wm_cache_destructor()
29153  *
29154  * Description: Cache destructor for the wmap cache for the read/modify/write
29155  * 		devices.
29156  *
29157  *   Arguments: wm      - A pointer to the sd_w_map to be destroyed.
29158  *		un	- sd_lun structure for the device.
29159  */
29160 /*ARGSUSED*/
29161 static void
29162 sd_wm_cache_destructor(void *wm, void *un)
29163 {
29164 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
29165 }
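
/*
 * A sketch of how these callbacks are expected to be wired up with
 * kmem_cache_create(9F) at attach time (illustrative only; the actual
 * cache name and alignment are chosen by the attach code):
 *
 *	un->un_wm_cache = kmem_cache_create("sd_wmap_cache",
 *	    sizeof (struct sd_w_map), 8, sd_wm_cache_constructor,
 *	    sd_wm_cache_destructor, NULL, (void *)un, NULL, 0);
 */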
29166 
29167 
29168 /*
29169  *    Function: sd_range_lock()
29170  *
29171  * Description: Lock the specified range of blocks to ensure that a
29172  *		read-modify-write is atomic and that no other I/O writes
29173  *		to the same location. The range is specified in terms
29174  *		of start and end blocks. Block numbers are the actual
29175  *		media block numbers, not system block numbers.
29176  *
29177  *   Arguments: un	- sd_lun structure for the device.
29178  *		startb - The starting block number
29179  *		endb - The end block number
29180  *		typ - type of i/o - simple/read_modify_write
29181  *
29182  * Return Code: wm  - pointer to the wmap structure.
29183  *
29184  *     Context: This routine can sleep.
29185  */
29186 
29187 static struct sd_w_map *
29188 sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
29189 {
29190 	struct sd_w_map *wmp = NULL;
29191 	struct sd_w_map *sl_wmp = NULL;
29192 	struct sd_w_map *tmp_wmp;
29193 	wm_state state = SD_WM_CHK_LIST;
29194 
29195 
29196 	ASSERT(un != NULL);
29197 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29198 
29199 	mutex_enter(SD_MUTEX(un));
29200 
29201 	while (state != SD_WM_DONE) {
29202 
29203 		switch (state) {
29204 		case SD_WM_CHK_LIST:
29205 			/*
29206 			 * This is the starting state. Check the wmap list
29207 			 * to see if the range is currently available.
29208 			 */
29209 			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
29210 				/*
29211 				 * If this is a simple write and no rmw
29212 				 * i/o is pending then try to lock the
29213 				 * range as the range should be available.
29214 				 */
29215 				state = SD_WM_LOCK_RANGE;
29216 			} else {
29217 				tmp_wmp = sd_get_range(un, startb, endb);
29218 				if (tmp_wmp != NULL) {
29219 					if ((wmp != NULL) && ONLIST(un, wmp)) {
29220 						/*
29221 						 * Should not keep an onlist
29222 						 * wmp while waiting; this
29223 						 * macro also sets wmp = NULL.
29224 						 */
29225 						FREE_ONLIST_WMAP(un, wmp);
29226 					}
29227 					/*
29228 					 * sl_wmp is the wmap on which the wait
29229 					 * is done. Since tmp_wmp points to the
29230 					 * in-use wmap, set sl_wmp to tmp_wmp
29231 					 * and change the state to wait.
29232 					 */
29233 					sl_wmp = tmp_wmp;
29234 					state = SD_WM_WAIT_MAP;
29235 				} else {
29236 					state = SD_WM_LOCK_RANGE;
29237 				}
29238 
29239 			}
29240 			break;
29241 
29242 		case SD_WM_LOCK_RANGE:
29243 			ASSERT(un->un_wm_cache);
29244 			/*
29245 			 * The range needs to be locked; try to get a wmap.
29246 			 * First attempt it with KM_NOSLEEP: we want to avoid
29247 			 * sleeping if possible, since sleeping requires
29248 			 * releasing the sd mutex.
29249 			 */
29250 			if (wmp == NULL)
29251 				wmp = kmem_cache_alloc(un->un_wm_cache,
29252 				    KM_NOSLEEP);
29253 			if (wmp == NULL) {
29254 				mutex_exit(SD_MUTEX(un));
29255 				_NOTE(DATA_READABLE_WITHOUT_LOCK
29256 				    (sd_lun::un_wm_cache))
29257 				wmp = kmem_cache_alloc(un->un_wm_cache,
29258 				    KM_SLEEP);
29259 				mutex_enter(SD_MUTEX(un));
29260 				/*
29261 				 * We released the mutex, so recheck by going
29262 				 * back to the check-list state.
29263 				 */
29264 				state = SD_WM_CHK_LIST;
29265 			} else {
29266 				/*
29267 				 * We can exit the state machine since we
29268 				 * have the wmap. Do the housekeeping first:
29269 				 * place the wmap on the wmap list if it is not
29270 				 * already there, then set the state to done.
29271 				 */
29272 				wmp->wm_start = startb;
29273 				wmp->wm_end = endb;
29274 				wmp->wm_flags = typ | SD_WM_BUSY;
29275 				if (typ & SD_WTYPE_RMW) {
29276 					un->un_rmw_count++;
29277 				}
29278 				/*
29279 				 * If not already on the list, link it in.
29280 				 */
29281 				if (!ONLIST(un, wmp)) {
29282 					wmp->wm_next = un->un_wm;
29283 					wmp->wm_prev = NULL;
29284 					if (wmp->wm_next)
29285 						wmp->wm_next->wm_prev = wmp;
29286 					un->un_wm = wmp;
29287 				}
29288 				state = SD_WM_DONE;
29289 			}
29290 			break;
29291 
29292 		case SD_WM_WAIT_MAP:
29293 			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
29294 			/*
29295 			 * Wait is done on sl_wmp, which is set in the
29296 			 * check_list state.
29297 			 */
29298 			sl_wmp->wm_wanted_count++;
29299 			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
29300 			sl_wmp->wm_wanted_count--;
29301 			if (!(sl_wmp->wm_flags & SD_WM_BUSY)) {
29302 				if (wmp != NULL)
29303 					CHK_N_FREEWMP(un, wmp);
29304 				wmp = sl_wmp;
29305 			}
29306 			sl_wmp = NULL;
29307 			/*
29308 			 * After waking up, need to recheck for availability of
29309 			 * range.
29310 			 */
29311 			state = SD_WM_CHK_LIST;
29312 			break;
29313 
29314 		default:
29315 			panic("sd_range_lock: "
29316 			    "Unknown state %d in sd_range_lock", state);
29317 			/*NOTREACHED*/
29318 		} /* switch(state) */
29319 
29320 	} /* while(state != SD_WM_DONE) */
29321 
29322 	mutex_exit(SD_MUTEX(un));
29323 
29324 	ASSERT(wmp != NULL);
29325 
29326 	return (wmp);
29327 }
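
/*
 * A minimal sketch of the intended pairing with sd_range_unlock()
 * (illustrative only; the real callers are in the read-modify-write
 * path):
 *
 *	struct sd_w_map *wmp;
 *
 *	wmp = sd_range_lock(un, start_blk, end_blk, SD_WTYPE_RMW);
 *	... read, modify, and write the target blocks ...
 *	sd_range_unlock(un, wmp);
 */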
29328 
29329 
29330 /*
29331  *    Function: sd_get_range()
29332  *
29333  * Description: Find whether there is any I/O overlapping with this one.
29334  *		Returns the write map of the first such I/O, NULL otherwise.
29335  *
29336  *   Arguments: un	- sd_lun structure for the device.
29337  *		startb - The starting block number
29338  *		endb - The end block number
29339  *
29340  * Return Code: wm  - pointer to the wmap structure.
29341  */
29342 
29343 static struct sd_w_map *
29344 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
29345 {
29346 	struct sd_w_map *wmp;
29347 
29348 	ASSERT(un != NULL);
29349 
29350 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
29351 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
29352 			continue;
29353 		}
29354 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
29355 			break;
29356 		}
29357 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
29358 			break;
29359 		}
29360 	}
29361 
29362 	return (wmp);
29363 }
29364 
29365 
29366 /*
29367  *    Function: sd_free_inlist_wmap()
29368  *
29369  * Description: Unlink and free a write map struct.
29370  *
29371  *   Arguments: un      - sd_lun structure for the device.
29372  *		wmp	- sd_w_map which needs to be unlinked.
29373  */
29374 
29375 static void
29376 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
29377 {
29378 	ASSERT(un != NULL);
29379 
29380 	if (un->un_wm == wmp) {
29381 		un->un_wm = wmp->wm_next;
29382 	} else {
29383 		wmp->wm_prev->wm_next = wmp->wm_next;
29384 	}
29385 
29386 	if (wmp->wm_next) {
29387 		wmp->wm_next->wm_prev = wmp->wm_prev;
29388 	}
29389 
29390 	wmp->wm_next = wmp->wm_prev = NULL;
29391 
29392 	kmem_cache_free(un->un_wm_cache, wmp);
29393 }
29394 
29395 
29396 /*
29397  *    Function: sd_range_unlock()
29398  *
29399  * Description: Unlock the range locked by wm.
29400  *		Free write map if nobody else is waiting on it.
29401  *
29402  *   Arguments: un      - sd_lun structure for the device.
29403  *              wmp     - sd_w_map which needs to be unlinked.
29404  */
29405 
29406 static void
29407 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
29408 {
29409 	ASSERT(un != NULL);
29410 	ASSERT(wm != NULL);
29411 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29412 
29413 	mutex_enter(SD_MUTEX(un));
29414 
29415 	if (wm->wm_flags & SD_WTYPE_RMW) {
29416 		un->un_rmw_count--;
29417 	}
29418 
29419 	if (wm->wm_wanted_count) {
29420 		wm->wm_flags = 0;
29421 		/*
29422 		 * Broadcast that the wmap is available now.
29423 		 */
29424 		cv_broadcast(&wm->wm_avail);
29425 	} else {
29426 		/*
29427 		 * If no one is waiting on the map, it should be freed.
29428 		 */
29429 		sd_free_inlist_wmap(un, wm);
29430 	}
29431 
29432 	mutex_exit(SD_MUTEX(un));
29433 }
29434 
29435 
29436 /*
29437  *    Function: sd_read_modify_write_task
29438  *
29439  * Description: Called from a taskq thread to initiate the write phase of
29440  *		a read-modify-write request.  This is used for targets where
29441  *		un->un_sys_blocksize != un->un_tgt_blocksize.
29442  *
29443  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29444  *
29445  *     Context: Called under taskq thread context.
29446  */
29447 
29448 static void
29449 sd_read_modify_write_task(void *arg)
29450 {
29451 	struct sd_mapblocksize_info	*bsp;
29452 	struct buf	*bp;
29453 	struct sd_xbuf	*xp;
29454 	struct sd_lun	*un;
29455 
29456 	bp = arg;	/* The bp is given in arg */
29457 	ASSERT(bp != NULL);
29458 
29459 	/* Get the pointer to the layer-private data struct */
29460 	xp = SD_GET_XBUF(bp);
29461 	ASSERT(xp != NULL);
29462 	bsp = xp->xb_private;
29463 	ASSERT(bsp != NULL);
29464 
29465 	un = SD_GET_UN(bp);
29466 	ASSERT(un != NULL);
29467 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29468 
29469 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29470 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29471 
29472 	/*
29473 	 * This is the write phase of a read-modify-write request, called
29474 	 * under the context of a taskq thread in response to the completion
29475 	 * of the read portion of the rmw request completing under interrupt
29476 	 * context. The write request must be sent from here down the iostart
29477 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
29478 	 * we use the layer index saved in the layer-private data area.
29479 	 */
29480 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29481 
29482 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29483 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29484 }
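
/*
 * A sketch of how this task is expected to be dispatched from the read
 * completion path (illustrative only; "sd_wmr_tq" is a placeholder for
 * whatever taskq the driver created for this purpose):
 *
 *	if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
 *	    TQ_NOSLEEP) == 0) {
 *		... dispatch failed; fail the command ...
 *	}
 */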
29485 
29486 
29487 /*
29488  *    Function: sddump_do_read_of_rmw()
29489  *
29490  * Description: This routine will be called from sddump. If sddump is called
29491  *		with an I/O that is not aligned on a device blocksize
29492  *		boundary, the write has to be converted to a read-modify-
29493  *		write. Do the read part here in order to keep sddump simple.
29494  *		Note that the sd_mutex is held across the call to this
29495  *		routine.
29496  *
29497  *   Arguments: un	- sd_lun
29498  *		blkno	- block number in terms of media block size.
29499  *		nblk	- number of blocks.
29500  *		bpp	- pointer to pointer to the buf structure. On return
29501  *			from this function, *bpp points to the valid buffer
29502  *			to which the write has to be done.
29503  *
29504  * Return Code: 0 for success or errno-type return code
29505  */
29506 
29507 static int
29508 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
29509 	struct buf **bpp)
29510 {
29511 	int err;
29512 	int i;
29513 	int rval;
29514 	struct buf *bp;
29515 	struct scsi_pkt *pkt = NULL;
29516 	uint32_t target_blocksize;
29517 
29518 	ASSERT(un != NULL);
29519 	ASSERT(mutex_owned(SD_MUTEX(un)));
29520 
29521 	target_blocksize = un->un_tgt_blocksize;
29522 
29523 	mutex_exit(SD_MUTEX(un));
29524 
29525 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
29526 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
29527 	if (bp == NULL) {
29528 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29529 		    "no resources for dumping; giving up");
29530 		err = ENOMEM;
29531 		goto done;
29532 	}
29533 
29534 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
29535 	    blkno, nblk);
29536 	if (rval != 0) {
29537 		scsi_free_consistent_buf(bp);
29538 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29539 		    "no resources for dumping; giving up");
29540 		err = ENOMEM;
29541 		goto done;
29542 	}
29543 
29544 	pkt->pkt_flags |= FLAG_NOINTR;
29545 
29546 	err = EIO;
29547 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
29548 
29549 		/*
29550 		 * sd_scsi_poll() returns 0 (success) if the command completes
29551 		 * and the status block is STATUS_GOOD.  We should check for
29552 		 * errors only if this condition is not true.  Even then we
29553 		 * should send our own REQUEST SENSE packet only if we have a
29554 		 * check condition and auto request sense has not been
29555 		 * performed by the HBA.
29556 		 */
29557 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
29558 
29559 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
29560 			err = 0;
29561 			break;
29562 		}
29563 
29564 		/*
29565 		 * Check CMD_DEV_GONE first; give up if the device is gone,
29566 		 * as there is no need to read RQS data.
29567 		 */
29568 		if (pkt->pkt_reason == CMD_DEV_GONE) {
29569 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29570 			    "Device is gone\n");
29571 			break;
29572 		}
29573 
29574 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
29575 			SD_INFO(SD_LOG_DUMP, un,
29576 			    "sddump: read failed with CHECK, try # %d\n", i);
29577 			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
29578 				(void) sd_send_polled_RQS(un);
29579 			}
29580 
29581 			continue;
29582 		}
29583 
29584 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
29585 			int reset_retval = 0;
29586 
29587 			SD_INFO(SD_LOG_DUMP, un,
29588 			    "sddump: read failed with BUSY, try # %d\n", i);
29589 
29590 			if (un->un_f_lun_reset_enabled == TRUE) {
29591 				reset_retval = scsi_reset(SD_ADDRESS(un),
29592 				    RESET_LUN);
29593 			}
29594 			if (reset_retval == 0) {
29595 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
29596 			}
29597 			(void) sd_send_polled_RQS(un);
29598 
29599 		} else {
29600 			SD_INFO(SD_LOG_DUMP, un,
29601 			    "sddump: read failed with 0x%x, try # %d\n",
29602 			    SD_GET_PKT_STATUS(pkt), i);
29603 			mutex_enter(SD_MUTEX(un));
29604 			sd_reset_target(un, pkt);
29605 			mutex_exit(SD_MUTEX(un));
29606 		}
29607 
29608 		/*
29609 		 * If we are not getting anywhere with lun/target resets,
29610 		 * let's reset the bus.
29611 		 */
29612 		if (i > SD_NDUMP_RETRIES/2) {
29613 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
29614 			(void) sd_send_polled_RQS(un);
29615 		}
29616 
29617 	}
29618 	scsi_destroy_pkt(pkt);
29619 
29620 	if (err != 0) {
29621 		scsi_free_consistent_buf(bp);
29622 		*bpp = NULL;
29623 	} else {
29624 		*bpp = bp;
29625 	}
29626 
29627 done:
29628 	mutex_enter(SD_MUTEX(un));
29629 	return (err);
29630 }
29631 
29632 
29633 /*
29634  *    Function: sd_failfast_flushq
29635  *
29636  * Description: Take all bp's on the wait queue that have B_FAILFAST set
29637  *		in b_flags and move them onto the failfast queue, then kick
29638  *		off a thread to return all bp's on the failfast queue to
29639  *		their owners with an error set.
29640  *
29641  *   Arguments: un - pointer to the soft state struct for the instance.
29642  *
29643  *     Context: may execute in interrupt context.
29644  */
29645 
29646 static void
29647 sd_failfast_flushq(struct sd_lun *un)
29648 {
29649 	struct buf *bp;
29650 	struct buf *next_waitq_bp;
29651 	struct buf *prev_waitq_bp = NULL;
29652 
29653 	ASSERT(un != NULL);
29654 	ASSERT(mutex_owned(SD_MUTEX(un)));
29655 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
29656 	ASSERT(un->un_failfast_bp == NULL);
29657 
29658 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
29659 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
29660 
29661 	/*
29662 	 * Check if we should flush all bufs when entering failfast state, or
29663 	 * just those with B_FAILFAST set.
29664 	 */
29665 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
29666 		/*
29667 		 * Move *all* bp's on the wait queue to the failfast flush
29668 		 * queue, including those that do NOT have B_FAILFAST set.
29669 		 */
29670 		if (un->un_failfast_headp == NULL) {
29671 			ASSERT(un->un_failfast_tailp == NULL);
29672 			un->un_failfast_headp = un->un_waitq_headp;
29673 		} else {
29674 			ASSERT(un->un_failfast_tailp != NULL);
29675 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
29676 		}
29677 
29678 		un->un_failfast_tailp = un->un_waitq_tailp;
29679 
29680 		/* update kstat for each bp moved out of the waitq */
29681 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
29682 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
29683 		}
29684 
29685 		/* empty the waitq */
29686 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
29687 
29688 	} else {
29689 		/*
29690 		 * Go thru the wait queue, pick off all entries with
29691 		 * B_FAILFAST set, and move these onto the failfast queue.
29692 		 */
29693 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
29694 			/*
29695 			 * Save the pointer to the next bp on the wait queue,
29696 			 * so we get to it on the next iteration of this loop.
29697 			 */
29698 			next_waitq_bp = bp->av_forw;
29699 
29700 			/*
29701 			 * If this bp from the wait queue does NOT have
29702 			 * B_FAILFAST set, just move on to the next element
29703 			 * in the wait queue. Note, this is the only place
29704 			 * where it is correct to set prev_waitq_bp.
29705 			 */
29706 			if ((bp->b_flags & B_FAILFAST) == 0) {
29707 				prev_waitq_bp = bp;
29708 				continue;
29709 			}
29710 
29711 			/*
29712 			 * Remove the bp from the wait queue.
29713 			 */
29714 			if (bp == un->un_waitq_headp) {
29715 				/* The bp is the first element of the waitq. */
29716 				un->un_waitq_headp = next_waitq_bp;
29717 				if (un->un_waitq_headp == NULL) {
29718 					/* The wait queue is now empty */
29719 					un->un_waitq_tailp = NULL;
29720 				}
29721 			} else {
29722 				/*
29723 				 * The bp is either somewhere in the middle
29724 				 * or at the end of the wait queue.
29725 				 */
29726 				ASSERT(un->un_waitq_headp != NULL);
29727 				ASSERT(prev_waitq_bp != NULL);
29728 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
29729 				    == 0);
29730 				if (bp == un->un_waitq_tailp) {
29731 					/* bp is the last entry on the waitq. */
29732 					ASSERT(next_waitq_bp == NULL);
29733 					un->un_waitq_tailp = prev_waitq_bp;
29734 				}
29735 				prev_waitq_bp->av_forw = next_waitq_bp;
29736 			}
29737 			bp->av_forw = NULL;
29738 
29739 			/*
29740 			 * update kstat since the bp is moved out of
29741 			 * the waitq
29742 			 */
29743 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
29744 
29745 			/*
29746 			 * Now put the bp onto the failfast queue.
29747 			 */
29748 			if (un->un_failfast_headp == NULL) {
29749 				/* failfast queue is currently empty */
29750 				ASSERT(un->un_failfast_tailp == NULL);
29751 				un->un_failfast_headp =
29752 				    un->un_failfast_tailp = bp;
29753 			} else {
29754 				/* Add the bp to the end of the failfast q */
29755 				ASSERT(un->un_failfast_tailp != NULL);
29756 				ASSERT(un->un_failfast_tailp->b_flags &
29757 				    B_FAILFAST);
29758 				un->un_failfast_tailp->av_forw = bp;
29759 				un->un_failfast_tailp = bp;
29760 			}
29761 		}
29762 	}
29763 
29764 	/*
29765 	 * Now return all bp's on the failfast queue to their owners.
29766 	 */
29767 	while ((bp = un->un_failfast_headp) != NULL) {
29768 
29769 		un->un_failfast_headp = bp->av_forw;
29770 		if (un->un_failfast_headp == NULL) {
29771 			un->un_failfast_tailp = NULL;
29772 		}
29773 
29774 		/*
29775 		 * We want to return the bp with a failure error code, but
29776 		 * we do not want a call to sd_start_cmds() to occur here,
29777 		 * so use sd_return_failed_command_no_restart() instead of
29778 		 * sd_return_failed_command().
29779 		 */
29780 		sd_return_failed_command_no_restart(un, bp, EIO);
29781 	}
29782 
29783 	/* Flush the xbuf queues if required. */
29784 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
29785 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
29786 	}
29787 
29788 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
29789 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
29790 }
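
/*
 * For reference, an upper layer opts a buf into this treatment by
 * setting B_FAILFAST before issuing it (a sketch, not taken from a
 * specific consumer):
 *
 *	bp->b_flags |= B_FAILFAST;
 *	(void) ldi_strategy(lh, bp);
 */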
29791 
29792 
29793 /*
29794  *    Function: sd_failfast_flushq_callback
29795  *
29796  * Description: Return TRUE if the given bp meets the criteria for failfast
29797  *		flushing. Used with ddi_xbuf_flushq(9F).
29798  *
29799  *   Arguments: bp - ptr to buf struct to be examined.
29800  *
29801  *     Context: Any
29802  */
29803 
29804 static int
29805 sd_failfast_flushq_callback(struct buf *bp)
29806 {
29807 	/*
29808 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
29809 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
29810 	 */
29811 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
29812 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
29813 }
29814 
29815 
29816 
29817 #if defined(__i386) || defined(__amd64)
29818 /*
29819  * Function: sd_setup_next_xfer
29820  *
29821  * Description: Prepare next I/O operation using DMA_PARTIAL
29822  *
29823  */
29824 
29825 static int
29826 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
29827     struct scsi_pkt *pkt, struct sd_xbuf *xp)
29828 {
29829 	ssize_t	num_blks_not_xfered;
29830 	daddr_t	strt_blk_num;
29831 	ssize_t	bytes_not_xfered;
29832 	int	rval;
29833 
29834 	ASSERT(pkt->pkt_resid == 0);
29835 
29836 	/*
29837 	 * Calculate next block number and amount to be transferred.
29838 	 *
29839 	 * How much data has NOT been transferred to the HBA yet.
29840 	 */
29841 	bytes_not_xfered = xp->xb_dma_resid;
29842 
29843 	/*
29844 	 * Figure out how many blocks have NOT been transferred yet.
29845 	 */
29846 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
29847 
29848 	/*
29849 	 * Set the starting block number to the end of what WAS transferred.
29850 	 */
29851 	strt_blk_num = xp->xb_blkno +
29852 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
29853 
29854 	/*
29855 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
29856 	 * will call scsi_initpkt with NULL_FUNC so we do not have to release
29857 	 * the disk mutex here.
29858 	 */
29859 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
29860 	    strt_blk_num, num_blks_not_xfered);
29861 
29862 	if (rval == 0) {
29863 
29864 		/*
29865 		 * Success.
29866 		 *
29867 		 * Adjust things if there are still more blocks to be
29868 		 * transferred.
29869 		 */
29870 		xp->xb_dma_resid = pkt->pkt_resid;
29871 		pkt->pkt_resid = 0;
29872 
29873 		return (1);
29874 	}
29875 
29876 	/*
29877 	 * There's really only one possible failure return value from
29878 	 * sd_setup_next_rw_pkt, which occurs when scsi_init_pkt
29879 	 * returns NULL.
29880 	 */
29881 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
29882 
29883 	bp->b_resid = bp->b_bcount;
29884 	bp->b_flags |= B_ERROR;
29885 
29886 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29887 	    "Error setting up next portion of DMA transfer\n");
29888 
29889 	return (0);
29890 }
29891 #endif
29892 
29893 /*
29894  * Note: The following sd_faultinjection_ioctl() routines implement
29895  * driver support for fault injection, used for error analysis by
29896  * causing faults in multiple layers of the driver.
29897  *
29898  */
29899 
29900 #ifdef SD_FAULT_INJECTION
29901 static uint_t   sd_fault_injection_on = 0;
29902 
29903 /*
29904  *    Function: sd_faultinjection_ioctl()
29905  *
29906  * Description: This routine is the driver entry point for handling
29907  *              faultinjection ioctls to inject errors into the
29908  *              layer model
29909  *
29910  *   Arguments: cmd	- the ioctl cmd received
29911  *		arg	- the user argument, also used to return data
29912  */
29913 
29914 static void
29915 sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un)
29916 {
29917 	uint_t i;
29918 	uint_t rval;
29919 
29920 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
29921 
29922 	mutex_enter(SD_MUTEX(un));
29923 
29924 	switch (cmd) {
29925 	case SDIOCRUN:
29926 		/* Allow pushed faults to be injected */
29927 		SD_INFO(SD_LOG_SDTEST, un,
29928 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
29929 
29930 		sd_fault_injection_on = 1;
29931 
29932 		SD_INFO(SD_LOG_IOERR, un,
29933 		    "sd_faultinjection_ioctl: run finished\n");
29934 		break;
29935 
29936 	case SDIOCSTART:
29937 		/* Start Injection Session */
29938 		SD_INFO(SD_LOG_SDTEST, un,
29939 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
29940 
29941 		sd_fault_injection_on = 0;
29942 		un->sd_injection_mask = 0xFFFFFFFF;
29943 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
29944 			un->sd_fi_fifo_pkt[i] = NULL;
29945 			un->sd_fi_fifo_xb[i] = NULL;
29946 			un->sd_fi_fifo_un[i] = NULL;
29947 			un->sd_fi_fifo_arq[i] = NULL;
29948 		}
29949 		un->sd_fi_fifo_start = 0;
29950 		un->sd_fi_fifo_end = 0;
29951 
29952 		mutex_enter(&(un->un_fi_mutex));
29953 		un->sd_fi_log[0] = '\0';
29954 		un->sd_fi_buf_len = 0;
29955 		mutex_exit(&(un->un_fi_mutex));
29956 
29957 		SD_INFO(SD_LOG_IOERR, un,
29958 		    "sd_faultinjection_ioctl: start finished\n");
29959 		break;
29960 
29961 	case SDIOCSTOP:
29962 		/* Stop Injection Session */
29963 		SD_INFO(SD_LOG_SDTEST, un,
29964 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
29965 		sd_fault_injection_on = 0;
29966 		un->sd_injection_mask = 0x0;
29967 
29968 		/* Empty stray or unused structs from the fifo */
29969 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
29970 			if (un->sd_fi_fifo_pkt[i] != NULL) {
29971 				kmem_free(un->sd_fi_fifo_pkt[i],
29972 				    sizeof (struct sd_fi_pkt));
29973 			}
29974 			if (un->sd_fi_fifo_xb[i] != NULL) {
29975 				kmem_free(un->sd_fi_fifo_xb[i],
29976 				    sizeof (struct sd_fi_xb));
29977 			}
29978 			if (un->sd_fi_fifo_un[i] != NULL) {
29979 				kmem_free(un->sd_fi_fifo_un[i],
29980 				    sizeof (struct sd_fi_un));
29981 			}
29982 			if (un->sd_fi_fifo_arq[i] != NULL) {
29983 				kmem_free(un->sd_fi_fifo_arq[i],
29984 				    sizeof (struct sd_fi_arq));
29985 			}
29986 			un->sd_fi_fifo_pkt[i] = NULL;
29987 			un->sd_fi_fifo_un[i] = NULL;
29988 			un->sd_fi_fifo_xb[i] = NULL;
29989 			un->sd_fi_fifo_arq[i] = NULL;
29990 		}
29991 		un->sd_fi_fifo_start = 0;
29992 		un->sd_fi_fifo_end = 0;
29993 
29994 		SD_INFO(SD_LOG_IOERR, un,
29995 		    "sd_faultinjection_ioctl: stop finished\n");
29996 		break;
29997 
29998 	case SDIOCINSERTPKT:
29999 		/* Store a packet struct to be pushed onto fifo */
30000 		SD_INFO(SD_LOG_SDTEST, un,
30001 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
30002 
30003 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30004 
30005 		sd_fault_injection_on = 0;
30006 
30007 		/* No more than SD_FI_MAX_ERROR entries allowed in the queue */
30008 		if (un->sd_fi_fifo_pkt[i] != NULL) {
30009 			kmem_free(un->sd_fi_fifo_pkt[i],
30010 			    sizeof (struct sd_fi_pkt));
30011 		}
30012 		if (arg != NULL) {
30013 			un->sd_fi_fifo_pkt[i] =
30014 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
30015 			if (un->sd_fi_fifo_pkt[i] == NULL) {
30016 				/* Alloc failed don't store anything */
30017 				break;
30018 			}
30019 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
30020 			    sizeof (struct sd_fi_pkt), 0);
30021 			if (rval == -1) {
30022 				kmem_free(un->sd_fi_fifo_pkt[i],
30023 				    sizeof (struct sd_fi_pkt));
30024 				un->sd_fi_fifo_pkt[i] = NULL;
30025 			}
30026 		} else {
30027 			SD_INFO(SD_LOG_IOERR, un,
30028 			    "sd_faultinjection_ioctl: pkt null\n");
30029 		}
30030 		break;
30031 
30032 	case SDIOCINSERTXB:
30033 		/* Store a xb struct to be pushed onto fifo */
30034 		SD_INFO(SD_LOG_SDTEST, un,
30035 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
30036 
30037 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30038 
30039 		sd_fault_injection_on = 0;
30040 
30041 		if (un->sd_fi_fifo_xb[i] != NULL) {
30042 			kmem_free(un->sd_fi_fifo_xb[i],
30043 			    sizeof (struct sd_fi_xb));
30044 			un->sd_fi_fifo_xb[i] = NULL;
30045 		}
30046 		if (arg != NULL) {
30047 			un->sd_fi_fifo_xb[i] =
30048 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
30049 			if (un->sd_fi_fifo_xb[i] == NULL) {
30050 				/* Alloc failed don't store anything */
30051 				break;
30052 			}
30053 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
30054 			    sizeof (struct sd_fi_xb), 0);
30055 
30056 			if (rval == -1) {
30057 				kmem_free(un->sd_fi_fifo_xb[i],
30058 				    sizeof (struct sd_fi_xb));
30059 				un->sd_fi_fifo_xb[i] = NULL;
30060 			}
30061 		} else {
30062 			SD_INFO(SD_LOG_IOERR, un,
30063 			    "sd_faultinjection_ioctl: xb null\n");
30064 		}
30065 		break;
30066 
30067 	case SDIOCINSERTUN:
30068 		/* Store a un struct to be pushed onto fifo */
30069 		SD_INFO(SD_LOG_SDTEST, un,
30070 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
30071 
30072 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30073 
30074 		sd_fault_injection_on = 0;
30075 
30076 		if (un->sd_fi_fifo_un[i] != NULL) {
30077 			kmem_free(un->sd_fi_fifo_un[i],
30078 			    sizeof (struct sd_fi_un));
30079 			un->sd_fi_fifo_un[i] = NULL;
30080 		}
30081 		if (arg != NULL) {
30082 			un->sd_fi_fifo_un[i] =
30083 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
30084 			if (un->sd_fi_fifo_un[i] == NULL) {
30085 				/* Alloc failed; don't store anything */
30086 				break;
30087 			}
30088 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
30089 			    sizeof (struct sd_fi_un), 0);
30090 			if (rval == -1) {
30091 				kmem_free(un->sd_fi_fifo_un[i],
30092 				    sizeof (struct sd_fi_un));
30093 				un->sd_fi_fifo_un[i] = NULL;
30094 			}
30095 
30096 		} else {
30097 			SD_INFO(SD_LOG_IOERR, un,
30098 			    "sd_faultinjection_ioctl: un null\n");
30099 		}
30100 
30101 		break;
30102 
30103 	case SDIOCINSERTARQ:
30104 		/* Store an arq struct to be pushed onto fifo */
30105 		SD_INFO(SD_LOG_SDTEST, un,
30106 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
30107 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30108 
30109 		sd_fault_injection_on = 0;
30110 
30111 		if (un->sd_fi_fifo_arq[i] != NULL) {
30112 			kmem_free(un->sd_fi_fifo_arq[i],
30113 			    sizeof (struct sd_fi_arq));
30114 			un->sd_fi_fifo_arq[i] = NULL;
30115 		}
30116 		if (arg != NULL) {
30117 			un->sd_fi_fifo_arq[i] =
30118 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
30119 			if (un->sd_fi_fifo_arq[i] == NULL) {
30120 				/* Alloc failed; don't store anything */
30121 				break;
30122 			}
30123 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
30124 			    sizeof (struct sd_fi_arq), 0);
30125 			if (rval == -1) {
30126 				kmem_free(un->sd_fi_fifo_arq[i],
30127 				    sizeof (struct sd_fi_arq));
30128 				un->sd_fi_fifo_arq[i] = NULL;
30129 			}
30130 
30131 		} else {
30132 			SD_INFO(SD_LOG_IOERR, un,
30133 			    "sd_faultinjection_ioctl: arq null\n");
30134 		}
30135 
30136 		break;
30137 
30138 	case SDIOCPUSH:
30139 		/* Push stored xb, pkt, un, and arq onto fifo */
30140 		sd_fault_injection_on = 0;
30141 
30142 		if (arg != NULL) {
30143 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
30144 			if (rval != -1 &&
30145 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30146 				un->sd_fi_fifo_end += i;
30147 			}
30148 		} else {
30149 			SD_INFO(SD_LOG_IOERR, un,
30150 			    "sd_faultinjection_ioctl: push arg null\n");
30151 			if (un->sd_fi_fifo_end + 1 < SD_FI_MAX_ERROR) {
30152 				un->sd_fi_fifo_end++;
30153 			}
30154 		}
30155 		SD_INFO(SD_LOG_IOERR, un,
30156 		    "sd_faultinjection_ioctl: push to end=%d\n",
30157 		    un->sd_fi_fifo_end);
30158 		break;
30159 
30160 	case SDIOCRETRIEVE:
30161 		/* Return the log buffer from the injection session */
30162 		SD_INFO(SD_LOG_SDTEST, un,
30163 		    "sd_faultinjection_ioctl: Injecting Fault Retrieve\n");
30164 
30165 		sd_fault_injection_on = 0;
30166 
30167 		mutex_enter(&(un->un_fi_mutex));
30168 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
30169 		    un->sd_fi_buf_len + 1, 0);
30170 		mutex_exit(&(un->un_fi_mutex));
30171 
30172 		if (rval == -1) {
30173 			/*
30174 			 * arg is possibly invalid; set it
30175 			 * to NULL for the return.
30176 			 */
30177 			arg = NULL;
30178 		}
30179 		break;
30180 	}
30181 
30182 	mutex_exit(SD_MUTEX(un));
30183 	SD_TRACE(SD_LOG_IOERR, un,
30184 	    "sd_faultinjection_ioctl: exit\n");
30185 }
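
/*
 * Illustrative sketch (not part of the driver): one way a userland test
 * could drive the fault-injection ioctls handled above.  The device path
 * and buffer size are hypothetical, and CMD_TRAN_ERR is only an example
 * of a pkt_reason value to inject; see sddef.h for the sd_fi_* structure
 * layouts and the SDIOC* request definitions.
 *
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDWR);
 *	struct sd_fi_pkt fp;
 *	uint_t nslots = 1;
 *	char logbuf[4096];	// driver copies out sd_fi_buf_len + 1 bytes
 *
 *	// ... fill in fp (e.g. fp.pkt_reason = CMD_TRAN_ERR) per the
 *	// SD_CONDSET() conventions ...
 *	(void) ioctl(fd, SDIOCSTART, NULL);	// start an injection session
 *	(void) ioctl(fd, SDIOCINSERTPKT, &fp);	// stage the pkt struct
 *	(void) ioctl(fd, SDIOCPUSH, &nslots);	// publish one fifo slot
 *	// ... issue I/O so the staged fault is consumed by sdintr ...
 *	(void) ioctl(fd, SDIOCRETRIEVE, logbuf);	// read back the log
 *	(void) ioctl(fd, SDIOCSTOP, NULL);	// stop and drain the session
 */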
30186 
30187 
30188 /*
30189  *    Function: sd_injection_log()
30190  *
30191  * Description: This routine appends buf to the injection log, which is
30192  *              retrieved via sd_faultinjection_ioctl() for use in fault
30193  *              detection and recovery.
30194  *   Arguments: buf - the string to add to the log
30195  *              un  - the soft state struct for the device instance
30196  */
30197 
30198 static void
30199 sd_injection_log(char *buf, struct sd_lun *un)
30200 {
30201 	uint_t len;
30202 
30203 	ASSERT(un != NULL);
30204 	ASSERT(buf != NULL);
30205 
30206 	mutex_enter(&(un->un_fi_mutex));
30207 
30208 	len = min(strlen(buf), 255);
30209 	/* Add logged value to Injection log to be returned later */
30210 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
30211 		uint_t	offset = strlen((char *)un->sd_fi_log);
30212 		char *destp = (char *)un->sd_fi_log + offset;
30213 		int i;
30214 		for (i = 0; i < len; i++) {
30215 			*destp++ = *buf++;
30216 		}
30217 		un->sd_fi_buf_len += len;
30218 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
30219 	}
30220 
30221 	mutex_exit(&(un->un_fi_mutex));
30222 }
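
/*
 * Behavior sketch, derived from the routine above: successive calls
 * append under un_fi_mutex, so
 *
 *	sd_injection_log("pkt_reason set\n", un);
 *	sd_injection_log("xb_blkno set\n", un);
 *
 * leaves sd_fi_log holding "pkt_reason set\nxb_blkno set\n" with
 * sd_fi_buf_len equal to the combined length.  Each call contributes at
 * most 255 bytes, and a call that would overflow SD_FI_MAX_BUF is
 * silently dropped rather than truncated.
 */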
30223 
30224 
30225 /*
30226  *    Function: sd_faultinjection()
30227  *
30228  * Description: This routine takes the pkt and changes its
30229  *		contents based on the error-injection scenario.
30230  *
30231  *   Arguments: pktp	- packet to be changed
30232  */
30233 
30234 static void
30235 sd_faultinjection(struct scsi_pkt *pktp)
30236 {
30237 	uint_t i;
30238 	struct sd_fi_pkt *fi_pkt;
30239 	struct sd_fi_xb *fi_xb;
30240 	struct sd_fi_un *fi_un;
30241 	struct sd_fi_arq *fi_arq;
30242 	struct buf *bp;
30243 	struct sd_xbuf *xb;
30244 	struct sd_lun *un;
30245 
30246 	ASSERT(pktp != NULL);
30247 
30248 	/* pull bp, xb, and un from pktp */
30249 	bp = (struct buf *)pktp->pkt_private;
30250 	xb = SD_GET_XBUF(bp);
30251 	un = SD_GET_UN(bp);
30252 
30253 	ASSERT(un != NULL);
30254 
30255 	mutex_enter(SD_MUTEX(un));
30256 
30257 	SD_TRACE(SD_LOG_SDTEST, un,
30258 	    "sd_faultinjection: entry Injection from sdintr\n");
30259 
30260 	/* if injection is off, return */
30261 	if (sd_fault_injection_on == 0 ||
30262 	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
30263 		mutex_exit(SD_MUTEX(un));
30264 		return;
30265 	}
30266 
30267 
30268 	/* take the next set off the fifo */
30269 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
30270 
30271 	fi_pkt = un->sd_fi_fifo_pkt[i];
30272 	fi_xb = un->sd_fi_fifo_xb[i];
30273 	fi_un = un->sd_fi_fifo_un[i];
30274 	fi_arq = un->sd_fi_fifo_arq[i];
30275 
30276 
30277 	/* set variables accordingly */
30278 	/* set pkt if it was on fifo */
30279 	if (fi_pkt != NULL) {
30280 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
30281 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
30282 		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
30283 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
30284 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
30285 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
30286 
30287 	}
30288 
30289 	/* set xb if it was on fifo */
30290 	if (fi_xb != NULL) {
30291 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
30292 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
30293 		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
30294 		SD_CONDSET(xb, xb, xb_victim_retry_count,
30295 		    "xb_victim_retry_count");
30296 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
30297 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
30298 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
30299 
30300 		/* copy in the injected sense data */
30301 		if (fi_xb->xb_sense_data[0] != -1) {
30302 			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
30303 			    SENSE_LENGTH);
30304 		}
30305 
30306 		/* copy in extended sense codes */
30307 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_code,
30308 		    "es_code");
30309 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_key,
30310 		    "es_key");
30311 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_add_code,
30312 		    "es_add_code");
30313 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb,
30314 		    es_qual_code, "es_qual_code");
30315 	}
30316 
30317 	/* set un if it was on fifo */
30318 	if (fi_un != NULL) {
30319 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
30320 		SD_CONDSET(un, un, un_ctype, "un_ctype");
30321 		SD_CONDSET(un, un, un_reset_retry_count,
30322 		    "un_reset_retry_count");
30323 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
30324 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
30325 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
30326 		SD_CONDSET(un, un, un_f_geometry_is_valid,
30327 		    "un_f_geometry_is_valid");
30328 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
30329 		    "un_f_allow_bus_device_reset");
30330 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
30331 
30332 	}
30333 
30334 	/* copy in auto request sense if it was on fifo */
30335 	if (fi_arq != NULL) {
30336 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
30337 	}
30338 
30339 	/* free structs */
30340 	if (un->sd_fi_fifo_pkt[i] != NULL) {
30341 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
30342 	}
30343 	if (un->sd_fi_fifo_xb[i] != NULL) {
30344 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
30345 	}
30346 	if (un->sd_fi_fifo_un[i] != NULL) {
30347 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
30348 	}
30349 	if (un->sd_fi_fifo_arq[i] != NULL) {
30350 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
30351 	}
30352 
30353 	/*
30354 	 * kmem_free() does not guarantee that the pointer is set to
30355 	 * NULL.  Since these pointers are used to determine whether
30356 	 * injection values were set, explicitly clear them after the
30357 	 * free.
30358 	 */
30359 	un->sd_fi_fifo_pkt[i] = NULL;
30360 	un->sd_fi_fifo_un[i] = NULL;
30361 	un->sd_fi_fifo_xb[i] = NULL;
30362 	un->sd_fi_fifo_arq[i] = NULL;
30363 
30364 	un->sd_fi_fifo_start++;
30365 
30366 	mutex_exit(SD_MUTEX(un));
30367 
30368 	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
30369 }
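
/*
 * Ring-buffer bookkeeping, summarized from the code above:
 * sd_fi_fifo_start and sd_fi_fifo_end are free-running counters, and a
 * slot index is always taken modulo SD_FI_MAX_ERROR:
 *
 *	SDIOCINSERT*:	i = end % SD_FI_MAX_ERROR;	// stage into slot i
 *	SDIOCPUSH:	end += n;			// publish n slots
 *	this routine:	i = start % SD_FI_MAX_ERROR;	// consume slot i,
 *			start++;			// then advance
 *
 * The session is drained when start == end, which is the early-return
 * test at the top of this routine.
 */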
30370 
30371 #endif /* SD_FAULT_INJECTION */
30372