/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * SCSI disk target driver.
 */



#include <sys/scsi/scsi.h>
#include <sys/dkbad.h>
#include <sys/dklabel.h>
#include <sys/dkio.h>
#include <sys/fdio.h>
#include <sys/cdio.h>
#include <sys/mhd.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/note.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/efi_partition.h>
#include <sys/var.h>
#include <sys/aio_req.h>

#ifdef __lock_lint
#define	_LP64
#define	__amd64
#endif

#if (defined(__fibre))
/* Note: is there a leadville version of the following? */
#include <sys/fc4/fcal_linkapp.h>
#endif
#include <sys/taskq.h>
#include <sys/uuid.h>
#include <sys/byteorder.h>
#include <sys/sdt.h>

#include "sd_xbuf.h"

#include <sys/scsi/targets/sddef.h>

/*
 * Loadable module info.
 */
#if (defined(__fibre))
#define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
char _depends_on[]	= "misc/scsi drv/fcp";
#else
#define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
char _depends_on[]	= "misc/scsi";
#endif

/*
 * Define the interconnect type, to allow the driver to distinguish
 * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
 *
 * This is really for backward compatibility. In the future, the driver
 * should actually check the "interconnect-type" property as reported by
 * the HBA; however at present this property is not defined by all HBAs,
 * so we will use this #define (1) to permit the driver to run in
 * backward-compatibility mode; and (2) to print a notification message
 * if an FC HBA does not support the "interconnect-type" property.  The
 * behavior of the driver will be to assume parallel SCSI behaviors unless
 * the "interconnect-type" property is defined by the HBA **AND** has a
 * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
 * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
 * Channel behaviors (as per the old ssd).  (Note that the
 * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
 * will result in the driver assuming parallel SCSI behaviors.)
 *
 * (see common/sys/scsi/impl/services.h)
 *
 * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
 * since some FC HBAs may already support that, and there is some code in
 * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
 * default would confuse that code, and besides things should work fine
 * anyway if the FC HBA already reports INTERCONNECT_FABRIC for the
 * "interconnect-type" property.
 */
#if (defined(__fibre))
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
#else
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
#endif
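
/*
 * A minimal sketch (not part of the driver) of how the "interconnect-type"
 * check described above could be phrased with the DDI property interfaces.
 * The helper name and the -1 sentinel are hypothetical; the driver's real
 * detection is performed during unit attach.
 *
 *	static int
 *	sd_example_get_interconnect(dev_info_t *devi)
 *	{
 *		int itype;
 *
 *		itype = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
 *		    DDI_PROP_DONTPASS, "interconnect-type", -1);
 *		return ((itype == -1) ?
 *		    SD_DEFAULT_INTERCONNECT_TYPE : itype);
 *	}
 */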

/*
 * The name of the driver, established from the module name in _init.
 */
static	char *sd_label			= NULL;

/*
 * The driver name is unfortunately prefixed to some driver.conf property
 * names.
 */
#if (defined(__fibre))
#define	sd_max_xfer_size		ssd_max_xfer_size
#define	sd_config_list			ssd_config_list
static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
static	char *sd_config_list		= "ssd-config-list";
#else
static	char *sd_max_xfer_size		= "sd_max_xfer_size";
static	char *sd_config_list		= "sd-config-list";
#endif

/*
 * Driver global variables
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All global variables need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this namespace issue should go away.
 */
#define	sd_state			ssd_state
#define	sd_io_time			ssd_io_time
#define	sd_failfast_enable		ssd_failfast_enable
#define	sd_ua_retry_count		ssd_ua_retry_count
#define	sd_report_pfa			ssd_report_pfa
#define	sd_max_throttle			ssd_max_throttle
#define	sd_min_throttle			ssd_min_throttle
#define	sd_rot_delay			ssd_rot_delay

#define	sd_retry_on_reservation_conflict	\
					ssd_retry_on_reservation_conflict
#define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
#define	sd_resv_conflict_name		ssd_resv_conflict_name

#define	sd_component_mask		ssd_component_mask
#define	sd_level_mask			ssd_level_mask
#define	sd_debug_un			ssd_debug_un
#define	sd_error_level			ssd_error_level

#define	sd_xbuf_active_limit		ssd_xbuf_active_limit
#define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit

#define	sd_tr				ssd_tr
#define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
#define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
#define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
#define	sd_check_media_time		ssd_check_media_time
#define	sd_wait_cmds_complete		ssd_wait_cmds_complete
#define	sd_label_mutex			ssd_label_mutex
#define	sd_detach_mutex			ssd_detach_mutex
#define	sd_log_buf			ssd_log_buf
#define	sd_log_mutex			ssd_log_mutex

#define	sd_disk_table			ssd_disk_table
#define	sd_disk_table_size		ssd_disk_table_size
#define	sd_sense_mutex			ssd_sense_mutex
#define	sd_cdbtab			ssd_cdbtab

#define	sd_cb_ops			ssd_cb_ops
#define	sd_ops				ssd_ops
#define	sd_additional_codes		ssd_additional_codes

#define	sd_minor_data			ssd_minor_data
#define	sd_minor_data_efi		ssd_minor_data_efi

#define	sd_tq				ssd_tq
#define	sd_wmr_tq			ssd_wmr_tq
#define	sd_taskq_name			ssd_taskq_name
#define	sd_wmr_taskq_name		ssd_wmr_taskq_name
#define	sd_taskq_minalloc		ssd_taskq_minalloc
#define	sd_taskq_maxalloc		ssd_taskq_maxalloc

#define	sd_dump_format_string		ssd_dump_format_string

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain

#define	sd_pm_idletime			ssd_pm_idletime

#define	sd_force_pm_supported		ssd_force_pm_supported

#define	sd_dtype_optical_bind		ssd_dtype_optical_bind

#endif
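
/*
 * For example, when this file is built as the ssd module (__fibre defined),
 * the declaration "void *sd_state = NULL;" below actually compiles as
 * "void *ssd_state = NULL;", so both modules can be loaded simultaneously
 * without their global symbols colliding.
 */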


#ifdef	SDDEBUG
int	sd_force_pm_supported		= 0;
#endif	/* SDDEBUG */

void *sd_state				= NULL;
int sd_io_time				= SD_IO_TIME;
int sd_failfast_enable			= 1;
int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
int sd_report_pfa			= 1;
int sd_max_throttle			= SD_MAX_THROTTLE;
int sd_min_throttle			= SD_MIN_THROTTLE;
int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
int sd_qfull_throttle_enable		= TRUE;

int sd_retry_on_reservation_conflict	= 1;
int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))

static int sd_dtype_optical_bind	= -1;

/* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";

/*
 * Global data for debug logging. To enable debug printing, sd_component_mask
 * and sd_level_mask should be set to the desired bit patterns as outlined in
 * sddef.h.
 */
uint_t	sd_component_mask		= 0x0;
uint_t	sd_level_mask			= 0x0;
struct	sd_lun *sd_debug_un		= NULL;
uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;

/* Note: these may go away in the future... */
static uint32_t	sd_xbuf_active_limit	= 512;
static uint32_t sd_xbuf_reserve_limit	= 16;

static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };

/*
 * Timer value used to reset the throttle after it has been reduced
 * (typically in response to TRAN_BUSY or STATUS_QFULL)
 */
static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;

/*
 * Interval value associated with the media change scsi watch.
 */
static int sd_check_media_time		= 3000000;

/*
 * Wait value used for in progress operations during a DDI_SUSPEND
 */
static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;

/*
 * sd_label_mutex protects a static buffer used in the disk label
 * component of the driver
 */
static kmutex_t sd_label_mutex;

/*
 * sd_detach_mutex protects un_layer_count, un_detach_count, and
 * un_opens_in_progress in the sd_lun structure.
 */
static kmutex_t sd_detach_mutex;

_NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))

/*
 * Global buffer and mutex for debug logging
 */
static char	sd_log_buf[1024];
static kmutex_t	sd_log_mutex;


/*
 * "Smart" Probe Caching structs, globals, #defines, etc.
 * For parallel scsi and non-self-identifying devices only.
 */

/*
 * The following resources and routines are implemented to support
 * "smart" probing, which caches the scsi_probe() results in an array,
 * in order to help avoid long probe times.
 */
struct sd_scsi_probe_cache {
	struct	sd_scsi_probe_cache	*next;
	dev_info_t	*pdip;
	int		cache[NTARGETS_WIDE];
};

static kmutex_t	sd_scsi_probe_cache_mutex;
static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;

/*
 * Really we only need protection on the head of the linked list, but
 * better safe than sorry.
 */
_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache_head))
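
/*
 * A minimal sketch (illustrative only; the driver's real logic lives in
 * sd_scsi_probe_with_cache() below) of how a cached scsi_probe() result
 * for target "tgt" under the parent HBA node "pdip" would be found.  The
 * cache[] entries hold SCSIPROBE_* result codes:
 *
 *	struct sd_scsi_probe_cache *cp;
 *
 *	mutex_enter(&sd_scsi_probe_cache_mutex);
 *	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
 *		if (cp->pdip == pdip)
 *			break;
 *	}
 *	if (cp != NULL)
 *		cached_result = cp->cache[tgt];
 *	mutex_exit(&sd_scsi_probe_cache_mutex);
 */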


/*
 * Vendor specific data name property declarations
 */

#if defined(__fibre) || defined(__i386) || defined(__amd64)

static sd_tunables seagate_properties = {
	SEAGATE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};


static sd_tunables fujitsu_properties = {
	FUJITSU_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables ibm_properties = {
	IBM_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables purple_properties = {
	PURPLE_THROTTLE_VALUE,
	0,
	0,
	PURPLE_BUSY_RETRIES,
	PURPLE_RESET_RETRY_COUNT,
	PURPLE_RESERVE_RELEASE_TIME,
	0,
	0,
	0
};

static sd_tunables sve_properties = {
	SVE_THROTTLE_VALUE,
	0,
	0,
	SVE_BUSY_RETRIES,
	SVE_RESET_RETRY_COUNT,
	SVE_RESERVE_RELEASE_TIME,
	SVE_MIN_THROTTLE_VALUE,
	SVE_DISKSORT_DISABLED_FLAG,
	0
};

static sd_tunables maserati_properties = {
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	MASERATI_DISKSORT_DISABLED_FLAG,
	MASERATI_LUN_RESET_ENABLED_FLAG
};

static sd_tunables pirus_properties = {
	PIRUS_THROTTLE_VALUE,
	0,
	PIRUS_NRR_COUNT,
	PIRUS_BUSY_RETRIES,
	PIRUS_RESET_RETRY_COUNT,
	0,
	PIRUS_MIN_THROTTLE_VALUE,
	PIRUS_DISKSORT_DISABLED_FLAG,
	PIRUS_LUN_RESET_ENABLED_FLAG
};

#endif

#if (defined(__sparc) && !defined(__fibre)) || \
	(defined(__i386) || defined(__amd64))


static sd_tunables elite_properties = {
	ELITE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables st31200n_properties = {
	ST31200N_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

#endif /* Fibre or not */

static sd_tunables lsi_properties_scsi = {
	LSI_THROTTLE_VALUE,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables symbios_properties = {
	SYMBIOS_THROTTLE_VALUE,
	0,
	SYMBIOS_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties = {
	0,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_oem_properties = {
	0,
	0,
	LSI_OEM_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};



#if (defined(SD_PROP_TST))

#define	SD_TST_CTYPE_VAL	CTYPE_CDROM
#define	SD_TST_THROTTLE_VAL	16
#define	SD_TST_NOTREADY_VAL	12
#define	SD_TST_BUSY_VAL		60
#define	SD_TST_RST_RETRY_VAL	36
#define	SD_TST_RSV_REL_TIME	60

static sd_tunables tst_properties = {
	SD_TST_THROTTLE_VAL,
	SD_TST_CTYPE_VAL,
	SD_TST_NOTREADY_VAL,
	SD_TST_BUSY_VAL,
	SD_TST_RST_RETRY_VAL,
	SD_TST_RSV_REL_TIME,
	0,
	0,
	0
};
#endif

/* This is similar to the ANSI toupper implementation */
#define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))

/*
 * Static Driver Configuration Table
 *
 * This is the table of disks which need throttle adjustment (or, perhaps
 * something else as defined by the flags at a future time.)  device_id
 * is a string consisting of concatenated vid (vendor), pid (product/model)
 * and revision strings as defined in the scsi_inquiry structure.  Offsets of
 * the parts of the string are as defined by the sizes in the scsi_inquiry
 * structure.  Device type is searched as far as the device_id string is
 * defined.  Flags defines which values are to be set in the driver from the
 * properties list.
 *
 * Entries below which begin and end with a "*" are a special case.
 * These do not have a specific vendor, and the string which follows
 * can appear anywhere in the 16 byte PID portion of the inquiry data.
 *
 * Entries below which begin and end with a " " (blank) are a special
 * case. The comparison function will treat multiple consecutive blanks
 * as equivalent to a single blank. For example, this causes a
 * sd_disk_table entry of " NEC CDROM " to match a device's id string
 * of "NEC       CDROM".  (An illustrative sketch of this comparison
 * follows the table below.)
 *
 * Note: The MD21 controller type has been obsoleted.
 *	 ST318202F is a Legacy device
 *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
 *	 made with an FC connection. The entries here are a legacy.
 */
static sd_disk_config_t sd_disk_table[] = {
#if defined(__fibre) || defined(__i386) || defined(__amd64)
	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_RSV_REL_TIME|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED,
		&sve_properties },
	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&maserati_properties },
	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
#endif /* fibre or NON-sparc platforms */
#if ((defined(__sparc) && !defined(__fibre)) ||\
	(defined(__i386) || defined(__amd64)))
	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
	    &symbios_properties },
	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
	    &lsi_properties_scsi },
#if defined(__i386) || defined(__amd64)
	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },
#endif /* __i386 || __amd64 */
#endif /* sparc NON-fibre or NON-sparc platforms */

#if (defined(SD_PROP_TST))
	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
				| SD_CONF_BSET_CTYPE
				| SD_CONF_BSET_NRR_COUNT
				| SD_CONF_BSET_FAB_DEVID
				| SD_CONF_BSET_NOCACHE
				| SD_CONF_BSET_BSY_RETRY_COUNT
				| SD_CONF_BSET_PLAYMSF_BCD
				| SD_CONF_BSET_READSUB_BCD
				| SD_CONF_BSET_READ_TOC_TRK_BCD
				| SD_CONF_BSET_READ_TOC_ADDR_BCD
				| SD_CONF_BSET_NO_READ_HEADER
				| SD_CONF_BSET_READ_CD_XD4
				| SD_CONF_BSET_RST_RETRIES
				| SD_CONF_BSET_RSV_REL_TIME
				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
#endif
};

static const int sd_disk_table_size =
	sizeof (sd_disk_table) / sizeof (sd_disk_config_t);
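
/*
 * A minimal sketch (hypothetical helper; the driver's real matching is
 * done by sd_sdconf_id_match() and sd_blank_cmp()) of the blank-collapsing
 * comparison described in the table header above, where runs of
 * consecutive blanks in either string compare equal to a single blank:
 *
 *	while (*id != '\0' && *inq != '\0') {
 *		if (*id == ' ' && *inq == ' ') {
 *			while (*id == ' ')
 *				id++;
 *			while (*inq == ' ')
 *				inq++;
 *		} else if (SD_TOUPPER(*id) != SD_TOUPPER(*inq)) {
 *			break;
 *		} else {
 *			id++;
 *			inq++;
 *		}
 *	}
 */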


/*
 * Return codes of sd_uselabel().
 */
#define	SD_LABEL_IS_VALID		0
#define	SD_LABEL_IS_INVALID		1

#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_IS_PARALLEL_SCSI(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)

/*
 * Definitions used by device id registration routines
 */
#define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
#define	VPD_PAGE_LENGTH		3	/* offset for page length data */
#define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
#define	WD_NODE			7	/* the whole disk minor */

static kmutex_t sd_sense_mutex = {0};

/*
 * Macros for updates of the driver state
 */
#define	New_state(un, s)        \
	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
#define	Restore_state(un)	\
	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
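
/*
 * Illustrative usage (not part of the driver): New_state() records the
 * current state in un_last_state before switching, so a later
 * Restore_state() returns the unit to where it was.  Only one level of
 * history is kept; nested New_state() calls overwrite un_last_state.
 *
 *	New_state(un, SD_STATE_SUSPENDED);
 *	...
 *	Restore_state(un);
 */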

static struct sd_cdbinfo sd_cdbtab[] = {
	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
};
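
/*
 * Each sd_cdbtab row above pairs a CDB group with its addressing limits:
 * the CDB size for the group, the group bits for the opcode, the largest
 * logical block address expressible in that CDB format, and the maximum
 * transfer length.  The I/O setup code walks this table to pick the
 * smallest CDB that can describe a given request (see sd_init_cdb_limits()
 * and sd_setup_rw_pkt() below); the field names are defined by struct
 * sd_cdbinfo in sddef.h.
 */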

/*
 * Specifies the number of seconds that must have elapsed since the last
 * command completed for a device to be declared idle to the PM framework.
 */
static int sd_pm_idletime = 1;

/*
 * Internal function prototypes
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All function names need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this ugliness should go away.
 */
#define	sd_log_trace			ssd_log_trace
#define	sd_log_info			ssd_log_info
#define	sd_log_err			ssd_log_err
#define	sdprobe				ssdprobe
#define	sdinfo				ssdinfo
#define	sd_prop_op			ssd_prop_op
#define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
#define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
#define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
#define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
#define	sd_spin_up_unit			ssd_spin_up_unit
#define	sd_enable_descr_sense		ssd_enable_descr_sense
#define	sd_set_mmc_caps			ssd_set_mmc_caps
#define	sd_read_unit_properties		ssd_read_unit_properties
#define	sd_process_sdconf_file		ssd_process_sdconf_file
#define	sd_process_sdconf_table		ssd_process_sdconf_table
#define	sd_sdconf_id_match		ssd_sdconf_id_match
#define	sd_blank_cmp			ssd_blank_cmp
#define	sd_chk_vers1_data		ssd_chk_vers1_data
#define	sd_set_vers1_properties		ssd_set_vers1_properties
#define	sd_validate_geometry		ssd_validate_geometry

#if defined(_SUNOS_VTOC_16)
#define	sd_convert_geometry		ssd_convert_geometry
#endif

#define	sd_resync_geom_caches		ssd_resync_geom_caches
#define	sd_read_fdisk			ssd_read_fdisk
#define	sd_get_physical_geometry	ssd_get_physical_geometry
#define	sd_get_virtual_geometry		ssd_get_virtual_geometry
#define	sd_update_block_info		ssd_update_block_info
#define	sd_swap_efi_gpt			ssd_swap_efi_gpt
#define	sd_swap_efi_gpe			ssd_swap_efi_gpe
#define	sd_validate_efi			ssd_validate_efi
#define	sd_use_efi			ssd_use_efi
#define	sd_uselabel			ssd_uselabel
#define	sd_build_default_label		ssd_build_default_label
#define	sd_has_max_chs_vals		ssd_has_max_chs_vals
#define	sd_inq_fill			ssd_inq_fill
#define	sd_register_devid		ssd_register_devid
#define	sd_get_devid_block		ssd_get_devid_block
#define	sd_get_devid			ssd_get_devid
#define	sd_create_devid			ssd_create_devid
#define	sd_write_deviceid		ssd_write_deviceid
#define	sd_check_vpd_page_support	ssd_check_vpd_page_support
#define	sd_setup_pm			ssd_setup_pm
#define	sd_create_pm_components		ssd_create_pm_components
#define	sd_ddi_suspend			ssd_ddi_suspend
#define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
#define	sd_ddi_resume			ssd_ddi_resume
#define	sd_ddi_pm_resume		ssd_ddi_pm_resume
#define	sdpower				ssdpower
#define	sdattach			ssdattach
#define	sddetach			ssddetach
#define	sd_unit_attach			ssd_unit_attach
#define	sd_unit_detach			ssd_unit_detach
#define	sd_set_unit_attributes		ssd_set_unit_attributes
#define	sd_create_minor_nodes		ssd_create_minor_nodes
#define	sd_create_errstats		ssd_create_errstats
#define	sd_set_errstats			ssd_set_errstats
#define	sd_set_pstats			ssd_set_pstats
#define	sddump				ssddump
#define	sd_scsi_poll			ssd_scsi_poll
#define	sd_send_polled_RQS		ssd_send_polled_RQS
#define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
#define	sd_init_event_callbacks		ssd_init_event_callbacks
#define	sd_event_callback		ssd_event_callback
#define	sd_cache_control		ssd_cache_control
#define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
#define	sd_make_device			ssd_make_device
#define	sdopen				ssdopen
#define	sdclose				ssdclose
#define	sd_ready_and_valid		ssd_ready_and_valid
#define	sdmin				ssdmin
#define	sdread				ssdread
#define	sdwrite				ssdwrite
#define	sdaread				ssdaread
#define	sdawrite			ssdawrite
#define	sdstrategy			ssdstrategy
#define	sdioctl				ssdioctl
#define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
#define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
#define	sd_checksum_iostart		ssd_checksum_iostart
#define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
#define	sd_pm_iostart			ssd_pm_iostart
#define	sd_core_iostart			ssd_core_iostart
#define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
#define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
#define	sd_checksum_iodone		ssd_checksum_iodone
#define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
#define	sd_pm_iodone			ssd_pm_iodone
#define	sd_initpkt_for_buf		ssd_initpkt_for_buf
#define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
#define	sd_setup_rw_pkt			ssd_setup_rw_pkt
#define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
#define	sd_buf_iodone			ssd_buf_iodone
#define	sd_uscsi_strategy		ssd_uscsi_strategy
#define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
#define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
#define	sd_uscsi_iodone			ssd_uscsi_iodone
#define	sd_xbuf_strategy		ssd_xbuf_strategy
#define	sd_xbuf_init			ssd_xbuf_init
#define	sd_pm_entry			ssd_pm_entry
#define	sd_pm_exit			ssd_pm_exit

#define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
#define	sd_pm_timeout_handler		ssd_pm_timeout_handler

#define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
#define	sdintr				ssdintr
#define	sd_start_cmds			ssd_start_cmds
#define	sd_send_scsi_cmd		ssd_send_scsi_cmd
#define	sd_bioclone_alloc		ssd_bioclone_alloc
#define	sd_bioclone_free		ssd_bioclone_free
#define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
#define	sd_shadow_buf_free		ssd_shadow_buf_free
#define	sd_print_transport_rejected_message	\
					ssd_print_transport_rejected_message
#define	sd_retry_command		ssd_retry_command
#define	sd_set_retry_bp			ssd_set_retry_bp
#define	sd_send_request_sense_command	ssd_send_request_sense_command
#define	sd_start_retry_command		ssd_start_retry_command
#define	sd_start_direct_priority_command	\
					ssd_start_direct_priority_command
#define	sd_return_failed_command	ssd_return_failed_command
#define	sd_return_failed_command_no_restart	\
					ssd_return_failed_command_no_restart
#define	sd_return_command		ssd_return_command
#define	sd_sync_with_callback		ssd_sync_with_callback
#define	sdrunout			ssdrunout
#define	sd_mark_rqs_busy		ssd_mark_rqs_busy
#define	sd_mark_rqs_idle		ssd_mark_rqs_idle
#define	sd_reduce_throttle		ssd_reduce_throttle
#define	sd_restore_throttle		ssd_restore_throttle
#define	sd_print_incomplete_msg		ssd_print_incomplete_msg
#define	sd_init_cdb_limits		ssd_init_cdb_limits
#define	sd_pkt_status_good		ssd_pkt_status_good
#define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
#define	sd_pkt_status_busy		ssd_pkt_status_busy
#define	sd_pkt_status_reservation_conflict	\
					ssd_pkt_status_reservation_conflict
#define	sd_pkt_status_qfull		ssd_pkt_status_qfull
#define	sd_handle_request_sense		ssd_handle_request_sense
#define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
#define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
#define	sd_validate_sense_data		ssd_validate_sense_data
#define	sd_decode_sense			ssd_decode_sense
#define	sd_print_sense_msg		ssd_print_sense_msg
#define	sd_extract_sense_info_descr	ssd_extract_sense_info_descr
#define	sd_sense_key_no_sense		ssd_sense_key_no_sense
#define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
#define	sd_sense_key_not_ready		ssd_sense_key_not_ready
#define	sd_sense_key_medium_or_hardware_error	\
					ssd_sense_key_medium_or_hardware_error
#define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
#define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
#define	sd_sense_key_fail_command	ssd_sense_key_fail_command
#define	sd_sense_key_blank_check	ssd_sense_key_blank_check
#define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
#define	sd_sense_key_default		ssd_sense_key_default
#define	sd_print_retry_msg		ssd_print_retry_msg
#define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
#define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
#define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
#define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
#define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
#define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
#define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
#define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
#define	sd_pkt_reason_default		ssd_pkt_reason_default
#define	sd_reset_target			ssd_reset_target
#define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
#define	sd_start_stop_unit_task		ssd_start_stop_unit_task
#define	sd_taskq_create			ssd_taskq_create
#define	sd_taskq_delete			ssd_taskq_delete
#define	sd_media_change_task		ssd_media_change_task
#define	sd_handle_mchange		ssd_handle_mchange
#define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
#define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
#define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
#define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
#define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
#define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
#define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
#define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
					ssd_send_scsi_PERSISTENT_RESERVE_IN
#define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
					ssd_send_scsi_PERSISTENT_RESERVE_OUT
#define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
#define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
#define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
#define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
#define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
#define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
#define	sd_alloc_rqs			ssd_alloc_rqs
#define	sd_free_rqs			ssd_free_rqs
#define	sd_dump_memory			ssd_dump_memory
#define	sd_uscsi_ioctl			ssd_uscsi_ioctl
#define	sd_get_media_info		ssd_get_media_info
#define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
#define	sd_dkio_get_geometry		ssd_dkio_get_geometry
#define	sd_dkio_set_geometry		ssd_dkio_set_geometry
#define	sd_dkio_get_partition		ssd_dkio_get_partition
#define	sd_dkio_set_partition		ssd_dkio_set_partition
#define	sd_dkio_partition		ssd_dkio_partition
#define	sd_dkio_get_vtoc		ssd_dkio_get_vtoc
#define	sd_dkio_get_efi			ssd_dkio_get_efi
#define	sd_build_user_vtoc		ssd_build_user_vtoc
#define	sd_dkio_set_vtoc		ssd_dkio_set_vtoc
#define	sd_dkio_set_efi			ssd_dkio_set_efi
#define	sd_build_label_vtoc		ssd_build_label_vtoc
#define	sd_write_label			ssd_write_label
#define	sd_clear_vtoc			ssd_clear_vtoc
#define	sd_clear_efi			ssd_clear_efi
#define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
#define	sd_setup_next_xfer		ssd_setup_next_xfer
#define	sd_dkio_get_temp		ssd_dkio_get_temp
#define	sd_dkio_get_mboot		ssd_dkio_get_mboot
#define	sd_dkio_set_mboot		ssd_dkio_set_mboot
#define	sd_setup_default_geometry	ssd_setup_default_geometry
#define	sd_update_fdisk_and_vtoc	ssd_update_fdisk_and_vtoc
#define	sd_check_mhd			ssd_check_mhd
#define	sd_mhd_watch_cb			ssd_mhd_watch_cb
#define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
#define	sd_sname			ssd_sname
#define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
#define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
#define	sd_take_ownership		ssd_take_ownership
#define	sd_reserve_release		ssd_reserve_release
#define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
#define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
#define	sd_persistent_reservation_in_read_keys	\
					ssd_persistent_reservation_in_read_keys
#define	sd_persistent_reservation_in_read_resv	\
					ssd_persistent_reservation_in_read_resv
#define	sd_mhdioc_takeown		ssd_mhdioc_takeown
#define	sd_mhdioc_failfast		ssd_mhdioc_failfast
#define	sd_mhdioc_release		ssd_mhdioc_release
#define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
#define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
#define	sd_mhdioc_inresv		ssd_mhdioc_inresv
#define	sr_change_blkmode		ssr_change_blkmode
#define	sr_change_speed			ssr_change_speed
#define	sr_atapi_change_speed		ssr_atapi_change_speed
#define	sr_pause_resume			ssr_pause_resume
#define	sr_play_msf			ssr_play_msf
#define	sr_play_trkind			ssr_play_trkind
#define	sr_read_all_subcodes		ssr_read_all_subcodes
#define	sr_read_subchannel		ssr_read_subchannel
#define	sr_read_tocentry		ssr_read_tocentry
#define	sr_read_tochdr			ssr_read_tochdr
#define	sr_read_cdda			ssr_read_cdda
#define	sr_read_cdxa			ssr_read_cdxa
#define	sr_read_mode1			ssr_read_mode1
#define	sr_read_mode2			ssr_read_mode2
#define	sr_read_cd_mode2		ssr_read_cd_mode2
#define	sr_sector_mode			ssr_sector_mode
#define	sr_eject			ssr_eject
#define	sr_ejected			ssr_ejected
#define	sr_check_wp			ssr_check_wp
#define	sd_check_media			ssd_check_media
#define	sd_media_watch_cb		ssd_media_watch_cb
#define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
#define	sr_volume_ctrl			ssr_volume_ctrl
#define	sr_read_sony_session_offset	ssr_read_sony_session_offset
#define	sd_log_page_supported		ssd_log_page_supported
#define	sd_check_for_writable_cd	ssd_check_for_writable_cd
#define	sd_wm_cache_constructor		ssd_wm_cache_constructor
#define	sd_wm_cache_destructor		ssd_wm_cache_destructor
#define	sd_range_lock			ssd_range_lock
#define	sd_get_range			ssd_get_range
#define	sd_free_inlist_wmap		ssd_free_inlist_wmap
#define	sd_range_unlock			ssd_range_unlock
#define	sd_read_modify_write_task	ssd_read_modify_write_task
#define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain
#define	sd_initpkt_map			ssd_initpkt_map
#define	sd_destroypkt_map		ssd_destroypkt_map
#define	sd_chain_type_map		ssd_chain_type_map
#define	sd_chain_index_map		ssd_chain_index_map

#define	sd_failfast_flushctl		ssd_failfast_flushctl
#define	sd_failfast_flushq		ssd_failfast_flushq
#define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback

#define	sd_is_lsi			ssd_is_lsi

#endif	/* #if (defined(__fibre)) */


int _init(void);
int _fini(void);
int _info(struct modinfo *modinfop);

/*PRINTFLIKE3*/
static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);

static int sdprobe(dev_info_t *devi);
static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result);
static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int mod_flags, char *name, caddr_t valuep, int *lengthp);

/*
 * Smart probe for parallel scsi
 */
static void sd_scsi_probe_cache_init(void);
static void sd_scsi_probe_cache_fini(void);
static void sd_scsi_clear_probe_cache(void);
static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());

static int	sd_spin_up_unit(struct sd_lun *un);
#ifdef _LP64
static void	sd_enable_descr_sense(struct sd_lun *un);
#endif /* _LP64 */
static void	sd_set_mmc_caps(struct sd_lun *un);

static void sd_read_unit_properties(struct sd_lun *un);
static int  sd_process_sdconf_file(struct sd_lun *un);
static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
    int *data_list, sd_tunables *values);
static void sd_process_sdconf_table(struct sd_lun *un);
static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
	int list_len, char *dataname_ptr);
static void sd_set_vers1_properties(struct sd_lun *un, int flags,
    sd_tunables *prop_list);
static int  sd_validate_geometry(struct sd_lun *un, int path_flag);

#if defined(_SUNOS_VTOC_16)
static void sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g);
#endif

static void sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
	int path_flag);
static int  sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize,
	int path_flag);
static void sd_get_physical_geometry(struct sd_lun *un,
	struct geom_cache *pgeom_p, int capacity, int lbasize, int path_flag);
static void sd_get_virtual_geometry(struct sd_lun *un, int capacity,
	int lbasize);
static int  sd_uselabel(struct sd_lun *un, struct dk_label *l, int path_flag);
static void sd_swap_efi_gpt(efi_gpt_t *);
static void sd_swap_efi_gpe(int nparts, efi_gpe_t *);
static int sd_validate_efi(efi_gpt_t *);
static int sd_use_efi(struct sd_lun *, int);
static void sd_build_default_label(struct sd_lun *un);

#if defined(_FIRMWARE_NEEDS_FDISK)
static int  sd_has_max_chs_vals(struct ipart *fdp);
#endif
static void sd_inq_fill(char *p, int l, char *s);


static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
    int reservation_flag);
static daddr_t  sd_get_devid_block(struct sd_lun *un);
static int  sd_get_devid(struct sd_lun *un);
static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
static ddi_devid_t sd_create_devid(struct sd_lun *un);
static int  sd_write_deviceid(struct sd_lun *un);
static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
static int  sd_check_vpd_page_support(struct sd_lun *un);

static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);

static int  sd_ddi_suspend(dev_info_t *devi);
static int  sd_ddi_pm_suspend(struct sd_lun *un);
static int  sd_ddi_resume(dev_info_t *devi);
static int  sd_ddi_pm_resume(struct sd_lun *un);
static int  sdpower(dev_info_t *devi, int component, int level);

static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int  sd_unit_attach(dev_info_t *devi);
static int  sd_unit_detach(dev_info_t *devi);

static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
static int  sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi);
static void sd_create_errstats(struct sd_lun *un, int instance);
static void sd_set_errstats(struct sd_lun *un);
static void sd_set_pstats(struct sd_lun *un);

static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
static int  sd_send_polled_RQS(struct sd_lun *un);
static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);

#if (defined(__fibre))
/*
 * Event callbacks (photon)
 */
static void sd_init_event_callbacks(struct sd_lun *un);
static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
#endif

/*
 * Defines for sd_cache_control
 */

#define	SD_CACHE_ENABLE		1
#define	SD_CACHE_DISABLE	0
#define	SD_CACHE_NOCHANGE	-1

static int   sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag);
static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
static dev_t sd_make_device(dev_info_t *devi);
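
/*
 * Illustrative call (hypothetical context): enable the write cache while
 * leaving the read cache setting untouched.  SD_CACHE_NOCHANGE asks
 * sd_cache_control() to leave the corresponding mode page bit as it is:
 *
 *	if (sd_cache_control(un, SD_CACHE_NOCHANGE, SD_CACHE_ENABLE) != 0) {
 *		... the mode select failed; cache state is unchanged ...
 *	}
 */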

static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
	uint64_t capacity);

/*
 * Driver entry point functions.
 */
static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
static int  sd_ready_and_valid(struct sd_lun *un);

static void sdmin(struct buf *bp);
static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);

static int sdstrategy(struct buf *bp);
static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * Function prototypes for layering functions in the iostart chain.
 */
static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);

/*
 * Function prototypes for layering functions in the iodone chain.
 */
static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
	struct buf *bp);
static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);

/*
 * Prototypes for functions to support buf(9S) based IO.
 */
static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_buf(struct buf *);
static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
	struct buf *bp, int flags,
	int (*callback)(caddr_t), caddr_t callback_arg,
	diskaddr_t lba, uint32_t blockcount);
#if defined(__i386) || defined(__amd64)
static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
#endif /* defined(__i386) || defined(__amd64) */

/*
 * Prototypes for functions to support USCSI IO.
 */
static int sd_uscsi_strategy(struct buf *bp);
static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_uscsi(struct buf *);

static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	uchar_t chain_type, void *pktinfop);

static int  sd_pm_entry(struct sd_lun *un);
static void sd_pm_exit(struct sd_lun *un);

static void sd_pm_idletimeout_handler(void *arg);

/*
 * sd_core internal functions (used at the sd_core_io layer).
 */
static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
static void sdintr(struct scsi_pkt *pktp);
static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);

static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
	int path_flag);

static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
	daddr_t blkno, int (*func)(struct buf *));
static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
static void sd_bioclone_free(struct buf *bp);
static void sd_shadow_buf_free(struct buf *bp);

static void sd_print_transport_rejected_message(struct sd_lun *un,
	struct sd_xbuf *xp, int code);
static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);

static void sd_retry_command(struct sd_lun *un, struct buf *bp,
	int retry_check_flag,
	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
		int c),
	void *user_arg, int failure_code, clock_t retry_delay,
	void (*statp)(kstat_io_t *));
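
/*
 * A typical invocation (illustrative; SD_RETRIES_STANDARD is one of the
 * driver's retry-check flags defined later in this file): retry bp with
 * no delay, printing the decoded sense message via the user callback if
 * the retries are exhausted, and failing the command with EIO:
 *
 *	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
 *	    &si, EIO, (clock_t)0, NULL);
 */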

static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
	clock_t retry_delay, void (*statp)(kstat_io_t *));

static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
	struct scsi_pkt *pktp);
static void sd_start_retry_command(void *arg);
static void sd_start_direct_priority_command(void *arg);
static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
	int errcode);
static void sd_return_failed_command_no_restart(struct sd_lun *un,
	struct buf *bp, int errcode);
static void sd_return_command(struct sd_lun *un, struct buf *bp);
static void sd_sync_with_callback(struct sd_lun *un);
static int sdrunout(caddr_t arg);

static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);

static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
static void sd_restore_throttle(void *arg);

static void sd_init_cdb_limits(struct sd_lun *un);

static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

/*
 * Error handling functions
 */
static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp);
static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int code);
static diskaddr_t sd_extract_sense_info_descr(
	struct scsi_descr_sense_hdr *sdsp);

static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_recoverable_error(struct sd_lun *un,
	uint8_t asc,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_not_ready(struct sd_lun *un,
	uint8_t asc, uint8_t ascq,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
	int sense_key, uint8_t asc,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t asc,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_default(struct sd_lun *un,
	int sense_key,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
	void *arg, int flag);

static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);

static void sd_start_stop_unit_callback(void *arg);
static void sd_start_stop_unit_task(void *arg);

static void sd_taskq_create(void);
static void sd_taskq_delete(void);
static void sd_media_change_task(void *arg);

static int sd_handle_mchange(struct sd_lun *un);
static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
	uint32_t *lbap, int path_flag);
static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
	int path_flag);
static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
	uchar_t usr_cmd, uchar_t *usr_bufp);
static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
	struct dk_callback *dkc);
static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
	uchar_t *bufaddr, uint_t buflen);
static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
	uchar_t *bufaddr, uint_t buflen, char feature);
static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
	size_t buflen, daddr_t start_block, int path_flag);
#define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
	path_flag)
#define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,\
	path_flag)
1450 
1451 static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
1452 	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1453 	uint16_t param_ptr, int path_flag);
1454 
1455 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1456 static void sd_free_rqs(struct sd_lun *un);
1457 
1458 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1459 	uchar_t *data, int len, int fmt);
1460 static void sd_panic_for_res_conflict(struct sd_lun *un);
1461 
1462 /*
1463  * Disk Ioctl Function Prototypes
1464  */
1465 static int sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag);
1466 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1467 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1468 static int sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag,
1469 	int geom_validated);
1470 static int sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag);
1471 static int sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag,
1472 	int geom_validated);
1473 static int sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag);
1474 static int sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag,
1475 	int geom_validated);
1476 static int sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag);
1477 static int sd_dkio_partition(dev_t dev, caddr_t arg, int flag);
1478 static void sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1479 static int sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag);
1480 static int sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag);
1481 static int sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1482 static int sd_write_label(dev_t dev);
1483 static int sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl);
1484 static void sd_clear_vtoc(struct sd_lun *un);
1485 static void sd_clear_efi(struct sd_lun *un);
1486 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1487 static int sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag);
1488 static int sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag);
1489 static void sd_setup_default_geometry(struct sd_lun *un);
1490 #if defined(__i386) || defined(__amd64)
1491 static int sd_update_fdisk_and_vtoc(struct sd_lun *un);
1492 #endif
1493 
1494 /*
1495  * Multi-host Ioctl Prototypes
1496  */
1497 static int sd_check_mhd(dev_t dev, int interval);
1498 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1499 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1500 static char *sd_sname(uchar_t status);
1501 static void sd_mhd_resvd_recover(void *arg);
1502 static void sd_resv_reclaim_thread(void);
1503 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1504 static int sd_reserve_release(dev_t dev, int cmd);
1505 static void sd_rmv_resv_reclaim_req(dev_t dev);
1506 static void sd_mhd_reset_notify_cb(caddr_t arg);
1507 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1508 	mhioc_inkeys_t *usrp, int flag);
1509 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1510 	mhioc_inresvs_t *usrp, int flag);
1511 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1512 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1513 static int sd_mhdioc_release(dev_t dev);
1514 static int sd_mhdioc_register_devid(dev_t dev);
1515 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1516 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1517 
1518 /*
1519  * SCSI removable prototypes
1520  */
1521 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1522 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1523 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1524 static int sr_pause_resume(dev_t dev, int mode);
1525 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1526 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1527 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1528 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1529 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1530 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1531 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1532 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1533 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1534 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1535 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1536 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1537 static int sr_eject(dev_t dev);
1538 static void sr_ejected(register struct sd_lun *un);
1539 static int sr_check_wp(dev_t dev);
1540 static int sd_check_media(dev_t dev, enum dkio_state state);
1541 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1542 static void sd_delayed_cv_broadcast(void *arg);
1543 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1544 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1545 
1546 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1547 
1548 /*
1549  * Function prototypes for the non-512 support (DVDRAM, MO, etc.) functions.
1550  */
1551 static void sd_check_for_writable_cd(struct sd_lun *un);
1552 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1553 static void sd_wm_cache_destructor(void *wm, void *un);
1554 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1555 	daddr_t endb, ushort_t typ);
1556 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1557 	daddr_t endb);
1558 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1559 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1560 static void sd_read_modify_write_task(void *arg);
1561 static int sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno,
1562 	uint64_t nblk, struct buf **bpp);
1564 
1565 
1566 /*
1567  * Function prototypes for failfast support.
1568  */
1569 static void sd_failfast_flushq(struct sd_lun *un);
1570 static int sd_failfast_flushq_callback(struct buf *bp);
1571 
1572 /*
1573  * Function prototypes to check for LSI devices
1574  */
1575 static void sd_is_lsi(struct sd_lun *un);
1576 
1577 /*
1578  * Function prototypes for x86 support
1579  */
1580 #if defined(__i386) || defined(__amd64)
1581 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1582 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1583 #endif
1584 
1585 /*
1586  * Constants for failfast support:
1587  *
1588  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1589  * failfast processing being performed.
1590  *
1591  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1592  * failfast processing on all bufs with B_FAILFAST set.
1593  */
1594 
1595 #define	SD_FAILFAST_INACTIVE		0
1596 #define	SD_FAILFAST_ACTIVE		1
1597 
1598 /*
1599  * Bitmask to control behavior of buf(9S) flushes when a transition to
1600  * the failfast state occurs. Optional bits include:
1601  *
1602  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1603  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1604  * be flushed.
1605  *
1606  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1607  * driver, in addition to the regular wait queue. This includes the xbuf
1608  * queues. When clear, only the driver's wait queue will be flushed.
1609  */
1610 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1611 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1612 
1613 /*
1614  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1615  * to flush all queues within the driver.
1616  */
1617 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
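
/*
 * Illustrative note (not part of the original source): since
 * sd_failfast_flushctl is a module-global tunable, the flush behavior
 * could be changed at boot with an /etc/system entry such as:
 *
 *	set sd:sd_failfast_flushctl = 0x3
 *
 * i.e. SD_FAILFAST_FLUSH_ALL_BUFS | SD_FAILFAST_FLUSH_ALL_QUEUES, which
 * would flush every queued buf on a failfast transition, not just those
 * with B_FAILFAST set.
 */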
1618 
1619 
1620 /*
1621  * SD Testing Fault Injection
1622  */
1623 #ifdef SD_FAULT_INJECTION
1624 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1625 static void sd_faultinjection(struct scsi_pkt *pktp);
1626 static void sd_injection_log(char *buf, struct sd_lun *un);
1627 #endif
1628 
1629 /*
1630  * Device driver ops vector
1631  */
1632 static struct cb_ops sd_cb_ops = {
1633 	sdopen,			/* open */
1634 	sdclose,		/* close */
1635 	sdstrategy,		/* strategy */
1636 	nodev,			/* print */
1637 	sddump,			/* dump */
1638 	sdread,			/* read */
1639 	sdwrite,		/* write */
1640 	sdioctl,		/* ioctl */
1641 	nodev,			/* devmap */
1642 	nodev,			/* mmap */
1643 	nodev,			/* segmap */
1644 	nochpoll,		/* poll */
1645 	sd_prop_op,		/* cb_prop_op */
1646 	0,			/* streamtab  */
1647 	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1648 	CB_REV,			/* cb_rev */
1649 	sdaread, 		/* async I/O read entry point */
1650 	sdawrite		/* async I/O write entry point */
1651 };
1652 
1653 static struct dev_ops sd_ops = {
1654 	DEVO_REV,		/* devo_rev, */
1655 	0,			/* refcnt  */
1656 	sdinfo,			/* info */
1657 	nulldev,		/* identify */
1658 	sdprobe,		/* probe */
1659 	sdattach,		/* attach */
1660 	sddetach,		/* detach */
1661 	nodev,			/* reset */
1662 	&sd_cb_ops,		/* driver operations */
1663 	NULL,			/* bus operations */
1664 	sdpower			/* power */
1665 };
1666 
1667 
1668 /*
1669  * This is the loadable module wrapper.
1670  */
1671 #include <sys/modctl.h>
1672 
1673 static struct modldrv modldrv = {
1674 	&mod_driverops,		/* Type of module. This one is a driver */
1675 	SD_MODULE_NAME,		/* Module name. */
1676 	&sd_ops			/* driver ops */
1677 };
1678 
1679 
1680 static struct modlinkage modlinkage = {
1681 	MODREV_1,
1682 	&modldrv,
1683 	NULL
1684 };
1685 
1686 
1687 static struct scsi_asq_key_strings sd_additional_codes[] = {
1688 	0x81, 0, "Logical Unit is Reserved",
1689 	0x85, 0, "Audio Address Not Valid",
1690 	0xb6, 0, "Media Load Mechanism Failed",
1691 	0xB9, 0, "Audio Play Operation Aborted",
1692 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1693 	0x53, 2, "Medium removal prevented",
1694 	0x6f, 0, "Authentication failed during key exchange",
1695 	0x6f, 1, "Key not present",
1696 	0x6f, 2, "Key not established",
1697 	0x6f, 3, "Read without proper authentication",
1698 	0x6f, 4, "Mismatched region to this logical unit",
1699 	0x6f, 5, "Region reset count error",
1700 	0xffff, 0x0, NULL
1701 };
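
/*
 * Illustrative note (not part of the original source): tables of
 * scsi_asq_key_strings such as the one above are terminated by a
 * 0xffff asc entry and are intended to be passed to scsi_vu_errmsg(9F)
 * as its asc_list argument, so that vendor-unique ASC/ASCQ pairs can
 * be decoded when sense data is printed.
 */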
1702 
1703 
1704 /*
1705  * Struct for passing printing information for sense data messages
1706  */
1707 struct sd_sense_info {
1708 	int	ssi_severity;
1709 	int	ssi_pfa_flag;
1710 };
1711 
1712 /*
1713  * Table of function pointers for iostart-side routines. Separate "chains"
1714  * of layered function calls are formed by placing the function pointers
1715  * sequentially in the desired order. Functions are called in order of
1716  * increasing table index. The last function in each chain must
1717  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1718  * in the sd_iodone_chain[] array.
1719  *
1720  * Note: It may seem more natural to organize both the iostart and iodone
1721  * functions together, into an array of structures (or some similar
1722  * organization) with a common index, rather than two separate arrays that
1723  * must be kept in synchronization. The purpose of this division is
1724  * to achieve improved performance: individual arrays allow for more
1725  * effective cache line utilization on certain platforms.
1726  */
1727 
1728 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1729 
1730 
1731 static sd_chain_t sd_iostart_chain[] = {
1732 
1733 	/* Chain for buf IO for disk drive targets (PM enabled) */
1734 	sd_mapblockaddr_iostart,	/* Index: 0 */
1735 	sd_pm_iostart,			/* Index: 1 */
1736 	sd_core_iostart,		/* Index: 2 */
1737 
1738 	/* Chain for buf IO for disk drive targets (PM disabled) */
1739 	sd_mapblockaddr_iostart,	/* Index: 3 */
1740 	sd_core_iostart,		/* Index: 4 */
1741 
1742 	/* Chain for buf IO for removable-media targets (PM enabled) */
1743 	sd_mapblockaddr_iostart,	/* Index: 5 */
1744 	sd_mapblocksize_iostart,	/* Index: 6 */
1745 	sd_pm_iostart,			/* Index: 7 */
1746 	sd_core_iostart,		/* Index: 8 */
1747 
1748 	/* Chain for buf IO for removable-media targets (PM disabled) */
1749 	sd_mapblockaddr_iostart,	/* Index: 9 */
1750 	sd_mapblocksize_iostart,	/* Index: 10 */
1751 	sd_core_iostart,		/* Index: 11 */
1752 
1753 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1754 	sd_mapblockaddr_iostart,	/* Index: 12 */
1755 	sd_checksum_iostart,		/* Index: 13 */
1756 	sd_pm_iostart,			/* Index: 14 */
1757 	sd_core_iostart,		/* Index: 15 */
1758 
1759 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1760 	sd_mapblockaddr_iostart,	/* Index: 16 */
1761 	sd_checksum_iostart,		/* Index: 17 */
1762 	sd_core_iostart,		/* Index: 18 */
1763 
1764 	/* Chain for USCSI commands (all targets) */
1765 	sd_pm_iostart,			/* Index: 19 */
1766 	sd_core_iostart,		/* Index: 20 */
1767 
1768 	/* Chain for checksumming USCSI commands (all targets) */
1769 	sd_checksum_uscsi_iostart,	/* Index: 21 */
1770 	sd_pm_iostart,			/* Index: 22 */
1771 	sd_core_iostart,		/* Index: 23 */
1772 
1773 	/* Chain for "direct" USCSI commands (all targets) */
1774 	sd_core_iostart,		/* Index: 24 */
1775 
1776 	/* Chain for "direct priority" USCSI commands (all targets) */
1777 	sd_core_iostart,		/* Index: 25 */
1778 };
1779 
1780 /*
1781  * Macros to locate the first function of each iostart chain in the
1782  * sd_iostart_chain[] array. These are located by the index in the array.
1783  */
1784 #define	SD_CHAIN_DISK_IOSTART			0
1785 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1786 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1787 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1788 #define	SD_CHAIN_CHKSUM_IOSTART			12
1789 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1790 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1791 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1792 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1793 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1794 
1795 
1796 /*
1797  * Table of function pointers for the iodone-side routines for the driver-
1798  * internal layering mechanism.  The calling sequence for iodone routines
1799  * uses a decrementing table index, so the last routine called in a chain
1800  * must be at the lowest array index location for that chain.  The last
1801  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1802  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1803  * of the functions in an iodone side chain must correspond to the ordering
1804  * of the iostart routines for that chain.  Note that there is no iodone
1805  * side routine that corresponds to sd_core_iostart(), so there is no
1806  * entry in the table for this.
1807  */
1808 
1809 static sd_chain_t sd_iodone_chain[] = {
1810 
1811 	/* Chain for buf IO for disk drive targets (PM enabled) */
1812 	sd_buf_iodone,			/* Index: 0 */
1813 	sd_mapblockaddr_iodone,		/* Index: 1 */
1814 	sd_pm_iodone,			/* Index: 2 */
1815 
1816 	/* Chain for buf IO for disk drive targets (PM disabled) */
1817 	sd_buf_iodone,			/* Index: 3 */
1818 	sd_mapblockaddr_iodone,		/* Index: 4 */
1819 
1820 	/* Chain for buf IO for removable-media targets (PM enabled) */
1821 	sd_buf_iodone,			/* Index: 5 */
1822 	sd_mapblockaddr_iodone,		/* Index: 6 */
1823 	sd_mapblocksize_iodone,		/* Index: 7 */
1824 	sd_pm_iodone,			/* Index: 8 */
1825 
1826 	/* Chain for buf IO for removable-media targets (PM disabled) */
1827 	sd_buf_iodone,			/* Index: 9 */
1828 	sd_mapblockaddr_iodone,		/* Index: 10 */
1829 	sd_mapblocksize_iodone,		/* Index: 11 */
1830 
1831 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1832 	sd_buf_iodone,			/* Index: 12 */
1833 	sd_mapblockaddr_iodone,		/* Index: 13 */
1834 	sd_checksum_iodone,		/* Index: 14 */
1835 	sd_pm_iodone,			/* Index: 15 */
1836 
1837 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1838 	sd_buf_iodone,			/* Index: 16 */
1839 	sd_mapblockaddr_iodone,		/* Index: 17 */
1840 	sd_checksum_iodone,		/* Index: 18 */
1841 
1842 	/* Chain for USCSI commands (non-checksum targets) */
1843 	sd_uscsi_iodone,		/* Index: 19 */
1844 	sd_pm_iodone,			/* Index: 20 */
1845 
1846 	/* Chain for USCSI commands (checksum targets) */
1847 	sd_uscsi_iodone,		/* Index: 21 */
1848 	sd_checksum_uscsi_iodone,	/* Index: 22 */
1849 	sd_pm_iodone,			/* Index: 23 */
1850 
1851 	/* Chain for "direct" USCSI commands (all targets) */
1852 	sd_uscsi_iodone,		/* Index: 24 */
1853 
1854 	/* Chain for "direct priority" USCSI commands (all targets) */
1855 	sd_uscsi_iodone,		/* Index: 25 */
1856 };
1857 
1858 
1859 /*
1860  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1861  * each iodone-side chain. These are located by the array index, but as the
1862  * iodone side functions are called in a decrementing-index order, the
1863  * highest index number in each chain must be specified (as these correspond
1864  * to the first function in the iodone chain that will be called by the core
1865  * at IO completion time).
1866  */
1867 
1868 #define	SD_CHAIN_DISK_IODONE			2
1869 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1870 #define	SD_CHAIN_RMMEDIA_IODONE			8
1871 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1872 #define	SD_CHAIN_CHKSUM_IODONE			15
1873 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1874 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1875 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		22
1876 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1877 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1878 
1879 
1880 
1881 
1882 /*
1883  * Array to map a layering chain index to the appropriate initpkt routine.
1884  * The redundant entries are present so that the index used for accessing
1885  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1886  * with this table as well.
1887  */
1888 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1889 
1890 static sd_initpkt_t	sd_initpkt_map[] = {
1891 
1892 	/* Chain for buf IO for disk drive targets (PM enabled) */
1893 	sd_initpkt_for_buf,		/* Index: 0 */
1894 	sd_initpkt_for_buf,		/* Index: 1 */
1895 	sd_initpkt_for_buf,		/* Index: 2 */
1896 
1897 	/* Chain for buf IO for disk drive targets (PM disabled) */
1898 	sd_initpkt_for_buf,		/* Index: 3 */
1899 	sd_initpkt_for_buf,		/* Index: 4 */
1900 
1901 	/* Chain for buf IO for removable-media targets (PM enabled) */
1902 	sd_initpkt_for_buf,		/* Index: 5 */
1903 	sd_initpkt_for_buf,		/* Index: 6 */
1904 	sd_initpkt_for_buf,		/* Index: 7 */
1905 	sd_initpkt_for_buf,		/* Index: 8 */
1906 
1907 	/* Chain for buf IO for removable-media targets (PM disabled) */
1908 	sd_initpkt_for_buf,		/* Index: 9 */
1909 	sd_initpkt_for_buf,		/* Index: 10 */
1910 	sd_initpkt_for_buf,		/* Index: 11 */
1911 
1912 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1913 	sd_initpkt_for_buf,		/* Index: 12 */
1914 	sd_initpkt_for_buf,		/* Index: 13 */
1915 	sd_initpkt_for_buf,		/* Index: 14 */
1916 	sd_initpkt_for_buf,		/* Index: 15 */
1917 
1918 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1919 	sd_initpkt_for_buf,		/* Index: 16 */
1920 	sd_initpkt_for_buf,		/* Index: 17 */
1921 	sd_initpkt_for_buf,		/* Index: 18 */
1922 
1923 	/* Chain for USCSI commands (non-checksum targets) */
1924 	sd_initpkt_for_uscsi,		/* Index: 19 */
1925 	sd_initpkt_for_uscsi,		/* Index: 20 */
1926 
1927 	/* Chain for USCSI commands (checksum targets) */
1928 	sd_initpkt_for_uscsi,		/* Index: 21 */
1929 	sd_initpkt_for_uscsi,		/* Index: 22 */
1930 	sd_initpkt_for_uscsi,		/* Index: 23 */
1931 
1932 	/* Chain for "direct" USCSI commands (all targets) */
1933 	sd_initpkt_for_uscsi,		/* Index: 24 */
1934 
1935 	/* Chain for "direct priority" USCSI commands (all targets) */
1936 	sd_initpkt_for_uscsi,		/* Index: 25 */
1937 
1938 };
1939 
1940 
1941 /*
1942  * Array to map a layering chain index to the appropriate destroypkt routine.
1943  * The redundant entries are present so that the index used for accessing
1944  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1945  * with this table as well.
1946  */
1947 typedef void (*sd_destroypkt_t)(struct buf *);
1948 
1949 static sd_destroypkt_t	sd_destroypkt_map[] = {
1950 
1951 	/* Chain for buf IO for disk drive targets (PM enabled) */
1952 	sd_destroypkt_for_buf,		/* Index: 0 */
1953 	sd_destroypkt_for_buf,		/* Index: 1 */
1954 	sd_destroypkt_for_buf,		/* Index: 2 */
1955 
1956 	/* Chain for buf IO for disk drive targets (PM disabled) */
1957 	sd_destroypkt_for_buf,		/* Index: 3 */
1958 	sd_destroypkt_for_buf,		/* Index: 4 */
1959 
1960 	/* Chain for buf IO for removable-media targets (PM enabled) */
1961 	sd_destroypkt_for_buf,		/* Index: 5 */
1962 	sd_destroypkt_for_buf,		/* Index: 6 */
1963 	sd_destroypkt_for_buf,		/* Index: 7 */
1964 	sd_destroypkt_for_buf,		/* Index: 8 */
1965 
1966 	/* Chain for buf IO for removable-media targets (PM disabled) */
1967 	sd_destroypkt_for_buf,		/* Index: 9 */
1968 	sd_destroypkt_for_buf,		/* Index: 10 */
1969 	sd_destroypkt_for_buf,		/* Index: 11 */
1970 
1971 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1972 	sd_destroypkt_for_buf,		/* Index: 12 */
1973 	sd_destroypkt_for_buf,		/* Index: 13 */
1974 	sd_destroypkt_for_buf,		/* Index: 14 */
1975 	sd_destroypkt_for_buf,		/* Index: 15 */
1976 
1977 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1978 	sd_destroypkt_for_buf,		/* Index: 16 */
1979 	sd_destroypkt_for_buf,		/* Index: 17 */
1980 	sd_destroypkt_for_buf,		/* Index: 18 */
1981 
1982 	/* Chain for USCSI commands (non-checksum targets) */
1983 	sd_destroypkt_for_uscsi,	/* Index: 19 */
1984 	sd_destroypkt_for_uscsi,	/* Index: 20 */
1985 
1986 	/* Chain for USCSI commands (checksum targets) */
1987 	sd_destroypkt_for_uscsi,	/* Index: 21 */
1988 	sd_destroypkt_for_uscsi,	/* Index: 22 */
1989 	sd_destroypkt_for_uscsi,	/* Index: 23 */
1990 
1991 	/* Chain for "direct" USCSI commands (all targets) */
1992 	sd_destroypkt_for_uscsi,	/* Index: 24 */
1993 
1994 	/* Chain for "direct priority" USCSI commands (all targets) */
1995 	sd_destroypkt_for_uscsi,	/* Index: 25 */
1996 
1997 };
1998 
1999 
2000 
2001 /*
2002  * Array to map a layering chain index to the appropriate chain "type".
2003  * The chain type indicates a specific property/usage of the chain.
2004  * The redundant entries are present so that the index used for accessing
2005  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2006  * with this table as well.
2007  */
2008 
2009 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
2010 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
2011 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
2012 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
2013 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
2014 						/* (for error recovery) */
2015 
2016 static int sd_chain_type_map[] = {
2017 
2018 	/* Chain for buf IO for disk drive targets (PM enabled) */
2019 	SD_CHAIN_BUFIO,			/* Index: 0 */
2020 	SD_CHAIN_BUFIO,			/* Index: 1 */
2021 	SD_CHAIN_BUFIO,			/* Index: 2 */
2022 
2023 	/* Chain for buf IO for disk drive targets (PM disabled) */
2024 	SD_CHAIN_BUFIO,			/* Index: 3 */
2025 	SD_CHAIN_BUFIO,			/* Index: 4 */
2026 
2027 	/* Chain for buf IO for removable-media targets (PM enabled) */
2028 	SD_CHAIN_BUFIO,			/* Index: 5 */
2029 	SD_CHAIN_BUFIO,			/* Index: 6 */
2030 	SD_CHAIN_BUFIO,			/* Index: 7 */
2031 	SD_CHAIN_BUFIO,			/* Index: 8 */
2032 
2033 	/* Chain for buf IO for removable-media targets (PM disabled) */
2034 	SD_CHAIN_BUFIO,			/* Index: 9 */
2035 	SD_CHAIN_BUFIO,			/* Index: 10 */
2036 	SD_CHAIN_BUFIO,			/* Index: 11 */
2037 
2038 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2039 	SD_CHAIN_BUFIO,			/* Index: 12 */
2040 	SD_CHAIN_BUFIO,			/* Index: 13 */
2041 	SD_CHAIN_BUFIO,			/* Index: 14 */
2042 	SD_CHAIN_BUFIO,			/* Index: 15 */
2043 
2044 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2045 	SD_CHAIN_BUFIO,			/* Index: 16 */
2046 	SD_CHAIN_BUFIO,			/* Index: 17 */
2047 	SD_CHAIN_BUFIO,			/* Index: 18 */
2048 
2049 	/* Chain for USCSI commands (non-checksum targets) */
2050 	SD_CHAIN_USCSI,			/* Index: 19 */
2051 	SD_CHAIN_USCSI,			/* Index: 20 */
2052 
2053 	/* Chain for USCSI commands (checksum targets) */
2054 	SD_CHAIN_USCSI,			/* Index: 21 */
2055 	SD_CHAIN_USCSI,			/* Index: 22 */
2056 	SD_CHAIN_USCSI,			/* Index: 23 */
2057 
2058 	/* Chain for "direct" USCSI commands (all targets) */
2059 	SD_CHAIN_DIRECT,		/* Index: 24 */
2060 
2061 	/* Chain for "direct priority" USCSI commands (all targets) */
2062 	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2063 };
2064 
2065 
2066 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2067 #define	SD_IS_BUFIO(xp)			\
2068 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2069 
2070 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2071 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2072 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
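
/*
 * Illustrative usage (not part of the original source): completion and
 * error-handling code can branch on the chain that originated an xbuf,
 * e.g.:
 *
 *	if (SD_IS_BUFIO(xp)) {
 *		... handle a regular buf(9S) IO ...
 *	} else if (SD_IS_DIRECT_PRIORITY(xp)) {
 *		... special-case a recovery command issued on the
 *		    "direct priority" chain ...
 *	}
 */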
2073 
2074 
2075 
2076 /*
2077  * Struct, array, and macros to map a specific chain to the appropriate
2078  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2079  *
2080  * The sd_chain_index_map[] array is used at attach time to set the various
2081  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2082  * chain to be used with the instance. This allows different instances to use
2083  * different chains for buf IO, uscsi IO, etc. Also, since the xb_chain_iostart
2084  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2085  * values at sd_xbuf init time, this allows (1) layering chains to be changed
2086  * dynamically and without the use of locking; and (2) a layer to update the
2087  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2088  * to allow for deferred processing of an IO within the same chain from a
2089  * different execution context.
2090  */
2091 
2092 struct sd_chain_index {
2093 	int	sci_iostart_index;
2094 	int	sci_iodone_index;
2095 };
2096 
2097 static struct sd_chain_index	sd_chain_index_map[] = {
2098 	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2099 	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2100 	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2101 	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2102 	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2103 	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2104 	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2105 	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2106 	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2107 	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2108 };
2109 
2110 
2111 /*
2112  * The following are indexes into the sd_chain_index_map[] array.
2113  */
2114 
2115 /* un->un_buf_chain_type must be set to one of these */
2116 #define	SD_CHAIN_INFO_DISK		0
2117 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2118 #define	SD_CHAIN_INFO_RMMEDIA		2
2119 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2120 #define	SD_CHAIN_INFO_CHKSUM		4
2121 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2122 
2123 /* un->un_uscsi_chain_type must be set to one of these */
2124 #define	SD_CHAIN_INFO_USCSI_CMD		6
2125 /* USCSI with PM disabled is the same as DIRECT */
2126 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2127 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2128 
2129 /* un->un_direct_chain_type must be set to one of these */
2130 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2131 
2132 /* un->un_priority_chain_type must be set to one of these */
2133 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
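
/*
 * Illustrative sketch (not part of the original source; assumes the
 * un_f_pm_is_enabled flag used elsewhere in this driver): at attach
 * time an instance selects its chains by storing one of the above
 * indexes, for example:
 *
 *	un->un_buf_chain_type = (un->un_f_pm_is_enabled) ?
 *	    SD_CHAIN_INFO_DISK : SD_CHAIN_INFO_DISK_NO_PM;
 *
 * sd_xbuf initialization then copies the matching sci_iostart_index and
 * sci_iodone_index values from sd_chain_index_map[] into the xbuf's
 * xb_chain_iostart and xb_chain_iodone members.
 */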
2134 
2135 /* size for devid inquiries */
2136 #define	MAX_INQUIRY_SIZE		0xF0
2137 
2138 /*
2139  * Macros used by functions to pass a given buf(9S) struct along to the
2140  * next function in the layering chain for further processing.
2141  *
2142  * In the following macros, passing more than three arguments to the called
2143  * routines causes the optimizer for the SPARC compiler to stop doing tail
2144  * call elimination, which results in significant performance degradation.
2145  */
2146 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2147 	((*(sd_iostart_chain[index]))(index, un, bp))
2148 
2149 #define	SD_BEGIN_IODONE(index, un, bp)	\
2150 	((*(sd_iodone_chain[index]))(index, un, bp))
2151 
2152 #define	SD_NEXT_IOSTART(index, un, bp)				\
2153 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2154 
2155 #define	SD_NEXT_IODONE(index, un, bp)				\
2156 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
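
/*
 * Illustrative sketch (not part of the original source): a minimal
 * pass-through iostart layer would perform its processing and then hand
 * the buf to the next function in its chain, e.g.:
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		... layer-specific processing on bp ...
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 *
 * The matching iodone-side routine would likewise call
 * SD_NEXT_IODONE(index, un, bp) once its completion work is done.
 */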
2157 
2158 /*
2159  *    Function: _init
2160  *
2161  * Description: This is the driver _init(9E) entry point.
2162  *
2163  * Return Code: Returns the value from mod_install(9F) or
2164  *		ddi_soft_state_init(9F) as appropriate.
2165  *
2166  *     Context: Called when driver module loaded.
2167  */
2168 
2169 int
2170 _init(void)
2171 {
2172 	int	err;
2173 
2174 	/* establish driver name from module name */
2175 	sd_label = mod_modname(&modlinkage);
2176 
2177 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2178 		SD_MAXUNIT);
2179 
2180 	if (err != 0) {
2181 		return (err);
2182 	}
2183 
2184 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2185 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2186 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2187 
2188 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2189 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2190 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2191 
2192 	/*
2193 	 * it's ok to init here even for fibre devices
2194 	 */
2195 	sd_scsi_probe_cache_init();
2196 
2197 	/*
2198 	 * Creating taskq before mod_install ensures that all callers (threads)
2199 	 * that enter the module after a successful mod_install encounter
2200 	 * a valid taskq.
2201 	 */
2202 	sd_taskq_create();
2203 
2204 	err = mod_install(&modlinkage);
2205 	if (err != 0) {
2206 		/* delete taskq if install fails */
2207 		sd_taskq_delete();
2208 
2209 		mutex_destroy(&sd_detach_mutex);
2210 		mutex_destroy(&sd_log_mutex);
2211 		mutex_destroy(&sd_label_mutex);
2212 
2213 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2214 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2215 		cv_destroy(&sd_tr.srq_inprocess_cv);
2216 
2217 		sd_scsi_probe_cache_fini();
2218 
2219 		ddi_soft_state_fini(&sd_state);
2220 		return (err);
2221 	}
2222 
2223 	return (err);
2224 }
2225 
2226 
2227 /*
2228  *    Function: _fini
2229  *
2230  * Description: This is the driver _fini(9E) entry point.
2231  *
2232  * Return Code: Returns the value from mod_remove(9F)
2233  *
2234  *     Context: Called when driver module is unloaded.
2235  */
2236 
2237 int
2238 _fini(void)
2239 {
2240 	int err;
2241 
2242 	if ((err = mod_remove(&modlinkage)) != 0) {
2243 		return (err);
2244 	}
2245 
2246 	sd_taskq_delete();
2247 
2248 	mutex_destroy(&sd_detach_mutex);
2249 	mutex_destroy(&sd_log_mutex);
2250 	mutex_destroy(&sd_label_mutex);
2251 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2252 
2253 	sd_scsi_probe_cache_fini();
2254 
2255 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2256 	cv_destroy(&sd_tr.srq_inprocess_cv);
2257 
2258 	ddi_soft_state_fini(&sd_state);
2259 
2260 	return (err);
2261 }
2262 
2263 
2264 /*
2265  *    Function: _info
2266  *
2267  * Description: This is the driver _info(9E) entry point.
2268  *
2269  *   Arguments: modinfop - pointer to the driver modinfo structure
2270  *
2271  * Return Code: Returns the value from mod_info(9F).
2272  *
2273  *     Context: Kernel thread context
2274  */
2275 
2276 int
2277 _info(struct modinfo *modinfop)
2278 {
2279 	return (mod_info(&modlinkage, modinfop));
2280 }
2281 
2282 
2283 /*
2284  * The following routines implement the driver message logging facility.
2285  * They provide component- and level-based debug output filtering.
2286  * Output may also be restricted to messages for a single instance by
2287  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2288  * to NULL, then messages for all instances are printed.
2289  *
2290  * These routines have been cloned from each other due to the language
2291  * constraints of macros and variable argument list processing.
2292  */
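
/*
 * Illustrative note (not part of the original source): because the
 * sd_component_mask and sd_level_mask globals tested below are module
 * variables, debug output could be enabled at boot with /etc/system
 * entries along the lines of:
 *
 *	set sd:sd_component_mask = <component bits>
 *	set sd:sd_level_mask = <SD_LOGMASK_* bits>
 *
 * while pointing sd_debug_un at a specific soft state (e.g. from a
 * debugger) restricts the output to that one instance.
 */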
2293 
2294 
2295 /*
2296  *    Function: sd_log_err
2297  *
2298  * Description: This routine is called by the SD_ERROR macro for debug
2299  *		logging of error conditions.
2300  *
2301  *   Arguments: comp - driver component being logged
2302  *		dev  - pointer to driver info structure
2303  *		fmt  - error string and format to be logged
2304  */
2305 
2306 static void
2307 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2308 {
2309 	va_list		ap;
2310 	dev_info_t	*dev;
2311 
2312 	ASSERT(un != NULL);
2313 	dev = SD_DEVINFO(un);
2314 	ASSERT(dev != NULL);
2315 
2316 	/*
2317 	 * Filter messages based on the global component and level masks.
2318 	 * Also print if un matches the value of sd_debug_un, or if
2319 	 * sd_debug_un is set to NULL.
2320 	 */
2321 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2322 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2323 		mutex_enter(&sd_log_mutex);
2324 		va_start(ap, fmt);
2325 		(void) vsprintf(sd_log_buf, fmt, ap);
2326 		va_end(ap);
2327 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2328 		mutex_exit(&sd_log_mutex);
2329 	}
2330 #ifdef SD_FAULT_INJECTION
2331 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2332 	if (un->sd_injection_mask & comp) {
2333 		mutex_enter(&sd_log_mutex);
2334 		va_start(ap, fmt);
2335 		(void) vsprintf(sd_log_buf, fmt, ap);
2336 		va_end(ap);
2337 		sd_injection_log(sd_log_buf, un);
2338 		mutex_exit(&sd_log_mutex);
2339 	}
2340 #endif
2341 }
2342 
2343 
2344 /*
2345  *    Function: sd_log_info
2346  *
2347  * Description: This routine is called by the SD_INFO macro for debug
2348  *		logging of general purpose informational conditions.
2349  *
2350  *   Arguments: comp - driver component being logged
2351  *		dev  - pointer to driver info structure
2352  *		fmt  - info string and format to be logged
2353  */
2354 
2355 static void
2356 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2357 {
2358 	va_list		ap;
2359 	dev_info_t	*dev;
2360 
2361 	ASSERT(un != NULL);
2362 	dev = SD_DEVINFO(un);
2363 	ASSERT(dev != NULL);
2364 
2365 	/*
2366 	 * Filter messages based on the global component and level masks.
2367 	 * Also print if un matches the value of sd_debug_un, or if
2368 	 * sd_debug_un is set to NULL.
2369 	 */
2370 	if ((sd_component_mask & component) &&
2371 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2372 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2373 		mutex_enter(&sd_log_mutex);
2374 		va_start(ap, fmt);
2375 		(void) vsprintf(sd_log_buf, fmt, ap);
2376 		va_end(ap);
2377 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2378 		mutex_exit(&sd_log_mutex);
2379 	}
2380 #ifdef SD_FAULT_INJECTION
2381 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2382 	if (un->sd_injection_mask & component) {
2383 		mutex_enter(&sd_log_mutex);
2384 		va_start(ap, fmt);
2385 		(void) vsprintf(sd_log_buf, fmt, ap);
2386 		va_end(ap);
2387 		sd_injection_log(sd_log_buf, un);
2388 		mutex_exit(&sd_log_mutex);
2389 	}
2390 #endif
2391 }
2392 
2393 
2394 /*
2395  *    Function: sd_log_trace
2396  *
2397  * Description: This routine is called by the SD_TRACE macro for debug
2398  *		logging of trace conditions (i.e. function entry/exit).
2399  *
2400  *   Arguments: comp - driver component being logged
2401  *		dev  - pointer to driver info structure
2402  *		fmt  - trace string and format to be logged
2403  */
2404 
2405 static void
2406 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2407 {
2408 	va_list		ap;
2409 	dev_info_t	*dev;
2410 
2411 	ASSERT(un != NULL);
2412 	dev = SD_DEVINFO(un);
2413 	ASSERT(dev != NULL);
2414 
2415 	/*
2416 	 * Filter messages based on the global component and level masks.
2417 	 * Also print if un matches the value of sd_debug_un, or if
2418 	 * sd_debug_un is set to NULL.
2419 	 */
2420 	if ((sd_component_mask & component) &&
2421 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2422 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2423 		mutex_enter(&sd_log_mutex);
2424 		va_start(ap, fmt);
2425 		(void) vsprintf(sd_log_buf, fmt, ap);
2426 		va_end(ap);
2427 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2428 		mutex_exit(&sd_log_mutex);
2429 	}
2430 #ifdef SD_FAULT_INJECTION
2431 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2432 	if (un->sd_injection_mask & component) {
2433 		mutex_enter(&sd_log_mutex);
2434 		va_start(ap, fmt);
2435 		(void) vsprintf(sd_log_buf, fmt, ap);
2436 		va_end(ap);
2437 		sd_injection_log(sd_log_buf, un);
2438 		mutex_exit(&sd_log_mutex);
2439 	}
2440 #endif
2441 }
2442 
2443 
2444 /*
2445  *    Function: sdprobe
2446  *
2447  * Description: This is the driver probe(9e) entry point function.
2448  *
2449  *   Arguments: devi - opaque device info handle
2450  *
2451  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2452  *              DDI_PROBE_FAILURE: If the probe failed.
2453  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2454  *				   but may be present in the future.
2455  */
2456 
2457 static int
2458 sdprobe(dev_info_t *devi)
2459 {
2460 	struct scsi_device	*devp;
2461 	int			rval;
2462 	int			instance;
2463 
2464 	/*
2465 	 * if it wasn't for pln, sdprobe could actually be nulldev
2466 	 * in the "__fibre" case.
2467 	 */
2468 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2469 		return (DDI_PROBE_DONTCARE);
2470 	}
2471 
2472 	devp = ddi_get_driver_private(devi);
2473 
2474 	if (devp == NULL) {
2475 		/* Ooops... nexus driver is mis-configured... */
2476 		return (DDI_PROBE_FAILURE);
2477 	}
2478 
2479 	instance = ddi_get_instance(devi);
2480 
2481 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2482 		return (DDI_PROBE_PARTIAL);
2483 	}
2484 
2485 	/*
2486 	 * Call the SCSA utility probe routine to see if we actually
2487 	 * have a target at this SCSI nexus.
2488 	 */
2489 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2490 	case SCSIPROBE_EXISTS:
2491 		switch (devp->sd_inq->inq_dtype) {
2492 		case DTYPE_DIRECT:
2493 			rval = DDI_PROBE_SUCCESS;
2494 			break;
2495 		case DTYPE_RODIRECT:
2496 			/* CDs etc. Can be removable media */
2497 			rval = DDI_PROBE_SUCCESS;
2498 			break;
2499 		case DTYPE_OPTICAL:
2500 			/*
2501 			 * Rewritable optical drive HP115AA
2502 			 * Can also be removable media
2503 			 */
2504 
2505 			/*
2506 			 * Do not attempt to bind to DTYPE_OPTICAL if
2507 			 * pre-Solaris 9 SPARC sd behavior is required.
2508 			 *
2509 			 * If this is the first time through and sd_dtype_optical_bind
2510 			 * has not been set in /etc/system, check the properties.
2511 			 */
2512 
2513 			if (sd_dtype_optical_bind < 0) {
2514 				sd_dtype_optical_bind = ddi_prop_get_int(
2515 				    DDI_DEV_T_ANY, devi, 0,
2516 				    "optical-device-bind", 1);
2517 			}
2518 
2519 			if (sd_dtype_optical_bind == 0) {
2520 				rval = DDI_PROBE_FAILURE;
2521 			} else {
2522 				rval = DDI_PROBE_SUCCESS;
2523 			}
2524 			break;
2525 
2526 		case DTYPE_NOTPRESENT:
2527 		default:
2528 			rval = DDI_PROBE_FAILURE;
2529 			break;
2530 		}
2531 		break;
2532 	default:
2533 		rval = DDI_PROBE_PARTIAL;
2534 		break;
2535 	}
2536 
2537 	/*
2538 	 * This routine checks for resource allocation prior to freeing,
2539 	 * so it will take care of the "smart probing" case where a
2540 	 * scsi_probe() may or may not have been issued and will *not*
2541 	 * free previously-freed resources.
2542 	 */
2543 	scsi_unprobe(devp);
2544 	return (rval);
2545 }
2546 
2547 
2548 /*
2549  *    Function: sdinfo
2550  *
2551  * Description: This is the driver getinfo(9e) entry point function.
2552  * 		Given the device number, return the devinfo pointer from
2553  *		the scsi_device structure or the instance number
2554  *		associated with the dev_t.
2555  *
2556  *   Arguments: dip     - pointer to device info structure
2557  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2558  *			  DDI_INFO_DEVT2INSTANCE)
2559  *		arg     - driver dev_t
2560  *		resultp - user buffer for request response
2561  *
2562  * Return Code: DDI_SUCCESS
2563  *              DDI_FAILURE
2564  */
2565 /* ARGSUSED */
2566 static int
2567 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2568 {
2569 	struct sd_lun	*un;
2570 	dev_t		dev;
2571 	int		instance;
2572 	int		error;
2573 
2574 	switch (infocmd) {
2575 	case DDI_INFO_DEVT2DEVINFO:
2576 		dev = (dev_t)arg;
2577 		instance = SDUNIT(dev);
2578 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2579 			return (DDI_FAILURE);
2580 		}
2581 		*result = (void *) SD_DEVINFO(un);
2582 		error = DDI_SUCCESS;
2583 		break;
2584 	case DDI_INFO_DEVT2INSTANCE:
2585 		dev = (dev_t)arg;
2586 		instance = SDUNIT(dev);
2587 		*result = (void *)(uintptr_t)instance;
2588 		error = DDI_SUCCESS;
2589 		break;
2590 	default:
2591 		error = DDI_FAILURE;
2592 	}
2593 	return (error);
2594 }
2595 
2596 /*
2597  *    Function: sd_prop_op
2598  *
2599  * Description: This is the driver prop_op(9e) entry point function.
2600  *		Return the number of blocks for the partition in question
2601  *		or forward the request to the property facilities.
2602  *
2603  *   Arguments: dev       - device number
2604  *		dip       - pointer to device info structure
2605  *		prop_op   - property operator
2606  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2607  *		name      - pointer to property name
2608  *		valuep    - pointer or address of the user buffer
2609  *		lengthp   - property length
2610  *
2611  * Return Code: DDI_PROP_SUCCESS
2612  *              DDI_PROP_NOT_FOUND
2613  *              DDI_PROP_UNDEFINED
2614  *              DDI_PROP_NO_MEMORY
2615  *              DDI_PROP_BUF_TOO_SMALL
2616  */
2617 
2618 static int
2619 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2620 	char *name, caddr_t valuep, int *lengthp)
2621 {
2622 	int		instance = ddi_get_instance(dip);
2623 	struct sd_lun	*un;
2624 	uint64_t	nblocks64;
2625 
2626 	/*
2627 	 * Our dynamic properties are all device-specific and size-oriented.
2628 	 * Requests issued under conditions where size is valid are passed
2629 	 * to ddi_prop_op_nblocks with the size information; otherwise the
2630 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2631 	 */
2632 	un = ddi_get_soft_state(sd_state, instance);
2633 	if ((dev == DDI_DEV_T_ANY) || (un == NULL) ||
2634 	    (un->un_f_geometry_is_valid == FALSE)) {
2635 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2636 		    name, valuep, lengthp));
2637 	} else {
2638 		/* get nblocks value */
2639 		ASSERT(!mutex_owned(SD_MUTEX(un)));
2640 		mutex_enter(SD_MUTEX(un));
2641 		nblocks64 = (ulong_t)un->un_map[SDPART(dev)].dkl_nblk;
2642 		mutex_exit(SD_MUTEX(un));
2643 
2644 		return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2645 		    name, valuep, lengthp, nblocks64));
2646 	}
2647 }
2648 
2649 /*
2650  * The following functions are for smart probing:
2651  * sd_scsi_probe_cache_init()
2652  * sd_scsi_probe_cache_fini()
2653  * sd_scsi_clear_probe_cache()
2654  * sd_scsi_probe_with_cache()
2655  */
2656 
2657 /*
2658  *    Function: sd_scsi_probe_cache_init
2659  *
2660  * Description: Initializes the probe response cache mutex and head pointer.
2661  *
2662  *     Context: Kernel thread context
2663  */
2664 
2665 static void
2666 sd_scsi_probe_cache_init(void)
2667 {
2668 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2669 	sd_scsi_probe_cache_head = NULL;
2670 }
2671 
2672 
2673 /*
2674  *    Function: sd_scsi_probe_cache_fini
2675  *
2676  * Description: Frees all resources associated with the probe response cache.
2677  *
2678  *     Context: Kernel thread context
2679  */
2680 
2681 static void
2682 sd_scsi_probe_cache_fini(void)
2683 {
2684 	struct sd_scsi_probe_cache *cp;
2685 	struct sd_scsi_probe_cache *ncp;
2686 
2687 	/* Clean up our smart probing linked list */
2688 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2689 		ncp = cp->next;
2690 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2691 	}
2692 	sd_scsi_probe_cache_head = NULL;
2693 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2694 }
2695 
2696 
2697 /*
2698  *    Function: sd_scsi_clear_probe_cache
2699  *
2700  * Description: This routine clears the probe response cache. This is
2701  *		done when open() returns ENXIO so that when deferred
2702  *		attach is attempted (possibly after a device has been
2703  *		turned on) we will retry the probe. Since we don't know
2704  *		which target we failed to open, we just clear the
2705  *		entire cache.
2706  *
2707  *     Context: Kernel thread context
2708  */
2709 
2710 static void
2711 sd_scsi_clear_probe_cache(void)
2712 {
2713 	struct sd_scsi_probe_cache	*cp;
2714 	int				i;
2715 
2716 	mutex_enter(&sd_scsi_probe_cache_mutex);
2717 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2718 		/*
2719 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2720 		 * force probing to be performed the next time
2721 		 * sd_scsi_probe_with_cache is called.
2722 		 */
2723 		for (i = 0; i < NTARGETS_WIDE; i++) {
2724 			cp->cache[i] = SCSIPROBE_EXISTS;
2725 		}
2726 	}
2727 	mutex_exit(&sd_scsi_probe_cache_mutex);
2728 }
2729 
2730 
2731 /*
2732  *    Function: sd_scsi_probe_with_cache
2733  *
2734  * Description: This routine implements support for a scsi device probe
2735  *		with cache. The driver maintains a cache of the target
2736  *		responses to scsi probes. If we get no response from a
2737  *		target during a probe inquiry, we remember that, and we
2738  *		avoid additional calls to scsi_probe on non-zero LUNs
2739  *		on the same target until the cache is cleared. By doing
2740  *		so we avoid the 1/4 sec selection timeout for nonzero
2741  *		LUNs. LUN 0 of a target is always probed.
2742  *
2743  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2744  *              waitfunc - indicates what the allocator routines should
2745  *			   do when resources are not available. This value
2746  *			   is passed on to scsi_probe() when that routine
2747  *			   is called.
2748  *
2749  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2750  *		otherwise the value returned by scsi_probe(9F).
2751  *
2752  *     Context: Kernel thread context
2753  */
2754 
2755 static int
2756 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2757 {
2758 	struct sd_scsi_probe_cache	*cp;
2759 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2760 	int		lun, tgt;
2761 
2762 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2763 	    SCSI_ADDR_PROP_LUN, 0);
2764 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2765 	    SCSI_ADDR_PROP_TARGET, -1);
2766 
2767 	/* Make sure caching enabled and target in range */
2768 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2769 		/* do it the old way (no cache) */
2770 		return (scsi_probe(devp, waitfn));
2771 	}
2772 
2773 	mutex_enter(&sd_scsi_probe_cache_mutex);
2774 
2775 	/* Find the cache for this scsi bus instance */
2776 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2777 		if (cp->pdip == pdip) {
2778 			break;
2779 		}
2780 	}
2781 
2782 	/* If we can't find a cache for this pdip, create one */
2783 	if (cp == NULL) {
2784 		int i;
2785 
2786 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
2787 		    KM_SLEEP);
2788 		cp->pdip = pdip;
2789 		cp->next = sd_scsi_probe_cache_head;
2790 		sd_scsi_probe_cache_head = cp;
2791 		for (i = 0; i < NTARGETS_WIDE; i++) {
2792 			cp->cache[i] = SCSIPROBE_EXISTS;
2793 		}
2794 	}
2795 
2796 	mutex_exit(&sd_scsi_probe_cache_mutex);
2797 
2798 	/* Recompute the cache for this target if LUN zero */
2799 	if (lun == 0) {
2800 		cp->cache[tgt] = SCSIPROBE_EXISTS;
2801 	}
2802 
2803 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
2804 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
2805 		return (SCSIPROBE_NORESP);
2806 	}
2807 
2808 	/* Do the actual probe; save & return the result */
2809 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
2810 }
2811 
2812 
2813 /*
2814  *    Function: sd_spin_up_unit
2815  *
2816  * Description: Issues the following commands to spin up the device:
2817  *		START STOP UNIT and INQUIRY.
2818  *
2819  *   Arguments: un - driver soft state (unit) structure
2820  *
2821  * Return Code: 0 - success
2822  *		EIO - failure
2823  *		EACCES - reservation conflict
2824  *
2825  *     Context: Kernel thread context
2826  */
2827 
2828 static int
2829 sd_spin_up_unit(struct sd_lun *un)
2830 {
2831 	size_t	resid		= 0;
2832 	int	has_conflict	= FALSE;
2833 	uchar_t *bufaddr;
2834 
2835 	ASSERT(un != NULL);
2836 
2837 	/*
2838 	 * Send a throwaway START UNIT command.
2839 	 *
2840 	 * If we fail on this, we don't care presently what precisely
2841 	 * is wrong.  EMC's arrays will also fail this with a check
2842 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2843 	 * we don't want to fail the attach because it may become
2844 	 * "active" later.
2845 	 */
2846 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2847 	    == EACCES)
2848 		has_conflict = TRUE;
2849 
2850 	/*
2851 	 * Send another INQUIRY command to the target. This is necessary for
2852 	 * non-removable media direct access devices because their INQUIRY data
2853 	 * may not be fully qualified until they are spun up (perhaps via the
2854 	 * START command above).  Note: This seems to be needed for some
2855 	 * legacy devices only. The INQUIRY command should succeed even if a
2856 	 * Reservation Conflict is present.
2857 	 */
2858 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2859 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2860 		kmem_free(bufaddr, SUN_INQSIZE);
2861 		return (EIO);
2862 	}
2863 
2864 	/*
2865 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2866 	 * Note that this routine does not return a failure here even if the
2867 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2868 	 */
2869 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2870 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2871 	}
2872 
2873 	kmem_free(bufaddr, SUN_INQSIZE);
2874 
2875 	/* If we hit a reservation conflict above, tell the caller. */
2876 	if (has_conflict == TRUE) {
2877 		return (EACCES);
2878 	}
2879 
2880 	return (0);
2881 }
2882 
2883 #ifdef _LP64
2884 /*
2885  *    Function: sd_enable_descr_sense
2886  *
2887  * Description: This routine attempts to select descriptor sense format
2888  *		using the Control mode page.  Devices that support 64 bit
2889  *		LBAs (for >2TB luns) should also implement descriptor
2890  *		sense data so we will call this function whenever we see
2891  *		a lun larger than 2TB.  If for some reason the device
2892  *		supports 64 bit LBAs but doesn't support descriptor sense
2893  *		presumably the mode select will fail.  Everything will
2894  *		continue to work normally except that we will not get
2895  *		complete sense data for commands that fail with an LBA
2896  *		larger than 32 bits.
2897  *
2898  *   Arguments: un - driver soft state (unit) structure
2899  *
2900  *     Context: Kernel thread context only
2901  */
2902 
2903 static void
2904 sd_enable_descr_sense(struct sd_lun *un)
2905 {
2906 	uchar_t			*header;
2907 	struct mode_control_scsi3 *ctrl_bufp;
2908 	size_t			buflen;
2909 	size_t			bd_len;
2910 
2911 	/*
2912 	 * Read MODE SENSE page 0xA, Control Mode Page
2913 	 */
2914 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
2915 	    sizeof (struct mode_control_scsi3);
2916 	header = kmem_zalloc(buflen, KM_SLEEP);
2917 	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
2918 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
2919 		SD_ERROR(SD_LOG_COMMON, un,
2920 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
2921 		goto eds_exit;
2922 	}
2923 
2924 	/*
2925 	 * Determine size of Block Descriptors in order to locate
2926 	 * the mode page data. ATAPI devices return 0; SCSI devices
2927 	 * should return MODE_BLK_DESC_LENGTH.
2928 	 */
2929 	bd_len = ((struct mode_header *)header)->bdesc_length;
2930 
2931 	ctrl_bufp = (struct mode_control_scsi3 *)
2932 	    (header + MODE_HEADER_LENGTH + bd_len);
2933 
2934 	/*
2935 	 * Clear PS bit for MODE SELECT
2936 	 */
2937 	ctrl_bufp->mode_page.ps = 0;
2938 
2939 	/*
2940 	 * Set D_SENSE to enable descriptor sense format.
2941 	 */
2942 	ctrl_bufp->d_sense = 1;
2943 
2944 	/*
2945 	 * Use MODE SELECT to commit the change to the D_SENSE bit
2946 	 */
2947 	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
2948 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
2949 		SD_INFO(SD_LOG_COMMON, un,
2950 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
2951 		goto eds_exit;
2952 	}
2953 
2954 eds_exit:
2955 	kmem_free(header, buflen);
2956 }
2957 #endif /* _LP64 */
2958 
2959 
2960 /*
2961  *    Function: sd_set_mmc_caps
2962  *
2963  * Description: This routine determines if the device is MMC compliant and if
2964  *		the device supports CDDA via a mode sense of the CDVD
2965  *		capabilities mode page. Also checks if the device is a
2966  *		dvdram writable device.
2967  *
2968  *   Arguments: un - driver soft state (unit) structure
2969  *
2970  *     Context: Kernel thread context only
2971  */
2972 
2973 static void
2974 sd_set_mmc_caps(struct sd_lun *un)
2975 {
2976 	struct mode_header_grp2		*sense_mhp;
2977 	uchar_t				*sense_page;
2978 	caddr_t				buf;
2979 	int				bd_len;
2980 	int				status;
2981 	struct uscsi_cmd		com;
2982 	int				rtn;
2983 	uchar_t				*out_data_rw, *out_data_hd;
2984 	uchar_t				*rqbuf_rw, *rqbuf_hd;
2985 
2986 	ASSERT(un != NULL);
2987 
2988 	/*
2989 	 * The flags set in this function are: MMC compliant, DVD-RAM
2990 	 * writable device, and CDDA support. Initialize them to FALSE;
2991 	 * if a capability is detected, the flag will be set to TRUE.
2992 	 */
2993 	un->un_f_mmc_cap = FALSE;
2994 	un->un_f_dvdram_writable_device = FALSE;
2995 	un->un_f_cfg_cdda = FALSE;
2996 
2997 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
2998 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
2999 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3000 
3001 	if (status != 0) {
3002 		/* command failed; just return */
3003 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3004 		return;
3005 	}
3006 	/*
3007 	 * If the mode sense request for the CDROM CAPABILITIES
	 * page (0x2A) succeeds, the device is assumed to be MMC.
3009 	 */
3010 	un->un_f_mmc_cap = TRUE;
3011 
3012 	/* Get to the page data */
3013 	sense_mhp = (struct mode_header_grp2 *)buf;
3014 	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3015 	    sense_mhp->bdesc_length_lo;
3016 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3017 		/*
3018 		 * We did not get back the expected block descriptor
3019 		 * length so we cannot determine if the device supports
3020 		 * CDDA. However, we still indicate the device is MMC
3021 		 * according to the successful response to the page
3022 		 * 0x2A mode sense request.
3023 		 */
3024 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3025 		    "sd_set_mmc_caps: Mode Sense returned "
3026 		    "invalid block descriptor length\n");
3027 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3028 		return;
3029 	}
3030 
3031 	/* See if read CDDA is supported */
3032 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3033 	    bd_len);
3034 	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
3035 
3036 	/* See if writing DVD RAM is supported. */
3037 	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3038 	if (un->un_f_dvdram_writable_device == TRUE) {
3039 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3040 		return;
3041 	}
3042 
3043 	/*
3044 	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since an RRD will not have
3046 	 * these capabilities.
3047 	 */
3048 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3049 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3050 		return;
3051 	}
3052 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3053 
3054 	/*
3055 	 * If un->un_f_dvdram_writable_device is still FALSE,
	 * check for a Removable Rigid Disk (RRD).  An RRD
3057 	 * device is identified by the features RANDOM_WRITABLE and
3058 	 * HARDWARE_DEFECT_MANAGEMENT.
3059 	 */
3060 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3061 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3062 
3063 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3064 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3065 	    RANDOM_WRITABLE);
3066 	if (rtn != 0) {
3067 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3068 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3069 		return;
3070 	}
3071 
3072 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3073 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3074 
3075 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3076 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3077 	    HARDWARE_DEFECT_MANAGEMENT);
3078 	if (rtn == 0) {
3079 		/*
3080 		 * We have good information, check for random writable
3081 		 * and hardware defect features.
3082 		 */
3083 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3084 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3085 			un->un_f_dvdram_writable_device = TRUE;
3086 		}
3087 	}
3088 
3089 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3090 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3091 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3092 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3093 }
3094 
3095 /*
3096  *    Function: sd_check_for_writable_cd
3097  *
3098  * Description: This routine determines if the media in the device is
3099  *		writable or not. It uses the get configuration command (0x46)
 *		to determine if the media is writable.
3101  *
3102  *   Arguments: un - driver soft state (unit) structure
3103  *
3104  *     Context: Never called at interrupt context.
3105  */
3106 
3107 static void
3108 sd_check_for_writable_cd(struct sd_lun *un)
3109 {
3110 	struct uscsi_cmd		com;
3111 	uchar_t				*out_data;
3112 	uchar_t				*rqbuf;
3113 	int				rtn;
3114 	uchar_t				*out_data_rw, *out_data_hd;
3115 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3116 	struct mode_header_grp2		*sense_mhp;
3117 	uchar_t				*sense_page;
3118 	caddr_t				buf;
3119 	int				bd_len;
3120 	int				status;
3121 
3122 	ASSERT(un != NULL);
3123 	ASSERT(mutex_owned(SD_MUTEX(un)));
3124 
3125 	/*
	 * Initialize the writable media to FALSE; it is set to TRUE only
	 * if the configuration info tells us that the media is writable.
3128 	 */
3129 	un->un_f_mmc_writable_media = FALSE;
3130 	mutex_exit(SD_MUTEX(un));
3131 
3132 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3133 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3134 
3135 	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
3136 	    out_data, SD_PROFILE_HEADER_LEN);
3137 
3138 	mutex_enter(SD_MUTEX(un));
3139 	if (rtn == 0) {
3140 		/*
3141 		 * We have good information, check for writable DVD.
3142 		 */
3143 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3144 			un->un_f_mmc_writable_media = TRUE;
3145 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3146 			kmem_free(rqbuf, SENSE_LENGTH);
3147 			return;
3148 		}
3149 	}
3150 
3151 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3152 	kmem_free(rqbuf, SENSE_LENGTH);
3153 
3154 	/*
	 * Determine if this is an RRD type device.
3156 	 */
3157 	mutex_exit(SD_MUTEX(un));
3158 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3159 	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
3160 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3161 	mutex_enter(SD_MUTEX(un));
3162 	if (status != 0) {
3163 		/* command failed; just return */
3164 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3165 		return;
3166 	}
3167 
3168 	/* Get to the page data */
3169 	sense_mhp = (struct mode_header_grp2 *)buf;
3170 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3171 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3172 		/*
3173 		 * We did not get back the expected block descriptor length so
3174 		 * we cannot check the mode page.
3175 		 */
3176 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3177 		    "sd_check_for_writable_cd: Mode Sense returned "
3178 		    "invalid block descriptor length\n");
3179 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3180 		return;
3181 	}
3182 
3183 	/*
3184 	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since an RRD device will not have
3186 	 * these capabilities.
3187 	 */
3188 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3189 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3190 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3191 		return;
3192 	}
3193 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3194 
3195 	/*
3196 	 * If un->un_f_mmc_writable_media is still FALSE,
	 * check for RRD type media.  An RRD device is identified
3198 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3199 	 */
3200 	mutex_exit(SD_MUTEX(un));
3201 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3202 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3203 
3204 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
3205 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3206 	    RANDOM_WRITABLE);
3207 	if (rtn != 0) {
3208 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3209 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3210 		mutex_enter(SD_MUTEX(un));
3211 		return;
3212 	}
3213 
3214 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3215 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3216 
3217 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
3218 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3219 	    HARDWARE_DEFECT_MANAGEMENT);
3220 	mutex_enter(SD_MUTEX(un));
3221 	if (rtn == 0) {
3222 		/*
3223 		 * We have good information, check for random writable
		 * and hardware defect management features reported as current.
3225 		 */
3226 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3227 		    (out_data_rw[10] & 0x1) &&
3228 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3229 		    (out_data_hd[10] & 0x1)) {
3230 			un->un_f_mmc_writable_media = TRUE;
3231 		}
3232 	}
3233 
3234 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3235 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3236 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3237 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3238 }
3239 
3240 /*
3241  *    Function: sd_read_unit_properties
3242  *
3243  * Description: The following implements a property lookup mechanism.
3244  *		Properties for particular disks (keyed on vendor, model
3245  *		and rev numbers) are sought in the sd.conf file via
3246  *		sd_process_sdconf_file(), and if not found there, are
3247  *		looked for in a list hardcoded in this driver via
 *		sd_process_sdconf_table().  Once located, the properties
3249  *		are used to update the driver unit structure.
3250  *
3251  *   Arguments: un - driver soft state (unit) structure
3252  */
3253 
3254 static void
3255 sd_read_unit_properties(struct sd_lun *un)
3256 {
3257 	/*
3258 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3259 	 * the "sd-config-list" property (from the sd.conf file) or if
3260 	 * there was not a match for the inquiry vid/pid. If this event
3261 	 * occurs the static driver configuration table is searched for
3262 	 * a match.
3263 	 */
3264 	ASSERT(un != NULL);
3265 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3266 		sd_process_sdconf_table(un);
3267 	}
3268 
3269 	/* check for LSI device */
3270 	sd_is_lsi(un);
3273 }
3274 
3275 
3276 /*
3277  *    Function: sd_process_sdconf_file
3278  *
3279  * Description: Use ddi_getlongprop to obtain the properties from the
3280  *		driver's config file (ie, sd.conf) and update the driver
3281  *		soft state structure accordingly.
3282  *
3283  *   Arguments: un - driver soft state (unit) structure
3284  *
3285  * Return Code: SD_SUCCESS - The properties were successfully set according
3286  *			     to the driver configuration file.
3287  *		SD_FAILURE - The driver config list was not obtained or
3288  *			     there was no vid/pid match. This indicates that
3289  *			     the static config table should be used.
3290  *
3291  * The config file has a property, "sd-config-list", which consists of
3292  * one or more duplets as follows:
3293  *
3294  *  sd-config-list=
3295  *	<duplet>,
3296  *	[<duplet>,]
3297  *	[<duplet>];
3298  *
3299  * The structure of each duplet is as follows:
3300  *
3301  *  <duplet>:= <vid+pid>,<data-property-name_list>
3302  *
3303  * The first entry of the duplet is the device ID string (the concatenated
3304  * vid & pid; not to be confused with a device_id).  This is defined in
3305  * the same way as in the sd_disk_table.
3306  *
3307  * The second part of the duplet is a string that identifies a
3308  * data-property-name-list. The data-property-name-list is defined as
3309  * follows:
3310  *
3311  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3312  *
3313  * The syntax of <data-property-name> depends on the <version> field.
3314  *
3315  * If version = SD_CONF_VERSION_1 we have the following syntax:
3316  *
3317  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3318  *
 * where the prop0 value will be used to set prop0 if bit0 is set in
 * the flags, prop1 if bit1 is set, etc., and N = SD_CONF_MAX_ITEMS - 1.
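 *
 * An illustrative example (the vid/pid and data-property-name below are
 * hypothetical, and SD_CONF_BSET_THROTTLE is assumed to be bit 0 on
 * this platform):
 *
 *  sd-config-list= "SEAGATE ST32550W", "seagate-sd-config";
 *  seagate-sd-config= 1,0x1,32;
 *
 * Here the version is SD_CONF_VERSION_1 (assumed to be 1), the flags
 * word is 0x1, and prop0 (32) would therefore set the throttle for
 * the matching disk.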
3321  *
3322  */
3323 
3324 static int
3325 sd_process_sdconf_file(struct sd_lun *un)
3326 {
3327 	char	*config_list = NULL;
3328 	int	config_list_len;
3329 	int	len;
3330 	int	dupletlen = 0;
3331 	char	*vidptr;
3332 	int	vidlen;
3333 	char	*dnlist_ptr;
3334 	char	*dataname_ptr;
3335 	int	dnlist_len;
3336 	int	dataname_len;
3337 	int	*data_list;
3338 	int	data_list_len;
3339 	int	rval = SD_FAILURE;
3340 	int	i;
3341 
3342 	ASSERT(un != NULL);
3343 
3344 	/* Obtain the configuration list associated with the .conf file */
3345 	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
3346 	    sd_config_list, (caddr_t)&config_list, &config_list_len)
3347 	    != DDI_PROP_SUCCESS) {
3348 		return (SD_FAILURE);
3349 	}
3350 
3351 	/*
3352 	 * Compare vids in each duplet to the inquiry vid - if a match is
3353 	 * made, get the data value and update the soft state structure
3354 	 * accordingly.
3355 	 *
3356 	 * Note: This algorithm is complex and difficult to maintain. It should
3357 	 * be replaced with a more robust implementation.
3358 	 */
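	/*
	 * Illustrative layout assumed by this loop: each duplet is stored
	 * as two consecutive NUL-terminated strings, e.g.
	 *	"SEAGATE ST32550W\0seagate-sd-config\0"
	 * so dupletlen must account for both strings and both NULs.
	 */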
3359 	for (len = config_list_len, vidptr = config_list; len > 0;
3360 	    vidptr += dupletlen, len -= dupletlen) {
3361 		/*
3362 		 * Note: The assumption here is that each vid entry is on
3363 		 * a unique line from its associated duplet.
3364 		 */
3365 		vidlen = dupletlen = (int)strlen(vidptr);
3366 		if ((vidlen == 0) ||
3367 		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
3368 			dupletlen++;
3369 			continue;
3370 		}
3371 
3372 		/*
3373 		 * dnlist contains 1 or more blank separated
3374 		 * data-property-name entries
3375 		 */
3376 		dnlist_ptr = vidptr + vidlen + 1;
3377 		dnlist_len = (int)strlen(dnlist_ptr);
3378 		dupletlen += dnlist_len + 2;
3379 
3380 		/*
3381 		 * Set a pointer for the first data-property-name
3382 		 * entry in the list
3383 		 */
3384 		dataname_ptr = dnlist_ptr;
3385 		dataname_len = 0;
3386 
3387 		/*
3388 		 * Loop through all data-property-name entries in the
3389 		 * data-property-name-list setting the properties for each.
3390 		 */
3391 		while (dataname_len < dnlist_len) {
3392 			int version;
3393 
3394 			/*
3395 			 * Determine the length of the current
3396 			 * data-property-name entry by indexing until a
			 * blank or NUL is encountered. When a space is
			 * encountered, reset it to NUL for compliance
3399 			 * with ddi_getlongprop().
3400 			 */
3401 			for (i = 0; ((dataname_ptr[i] != ' ') &&
3402 			    (dataname_ptr[i] != '\0')); i++) {
3403 				;
3404 			}
3405 
3406 			dataname_len += i;
			/* If not NUL-terminated, make it so */
3408 			if (dataname_ptr[i] == ' ') {
3409 				dataname_ptr[i] = '\0';
3410 			}
3411 			dataname_len++;
3412 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3413 			    "sd_process_sdconf_file: disk:%s, data:%s\n",
3414 			    vidptr, dataname_ptr);
3415 
3416 			/* Get the data list */
3417 			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
3418 			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
3419 			    != DDI_PROP_SUCCESS) {
3420 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3421 				    "sd_process_sdconf_file: data property (%s)"
3422 				    " has no value\n", dataname_ptr);
3423 				dataname_ptr = dnlist_ptr + dataname_len;
3424 				continue;
3425 			}
3426 
3427 			version = data_list[0];
3428 
3429 			if (version == SD_CONF_VERSION_1) {
3430 				sd_tunables values;
3431 
3432 				/* Set the properties */
3433 				if (sd_chk_vers1_data(un, data_list[1],
3434 				    &data_list[2], data_list_len, dataname_ptr)
3435 				    == SD_SUCCESS) {
3436 					sd_get_tunables_from_conf(un,
3437 					    data_list[1], &data_list[2],
3438 					    &values);
3439 					sd_set_vers1_properties(un,
3440 					    data_list[1], &values);
3441 					rval = SD_SUCCESS;
3442 				} else {
3443 					rval = SD_FAILURE;
3444 				}
3445 			} else {
3446 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3447 				    "data property %s version 0x%x is invalid.",
3448 				    dataname_ptr, version);
3449 				rval = SD_FAILURE;
3450 			}
3451 			kmem_free(data_list, data_list_len);
3452 			dataname_ptr = dnlist_ptr + dataname_len;
3453 		}
3454 	}
3455 
3456 	/* free up the memory allocated by ddi_getlongprop */
3457 	if (config_list) {
3458 		kmem_free(config_list, config_list_len);
3459 	}
3460 
3461 	return (rval);
3462 }
3463 
3464 /*
3465  *    Function: sd_get_tunables_from_conf()
 *
 * Description: This function reads the data list from the sd.conf file
 *    and places the numeric values in the appropriate sd_tunables
 *    member. Since the order of the data list members varies across
 *    platforms, this function reads them from the data list in a
 *    platform-specific order and places them into the correct
 *    sd_tunables member, which is consistent across all platforms.
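 *
 *    Illustrative example (hypothetical bit assignments): if, on this
 *    platform, SD_CONF_BSET_THROTTLE is bit 0 and SD_CONF_BSET_CTYPE
 *    is bit 1, then flags = 0x3 with a data list of { 32, 2, ... }
 *    would set values->sdt_throttle = 32 and values->sdt_ctype = 2;
 *    slots for unset bits are skipped, since data_list[i] is indexed
 *    by bit position.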
3475  */
3476 static void
3477 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3478     sd_tunables *values)
3479 {
3480 	int i;
3481 	int mask;
3482 
3483 	bzero(values, sizeof (sd_tunables));
3484 
3485 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3486 
3487 		mask = 1 << i;
3488 		if (mask > flags) {
3489 			break;
3490 		}
3491 
3492 		switch (mask & flags) {
3493 		case 0:	/* This mask bit not set in flags */
3494 			continue;
3495 		case SD_CONF_BSET_THROTTLE:
3496 			values->sdt_throttle = data_list[i];
3497 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3498 			    "sd_get_tunables_from_conf: throttle = %d\n",
3499 			    values->sdt_throttle);
3500 			break;
3501 		case SD_CONF_BSET_CTYPE:
3502 			values->sdt_ctype = data_list[i];
3503 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3504 			    "sd_get_tunables_from_conf: ctype = %d\n",
3505 			    values->sdt_ctype);
3506 			break;
3507 		case SD_CONF_BSET_NRR_COUNT:
3508 			values->sdt_not_rdy_retries = data_list[i];
3509 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3510 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3511 			    values->sdt_not_rdy_retries);
3512 			break;
3513 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3514 			values->sdt_busy_retries = data_list[i];
3515 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3516 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3517 			    values->sdt_busy_retries);
3518 			break;
3519 		case SD_CONF_BSET_RST_RETRIES:
3520 			values->sdt_reset_retries = data_list[i];
3521 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3522 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3523 			    values->sdt_reset_retries);
3524 			break;
3525 		case SD_CONF_BSET_RSV_REL_TIME:
3526 			values->sdt_reserv_rel_time = data_list[i];
3527 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3528 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3529 			    values->sdt_reserv_rel_time);
3530 			break;
3531 		case SD_CONF_BSET_MIN_THROTTLE:
3532 			values->sdt_min_throttle = data_list[i];
3533 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3534 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3535 			    values->sdt_min_throttle);
3536 			break;
3537 		case SD_CONF_BSET_DISKSORT_DISABLED:
3538 			values->sdt_disk_sort_dis = data_list[i];
3539 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3540 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3541 			    values->sdt_disk_sort_dis);
3542 			break;
3543 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3544 			values->sdt_lun_reset_enable = data_list[i];
3545 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3546 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3547 			    "\n", values->sdt_lun_reset_enable);
3548 			break;
3549 		}
3550 	}
3551 }
3552 
3553 /*
3554  *    Function: sd_process_sdconf_table
3555  *
3556  * Description: Search the static configuration table for a match on the
3557  *		inquiry vid/pid and update the driver soft state structure
3558  *		according to the table property values for the device.
3559  *
3560  *		The form of a configuration table entry is:
3561  *		  <vid+pid>,<flags>,<property-data>
3562  *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
3563  *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
3564  *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
3565  *
3566  *   Arguments: un - driver soft state (unit) structure
3567  */
3568 
3569 static void
3570 sd_process_sdconf_table(struct sd_lun *un)
3571 {
3572 	char	*id = NULL;
3573 	int	table_index;
3574 	int	idlen;
3575 
3576 	ASSERT(un != NULL);
3577 	for (table_index = 0; table_index < sd_disk_table_size;
3578 	    table_index++) {
3579 		id = sd_disk_table[table_index].device_id;
3580 		idlen = strlen(id);
3581 		if (idlen == 0) {
3582 			continue;
3583 		}
3584 
3585 		/*
3586 		 * The static configuration table currently does not
3587 		 * implement version 10 properties. Additionally,
3588 		 * multiple data-property-name entries are not
3589 		 * implemented in the static configuration table.
3590 		 */
3591 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3592 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3593 			    "sd_process_sdconf_table: disk %s\n", id);
3594 			sd_set_vers1_properties(un,
3595 			    sd_disk_table[table_index].flags,
3596 			    sd_disk_table[table_index].properties);
3597 			break;
3598 		}
3599 	}
3600 }
3601 
3602 
3603 /*
3604  *    Function: sd_sdconf_id_match
3605  *
 * Description: This local function implements a case insensitive vid/pid
3607  *		comparison as well as the boundary cases of wild card and
3608  *		multiple blanks.
3609  *
3610  *		Note: An implicit assumption made here is that the scsi
3611  *		inquiry structure will always keep the vid, pid and
3612  *		revision strings in consecutive sequence, so they can be
3613  *		read as a single string. If this assumption is not the
3614  *		case, a separate string, to be used for the check, needs
3615  *		to be built with these strings concatenated.
3616  *
3617  *   Arguments: un - driver soft state (unit) structure
3618  *		id - table or config file vid/pid
3619  *		idlen  - length of the vid/pid (bytes)
3620  *
3621  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3622  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3623  */
3624 
3625 static int
3626 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3627 {
3628 	struct scsi_inquiry	*sd_inq;
3629 	int 			rval = SD_SUCCESS;
3630 
3631 	ASSERT(un != NULL);
3632 	sd_inq = un->un_sd->sd_inq;
3633 	ASSERT(id != NULL);
3634 
3635 	/*
3636 	 * We use the inq_vid as a pointer to a buffer containing the
3637 	 * vid and pid and use the entire vid/pid length of the table
3638 	 * entry for the comparison. This works because the inq_pid
3639 	 * data member follows inq_vid in the scsi_inquiry structure.
3640 	 */
3641 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3642 		/*
3643 		 * The user id string is compared to the inquiry vid/pid
3644 		 * using a case insensitive comparison and ignoring
3645 		 * multiple spaces.
3646 		 */
3647 		rval = sd_blank_cmp(un, id, idlen);
3648 		if (rval != SD_SUCCESS) {
3649 			/*
3650 			 * User id strings that start and end with a "*"
3651 			 * are a special case. These do not have a
3652 			 * specific vendor, and the product string can
3653 			 * appear anywhere in the 16 byte PID portion of
3654 			 * the inquiry data. This is a simple strstr()
3655 			 * type search for the user id in the inquiry data.
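			 * For example, a (hypothetical) entry of
			 * "*ST32550*" would match any inquiry whose PID
			 * contains the substring "ST32550".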
3656 			 */
3657 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3658 				char	*pidptr = &id[1];
3659 				int	i;
3660 				int	j;
3661 				int	pidstrlen = idlen - 2;
3662 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3663 				    pidstrlen;
3664 
3665 				if (j < 0) {
3666 					return (SD_FAILURE);
3667 				}
3668 				for (i = 0; i < j; i++) {
3669 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3670 					    pidptr, pidstrlen) == 0) {
3671 						rval = SD_SUCCESS;
3672 						break;
3673 					}
3674 				}
3675 			}
3676 		}
3677 	}
3678 	return (rval);
3679 }
3680 
3681 
3682 /*
3683  *    Function: sd_blank_cmp
3684  *
3685  * Description: If the id string starts and ends with a space, treat
3686  *		multiple consecutive spaces as equivalent to a single
3687  *		space. For example, this causes a sd_disk_table entry
3688  *		of " NEC CDROM " to match a device's id string of
3689  *		"NEC       CDROM".
3690  *
 *		Note: The success exit condition for this routine is that
 *		the character at the table-entry pointer is '\0' and the
 *		remaining inquiry length (cnt) is zero. This will happen
 *		if the inquiry
3694  *		string returned by the device is padded with spaces to be
3695  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3696  *		SCSI spec states that the inquiry string is to be padded with
3697  *		spaces.
3698  *
3699  *   Arguments: un - driver soft state (unit) structure
3700  *		id - table or config file vid/pid
3701  *		idlen  - length of the vid/pid (bytes)
3702  *
3703  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3704  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3705  */
3706 
3707 static int
3708 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
3709 {
3710 	char		*p1;
3711 	char		*p2;
3712 	int		cnt;
3713 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
3714 	    sizeof (SD_INQUIRY(un)->inq_pid);
3715 
3716 	ASSERT(un != NULL);
3717 	p2 = un->un_sd->sd_inq->inq_vid;
3718 	ASSERT(id != NULL);
3719 	p1 = id;
3720 
3721 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
3722 		/*
3723 		 * Note: string p1 is terminated by a NUL but string p2
3724 		 * isn't.  The end of p2 is determined by cnt.
3725 		 */
3726 		for (;;) {
3727 			/* skip over any extra blanks in both strings */
3728 			while ((*p1 != '\0') && (*p1 == ' ')) {
3729 				p1++;
3730 			}
3731 			while ((cnt != 0) && (*p2 == ' ')) {
3732 				p2++;
3733 				cnt--;
3734 			}
3735 
3736 			/* compare the two strings */
3737 			if ((cnt == 0) ||
3738 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
3739 				break;
3740 			}
3741 			while ((cnt > 0) &&
3742 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
3743 				p1++;
3744 				p2++;
3745 				cnt--;
3746 			}
3747 		}
3748 	}
3749 
3750 	/* return SD_SUCCESS if both strings match */
3751 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
3752 }
3753 
3754 
3755 /*
3756  *    Function: sd_chk_vers1_data
3757  *
3758  * Description: Verify the version 1 device properties provided by the
3759  *		user via the configuration file
3760  *
3761  *   Arguments: un	     - driver soft state (unit) structure
3762  *		flags	     - integer mask indicating properties to be set
3763  *		prop_list    - integer list of property values
3764  *		list_len     - length of user provided data
3765  *
3766  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3767  *		SD_FAILURE - Indicates the user provided data is invalid
3768  */
3769 
3770 static int
3771 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3772     int list_len, char *dataname_ptr)
3773 {
3774 	int i;
3775 	int mask = 1;
3776 	int index = 0;
3777 
3778 	ASSERT(un != NULL);
3779 
3780 	/* Check for a NULL property name and list */
3781 	if (dataname_ptr == NULL) {
3782 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3783 		    "sd_chk_vers1_data: NULL data property name.");
3784 		return (SD_FAILURE);
3785 	}
3786 	if (prop_list == NULL) {
3787 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3788 		    "sd_chk_vers1_data: %s NULL data property list.",
3789 		    dataname_ptr);
3790 		return (SD_FAILURE);
3791 	}
3792 
3793 	/* Display a warning if undefined bits are set in the flags */
3794 	if (flags & ~SD_CONF_BIT_MASK) {
3795 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3796 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3797 		    "Properties not set.",
3798 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3799 		return (SD_FAILURE);
3800 	}
3801 
3802 	/*
	 * Verify the length of the list by counting the bits set in the
	 * flags and validating that the property list supplies a value
	 * for each bit that is set, plus the version and flags words.
	 */
	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
		mask = 1 << i;
		if (flags & mask) {
			index++;
		}
	}
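	/*
	 * For example (illustrative): flags = 0x7 has three bits set, so
	 * index becomes 3 and the list must contain at least 3 + 2 = 5
	 * integers: the version, the flags word, and one value per bit.
	 */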
3813 	if ((list_len / sizeof (int)) < (index + 2)) {
3814 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3815 		    "sd_chk_vers1_data: "
3816 		    "Data property list %s size is incorrect. "
3817 		    "Properties not set.", dataname_ptr);
3818 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3819 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3820 		return (SD_FAILURE);
3821 	}
3822 	return (SD_SUCCESS);
3823 }
3824 
3825 
3826 /*
3827  *    Function: sd_set_vers1_properties
3828  *
3829  * Description: Set version 1 device properties based on a property list
3830  *		retrieved from the driver configuration file or static
3831  *		configuration table. Version 1 properties have the format:
3832  *
3833  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3834  *
3835  *		where the prop0 value will be used to set prop0 if bit0
3836  *		is set in the flags
3837  *
3838  *   Arguments: un	     - driver soft state (unit) structure
3839  *		flags	     - integer mask indicating properties to be set
3840  *		prop_list    - integer list of property values
3841  */
3842 
3843 static void
3844 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
3845 {
3846 	ASSERT(un != NULL);
3847 
3848 	/*
3849 	 * Set the flag to indicate cache is to be disabled. An attempt
3850 	 * to disable the cache via sd_cache_control() will be made
3851 	 * later during attach once the basic initialization is complete.
3852 	 */
3853 	if (flags & SD_CONF_BSET_NOCACHE) {
3854 		un->un_f_opt_disable_cache = TRUE;
3855 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3856 		    "sd_set_vers1_properties: caching disabled flag set\n");
3857 	}
3858 
3859 	/* CD-specific configuration parameters */
3860 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
3861 		un->un_f_cfg_playmsf_bcd = TRUE;
3862 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3863 		    "sd_set_vers1_properties: playmsf_bcd set\n");
3864 	}
3865 	if (flags & SD_CONF_BSET_READSUB_BCD) {
3866 		un->un_f_cfg_readsub_bcd = TRUE;
3867 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3868 		    "sd_set_vers1_properties: readsub_bcd set\n");
3869 	}
3870 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
3871 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
3872 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3873 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
3874 	}
3875 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
3876 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
3877 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3878 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
3879 	}
3880 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
3881 		un->un_f_cfg_no_read_header = TRUE;
3882 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
		    "sd_set_vers1_properties: no_read_header set\n");
3884 	}
3885 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
3886 		un->un_f_cfg_read_cd_xd4 = TRUE;
3887 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3888 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
3889 	}
3890 
3891 	/* Support for devices which do not have valid/unique serial numbers */
3892 	if (flags & SD_CONF_BSET_FAB_DEVID) {
3893 		un->un_f_opt_fab_devid = TRUE;
3894 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3895 		    "sd_set_vers1_properties: fab_devid bit set\n");
3896 	}
3897 
3898 	/* Support for user throttle configuration */
3899 	if (flags & SD_CONF_BSET_THROTTLE) {
3900 		ASSERT(prop_list != NULL);
3901 		un->un_saved_throttle = un->un_throttle =
3902 		    prop_list->sdt_throttle;
3903 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3904 		    "sd_set_vers1_properties: throttle set to %d\n",
3905 		    prop_list->sdt_throttle);
3906 	}
3907 
3908 	/* Set the per disk retry count according to the conf file or table. */
3909 	if (flags & SD_CONF_BSET_NRR_COUNT) {
3910 		ASSERT(prop_list != NULL);
3911 		if (prop_list->sdt_not_rdy_retries) {
3912 			un->un_notready_retry_count =
3913 				prop_list->sdt_not_rdy_retries;
3914 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3915 			    "sd_set_vers1_properties: not ready retry count"
3916 			    " set to %d\n", un->un_notready_retry_count);
3917 		}
3918 	}
3919 
3920 	/* The controller type is reported for generic disk driver ioctls */
3921 	if (flags & SD_CONF_BSET_CTYPE) {
3922 		ASSERT(prop_list != NULL);
3923 		switch (prop_list->sdt_ctype) {
3924 		case CTYPE_CDROM:
3925 			un->un_ctype = prop_list->sdt_ctype;
3926 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3927 			    "sd_set_vers1_properties: ctype set to "
3928 			    "CTYPE_CDROM\n");
3929 			break;
3930 		case CTYPE_CCS:
3931 			un->un_ctype = prop_list->sdt_ctype;
3932 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_set_vers1_properties: ctype set to "
			    "CTYPE_CCS\n");
3935 			break;
3936 		case CTYPE_ROD:		/* RW optical */
3937 			un->un_ctype = prop_list->sdt_ctype;
3938 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3939 			    "sd_set_vers1_properties: ctype set to "
3940 			    "CTYPE_ROD\n");
3941 			break;
3942 		default:
3943 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3944 			    "sd_set_vers1_properties: Could not set "
3945 			    "invalid ctype value (%d)",
3946 			    prop_list->sdt_ctype);
3947 		}
3948 	}
3949 
3950 	/* Purple failover timeout */
3951 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
3952 		ASSERT(prop_list != NULL);
3953 		un->un_busy_retry_count =
3954 			prop_list->sdt_busy_retries;
3955 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3956 		    "sd_set_vers1_properties: "
3957 		    "busy retry count set to %d\n",
3958 		    un->un_busy_retry_count);
3959 	}
3960 
3961 	/* Purple reset retry count */
3962 	if (flags & SD_CONF_BSET_RST_RETRIES) {
3963 		ASSERT(prop_list != NULL);
3964 		un->un_reset_retry_count =
3965 			prop_list->sdt_reset_retries;
3966 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3967 		    "sd_set_vers1_properties: "
3968 		    "reset retry count set to %d\n",
3969 		    un->un_reset_retry_count);
3970 	}
3971 
3972 	/* Purple reservation release timeout */
3973 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
3974 		ASSERT(prop_list != NULL);
3975 		un->un_reserve_release_time =
3976 			prop_list->sdt_reserv_rel_time;
3977 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3978 		    "sd_set_vers1_properties: "
3979 		    "reservation release timeout set to %d\n",
3980 		    un->un_reserve_release_time);
3981 	}
3982 
3983 	/*
3984 	 * Driver flag telling the driver to verify that no commands are pending
3985 	 * for a device before issuing a Test Unit Ready. This is a workaround
3986 	 * for a firmware bug in some Seagate eliteI drives.
3987 	 */
3988 	if (flags & SD_CONF_BSET_TUR_CHECK) {
3989 		un->un_f_cfg_tur_check = TRUE;
3990 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3991 		    "sd_set_vers1_properties: tur queue check set\n");
3992 	}
3993 
3994 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
3995 		un->un_min_throttle = prop_list->sdt_min_throttle;
3996 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3997 		    "sd_set_vers1_properties: min throttle set to %d\n",
3998 		    un->un_min_throttle);
3999 	}
4000 
4001 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4002 		un->un_f_disksort_disabled =
4003 		    (prop_list->sdt_disk_sort_dis != 0) ?
4004 		    TRUE : FALSE;
4005 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4006 		    "sd_set_vers1_properties: disksort disabled "
4007 		    "flag set to %d\n",
4008 		    prop_list->sdt_disk_sort_dis);
4009 	}
4010 
4011 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4012 		un->un_f_lun_reset_enabled =
4013 		    (prop_list->sdt_lun_reset_enable != 0) ?
4014 		    TRUE : FALSE;
4015 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4016 		    "sd_set_vers1_properties: lun reset enabled "
4017 		    "flag set to %d\n",
4018 		    prop_list->sdt_lun_reset_enable);
4019 	}
4020 
4021 	/*
4022 	 * Validate the throttle values.
4023 	 * If any of the numbers are invalid, set everything to defaults.
4024 	 */
4025 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4026 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4027 	    (un->un_min_throttle > un->un_throttle)) {
4028 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4029 		un->un_min_throttle = sd_min_throttle;
4030 	}
4031 }
4032 
4033 /*
4034  *   Function: sd_is_lsi()
4035  *
 *   Description: Check for LSI devices by stepping through the static
 *	device table to match the vid/pid.
 *
 *   Args: un - ptr to sd_lun
 *
 *   Notes:  When creating a new LSI property, the new property must
 *		also be added to the checks in this function.
4043  */
4044 static void
4045 sd_is_lsi(struct sd_lun *un)
4046 {
4047 	char	*id = NULL;
4048 	int	table_index;
4049 	int	idlen;
4050 	void	*prop;
4051 
4052 	ASSERT(un != NULL);
4053 	for (table_index = 0; table_index < sd_disk_table_size;
4054 	    table_index++) {
4055 		id = sd_disk_table[table_index].device_id;
4056 		idlen = strlen(id);
4057 		if (idlen == 0) {
4058 			continue;
4059 		}
4060 
4061 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4062 			prop = sd_disk_table[table_index].properties;
4063 			if (prop == &lsi_properties ||
4064 			    prop == &lsi_oem_properties ||
4065 			    prop == &lsi_properties_scsi ||
4066 			    prop == &symbios_properties) {
4067 				un->un_f_cfg_is_lsi = TRUE;
4068 			}
4069 			break;
4070 		}
4071 	}
4072 }
4073 
4074 
4075 /*
4076  * The following routines support reading and interpretation of disk labels,
4077  * including Solaris BE (8-slice) vtoc's, Solaris LE (16-slice) vtoc's, and
4078  * fdisk tables.
4079  */
4080 
4081 /*
4082  *    Function: sd_validate_geometry
4083  *
4084  * Description: Read the label from the disk (if present). Update the unit's
4085  *		geometry and vtoc information from the data in the label.
4086  *		Verify that the label is valid.
4087  *
4088  *   Arguments: un - driver soft state (unit) structure
4089  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4090  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4091  *			to use the USCSI "direct" chain and bypass the normal
4092  *			command waitq.
4093  *
4094  * Return Code: 0 - Successful completion
4095  *		EINVAL  - Invalid value in un->un_tgt_blocksize or
4096  *			  un->un_blockcount; or label on disk is corrupted
4097  *			  or unreadable.
4098  *		EACCES  - Reservation conflict at the device.
4099  *		ENOMEM  - Resource allocation error
4100  *		ENOTSUP - geometry not applicable
4101  *
4102  *     Context: Kernel thread only (can sleep).
4103  */
4104 
4105 static int
4106 sd_validate_geometry(struct sd_lun *un, int path_flag)
4107 {
4108 	static	char		labelstring[128];
4109 	static	char		buf[256];
4110 	char	*label		= NULL;
4111 	int	label_error	= 0;
4112 	int	gvalid		= un->un_f_geometry_is_valid;
4113 	int	lbasize;
4114 	uint_t	capacity;
4115 	int	count;
4116 
4117 	ASSERT(un != NULL);
4118 	ASSERT(mutex_owned(SD_MUTEX(un)));
4119 
4120 	/*
4121 	 * If the required values are not valid, then try getting them
4122 	 * once via read capacity. If that fails, then fail this call.
4123 	 * This is necessary with the new mpxio failover behavior in
4124 	 * the T300 where we can get an attach for the inactive path
4125 	 * before the active path. The inactive path fails commands with
	 * sense data of 02,04,88; this happens to the read capacity issued
	 * before mpxio has sufficient knowledge to know whether it should
	 * force a failover or not (which it won't do at attach anyhow).
4129 	 * If the read capacity at attach time fails, un_tgt_blocksize and
4130 	 * un_blockcount won't be valid.
4131 	 */
4132 	if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4133 	    (un->un_f_blockcount_is_valid != TRUE)) {
4134 		uint64_t	cap;
4135 		uint32_t	lbasz;
4136 		int		rval;
4137 
4138 		mutex_exit(SD_MUTEX(un));
4139 		rval = sd_send_scsi_READ_CAPACITY(un, &cap,
4140 		    &lbasz, SD_PATH_DIRECT);
4141 		mutex_enter(SD_MUTEX(un));
4142 		if (rval == 0) {
4143 			/*
4144 			 * The following relies on
4145 			 * sd_send_scsi_READ_CAPACITY never
4146 			 * returning 0 for capacity and/or lbasize.
4147 			 */
4148 			sd_update_block_info(un, lbasz, cap);
4149 		}
4150 
4151 		if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
4152 		    (un->un_f_blockcount_is_valid != TRUE)) {
4153 			return (EINVAL);
4154 		}
4155 	}
4156 
4157 	/*
4158 	 * Copy the lbasize and capacity so that if they're reset while we're
4159 	 * not holding the SD_MUTEX, we will continue to use valid values
4160 	 * after the SD_MUTEX is reacquired. (4119659)
4161 	 */
4162 	lbasize  = un->un_tgt_blocksize;
4163 	capacity = un->un_blockcount;
4164 
4165 #if defined(_SUNOS_VTOC_16)
4166 	/*
4167 	 * Set up the "whole disk" fdisk partition; this should always
4168 	 * exist, regardless of whether the disk contains an fdisk table
4169 	 * or vtoc.
4170 	 */
4171 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
4172 	un->un_map[P0_RAW_DISK].dkl_nblk  = capacity;
4173 #endif
4174 
4175 	/*
4176 	 * Refresh the logical and physical geometry caches.
4177 	 * (data from MODE SENSE format/rigid disk geometry pages,
	 * and scsi_ifgetcap("geometry")).
4179 	 */
4180 	sd_resync_geom_caches(un, capacity, lbasize, path_flag);
4181 
4182 	label_error = sd_use_efi(un, path_flag);
4183 	if (label_error == 0) {
4184 		/* found a valid EFI label */
4185 		SD_TRACE(SD_LOG_IO_PARTITION, un,
		    "sd_validate_geometry: found EFI label\n");
4187 		un->un_solaris_offset = 0;
4188 		un->un_solaris_size = capacity;
4189 		return (ENOTSUP);
4190 	}
4191 	if (un->un_blockcount > DK_MAX_BLOCKS) {
4192 		if (label_error == ESRCH) {
4193 			/*
4194 			 * they've configured a LUN over 1TB, but used
4195 			 * format.dat to restrict format's view of the
4196 			 * capacity to be under 1TB
4197 			 */
4198 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4199 "is >1TB and has a VTOC label: use format(1M) to either decrease the");
4200 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
4201 "size to be < 1TB or relabel the disk with an EFI label");
4202 		} else {
4203 			/* unlabeled disk over 1TB */
4204 			return (ENOTSUP);
4205 		}
4206 	}
4207 	label_error = 0;
4208 
4209 	/*
4210 	 * at this point it is either labeled with a VTOC or it is
4211 	 * under 1TB
4212 	 */
4213 	if (un->un_f_vtoc_label_supported) {
4214 		struct	dk_label *dkl;
4215 		offset_t dkl1;
4216 		offset_t label_addr, real_addr;
4217 		int	rval;
4218 		size_t	buffer_size;
4219 
4220 		/*
4221 		 * Note: This will set up un->un_solaris_size and
4222 		 * un->un_solaris_offset.
4223 		 */
4224 		switch (sd_read_fdisk(un, capacity, lbasize, path_flag)) {
4225 		case SD_CMD_RESERVATION_CONFLICT:
4226 			ASSERT(mutex_owned(SD_MUTEX(un)));
4227 			return (EACCES);
4228 		case SD_CMD_FAILURE:
4229 			ASSERT(mutex_owned(SD_MUTEX(un)));
4230 			return (ENOMEM);
4231 		}
4232 
4233 		if (un->un_solaris_size <= DK_LABEL_LOC) {
4234 			/*
4235 			 * Found fdisk table but no Solaris partition entry,
4236 			 * so don't call sd_uselabel() and don't create
4237 			 * a default label.
4238 			 */
4239 			label_error = 0;
4240 			un->un_f_geometry_is_valid = TRUE;
4241 			goto no_solaris_partition;
4242 		}
4243 		label_addr = (daddr_t)(un->un_solaris_offset + DK_LABEL_LOC);
4244 
4245 		/*
		 * When sys_blocksize != tgt_blocksize, we need to re-adjust
		 * blkno and save the offset to the beginning of the dk_label.
4248 		 */
4249 		real_addr = SD_SYS2TGTBLOCK(un, label_addr);
4250 		buffer_size = SD_REQBYTES2TGTBYTES(un,
4251 		    sizeof (struct dk_label));
4252 
4253 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_validate_geometry: "
4254 		    "label_addr: 0x%x allocation size: 0x%x\n",
4255 		    label_addr, buffer_size);
4256 		dkl = kmem_zalloc(buffer_size, KM_NOSLEEP);
4257 		if (dkl == NULL) {
4258 			return (ENOMEM);
4259 		}
4260 
4261 		mutex_exit(SD_MUTEX(un));
4262 		rval = sd_send_scsi_READ(un, dkl, buffer_size, real_addr,
4263 		    path_flag);
4264 		mutex_enter(SD_MUTEX(un));
4265 
4266 		switch (rval) {
4267 		case 0:
4268 			/*
4269 			 * sd_uselabel will establish that the geometry
4270 			 * is valid.
4271 			 * For sys_blocksize != tgt_blocksize, need
4272 			 * to index into the beginning of dk_label
4273 			 */
			dkl1 = (daddr_t)dkl +
			    SD_TGTBYTEOFFSET(un, label_addr, real_addr);
4276 			if (sd_uselabel(un, (struct dk_label *)(uintptr_t)dkl1,
4277 			    path_flag) != SD_LABEL_IS_VALID) {
4278 				label_error = EINVAL;
4279 			}
4280 			break;
4281 		case EACCES:
4282 			label_error = EACCES;
4283 			break;
4284 		default:
4285 			label_error = EINVAL;
4286 			break;
4287 		}
4288 
4289 		kmem_free(dkl, buffer_size);
4290 
4291 #if defined(_SUNOS_VTOC_8)
4292 		label = (char *)un->un_asciilabel;
4293 #elif defined(_SUNOS_VTOC_16)
4294 		label = (char *)un->un_vtoc.v_asciilabel;
4295 #else
4296 #error "No VTOC format defined."
4297 #endif
4298 	}
4299 
4300 	/*
4301 	 * If a valid label was not found, AND if no reservation conflict
4302 	 * was detected, then go ahead and create a default label (4069506).
4303 	 */
4304 
4305 	if (un->un_f_default_vtoc_supported && (label_error != EACCES)) {
4306 		if (un->un_f_geometry_is_valid == FALSE) {
4307 			sd_build_default_label(un);
4308 		}
4309 		label_error = 0;
4310 	}
4311 
4312 no_solaris_partition:
4313 	if ((!un->un_f_has_removable_media ||
4314 	    (un->un_f_has_removable_media &&
	    un->un_mediastate == DKIO_EJECTED)) &&
	    (un->un_state == SD_STATE_NORMAL && !gvalid)) {
4317 		/*
4318 		 * Print out a message indicating who and what we are.
4319 		 * We do this only when we happen to really validate the
4320 		 * geometry. We may call sd_validate_geometry() at other
4321 		 * times, e.g., ioctl()'s like Get VTOC in which case we
4322 		 * don't want to print the label.
4323 		 * If the geometry is valid, print the label string,
4324 		 * else print vendor and product info, if available
4325 		 */
4326 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
4327 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "?<%s>\n", label);
4328 		} else {
4329 			mutex_enter(&sd_label_mutex);
4330 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
4331 			    labelstring);
4332 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
4333 			    &labelstring[64]);
4334 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
4335 			    labelstring, &labelstring[64]);
4336 			if (un->un_f_blockcount_is_valid == TRUE) {
4337 				(void) sprintf(&buf[strlen(buf)],
4338 				    ", %llu %u byte blocks\n",
4339 				    (longlong_t)un->un_blockcount,
4340 				    un->un_tgt_blocksize);
4341 			} else {
4342 				(void) sprintf(&buf[strlen(buf)],
4343 				    ", (unknown capacity)\n");
4344 			}
4345 			SD_INFO(SD_LOG_ATTACH_DETACH, un, buf);
4346 			mutex_exit(&sd_label_mutex);
4347 		}
4348 	}
4349 
4350 #if defined(_SUNOS_VTOC_16)
4351 	/*
4352 	 * If we have valid geometry, set up the remaining fdisk partitions.
4353 	 * Note that dkl_cylno is not used for the fdisk map entries, so
4354 	 * we set it to an entirely bogus value.
4355 	 */
4356 	for (count = 0; count < FD_NUMPART; count++) {
4357 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
4358 		un->un_map[FDISK_P1 + count].dkl_nblk =
4359 		    un->un_fmap[count].fmap_nblk;
4360 
4361 		un->un_offset[FDISK_P1 + count] =
4362 		    un->un_fmap[count].fmap_start;
4363 	}
4364 #endif
4365 
4366 	for (count = 0; count < NDKMAP; count++) {
4367 #if defined(_SUNOS_VTOC_8)
4368 		struct dk_map *lp  = &un->un_map[count];
4369 		un->un_offset[count] =
4370 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
4371 #elif defined(_SUNOS_VTOC_16)
4372 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
4373 
4374 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
4375 #else
4376 #error "No VTOC format defined."
4377 #endif
4378 	}
4379 
4380 	return (label_error);
4381 }
4382 
4383 
4384 #if defined(_SUNOS_VTOC_16)
4385 /*
4386  * Macro: MAX_BLKS
4387  *
4388  *	This macro is used for table entries where we need to have the largest
4389  *	possible sector value for that head & SPT (sectors per track)
4390  *	combination.  Other entries for some smaller disk sizes are set by
4391  *	convention to match those used by X86 BIOS usage.
4392  */
4393 #define	MAX_BLKS(heads, spt)	UINT16_MAX * heads * spt, heads, spt
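
/*
 * For example, MAX_BLKS(255, 63) expands to
 * "UINT16_MAX * 255 * 63, 255, 63", i.e. a max_cap of 1,052,819,775
 * blocks, which at 512 bytes per block is the ~502.02GB limit used
 * in the CHS_values table below.
 */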
4394 
4395 /*
4396  *    Function: sd_convert_geometry
4397  *
4398  * Description: Convert physical geometry into a dk_geom structure. In
4399  *		other words, make sure we don't wrap 16-bit values.
4400  *		e.g. converting from geom_cache to dk_geom
4401  *
4402  *     Context: Kernel thread only
4403  */
4404 static void
4405 sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g)
4406 {
4407 	int i;
4408 	static const struct chs_values {
4409 		uint_t max_cap;		/* Max Capacity for this HS. */
4410 		uint_t nhead;		/* Heads to use. */
4411 		uint_t nsect;		/* SPT to use. */
4412 	} CHS_values[] = {
4413 		{0x00200000,  64, 32},		/* 1GB or smaller disk. */
4414 		{0x01000000, 128, 32},		/* 8GB or smaller disk. */
4415 		{MAX_BLKS(255,  63)},		/* 502.02GB or smaller disk. */
4416 		{MAX_BLKS(255, 126)},		/* .98TB or smaller disk. */
4417 		{DK_MAX_BLOCKS, 255, 189}	/* Max size is just under 1TB */
4418 	};
4419 
4420 	/* Unlabeled SCSI floppy device */
4421 	if (capacity <= 0x1000) {
4422 		un_g->dkg_nhead = 2;
4423 		un_g->dkg_ncyl = 80;
4424 		un_g->dkg_nsect = capacity / (un_g->dkg_nhead * un_g->dkg_ncyl);
4425 		return;
4426 	}
4427 
4428 	/*
4429 	 * For all devices we calculate cylinders using the
4430 	 * heads and sectors we assign based on capacity of the
4431 	 * device.  The table is designed to be compatible with the
4432 	 * way other operating systems lay out fdisk tables for X86
	 * and to ensure that the cylinders never exceed 65535, to
	 * prevent problems with X86 ioctls that report geometry.
	 * We use SPT values that are multiples of 63 because other
	 * OSes, which are not limited to 16 bits for cylinders, stop
	 * at 63 SPT; we make do by using multiples of 63 SPT.
	 *
	 * Note that capacities greater than or equal to 1TB will simply
4440 	 * get the largest geometry from the table. This should be okay
4441 	 * since disks this large shouldn't be using CHS values anyway.
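	 *
	 * For example (illustrative): a 4GB disk (0x00800000 blocks of
	 * 512 bytes) exceeds the 1GB table entry but fits the 8GB entry,
	 * so the loop below selects 128 heads and 32 SPT for it.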
4442 	 */
4443 	for (i = 0; CHS_values[i].max_cap < capacity &&
4444 	    CHS_values[i].max_cap != DK_MAX_BLOCKS; i++)
4445 		;
4446 
4447 	un_g->dkg_nhead = CHS_values[i].nhead;
4448 	un_g->dkg_nsect = CHS_values[i].nsect;
4449 }
4450 #endif
4451 
4452 
4453 /*
4454  *    Function: sd_resync_geom_caches
4455  *
4456  * Description: (Re)initialize both geometry caches: the virtual geometry
4457  *		information is extracted from the HBA (the "geometry"
4458  *		capability), and the physical geometry cache data is
4459  *		generated by issuing MODE SENSE commands.
4460  *
4461  *   Arguments: un - driver soft state (unit) structure
4462  *		capacity - disk capacity in #blocks
4463  *		lbasize - disk block size in bytes
4464  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4465  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4466  *			to use the USCSI "direct" chain and bypass the normal
4467  *			command waitq.
4468  *
4469  *     Context: Kernel thread only (can sleep).
4470  */
4471 
4472 static void
4473 sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
4474 	int path_flag)
4475 {
4476 	struct 	geom_cache 	pgeom;
4477 	struct 	geom_cache	*pgeom_p = &pgeom;
4478 	int 	spc;
4479 	unsigned short nhead;
4480 	unsigned short nsect;
4481 
4482 	ASSERT(un != NULL);
4483 	ASSERT(mutex_owned(SD_MUTEX(un)));
4484 
4485 	/*
4486 	 * Ask the controller for its logical geometry.
4487 	 * Note: if the HBA does not support scsi_ifgetcap("geometry"),
4488 	 * then the lgeom cache will be invalid.
4489 	 */
4490 	sd_get_virtual_geometry(un, capacity, lbasize);
4491 
4492 	/*
4493 	 * Initialize the pgeom cache from lgeom, so that if MODE SENSE
4494 	 * doesn't work, DKIOCG_PHYSGEOM can return reasonable values.
4495 	 */
4496 	if (un->un_lgeom.g_nsect == 0 || un->un_lgeom.g_nhead == 0) {
4497 		/*
4498 		 * Note: Perhaps this needs to be more adaptive? The rationale
4499 		 * is that, if there's no HBA geometry from the HBA driver, any
4500 		 * guess is good, since this is the physical geometry. If MODE
		 * SENSE fails, this gives a max cylinder size for non-LBA access.
4502 		 */
4503 		nhead = 255;
4504 		nsect = 63;
4505 	} else {
4506 		nhead = un->un_lgeom.g_nhead;
4507 		nsect = un->un_lgeom.g_nsect;
4508 	}
4509 
4510 	if (ISCD(un)) {
4511 		pgeom_p->g_nhead = 1;
4512 		pgeom_p->g_nsect = nsect * nhead;
4513 	} else {
4514 		pgeom_p->g_nhead = nhead;
4515 		pgeom_p->g_nsect = nsect;
4516 	}
4517 
4518 	spc = pgeom_p->g_nhead * pgeom_p->g_nsect;
4519 	pgeom_p->g_capacity = capacity;
4520 	pgeom_p->g_ncyl = pgeom_p->g_capacity / spc;
4521 	pgeom_p->g_acyl = 0;
4522 
4523 	/*
4524 	 * Retrieve fresh geometry data from the hardware, stash it
4525 	 * here temporarily before we rebuild the incore label.
4526 	 *
4527 	 * We want to use the MODE SENSE commands to derive the
4528 	 * physical geometry of the device, but if either command
4529 	 * fails, the logical geometry is used as the fallback for
4530 	 * disk label geometry.
4531 	 */
4532 	mutex_exit(SD_MUTEX(un));
4533 	sd_get_physical_geometry(un, pgeom_p, capacity, lbasize, path_flag);
4534 	mutex_enter(SD_MUTEX(un));
4535 
4536 	/*
4537 	 * Now update the real copy while holding the mutex. This
4538 	 * way the global copy is never in an inconsistent state.
4539 	 */
4540 	bcopy(pgeom_p, &un->un_pgeom,  sizeof (un->un_pgeom));
4541 
4542 	SD_INFO(SD_LOG_COMMON, un, "sd_resync_geom_caches: "
4543 	    "(cached from lgeom)\n");
4544 	SD_INFO(SD_LOG_COMMON, un,
4545 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4546 	    un->un_pgeom.g_ncyl, un->un_pgeom.g_acyl,
4547 	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
4548 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
4549 	    "intrlv: %d; rpm: %d\n", un->un_pgeom.g_secsize,
4550 	    un->un_pgeom.g_capacity, un->un_pgeom.g_intrlv,
4551 	    un->un_pgeom.g_rpm);
4552 }
4553 
4554 
4555 /*
4556  *    Function: sd_read_fdisk
4557  *
 * Description: Utility routine to read the fdisk table.
4559  *
4560  *   Arguments: un - driver soft state (unit) structure
4561  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4562  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4563  *			to use the USCSI "direct" chain and bypass the normal
4564  *			command waitq.
4565  *
4566  * Return Code: SD_CMD_SUCCESS
4567  *		SD_CMD_FAILURE
4568  *
4569  *     Context: Kernel thread only (can sleep).
4570  */
4571 /* ARGSUSED */
4572 static int
4573 sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize, int path_flag)
4574 {
4575 #if defined(_NO_FDISK_PRESENT)
4576 
4577 	un->un_solaris_offset = 0;
4578 	un->un_solaris_size = capacity;
4579 	bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4580 	return (SD_CMD_SUCCESS);
4581 
4582 #elif defined(_FIRMWARE_NEEDS_FDISK)
4583 
4584 	struct ipart	*fdp;
4585 	struct mboot	*mbp;
4586 	struct ipart	fdisk[FD_NUMPART];
4587 	int		i;
4588 	char		sigbuf[2];
4589 	caddr_t		bufp;
4590 	int		uidx;
4591 	int		rval;
4592 	int		lba = 0;
4593 	uint_t		solaris_offset;	/* offset to solaris part. */
4594 	daddr_t		solaris_size;	/* size of solaris partition */
4595 	uint32_t	blocksize;
4596 
4597 	ASSERT(un != NULL);
4598 	ASSERT(mutex_owned(SD_MUTEX(un)));
4599 	ASSERT(un->un_f_tgt_blocksize_is_valid == TRUE);
4600 
4601 	blocksize = un->un_tgt_blocksize;
4602 
4603 	/*
4604 	 * Start off assuming no fdisk table
4605 	 */
4606 	solaris_offset = 0;
4607 	solaris_size   = capacity;
4608 
4609 	mutex_exit(SD_MUTEX(un));
4610 	bufp = kmem_zalloc(blocksize, KM_SLEEP);
4611 	rval = sd_send_scsi_READ(un, bufp, blocksize, 0, path_flag);
4612 	mutex_enter(SD_MUTEX(un));
4613 
4614 	if (rval != 0) {
4615 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4616 		    "sd_read_fdisk: fdisk read err\n");
4617 		kmem_free(bufp, blocksize);
4618 		return (SD_CMD_FAILURE);
4619 	}
4620 
4621 	mbp = (struct mboot *)bufp;
4622 
4623 	/*
4624 	 * The fdisk table does not begin on a 4-byte boundary within the
4625 	 * master boot record, so we copy it to an aligned structure to avoid
4626 	 * alignment exceptions on some processors.
4627 	 */
4628 	bcopy(&mbp->parts[0], fdisk, sizeof (fdisk));
4629 
4630 	/*
4631 	 * Check for lba support before verifying sig; sig might not be
4632 	 * there, say on a blank disk, but the max_chs mark may still
4633 	 * be present.
4634 	 *
4635 	 * Note: LBA support and BEFs are an x86-only concept but this
4636 	 * code should work OK on SPARC as well.
4637 	 */
4638 
4639 	/*
	 * First, check for lba-access-ok on the root node (or prom root
	 * node); if it is present there, we don't need to search the
	 * fdisk table.
4642 	 */
4643 	if (ddi_getprop(DDI_DEV_T_ANY, ddi_root_node(), 0,
4644 	    "lba-access-ok", 0) != 0) {
4645 		/* All drives do LBA; don't search fdisk table */
4646 		lba = 1;
4647 	} else {
4648 		/* Okay, look for mark in fdisk table */
4649 		for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
4650 			/* accumulate "lba" value from all partitions */
4651 			lba = (lba || sd_has_max_chs_vals(fdp));
4652 		}
4653 	}
4654 
4655 	if (lba != 0) {
4656 		dev_t dev = sd_make_device(SD_DEVINFO(un));
4657 
4658 		if (ddi_getprop(dev, SD_DEVINFO(un), DDI_PROP_DONTPASS,
4659 		    "lba-access-ok", 0) == 0) {
4660 			/* not found; create it */
4661 			if (ddi_prop_create(dev, SD_DEVINFO(un), 0,
4662 			    "lba-access-ok", (caddr_t)NULL, 0) !=
4663 			    DDI_PROP_SUCCESS) {
4664 				SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4665 				    "sd_read_fdisk: Can't create lba property "
4666 				    "for instance %d\n",
4667 				    ddi_get_instance(SD_DEVINFO(un)));
4668 			}
4669 		}
4670 	}
4671 
4672 	bcopy(&mbp->signature, sigbuf, sizeof (sigbuf));
4673 
4674 	/*
4675 	 * Endian-independent signature check
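	 * (the two magic bytes are compared individually, so the test
	 * does not depend on host byte order)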
4676 	 */
4677 	if (((sigbuf[1] & 0xFF) != ((MBB_MAGIC >> 8) & 0xFF)) ||
4678 	    (sigbuf[0] != (MBB_MAGIC & 0xFF))) {
4679 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
4680 		    "sd_read_fdisk: no fdisk\n");
4681 		bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
4682 		rval = SD_CMD_SUCCESS;
4683 		goto done;
4684 	}
4685 
4686 #ifdef SDDEBUG
4687 	if (sd_level_mask & SD_LOGMASK_INFO) {
4688 		fdp = fdisk;
4689 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_read_fdisk:\n");
4690 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "         relsect    "
4691 		    "numsect         sysid       bootid\n");
4692 		for (i = 0; i < FD_NUMPART; i++, fdp++) {
4693 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4694 			    "    %d:  %8d   %8d     0x%08x     0x%08x\n",
4695 			    i, fdp->relsect, fdp->numsect,
4696 			    fdp->systid, fdp->bootid);
4697 		}
4698 	}
4699 #endif
4700 
4701 	/*
4702 	 * Try to find the unix partition
4703 	 */
4704 	uidx = -1;
4705 	solaris_offset = 0;
4706 	solaris_size   = 0;
4707 
4708 	for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
4709 		int	relsect;
4710 		int	numsect;
4711 
4712 		if (fdp->numsect == 0) {
4713 			un->un_fmap[i].fmap_start = 0;
4714 			un->un_fmap[i].fmap_nblk  = 0;
4715 			continue;
4716 		}
4717 
4718 		/*
4719 		 * Data in the fdisk table is little-endian.
4720 		 */
4721 		relsect = LE_32(fdp->relsect);
4722 		numsect = LE_32(fdp->numsect);
4723 
4724 		un->un_fmap[i].fmap_start = relsect;
4725 		un->un_fmap[i].fmap_nblk  = numsect;
4726 
4727 		if (fdp->systid != SUNIXOS &&
4728 		    fdp->systid != SUNIXOS2 &&
4729 		    fdp->systid != EFI_PMBR) {
4730 			continue;
4731 		}
4732 
4733 		/*
4734 	 * Use the last active Solaris partition id found
4735 	 * (there should be only one active partition id).
4736 	 *
4737 	 * If there is no active Solaris partition id,
4738 	 * use the first inactive Solaris partition id found.
4739 		 */
4740 		if ((uidx == -1) || (fdp->bootid == ACTIVE)) {
4741 			uidx = i;
4742 			solaris_offset = relsect;
4743 			solaris_size   = numsect;
4744 		}
4745 	}
4746 
4747 	SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk 0x%x 0x%lx",
4748 	    un->un_solaris_offset, un->un_solaris_size);
4749 
4750 	rval = SD_CMD_SUCCESS;
4751 
4752 done:
4753 
4754 	/*
4755 	 * Clear the VTOC info only if the Solaris partition entry
4756 	 * has moved, changed size, been deleted, or if the size of
4757 	 * the partition is too small to even fit the label sector.
4758 	 */
4759 	if ((un->un_solaris_offset != solaris_offset) ||
4760 	    (un->un_solaris_size != solaris_size) ||
4761 	    solaris_size <= DK_LABEL_LOC) {
4762 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk moved 0x%x 0x%lx",
4763 		    solaris_offset, solaris_size);
4764 		bzero(&un->un_g, sizeof (struct dk_geom));
4765 		bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
4766 		bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
4767 		un->un_f_geometry_is_valid = FALSE;
4768 	}
4769 	un->un_solaris_offset = solaris_offset;
4770 	un->un_solaris_size = solaris_size;
4771 	kmem_free(bufp, blocksize);
4772 	return (rval);
4773 
4774 #else	/* #elif defined(_FIRMWARE_NEEDS_FDISK) */
4775 #error "fdisk table presence undetermined for this platform."
4776 #endif	/* #if defined(_NO_FDISK_PRESENT) */
4777 }
4778 
4779 
4780 /*
4781  *    Function: sd_get_physical_geometry
4782  *
4783  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4784  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4785  *		target, and use this information to initialize the physical
4786  *		geometry cache specified by pgeom_p.
4787  *
4788  *		MODE SENSE is an optional command, so failure in this case
4789  *		does not necessarily denote an error. We want to use the
4790  *		MODE SENSE commands to derive the physical geometry of the
4791  *		device, but if either command fails, the logical geometry is
4792  *		used as the fallback for disk label geometry.
4793  *
4794  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4795  *		have already been initialized for the current target and
4796  *		that the current values be passed as args so that we don't
4797  *		end up ever trying to use -1 as a valid value. This could
4798  *		happen if either value is reset while we're not holding
4799  *		the mutex.
4800  *
4801  *   Arguments: un - driver soft state (unit) structure
4802  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4803  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4804  *			to use the USCSI "direct" chain and bypass the normal
4805  *			command waitq.
4806  *
4807  *     Context: Kernel thread only (can sleep).
4808  */
4809 
4810 static void
4811 sd_get_physical_geometry(struct sd_lun *un, struct geom_cache *pgeom_p,
4812 	int capacity, int lbasize, int path_flag)
4813 {
4814 	struct	mode_format	*page3p;
4815 	struct	mode_geometry	*page4p;
4816 	struct	mode_header	*headerp;
4817 	int	sector_size;
4818 	int	nsect;
4819 	int	nhead;
4820 	int	ncyl;
4821 	int	intrlv;
4822 	int	spc;
4823 	int	modesense_capacity;
4824 	int	rpm;
4825 	int	bd_len;
4826 	int	mode_header_length;
4827 	uchar_t	*p3bufp;
4828 	uchar_t	*p4bufp;
4829 	int	cdbsize;
4830 
4831 	ASSERT(un != NULL);
4832 	ASSERT(!(mutex_owned(SD_MUTEX(un))));
4833 
4834 	if (un->un_f_blockcount_is_valid != TRUE) {
4835 		return;
4836 	}
4837 
4838 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
4839 		return;
4840 	}
4841 
4842 	if (lbasize == 0) {
4843 		if (ISCD(un)) {
4844 			lbasize = 2048;
4845 		} else {
4846 			lbasize = un->un_sys_blocksize;
4847 		}
4848 	}
4849 	pgeom_p->g_secsize = (unsigned short)lbasize;
4850 
4851 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4852 
4853 	/*
4854 	 * Retrieve MODE SENSE page 3 - Format Device Page
4855 	 */
4856 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4857 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
4858 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
4859 	    != 0) {
4860 		SD_ERROR(SD_LOG_COMMON, un,
4861 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4862 		goto page3_exit;
4863 	}
4864 
4865 	/*
4866 	 * Determine size of Block Descriptors in order to locate the mode
4867 	 * page data.  ATAPI devices return 0, SCSI devices should return
4868 	 * MODE_BLK_DESC_LENGTH.
4869 	 */
4870 	headerp = (struct mode_header *)p3bufp;
4871 	if (un->un_f_cfg_is_atapi == TRUE) {
4872 		struct mode_header_grp2 *mhp =
4873 		    (struct mode_header_grp2 *)headerp;
4874 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
4875 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4876 	} else {
4877 		mode_header_length = MODE_HEADER_LENGTH;
4878 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4879 	}
4880 
4881 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4882 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4883 		    "received unexpected bd_len of %d, page3\n", bd_len);
4884 		goto page3_exit;
4885 	}
4886 
4887 	page3p = (struct mode_format *)
4888 	    ((caddr_t)headerp + mode_header_length + bd_len);
4889 
4890 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
4891 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4892 		    "mode sense pg3 code mismatch %d\n",
4893 		    page3p->mode_page.code);
4894 		goto page3_exit;
4895 	}
4896 
4897 	/*
4898 	 * Use this physical geometry data only if BOTH MODE SENSE commands
4899 	 * complete successfully; otherwise, revert to the logical geometry.
4900 	 * So, we need to save everything in temporary variables.
4901 	 */
4902 	sector_size = BE_16(page3p->data_bytes_sect);
4903 
4904 	/*
4905 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
4906 	 */
4907 	if (sector_size == 0) {
4908 		sector_size = (ISCD(un)) ? 2048 : un->un_sys_blocksize;
4909 	} else {
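		/*
		 * Round the reported sector size down to a multiple of the
		 * system block size; the mask assumes un_sys_blocksize is a
		 * power of two.
		 */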
4910 		sector_size &= ~(un->un_sys_blocksize - 1);
4911 	}
4912 
4913 	nsect  = BE_16(page3p->sect_track);
4914 	intrlv = BE_16(page3p->interleave);
4915 
4916 	SD_INFO(SD_LOG_COMMON, un,
4917 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
4918 	SD_INFO(SD_LOG_COMMON, un,
4919 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
4920 	    page3p->mode_page.code, nsect, sector_size);
4921 	SD_INFO(SD_LOG_COMMON, un,
4922 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
4923 	    BE_16(page3p->track_skew),
4924 	    BE_16(page3p->cylinder_skew));
4925 
4926 
4927 	/*
4928 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
4929 	 */
4930 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
4931 	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
4932 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
4933 	    != 0) {
4934 		SD_ERROR(SD_LOG_COMMON, un,
4935 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
4936 		goto page4_exit;
4937 	}
4938 
4939 	/*
4940 	 * Determine size of Block Descriptors in order to locate the mode
4941 	 * page data.  ATAPI devices return 0, SCSI devices should return
4942 	 * MODE_BLK_DESC_LENGTH.
4943 	 */
4944 	headerp = (struct mode_header *)p4bufp;
4945 	if (un->un_f_cfg_is_atapi == TRUE) {
4946 		struct mode_header_grp2 *mhp =
4947 		    (struct mode_header_grp2 *)headerp;
4948 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
4949 	} else {
4950 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
4951 	}
4952 
4953 	if (bd_len > MODE_BLK_DESC_LENGTH) {
4954 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4955 		    "received unexpected bd_len of %d, page4\n", bd_len);
4956 		goto page4_exit;
4957 	}
4958 
4959 	page4p = (struct mode_geometry *)
4960 	    ((caddr_t)headerp + mode_header_length + bd_len);
4961 
4962 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
4963 		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
4964 		    "mode sense pg4 code mismatch %d\n",
4965 		    page4p->mode_page.code);
4966 		goto page4_exit;
4967 	}
4968 
4969 	/*
4970 	 * Stash the data now, after we know that both commands completed.
4971 	 */
4972 
4973 	mutex_enter(SD_MUTEX(un));
4974 
4975 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
4976 	spc   = nhead * nsect;
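	/* the cylinder count is a 24-bit value carried in three bytes */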
4977 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
4978 	rpm   = BE_16(page4p->rpm);
4979 
4980 	modesense_capacity = spc * ncyl;
4981 
4982 	SD_INFO(SD_LOG_COMMON, un,
4983 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
4984 	SD_INFO(SD_LOG_COMMON, un,
4985 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
4986 	SD_INFO(SD_LOG_COMMON, un,
4987 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
4988 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
4989 	    (void *)pgeom_p, capacity);
4990 
4991 	/*
4992 	 * Compensate if the drive's geometry is not rectangular, i.e.,
4993 	 * the C * H * S product returned by MODE SENSE is greater than
4994 	 * or equal to the capacity returned by READ CAPACITY. This is an
4995 	 * idiosyncrasy of the original x86 disk subsystem.
4996 	 */
4997 	if (modesense_capacity >= capacity) {
4998 		SD_INFO(SD_LOG_COMMON, un,
4999 		    "sd_get_physical_geometry: adjusting acyl; "
5000 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
5001 		    (modesense_capacity - capacity + spc - 1) / spc);
5002 		if (sector_size != 0) {
5003 			/* 1243403: NEC D38x7 drives don't support sec size */
5004 			pgeom_p->g_secsize = (unsigned short)sector_size;
5005 		}
5006 		pgeom_p->g_nsect    = (unsigned short)nsect;
5007 		pgeom_p->g_nhead    = (unsigned short)nhead;
5008 		pgeom_p->g_capacity = capacity;
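		/*
		 * Ceiling division: express the excess MODE SENSE blocks
		 * as a whole number of alternate cylinders.
		 */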
5009 		pgeom_p->g_acyl	    =
5010 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
5011 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
5012 	}
5013 
5014 	pgeom_p->g_rpm    = (unsigned short)rpm;
5015 	pgeom_p->g_intrlv = (unsigned short)intrlv;
5016 
5017 	SD_INFO(SD_LOG_COMMON, un,
5018 	    "sd_get_physical_geometry: mode sense geometry:\n");
5019 	SD_INFO(SD_LOG_COMMON, un,
5020 	    "   nsect: %d; sector size: %d; interlv: %d\n",
5021 	    nsect, sector_size, intrlv);
5022 	SD_INFO(SD_LOG_COMMON, un,
5023 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
5024 	    nhead, ncyl, rpm, modesense_capacity);
5025 	SD_INFO(SD_LOG_COMMON, un,
5026 	    "sd_get_physical_geometry: (cached)\n");
5027 	SD_INFO(SD_LOG_COMMON, un,
5028 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5029 	    un->un_pgeom.g_ncyl,  un->un_pgeom.g_acyl,
5030 	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
5031 	SD_INFO(SD_LOG_COMMON, un,
5032 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
5033 	    un->un_pgeom.g_secsize, un->un_pgeom.g_capacity,
5034 	    un->un_pgeom.g_intrlv, un->un_pgeom.g_rpm);
5035 
5036 	mutex_exit(SD_MUTEX(un));
5037 
5038 page4_exit:
5039 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
5040 page3_exit:
5041 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
5042 }
5043 
5044 
5045 /*
5046  *    Function: sd_get_virtual_geometry
5047  *
5048  * Description: Ask the controller to tell us about the target device.
5049  *
5050  *   Arguments: un - pointer to softstate
5051  *		capacity - disk capacity in #blocks
5052  *		lbasize - disk block size in bytes
5053  *
5054  *     Context: Kernel thread only
5055  */
5056 
5057 static void
5058 sd_get_virtual_geometry(struct sd_lun *un, int capacity, int lbasize)
5059 {
5060 	struct	geom_cache 	*lgeom_p = &un->un_lgeom;
5061 	uint_t	geombuf;
5062 	int	spc;
5063 
5064 	ASSERT(un != NULL);
5065 	ASSERT(mutex_owned(SD_MUTEX(un)));
5066 
5067 	mutex_exit(SD_MUTEX(un));
5068 
5069 	/* Set sector size, and total number of sectors */
5070 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
5071 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
5072 
5073 	/* Let the HBA tell us its geometry */
5074 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
5075 
5076 	mutex_enter(SD_MUTEX(un));
5077 
5078 	/* A value of -1 indicates an undefined "geometry" property */
5079 	if (geombuf == (-1)) {
5080 		return;
5081 	}
5082 
5083 	/* Initialize the logical geometry cache. */
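	/*
	 * The "geometry" capability packs nhead into the upper 16 bits
	 * of the returned value and nsect into the lower 16 bits.
	 */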
5084 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
5085 	lgeom_p->g_nsect   = geombuf & 0xffff;
5086 	lgeom_p->g_secsize = un->un_sys_blocksize;
5087 
5088 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
5089 
5090 	/*
5091 	 * Note: The driver originally converted the capacity value from
5092 	 * target blocks to system blocks. However, the capacity value passed
5093 	 * to this routine is already in terms of system blocks (this scaling
5094 	 * is done when the READ CAPACITY command is issued and processed).
5095 	 * This 'error' may have gone undetected because the usage of g_ncyl
5096 	 * (which is based upon g_capacity) is very limited within the driver.
5097 	 */
5098 	lgeom_p->g_capacity = capacity;
5099 
5100 	/*
5101 	 * Set ncyl to zero if the HBA returned a zero nhead or nsect value;
5102 	 * the HBA may return zero values if the device has been removed.
5103 	 */
5104 	if (spc == 0) {
5105 		lgeom_p->g_ncyl = 0;
5106 	} else {
5107 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
5108 	}
5109 	lgeom_p->g_acyl = 0;
5110 
5111 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
5112 	SD_INFO(SD_LOG_COMMON, un,
5113 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5114 	    un->un_lgeom.g_ncyl,  un->un_lgeom.g_acyl,
5115 	    un->un_lgeom.g_nhead, un->un_lgeom.g_nsect);
5116 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
5117 	    "intrlv: %d; rpm: %d\n", un->un_lgeom.g_secsize,
5118 	    un->un_lgeom.g_capacity, un->un_lgeom.g_intrlv, un->un_lgeom.g_rpm);
5119 }
5120 
5121 
5122 /*
5123  *    Function: sd_update_block_info
5124  *
5125  * Description: Calculate a byte count to sector count bitshift value
5126  *		from sector size.
5127  *
5128  *   Arguments: un: unit struct.
5129  *		lbasize: new target sector size
5130  *		capacity: new target capacity, i.e., block count
5131  *
5132  *     Context: Kernel thread context
5133  */
5134 
5135 static void
5136 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
5137 {
5138 	if (lbasize != 0) {
5139 		un->un_tgt_blocksize = lbasize;
5140 		un->un_f_tgt_blocksize_is_valid	= TRUE;
5141 	}
5142 
5143 	if (capacity != 0) {
5144 		un->un_blockcount		= capacity;
5145 		un->un_f_blockcount_is_valid	= TRUE;
5146 	}
5147 }
5148 
5149 
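/*
 * On-disk GPT structures are little-endian; the LE_* conversions below
 * are no-ops on little-endian hosts and byte swaps on big-endian hosts
 * (e.g., SPARC), so these routines convert the structures in place to
 * host byte order.
 */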
5150 static void
5151 sd_swap_efi_gpt(efi_gpt_t *e)
5152 {
5153 	_NOTE(ASSUMING_PROTECTED(*e))
5154 	e->efi_gpt_Signature = LE_64(e->efi_gpt_Signature);
5155 	e->efi_gpt_Revision = LE_32(e->efi_gpt_Revision);
5156 	e->efi_gpt_HeaderSize = LE_32(e->efi_gpt_HeaderSize);
5157 	e->efi_gpt_HeaderCRC32 = LE_32(e->efi_gpt_HeaderCRC32);
5158 	e->efi_gpt_MyLBA = LE_64(e->efi_gpt_MyLBA);
5159 	e->efi_gpt_AlternateLBA = LE_64(e->efi_gpt_AlternateLBA);
5160 	e->efi_gpt_FirstUsableLBA = LE_64(e->efi_gpt_FirstUsableLBA);
5161 	e->efi_gpt_LastUsableLBA = LE_64(e->efi_gpt_LastUsableLBA);
5162 	UUID_LE_CONVERT(e->efi_gpt_DiskGUID, e->efi_gpt_DiskGUID);
5163 	e->efi_gpt_PartitionEntryLBA = LE_64(e->efi_gpt_PartitionEntryLBA);
5164 	e->efi_gpt_NumberOfPartitionEntries =
5165 	    LE_32(e->efi_gpt_NumberOfPartitionEntries);
5166 	e->efi_gpt_SizeOfPartitionEntry =
5167 	    LE_32(e->efi_gpt_SizeOfPartitionEntry);
5168 	e->efi_gpt_PartitionEntryArrayCRC32 =
5169 	    LE_32(e->efi_gpt_PartitionEntryArrayCRC32);
5170 }
5171 
5172 static void
5173 sd_swap_efi_gpe(int nparts, efi_gpe_t *p)
5174 {
5175 	int i;
5176 
5177 	_NOTE(ASSUMING_PROTECTED(*p))
5178 	for (i = 0; i < nparts; i++) {
5179 		UUID_LE_CONVERT(p[i].efi_gpe_PartitionTypeGUID,
5180 		    p[i].efi_gpe_PartitionTypeGUID);
5181 		p[i].efi_gpe_StartingLBA = LE_64(p[i].efi_gpe_StartingLBA);
5182 		p[i].efi_gpe_EndingLBA = LE_64(p[i].efi_gpe_EndingLBA);
5183 		/* PartitionAttrs */
5184 	}
5185 }
5186 
5187 static int
5188 sd_validate_efi(efi_gpt_t *labp)
5189 {
5190 	if (labp->efi_gpt_Signature != EFI_SIGNATURE)
5191 		return (EINVAL);
5192 	/* at least 96 bytes in this version of the spec. */
5193 	if (sizeof (efi_gpt_t) - sizeof (labp->efi_gpt_Reserved2) >
5194 	    labp->efi_gpt_HeaderSize)
5195 		return (EINVAL);
5196 	/* this should be 128 bytes */
5197 	if (labp->efi_gpt_SizeOfPartitionEntry != sizeof (efi_gpe_t))
5198 		return (EINVAL);
5199 	return (0);
5200 }
5201 
5202 static int
5203 sd_use_efi(struct sd_lun *un, int path_flag)
5204 {
5205 	int		i;
5206 	int		rval = 0;
5207 	efi_gpe_t	*partitions;
5208 	uchar_t		*buf;
5209 	uint_t		lbasize;
5210 	uint64_t	cap;
5211 	uint_t		nparts;
5212 	diskaddr_t	gpe_lba;
5213 
5214 	ASSERT(mutex_owned(SD_MUTEX(un)));
5215 	lbasize = un->un_tgt_blocksize;
	cap = un->un_blockcount;	/* default; re-read below if the primary label is bad */
5216 
5217 	mutex_exit(SD_MUTEX(un));
5218 
5219 	buf = kmem_zalloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
5220 
5221 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
5222 		rval = EINVAL;
5223 		goto done_err;
5224 	}
5225 
5226 	rval = sd_send_scsi_READ(un, buf, lbasize, 0, path_flag);
5227 	if (rval) {
5228 		goto done_err;
5229 	}
5230 	if (((struct dk_label *)buf)->dkl_magic == DKL_MAGIC) {
5231 		/* not ours */
5232 		rval = ESRCH;
5233 		goto done_err;
5234 	}
5235 
5236 	rval = sd_send_scsi_READ(un, buf, lbasize, 1, path_flag);
5237 	if (rval) {
5238 		goto done_err;
5239 	}
5240 	sd_swap_efi_gpt((efi_gpt_t *)buf);
5241 
5242 	if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
5243 		/*
5244 		 * Couldn't read the primary, try the backup.  Our
5245 		 * capacity at this point could be based on CHS, so
5246 		 * check what the device reports.
5247 		 */
5248 		rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
5249 		    path_flag);
5250 		if (rval) {
5251 			goto done_err;
5252 		}
5253 
5254 		/*
5255 		 * The MMC standard allows READ CAPACITY to be
5256 		 * inaccurate by a bounded amount (in the interest of
5257 		 * response latency).  As a result, failed READs are
5258 		 * commonplace (due to the reading of metadata and not
5259 		 * data). Depending on the per-Vendor/drive Sense data,
5260 		 * the failed READ can cause many (unnecessary) retries.
5261 		 */
5262 		if ((rval = sd_send_scsi_READ(un, buf, lbasize,
5263 		    cap - 1, (ISCD(un)) ? SD_PATH_DIRECT_PRIORITY :
5264 		    path_flag)) != 0) {
5265 			goto done_err;
5266 		}
5267 
5268 		sd_swap_efi_gpt((efi_gpt_t *)buf);
5269 		if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0)
5270 			goto done_err;
5271 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5272 		    "primary label corrupt; using backup\n");
5273 	}
5274 
5275 	nparts = ((efi_gpt_t *)buf)->efi_gpt_NumberOfPartitionEntries;
5276 	gpe_lba = ((efi_gpt_t *)buf)->efi_gpt_PartitionEntryLBA;
5277 
5278 	rval = sd_send_scsi_READ(un, buf, EFI_MIN_ARRAY_SIZE, gpe_lba,
5279 	    path_flag);
5280 	if (rval) {
5281 		goto done_err;
5282 	}
5283 	partitions = (efi_gpe_t *)buf;
5284 
5285 	if (nparts > MAXPART) {
5286 		nparts = MAXPART;
5287 	}
5288 	sd_swap_efi_gpe(nparts, partitions);
5289 
5290 	mutex_enter(SD_MUTEX(un));
5291 
5292 	/* Fill in partition table. */
5293 	for (i = 0; i < nparts; i++) {
5294 		if (partitions->efi_gpe_StartingLBA != 0 ||
5295 		    partitions->efi_gpe_EndingLBA != 0) {
5296 			un->un_map[i].dkl_cylno =
5297 			    partitions->efi_gpe_StartingLBA;
5298 			un->un_map[i].dkl_nblk =
5299 			    partitions->efi_gpe_EndingLBA -
5300 			    partitions->efi_gpe_StartingLBA + 1;
5301 			un->un_offset[i] =
5302 			    partitions->efi_gpe_StartingLBA;
5303 		}
5304 		if (i == WD_NODE) {
5305 			/*
5306 			 * minor number 7 corresponds to the whole disk
5307 			 */
5308 			un->un_map[i].dkl_cylno = 0;
5309 			un->un_map[i].dkl_nblk = un->un_blockcount;
5310 			un->un_offset[i] = 0;
5311 		}
5312 		partitions++;
5313 	}
5314 	un->un_solaris_offset = 0;
5315 	un->un_solaris_size = cap;
5316 	un->un_f_geometry_is_valid = TRUE;
5317 
5318 	/* clear the vtoc label */
5319 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5320 
5321 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5322 	return (0);
5323 
5324 done_err:
5325 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5326 	mutex_enter(SD_MUTEX(un));
5327 	/*
5328 	 * If we didn't find something that could look like a VTOC
5329 	 * and the disk is over 1TB, we know there isn't a valid label.
5330 	 * Otherwise let sd_uselabel decide what to do.  We only
5331 	 * want to invalidate this if we're certain the label isn't
5332 	 * valid because sd_prop_op will now fail, which in turn
5333 	 * causes things like opens and stats on the partition to fail.
5334 	 */
5335 	if ((un->un_blockcount > DK_MAX_BLOCKS) && (rval != ESRCH)) {
5336 		un->un_f_geometry_is_valid = FALSE;
5337 	}
5338 	return (rval);
5339 }
5340 
5341 
5342 /*
5343  *    Function: sd_uselabel
5344  *
5345  * Description: Validate the disk label and update the relevant data (geometry,
5346  *		partition, vtoc, and capacity data) in the sd_lun struct.
5347  *		Marks the geometry of the unit as being valid.
5348  *
5349  *   Arguments: un: unit struct.
5350  *		dk_label: disk label
5351  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
5352  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
5353  *			to use the USCSI "direct" chain and bypass the normal
5354  *			command waitq.
5355  *
5356  * Return Code: SD_LABEL_IS_VALID: Label read from disk is OK; geometry,
5357  *		partition, vtoc, and capacity data are good.
5358  *
5359  *		SD_LABEL_IS_INVALID: Magic number or checksum error in the
5360  *		label; or computed capacity does not jibe with capacity
5361  *		reported from the READ CAPACITY command.
5362  *
5363  *     Context: Kernel thread only (can sleep).
5364  */
5365 
5366 static int
5367 sd_uselabel(struct sd_lun *un, struct dk_label *labp, int path_flag)
5368 {
5369 	short	*sp;
5370 	short	sum;
5371 	short	count;
5372 	int	label_error = SD_LABEL_IS_VALID;
5373 	int	i;
5374 	int	capacity;
5375 	int	part_end;
5376 	int	track_capacity;
5377 	int	err;
5378 #if defined(_SUNOS_VTOC_16)
5379 	struct	dkl_partition	*vpartp;
5380 #endif
5381 	ASSERT(un != NULL);
5382 	ASSERT(mutex_owned(SD_MUTEX(un)));
5383 
5384 	/* Validate the magic number of the label. */
5385 	if (labp->dkl_magic != DKL_MAGIC) {
5386 #if defined(__sparc)
5387 		if ((un->un_state == SD_STATE_NORMAL) &&
5388 		    un->un_f_vtoc_errlog_supported) {
5389 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5390 			    "Corrupt label; wrong magic number\n");
5391 		}
5392 #endif
5393 		return (SD_LABEL_IS_INVALID);
5394 	}
5395 
5396 	/* Validate the checksum of the label. */
5397 	sp  = (short *)labp;
5398 	sum = 0;
5399 	count = sizeof (struct dk_label) / sizeof (short);
5400 	while (count--)	 {
5401 		sum ^= *sp++;
5402 	}
5403 
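	/*
	 * A valid label XORs to zero: the stored checksum word is chosen
	 * so that folding every 16-bit word in the label together,
	 * checksum included, yields 0.
	 */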
5404 	if (sum != 0) {
5405 #if	defined(_SUNOS_VTOC_16)
5406 		if ((un->un_state == SD_STATE_NORMAL) && !ISCD(un)) {
5407 #elif defined(_SUNOS_VTOC_8)
5408 		if ((un->un_state == SD_STATE_NORMAL) &&
5409 		    un->un_f_vtoc_errlog_supported) {
5410 #endif
5411 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5412 			    "Corrupt label - label checksum failed\n");
5413 		}
5414 		return (SD_LABEL_IS_INVALID);
5415 	}
5416 
5417 
5418 	/*
5419 	 * Fill in geometry structure with data from label.
5420 	 */
5421 	bzero(&un->un_g, sizeof (struct dk_geom));
5422 	un->un_g.dkg_ncyl   = labp->dkl_ncyl;
5423 	un->un_g.dkg_acyl   = labp->dkl_acyl;
5424 	un->un_g.dkg_bcyl   = 0;
5425 	un->un_g.dkg_nhead  = labp->dkl_nhead;
5426 	un->un_g.dkg_nsect  = labp->dkl_nsect;
5427 	un->un_g.dkg_intrlv = labp->dkl_intrlv;
5428 
5429 #if defined(_SUNOS_VTOC_8)
5430 	un->un_g.dkg_gap1   = labp->dkl_gap1;
5431 	un->un_g.dkg_gap2   = labp->dkl_gap2;
5432 	un->un_g.dkg_bhead  = labp->dkl_bhead;
5433 #endif
5434 #if defined(_SUNOS_VTOC_16)
5435 	un->un_dkg_skew = labp->dkl_skew;
5436 #endif
5437 
5438 #if defined(__i386) || defined(__amd64)
5439 	un->un_g.dkg_apc = labp->dkl_apc;
5440 #endif
5441 
5442 	/*
5443 	 * Currently we rely on the values in the label being accurate. If
5444 	 * dkl_rpm or dkl_pcyl are zero in the label, use a default value.
5445 	 *
5446 	 * Note: In the future a MODE SENSE may be used to retrieve this data,
5447 	 * although this command is optional in SCSI-2.
5448 	 */
5449 	un->un_g.dkg_rpm  = (labp->dkl_rpm  != 0) ? labp->dkl_rpm  : 3600;
5450 	un->un_g.dkg_pcyl = (labp->dkl_pcyl != 0) ? labp->dkl_pcyl :
5451 	    (un->un_g.dkg_ncyl + un->un_g.dkg_acyl);
5452 
5453 	/*
5454 	 * The Read and Write reinstruct values may not be valid
5455 	 * for older disks.
5456 	 */
5457 	un->un_g.dkg_read_reinstruct  = labp->dkl_read_reinstruct;
5458 	un->un_g.dkg_write_reinstruct = labp->dkl_write_reinstruct;
5459 
5460 	/* Fill in partition table. */
5461 #if defined(_SUNOS_VTOC_8)
5462 	for (i = 0; i < NDKMAP; i++) {
5463 		un->un_map[i].dkl_cylno = labp->dkl_map[i].dkl_cylno;
5464 		un->un_map[i].dkl_nblk  = labp->dkl_map[i].dkl_nblk;
5465 	}
5466 #endif
5467 #if  defined(_SUNOS_VTOC_16)
5468 	vpartp		= labp->dkl_vtoc.v_part;
5469 	track_capacity	= labp->dkl_nhead * labp->dkl_nsect;
5470 
5471 	/* Prevent divide by zero */
5472 	if (track_capacity == 0) {
5473 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5474 		    "Corrupt label - zero nhead or nsect value\n");
5475 
5476 		return (SD_LABEL_IS_INVALID);
5477 	}
5478 
5479 	for (i = 0; i < NDKMAP; i++, vpartp++) {
5480 		un->un_map[i].dkl_cylno = vpartp->p_start / track_capacity;
5481 		un->un_map[i].dkl_nblk  = vpartp->p_size;
5482 	}
5483 #endif
5484 
5485 	/* Fill in VTOC Structure. */
5486 	bcopy(&labp->dkl_vtoc, &un->un_vtoc, sizeof (struct dk_vtoc));
5487 #if defined(_SUNOS_VTOC_8)
5488 	/*
5489 	 * The 8-slice vtoc does not include the ascii label; save it into
5490 	 * the device's soft state structure here.
5491 	 */
5492 	bcopy(labp->dkl_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
5493 #endif
5494 
5495 	/* Now look for a valid capacity. */
5496 	track_capacity	= (un->un_g.dkg_nhead * un->un_g.dkg_nsect);
5497 	capacity	= (un->un_g.dkg_ncyl  * track_capacity);
5498 
5499 	if (un->un_g.dkg_acyl) {
5500 #if defined(__i386) || defined(__amd64)
5501 		/* we may have more than one alternate cylinder */
5502 		capacity += (track_capacity * un->un_g.dkg_acyl);
5503 #else
5504 		capacity += track_capacity;
5505 #endif
5506 	}
5507 
5508 	/*
5509 	 * Force check here to ensure the computed capacity is valid.
5510 	 * If capacity is zero, it indicates an invalid label and
5511 	 * we should abort updating the relevant data.
5512 	 */
5513 	if (capacity == 0) {
5514 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5515 		    "Corrupt label - no valid capacity could be retrieved\n");
5516 
5517 		return (SD_LABEL_IS_INVALID);
5518 	}
5519 
5520 	/* Mark the geometry as valid. */
5521 	un->un_f_geometry_is_valid = TRUE;
5522 
5523 	/*
5524 	 * At this point, un->un_blockcount should contain valid data from
5525 	 * the READ CAPACITY command.
5526 	 */
5527 	if (un->un_f_blockcount_is_valid != TRUE) {
5528 		/*
5529 		 * We have a situation where the target didn't give us a good
5530 		 * READ CAPACITY value, yet there appears to be a valid label.
5531 		 * In this case, we'll fake the capacity.
5532 		 */
5533 		un->un_blockcount = capacity;
5534 		un->un_f_blockcount_is_valid = TRUE;
5535 		goto done;
5536 	}
5537 
5538 
5539 	if ((capacity <= un->un_blockcount) ||
5540 	    (un->un_state != SD_STATE_NORMAL)) {
5541 #if defined(_SUNOS_VTOC_8)
5542 		/*
5543 		 * We can't let this happen on drives that are subdivided
5544 		 * into logical disks (i.e., that have an fdisk table).
5545 		 * The un_blockcount field should always hold the full media
5546 		 * size in sectors, period.  This code would overwrite
5547 		 * un_blockcount with the size of the Solaris fdisk partition.
5548 		 */
5549 		SD_ERROR(SD_LOG_COMMON, un,
5550 		    "sd_uselabel: Label %d blocks; Drive %d blocks\n",
5551 		    capacity, un->un_blockcount);
5552 		un->un_blockcount = capacity;
5553 		un->un_f_blockcount_is_valid = TRUE;
5554 #endif	/* defined(_SUNOS_VTOC_8) */
5555 		goto done;
5556 	}
5557 
5558 	if (ISCD(un)) {
5559 		/* For CDROMs, we trust that the data in the label is OK. */
5560 #if defined(_SUNOS_VTOC_8)
5561 		for (i = 0; i < NDKMAP; i++) {
5562 			part_end = labp->dkl_nhead * labp->dkl_nsect *
5563 			    labp->dkl_map[i].dkl_cylno +
5564 			    labp->dkl_map[i].dkl_nblk  - 1;
5565 
5566 			if ((labp->dkl_map[i].dkl_nblk) &&
5567 			    (part_end > un->un_blockcount)) {
5568 				un->un_f_geometry_is_valid = FALSE;
5569 				break;
5570 			}
5571 		}
5572 #endif
5573 #if defined(_SUNOS_VTOC_16)
5574 		vpartp = &(labp->dkl_vtoc.v_part[0]);
5575 		for (i = 0; i < NDKMAP; i++, vpartp++) {
5576 			part_end = vpartp->p_start + vpartp->p_size;
5577 			if ((vpartp->p_size > 0) &&
5578 			    (part_end > un->un_blockcount)) {
5579 				un->un_f_geometry_is_valid = FALSE;
5580 				break;
5581 			}
5582 		}
5583 #endif
5584 	} else {
5585 		uint64_t t_capacity;
5586 		uint32_t t_lbasize;
5587 
5588 		mutex_exit(SD_MUTEX(un));
5589 		err = sd_send_scsi_READ_CAPACITY(un, &t_capacity, &t_lbasize,
5590 		    path_flag);
5591 		ASSERT(t_capacity <= DK_MAX_BLOCKS);
5592 		mutex_enter(SD_MUTEX(un));
5593 
5594 		if (err == 0) {
5595 			sd_update_block_info(un, t_lbasize, t_capacity);
5596 		}
5597 
5598 		if (capacity > un->un_blockcount) {
5599 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5600 			    "Corrupt label - bad geometry\n");
5601 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
5602 			    "Label says %u blocks; Drive says %llu blocks\n",
5603 			    capacity, (unsigned long long)un->un_blockcount);
5604 			un->un_f_geometry_is_valid = FALSE;
5605 			label_error = SD_LABEL_IS_INVALID;
5606 		}
5607 	}
5608 
5609 done:
5610 
5611 	SD_INFO(SD_LOG_COMMON, un, "sd_uselabel: (label geometry)\n");
5612 	SD_INFO(SD_LOG_COMMON, un,
5613 	    "   ncyl: %d; acyl: %d; nhead: %d; nsect: %d\n",
5614 	    un->un_g.dkg_ncyl,  un->un_g.dkg_acyl,
5615 	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
5616 	SD_INFO(SD_LOG_COMMON, un,
5617 	    "   lbasize: %d; capacity: %d; intrlv: %d; rpm: %d\n",
5618 	    un->un_tgt_blocksize, un->un_blockcount,
5619 	    un->un_g.dkg_intrlv, un->un_g.dkg_rpm);
5620 	SD_INFO(SD_LOG_COMMON, un, "   wrt_reinstr: %d; rd_reinstr: %d\n",
5621 	    un->un_g.dkg_write_reinstruct, un->un_g.dkg_read_reinstruct);
5622 
5623 	ASSERT(mutex_owned(SD_MUTEX(un)));
5624 
5625 	return (label_error);
5626 }
5627 
5628 
5629 /*
5630  *    Function: sd_build_default_label
5631  *
5632  * Description: Generate a default label for those devices that do not have
5633  *		one, e.g., new media, removable cartridges, etc.
5634  *
5635  *     Context: Kernel thread only
5636  */
5637 
5638 static void
5639 sd_build_default_label(struct sd_lun *un)
5640 {
5641 #if defined(_SUNOS_VTOC_16)
5642 	uint_t	phys_spc;
5643 	uint_t	disksize;
5644 	struct	dk_geom un_g;
5645 #endif
5646 
5647 	ASSERT(un != NULL);
5648 	ASSERT(mutex_owned(SD_MUTEX(un)));
5649 
5650 #if defined(_SUNOS_VTOC_8)
5651 	/*
5652 	 * Note: This is a legacy check for non-removable devices on VTOC_8
5653 	 * only. This may be a valid check for VTOC_16 as well.
5654 	 * Once we understand why there is this difference between SPARC and
5655 	 * x86 platforms, we could remove this legacy check.
5656 	 */
5657 	ASSERT(un->un_f_default_vtoc_supported);
5658 #endif
5659 
5660 	bzero(&un->un_g, sizeof (struct dk_geom));
5661 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5662 	bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
5663 
5664 #if defined(_SUNOS_VTOC_8)
5665 
5666 	/*
5667 	 * This is removable media, so there is no label (on SPARC, anyway).
5668 	 * But it is still necessary to set up various geometry information,
5669 	 * and we do that here.
5670 	 */
5671 
5672 	/*
5673 	 * For the rpm, we use the minimum for the disk.  For the heads and
5674 	 * sectors per track: if the capacity is <= 1GB, use head = 64 and
5675 	 * sect = 32; else use head = 255 and sect = 63.  Note that the
5676 	 * capacity should equal the C*H*S product, so round-off errors will
5677 	 * cause some truncation of size.  For CD-ROMs this truncation can
5678 	 * have adverse side effects, so we return ncyl and nhead as 1; nsect
5679 	 * would overflow for most CD-ROMs since it is of type ushort. (4190569)
5680 	 */
5681 	if (ISCD(un)) {
5682 		/*
5683 		 * Preserve the old behavior for non-writable
5684 		 * media.  Since dkg_nsect is a ushort, it
5685 		 * loses bits when a CD-ROM has more than
5686 		 * 65536 sectors, so recalculating the
5687 		 * capacity would make it much too small.
5688 		 * The dkg_* information is not used for
5689 		 * CD-ROMs, so that is acceptable.  For
5690 		 * writable CDs, however, we need this
5691 		 * information to be valid (for newfs, say),
5692 		 * so we make nsect and nhead > 1; that way
5693 		 * nsect can still stay within the ushort
5694 		 * limit without losing any bits.
5695 		 */
5696 		if (un->un_f_mmc_writable_media == TRUE) {
5697 			un->un_g.dkg_nhead = 64;
5698 			un->un_g.dkg_nsect = 32;
5699 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
5700 			un->un_blockcount = un->un_g.dkg_ncyl *
5701 			    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5702 		} else {
5703 			un->un_g.dkg_ncyl  = 1;
5704 			un->un_g.dkg_nhead = 1;
5705 			un->un_g.dkg_nsect = un->un_blockcount;
5706 		}
5707 	} else {
5708 		if (un->un_blockcount <= 0x1000) {
5709 			/* unlabeled SCSI floppy device */
5710 			un->un_g.dkg_nhead = 2;
5711 			un->un_g.dkg_ncyl = 80;
5712 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
5713 		} else if (un->un_blockcount <= 0x200000) {
5714 			un->un_g.dkg_nhead = 64;
5715 			un->un_g.dkg_nsect = 32;
5716 			un->un_g.dkg_ncyl  = un->un_blockcount / (64 * 32);
5717 		} else {
5718 			un->un_g.dkg_nhead = 255;
5719 			un->un_g.dkg_nsect = 63;
5720 			un->un_g.dkg_ncyl  = un->un_blockcount / (255 * 63);
5721 		}
5722 		un->un_blockcount =
5723 		    un->un_g.dkg_ncyl * un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5724 	}
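	/*
	 * For example, a 4194304-block disk (> 0x200000 blocks) gets 255
	 * heads and 63 sectors, so ncyl = 4194304 / (255 * 63) = 261 and
	 * the usable capacity is truncated to 261 * 255 * 63 = 4192965
	 * blocks.
	 */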
5725 
5726 	un->un_g.dkg_acyl	= 0;
5727 	un->un_g.dkg_bcyl	= 0;
5728 	un->un_g.dkg_rpm	= 200;
5729 	un->un_asciilabel[0]	= '\0';
5730 	un->un_g.dkg_pcyl	= un->un_g.dkg_ncyl;
5731 
5732 	un->un_map[0].dkl_cylno = 0;
5733 	un->un_map[0].dkl_nblk  = un->un_blockcount;
5734 	un->un_map[2].dkl_cylno = 0;
5735 	un->un_map[2].dkl_nblk  = un->un_blockcount;
5736 
5737 #elif defined(_SUNOS_VTOC_16)
5738 
5739 	if (un->un_solaris_size == 0) {
5740 		/*
5741 		 * We have an fdisk table but no Solaris entry, so
5742 		 * don't create a default label.
5743 		 */
5744 		un->un_f_geometry_is_valid = TRUE;
5745 		return;
5746 	}
5747 
5748 	/*
5749 	 * For CDs we continue to use the physical geometry to calculate
5750 	 * number of cylinders. All other devices must convert the
5751 	 * physical geometry (geom_cache) to values that will fit
5752 	 * in a dk_geom structure.
5753 	 */
5754 	if (ISCD(un)) {
5755 		phys_spc = un->un_pgeom.g_nhead * un->un_pgeom.g_nsect;
5756 	} else {
5757 		/* Convert physical geometry to disk geometry */
5758 		bzero(&un_g, sizeof (struct dk_geom));
5759 		sd_convert_geometry(un->un_blockcount, &un_g);
5760 		bcopy(&un_g, &un->un_g, sizeof (un->un_g));
5761 		phys_spc = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
5762 	}
5763 
5764 	ASSERT(phys_spc != 0);
5765 	un->un_g.dkg_pcyl = un->un_solaris_size / phys_spc;
5766 	un->un_g.dkg_acyl = DK_ACYL;
5767 	un->un_g.dkg_ncyl = un->un_g.dkg_pcyl - DK_ACYL;
5768 	disksize = un->un_g.dkg_ncyl * phys_spc;
5769 
5770 	if (ISCD(un)) {
5771 		/*
5772 		 * CDs don't use the "heads * sectors * cyls" type of
5773 		 * geometry, but instead use the entire capacity of the media.
5774 		 */
5775 		disksize = un->un_solaris_size;
5776 		un->un_g.dkg_nhead = 1;
5777 		un->un_g.dkg_nsect = 1;
5778 		un->un_g.dkg_rpm =
5779 		    (un->un_pgeom.g_rpm == 0) ? 200 : un->un_pgeom.g_rpm;
5780 
5781 		un->un_vtoc.v_part[0].p_start = 0;
5782 		un->un_vtoc.v_part[0].p_size  = disksize;
5783 		un->un_vtoc.v_part[0].p_tag   = V_BACKUP;
5784 		un->un_vtoc.v_part[0].p_flag  = V_UNMNT;
5785 
5786 		un->un_map[0].dkl_cylno = 0;
5787 		un->un_map[0].dkl_nblk  = disksize;
5788 		un->un_offset[0] = 0;
5789 
5790 	} else {
5791 		/*
5792 		 * Hard disks and removable media cartridges
5793 		 */
5794 		un->un_g.dkg_rpm =
5795 		    (un->un_pgeom.g_rpm == 0) ? 3600 : un->un_pgeom.g_rpm;
5796 		un->un_vtoc.v_sectorsz = un->un_sys_blocksize;
5797 
5798 		/* Add boot slice */
5799 		un->un_vtoc.v_part[8].p_start = 0;
5800 		un->un_vtoc.v_part[8].p_size  = phys_spc;
5801 		un->un_vtoc.v_part[8].p_tag   = V_BOOT;
5802 		un->un_vtoc.v_part[8].p_flag  = V_UNMNT;
5803 
5804 		un->un_map[8].dkl_cylno = 0;
5805 		un->un_map[8].dkl_nblk  = phys_spc;
5806 		un->un_offset[8] = 0;
5807 	}
5808 
5809 	un->un_g.dkg_apc = 0;
5810 	un->un_vtoc.v_nparts = V_NUMPAR;
5811 
5812 	/* Add backup slice */
5813 	un->un_vtoc.v_part[2].p_start = 0;
5814 	un->un_vtoc.v_part[2].p_size  = disksize;
5815 	un->un_vtoc.v_part[2].p_tag   = V_BACKUP;
5816 	un->un_vtoc.v_part[2].p_flag  = V_UNMNT;
5817 
5818 	un->un_map[2].dkl_cylno = 0;
5819 	un->un_map[2].dkl_nblk  = disksize;
5820 	un->un_offset[2] = 0;
5821 
5822 	(void) sprintf(un->un_vtoc.v_asciilabel, "DEFAULT cyl %d alt %d"
5823 	    " hd %d sec %d", un->un_g.dkg_ncyl, un->un_g.dkg_acyl,
5824 	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
5825 
5826 #else
5827 #error "No VTOC format defined."
5828 #endif
5829 
5830 	un->un_g.dkg_read_reinstruct  = 0;
5831 	un->un_g.dkg_write_reinstruct = 0;
5832 
5833 	un->un_g.dkg_intrlv = 1;
5834 
5835 	un->un_vtoc.v_version = V_VERSION;
5836 	un->un_vtoc.v_sanity  = VTOC_SANE;
5837 
5838 	un->un_f_geometry_is_valid = TRUE;
5839 
5840 	SD_INFO(SD_LOG_COMMON, un,
5841 	    "sd_build_default_label: Default label created: "
5842 	    "cyl: %d\tacyl: %d\tnhead: %d\tnsect: %d\tcap: %d\n",
5843 	    un->un_g.dkg_ncyl, un->un_g.dkg_acyl, un->un_g.dkg_nhead,
5844 	    un->un_g.dkg_nsect, un->un_blockcount);
5845 }
5846 
5847 
5848 #if defined(_FIRMWARE_NEEDS_FDISK)
5849 /*
5850  * Max CHS values, as they are encoded into bytes, for 1022/254/63
5851  */
5852 #define	LBA_MAX_SECT	(63 | ((1022 & 0x300) >> 2))
5853 #define	LBA_MAX_CYL	(1022 & 0xFF)
5854 #define	LBA_MAX_HEAD	(254)
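/*
 * In the packed fdisk CHS encoding, the sector byte carries the sector
 * number in its low six bits and cylinder bits 8-9 in its top two bits.
 * For the maximal tuple 1022/254/63 the values above expand to:
 *
 *	LBA_MAX_SECT = 63 | ((1022 & 0x300) >> 2) = 0x3f | 0xc0 = 0xff
 *	LBA_MAX_CYL  = 1022 & 0xff = 0xfe
 *	LBA_MAX_HEAD = 254 = 0xfe
 */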
5855 
5856 
5857 /*
5858  *    Function: sd_has_max_chs_vals
5859  *
5860  * Description: Return TRUE if Cylinder-Head-Sector values are all at maximum.
5861  *
5862  *   Arguments: fdp - ptr to CHS info
5863  *
5864  * Return Code: True or false
5865  *
5866  *     Context: Any.
5867  */
5868 
5869 static int
5870 sd_has_max_chs_vals(struct ipart *fdp)
5871 {
5872 	return ((fdp->begcyl  == LBA_MAX_CYL)	&&
5873 	    (fdp->beghead == LBA_MAX_HEAD)	&&
5874 	    (fdp->begsect == LBA_MAX_SECT)	&&
5875 	    (fdp->endcyl  == LBA_MAX_CYL)	&&
5876 	    (fdp->endhead == LBA_MAX_HEAD)	&&
5877 	    (fdp->endsect == LBA_MAX_SECT));
5878 }
5879 #endif
5880 
5881 
5882 /*
5883  *    Function: sd_inq_fill
5884  *
5885  * Description: Copy a piece of inquiry data into a destination buffer,
5886  *		cleaned up for non-printable characters and stopping at the
5887  *		first space character after the beginning of the passed string.
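 *		For example, a vendor field of "SEAGATE " is copied out as
 *		"SEAGATE", and bytes outside the printable ASCII range are
 *		replaced with '*'.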
5888  *
5889  *   Arguments: p - source string
5890  *		l - maximum length to copy
5891  *		s - destination string
5892  *
5893  *     Context: Any.
5894  */
5895 
5896 static void
5897 sd_inq_fill(char *p, int l, char *s)
5898 {
5899 	unsigned i = 0;
5900 	char c;
5901 
5902 	while (i++ < l) {
5903 		if ((c = *p++) < ' ' || c >= 0x7F) {
5904 			c = '*';
5905 		} else if (i != 1 && c == ' ') {
5906 			break;
5907 		}
5908 		*s++ = c;
5909 	}
5910 	*s++ = 0;
5911 }
5912 
5913 
5914 /*
5915  *    Function: sd_register_devid
5916  *
5917  * Description: This routine will obtain the device id information from the
5918  *		target, obtain the serial number, and register the device
5919  *		id with the ddi framework.
5920  *
5921  *   Arguments: devi - the system's dev_info_t for the device.
5922  *		un - driver soft state (unit) structure
5923  *		reservation_flag - indicates if a reservation conflict
5924  *		occurred during attach
5925  *
5926  *     Context: Kernel Thread
5927  */
5928 static void
5929 sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
5930 {
5931 	int		rval		= 0;
5932 	uchar_t		*inq80		= NULL;
5933 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
5934 	size_t		inq80_resid	= 0;
5935 	uchar_t		*inq83		= NULL;
5936 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
5937 	size_t		inq83_resid	= 0;
5938 
5939 	ASSERT(un != NULL);
5940 	ASSERT(mutex_owned(SD_MUTEX(un)));
5941 	ASSERT((SD_DEVINFO(un)) == devi);
5942 
5943 	/*
5944 	 * This is the case of antiquated Sun disk drives that have the
5945 	 * FAB_DEVID property set in the disk_table.  These drives
5946 	 * manage their devids by storing them in the last two available
5947 	 * sectors on the drive and have them fabricated by the ddi layer by
5948 	 * calling ddi_devid_init and passing the DEVID_FAB flag.
5949 	 */
5950 	if (un->un_f_opt_fab_devid == TRUE) {
5951 		/*
5952 		 * Depending on EINVAL isn't reliable, since a reserved disk
5953 		 * may result in invalid geometry, so check to make sure a
5954 		 * reservation conflict did not occur during attach.
5955 		 */
5956 		if ((sd_get_devid(un) == EINVAL) &&
5957 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
5958 			/*
5959 			 * The devid is invalid AND there is no reservation
5960 			 * conflict.  Fabricate a new devid.
5961 			 */
5962 			(void) sd_create_devid(un);
5963 		}
5964 
5965 		/* Register the devid if it exists */
5966 		if (un->un_devid != NULL) {
5967 			(void) ddi_devid_register(SD_DEVINFO(un),
5968 			    un->un_devid);
5969 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5970 			    "sd_register_devid: Devid Fabricated\n");
5971 		}
5972 		return;
5973 	}
5974 
5975 	/*
5976 	 * We check the availability of the World Wide Name (0x83) and Unit
5977 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
5978 	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
5979 	 * 0x83 is available, that is the best choice.  Our next choice is
5980 	 * 0x80.  If neither is available, we munge the devid from the device
5981 	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
5982 	 * to fabricate a devid for non-Sun qualified disks.
5983 	 */
5984 	if (sd_check_vpd_page_support(un) == 0) {
5985 		/* collect page 80 data if available */
5986 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
5987 
5988 			mutex_exit(SD_MUTEX(un));
5989 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
5990 			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
5991 			    0x01, 0x80, &inq80_resid);
5992 
5993 			if (rval != 0) {
5994 				kmem_free(inq80, inq80_len);
5995 				inq80 = NULL;
5996 				inq80_len = 0;
5997 			}
5998 			mutex_enter(SD_MUTEX(un));
5999 		}
6000 
6001 		/* collect page 83 data if available */
6002 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
6003 			mutex_exit(SD_MUTEX(un));
6004 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
6005 			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
6006 			    0x01, 0x83, &inq83_resid);
6007 
6008 			if (rval != 0) {
6009 				kmem_free(inq83, inq83_len);
6010 				inq83 = NULL;
6011 				inq83_len = 0;
6012 			}
6013 			mutex_enter(SD_MUTEX(un));
6014 		}
6015 	}
6016 
6017 	/* encode best devid possible based on data available */
6018 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
6019 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
6020 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
6021 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
6022 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
6023 
6024 		/* devid successfully encoded, register devid */
6025 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
6026 
6027 	} else {
6028 		/*
6029 		 * Unable to encode a devid based on data available.
6030 		 * This is not a Sun qualified disk.  Older Sun disk
6031 		 * drives that have the SD_FAB_DEVID property
6032 		 * set in the disk_table and non-Sun qualified
6033 		 * disks are treated in the same manner.  These
6034 		 * drives manage their devids by storing them
6035 		 * in the last two available sectors on the
6036 		 * drive and have them fabricated by the ddi
6037 		 * layer by calling ddi_devid_init and passing
6038 		 * the DEVID_FAB flag.
6039 		 * Create a fabricated devid only if one does
6040 		 * not already exist.
6041 		 */
6042 		if (sd_get_devid(un) == EINVAL) {
6043 			(void) sd_create_devid(un);
6044 			un->un_f_opt_fab_devid = TRUE;
6045 		}
6046 
6047 		/* Register the devid if it exists */
6048 		if (un->un_devid != NULL) {
6049 			(void) ddi_devid_register(SD_DEVINFO(un),
6050 			    un->un_devid);
6051 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
6052 			    "sd_register_devid: devid fabricated using "
6053 			    "ddi framework\n");
6054 		}
6055 	}
6056 
6057 	/* clean up resources */
6058 	if (inq80 != NULL) {
6059 		kmem_free(inq80, inq80_len);
6060 	}
6061 	if (inq83 != NULL) {
6062 		kmem_free(inq83, inq83_len);
6063 	}
6064 }
6065 
6066 static daddr_t
6067 sd_get_devid_block(struct sd_lun *un)
6068 {
6069 	daddr_t			spc, blk, head, cyl;
6070 
6071 	if (un->un_blockcount <= DK_MAX_BLOCKS) {
6072 		/* this geometry doesn't allow us to write a devid */
6073 		if (un->un_g.dkg_acyl < 2) {
6074 			return (-1);
6075 		}
6076 
6077 		/*
6078 		 * Subtracting 2 guarantees that the next-to-last cylinder
6079 		 * is used.
6080 		 */
6081 		cyl  = un->un_g.dkg_ncyl  + un->un_g.dkg_acyl - 2;
6082 		spc  = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
6083 		head = un->un_g.dkg_nhead - 1;
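		/*
		 * The devid block is the second sector (hence the +1) of
		 * the last track on that cylinder, with any alternate
		 * sectors per cylinder (dkg_apc) excluded from each
		 * cylinder's span.
		 */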
6084 		blk  = (cyl * (spc - un->un_g.dkg_apc)) +
6085 		    (head * un->un_g.dkg_nsect) + 1;
6086 	} else {
6087 		if (un->un_reserved != -1) {
6088 			blk = un->un_map[un->un_reserved].dkl_cylno + 1;
6089 		} else {
6090 			return (-1);
6091 		}
6092 	}
6093 	return (blk);
6094 }
6095 
6096 /*
6097  *    Function: sd_get_devid
6098  *
6099  * Description: This routine will return 0 if a valid device id has been
6100  *		obtained from the target and stored in the soft state. If a
6101  *		valid device id has not been previously read and stored, a
6102  *		read attempt will be made.
6103  *
6104  *   Arguments: un - driver soft state (unit) structure
6105  *
6106  * Return Code: 0 if we successfully get the device id
6107  *
6108  *     Context: Kernel Thread
6109  */
6110 
6111 static int
6112 sd_get_devid(struct sd_lun *un)
6113 {
6114 	struct dk_devid		*dkdevid;
6115 	ddi_devid_t		tmpid;
6116 	uint_t			*ip;
6117 	size_t			sz;
6118 	daddr_t			blk;
6119 	int			status;
6120 	int			chksum;
6121 	int			i;
6122 	size_t			buffer_size;
6123 
6124 	ASSERT(un != NULL);
6125 	ASSERT(mutex_owned(SD_MUTEX(un)));
6126 
6127 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
6128 	    un);
6129 
6130 	if (un->un_devid != NULL) {
6131 		return (0);
6132 	}
6133 
6134 	blk = sd_get_devid_block(un);
6135 	if (blk < 0)
6136 		return (EINVAL);
6137 
6138 	/*
6139 	 * Read and verify device id, stored in the reserved cylinders at the
6140 	 * end of the disk. The backup label is on the odd sectors of the last
6141 	 * track of the last cylinder; the device id is on a track of the
6142 	 * next-to-last cylinder.
6143 	 */
6144 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
6145 	mutex_exit(SD_MUTEX(un));
6146 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
6147 	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
6148 	    SD_PATH_DIRECT);
6149 	if (status != 0) {
6150 		goto error;
6151 	}
6152 
6153 	/* Validate the revision */
6154 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
6155 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
6156 		status = EINVAL;
6157 		goto error;
6158 	}
6159 
6160 	/* Calculate the checksum */
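	/*
	 * XOR every 32-bit word in the block except the trailing word,
	 * where the checksum itself is stored.
	 */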
6161 	chksum = 0;
6162 	ip = (uint_t *)dkdevid;
6163 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6164 	    i++) {
6165 		chksum ^= ip[i];
6166 	}
6167 
6168 	/* Compare the checksums */
6169 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
6170 		status = EINVAL;
6171 		goto error;
6172 	}
6173 
6174 	/* Validate the device id */
6175 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
6176 		status = EINVAL;
6177 		goto error;
6178 	}
6179 
6180 	/*
6181 	 * Store the device id in the driver soft state
6182 	 */
6183 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
6184 	tmpid = kmem_alloc(sz, KM_SLEEP);
6185 
6186 	mutex_enter(SD_MUTEX(un));
6187 
6188 	un->un_devid = tmpid;
6189 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
6190 
6191 	kmem_free(dkdevid, buffer_size);
6192 
6193 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
6194 
6195 	return (status);
6196 error:
6197 	mutex_enter(SD_MUTEX(un));
6198 	kmem_free(dkdevid, buffer_size);
6199 	return (status);
6200 }
6201 
6202 
6203 /*
6204  *    Function: sd_create_devid
6205  *
6206  * Description: This routine will fabricate the device id and write it
6207  *		to the disk.
6208  *
6209  *   Arguments: un - driver soft state (unit) structure
6210  *
6211  * Return Code: value of the fabricated device id
6212  *
6213  *     Context: Kernel Thread
6214  */
6215 
6216 static ddi_devid_t
6217 sd_create_devid(struct sd_lun *un)
6218 {
6219 	ASSERT(un != NULL);
6220 
6221 	/* Fabricate the devid */
6222 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
6223 	    == DDI_FAILURE) {
6224 		return (NULL);
6225 	}
6226 
6227 	/* Write the devid to disk */
6228 	if (sd_write_deviceid(un) != 0) {
6229 		ddi_devid_free(un->un_devid);
6230 		un->un_devid = NULL;
6231 	}
6232 
6233 	return (un->un_devid);
6234 }
6235 
6236 
6237 /*
6238  *    Function: sd_write_deviceid
6239  *
6240  * Description: This routine will write the device id to the disk
6241  *		reserved sector.
6242  *
6243  *   Arguments: un - driver soft state (unit) structure
6244  *
6245  * Return Code: -1 if the devid block cannot be located
6246  *		value returned by sd_send_scsi_cmd
6247  *
6248  *     Context: Kernel Thread
6249  */
6250 
6251 static int
6252 sd_write_deviceid(struct sd_lun *un)
6253 {
6254 	struct dk_devid		*dkdevid;
6255 	daddr_t			blk;
6256 	uint_t			*ip, chksum;
6257 	int			status;
6258 	int			i;
6259 
6260 	ASSERT(mutex_owned(SD_MUTEX(un)));
6261 
6262 	blk = sd_get_devid_block(un);
6263 	if (blk < 0)
6264 		return (-1);
6265 	mutex_exit(SD_MUTEX(un));
6266 
6267 	/* Allocate the buffer */
6268 	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
6269 
6270 	/* Fill in the revision */
6271 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
6272 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
6273 
6274 	/* Copy in the device id */
6275 	mutex_enter(SD_MUTEX(un));
6276 	bcopy(un->un_devid, &dkdevid->dkd_devid,
6277 	    ddi_devid_sizeof(un->un_devid));
6278 	mutex_exit(SD_MUTEX(un));
6279 
6280 	/* Calculate the checksum */
6281 	chksum = 0;
6282 	ip = (uint_t *)dkdevid;
6283 	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
6284 	    i++) {
6285 		chksum ^= ip[i];
6286 	}
6287 
6288 	/* Fill-in checksum */
6289 	DKD_FORMCHKSUM(chksum, dkdevid);
6290 
6291 	/* Write the reserved sector */
6292 	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
6293 	    SD_PATH_DIRECT);
6294 
6295 	kmem_free(dkdevid, un->un_sys_blocksize);
6296 
6297 	mutex_enter(SD_MUTEX(un));
6298 	return (status);
6299 }
6300 
6301 
6302 /*
6303  *    Function: sd_check_vpd_page_support
6304  *
6305  * Description: This routine sends an inquiry command with the EVPD bit set and
6306  *		a page code of 0x00 to the device. It is used to determine which
6307  *		vital product pages are available to find the devid. We are
6308  *		looking for pages 0x83 or 0x80.  If we return a negative 1, the
6309  *		device does not support that command.
6310  *
6311  *   Arguments: un  - driver soft state (unit) structure
6312  *
6313  * Return Code: 0 - success
6314  *		-1 - the device does not support VPD pages
6315  *
6316  *     Context: This routine can sleep.
6317  */
6318 
6319 static int
6320 sd_check_vpd_page_support(struct sd_lun *un)
6321 {
6322 	uchar_t	*page_list	= NULL;
6323 	uchar_t	page_length	= 0xff;	/* Use max possible length */
6324 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
6325 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
6326 	int    	rval		= 0;
6327 	int	counter;
6328 
6329 	ASSERT(un != NULL);
6330 	ASSERT(mutex_owned(SD_MUTEX(un)));
6331 
6332 	mutex_exit(SD_MUTEX(un));
6333 
6334 	/*
6335 	 * We'll set the page length to the maximum to save figuring it out
6336 	 * with an additional call.
6337 	 */
6338 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
6339 
6340 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
6341 	    page_code, NULL);
6342 
6343 	mutex_enter(SD_MUTEX(un));
6344 
6345 	/*
6346 	 * Now we must validate that the device accepted the command, as some
6347 	 * drives do not support it.  If the drive does support it, we will
6348 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
6349 	 * not, we return -1.
6350 	 */
6351 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
6352 		/* Loop to find one of the 2 pages we need */
6353 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
6354 
6355 		/*
6356 		 * Pages are returned in ascending order, and 0x83 is what we
6357 		 * are hoping for.
6358 		 */
6359 		while ((page_list[counter] <= 0x83) &&
6360 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
6361 		    VPD_HEAD_OFFSET))) {
6362 			/*
6363 			 * The list starts at byte 4; byte 3 holds the count
6364 			 * of page codes that follow (hence the + 3 bound).
6365 			 */
6366 
6367 			switch (page_list[counter]) {
6368 			case 0x00:
6369 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
6370 				break;
6371 			case 0x80:
6372 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
6373 				break;
6374 			case 0x81:
6375 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
6376 				break;
6377 			case 0x82:
6378 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
6379 				break;
6380 			case 0x83:
6381 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
6382 				break;
6383 			}
6384 			counter++;
6385 		}
6386 
6387 	} else {
6388 		rval = -1;
6389 
6390 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6391 		    "sd_check_vpd_page_support: This drive does not implement "
6392 		    "VPD pages.\n");
6393 	}
6394 
6395 	kmem_free(page_list, page_length);
6396 
6397 	return (rval);
6398 }
6399 
6400 
6401 /*
6402  *    Function: sd_setup_pm
6403  *
6404  * Description: Initialize Power Management on the device
6405  *
6406  *     Context: Kernel Thread
6407  */
6408 
6409 static void
6410 sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
6411 {
6412 	uint_t	log_page_size;
6413 	uchar_t	*log_page_data;
6414 	int	rval;
6415 
6416 	/*
6417 	 * Since we are called from attach, holding a mutex for
6418 	 * un is unnecessary. Because some of the routines called
6419 	 * from here require SD_MUTEX to not be held, assert this
6420 	 * right up front.
6421 	 */
6422 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6423 	/*
6424 	 * Since the sd device does not have the 'reg' property,
6425 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
6426 	 * The following code is to tell cpr that this device
6427 	 * DOES need to be suspended and resumed.
6428 	 */
6429 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
6430 	    "pm-hardware-state", "needs-suspend-resume");
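
	/*
	 * Summary of the PM setup below: if the HBA reports that PM is
	 * supported, probe for a motor with START_STOP_UNIT and create the
	 * pm-components property unconditionally. Otherwise auto-pm is
	 * enabled only if the device supports LOG SENSE and reports a
	 * start/stop cycle counter log page that can be read successfully.
	 */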
6431 
6432 	/*
6433 	 * This complies with the new power management framework
6434 	 * for certain desktop machines. Create the pm_components
6435 	 * property as a string array property.
6436 	 */
6437 	if (un->un_f_pm_supported) {
6438 		/*
6439 		 * Not all devices have a motor, so try it first. Some
6440 		 * devices may return ILLEGAL REQUEST and some may hang.
6441 		 * The following START_STOP_UNIT command is used to check
6442 		 * whether the target device has a motor; if it fails,
6443 		 * start/stop is marked unsupported below.
6444 		 */
6445 		un->un_f_start_stop_supported = TRUE;
6446 		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
6447 		    SD_PATH_DIRECT) != 0) {
6448 			un->un_f_start_stop_supported = FALSE;
6449 		}
6450 
6451 		/*
6452 		 * Create the pm properties anyway; otherwise the parent
6453 		 * can't go to sleep.
6454 		 */
6455 		(void) sd_create_pm_components(devi, un);
6456 		un->un_f_pm_is_enabled = TRUE;
6457 		return;
6458 	}
6459 
6460 	if (!un->un_f_log_sense_supported) {
6461 		un->un_power_level = SD_SPINDLE_ON;
6462 		un->un_f_pm_is_enabled = FALSE;
6463 		return;
6464 	}
6465 
6466 	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);
6467 
6468 #ifdef	SDDEBUG
6469 	if (sd_force_pm_supported) {
6470 		/* Force a successful result */
6471 		rval = 1;
6472 	}
6473 #endif
6474 
6475 	/*
6476 	 * If the start-stop cycle counter log page is not supported
6477 	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0),
6478 	 * then we should not create the pm_components property.
6479 	 */
6480 	if (rval == -1) {
6481 		/*
6482 		 * Error.
6483 		 * Reading log sense failed, most likely this is
6484 		 * an older drive that does not support log sense.
6485 		 * If this fails auto-pm is not supported.
6486 		 */
6487 		un->un_power_level = SD_SPINDLE_ON;
6488 		un->un_f_pm_is_enabled = FALSE;
6489 
6490 	} else if (rval == 0) {
6491 		/*
6492 		 * Page not found.
6493 		 * The start-stop cycle counter is implemented as page
6494 		 * START_STOP_CYCLE_VU_PAGE (0x31) in older disks. For newer
6495 		 * disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
6496 		 */
6497 		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
6498 			/*
6499 			 * Page found, use this one.
6500 			 */
6501 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
6502 			un->un_f_pm_is_enabled = TRUE;
6503 		} else {
6504 			/*
6505 			 * Error or page not found.
6506 			 * auto-pm is not supported for this device.
6507 			 */
6508 			un->un_power_level = SD_SPINDLE_ON;
6509 			un->un_f_pm_is_enabled = FALSE;
6510 		}
6511 	} else {
6512 		/*
6513 		 * Page found, use it.
6514 		 */
6515 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
6516 		un->un_f_pm_is_enabled = TRUE;
6517 	}
6518 
6519 
6520 	if (un->un_f_pm_is_enabled == TRUE) {
6521 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6522 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6523 
6524 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
6525 		    log_page_size, un->un_start_stop_cycle_page,
6526 		    0x01, 0, SD_PATH_DIRECT);
6527 #ifdef	SDDEBUG
6528 		if (sd_force_pm_supported) {
6529 			/* Force a successful result */
6530 			rval = 0;
6531 		}
6532 #endif
6533 
6534 		/*
6535 		 * If the LOG SENSE for the start/stop cycle counter page
6536 		 * succeeds, then power management is supported and we can
6537 		 * enable auto-pm.
6538 		 */
6539 		if (rval == 0)  {
6540 			(void) sd_create_pm_components(devi, un);
6541 		} else {
6542 			un->un_power_level = SD_SPINDLE_ON;
6543 			un->un_f_pm_is_enabled = FALSE;
6544 		}
6545 
6546 		kmem_free(log_page_data, log_page_size);
6547 	}
6548 }
6549 
6550 
6551 /*
6552  *    Function: sd_create_pm_components
6553  *
6554  * Description: Initialize PM property.
6555  *
6556  *     Context: Kernel thread context
6557  */
6558 
6559 static void
6560 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
6561 {
6562 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
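
	/*
	 * Per pm-components(9P), the property is a string array: a NAME=
	 * entry naming the component followed by "<level>=<label>" entries,
	 * one per power level; here level 0 is spindle off and level 1 is
	 * spindle on.
	 */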
6563 
6564 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6565 
6566 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6567 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
6568 		/*
6569 		 * When components are initially created they are idle,
6570 		 * power up any non-removables.
6571 		 * Note: the return value of pm_raise_power can't be used
6572 		 * for determining if PM should be enabled for this device.
6573 		 * Even if you check the return values and remove this
6574 		 * property created above, the PM framework will not honor the
6575 		 * change after the first call to pm_raise_power. Hence,
6576 		 * removal of that property does not help if pm_raise_power
6577 		 * fails. In the case of removable media, the start/stop
6578 		 * will fail if the media is not present.
6579 		 */
6580 		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
6581 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
6582 			mutex_enter(SD_MUTEX(un));
6583 			un->un_power_level = SD_SPINDLE_ON;
6584 			mutex_enter(&un->un_pm_mutex);
6585 			/* Set to on and not busy. */
6586 			un->un_pm_count = 0;
6587 		} else {
6588 			mutex_enter(SD_MUTEX(un));
6589 			un->un_power_level = SD_SPINDLE_OFF;
6590 			mutex_enter(&un->un_pm_mutex);
6591 			/* Set to off. */
6592 			un->un_pm_count = -1;
6593 		}
6594 		mutex_exit(&un->un_pm_mutex);
6595 		mutex_exit(SD_MUTEX(un));
6596 	} else {
6597 		un->un_power_level = SD_SPINDLE_ON;
6598 		un->un_f_pm_is_enabled = FALSE;
6599 	}
6600 }
6601 
6602 
6603 /*
6604  *    Function: sd_ddi_suspend
6605  *
6606  * Description: Performs system power-down operations. This includes
6607  *		setting the drive state to indicate it is suspended so
6608  *		that no new commands will be accepted. Also, waits for
6609  *		all commands that are in transport or queued to a timer
6610  *		for retry to complete. All timeout threads are cancelled.
6611  *
6612  * Return Code: DDI_FAILURE or DDI_SUCCESS
6613  *
6614  *     Context: Kernel thread context
6615  */
6616 
6617 static int
6618 sd_ddi_suspend(dev_info_t *devi)
6619 {
6620 	struct	sd_lun	*un;
6621 	clock_t		wait_cmds_complete;
6622 
6623 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6624 	if (un == NULL) {
6625 		return (DDI_FAILURE);
6626 	}
6627 
6628 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
6629 
6630 	mutex_enter(SD_MUTEX(un));
6631 
6632 	/* Return success if the device is already suspended. */
6633 	if (un->un_state == SD_STATE_SUSPENDED) {
6634 		mutex_exit(SD_MUTEX(un));
6635 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6636 		    "device already suspended, exiting\n");
6637 		return (DDI_SUCCESS);
6638 	}
6639 
6640 	/* Return failure if the device is being used by HA */
6641 	if (un->un_resvd_status &
6642 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
6643 		mutex_exit(SD_MUTEX(un));
6644 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6645 		    "device in use by HA, exiting\n");
6646 		return (DDI_FAILURE);
6647 	}
6648 
6649 	/*
6650 	 * Return failure if the device is in a resource wait
6651 	 * or power changing state.
6652 	 */
6653 	if ((un->un_state == SD_STATE_RWAIT) ||
6654 	    (un->un_state == SD_STATE_PM_CHANGING)) {
6655 		mutex_exit(SD_MUTEX(un));
6656 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6657 		    "device in resource wait state, exiting\n");
6658 		return (DDI_FAILURE);
6659 	}
6660 
6661 
6662 	un->un_save_state = un->un_last_state;
6663 	New_state(un, SD_STATE_SUSPENDED);
6664 
6665 	/*
6666 	 * Wait for all commands that are in transport or queued to a timer
6667 	 * for retry to complete.
6668 	 *
6669 	 * While waiting, no new commands will be accepted or sent because of
6670 	 * the new state we set above.
6671 	 *
6672 	 * Wait until the current operation has completed. If we are in the
6673 	 * resource wait state (with an intr outstanding) then we need to wait
6674 	 * until the intr completes and starts the next cmd. We want to wait for
6675 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
6676 	 */
6677 	wait_cmds_complete = ddi_get_lbolt() +
6678 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
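
	/*
	 * drv_usectohz(1000000) is one second expressed in clock ticks, so
	 * wait_cmds_complete is "now" (in lbolt ticks) plus
	 * sd_wait_cmds_complete seconds, suitable for cv_timedwait() below.
	 */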
6679 
6680 	while (un->un_ncmds_in_transport != 0) {
6681 		/*
6682 		 * Fail if commands do not finish in the specified time.
6683 		 */
6684 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
6685 		    wait_cmds_complete) == -1) {
6686 			/*
6687 			 * Undo the state changes made above. Everything
6688 			 * must go back to its original value.
6689 			 */
6690 			Restore_state(un);
6691 			un->un_last_state = un->un_save_state;
6692 			/* Wake up any threads that might be waiting. */
6693 			cv_broadcast(&un->un_suspend_cv);
6694 			mutex_exit(SD_MUTEX(un));
6695 			SD_ERROR(SD_LOG_IO_PM, un,
6696 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
6697 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
6698 			return (DDI_FAILURE);
6699 		}
6700 	}
6701 
6702 	/*
6703 	 * Cancel SCSI watch thread and timeouts, if any are active
6704 	 */
6705 
6706 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
6707 		opaque_t temp_token = un->un_swr_token;
6708 		mutex_exit(SD_MUTEX(un));
6709 		scsi_watch_suspend(temp_token);
6710 		mutex_enter(SD_MUTEX(un));
6711 	}
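
	/*
	 * Pattern for each timeout below: clear the saved id and drop
	 * SD_MUTEX before calling untimeout(9F), since untimeout() waits
	 * for a running handler to finish and the handler being cancelled
	 * may itself need SD_MUTEX; holding the mutex across the call
	 * could otherwise deadlock.
	 */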
6712 
6713 	if (un->un_reset_throttle_timeid != NULL) {
6714 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
6715 		un->un_reset_throttle_timeid = NULL;
6716 		mutex_exit(SD_MUTEX(un));
6717 		(void) untimeout(temp_id);
6718 		mutex_enter(SD_MUTEX(un));
6719 	}
6720 
6721 	if (un->un_dcvb_timeid != NULL) {
6722 		timeout_id_t temp_id = un->un_dcvb_timeid;
6723 		un->un_dcvb_timeid = NULL;
6724 		mutex_exit(SD_MUTEX(un));
6725 		(void) untimeout(temp_id);
6726 		mutex_enter(SD_MUTEX(un));
6727 	}
6728 
6729 	mutex_enter(&un->un_pm_mutex);
6730 	if (un->un_pm_timeid != NULL) {
6731 		timeout_id_t temp_id = un->un_pm_timeid;
6732 		un->un_pm_timeid = NULL;
6733 		mutex_exit(&un->un_pm_mutex);
6734 		mutex_exit(SD_MUTEX(un));
6735 		(void) untimeout(temp_id);
6736 		mutex_enter(SD_MUTEX(un));
6737 	} else {
6738 		mutex_exit(&un->un_pm_mutex);
6739 	}
6740 
6741 	if (un->un_retry_timeid != NULL) {
6742 		timeout_id_t temp_id = un->un_retry_timeid;
6743 		un->un_retry_timeid = NULL;
6744 		mutex_exit(SD_MUTEX(un));
6745 		(void) untimeout(temp_id);
6746 		mutex_enter(SD_MUTEX(un));
6747 	}
6748 
6749 	if (un->un_direct_priority_timeid != NULL) {
6750 		timeout_id_t temp_id = un->un_direct_priority_timeid;
6751 		un->un_direct_priority_timeid = NULL;
6752 		mutex_exit(SD_MUTEX(un));
6753 		(void) untimeout(temp_id);
6754 		mutex_enter(SD_MUTEX(un));
6755 	}
6756 
6757 	if (un->un_f_is_fibre == TRUE) {
6758 		/*
6759 		 * Remove callbacks for insert and remove events
6760 		 */
6761 		if (un->un_insert_event != NULL) {
6762 			mutex_exit(SD_MUTEX(un));
6763 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
6764 			mutex_enter(SD_MUTEX(un));
6765 			un->un_insert_event = NULL;
6766 		}
6767 
6768 		if (un->un_remove_event != NULL) {
6769 			mutex_exit(SD_MUTEX(un));
6770 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
6771 			mutex_enter(SD_MUTEX(un));
6772 			un->un_remove_event = NULL;
6773 		}
6774 	}
6775 
6776 	mutex_exit(SD_MUTEX(un));
6777 
6778 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
6779 
6780 	return (DDI_SUCCESS);
6781 }
6782 
6783 
6784 /*
6785  *    Function: sd_ddi_pm_suspend
6786  *
6787  * Description: Set the drive state to low power.
6788  *		Someone else is required to actually change the drive
6789  *		power level.
6790  *
6791  *   Arguments: un - driver soft state (unit) structure
6792  *
6793  * Return Code: DDI_FAILURE or DDI_SUCCESS
6794  *
6795  *     Context: Kernel thread context
6796  */
6797 
6798 static int
6799 sd_ddi_pm_suspend(struct sd_lun *un)
6800 {
6801 	ASSERT(un != NULL);
6802 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
6803 
6804 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6805 	mutex_enter(SD_MUTEX(un));
6806 
6807 	/*
6808 	 * Exit if power management is not enabled for this device, or if
6809 	 * the device is being used by HA.
6810 	 */
6811 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
6812 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
6813 		mutex_exit(SD_MUTEX(un));
6814 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
6815 		return (DDI_SUCCESS);
6816 	}
6817 
6818 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
6819 	    un->un_ncmds_in_driver);
6820 
6821 	/*
6822 	 * See if the device is not busy, i.e.:
6823 	 *    - we have no commands in the driver for this device
6824 	 *    - not waiting for resources
6825 	 */
6826 	if ((un->un_ncmds_in_driver == 0) &&
6827 	    (un->un_state != SD_STATE_RWAIT)) {
6828 		/*
6829 		 * The device is not busy, so it is OK to go to low power state.
6830 		 * Indicate low power, but rely on someone else to actually
6831 		 * change it.
6832 		 */
6833 		mutex_enter(&un->un_pm_mutex);
6834 		un->un_pm_count = -1;
6835 		mutex_exit(&un->un_pm_mutex);
6836 		un->un_power_level = SD_SPINDLE_OFF;
6837 	}
6838 
6839 	mutex_exit(SD_MUTEX(un));
6840 
6841 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
6842 
6843 	return (DDI_SUCCESS);
6844 }
6845 
6846 
6847 /*
6848  *    Function: sd_ddi_resume
6849  *
6850  * Description: Performs system power-up operations.
6851  *
6852  * Return Code: DDI_SUCCESS
6853  *		DDI_FAILURE
6854  *
6855  *     Context: Kernel thread context
6856  */
6857 
6858 static int
6859 sd_ddi_resume(dev_info_t *devi)
6860 {
6861 	struct	sd_lun	*un;
6862 
6863 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6864 	if (un == NULL) {
6865 		return (DDI_FAILURE);
6866 	}
6867 
6868 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
6869 
6870 	mutex_enter(SD_MUTEX(un));
6871 	Restore_state(un);
6872 
6873 	/*
6874 	 * Restore the state which was saved so that un_last_state
6875 	 * holds the right value.
6876 	 */
6877 	un->un_last_state = un->un_save_state;
6878 	/*
6879 	 * Note: throttle comes back at full.
6880 	 * Also note: this MUST be done before calling pm_raise_power,
6881 	 * otherwise the system can get hung in biowait. The scenario where
6882 	 * this will happen is under cpr suspend. Writing of the system
6883 	 * state goes through sddump, which writes 0 to un_throttle. If
6884 	 * writing the system state then fails, for example if the partition
6885 	 * is too small, then cpr attempts a resume. If the throttle isn't
6886 	 * restored from the saved value until after calling pm_raise_power,
6887 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
6888 	 * in biowait.
6889 	 */
6890 	un->un_throttle = un->un_saved_throttle;
6891 
6892 	/*
6893 	 * The chance of failure is very rare, as the only command done in the
6894 	 * power entry point is the START command on a transition from 0->1 or
6895 	 * unknown->1. Put the device in the SPINDLE ON state irrespective of
6896 	 * the state in which suspend was done. Ignore the return value, as the
6897 	 * resume should not fail. In the case of removable media the media
6898 	 * need not be inserted, and hence there is a chance that raise power
6899 	 * will fail with media not present.
6900 	 */
6901 	if (un->un_f_attach_spinup) {
6902 		mutex_exit(SD_MUTEX(un));
6903 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
6904 		mutex_enter(SD_MUTEX(un));
6905 	}
6906 
6907 	/*
6908 	 * Don't broadcast to the suspend cv and therefore possibly
6909 	 * start I/O until after power has been restored.
6910 	 */
6911 	cv_broadcast(&un->un_suspend_cv);
6912 	cv_broadcast(&un->un_state_cv);
6913 
6914 	/* restart thread */
6915 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
6916 		scsi_watch_resume(un->un_swr_token);
6917 	}
6918 
6919 #if (defined(__fibre))
6920 	if (un->un_f_is_fibre == TRUE) {
6921 		/*
6922 		 * Add callbacks for insert and remove events
6923 		 */
6924 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
6925 			sd_init_event_callbacks(un);
6926 		}
6927 	}
6928 #endif
6929 
6930 	/*
6931 	 * Transport any pending commands to the target.
6932 	 *
6933 	 * If this is a low-activity device, commands in the queue will have to
6934 	 * wait until new commands come in, which may take a while. Also, we
6935 	 * specifically don't check un_ncmds_in_transport because we know that
6936 	 * there really are no commands in progress after the unit was
6937 	 * suspended, and we could have reached the throttle level, been
6938 	 * suspended, and have no new commands coming in for a while. Highly
6939 	 * unlikely, but so is the low-activity disk scenario.
6940 	 */
6941 	ddi_xbuf_dispatch(un->un_xbuf_attr);
6942 
6943 	sd_start_cmds(un, NULL);
6944 	mutex_exit(SD_MUTEX(un));
6945 
6946 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
6947 
6948 	return (DDI_SUCCESS);
6949 }
6950 
6951 
6952 /*
6953  *    Function: sd_ddi_pm_resume
6954  *
6955  * Description: Set the drive state to powered on.
6956  *		Someone else is required to actually change the drive
6957  *		power level.
6958  *
6959  *   Arguments: un - driver soft state (unit) structure
6960  *
6961  * Return Code: DDI_SUCCESS
6962  *
6963  *     Context: Kernel thread context
6964  */
6965 
6966 static int
6967 sd_ddi_pm_resume(struct sd_lun *un)
6968 {
6969 	ASSERT(un != NULL);
6970 
6971 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6972 	mutex_enter(SD_MUTEX(un));
6973 	un->un_power_level = SD_SPINDLE_ON;
6974 
6975 	ASSERT(!mutex_owned(&un->un_pm_mutex));
6976 	mutex_enter(&un->un_pm_mutex);
6977 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
6978 		un->un_pm_count++;
6979 		ASSERT(un->un_pm_count == 0);
6980 		/*
6981 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
6982 		 * un_suspend_cv is for a system resume, not a power management
6983 		 * device resume. (4297749)
6984 		 *	 cv_broadcast(&un->un_suspend_cv);
6985 		 */
6986 	}
6987 	mutex_exit(&un->un_pm_mutex);
6988 	mutex_exit(SD_MUTEX(un));
6989 
6990 	return (DDI_SUCCESS);
6991 }
6992 
6993 
6994 /*
6995  *    Function: sd_pm_idletimeout_handler
6996  *
6997  * Description: A timer routine that's active only while a device is busy.
6998  *		The purpose is to extend slightly the pm framework's busy
6999  *		view of the device to prevent busy/idle thrashing for
7000  *		back-to-back commands. Do this by comparing the current time
7001  *		to the time at which the last command completed and when the
7002  *		difference is greater than sd_pm_idletime, call
7003  *		pm_idle_component. In addition to indicating idle to the pm
7004  *		framework, update the chain type to again use the internal pm
7005  *		layers of the driver.
7006  *
7007  *   Arguments: arg - driver soft state (unit) structure
7008  *
7009  *     Context: Executes in a timeout(9F) thread context
7010  */
7011 
7012 static void
7013 sd_pm_idletimeout_handler(void *arg)
7014 {
7015 	struct sd_lun *un = arg;
7016 
7017 	time_t	now;
7018 
7019 	mutex_enter(&sd_detach_mutex);
7020 	if (un->un_detach_count != 0) {
7021 		/* Abort if the instance is detaching */
7022 		mutex_exit(&sd_detach_mutex);
7023 		return;
7024 	}
7025 	mutex_exit(&sd_detach_mutex);
7026 
7027 	now = ddi_get_time();
7028 	/*
7029 	 * Grab both mutexes, in the proper order, since we're accessing
7030 	 * both PM and softstate variables.
7031 	 */
7032 	mutex_enter(SD_MUTEX(un));
7033 	mutex_enter(&un->un_pm_mutex);
7034 	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
7035 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
7036 		/*
7037 		 * Update the chain types.
7038 		 * This takes effect on the next new command received.
7039 		 */
7040 		if (un->un_f_non_devbsize_supported) {
7041 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7042 		} else {
7043 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7044 		}
7045 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7046 
7047 		SD_TRACE(SD_LOG_IO_PM, un,
7048 		    "sd_pm_idletimeout_handler: idling device\n");
7049 		(void) pm_idle_component(SD_DEVINFO(un), 0);
7050 		un->un_pm_idle_timeid = NULL;
7051 	} else {
7052 		un->un_pm_idle_timeid =
7053 		    timeout(sd_pm_idletimeout_handler, un,
7054 		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
7055 	}
7056 	mutex_exit(&un->un_pm_mutex);
7057 	mutex_exit(SD_MUTEX(un));
7058 }
7059 
7060 
7061 /*
7062  *    Function: sd_pm_timeout_handler
7063  *
7064  * Description: Callback to tell framework we are idle.
7065  *
7066  *     Context: timeout(9f) thread context.
7067  */
7068 
7069 static void
7070 sd_pm_timeout_handler(void *arg)
7071 {
7072 	struct sd_lun *un = arg;
7073 
7074 	(void) pm_idle_component(SD_DEVINFO(un), 0);
7075 	mutex_enter(&un->un_pm_mutex);
7076 	un->un_pm_timeid = NULL;
7077 	mutex_exit(&un->un_pm_mutex);
7078 }
7079 
7080 
7081 /*
7082  *    Function: sdpower
7083  *
7084  * Description: PM entry point.
7085  *
7086  * Return Code: DDI_SUCCESS
7087  *		DDI_FAILURE
7088  *
7089  *     Context: Kernel thread context
7090  */
7091 
7092 static int
7093 sdpower(dev_info_t *devi, int component, int level)
7094 {
7095 	struct sd_lun	*un;
7096 	int		instance;
7097 	int		rval = DDI_SUCCESS;
7098 	uint_t		i, log_page_size, maxcycles, ncycles;
7099 	uchar_t		*log_page_data;
7100 	int		log_sense_page;
7101 	int		medium_present;
7102 	time_t		intvlp;
7103 	dev_t		dev;
7104 	struct pm_trans_data	sd_pm_tran_data;
7105 	uchar_t		save_state;
7106 	int		sval;
7107 	uchar_t		state_before_pm;
7108 	int		got_semaphore_here;
7109 
7110 	instance = ddi_get_instance(devi);
7111 
7112 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
7113 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
7114 	    component != 0) {
7115 		return (DDI_FAILURE);
7116 	}
7117 
7118 	dev = sd_make_device(SD_DEVINFO(un));
7119 
7120 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
7121 
7122 	/*
7123 	 * Must synchronize power down with close.
7124 	 * Attempt to decrement/acquire the open/close semaphore,
7125 	 * but do NOT wait on it. If it's not greater than zero,
7126 	 * i.e. it can't be decremented without waiting, then
7127 	 * someone else, either open or close, already has it
7128 	 * and the try returns 0. Use that knowledge here to determine
7129 	 * if it's OK to change the device power level.
7130 	 * Also, only increment it on exit if it was decremented, i.e. gotten,
7131 	 * here.
7132 	 */
7133 	got_semaphore_here = sema_tryp(&un->un_semoclose);
7134 
7135 	mutex_enter(SD_MUTEX(un));
7136 
7137 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
7138 	    un->un_ncmds_in_driver);
7139 
7140 	/*
7141 	 * If un_ncmds_in_driver is non-zero it indicates commands are
7142 	 * already being processed in the driver; if the semaphore was
7143 	 * not gotten here it indicates an open or close is in progress.
7144 	 * Either way the device is busy, so a request to go to low power
7145 	 * can't be granted and we must return failure.
7146 	 */
7147 	if ((level == SD_SPINDLE_OFF) &&
7148 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
7149 		mutex_exit(SD_MUTEX(un));
7150 
7151 		if (got_semaphore_here != 0) {
7152 			sema_v(&un->un_semoclose);
7153 		}
7154 		SD_TRACE(SD_LOG_IO_PM, un,
7155 		    "sdpower: exit, device has queued cmds.\n");
7156 		return (DDI_FAILURE);
7157 	}
7158 
7159 	/*
7160 	 * If the state is OFFLINE, the disk is completely dead; changing its
7161 	 * power level requires sending it commands, which would of course
7162 	 * fail anyway, so return here.
7163 	 *
7164 	 * Power changes to a device that's OFFLINE or SUSPENDED
7165 	 * are not allowed.
7166 	 */
7167 	if ((un->un_state == SD_STATE_OFFLINE) ||
7168 	    (un->un_state == SD_STATE_SUSPENDED)) {
7169 		mutex_exit(SD_MUTEX(un));
7170 
7171 		if (got_semaphore_here != 0) {
7172 			sema_v(&un->un_semoclose);
7173 		}
7174 		SD_TRACE(SD_LOG_IO_PM, un,
7175 		    "sdpower: exit, device is off-line.\n");
7176 		return (DDI_FAILURE);
7177 	}
7178 
7179 	/*
7180 	 * Change the device's state to indicate its power level
7181 	 * is being changed. Do this to prevent a power off in the
7182 	 * middle of commands, which is especially bad on devices
7183 	 * that are really powered off instead of just spun down.
7184 	 */
7185 	state_before_pm = un->un_state;
7186 	un->un_state = SD_STATE_PM_CHANGING;
7187 
7188 	mutex_exit(SD_MUTEX(un));
7189 
7190 	/*
7191 	 * If the "pm-capable" property is set to TRUE by HBA drivers,
7192 	 * bypass the following checking; otherwise, check the log
7193 	 * sense information for this device.
7194 	 */
7195 	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
7196 		/*
7197 		 * Get the log sense information to understand whether the
7198 		 * power cycle counts have gone beyond the threshold.
7199 		 */
7200 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
7201 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
7202 
7203 		mutex_enter(SD_MUTEX(un));
7204 		log_sense_page = un->un_start_stop_cycle_page;
7205 		mutex_exit(SD_MUTEX(un));
7206 
7207 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
7208 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
7209 #ifdef	SDDEBUG
7210 		if (sd_force_pm_supported) {
7211 			/* Force a successful result */
7212 			rval = 0;
7213 		}
7214 #endif
7215 		if (rval != 0) {
7216 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
7217 			    "Log Sense Failed\n");
7218 			kmem_free(log_page_data, log_page_size);
7219 			/* Cannot support power management on those drives */
7220 
7221 			if (got_semaphore_here != 0) {
7222 				sema_v(&un->un_semoclose);
7223 			}
7224 			/*
7225 			 * On exit put the state back to its original value
7226 			 * and broadcast to anyone waiting for the power
7227 			 * change completion.
7228 			 */
7229 			mutex_enter(SD_MUTEX(un));
7230 			un->un_state = state_before_pm;
7231 			cv_broadcast(&un->un_suspend_cv);
7232 			mutex_exit(SD_MUTEX(un));
7233 			SD_TRACE(SD_LOG_IO_PM, un,
7234 			    "sdpower: exit, Log Sense Failed.\n");
7235 			return (DDI_FAILURE);
7236 		}
7237 
7238 		/*
7239 		 * From the page data - Convert the essential information to
7240 		 * pm_trans_data
7241 		 */
7242 		maxcycles =
7243 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
7244 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
7245 
7246 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
7247 
7248 		ncycles =
7249 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
7250 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
7251 
7252 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
7253 
7254 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
7255 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
7256 			    log_page_data[8+i];
7257 		}
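
		/*
		 * The offsets used above (0x1c-0x1f for the lifetime maximum,
		 * 0x24-0x27 for the accumulated count, byte 8 onward for the
		 * manufacture/service dates) follow the layout of the
		 * start/stop cycle counter log page; the counters are stored
		 * big-endian, hence the explicit byte assembly.
		 */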
7258 
7259 		kmem_free(log_page_data, log_page_size);
7260 
7261 		/*
7262 		 * Call pm_trans_check routine to get the Ok from
7263 		 * the global policy
7264 		 */
7265 
7266 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
7267 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
7268 
7269 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
7270 #ifdef	SDDEBUG
7271 		if (sd_force_pm_supported) {
7272 			/* Force a successful result */
7273 			rval = 1;
7274 		}
7275 #endif
7276 		switch (rval) {
7277 		case 0:
7278 			/*
7279 			 * Not OK to power cycle, or an error in the parameters
7280 			 * passed. pm_trans_check has given the advised time to
7281 			 * wait before considering a power cycle. Based on the
7282 			 * new intvlp parameter we are supposed to pretend we
7283 			 * are busy so that the pm framework will never call our
7284 			 * power entry point. Because of that, install a timeout
7285 			 * handler and wait for the recommended time to elapse
7286 			 * so that power management can be effective again.
7287 			 *
7288 			 * To effect this behavior, call pm_busy_component to
7289 			 * indicate to the framework this device is busy.
7290 			 * By not adjusting un_pm_count the rest of PM in
7291 			 * the driver will function normally, and independent
7292 			 * of this; but because the framework is told the device
7293 			 * is busy it won't attempt powering down until it gets
7294 			 * a matching idle. The timeout handler sends this.
7295 			 * Note: sd_pm_entry can't be called here to do this
7296 			 * because sdpower may have been called as a result
7297 			 * of a call to pm_raise_power from within sd_pm_entry.
7298 			 *
7299 			 * If a timeout handler is already active then
7300 			 * don't install another.
7301 			 */
7302 			mutex_enter(&un->un_pm_mutex);
7303 			if (un->un_pm_timeid == NULL) {
7304 				un->un_pm_timeid =
7305 				    timeout(sd_pm_timeout_handler,
7306 				    un, intvlp * drv_usectohz(1000000));
7307 				mutex_exit(&un->un_pm_mutex);
7308 				(void) pm_busy_component(SD_DEVINFO(un), 0);
7309 			} else {
7310 				mutex_exit(&un->un_pm_mutex);
7311 			}
7312 			if (got_semaphore_here != 0) {
7313 				sema_v(&un->un_semoclose);
7314 			}
7315 			/*
7316 			 * On exit put the state back to its original value
7317 			 * and broadcast to anyone waiting for the power
7318 			 * change completion.
7319 			 */
7320 			mutex_enter(SD_MUTEX(un));
7321 			un->un_state = state_before_pm;
7322 			cv_broadcast(&un->un_suspend_cv);
7323 			mutex_exit(SD_MUTEX(un));
7324 
7325 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
7326 			    "trans check Failed, not ok to power cycle.\n");
7327 			return (DDI_FAILURE);
7328 
7329 		case -1:
7330 			if (got_semaphore_here != 0) {
7331 				sema_v(&un->un_semoclose);
7332 			}
7333 			/*
7334 			 * On exit put the state back to its original value
7335 			 * and broadcast to anyone waiting for the power
7336 			 * change completion.
7337 			 */
7338 			mutex_enter(SD_MUTEX(un));
7339 			un->un_state = state_before_pm;
7340 			cv_broadcast(&un->un_suspend_cv);
7341 			mutex_exit(SD_MUTEX(un));
7342 			SD_TRACE(SD_LOG_IO_PM, un,
7343 			    "sdpower: exit, trans check command Failed.\n");
7344 			return (DDI_FAILURE);
7345 		}
7346 	}
7347 
7348 	if (level == SD_SPINDLE_OFF) {
7349 		/*
7350 		 * Save the last state... if the STOP fails we need it
7351 		 * for restoring.
7352 		 */
7353 		mutex_enter(SD_MUTEX(un));
7354 		save_state = un->un_last_state;
7355 		/*
7356 		 * There must not be any cmds getting processed
7357 		 * in the driver when we get here. Power to the
7358 		 * device is potentially going off.
7359 		 */
7360 		ASSERT(un->un_ncmds_in_driver == 0);
7361 		mutex_exit(SD_MUTEX(un));
7362 
7363 		/*
7364 		 * For now, suspend the device completely before the spindle
7365 		 * is turned off.
7366 		 */
7367 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
7368 			if (got_semaphore_here != 0) {
7369 				sema_v(&un->un_semoclose);
7370 			}
7371 			/*
7372 			 * On exit put the state back to its original value
7373 			 * and broadcast to anyone waiting for the power
7374 			 * change completion.
7375 			 */
7376 			mutex_enter(SD_MUTEX(un));
7377 			un->un_state = state_before_pm;
7378 			cv_broadcast(&un->un_suspend_cv);
7379 			mutex_exit(SD_MUTEX(un));
7380 			SD_TRACE(SD_LOG_IO_PM, un,
7381 			    "sdpower: exit, PM suspend Failed.\n");
7382 			return (DDI_FAILURE);
7383 		}
7384 	}
7385 
7386 	/*
7387 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
7388 	 * close, or strategy. Dump no longer uses this routine; it uses its
7389 	 * own code so it can be done in polled mode.
7390 	 */
7391 
7392 	medium_present = TRUE;
7393 
7394 	/*
7395 	 * When powering up, issue a TUR in case the device is at unit
7396 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
7397 	 * a deadlock on un_pm_busy_cv will occur.
7398 	 */
7399 	if (level == SD_SPINDLE_ON) {
7400 		(void) sd_send_scsi_TEST_UNIT_READY(un,
7401 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
7402 	}
7403 
7404 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
7405 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
7406 
7407 	sval = sd_send_scsi_START_STOP_UNIT(un,
7408 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
7409 	    SD_PATH_DIRECT);
7410 	/* Command failed, check for media present. */
7411 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
7412 		medium_present = FALSE;
7413 	}
7414 
7415 	/*
7416 	 * The conditions of interest here are:
7417 	 *   if a spindle off with media present fails,
7418 	 *	then restore the state and return an error.
7419 	 *   else if a spindle on fails,
7420 	 *	then return an error (there's no state to restore).
7421 	 * In all other cases we setup for the new state
7422 	 * and return success.
7423 	 */
7424 	switch (level) {
7425 	case SD_SPINDLE_OFF:
7426 		if ((medium_present == TRUE) && (sval != 0)) {
7427 			/* The stop command from above failed */
7428 			rval = DDI_FAILURE;
7429 			/*
7430 			 * The stop command failed, and we have media
7431 			 * present. Put the level back by calling
7432 			 * sd_ddi_pm_resume() and set the state back to
7433 			 * its previous value.
7434 			 */
7435 			(void) sd_ddi_pm_resume(un);
7436 			mutex_enter(SD_MUTEX(un));
7437 			un->un_last_state = save_state;
7438 			mutex_exit(SD_MUTEX(un));
7439 			break;
7440 		}
7441 		/*
7442 		 * The stop command from above succeeded.
7443 		 */
7444 		if (un->un_f_monitor_media_state) {
7445 			/*
7446 			 * Terminate watch thread in case of removable media
7447 			 * devices going into low power state. This is as per
7448 			 * the requirements of the pm framework; otherwise commands
7449 			 * will be generated for the device (through watch
7450 			 * thread), even when the device is in low power state.
7451 			 */
7452 			mutex_enter(SD_MUTEX(un));
7453 			un->un_f_watcht_stopped = FALSE;
7454 			if (un->un_swr_token != NULL) {
7455 				opaque_t temp_token = un->un_swr_token;
7456 				un->un_f_watcht_stopped = TRUE;
7457 				un->un_swr_token = NULL;
7458 				mutex_exit(SD_MUTEX(un));
7459 				(void) scsi_watch_request_terminate(temp_token,
7460 				    SCSI_WATCH_TERMINATE_WAIT);
7461 			} else {
7462 				mutex_exit(SD_MUTEX(un));
7463 			}
7464 		}
7465 		break;
7466 
7467 	default:	/* The level requested is spindle on... */
7468 		/*
7469 		 * Legacy behavior: return success on a failed spinup
7470 		 * if there is no media in the drive.
7471 		 * Do this by looking at medium_present here.
7472 		 */
7473 		if ((sval != 0) && medium_present) {
7474 			/* The start command from above failed */
7475 			rval = DDI_FAILURE;
7476 			break;
7477 		}
7478 		/*
7479 		 * The start command from above succeeded.
7480 		 * Resume the device now that we have
7481 		 * started the disk.
7482 		 */
7483 		(void) sd_ddi_pm_resume(un);
7484 
7485 		/*
7486 		 * Resume the watch thread since it was suspended
7487 		 * when the device went into low power mode.
7488 		 */
7489 		if (un->un_f_monitor_media_state) {
7490 			mutex_enter(SD_MUTEX(un));
7491 			if (un->un_f_watcht_stopped == TRUE) {
7492 				opaque_t temp_token;
7493 
7494 				un->un_f_watcht_stopped = FALSE;
7495 				mutex_exit(SD_MUTEX(un));
7496 				temp_token = scsi_watch_request_submit(
7497 				    SD_SCSI_DEVP(un),
7498 				    sd_check_media_time,
7499 				    SENSE_LENGTH, sd_media_watch_cb,
7500 				    (caddr_t)dev);
7501 				mutex_enter(SD_MUTEX(un));
7502 				un->un_swr_token = temp_token;
7503 			}
7504 			mutex_exit(SD_MUTEX(un));
7505 		}
7506 	}
7507 	if (got_semaphore_here != 0) {
7508 		sema_v(&un->un_semoclose);
7509 	}
7510 	/*
7511 	 * On exit put the state back to its original value
7512 	 * and broadcast to anyone waiting for the power
7513 	 * change completion.
7514 	 */
7515 	mutex_enter(SD_MUTEX(un));
7516 	un->un_state = state_before_pm;
7517 	cv_broadcast(&un->un_suspend_cv);
7518 	mutex_exit(SD_MUTEX(un));
7519 
7520 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
7521 
7522 	return (rval);
7523 }
7524 
7525 
7526 
7527 /*
7528  *    Function: sdattach
7529  *
7530  * Description: Driver's attach(9e) entry point function.
7531  *
7532  *   Arguments: devi - opaque device info handle
7533  *		cmd  - attach  type
7534  *
7535  * Return Code: DDI_SUCCESS
7536  *		DDI_FAILURE
7537  *
7538  *     Context: Kernel thread context
7539  */
7540 
7541 static int
7542 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
7543 {
7544 	switch (cmd) {
7545 	case DDI_ATTACH:
7546 		return (sd_unit_attach(devi));
7547 	case DDI_RESUME:
7548 		return (sd_ddi_resume(devi));
7549 	default:
7550 		break;
7551 	}
7552 	return (DDI_FAILURE);
7553 }
7554 
7555 
7556 /*
7557  *    Function: sddetach
7558  *
7559  * Description: Driver's detach(9E) entry point function.
7560  *
7561  *   Arguments: devi - opaque device info handle
7562  *		cmd  - detach  type
7563  *
7564  * Return Code: DDI_SUCCESS
7565  *		DDI_FAILURE
7566  *
7567  *     Context: Kernel thread context
7568  */
7569 
7570 static int
7571 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
7572 {
7573 	switch (cmd) {
7574 	case DDI_DETACH:
7575 		return (sd_unit_detach(devi));
7576 	case DDI_SUSPEND:
7577 		return (sd_ddi_suspend(devi));
7578 	default:
7579 		break;
7580 	}
7581 	return (DDI_FAILURE);
7582 }
7583 
7584 
7585 /*
7586  *     Function: sd_sync_with_callback
7587  *
7588  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
7589  *		 state while the callback routine is active.
7590  *
7591  *    Arguments: un: softstate structure for the instance
7592  *
7593  *	Context: Kernel thread context
7594  */
7595 
7596 static void
7597 sd_sync_with_callback(struct sd_lun *un)
7598 {
7599 	ASSERT(un != NULL);
7600 
7601 	mutex_enter(SD_MUTEX(un));
7602 
7603 	ASSERT(un->un_in_callback >= 0);
7604 
7605 	while (un->un_in_callback > 0) {
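		/* Poll: sleep two clock ticks, then re-check the count. */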
7606 		mutex_exit(SD_MUTEX(un));
7607 		delay(2);
7608 		mutex_enter(SD_MUTEX(un));
7609 	}
7610 
7611 	mutex_exit(SD_MUTEX(un));
7612 }
7613 
7614 /*
7615  *    Function: sd_unit_attach
7616  *
7617  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
7618  *		the soft state structure for the device and performs
7619  *		all necessary structure and device initializations.
7620  *
7621  *   Arguments: devi: the system's dev_info_t for the device.
7622  *
7623  * Return Code: DDI_SUCCESS if attach is successful.
7624  *		DDI_FAILURE if any part of the attach fails.
7625  *
7626  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
7627  *		Kernel thread context only.  Can sleep.
7628  */
7629 
7630 static int
7631 sd_unit_attach(dev_info_t *devi)
7632 {
7633 	struct	scsi_device	*devp;
7634 	struct	sd_lun		*un;
7635 	char			*variantp;
7636 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
7637 	int	instance;
7638 	int	rval;
7639 	int	wc_enabled;
7640 	uint64_t	capacity;
7641 	uint_t		lbasize;
7642 
7643 	/*
7644 	 * Retrieve the target driver's private data area. This was set
7645 	 * up by the HBA.
7646 	 */
7647 	devp = ddi_get_driver_private(devi);
7648 
7649 	/*
7650 	 * Since we have no idea what state things were left in by the last
7651 	 * user of the device, set up some 'default' settings, i.e. turn them
7652 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
7653 	 * Do this before the scsi_probe, which sends an inquiry.
7654 	 * This is a fix for bug (4430280).
7655 	 * Of special importance is wide-xfer. The drive could have been left
7656 	 * in wide transfer mode by the last driver to communicate with it,
7657 	 * this includes us. If that's the case, and if the following is not
7658 	 * setup properly or we don't re-negotiate with the drive prior to
7659 	 * transferring data to/from the drive, it causes bus parity errors,
7660 	 * data overruns, and unexpected interrupts. This first occurred when
7661 	 * the fix for bug (4378686) was made.
7662 	 */
7663 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
7664 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
7665 	(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
7666 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
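
	/*
	 * In each scsi_ifsetcap(9F) call above, the third argument is the
	 * value (0, turning the capability off) and the fourth is the scope
	 * (1 = this target only). Turning each capability off here is what
	 * forces the re-negotiation mentioned above.
	 */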
7667 
7668 	/*
7669 	 * Use scsi_probe() to issue an INQUIRY command to the device.
7670 	 * This call will allocate and fill in the scsi_inquiry structure
7671 	 * and point the sd_inq member of the scsi_device structure to it.
7672 	 * If the attach succeeds, then this memory will not be de-allocated
7673 	 * (via scsi_unprobe()) until the instance is detached.
7674 	 */
7675 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
7676 		goto probe_failed;
7677 	}
7678 
7679 	/*
7680 	 * Check the device type as specified in the inquiry data and
7681 	 * claim it if it is of a type that we support.
7682 	 */
7683 	switch (devp->sd_inq->inq_dtype) {
7684 	case DTYPE_DIRECT:
7685 		break;
7686 	case DTYPE_RODIRECT:
7687 		break;
7688 	case DTYPE_OPTICAL:
7689 		break;
7690 	case DTYPE_NOTPRESENT:
7691 	default:
7692 		/* Unsupported device type; fail the attach. */
7693 		goto probe_failed;
7694 	}
7695 
7696 	/*
7697 	 * Allocate the soft state structure for this unit.
7698 	 *
7699 	 * We rely upon this memory being set to all zeroes by
7700 	 * ddi_soft_state_zalloc().  We assume that any member of the
7701 	 * soft state structure that is not explicitly initialized by
7702 	 * this routine will have a value of zero.
7703 	 */
7704 	instance = ddi_get_instance(devp->sd_dev);
7705 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
7706 		goto probe_failed;
7707 	}
7708 
7709 	/*
7710 	 * Retrieve a pointer to the newly-allocated soft state.
7711 	 *
7712 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
7713 	 * was successful, unless something has gone horribly wrong and the
7714 	 * ddi's soft state internals are corrupt (in which case it is
7715  * probably better to halt here than just fail the attach...)
7716 	 */
7717 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
7718 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
7719 		    instance);
7720 		/*NOTREACHED*/
7721 	}
7722 
7723 	/*
7724 	 * Link the back ptr of the driver soft state to the scsi_device
7725 	 * struct for this lun.
7726 	 * Save a pointer to the softstate in the driver-private area of
7727 	 * the scsi_device struct.
7728 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
7729 	 * we first set un->un_sd below.
7730 	 */
7731 	un->un_sd = devp;
7732 	devp->sd_private = (opaque_t)un;
7733 
7734 	/*
7735 	 * The following must be after devp is stored in the soft state struct.
7736 	 */
7737 #ifdef SDDEBUG
7738 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7739 	    "%s_unit_attach: un:0x%p instance:%d\n",
7740 	    ddi_driver_name(devi), un, instance);
7741 #endif
7742 
7743 	/*
7744 	 * Set up the device type and node type (for the minor nodes).
7745 	 * By default we assume that the device can at least support the
7746 	 * Common Command Set. Call it a CD-ROM if it reports itself
7747 	 * as a RODIRECT device.
7748 	 */
7749 	switch (devp->sd_inq->inq_dtype) {
7750 	case DTYPE_RODIRECT:
7751 		un->un_node_type = DDI_NT_CD_CHAN;
7752 		un->un_ctype	 = CTYPE_CDROM;
7753 		break;
7754 	case DTYPE_OPTICAL:
7755 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7756 		un->un_ctype	 = CTYPE_ROD;
7757 		break;
7758 	default:
7759 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7760 		un->un_ctype	 = CTYPE_CCS;
7761 		break;
7762 	}
7763 
7764 	/*
7765 	 * Try to read the interconnect type from the HBA.
7766 	 *
7767 	 * Note: This driver is currently compiled as two binaries, a parallel
7768 	 * scsi version (sd) and a fibre channel version (ssd). All functional
7769 	 * differences are determined at compile time. In the future a single
7770 	 * binary will be provided and the interconnect type will be used to
7771 	 * differentiate between fibre and parallel scsi behaviors. At that time
7772 	 * it will be necessary for all fibre channel HBAs to support this
7773 	 * property.
7774 	 *
7775 	 * Set un_f_is_fibre to TRUE (default to fibre).
7776 	 */
7777 	un->un_f_is_fibre = TRUE;
7778 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7779 	case INTERCONNECT_SSA:
7780 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7781 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7782 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7783 		break;
7784 	case INTERCONNECT_PARALLEL:
7785 		un->un_f_is_fibre = FALSE;
7786 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7787 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7788 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7789 		break;
7790 	case INTERCONNECT_FIBRE:
7791 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7792 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7793 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7794 		break;
7795 	case INTERCONNECT_FABRIC:
7796 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7797 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7798 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7799 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7800 		break;
7801 	default:
7802 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
7803 		/*
7804 		 * The HBA does not support the "interconnect-type" property
7805 		 * (or did not provide a recognized type).
7806 		 *
7807 		 * Note: This will be obsoleted when a single fibre channel
7808 		 * and parallel scsi driver is delivered. In the meantime the
7809 		 * interconnect type will be set to the platform default. If that
7810 		 * type is not parallel SCSI, it means that we should be
7811 		 * assuming "ssd" semantics. However, here this also means that
7812 		 * the FC HBA is not supporting the "interconnect-type" property
7813 		 * like we expect it to, so log this occurrence.
7814 		 */
7815 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7816 		if (!SD_IS_PARALLEL_SCSI(un)) {
7817 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7818 			    "sd_unit_attach: un:0x%p Assuming "
7819 			    "INTERCONNECT_FIBRE\n", un);
7820 		} else {
7821 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7822 			    "sd_unit_attach: un:0x%p Assuming "
7823 			    "INTERCONNECT_PARALLEL\n", un);
7824 			un->un_f_is_fibre = FALSE;
7825 		}
7826 #else
7827 		/*
7828 		 * Note: This source will be implemented when a single fibre
7829 		 * channel and parallel scsi driver is delivered. The default
7830 		 * will be to assume that if a device does not support the
7831 		 * "interconnect-type" property it is a parallel SCSI HBA and
7832 		 * we will set the interconnect type for parallel scsi.
7833 		 */
7834 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7835 		un->un_f_is_fibre = FALSE;
7836 #endif
7837 		break;
7838 	}
7839 
7840 	if (un->un_f_is_fibre == TRUE) {
7841 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7842 		    SCSI_VERSION_3) {
7843 			switch (un->un_interconnect_type) {
7844 			case SD_INTERCONNECT_FIBRE:
7845 			case SD_INTERCONNECT_SSA:
7846 				un->un_node_type = DDI_NT_BLOCK_WWN;
7847 				break;
7848 			default:
7849 				break;
7850 			}
7851 		}
7852 	}
7853 
7854 	/*
7855 	 * Initialize the Request Sense command for the target
7856 	 */
7857 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
7858 		goto alloc_rqs_failed;
7859 	}
7860 
7861 	/*
7862 	 * Set un_retry_count to SD_RETRY_COUNT; this is OK for SPARC,
7863 	 * which has separate binaries for sd and ssd.
7864 	 *
7865 	 * x86 has one binary, so un_retry_count is set based on the
7866 	 * interconnect type. These hardcoded values will go away when
7867 	 * SPARC uses one binary for sd and ssd, and they need to match
7868 	 * SD_RETRY_COUNT in sddef.h.
7869 	 * The value used is based on the interconnect type:
7870 	 * fibre = 3, parallel = 5.
7871 	 */
7872 #if defined(__i386) || defined(__amd64)
7873 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
7874 #else
7875 	un->un_retry_count = SD_RETRY_COUNT;
7876 #endif
7877 
7878 	/*
7879 	 * Set the per disk retry count to the default number of retries
7880 	 * for disks and CDROMs. This value can be overridden by the
7881 	 * disk property list or an entry in sd.conf.
7882 	 */
7883 	un->un_notready_retry_count =
7884 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un) :
7885 	    DISK_NOT_READY_RETRY_COUNT(un);
7886 
7887 	/*
7888 	 * Set the busy retry count to the default value of un_retry_count.
7889 	 * This can be overridden by entries in sd.conf or the device
7890 	 * config table.
7891 	 */
7892 	un->un_busy_retry_count = un->un_retry_count;
7893 
7894 	/*
7895 	 * Init the reset threshold for retries.  This number determines
7896 	 * how many retries must be performed before a reset can be issued
7897 	 * (for certain error conditions). This can be overridden by entries
7898 	 * in sd.conf or the device config table.
7899 	 */
7900 	un->un_reset_retry_count = (un->un_retry_count / 2);
7901 
7902 	/*
7903 	 * Set the victim_retry_count to the default un_retry_count
7904 	 */
7905 	un->un_victim_retry_count = (2 * un->un_retry_count);
7906 
7907 	/*
7908 	 * Set the reservation release timeout to the default value of
7909 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
7910 	 * device config table.
7911 	 */
7912 	un->un_reserve_release_time = 5;
7913 
7914 	/*
7915 	 * Set up the default maximum transfer size. Note that this may
7916 	 * get updated later in the attach, when setting up default wide
7917 	 * operations for disks.
7918 	 */
7919 #if defined(__i386) || defined(__amd64)
7920 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
7921 #else
7922 	un->un_max_xfer_size = (uint_t)maxphys;
7923 #endif
7924 
7925 	/*
7926 	 * Get "allow bus device reset" property (defaults to "enabled" if
7927 	 * the property was not defined). This is to disable bus resets for
7928 	 * certain kinds of error recovery. Note: In the future when a run-time
7929 	 * fibre check is available the soft state flag should default to
7930 	 * enabled.
7931 	 */
7932 	if (un->un_f_is_fibre == TRUE) {
7933 		un->un_f_allow_bus_device_reset = TRUE;
7934 	} else {
7935 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7936 		    "allow-bus-device-reset", 1) != 0) {
7937 			un->un_f_allow_bus_device_reset = TRUE;
7938 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7939 			    "sd_unit_attach: un:0x%p Bus device reset enabled\n",
7940 			    un);
7941 		} else {
7942 			un->un_f_allow_bus_device_reset = FALSE;
7943 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7944 			    "sd_unit_attach: un:0x%p Bus device reset disabled\n",
7945 			    un);
7946 		}
7947 	}
7948 
7949 	/*
7950 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
7951 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
7952 	 *
7953 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
7954 	 * property. The new "variant" property with a value of "atapi" has been
7955 	 * introduced so that future 'variants' of standard SCSI behavior (like
7956 	 * atapi) could be specified by the underlying HBA drivers by supplying
7957 	 * a new value for the "variant" property, instead of having to define a
7958 	 * new property.
7959 	 */
7960 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
7961 		un->un_f_cfg_is_atapi = TRUE;
7962 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7963 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
7964 	}
7965 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
7966 	    &variantp) == DDI_PROP_SUCCESS) {
7967 		if (strcmp(variantp, "atapi") == 0) {
7968 			un->un_f_cfg_is_atapi = TRUE;
7969 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7970 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
7971 		}
7972 		ddi_prop_free(variantp);
7973 	}
7974 
7975 	un->un_cmd_timeout	= SD_IO_TIME;
7976 
7977 	/* Info on current states, statuses, etc. (Updated frequently) */
7978 	un->un_state		= SD_STATE_NORMAL;
7979 	un->un_last_state	= SD_STATE_NORMAL;
7980 
7981 	/* Control & status info for command throttling */
7982 	un->un_throttle		= sd_max_throttle;
7983 	un->un_saved_throttle	= sd_max_throttle;
7984 	un->un_min_throttle	= sd_min_throttle;
7985 
7986 	if (un->un_f_is_fibre == TRUE) {
7987 		un->un_f_use_adaptive_throttle = TRUE;
7988 	} else {
7989 		un->un_f_use_adaptive_throttle = FALSE;
7990 	}
7991 
7992 	/* Removable media support. */
7993 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
7994 	un->un_mediastate		= DKIO_NONE;
7995 	un->un_specified_mediastate	= DKIO_NONE;
7996 
7997 	/* CVs for suspend/resume (PM or DR) */
7998 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
7999 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
8000 
8001 	/* Power management support. */
8002 	un->un_power_level = SD_SPINDLE_UNINIT;
8003 
8004 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
8005 	un->un_f_wcc_inprog = 0;
8006 
8007 	/*
8008 	 * The open/close semaphore is used to serialize threads executing
8009 	 * in the driver's open & close entry point routines for a given
8010 	 * instance.
8011 	 */
8012 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
8013 
8014 	/*
8015 	 * The conf file entry and softstate variable form a forceful override,
8016 	 * meaning a non-zero value must be entered to change the default.
8017 	 */
8018 	un->un_f_disksort_disabled = FALSE;
8019 
8020 	/*
8021 	 * Retrieve the properties from the static driver table or the driver
8022 	 * configuration file (.conf) for this unit and update the soft state
8023 	 * for the device as needed for the indicated properties.
8024 	 * Note: the property configuration needs to occur here as some of the
8025 	 * following routines may have dependencies on soft state flags set
8026 	 * as part of the driver property configuration.
8027 	 */
8028 	sd_read_unit_properties(un);
8029 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8030 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
8031 
8032 	/*
8033 	 * A device is treated as hotpluggable only if it has the
8034 	 * "hotpluggable" property. Otherwise, it is regarded
8035 	 * as non-hotpluggable.
8036 	 */
8037 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
8038 	    -1) != -1) {
8039 		un->un_f_is_hotpluggable = TRUE;
8040 	}
8041 
8042 	/*
8043 	 * Set the unit's attributes (flags) according to "hotpluggable" and
8044 	 * the RMB bit in the INQUIRY data.
8045 	 */
8046 	sd_set_unit_attributes(un, devi);
8047 
8048 	/*
8049 	 * By default, we mark the capacity, lbasize, and geometry
8050 	 * as invalid. Only if we successfully read a valid capacity
8051 	 * will we update the un_blockcount and un_tgt_blocksize with the
8052 	 * valid values (the geometry will be validated later).
8053 	 */
8054 	un->un_f_blockcount_is_valid	= FALSE;
8055 	un->un_f_tgt_blocksize_is_valid	= FALSE;
8056 	un->un_f_geometry_is_valid	= FALSE;
8057 
8058 	/*
8059 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
8060 	 * otherwise.
8061 	 */
8062 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
8063 	un->un_blockcount = 0;
8064 
8065 	/*
8066 	 * Set up the per-instance info needed to determine the correct
8067 	 * CDBs and other info for issuing commands to the target.
8068 	 */
8069 	sd_init_cdb_limits(un);
8070 
8071 	/*
8072 	 * Set up the IO chains to use, based upon the target type.
8073 	 */
8074 	if (un->un_f_non_devbsize_supported) {
8075 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
8076 	} else {
8077 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
8078 	}
8079 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
8080 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
8081 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
8082 
8083 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
8084 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
8085 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
8086 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
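
	/*
	 * The xbuf attribute handle created above provides per-instance
	 * buf(9S) extension allocation for the layered I/O chains; the
	 * active and reserve limits come from the sd_xbuf_active_limit and
	 * sd_xbuf_reserve_limit tunables passed to ddi_xbuf_attr_create().
	 */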
8087 
8088 
8089 	if (ISCD(un)) {
8090 		un->un_additional_codes = sd_additional_codes;
8091 	} else {
8092 		un->un_additional_codes = NULL;
8093 	}
8094 
8095 	/*
8096 	 * Create the kstats here so they can be available for attach-time
8097 	 * routines that send commands to the unit (either polled or via
8098 	 * sd_send_scsi_cmd).
8099 	 *
8100 	 * Note: This is a critical sequence that needs to be maintained:
8101 	 *	1) Instantiate the kstats here, before any routines using the
8102 	 *	   iopath (i.e. sd_send_scsi_cmd).
8103 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8104 	 *	   stats (sd_set_pstats), following sd_validate_geometry(),
8105 	 *	   sd_register_devid(), and sd_cache_control().
8106 	 */
8107 
8108 	un->un_stats = kstat_create(sd_label, instance,
8109 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
8110 	if (un->un_stats != NULL) {
8111 		un->un_stats->ks_lock = SD_MUTEX(un);
8112 		kstat_install(un->un_stats);
8113 	}
8114 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8115 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
8116 
8117 	sd_create_errstats(un, instance);
8118 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8119 	    "sd_unit_attach: un:0x%p errstats created\n", un);
8120 
8121 	/*
8122 	 * The following if/else code was relocated here from below as part
8123 	 * of the fix for bug (4430280). However with the default setup added
8124 	 * of the fix for bug (4430280). However, with the default setup added
8125 	 * this to be before the call to sd_spin_up_unit.
8126 	 */
8127 	if (SD_IS_PARALLEL_SCSI(un)) {
8128 		/*
8129 		 * If SCSI-2 tagged queueing is supported by the target
8130 		 * and by the host adapter then we will enable it.
8131 		 */
8132 		un->un_tagflags = 0;
8133 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8134 		    (devp->sd_inq->inq_cmdque) &&
8135 		    (un->un_f_arq_enabled == TRUE)) {
8136 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
8137 			    1, 1) == 1) {
8138 				un->un_tagflags = FLAG_STAG;
8139 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8140 				    "sd_unit_attach: un:0x%p tag queueing "
8141 				    "enabled\n", un);
8142 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
8143 			    "untagged-qing", 0) == 1) {
8144 				un->un_f_opt_queueing = TRUE;
8145 				un->un_saved_throttle = un->un_throttle =
8146 				    min(un->un_throttle, 3);
8147 			} else {
8148 				un->un_f_opt_queueing = FALSE;
8149 				un->un_saved_throttle = un->un_throttle = 1;
8150 			}
8151 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
8152 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
8153 			/* The Host Adapter supports internal queueing. */
8154 			un->un_f_opt_queueing = TRUE;
8155 			un->un_saved_throttle = un->un_throttle =
8156 			    min(un->un_throttle, 3);
8157 		} else {
8158 			un->un_f_opt_queueing = FALSE;
8159 			un->un_saved_throttle = un->un_throttle = 1;
8160 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8161 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
8162 		}
8163 
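		/*
		 * Summary of the negotiation above: with full tagged
		 * queueing un_throttle keeps its configured value, with
		 * untagged HBA-internal queueing it is capped at three
		 * outstanding commands, and with no queueing support the
		 * target is limited to one command at a time.
		 */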
8164 
8165 		/* Set up or tear down default wide operations for disks */
8166 
8167 		/*
8168 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
8169 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
8170 		 * system and be set to different values. In the future this
8171 		 * code may need to be updated when the ssd module is
8172 		 * obsoleted and removed from the system. (4299588)
8173 		 */
8174 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8175 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
8176 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8177 			    1, 1) == 1) {
8178 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8179 				    "sd_unit_attach: un:0x%p Wide Transfer "
8180 				    "enabled\n", un);
8181 			}
8182 
8183 			/*
8184 			 * If tagged queuing has also been enabled, then
8185 			 * enable large xfers
8186 			 */
8187 			if (un->un_saved_throttle == sd_max_throttle) {
8188 				un->un_max_xfer_size =
8189 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8190 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8191 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8192 				    "sd_unit_attach: un:0x%p max transfer "
8193 				    "size=0x%x\n", un, un->un_max_xfer_size);
8194 			}
8195 		} else {
8196 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8197 			    0, 1) == 1) {
8198 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8199 				    "sd_unit_attach: un:0x%p "
8200 				    "Wide Transfer disabled\n", un);
8201 			}
8202 		}
8203 	} else {
8204 		un->un_tagflags = FLAG_STAG;
8205 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
8206 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
8207 	}
8208 
8209 	/*
8210 	 * If this target supports LUN reset, try to enable it.
8211 	 */
8212 	if (un->un_f_lun_reset_enabled) {
8213 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
8214 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8215 			    "un:0x%p lun_reset capability set\n", un);
8216 		} else {
8217 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8218 			    "un:0x%p lun-reset capability not set\n", un);
8219 		}
8220 	}
8221 
8222 	/*
8223 	 * At this point in the attach, we have enough info in the
8224 	 * soft state to be able to issue commands to the target.
8225 	 *
8226 	 * All command paths used below MUST issue their commands as
8227 	 * SD_PATH_DIRECT. This is important as intermediate layers
8228 	 * are not all initialized yet (such as PM).
8229 	 */
8230 
8231 	/*
8232 	 * Send a TEST UNIT READY command to the device. This should clear
8233 	 * any outstanding UNIT ATTENTION that may be present.
8234 	 *
8235 	 * Note: Don't check for success; just note any reservation, as this
8236 	 * is a throw-away command to clear any unit attentions.
8237 	 *
8238 	 * Note: This MUST be the first command issued to the target during
8239 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
8240 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
8241 	 * with attempts at spinning up a device with no media.
8242 	 */
8243 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
8244 		reservation_flag = SD_TARGET_IS_RESERVED;
8245 	}
8246 
8247 	/*
8248 	 * If the device is NOT a removable media device, attempt to spin
8249 	 * it up (using the START_STOP_UNIT command) and read its capacity
8250 	 * (using the READ CAPACITY command).  Note, however, that either
8251 	 * of these could fail and in some cases we would continue with
8252 	 * the attach despite the failure (see below).
8253 	 */
8254 	if (un->un_f_descr_format_supported) {
8255 		switch (sd_spin_up_unit(un)) {
8256 		case 0:
8257 			/*
8258 			 * Spin-up was successful; now try to read the
8259 			 * capacity.  If successful then save the results
8260 			 * and mark the capacity & lbasize as valid.
8261 			 */
8262 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8263 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
8264 
8265 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
8266 			    &lbasize, SD_PATH_DIRECT)) {
8267 			case 0: {
8268 				if (capacity > DK_MAX_BLOCKS) {
8269 #ifdef _LP64
8270 					/*
8271 					 * Enable descriptor format sense data
8272 					 * so that we can get 64 bit sense
8273 					 * data fields.
8274 					 */
8275 					sd_enable_descr_sense(un);
8276 #else
8277 					/* 32-bit kernels can't handle this */
8278 					scsi_log(SD_DEVINFO(un),
8279 					    sd_label, CE_WARN,
8280 					    "disk has %llu blocks, which "
8281 					    "is too large for a 32-bit "
8282 					    "kernel", capacity);
8283 					goto spinup_failed;
8284 #endif
8285 				}
8286 				/*
8287 				 * The following relies on
8288 				 * sd_send_scsi_READ_CAPACITY never
8289 				 * returning 0 for capacity and/or lbasize.
8290 				 */
8291 				sd_update_block_info(un, lbasize, capacity);
8292 
8293 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8294 				    "sd_unit_attach: un:0x%p capacity = %ld "
8295 				    "blocks; lbasize= %ld.\n", un,
8296 				    un->un_blockcount, un->un_tgt_blocksize);
8297 
8298 				break;
8299 			}
8300 			case EACCES:
8301 				/*
8302 				 * Should never get here if the spin-up
8303 				 * succeeded, but code it in anyway.
8304 				 * From here, just continue with the attach...
8305 				 */
8306 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8307 				    "sd_unit_attach: un:0x%p "
8308 				    "sd_send_scsi_READ_CAPACITY "
8309 				    "returned reservation conflict\n", un);
8310 				reservation_flag = SD_TARGET_IS_RESERVED;
8311 				break;
8312 			default:
8313 				/*
8314 				 * Likewise, should never get here if the
8315 				 * spin-up succeeded. Just continue with
8316 				 * the attach...
8317 				 */
8318 				break;
8319 			}
8320 			break;
8321 		case EACCES:
8322 			/*
8323 			 * Device is reserved by another host.  In this case
8324 			 * we could not spin it up or read the capacity, but
8325 			 * we continue with the attach anyway.
8326 			 */
8327 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8328 			    "sd_unit_attach: un:0x%p spin-up reservation "
8329 			    "conflict.\n", un);
8330 			reservation_flag = SD_TARGET_IS_RESERVED;
8331 			break;
8332 		default:
8333 			/* Fail the attach if the spin-up failed. */
8334 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8335 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
8336 			goto spinup_failed;
8337 		}
8338 	}
8339 
8340 	/*
8341 	 * Check to see if this is a MMC drive
8342 	 */
8343 	if (ISCD(un)) {
8344 		sd_set_mmc_caps(un);
8345 	}
8346 
8347 	/*
8348 	 * Create the minor nodes for the device.
8349 	 * Note: If we want to support fdisk on both sparc and intel, this will
8350 	 * have to separate out the notion that VTOC8 is always sparc, and
8351 	 * VTOC16 is always intel (though these can be the defaults).  The vtoc
8352 	 * type will have to be determined at run-time, and the fdisk
8353 	 * partitioning will have to have been read & set up before we
8354 	 * create the minor nodes. (any other inits (such as kstats) that
8355 	 * also ought to be done before creating the minor nodes?) (Doesn't
8356 	 * setting up the minor nodes kind of imply that we're ready to
8357 	 * handle an open from userland?)
8358 	 */
8359 	if (sd_create_minor_nodes(un, devi) != DDI_SUCCESS) {
8360 		goto create_minor_nodes_failed;
8361 	}
8362 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8363 	    "sd_unit_attach: un:0x%p minor nodes created\n", un);
8364 
8365 	/*
8366 	 * Add a zero-length attribute to tell the world we support
8367 	 * kernel ioctls (for layered drivers)
8368 	 */
8369 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8370 	    DDI_KERNEL_IOCTL, NULL, 0);
8371 
8372 	/*
8373 	 * Add a boolean property to tell the world we support
8374 	 * the B_FAILFAST flag (for layered drivers)
8375 	 */
8376 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8377 	    "ddi-failfast-supported", NULL, 0);
8378 
8379 	/*
8380 	 * Initialize power management
8381 	 */
8382 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
8383 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
8384 	sd_setup_pm(un, devi);
8385 	if (un->un_f_pm_is_enabled == FALSE) {
8386 		/*
8387 		 * For performance, point to a jump table that does
8388 		 * not include pm.
8389 		 * The direct and priority chains don't change with PM.
8390 		 *
8391 		 * Note: this is currently done based on individual device
8392 		 * capabilities. When an interface for determining system
8393 		 * power enabled state becomes available, or when additional
8394 		 * layers are added to the command chain, these values will
8395 		 * have to be re-evaluated for correctness.
8396 		 */
8397 		if (un->un_f_non_devbsize_supported) {
8398 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
8399 		} else {
8400 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
8401 		}
8402 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8403 	}
8404 
8405 	/*
8406 	 * This property is set to 0 by HA software to avoid retries
8407 	 * on a reserved disk. (The preferred property name is
8408 	 * "retry-on-reservation-conflict") (1189689)
8409 	 *
8410 	 * Note: The use of a global here can have unintended consequences. A
8411 	 * per-instance variable is preferable to match the capabilities of
8412 	 * different underlying HBAs. (4402600)
8413 	 */
8414 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
8415 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
8416 	    sd_retry_on_reservation_conflict);
8417 	if (sd_retry_on_reservation_conflict != 0) {
8418 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
8419 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
8420 		    sd_retry_on_reservation_conflict);
8421 	}
8422 
8423 	/* Set up options for QFULL handling. */
8424 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8425 	    "qfull-retries", -1)) != -1) {
8426 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
8427 		    rval, 1);
8428 	}
8429 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8430 	    "qfull-retry-interval", -1)) != -1) {
8431 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
8432 		    rval, 1);
8433 	}
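	/*
	 * Note: "qfull-retries" bounds how many times a command that
	 * returns QFULL status is retried, and "qfull-retry-interval"
	 * is the delay applied between those retries; both values are
	 * simply handed through to the HBA via scsi_ifsetcap(9F) when
	 * present in the .conf file.
	 */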
8434 
8435 	/*
8436 	 * This just prints a message that announces the existence of the
8437 	 * device. The message is always printed in the system logfile, but
8438 	 * only appears on the console if the system is booted with the
8439 	 * -v (verbose) argument.
8440 	 */
8441 	ddi_report_dev(devi);
8442 
8443 	/*
8444 	 * The framework calls driver attach routines single-threaded
8445 	 * for a given instance.  However, we still acquire SD_MUTEX here
8446 	 * because it is required for calling the sd_validate_geometry()
8447 	 * and sd_register_devid() functions.
8448 	 */
8449 	mutex_enter(SD_MUTEX(un));
8450 	un->un_f_geometry_is_valid = FALSE;
8451 	un->un_mediastate = DKIO_NONE;
8452 	un->un_reserved = -1;
8453 
8454 	/*
8455 	 * Read and validate the device's geometry (i.e., the disk label).
8456 	 * A new unformatted drive will not have a valid geometry, but
8457 	 * the driver needs to successfully attach to this device so
8458 	 * the drive can be formatted via ioctls.
8459 	 */
8460 	if ((sd_validate_geometry(un, SD_PATH_DIRECT) == ENOTSUP) &&
8461 	    (un->un_blockcount < DK_MAX_BLOCKS)) {
8463 		/*
8464 		 * We found a small disk with an EFI label on it;
8465 		 * we need to fix up the minor nodes accordingly.
8466 		 */
8467 		ddi_remove_minor_node(devi, "h");
8468 		ddi_remove_minor_node(devi, "h,raw");
8469 		(void) ddi_create_minor_node(devi, "wd",
8470 		    S_IFBLK,
8471 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8472 		    un->un_node_type, NULL);
8473 		(void) ddi_create_minor_node(devi, "wd,raw",
8474 		    S_IFCHR,
8475 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8476 		    un->un_node_type, NULL);
8477 	}
8478 
8479 	/*
8480 	 * Read and initialize the devid for the unit.
8481 	 */
8482 	ASSERT(un->un_errstats != NULL);
8483 	if (un->un_f_devid_supported) {
8484 		sd_register_devid(un, devi, reservation_flag);
8485 	}
8486 	mutex_exit(SD_MUTEX(un));
8487 
8488 #if (defined(__fibre))
8489 	/*
8490 	 * Register callbacks for fibre only.  You can't do this solely
8491 	 * on the basis of the devid_type because this is HBA-specific.
8492 	 * We need to query our hba capabilities to find out whether to
8493 	 * register or not.
8494 	 */
8495 	if (un->un_f_is_fibre) {
8496 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN) != 0) {
8497 			sd_init_event_callbacks(un);
8498 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8499 			    "sd_unit_attach: un:0x%p event callbacks inserted", un);
8500 		}
8501 	}
8502 #endif
8503 
8504 	if (un->un_f_opt_disable_cache == TRUE) {
8505 		/*
8506 		 * Disable both read cache and write cache.  This is
8507 		 * the historic behavior of the keywords in the config file.
8508 		 */
8509 		if (sd_cache_control(un, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
8510 		    0) {
8511 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8512 			    "sd_unit_attach: un:0x%p Could not disable "
8513 			    "caching", un);
8514 			goto devid_failed;
8515 		}
8516 	}
8517 
8518 	/*
8519 	 * Check the value of the WCE bit now and
8520 	 * set un_f_write_cache_enabled accordingly.
8521 	 */
8522 	(void) sd_get_write_cache_enabled(un, &wc_enabled);
8523 	mutex_enter(SD_MUTEX(un));
8524 	un->un_f_write_cache_enabled = (wc_enabled != 0);
8525 	mutex_exit(SD_MUTEX(un));
8526 
8527 	/*
8528 	 * Set the pstat and error stat values here, so data obtained during the
8529 	 * previous attach-time routines is available.
8530 	 *
8531 	 * Note: This is a critical sequence that needs to be maintained:
8532 	 *	1) Instantiate the kstats before any routines using the iopath
8533 	 *	   (i.e. sd_send_scsi_cmd).
8534 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8535 	 *	   stats (sd_set_pstats) here, following sd_validate_geometry(),
8536 	 *	   sd_register_devid(), and sd_cache_control().
8537 	 */
8538 	if (un->un_f_pkstats_enabled) {
8539 		sd_set_pstats(un);
8540 		SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8541 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
8542 	}
8543 
8544 	sd_set_errstats(un);
8545 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8546 	    "sd_unit_attach: un:0x%p errstats set\n", un);
8547 
8548 	/*
8549 	 * Find out what type of reservation this disk supports.
8550 	 */
8551 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
8552 	case 0:
8553 		/*
8554 		 * SCSI-3 reservations are supported.
8555 		 */
8556 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8557 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8558 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
8559 		break;
8560 	case ENOTSUP:
8561 		/*
8562 		 * The PERSISTENT RESERVE IN command would not be recognized by
8563 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
8564 		 */
8565 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8566 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
8567 		un->un_reservation_type = SD_SCSI2_RESERVATION;
8568 		break;
8569 	default:
8570 		/*
8571 		 * default to SCSI-3 reservations
8572 		 */
8573 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8574 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
8575 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8576 		break;
8577 	}
8578 
8579 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8580 	    "sd_unit_attach: un:0x%p exit success\n", un);
8581 
8582 	return (DDI_SUCCESS);
8583 
8584 	/*
8585 	 * An error occurred during the attach; clean up & return failure.
8586 	 */
8587 
8588 devid_failed:
8589 
8590 setup_pm_failed:
8591 	ddi_remove_minor_node(devi, NULL);
8592 
8593 create_minor_nodes_failed:
8594 	/*
8595 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8596 	 */
8597 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8598 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8599 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8600 
8601 	if (un->un_f_is_fibre == FALSE) {
8602 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8603 	}
8604 
8605 spinup_failed:
8606 
8607 	mutex_enter(SD_MUTEX(un));
8608 
8609 	/* Cancel the callback for a SD_PATH_DIRECT_PRIORITY cmd restart */
8610 	if (un->un_direct_priority_timeid != NULL) {
8611 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8612 		un->un_direct_priority_timeid = NULL;
8613 		mutex_exit(SD_MUTEX(un));
8614 		(void) untimeout(temp_id);
8615 		mutex_enter(SD_MUTEX(un));
8616 	}
8617 
8618 	/* Cancel any pending start/stop timeouts */
8619 	if (un->un_startstop_timeid != NULL) {
8620 		timeout_id_t temp_id = un->un_startstop_timeid;
8621 		un->un_startstop_timeid = NULL;
8622 		mutex_exit(SD_MUTEX(un));
8623 		(void) untimeout(temp_id);
8624 		mutex_enter(SD_MUTEX(un));
8625 	}
8626 
8627 	/* Cancel any pending reset-throttle timeouts */
8628 	if (un->un_reset_throttle_timeid != NULL) {
8629 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8630 		un->un_reset_throttle_timeid = NULL;
8631 		mutex_exit(SD_MUTEX(un));
8632 		(void) untimeout(temp_id);
8633 		mutex_enter(SD_MUTEX(un));
8634 	}
8635 
8636 	/* Cancel any pending retry timeouts */
8637 	if (un->un_retry_timeid != NULL) {
8638 		timeout_id_t temp_id = un->un_retry_timeid;
8639 		un->un_retry_timeid = NULL;
8640 		mutex_exit(SD_MUTEX(un));
8641 		(void) untimeout(temp_id);
8642 		mutex_enter(SD_MUTEX(un));
8643 	}
8644 
8645 	/* Cancel any pending delayed cv broadcast timeouts */
8646 	if (un->un_dcvb_timeid != NULL) {
8647 		timeout_id_t temp_id = un->un_dcvb_timeid;
8648 		un->un_dcvb_timeid = NULL;
8649 		mutex_exit(SD_MUTEX(un));
8650 		(void) untimeout(temp_id);
8651 		mutex_enter(SD_MUTEX(un));
8652 	}
8653 
8654 	mutex_exit(SD_MUTEX(un));
8655 
8656 	/* There should not be any in-progress I/O so ASSERT this check */
8657 	ASSERT(un->un_ncmds_in_transport == 0);
8658 	ASSERT(un->un_ncmds_in_driver == 0);
8659 
8660 	/* Do not free the softstate if the callback routine is active */
8661 	sd_sync_with_callback(un);
8662 
8663 	/*
8664 	 * Partition stats apparently are not used with removables. These would
8665 	 * not have been created during attach, so no need to clean them up...
8666 	 */
8667 	if (un->un_stats != NULL) {
8668 		kstat_delete(un->un_stats);
8669 		un->un_stats = NULL;
8670 	}
8671 	if (un->un_errstats != NULL) {
8672 		kstat_delete(un->un_errstats);
8673 		un->un_errstats = NULL;
8674 	}
8675 
8676 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8677 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8678 
8679 	ddi_prop_remove_all(devi);
8680 	sema_destroy(&un->un_semoclose);
8681 	cv_destroy(&un->un_state_cv);
8682 
8683 getrbuf_failed:
8684 
8685 	sd_free_rqs(un);
8686 
8687 alloc_rqs_failed:
8688 
8689 	devp->sd_private = NULL;
8690 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8691 
8692 get_softstate_failed:
8693 	/*
8694 	 * Note: the man pages are unclear as to whether or not doing a
8695 	 * ddi_soft_state_free(sd_state, instance) is the right way to
8696 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8697 	 * ddi_get_soft_state() fails.  The implication seems to be
8698 	 * that the get_soft_state cannot fail if the zalloc succeeds.
8699 	 */
8700 	ddi_soft_state_free(sd_state, instance);
8701 
8702 probe_failed:
8703 	scsi_unprobe(devp);
8704 #ifdef SDDEBUG
8705 	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
8706 	    (sd_level_mask & SD_LOGMASK_TRACE)) {
8707 		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
8708 		    (void *)un);
8709 	}
8710 #endif
8711 	return (DDI_FAILURE);
8712 }
8713 
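/*
 * The failure labels above (and sd_unit_detach() below) repeatedly use the
 * same pattern to cancel a driver timeout: snapshot the id and NULL the
 * soft state pointer while holding SD_MUTEX, then drop the mutex before
 * calling untimeout(9F), since untimeout may have to wait for a running
 * callback that itself grabs SD_MUTEX.  A minimal sketch of the pattern as
 * a hypothetical helper (not part of this driver) would be:
 *
 *	static void
 *	sd_cancel_timeout(struct sd_lun *un, timeout_id_t *tidp)
 *	{
 *		ASSERT(mutex_owned(SD_MUTEX(un)));
 *		if (*tidp != NULL) {
 *			timeout_id_t temp_id = *tidp;
 *			*tidp = NULL;
 *			mutex_exit(SD_MUTEX(un));
 *			(void) untimeout(temp_id);
 *			mutex_enter(SD_MUTEX(un));
 *		}
 *	}
 *
 * Dropping the mutex first is what avoids a deadlock between untimeout()
 * and a timeout handler blocked on SD_MUTEX.
 */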
8714 
8715 /*
8716  *    Function: sd_unit_detach
8717  *
8718  * Description: Performs DDI_DETACH processing for sddetach().
8719  *
8720  * Return Code: DDI_SUCCESS
8721  *		DDI_FAILURE
8722  *
8723  *     Context: Kernel thread context
8724  */
8725 
8726 static int
8727 sd_unit_detach(dev_info_t *devi)
8728 {
8729 	struct scsi_device	*devp;
8730 	struct sd_lun		*un;
8731 	int			i;
8732 	dev_t			dev;
8733 	int			instance = ddi_get_instance(devi);
8734 
8735 	mutex_enter(&sd_detach_mutex);
8736 
8737 	/*
8738 	 * Fail the detach for any of the following:
8739 	 *  - Unable to get the sd_lun struct for the instance
8740 	 *  - A layered driver has an outstanding open on the instance
8741 	 *  - Another thread is already detaching this instance
8742 	 *  - Another thread is currently performing an open
8743 	 */
8744 	devp = ddi_get_driver_private(devi);
8745 	if ((devp == NULL) ||
8746 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
8747 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
8748 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
8749 		mutex_exit(&sd_detach_mutex);
8750 		return (DDI_FAILURE);
8751 	}
8752 
8753 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
8754 
8755 	/*
8756 	 * Mark this instance as currently in a detach, to inhibit any
8757 	 * opens from a layered driver.
8758 	 */
8759 	un->un_detach_count++;
8760 	mutex_exit(&sd_detach_mutex);
8761 
8762 	dev = sd_make_device(SD_DEVINFO(un));
8763 
8764 	_NOTE(COMPETING_THREADS_NOW);
8765 
8766 	mutex_enter(SD_MUTEX(un));
8767 
8768 	/*
8769 	 * Fail the detach if there are any outstanding layered
8770 	 * opens on this device.
8771 	 */
8772 	for (i = 0; i < NDKMAP; i++) {
8773 		if (un->un_ocmap.lyropen[i] != 0) {
8774 			goto err_notclosed;
8775 		}
8776 	}
8777 
8778 	/*
8779 	 * Verify there are NO outstanding commands issued to this device.
8780 	 * i.e., un_ncmds_in_transport == 0.
8781 	 * It's possible to have outstanding commands through the physio
8782 	 * code path, even though everything's closed.
8783 	 */
8784 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
8785 	    (un->un_direct_priority_timeid != NULL) ||
8786 	    (un->un_state == SD_STATE_RWAIT)) {
8787 		mutex_exit(SD_MUTEX(un));
8788 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8789 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
8790 		goto err_stillbusy;
8791 	}
8792 
8793 	/*
8794 	 * If we have the device reserved, release the reservation.
8795 	 */
8796 	if ((un->un_resvd_status & SD_RESERVE) &&
8797 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
8798 		mutex_exit(SD_MUTEX(un));
8799 		/*
8800 		 * Note: sd_reserve_release sends a command to the device
8801 		 * via the sd_ioctlcmd() path, and can sleep.
8802 		 */
8803 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
8804 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8805 			    "sd_dr_detach: Cannot release reservation\n");
8806 		}
8807 	} else {
8808 		mutex_exit(SD_MUTEX(un));
8809 	}
8810 
8811 	/*
8812 	 * Untimeout any reserve recover, throttle reset, restart unit
8813 	 * and delayed broadcast timeout threads. Protect the timeout pointer
8814 	 * from getting nulled by their callback functions.
8815 	 */
8816 	mutex_enter(SD_MUTEX(un));
8817 	if (un->un_resvd_timeid != NULL) {
8818 		timeout_id_t temp_id = un->un_resvd_timeid;
8819 		un->un_resvd_timeid = NULL;
8820 		mutex_exit(SD_MUTEX(un));
8821 		(void) untimeout(temp_id);
8822 		mutex_enter(SD_MUTEX(un));
8823 	}
8824 
8825 	if (un->un_reset_throttle_timeid != NULL) {
8826 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8827 		un->un_reset_throttle_timeid = NULL;
8828 		mutex_exit(SD_MUTEX(un));
8829 		(void) untimeout(temp_id);
8830 		mutex_enter(SD_MUTEX(un));
8831 	}
8832 
8833 	if (un->un_startstop_timeid != NULL) {
8834 		timeout_id_t temp_id = un->un_startstop_timeid;
8835 		un->un_startstop_timeid = NULL;
8836 		mutex_exit(SD_MUTEX(un));
8837 		(void) untimeout(temp_id);
8838 		mutex_enter(SD_MUTEX(un));
8839 	}
8840 
8841 	if (un->un_dcvb_timeid != NULL) {
8842 		timeout_id_t temp_id = un->un_dcvb_timeid;
8843 		un->un_dcvb_timeid = NULL;
8844 		mutex_exit(SD_MUTEX(un));
8845 		(void) untimeout(temp_id);
8846 	} else {
8847 		mutex_exit(SD_MUTEX(un));
8848 	}
8849 
8850 	/* Remove any pending reservation reclaim requests for this device */
8851 	sd_rmv_resv_reclaim_req(dev);
8852 
8853 	mutex_enter(SD_MUTEX(un));
8854 
8855 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
8856 	if (un->un_direct_priority_timeid != NULL) {
8857 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8858 		un->un_direct_priority_timeid = NULL;
8859 		mutex_exit(SD_MUTEX(un));
8860 		(void) untimeout(temp_id);
8861 		mutex_enter(SD_MUTEX(un));
8862 	}
8863 
8864 	/* Cancel any active multi-host disk watch thread requests */
8865 	if (un->un_mhd_token != NULL) {
8866 		mutex_exit(SD_MUTEX(un));
8867 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
8868 		if (scsi_watch_request_terminate(un->un_mhd_token,
8869 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8870 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8871 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
8872 			/*
8873 			 * Note: We are returning here after having removed
8874 			 * some driver timeouts above. This is consistent with
8875 			 * the legacy implementation but perhaps the watch
8876 			 * terminate call should be made with the wait flag set.
8877 			 */
8878 			goto err_stillbusy;
8879 		}
8880 		mutex_enter(SD_MUTEX(un));
8881 		un->un_mhd_token = NULL;
8882 	}
8883 
8884 	if (un->un_swr_token != NULL) {
8885 		mutex_exit(SD_MUTEX(un));
8886 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
8887 		if (scsi_watch_request_terminate(un->un_swr_token,
8888 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8889 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8890 			    "sd_dr_detach: Cannot cancel swr watch request\n");
8891 			/*
8892 			 * Note: We are returning here after having removed
8893 			 * some driver timeouts above. This is consistent with
8894 			 * the legacy implementation but perhaps the watch
8895 			 * terminate call should be made with the wait flag set.
8896 			 */
8897 			goto err_stillbusy;
8898 		}
8899 		mutex_enter(SD_MUTEX(un));
8900 		un->un_swr_token = NULL;
8901 	}
8902 
8903 	mutex_exit(SD_MUTEX(un));
8904 
8905 	/*
8906 	 * Clear any scsi_reset_notifies. We clear the reset notifies
8907 	 * even if we have not registered one.
8908 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
8909 	 */
8910 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
8911 	    sd_mhd_reset_notify_cb, (caddr_t)un);
8912 
8913 	/*
8914 	 * Protect the timeout pointers from getting nulled by their
8915 	 * callback functions during the cancellation process; otherwise
8916 	 * untimeout could be invoked with a NULL value.
8917 	 */
8918 	_NOTE(NO_COMPETING_THREADS_NOW);
8919 
8920 	mutex_enter(&un->un_pm_mutex);
8921 	if (un->un_pm_idle_timeid != NULL) {
8922 		timeout_id_t temp_id = un->un_pm_idle_timeid;
8923 		un->un_pm_idle_timeid = NULL;
8924 		mutex_exit(&un->un_pm_mutex);
8925 
8926 		/*
8927 		 * Timeout is active; cancel it.
8928 		 * Note that it'll never be active on a device
8929 		 * that does not support PM therefore we don't
8930 		 * have to check before calling pm_idle_component.
8931 		 */
8932 		(void) untimeout(temp_id);
8933 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8934 		mutex_enter(&un->un_pm_mutex);
8935 	}
8936 
8937 	/*
8938 	 * Check whether there is already a timeout scheduled for power
8939 	 * management. If yes, then don't lower the power here; that's
8940 	 * the timeout handler's job.
8941 	 */
8942 	if (un->un_pm_timeid != NULL) {
8943 		timeout_id_t temp_id = un->un_pm_timeid;
8944 		un->un_pm_timeid = NULL;
8945 		mutex_exit(&un->un_pm_mutex);
8946 		/*
8947 		 * Timeout is active; cancel it.
8948 		 * Note that it'll never be active on a device
8949 		 * that does not support PM therefore we don't
8950 		 * have to check before calling pm_idle_component.
8951 		 */
8952 		(void) untimeout(temp_id);
8953 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8954 
8955 	} else {
8956 		mutex_exit(&un->un_pm_mutex);
8957 		if ((un->un_f_pm_is_enabled == TRUE) &&
8958 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
8959 		    DDI_SUCCESS)) {
8960 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8961 			    "sd_dr_detach: Lower power request failed, ignoring.\n");
8962 			/*
8963 			 * Fix for bug: 4297749, item # 13
8964 			 * The above test now includes a check to see if PM is
8965 			 * supported by this device before calling
8966 			 * pm_lower_power().
8967 			 * Note, the following is not dead code. The call to
8968 			 * pm_lower_power above will generate a call back into
8969 			 * our sdpower routine which might result in a timeout
8970 			 * handler getting activated. Therefore the following
8971 			 * code is valid and necessary.
8972 			 */
8973 			mutex_enter(&un->un_pm_mutex);
8974 			if (un->un_pm_timeid != NULL) {
8975 				timeout_id_t temp_id = un->un_pm_timeid;
8976 				un->un_pm_timeid = NULL;
8977 				mutex_exit(&un->un_pm_mutex);
8978 				(void) untimeout(temp_id);
8979 				(void) pm_idle_component(SD_DEVINFO(un), 0);
8980 			} else {
8981 				mutex_exit(&un->un_pm_mutex);
8982 			}
8983 		}
8984 	}
8985 
8986 	/*
8987 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8988 	 * Relocated here from above to be after the call to
8989 	 * pm_lower_power, which was getting errors.
8990 	 */
8991 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8992 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8993 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8994 
8995 	if (un->un_f_is_fibre == FALSE) {
8996 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8997 	}
8998 
8999 	/*
9000 	 * Remove any event callbacks, fibre only
9001 	 */
9002 	if (un->un_f_is_fibre == TRUE) {
9003 		if ((un->un_insert_event != NULL) &&
9004 		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
9005 		    DDI_SUCCESS)) {
9006 			/*
9007 			 * Note: We are returning here after having done
9008 			 * substantial cleanup above. This is consistent
9009 			 * with the legacy implementation but this may not
9010 			 * be the right thing to do.
9011 			 */
9012 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9013 			    "sd_dr_detach: Cannot cancel insert event\n");
9014 			goto err_remove_event;
9015 		}
9016 		un->un_insert_event = NULL;
9017 
9018 		if ((un->un_remove_event != NULL) &&
9019 		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
9020 		    DDI_SUCCESS)) {
9021 			/*
9022 			 * Note: We are returning here after having done
9023 			 * substantial cleanup above. This is consistent
9024 			 * with the legacy implementation but this may not
9025 			 * be the right thing to do.
9026 			 */
9027 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9028 			    "sd_dr_detach: Cannot cancel remove event\n");
9029 			goto err_remove_event;
9030 		}
9031 		un->un_remove_event = NULL;
9032 	}
9033 
9034 	/* Do not free the softstate if the callback routine is active */
9035 	sd_sync_with_callback(un);
9036 
9037 	/*
9038 	 * Hold the detach mutex here, to make sure that no other threads ever
9039 	 * can access a (partially) freed soft state structure.
9040 	 */
9041 	mutex_enter(&sd_detach_mutex);
9042 
9043 	/*
9044 	 * Clean up the soft state struct.
9045 	 * Cleanup is done in reverse order of allocs/inits.
9046 	 * At this point there should be no competing threads anymore.
9047 	 */
9048 
9049 	/* Unregister and free device id. */
9050 	ddi_devid_unregister(devi);
9051 	if (un->un_devid) {
9052 		ddi_devid_free(un->un_devid);
9053 		un->un_devid = NULL;
9054 	}
9055 
9056 	/*
9057 	 * Destroy wmap cache if it exists.
9058 	 */
9059 	if (un->un_wm_cache != NULL) {
9060 		kmem_cache_destroy(un->un_wm_cache);
9061 		un->un_wm_cache = NULL;
9062 	}
9063 
9064 	/* Remove minor nodes */
9065 	ddi_remove_minor_node(devi, NULL);
9066 
9067 	/*
9068 	 * kstat cleanup is done in detach for all device types (4363169).
9069 	 * We do not want to fail detach if the device kstats are not deleted
9070 	 * since there is confusion about the devo_refcnt for the device.
9071 	 * We just delete the kstats and let detach complete successfully.
9072 	 */
9073 	if (un->un_stats != NULL) {
9074 		kstat_delete(un->un_stats);
9075 		un->un_stats = NULL;
9076 	}
9077 	if (un->un_errstats != NULL) {
9078 		kstat_delete(un->un_errstats);
9079 		un->un_errstats = NULL;
9080 	}
9081 
9082 	/* Remove partition stats */
9083 	if (un->un_f_pkstats_enabled) {
9084 		for (i = 0; i < NSDMAP; i++) {
9085 			if (un->un_pstats[i] != NULL) {
9086 				kstat_delete(un->un_pstats[i]);
9087 				un->un_pstats[i] = NULL;
9088 			}
9089 		}
9090 	}
9091 
9092 	/* Remove xbuf registration */
9093 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
9094 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
9095 
9096 	/* Remove driver properties */
9097 	ddi_prop_remove_all(devi);
9098 
9099 	mutex_destroy(&un->un_pm_mutex);
9100 	cv_destroy(&un->un_pm_busy_cv);
9101 
9102 	cv_destroy(&un->un_wcc_cv);
9103 
9104 	/* Open/close semaphore */
9105 	sema_destroy(&un->un_semoclose);
9106 
9107 	/* Removable media condvar. */
9108 	cv_destroy(&un->un_state_cv);
9109 
9110 	/* Suspend/resume condvar. */
9111 	cv_destroy(&un->un_suspend_cv);
9112 	cv_destroy(&un->un_disk_busy_cv);
9113 
9114 	sd_free_rqs(un);
9115 
9116 	/* Free up soft state */
9117 	devp->sd_private = NULL;
9118 	bzero(un, sizeof (struct sd_lun));
9119 	ddi_soft_state_free(sd_state, instance);
9120 
9121 	mutex_exit(&sd_detach_mutex);
9122 
9123 	/* This frees up the INQUIRY data associated with the device. */
9124 	scsi_unprobe(devp);
9125 
9126 	return (DDI_SUCCESS);
9127 
9128 err_notclosed:
9129 	mutex_exit(SD_MUTEX(un));
9130 
9131 err_stillbusy:
9132 	_NOTE(NO_COMPETING_THREADS_NOW);
9133 
9134 err_remove_event:
9135 	mutex_enter(&sd_detach_mutex);
9136 	un->un_detach_count--;
9137 	mutex_exit(&sd_detach_mutex);
9138 
9139 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
9140 	return (DDI_FAILURE);
9141 }
9142 
9143 
9144 /*
9145  * Driver minor node structure and data table
9146  */
9147 struct driver_minor_data {
9148 	char	*name;
9149 	minor_t	minor;
9150 	int	type;
9151 };
9152 
9153 static struct driver_minor_data sd_minor_data[] = {
9154 	{"a", 0, S_IFBLK},
9155 	{"b", 1, S_IFBLK},
9156 	{"c", 2, S_IFBLK},
9157 	{"d", 3, S_IFBLK},
9158 	{"e", 4, S_IFBLK},
9159 	{"f", 5, S_IFBLK},
9160 	{"g", 6, S_IFBLK},
9161 	{"h", 7, S_IFBLK},
9162 #if defined(_SUNOS_VTOC_16)
9163 	{"i", 8, S_IFBLK},
9164 	{"j", 9, S_IFBLK},
9165 	{"k", 10, S_IFBLK},
9166 	{"l", 11, S_IFBLK},
9167 	{"m", 12, S_IFBLK},
9168 	{"n", 13, S_IFBLK},
9169 	{"o", 14, S_IFBLK},
9170 	{"p", 15, S_IFBLK},
9171 #endif			/* defined(_SUNOS_VTOC_16) */
9172 #if defined(_FIRMWARE_NEEDS_FDISK)
9173 	{"q", 16, S_IFBLK},
9174 	{"r", 17, S_IFBLK},
9175 	{"s", 18, S_IFBLK},
9176 	{"t", 19, S_IFBLK},
9177 	{"u", 20, S_IFBLK},
9178 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9179 	{"a,raw", 0, S_IFCHR},
9180 	{"b,raw", 1, S_IFCHR},
9181 	{"c,raw", 2, S_IFCHR},
9182 	{"d,raw", 3, S_IFCHR},
9183 	{"e,raw", 4, S_IFCHR},
9184 	{"f,raw", 5, S_IFCHR},
9185 	{"g,raw", 6, S_IFCHR},
9186 	{"h,raw", 7, S_IFCHR},
9187 #if defined(_SUNOS_VTOC_16)
9188 	{"i,raw", 8, S_IFCHR},
9189 	{"j,raw", 9, S_IFCHR},
9190 	{"k,raw", 10, S_IFCHR},
9191 	{"l,raw", 11, S_IFCHR},
9192 	{"m,raw", 12, S_IFCHR},
9193 	{"n,raw", 13, S_IFCHR},
9194 	{"o,raw", 14, S_IFCHR},
9195 	{"p,raw", 15, S_IFCHR},
9196 #endif			/* defined(_SUNOS_VTOC_16) */
9197 #if defined(_FIRMWARE_NEEDS_FDISK)
9198 	{"q,raw", 16, S_IFCHR},
9199 	{"r,raw", 17, S_IFCHR},
9200 	{"s,raw", 18, S_IFCHR},
9201 	{"t,raw", 19, S_IFCHR},
9202 	{"u,raw", 20, S_IFCHR},
9203 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9204 	{0}
9205 };
9206 
9207 static struct driver_minor_data sd_minor_data_efi[] = {
9208 	{"a", 0, S_IFBLK},
9209 	{"b", 1, S_IFBLK},
9210 	{"c", 2, S_IFBLK},
9211 	{"d", 3, S_IFBLK},
9212 	{"e", 4, S_IFBLK},
9213 	{"f", 5, S_IFBLK},
9214 	{"g", 6, S_IFBLK},
9215 	{"wd", 7, S_IFBLK},
9216 #if defined(_FIRMWARE_NEEDS_FDISK)
9217 	{"q", 16, S_IFBLK},
9218 	{"r", 17, S_IFBLK},
9219 	{"s", 18, S_IFBLK},
9220 	{"t", 19, S_IFBLK},
9221 	{"u", 20, S_IFBLK},
9222 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9223 	{"a,raw", 0, S_IFCHR},
9224 	{"b,raw", 1, S_IFCHR},
9225 	{"c,raw", 2, S_IFCHR},
9226 	{"d,raw", 3, S_IFCHR},
9227 	{"e,raw", 4, S_IFCHR},
9228 	{"f,raw", 5, S_IFCHR},
9229 	{"g,raw", 6, S_IFCHR},
9230 	{"wd,raw", 7, S_IFCHR},
9231 #if defined(_FIRMWARE_NEEDS_FDISK)
9232 	{"q,raw", 16, S_IFCHR},
9233 	{"r,raw", 17, S_IFCHR},
9234 	{"s,raw", 18, S_IFCHR},
9235 	{"t,raw", 19, S_IFCHR},
9236 	{"u,raw", 20, S_IFCHR},
9237 #endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
9238 	{0}
9239 };
9240 
9241 
9242 /*
9243  *    Function: sd_create_minor_nodes
9244  *
9245  * Description: Create the minor device nodes for the instance.
9246  *
9247  *   Arguments: un - driver soft state (unit) structure
9248  *		devi - pointer to device info structure
9249  *
9250  * Return Code: DDI_SUCCESS
9251  *		DDI_FAILURE
9252  *
9253  *     Context: Kernel thread context
9254  */
9255 
9256 static int
9257 sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi)
9258 {
9259 	struct driver_minor_data	*dmdp;
9260 	struct scsi_device		*devp;
9261 	int				instance;
9262 	char				name[48];
9263 
9264 	ASSERT(un != NULL);
9265 	devp = ddi_get_driver_private(devi);
9266 	instance = ddi_get_instance(devp->sd_dev);
9267 
9268 	/*
9269 	 * Create all the minor nodes for this target.
9270 	 */
9271 	if (un->un_blockcount > DK_MAX_BLOCKS)
9272 		dmdp = sd_minor_data_efi;
9273 	else
9274 		dmdp = sd_minor_data;
9275 	while (dmdp->name != NULL) {
9276 
9277 		(void) sprintf(name, "%s", dmdp->name);
9278 
9279 		if (ddi_create_minor_node(devi, name, dmdp->type,
9280 		    (instance << SDUNIT_SHIFT) | dmdp->minor,
9281 		    un->un_node_type, NULL) == DDI_FAILURE) {
9282 			/*
9283 			 * Clean up any nodes that may have been created, in
9284 			 * case this fails in the middle of the loop.
9285 			 */
9286 			ddi_remove_minor_node(devi, NULL);
9287 			return (DDI_FAILURE);
9288 		}
9289 		dmdp++;
9290 	}
9291 
9292 	return (DDI_SUCCESS);
9293 }
9294 
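/*
 * For illustration: the minor number packs the instance above the partition
 * index, so instance 2, slice "c" (minor index 2) gets the minor number
 * (2 << SDUNIT_SHIFT) | 2.  A hypothetical decode (the driver's real
 * accessor macros live in sddef.h) would be:
 *
 *	int instance  = getminor(dev) >> SDUNIT_SHIFT;
 *	int partition = getminor(dev) & ((1 << SDUNIT_SHIFT) - 1);
 */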
9295 
9296 /*
9297  *    Function: sd_create_errstats
9298  *
9299  * Description: This routine instantiates the device error stats.
9300  *
9301  *		Note: During attach the stats are instantiated first so they are
9302  *		available for attach-time routines that utilize the driver
9303  *		iopath to send commands to the device. The stats are initialized
9304  *		separately so data obtained during some attach-time routines is
9305  *		available. (4362483)
9306  *
9307  *   Arguments: un - driver soft state (unit) structure
9308  *		instance - driver instance
9309  *
9310  *     Context: Kernel thread context
9311  */
9312 
9313 static void
9314 sd_create_errstats(struct sd_lun *un, int instance)
9315 {
9316 	struct	sd_errstats	*stp;
9317 	char	kstatmodule_err[KSTAT_STRLEN];
9318 	char	kstatname[KSTAT_STRLEN];
9319 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
9320 
9321 	ASSERT(un != NULL);
9322 
9323 	if (un->un_errstats != NULL) {
9324 		return;
9325 	}
9326 
9327 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
9328 	    "%serr", sd_label);
9329 	(void) snprintf(kstatname, sizeof (kstatname),
9330 	    "%s%d,err", sd_label, instance);
9331 
9332 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
9333 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
9334 
9335 	if (un->un_errstats == NULL) {
9336 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9337 		    "sd_create_errstats: Failed kstat_create\n");
9338 		return;
9339 	}
9340 
9341 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9342 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
9343 	    KSTAT_DATA_UINT32);
9344 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
9345 	    KSTAT_DATA_UINT32);
9346 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
9347 	    KSTAT_DATA_UINT32);
9348 	kstat_named_init(&stp->sd_vid,		"Vendor",
9349 	    KSTAT_DATA_CHAR);
9350 	kstat_named_init(&stp->sd_pid,		"Product",
9351 	    KSTAT_DATA_CHAR);
9352 	kstat_named_init(&stp->sd_revision,	"Revision",
9353 	    KSTAT_DATA_CHAR);
9354 	kstat_named_init(&stp->sd_serial,	"Serial No",
9355 	    KSTAT_DATA_CHAR);
9356 	kstat_named_init(&stp->sd_capacity,	"Size",
9357 	    KSTAT_DATA_ULONGLONG);
9358 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
9359 	    KSTAT_DATA_UINT32);
9360 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
9361 	    KSTAT_DATA_UINT32);
9362 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
9363 	    KSTAT_DATA_UINT32);
9364 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
9365 	    KSTAT_DATA_UINT32);
9366 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
9367 	    KSTAT_DATA_UINT32);
9368 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
9369 	    KSTAT_DATA_UINT32);
9370 
9371 	un->un_errstats->ks_private = un;
9372 	un->un_errstats->ks_update  = nulldev;
9373 
9374 	kstat_install(un->un_errstats);
9375 }
9376 
9377 
9378 /*
9379  *    Function: sd_set_errstats
9380  *
9381  * Description: This routine sets the value of the vendor id, product id,
9382  *		revision, serial number, and capacity device error stats.
9383  *
9384  *		Note: During attach the stats are instantiated first so they are
9385  *		available for attach-time routines that utilize the driver
9386  *		iopath to send commands to the device. The stats are initialized
9387  *		separately so data obtained during some attach-time routines is
9388  *		available. (4362483)
9389  *
9390  *   Arguments: un - driver soft state (unit) structure
9391  *
9392  *     Context: Kernel thread context
9393  */
9394 
9395 static void
9396 sd_set_errstats(struct sd_lun *un)
9397 {
9398 	struct	sd_errstats	*stp;
9399 
9400 	ASSERT(un != NULL);
9401 	ASSERT(un->un_errstats != NULL);
9402 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9403 	ASSERT(stp != NULL);
9404 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
9405 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
9406 	(void) strncpy(stp->sd_revision.value.c,
9407 	    un->un_sd->sd_inq->inq_revision, 4);
9408 
9409 	/*
9410 	 * Set the "Serial No" kstat for Sun-qualified drives, indicated by
9411 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the
9412 	 * pid). (4376302)
9413 	 */
9414 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
9415 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9416 		    sizeof (SD_INQUIRY(un)->inq_serial));
9417 	}
9418 
9419 	if (un->un_f_blockcount_is_valid != TRUE) {
9420 		/*
9421 		 * Set capacity error stat to 0 for no media. This ensures
9422 		 * a valid capacity is displayed in response to 'iostat -E'
9423 		 * when no media is present in the device.
9424 		 */
9425 		stp->sd_capacity.value.ui64 = 0;
9426 	} else {
9427 		/*
9428 		 * Multiply un_blockcount by un->un_sys_blocksize to get
9429 		 * capacity.
9430 		 *
9431 		 * Note: for non-512 blocksize devices "un_blockcount" has been
9432 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
9433 		 * (un_tgt_blocksize / un->un_sys_blocksize).
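		 *
		 * Example: 2097152 blocks * 512 bytes = 1073741824 bytes,
		 * so a 1 GByte disk reports "Size: 1073741824" in the
		 * output of 'iostat -E'.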
9434 		 */
9435 		stp->sd_capacity.value.ui64 = (uint64_t)
9436 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
9437 	}
9438 }
9439 
9440 
9441 /*
9442  *    Function: sd_set_pstats
9443  *
9444  * Description: This routine instantiates and initializes the partition
9445  *		stats for each partition with more than zero blocks.
9446  *		(4363169)
9447  *
9448  *   Arguments: un - driver soft state (unit) structure
9449  *
9450  *     Context: Kernel thread context
9451  */
9452 
9453 static void
9454 sd_set_pstats(struct sd_lun *un)
9455 {
9456 	char	kstatname[KSTAT_STRLEN];
9457 	int	instance;
9458 	int	i;
9459 
9460 	ASSERT(un != NULL);
9461 
9462 	instance = ddi_get_instance(SD_DEVINFO(un));
9463 
9464 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
9465 	for (i = 0; i < NSDMAP; i++) {
9466 		if ((un->un_pstats[i] == NULL) &&
9467 		    (un->un_map[i].dkl_nblk != 0)) {
9468 			(void) snprintf(kstatname, sizeof (kstatname),
9469 			    "%s%d,%s", sd_label, instance,
9470 			    sd_minor_data[i].name);
9471 			un->un_pstats[i] = kstat_create(sd_label,
9472 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
9473 			    1, KSTAT_FLAG_PERSISTENT);
9474 			if (un->un_pstats[i] != NULL) {
9475 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
9476 				kstat_install(un->un_pstats[i]);
9477 			}
9478 		}
9479 	}
9480 }
9481 
9482 
9483 #if (defined(__fibre))
9484 /*
9485  *    Function: sd_init_event_callbacks
9486  *
9487  * Description: This routine initializes the insertion and removal event
9488  *		callbacks. (fibre only)
9489  *
9490  *   Arguments: un - driver soft state (unit) structure
9491  *
9492  *     Context: Kernel thread context
9493  */
9494 
9495 static void
9496 sd_init_event_callbacks(struct sd_lun *un)
9497 {
9498 	ASSERT(un != NULL);
9499 
9500 	if ((un->un_insert_event == NULL) &&
9501 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
9502 	    &un->un_insert_event) == DDI_SUCCESS)) {
9503 		/*
9504 		 * Add the callback for an insertion event
9505 		 */
9506 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9507 		    un->un_insert_event, sd_event_callback, (void *)un,
9508 		    &(un->un_insert_cb_id));
9509 	}
9510 
9511 	if ((un->un_remove_event == NULL) &&
9512 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
9513 	    &un->un_remove_event) == DDI_SUCCESS)) {
9514 		/*
9515 		 * Add the callback for a removal event
9516 		 */
9517 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9518 		    un->un_remove_event, sd_event_callback, (void *)un,
9519 		    &(un->un_remove_cb_id));
9520 	}
9521 }
9522 
9523 
9524 /*
9525  *    Function: sd_event_callback
9526  *
9527  * Description: This routine handles insert/remove events (photon). The
9528  *		state is changed to OFFLINE, which can be used to suppress
9529  *		error msgs. (fibre only)
9530  *
9531  *   Arguments: un - driver soft state (unit) structure
9532  *
9533  *     Context: Callout thread context
9534  */
9535 /* ARGSUSED */
9536 static void
9537 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
9538     void *bus_impldata)
9539 {
9540 	struct sd_lun *un = (struct sd_lun *)arg;
9541 
9542 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
9543 	if (event == un->un_insert_event) {
9544 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
9545 		mutex_enter(SD_MUTEX(un));
9546 		if (un->un_state == SD_STATE_OFFLINE) {
9547 			if (un->un_last_state != SD_STATE_SUSPENDED) {
9548 				un->un_state = un->un_last_state;
9549 			} else {
9550 				/*
9551 				 * We have gone through SUSPEND/RESUME while
9552 				 * we were offline. Restore the last state
9553 				 */
9554 				un->un_state = un->un_save_state;
9555 			}
9556 		}
9557 		mutex_exit(SD_MUTEX(un));
9558 
9559 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
9560 	} else if (event == un->un_remove_event) {
9561 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
9562 		mutex_enter(SD_MUTEX(un));
9563 		/*
9564 		 * We need to handle an event callback that occurs during
9565 		 * the suspend operation, since we don't prevent it.
9566 		 */
9567 		if (un->un_state != SD_STATE_OFFLINE) {
9568 			if (un->un_state != SD_STATE_SUSPENDED) {
9569 				New_state(un, SD_STATE_OFFLINE);
9570 			} else {
9571 				un->un_last_state = SD_STATE_OFFLINE;
9572 			}
9573 		}
9574 		mutex_exit(SD_MUTEX(un));
9575 	} else {
9576 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
9577 		    "!Unknown event\n");
9578 	}
9579 
9580 }
9581 #endif
9582 
9583 /*
9584  *    Function: sd_cache_control()
9585  *
9586  * Description: This routine is the driver entry point for setting
9587  *		read and write caching by modifying the WCE (write cache
9588  *		enable) and RCD (read cache disable) bits of mode
9589  *		page 8 (MODEPAGE_CACHING).
9590  *
9591  *   Arguments: un - driver soft state (unit) structure
9592  *		rcd_flag - flag for controlling the read cache
9593  *		wce_flag - flag for controlling the write cache
9594  *
9595  * Return Code: EIO
9596  *		code returned by sd_send_scsi_MODE_SENSE and
9597  *		sd_send_scsi_MODE_SELECT
9598  *
9599  *     Context: Kernel Thread
9600  */
9601 
9602 static int
9603 sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag)
9604 {
9605 	struct mode_caching	*mode_caching_page;
9606 	uchar_t			*header;
9607 	size_t			buflen;
9608 	int			hdrlen;
9609 	int			bd_len;
9610 	int			rval = 0;
9611 	struct mode_header_grp2	*mhp;
9612 
9613 	ASSERT(un != NULL);
9614 
9615 	/*
9616 	 * Do a test unit ready, otherwise a mode sense may not work if this
9617 	 * is the first command sent to the device after boot.
9618 	 */
9619 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9620 
9621 	if (un->un_f_cfg_is_atapi == TRUE) {
9622 		hdrlen = MODE_HEADER_LENGTH_GRP2;
9623 	} else {
9624 		hdrlen = MODE_HEADER_LENGTH;
9625 	}
9626 
9627 	/*
9628 	 * Allocate memory for the retrieved mode page and its headers.  Set
9629 	 * a pointer to the page itself.  Use mode_cache_scsi3 to ensure
9630 	 * we get all of the mode sense data; otherwise, the mode select
9631 	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
9632 	 */
9633 	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
9634 	    sizeof (struct mode_cache_scsi3);
9635 
9636 	header = kmem_zalloc(buflen, KM_SLEEP);
9637 
9638 	/* Get the information from the device. */
9639 	if (un->un_f_cfg_is_atapi == TRUE) {
9640 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
9641 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9642 	} else {
9643 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
9644 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9645 	}
9646 	if (rval != 0) {
9647 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9648 		    "sd_cache_control: Mode Sense Failed\n");
9649 		kmem_free(header, buflen);
9650 		return (rval);
9651 	}
9652 
9653 	/*
9654 	 * Determine size of Block Descriptors in order to locate
9655 	 * the mode page data. ATAPI devices return 0, SCSI devices
9656 	 * should return MODE_BLK_DESC_LENGTH.
9657 	 */
9658 	if (un->un_f_cfg_is_atapi == TRUE) {
9659 		mhp	= (struct mode_header_grp2 *)header;
9660 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9661 	} else {
9662 		bd_len  = ((struct mode_header *)header)->bdesc_length;
9663 	}
9664 
9665 	if (bd_len > MODE_BLK_DESC_LENGTH) {
9666 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9667 		    "sd_cache_control: Mode Sense returned invalid "
9668 		    "block descriptor length\n");
9669 		kmem_free(header, buflen);
9670 		return (EIO);
9671 	}
9672 
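	/*
	 * The retrieved buffer is laid out as follows; the pointer
	 * arithmetic below steps past the header and block descriptors
	 * to reach the page:
	 *
	 *	+----------------------+ <- header
	 *	| mode header          |    (hdrlen: group 0 or group 2)
	 *	+----------------------+ <- header + hdrlen
	 *	| block descriptor(s)  |    (bd_len: 0 for ATAPI)
	 *	+----------------------+ <- header + hdrlen + bd_len
	 *	| caching mode page    |
	 *	+----------------------+
	 */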
9673 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9674 
9675 	/* Check the relevant bits on successful mode sense. */
9676 	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
9677 	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
9678 	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
9679 	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
9680 
9681 		size_t sbuflen;
9682 		uchar_t save_pg;
9683 
9684 		/*
9685 		 * Construct select buffer length based on the
9686 		 * length of the sense data returned.
9687 		 */
9688 		sbuflen = hdrlen + MODE_BLK_DESC_LENGTH +
9689 		    sizeof (struct mode_page) +
9690 		    (int)mode_caching_page->mode_page.length;
9691 
9692 		/*
9693 		 * Set the caching bits as requested.
9694 		 */
9695 		if (rcd_flag == SD_CACHE_ENABLE)
9696 			mode_caching_page->rcd = 0;
9697 		else if (rcd_flag == SD_CACHE_DISABLE)
9698 			mode_caching_page->rcd = 1;
9699 
9700 		if (wce_flag == SD_CACHE_ENABLE)
9701 			mode_caching_page->wce = 1;
9702 		else if (wce_flag == SD_CACHE_DISABLE)
9703 			mode_caching_page->wce = 0;
9704 
9705 		/*
9706 		 * Save the page if the mode sense says the
9707 		 * drive supports it.
9708 		 */
9709 		save_pg = mode_caching_page->mode_page.ps ?
9710 		    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
9711 
9712 		/* Clear reserved bits before mode select. */
9713 		mode_caching_page->mode_page.ps = 0;
9714 
9715 		/*
9716 		 * Clear out mode header for mode select.
9717 		 * The rest of the retrieved page will be reused.
9718 		 */
9719 		bzero(header, hdrlen);
9720 
9721 		if (un->un_f_cfg_is_atapi == TRUE) {
9722 			mhp = (struct mode_header_grp2 *)header;
9723 			mhp->bdesc_length_hi = bd_len >> 8;
9724 			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
9725 		} else {
9726 			((struct mode_header *)header)->bdesc_length = bd_len;
9727 		}
9728 
9729 		/* Issue mode select to change the cache settings */
9730 		if (un->un_f_cfg_is_atapi == TRUE) {
9731 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
9732 			    sbuflen, save_pg, SD_PATH_DIRECT);
9733 		} else {
9734 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
9735 			    sbuflen, save_pg, SD_PATH_DIRECT);
9736 		}
9737 	}
9738 
9739 	kmem_free(header, buflen);
9740 	return (rval);
9741 }
9742 
9743 
9744 /*
9745  *    Function: sd_get_write_cache_enabled()
9746  *
9747  * Description: This routine is the driver entry point for determining if
9748  *		write caching is enabled.  It examines the WCE (write cache
9749  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
9750  *
9751  *   Arguments: un - driver soft state (unit) structure
9752  *		is_enabled - pointer to int where write cache enabled state
9753  *			is returned (non-zero -> write cache enabled)
9754  *
9755  *
9756  * Return Code: EIO
9757  *		code returned by sd_send_scsi_MODE_SENSE
9758  *
9759  *     Context: Kernel Thread
9760  *
9761  * NOTE: If an ioctl is added to disable the write cache, this sequence
9762  * should be followed so that no locking is required for accesses to
9763  * un->un_f_write_cache_enabled:
9764  * 	do mode select to clear wce
9765  * 	do synchronize cache to flush cache
9766  * 	set un->un_f_write_cache_enabled = FALSE
9767  *
9768  * Conversely, an ioctl to enable the write cache should be done
9769  * in this order:
9770  * 	set un->un_f_write_cache_enabled = TRUE
9771  * 	do mode select to set wce
9772  */
9773 
9774 static int
9775 sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
9776 {
9777 	struct mode_caching	*mode_caching_page;
9778 	uchar_t			*header;
9779 	size_t			buflen;
9780 	int			hdrlen;
9781 	int			bd_len;
9782 	int			rval = 0;
9783 
9784 	ASSERT(un != NULL);
9785 	ASSERT(is_enabled != NULL);
9786 
9787 	/* in case of error, flag as enabled */
9788 	*is_enabled = TRUE;
9789 
9790 	/*
9791 	 * Do a test unit ready, otherwise a mode sense may not work if this
9792 	 * is the first command sent to the device after boot.
9793 	 */
9794 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9795 
9796 	if (un->un_f_cfg_is_atapi == TRUE) {
9797 		hdrlen = MODE_HEADER_LENGTH_GRP2;
9798 	} else {
9799 		hdrlen = MODE_HEADER_LENGTH;
9800 	}
9801 
9802 	/*
9803 	 * Allocate memory for the retrieved mode page and its headers.  Set
9804 	 * a pointer to the page itself.
9805 	 */
9806 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
9807 	header = kmem_zalloc(buflen, KM_SLEEP);
9808 
9809 	/* Get the information from the device. */
9810 	if (un->un_f_cfg_is_atapi == TRUE) {
9811 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
9812 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9813 	} else {
9814 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
9815 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9816 	}
9817 	if (rval != 0) {
9818 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9819 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
9820 		kmem_free(header, buflen);
9821 		return (rval);
9822 	}
9823 
9824 	/*
9825 	 * Determine size of Block Descriptors in order to locate
9826 	 * the mode page data. ATAPI devices return 0, SCSI devices
9827 	 * should return MODE_BLK_DESC_LENGTH.
9828 	 */
9829 	if (un->un_f_cfg_is_atapi == TRUE) {
9830 		struct mode_header_grp2	*mhp;
9831 		mhp	= (struct mode_header_grp2 *)header;
9832 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9833 	} else {
9834 		bd_len  = ((struct mode_header *)header)->bdesc_length;
9835 	}
9836 
9837 	if (bd_len > MODE_BLK_DESC_LENGTH) {
9838 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9839 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
9840 		    "block descriptor length\n");
9841 		kmem_free(header, buflen);
9842 		return (EIO);
9843 	}
9844 
9845 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9846 	*is_enabled = mode_caching_page->wce;
9847 
9848 	kmem_free(header, buflen);
9849 	return (0);
9850 }
9851 
9852 
9853 /*
9854  *    Function: sd_make_device
9855  *
9856  * Description: Utility routine to return the Solaris device number from
9857  *		the data in the device's dev_info structure.
9858  *
9859  * Return Code: The Solaris device number
9860  *
9861  *     Context: Any
9862  */
9863 
9864 static dev_t
9865 sd_make_device(dev_info_t *devi)
9866 {
9867 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
9868 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
9869 }
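/*
 * Illustrative round trip (SDUNIT is the macro this file uses to
 * decode the instance from the minor number; the relationship below
 * is a sketch, not an additional entry point):
 *
 *	dev_t dev = sd_make_device(devi);
 *	ASSERT(SDUNIT(dev) == ddi_get_instance(devi));
 *	// partition bits live below SDUNIT_SHIFT, see SDPART(dev)
 */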
9870 
9871 
9872 /*
9873  *    Function: sd_pm_entry
9874  *
9875  * Description: Called at the start of a new command to manage power
9876  *		and busy status of a device. This includes determining whether
9877  *		the current power state of the device is sufficient for
9878  *		performing the command or whether it must be changed.
9879  *		The PM framework is notified appropriately.
9880  *		Only with a return status of DDI_SUCCESS will the
9881  *		component be marked busy to the framework.
9882  *
9883  *		All callers of sd_pm_entry must check the return status
9884  *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
9885  *		of DDI_FAILURE indicates the device failed to power up.
9886  *		In this case un_pm_count has been adjusted so the result
9887  *		on exit is still powered down, i.e. count is less than 0.
9888  *		Calling sd_pm_exit with this count value hits an ASSERT.
9889  *
9890  * Return Code: DDI_SUCCESS or DDI_FAILURE
9891  *
9892  *     Context: Kernel thread context.
9893  */
9894 
9895 static int
9896 sd_pm_entry(struct sd_lun *un)
9897 {
9898 	int return_status = DDI_SUCCESS;
9899 
9900 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9901 	ASSERT(!mutex_owned(&un->un_pm_mutex));
9902 
9903 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
9904 
9905 	if (un->un_f_pm_is_enabled == FALSE) {
9906 		SD_TRACE(SD_LOG_IO_PM, un,
9907 		    "sd_pm_entry: exiting, PM not enabled\n");
9908 		return (return_status);
9909 	}
9910 
9911 	/*
9912 	 * Just increment a counter if PM is enabled. On the transition from
9913 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
9914 	 * the count with each IO and mark the device as idle when the count
9915 	 * hits 0.
9916 	 *
9917 	 * If the count is less than 0 the device is powered down. If a powered
9918 	 * down device is successfully powered up then the count must be
9919 	 * incremented to reflect the power up. Note that it'll get incremented
9920 	 * a second time to become busy.
9921 	 *
9922 	 * Because the following has the potential to change the device state
9923 	 * and must release the un_pm_mutex to do so, only one thread can be
9924 	 * allowed through at a time.
9925 	 */
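	/*
	 * A compact restatement of the scheme above (illustrative only):
	 * un_pm_count < 0 means the device is powered down; a successful
	 * sd_pm_entry() leaves the count at 1 or more (busy), and each
	 * sd_pm_exit() decrements it, idling the component at 0. E.g.:
	 *
	 *	if (sd_pm_entry(un) == DDI_SUCCESS) {	// count -> busy
	 *		// ... issue the command ...
	 *		sd_pm_exit(un);			// count -> idle at 0
	 *	}
	 */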
9926 
9927 	mutex_enter(&un->un_pm_mutex);
9928 	while (un->un_pm_busy == TRUE) {
9929 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
9930 	}
9931 	un->un_pm_busy = TRUE;
9932 
9933 	if (un->un_pm_count < 1) {
9934 
9935 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
9936 
9937 		/*
9938 		 * Indicate we are now busy so the framework won't attempt to
9939 		 * power down the device. This call will only fail if either
9940 		 * we passed a bad component number or the device has no
9941 		 * components. Neither of these should ever happen.
9942 		 */
9943 		mutex_exit(&un->un_pm_mutex);
9944 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
9945 		ASSERT(return_status == DDI_SUCCESS);
9946 
9947 		mutex_enter(&un->un_pm_mutex);
9948 
9949 		if (un->un_pm_count < 0) {
9950 			mutex_exit(&un->un_pm_mutex);
9951 
9952 			SD_TRACE(SD_LOG_IO_PM, un,
9953 			    "sd_pm_entry: power up component\n");
9954 
9955 			/*
9956 			 * pm_raise_power will cause sdpower to be called
9957 			 * which brings the device power level to the
9958 			 * desired state, ON in this case. If successful,
9959 			 * un_pm_count and un_power_level will be updated
9960 			 * appropriately.
9961 			 */
9962 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
9963 			    SD_SPINDLE_ON);
9964 
9965 			mutex_enter(&un->un_pm_mutex);
9966 
9967 			if (return_status != DDI_SUCCESS) {
9968 				/*
9969 				 * Power up failed.
9970 				 * Idle the device and adjust the count
9971 				 * so the result on exit is that we're
9972 				 * still powered down, i.e. count is less than 0.
9973 				 */
9974 				SD_TRACE(SD_LOG_IO_PM, un,
9975 				    "sd_pm_entry: power up failed,"
9976 				    " idle the component\n");
9977 
9978 				(void) pm_idle_component(SD_DEVINFO(un), 0);
9979 				un->un_pm_count--;
9980 			} else {
9981 				/*
9982 				 * Device is powered up, verify the
9983 				 * count is non-negative.
9984 				 * This is debug only.
9985 				 */
9986 				ASSERT(un->un_pm_count == 0);
9987 			}
9988 		}
9989 
9990 		if (return_status == DDI_SUCCESS) {
9991 			/*
9992 			 * For performance, now that the device has been tagged
9993 			 * as busy, and it's known to be powered up, update the
9994 			 * chain types to use jump tables that do not include
9995 			 * pm. This significantly lowers the overhead and
9996 			 * therefore improves performance.
9997 			 */
9998 
9999 			mutex_exit(&un->un_pm_mutex);
10000 			mutex_enter(SD_MUTEX(un));
10001 			SD_TRACE(SD_LOG_IO_PM, un,
10002 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
10003 			    un->un_uscsi_chain_type);
10004 
10005 			if (un->un_f_non_devbsize_supported) {
10006 				un->un_buf_chain_type =
10007 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
10008 			} else {
10009 				un->un_buf_chain_type =
10010 				    SD_CHAIN_INFO_DISK_NO_PM;
10011 			}
10012 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
10013 
10014 			SD_TRACE(SD_LOG_IO_PM, un,
10015 			    "             changed  uscsi_chain_type to   %d\n",
10016 			    un->un_uscsi_chain_type);
10017 			mutex_exit(SD_MUTEX(un));
10018 			mutex_enter(&un->un_pm_mutex);
10019 
10020 			if (un->un_pm_idle_timeid == NULL) {
10021 				/* 300 ms. */
10022 				un->un_pm_idle_timeid =
10023 				    timeout(sd_pm_idletimeout_handler, un,
10024 				    (drv_usectohz((clock_t)300000)));
10025 				/*
10026 				 * Include an extra call to busy which keeps the
10027 				 * device busy with respect to the PM layer
10028 				 * until the timer fires, at which time it'll
10029 				 * get the extra idle call.
10030 				 */
10031 				(void) pm_busy_component(SD_DEVINFO(un), 0);
10032 			}
10033 		}
10034 	}
10035 	un->un_pm_busy = FALSE;
10036 	/* Next... */
10037 	cv_signal(&un->un_pm_busy_cv);
10038 
10039 	un->un_pm_count++;
10040 
10041 	SD_TRACE(SD_LOG_IO_PM, un,
10042 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
10043 
10044 	mutex_exit(&un->un_pm_mutex);
10045 
10046 	return (return_status);
10047 }
10048 
10049 
10050 /*
10051  *    Function: sd_pm_exit
10052  *
10053  * Description: Called at the completion of a command to manage busy
10054  *		status for the device. If the device becomes idle the
10055  *		PM framework is notified.
10056  *
10057  *     Context: Kernel thread context
10058  */
10059 
10060 static void
10061 sd_pm_exit(struct sd_lun *un)
10062 {
10063 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10064 	ASSERT(!mutex_owned(&un->un_pm_mutex));
10065 
10066 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
10067 
10068 	/*
10069 	 * After attach the following flag is only read, so don't
10070 	 * take the penalty of acquiring a mutex for it.
10071 	 */
10072 	if (un->un_f_pm_is_enabled == TRUE) {
10073 
10074 		mutex_enter(&un->un_pm_mutex);
10075 		un->un_pm_count--;
10076 
10077 		SD_TRACE(SD_LOG_IO_PM, un,
10078 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
10079 
10080 		ASSERT(un->un_pm_count >= 0);
10081 		if (un->un_pm_count == 0) {
10082 			mutex_exit(&un->un_pm_mutex);
10083 
10084 			SD_TRACE(SD_LOG_IO_PM, un,
10085 			    "sd_pm_exit: idle component\n");
10086 
10087 			(void) pm_idle_component(SD_DEVINFO(un), 0);
10088 
10089 		} else {
10090 			mutex_exit(&un->un_pm_mutex);
10091 		}
10092 	}
10093 
10094 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
10095 }
10096 
10097 
10098 /*
10099  *    Function: sdopen
10100  *
10101  * Description: Driver's open(9e) entry point function.
10102  *
10103  *   Arguments: dev_p   - pointer to device number
10104  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
10105  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10106  *		cred_p  - user credential pointer
10107  *
10108  * Return Code: EINVAL
10109  *		ENXIO
10110  *		EIO
10111  *		EROFS
10112  *		EBUSY
10113  *
10114  *     Context: Kernel thread context
10115  */
10116 /* ARGSUSED */
10117 static int
10118 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
10119 {
10120 	struct sd_lun	*un;
10121 	int		nodelay;
10122 	int		part;
10123 	uint64_t	partmask;
10124 	int		instance;
10125 	dev_t		dev;
10126 	int		rval = EIO;
10127 
10128 	/* Validate the open type */
10129 	if (otyp >= OTYPCNT) {
10130 		return (EINVAL);
10131 	}
10132 
10133 	dev = *dev_p;
10134 	instance = SDUNIT(dev);
10135 	mutex_enter(&sd_detach_mutex);
10136 
10137 	/*
10138 	 * Fail the open if there is no softstate for the instance, or
10139 	 * if another thread somewhere is trying to detach the instance.
10140 	 */
10141 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
10142 	    (un->un_detach_count != 0)) {
10143 		mutex_exit(&sd_detach_mutex);
10144 		/*
10145 		 * The probe cache only needs to be cleared when open (9e) fails
10146 		 * with ENXIO (4238046).
10147 		 */
10148 		/*
10149 		 * Unconditionally clearing the probe cache is OK with
10150 		 * separate sd/ssd binaries; on the x86 platform it can
10151 		 * be an issue with both parallel and fibre in one
10152 		 * binary.
10153 		 */
10154 		sd_scsi_clear_probe_cache();
10155 		return (ENXIO);
10156 	}
10157 
10158 	/*
10159 	 * The un_layer_count is to prevent another thread in specfs from
10160 	 * trying to detach the instance, which can happen when we are
10161 	 * called from a higher-layer driver instead of thru specfs.
10162 	 * This will not be needed when DDI provides a layered driver
10163 	 * interface that allows specfs to know that an instance is in
10164 	 * use by a layered driver & should not be detached.
10165 	 *
10166 	 * Note: the semantics for layered driver opens are exactly one
10167 	 * close for every open.
10168 	 */
10169 	if (otyp == OTYP_LYR) {
10170 		un->un_layer_count++;
10171 	}
10172 
10173 	/*
10174 	 * Keep a count of the current # of opens in progress. This is because
10175 	 * some layered drivers try to call us as a regular open. This can
10176 	 * cause problems that we cannot prevent; however, by keeping this count
10177 	 * we can at least keep our open and detach routines from racing against
10178 	 * each other under such conditions.
10179 	 */
10180 	un->un_opens_in_progress++;
10181 	mutex_exit(&sd_detach_mutex);
10182 
10183 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
10184 	part	 = SDPART(dev);
10185 	partmask = 1 << part;
10186 
10187 	/*
10188 	 * We use a semaphore here in order to serialize
10189 	 * open and close requests on the device.
10190 	 */
10191 	sema_p(&un->un_semoclose);
10192 
10193 	mutex_enter(SD_MUTEX(un));
10194 
10195 	/*
10196 	 * All device accesses go thru sdstrategy() where we check
10197 	 * on suspend status, but there could be a scsi_poll command,
10198 	 * which bypasses sdstrategy(), so we need to check pm
10199 	 * status.
10200 	 */
10201 
10202 	if (!nodelay) {
10203 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10204 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10205 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10206 		}
10207 
10208 		mutex_exit(SD_MUTEX(un));
10209 		if (sd_pm_entry(un) != DDI_SUCCESS) {
10210 			rval = EIO;
10211 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
10212 			    "sdopen: sd_pm_entry failed\n");
10213 			goto open_failed_with_pm;
10214 		}
10215 		mutex_enter(SD_MUTEX(un));
10216 	}
10217 
10218 	/* check for previous exclusive open */
10219 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
10220 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10221 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
10222 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
10223 
10224 	if (un->un_exclopen & (partmask)) {
10225 		goto excl_open_fail;
10226 	}
10227 
10228 	if (flag & FEXCL) {
10229 		int i;
10230 		if (un->un_ocmap.lyropen[part]) {
10231 			goto excl_open_fail;
10232 		}
10233 		for (i = 0; i < (OTYPCNT - 1); i++) {
10234 			if (un->un_ocmap.regopen[i] & (partmask)) {
10235 				goto excl_open_fail;
10236 			}
10237 		}
10238 	}
10239 
10240 	/*
10241 	 * Check the write permission if this is a removable media device,
10242 	 * NDELAY has not been set, and writable permission is requested.
10243 	 *
10244 	 * Note: If NDELAY was set and this is write-protected media the WRITE
10245 	 * attempt will fail with EIO as part of the I/O processing. This is a
10246 	 * more permissive implementation that allows the open to succeed and
10247 	 * WRITE attempts to fail when appropriate.
10248 	 */
10249 	if (un->un_f_chk_wp_open) {
10250 		if ((flag & FWRITE) && (!nodelay)) {
10251 			mutex_exit(SD_MUTEX(un));
10252 			/*
10253 			 * Defer the check for write permission on writable
10254 			 * DVD drives until sdstrategy; do not fail the open
10255 			 * even if FWRITE is set, as the device can be writable
10256 			 * depending upon the media, and the media can change
10257 			 * after the call to open().
10258 			 */
10259 			if (un->un_f_dvdram_writable_device == FALSE) {
10260 				if (ISCD(un) || sr_check_wp(dev)) {
10261 					rval = EROFS;
10262 					mutex_enter(SD_MUTEX(un));
10263 					SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10264 					    "write to cd or write protected media\n");
10265 					goto open_fail;
10266 				}
10267 			}
10268 			mutex_enter(SD_MUTEX(un));
10269 		}
10270 	}
10271 
10272 	/*
10273 	 * If opening in NDELAY/NONBLOCK mode, just return.
10274 	 * Check if disk is ready and has a valid geometry later.
10275 	 */
10276 	if (!nodelay) {
10277 		mutex_exit(SD_MUTEX(un));
10278 		rval = sd_ready_and_valid(un);
10279 		mutex_enter(SD_MUTEX(un));
10280 		/*
10281 		 * Fail if device is not ready or if the number of disk
10282 		 * blocks is zero or negative for non CD devices.
10283 		 */
10284 		if ((rval != SD_READY_VALID) ||
10285 		    (!ISCD(un) && un->un_map[part].dkl_nblk <= 0)) {
10286 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
10287 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10288 			    "device not ready or invalid disk block value\n");
10289 			goto open_fail;
10290 		}
10291 #if defined(__i386) || defined(__amd64)
10292 	} else {
10293 		uchar_t *cp;
10294 		/*
10295 		 * x86 requires special nodelay handling, so that p0 is
10296 		 * always defined and accessible.
10297 		 * Invalidate geometry only if device is not already open.
10298 		 */
10299 		cp = &un->un_ocmap.chkd[0];
10300 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10301 			if (*cp != (uchar_t)0) {
10302 				break;
10303 			}
10304 			cp++;
10305 		}
10306 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10307 			un->un_f_geometry_is_valid = FALSE;
10308 		}
10309 
10310 #endif
10311 	}
10312 
10313 	if (otyp == OTYP_LYR) {
10314 		un->un_ocmap.lyropen[part]++;
10315 	} else {
10316 		un->un_ocmap.regopen[otyp] |= partmask;
10317 	}
10318 
10319 	/* Set up open and exclusive open flags */
10320 	if (flag & FEXCL) {
10321 		un->un_exclopen |= (partmask);
10322 	}
10323 
10324 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10325 	    "open of part %d type %d\n", part, otyp);
10326 
10327 	mutex_exit(SD_MUTEX(un));
10328 	if (!nodelay) {
10329 		sd_pm_exit(un);
10330 	}
10331 
10332 	sema_v(&un->un_semoclose);
10333 
10334 	mutex_enter(&sd_detach_mutex);
10335 	un->un_opens_in_progress--;
10336 	mutex_exit(&sd_detach_mutex);
10337 
10338 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
10339 	return (DDI_SUCCESS);
10340 
10341 excl_open_fail:
10342 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
10343 	rval = EBUSY;
10344 
10345 open_fail:
10346 	mutex_exit(SD_MUTEX(un));
10347 
10348 	/*
10349 	 * On a failed open we must exit the pm management.
10350 	 */
10351 	if (!nodelay) {
10352 		sd_pm_exit(un);
10353 	}
10354 open_failed_with_pm:
10355 	sema_v(&un->un_semoclose);
10356 
10357 	mutex_enter(&sd_detach_mutex);
10358 	un->un_opens_in_progress--;
10359 	if (otyp == OTYP_LYR) {
10360 		un->un_layer_count--;
10361 	}
10362 	mutex_exit(&sd_detach_mutex);
10363 
10364 	return (rval);
10365 }
10366 
10367 
10368 /*
10369  *    Function: sdclose
10370  *
10371  * Description: Driver's close(9e) entry point function.
10372  *
10373  *   Arguments: dev    - device number
10374  *		flag   - file status flag, informational only
10375  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10376  *		cred_p - user credential pointer
10377  *
10378  * Return Code: ENXIO
10379  *
10380  *     Context: Kernel thread context
10381  */
10382 /* ARGSUSED */
10383 static int
10384 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
10385 {
10386 	struct sd_lun	*un;
10387 	uchar_t		*cp;
10388 	int		part;
10389 	int		nodelay;
10390 	int		rval = 0;
10391 
10392 	/* Validate the open type */
10393 	if (otyp >= OTYPCNT) {
10394 		return (ENXIO);
10395 	}
10396 
10397 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10398 		return (ENXIO);
10399 	}
10400 
10401 	part = SDPART(dev);
10402 	nodelay = flag & (FNDELAY | FNONBLOCK);
10403 
10404 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10405 	    "sdclose: close of part %d type %d\n", part, otyp);
10406 
10407 	/*
10408 	 * We use a semaphore here in order to serialize
10409 	 * open and close requests on the device.
10410 	 */
10411 	sema_p(&un->un_semoclose);
10412 
10413 	mutex_enter(SD_MUTEX(un));
10414 
10415 	/* Don't proceed if power is being changed. */
10416 	while (un->un_state == SD_STATE_PM_CHANGING) {
10417 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10418 	}
10419 
10420 	if (un->un_exclopen & (1 << part)) {
10421 		un->un_exclopen &= ~(1 << part);
10422 	}
10423 
10424 	/* Update the open partition map */
10425 	if (otyp == OTYP_LYR) {
10426 		un->un_ocmap.lyropen[part] -= 1;
10427 	} else {
10428 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
10429 	}
10430 
10431 	cp = &un->un_ocmap.chkd[0];
10432 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10433 		if (*cp != (uchar_t)0) {
10434 			break;
10435 		}
10436 		cp++;
10437 	}
10438 
10439 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10440 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
10441 
10442 		/*
10443 		 * We avoid persistence upon the last close, and set
10444 		 * the throttle back to the maximum.
10445 		 */
10446 		un->un_throttle = un->un_saved_throttle;
10447 
10448 		if (un->un_state == SD_STATE_OFFLINE) {
10449 			if (un->un_f_is_fibre == FALSE) {
10450 				scsi_log(SD_DEVINFO(un), sd_label,
10451 					CE_WARN, "offline\n");
10452 			}
10453 			un->un_f_geometry_is_valid = FALSE;
10454 
10455 		} else {
10456 			/*
10457 			 * Flush any outstanding writes in NVRAM cache.
10458 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
10459 			 * cmd; it may not work for non-Pluto devices.
10460 			 * SYNCHRONIZE CACHE is not required for removables,
10461 			 * except DVD-RAM drives.
10462 			 *
10463 			 * Also note: because SYNCHRONIZE CACHE is currently
10464 			 * the only command issued here that requires the
10465 			 * drive be powered up, only do the power up before
10466 			 * sending the Sync Cache command. If additional
10467 			 * commands are added which require a powered up
10468 			 * drive, the following sequence may have to change.
10469 			 *
10470 			 * And finally, note that parallel SCSI on SPARC
10471 			 * only issues a Sync Cache to DVD-RAM, a newly
10472 			 * supported device.
10473 			 */
10474 #if defined(__i386) || defined(__amd64)
10475 			if (un->un_f_sync_cache_supported ||
10476 			    un->un_f_dvdram_writable_device == TRUE) {
10477 #else
10478 			if (un->un_f_dvdram_writable_device == TRUE) {
10479 #endif
10480 				mutex_exit(SD_MUTEX(un));
10481 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10482 					rval =
10483 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
10484 					    NULL);
10485 					/* ignore error if not supported */
10486 					if (rval == ENOTSUP) {
10487 						rval = 0;
10488 					} else if (rval != 0) {
10489 						rval = EIO;
10490 					}
10491 					sd_pm_exit(un);
10492 				} else {
10493 					rval = EIO;
10494 				}
10495 				mutex_enter(SD_MUTEX(un));
10496 			}
10497 
10498 			/*
10499 			 * For devices which support DOOR_LOCK, send an ALLOW
10500 			 * MEDIA REMOVAL command, but don't get upset if it
10501 			 * fails. We need to raise the power of the drive before
10502 			 * we can call sd_send_scsi_DOORLOCK().
10503 			 */
10504 			if (un->un_f_doorlock_supported) {
10505 				mutex_exit(SD_MUTEX(un));
10506 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10507 					rval = sd_send_scsi_DOORLOCK(un,
10508 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
10509 
10510 					sd_pm_exit(un);
10511 					if (ISCD(un) && (rval != 0) &&
10512 					    (nodelay != 0)) {
10513 						rval = ENXIO;
10514 					}
10515 				} else {
10516 					rval = EIO;
10517 				}
10518 				mutex_enter(SD_MUTEX(un));
10519 			}
10520 
10521 			/*
10522 			 * If a device has removable media, invalidate all
10523 			 * parameters related to media, such as geometry,
10524 			 * blocksize, and blockcount.
10525 			 */
10526 			if (un->un_f_has_removable_media) {
10527 				sr_ejected(un);
10528 			}
10529 
10530 			/*
10531 			 * Destroy the cache (if it exists) which was
10532 			 * allocated for the write maps since this is
10533 			 * the last close for this media.
10534 			 */
10535 			if (un->un_wm_cache) {
10536 				/*
10537 				 * Check if there are pending commands;
10538 				 * if there are, give a warning and
10539 				 * do not destroy the cache.
10540 				 */
10541 				if (un->un_ncmds_in_driver > 0) {
10542 					scsi_log(SD_DEVINFO(un),
10543 					    sd_label, CE_WARN,
10544 					    "Unable to clean up memory "
10545 					    "because of pending I/O\n");
10546 				} else {
10547 					kmem_cache_destroy(
10548 					    un->un_wm_cache);
10549 					un->un_wm_cache = NULL;
10550 				}
10551 			}
10552 		}
10553 	}
10554 
10555 	mutex_exit(SD_MUTEX(un));
10556 	sema_v(&un->un_semoclose);
10557 
10558 	if (otyp == OTYP_LYR) {
10559 		mutex_enter(&sd_detach_mutex);
10560 		/*
10561 		 * The detach routine may run when the layer count
10562 		 * drops to zero.
10563 		 */
10564 		un->un_layer_count--;
10565 		mutex_exit(&sd_detach_mutex);
10566 	}
10567 
10568 	return (rval);
10569 }
10570 
10571 
10572 /*
10573  *    Function: sd_ready_and_valid
10574  *
10575  * Description: Test if device is ready and has a valid geometry.
10576  *
10577  *   Arguments: un  - driver soft state (unit) structure
10579  *
10580  * Return Code: SD_READY_VALID		ready and valid label
10581  *		SD_READY_NOT_VALID	ready, geom ops never applicable
10582  *		SD_NOT_READY_VALID	not ready, no label
 *		ENOMEM			could not allocate the wmap cache
10583  *
10584  *     Context: Never called at interrupt context.
10585  */
10586 
10587 static int
10588 sd_ready_and_valid(struct sd_lun *un)
10589 {
10590 	struct sd_errstats	*stp;
10591 	uint64_t		capacity;
10592 	uint_t			lbasize;
10593 	int			rval = SD_READY_VALID;
10594 	char			name_str[48];
10595 
10596 	ASSERT(un != NULL);
10597 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10598 
10599 	mutex_enter(SD_MUTEX(un));
10600 	/*
10601 	 * If a device has removable media, we must check if media is
10602 	 * ready when checking if this device is ready and valid.
10603 	 */
10604 	if (un->un_f_has_removable_media) {
10605 		mutex_exit(SD_MUTEX(un));
10606 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
10607 			rval = SD_NOT_READY_VALID;
10608 			mutex_enter(SD_MUTEX(un));
10609 			goto done;
10610 		}
10611 
10612 		mutex_enter(SD_MUTEX(un));
10613 		if ((un->un_f_geometry_is_valid == FALSE) ||
10614 		    (un->un_f_blockcount_is_valid == FALSE) ||
10615 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
10616 
10617 			/* Capacity has to be read on every open. */
10618 			mutex_exit(SD_MUTEX(un));
10619 			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
10620 			    &lbasize, SD_PATH_DIRECT) != 0) {
10621 				mutex_enter(SD_MUTEX(un));
10622 				un->un_f_geometry_is_valid = FALSE;
10623 				rval = SD_NOT_READY_VALID;
10624 				goto done;
10625 			} else {
10626 				mutex_enter(SD_MUTEX(un));
10627 				sd_update_block_info(un, lbasize, capacity);
10628 			}
10629 		}
10630 
10631 		/*
10632 		 * Check if the media in the device is writable or not.
10633 		 */
10634 		if ((un->un_f_geometry_is_valid == FALSE) && ISCD(un)) {
10635 			sd_check_for_writable_cd(un);
10636 		}
10637 
10638 	} else {
10639 		/*
10640 		 * Do a test unit ready to clear any unit attention from non-cd
10641 		 * devices.
10642 		 */
10643 		mutex_exit(SD_MUTEX(un));
10644 		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
10645 		mutex_enter(SD_MUTEX(un));
10646 	}
10647 
10648 
10649 	/*
10650 	 * If this is a non-512 block device, allocate space for
10651 	 * the wmap cache. This is being done here since every time
10652 	 * a media is changed this routine will be called and the
10653 	 * block size is a function of media rather than device.
10654 	 */
10655 	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
10656 		if (!(un->un_wm_cache)) {
10657 			(void) snprintf(name_str, sizeof (name_str),
10658 			    "%s%d_cache",
10659 			    ddi_driver_name(SD_DEVINFO(un)),
10660 			    ddi_get_instance(SD_DEVINFO(un)));
10661 			un->un_wm_cache = kmem_cache_create(
10662 			    name_str, sizeof (struct sd_w_map),
10663 			    8, sd_wm_cache_constructor,
10664 			    sd_wm_cache_destructor, NULL,
10665 			    (void *)un, NULL, 0);
10666 			if (!(un->un_wm_cache)) {
10667 				rval = ENOMEM;
10668 				goto done;
10669 			}
10670 		}
10671 	}
10672 
10673 	if (un->un_state == SD_STATE_NORMAL) {
10674 		/*
10675 		 * If the target is not yet ready here (defined by a TUR
10676 		 * failure), invalidate the geometry and print an 'offline'
10677 		 * message. This is a legacy message, as the state of the
10678 		 * target is not actually changed to SD_STATE_OFFLINE.
10679 		 *
10680 		 * If the TUR fails for EACCES (Reservation Conflict), it
10681 		 * means there actually is nothing wrong with the target that
10682 		 * would require invalidating the geometry, so continue in
10683 		 * that case as if the TUR was successful.
10684 		 */
10685 		int err;
10686 
10687 		mutex_exit(SD_MUTEX(un));
10688 		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
10689 		mutex_enter(SD_MUTEX(un));
10690 
10691 		if ((err != 0) && (err != EACCES)) {
10692 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10693 			    "offline\n");
10694 			un->un_f_geometry_is_valid = FALSE;
10695 			rval = SD_NOT_READY_VALID;
10696 			goto done;
10697 		}
10698 	}
10699 
10700 	if (un->un_f_format_in_progress == FALSE) {
10701 		/*
10702 		 * Note: sd_validate_geometry may return TRUE, but that does
10703 		 * not necessarily mean un_f_geometry_is_valid == TRUE!
10704 		 */
10705 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
10706 		if (rval == ENOTSUP) {
10707 			if (un->un_f_geometry_is_valid == TRUE) {
10708 				rval = 0;
10709 			} else {
10710 				rval = SD_READY_NOT_VALID;
10711 				goto done;
10712 			}
10713 		}
10714 		if (rval != 0) {
10715 			/*
10716 			 * We don't check the validity of geometry for
10717 			 * CDROMs. Also we assume we have a good label
10718 			 * even if sd_validate_geometry returned ENOMEM.
10719 			 */
10720 			if (!ISCD(un) && rval != ENOMEM) {
10721 				rval = SD_NOT_READY_VALID;
10722 				goto done;
10723 			}
10724 		}
10725 	}
10726 
10727 #ifdef DOESNTWORK /* on eliteII, see 1118607 */
10728 	/*
10729 	 * Check to see if this disk is write protected; if it is and we have
10730 	 * not set read-only, then fail.
10731 	 */
10732 	if ((flag & FWRITE) && (sr_check_wp(dev))) {
10733 		New_state(un, SD_STATE_CLOSED);
10734 		goto done;
10735 	}
10736 #endif
10737 
10738 	/*
10739 	 * If this device supports the DOOR_LOCK command, try to send
10740 	 * this command to PREVENT MEDIA REMOVAL, but don't get upset
10741 	 * if it fails. For a CD, however, it is an error.
10742 	 */
10743 	if (un->un_f_doorlock_supported) {
10744 		mutex_exit(SD_MUTEX(un));
10745 		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
10746 		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
10747 			rval = SD_NOT_READY_VALID;
10748 			mutex_enter(SD_MUTEX(un));
10749 			goto done;
10750 		}
10751 		mutex_enter(SD_MUTEX(un));
10752 	}
10753 
10754 	/* The state has changed, inform the media watch routines */
10755 	un->un_mediastate = DKIO_INSERTED;
10756 	cv_broadcast(&un->un_state_cv);
10757 	rval = SD_READY_VALID;
10758 
10759 done:
10760 
10761 	/*
10762 	 * Initialize the capacity kstat value, if no media previously
10763 	 * (capacity kstat is 0) and media has been inserted
10764 	 * (un_blockcount > 0).
10765 	 */
10766 	if (un->un_errstats != NULL) {
10767 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
10768 		if ((stp->sd_capacity.value.ui64 == 0) &&
10769 		    (un->un_f_blockcount_is_valid == TRUE)) {
10770 			stp->sd_capacity.value.ui64 =
10771 			    (uint64_t)((uint64_t)un->un_blockcount *
10772 			    un->un_sys_blocksize);
10773 		}
10774 	}
10775 
10776 	mutex_exit(SD_MUTEX(un));
10777 	return (rval);
10778 }
10779 
10780 
10781 /*
10782  *    Function: sdmin
10783  *
10784  * Description: Routine to limit the size of a data transfer. Used in
10785  *		conjunction with physio(9F).
10786  *
10787  *   Arguments: bp - pointer to the indicated buf(9S) struct.
10788  *
10789  *     Context: Kernel thread context.
10790  */
10791 
10792 static void
10793 sdmin(struct buf *bp)
10794 {
10795 	struct sd_lun	*un;
10796 	int		instance;
10797 
10798 	instance = SDUNIT(bp->b_edev);
10799 
10800 	un = ddi_get_soft_state(sd_state, instance);
10801 	ASSERT(un != NULL);
10802 
10803 	if (bp->b_bcount > un->un_max_xfer_size) {
10804 		bp->b_bcount = un->un_max_xfer_size;
10805 	}
10806 }
10807 
10808 
10809 /*
10810  *    Function: sdread
10811  *
10812  * Description: Driver's read(9e) entry point function.
10813  *
10814  *   Arguments: dev   - device number
10815  *		uio   - structure pointer describing where data is to be stored
10816  *			in user's space
10817  *		cred_p  - user credential pointer
10818  *
10819  * Return Code: ENXIO
10820  *		EIO
10821  *		EINVAL
10822  *		value returned by physio
10823  *
10824  *     Context: Kernel thread context.
10825  */
10826 /* ARGSUSED */
10827 static int
10828 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
10829 {
10830 	struct sd_lun	*un = NULL;
10831 	int		secmask;
10832 	int		err;
10833 
10834 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10835 		return (ENXIO);
10836 	}
10837 
10838 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10839 
10840 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
10841 		mutex_enter(SD_MUTEX(un));
10842 		/*
10843 		 * Because the call to sd_ready_and_valid will issue I/O we
10844 		 * must wait here if either the device is suspended or
10845 		 * if its power level is changing.
10846 		 */
10847 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10848 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10849 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10850 		}
10851 		un->un_ncmds_in_driver++;
10852 		mutex_exit(SD_MUTEX(un));
10853 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
10854 			mutex_enter(SD_MUTEX(un));
10855 			un->un_ncmds_in_driver--;
10856 			ASSERT(un->un_ncmds_in_driver >= 0);
10857 			mutex_exit(SD_MUTEX(un));
10858 			return (EIO);
10859 		}
10860 		mutex_enter(SD_MUTEX(un));
10861 		un->un_ncmds_in_driver--;
10862 		ASSERT(un->un_ncmds_in_driver >= 0);
10863 		mutex_exit(SD_MUTEX(un));
10864 	}
10865 
10866 	/*
10867 	 * Read requests are restricted to multiples of the system block size.
10868 	 */
10869 	secmask = un->un_sys_blocksize - 1;
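	/*
	 * For example, with a 512-byte system block size (the common
	 * case), secmask is 0x1ff: an offset of 1024 passes the checks
	 * below, while an offset of 1000 or a 100-byte iov_len fails
	 * with EINVAL.
	 */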
10870 
10871 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10872 		SD_ERROR(SD_LOG_READ_WRITE, un,
10873 		    "sdread: file offset not modulo %d\n",
10874 		    un->un_sys_blocksize);
10875 		err = EINVAL;
10876 	} else if (uio->uio_iov->iov_len & (secmask)) {
10877 		SD_ERROR(SD_LOG_READ_WRITE, un,
10878 		    "sdread: transfer length not modulo %d\n",
10879 		    un->un_sys_blocksize);
10880 		err = EINVAL;
10881 	} else {
10882 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
10883 	}
10884 	return (err);
10885 }
10886 
10887 
10888 /*
10889  *    Function: sdwrite
10890  *
10891  * Description: Driver's write(9e) entry point function.
10892  *
10893  *   Arguments: dev   - device number
10894  *		uio   - structure pointer describing where data is stored in
10895  *			user's space
10896  *		cred_p  - user credential pointer
10897  *
10898  * Return Code: ENXIO
10899  *		EIO
10900  *		EINVAL
10901  *		value returned by physio
10902  *
10903  *     Context: Kernel thread context.
10904  */
10905 /* ARGSUSED */
10906 static int
10907 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
10908 {
10909 	struct sd_lun	*un = NULL;
10910 	int		secmask;
10911 	int		err;
10912 
10913 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10914 		return (ENXIO);
10915 	}
10916 
10917 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10918 
10919 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
10920 		mutex_enter(SD_MUTEX(un));
10921 		/*
10922 		 * Because the call to sd_ready_and_valid will issue I/O we
10923 		 * must wait here if either the device is suspended or
10924 		 * if its power level is changing.
10925 		 */
10926 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10927 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10928 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10929 		}
10930 		un->un_ncmds_in_driver++;
10931 		mutex_exit(SD_MUTEX(un));
10932 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
10933 			mutex_enter(SD_MUTEX(un));
10934 			un->un_ncmds_in_driver--;
10935 			ASSERT(un->un_ncmds_in_driver >= 0);
10936 			mutex_exit(SD_MUTEX(un));
10937 			return (EIO);
10938 		}
10939 		mutex_enter(SD_MUTEX(un));
10940 		un->un_ncmds_in_driver--;
10941 		ASSERT(un->un_ncmds_in_driver >= 0);
10942 		mutex_exit(SD_MUTEX(un));
10943 	}
10944 
10945 	/*
10946 	 * Write requests are restricted to multiples of the system block size.
10947 	 */
10948 	secmask = un->un_sys_blocksize - 1;
10949 
10950 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10951 		SD_ERROR(SD_LOG_READ_WRITE, un,
10952 		    "sdwrite: file offset not modulo %d\n",
10953 		    un->un_sys_blocksize);
10954 		err = EINVAL;
10955 	} else if (uio->uio_iov->iov_len & (secmask)) {
10956 		SD_ERROR(SD_LOG_READ_WRITE, un,
10957 		    "sdwrite: transfer length not modulo %d\n",
10958 		    un->un_sys_blocksize);
10959 		err = EINVAL;
10960 	} else {
10961 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
10962 	}
10963 	return (err);
10964 }
10965 
10966 
10967 /*
10968  *    Function: sdaread
10969  *
10970  * Description: Driver's aread(9e) entry point function.
10971  *
10972  *   Arguments: dev   - device number
10973  *		aio   - structure pointer describing where data is to be stored
10974  *		cred_p  - user credential pointer
10975  *
10976  * Return Code: ENXIO
10977  *		EIO
10978  *		EINVAL
10979  *		value returned by aphysio
10980  *
10981  *     Context: Kernel thread context.
10982  */
10983 /* ARGSUSED */
10984 static int
10985 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
10986 {
10987 	struct sd_lun	*un = NULL;
10988 	struct uio	*uio = aio->aio_uio;
10989 	int		secmask;
10990 	int		err;
10991 
10992 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10993 		return (ENXIO);
10994 	}
10995 
10996 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10997 
10998 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
10999 		mutex_enter(SD_MUTEX(un));
11000 		/*
11001 		 * Because the call to sd_ready_and_valid will issue I/O we
11002 		 * must wait here if either the device is suspended or
11003 		 * if its power level is changing.
11004 		 */
11005 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11006 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11007 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11008 		}
11009 		un->un_ncmds_in_driver++;
11010 		mutex_exit(SD_MUTEX(un));
11011 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11012 			mutex_enter(SD_MUTEX(un));
11013 			un->un_ncmds_in_driver--;
11014 			ASSERT(un->un_ncmds_in_driver >= 0);
11015 			mutex_exit(SD_MUTEX(un));
11016 			return (EIO);
11017 		}
11018 		mutex_enter(SD_MUTEX(un));
11019 		un->un_ncmds_in_driver--;
11020 		ASSERT(un->un_ncmds_in_driver >= 0);
11021 		mutex_exit(SD_MUTEX(un));
11022 	}
11023 
11024 	/*
11025 	 * Read requests are restricted to multiples of the system block size.
11026 	 */
11027 	secmask = un->un_sys_blocksize - 1;
11028 
11029 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11030 		SD_ERROR(SD_LOG_READ_WRITE, un,
11031 		    "sdaread: file offset not modulo %d\n",
11032 		    un->un_sys_blocksize);
11033 		err = EINVAL;
11034 	} else if (uio->uio_iov->iov_len & (secmask)) {
11035 		SD_ERROR(SD_LOG_READ_WRITE, un,
11036 		    "sdaread: transfer length not modulo %d\n",
11037 		    un->un_sys_blocksize);
11038 		err = EINVAL;
11039 	} else {
11040 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
11041 	}
11042 	return (err);
11043 }
11044 
11045 
11046 /*
11047  *    Function: sdawrite
11048  *
11049  * Description: Driver's awrite(9e) entry point function.
11050  *
11051  *   Arguments: dev   - device number
11052  *		aio   - structure pointer describing where data is stored
11053  *		cred_p  - user credential pointer
11054  *
11055  * Return Code: ENXIO
11056  *		EIO
11057  *		EINVAL
11058  *		value returned by aphysio
11059  *
11060  *     Context: Kernel thread context.
11061  */
11062 /* ARGSUSED */
11063 static int
11064 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11065 {
11066 	struct sd_lun	*un = NULL;
11067 	struct uio	*uio = aio->aio_uio;
11068 	int		secmask;
11069 	int		err;
11070 
11071 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11072 		return (ENXIO);
11073 	}
11074 
11075 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11076 
11077 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11078 		mutex_enter(SD_MUTEX(un));
11079 		/*
11080 		 * Because the call to sd_ready_and_valid will issue I/O we
11081 		 * must wait here if either the device is suspended or
11082 		 * if its power level is changing.
11083 		 */
11084 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11085 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11086 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11087 		}
11088 		un->un_ncmds_in_driver++;
11089 		mutex_exit(SD_MUTEX(un));
11090 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11091 			mutex_enter(SD_MUTEX(un));
11092 			un->un_ncmds_in_driver--;
11093 			ASSERT(un->un_ncmds_in_driver >= 0);
11094 			mutex_exit(SD_MUTEX(un));
11095 			return (EIO);
11096 		}
11097 		mutex_enter(SD_MUTEX(un));
11098 		un->un_ncmds_in_driver--;
11099 		ASSERT(un->un_ncmds_in_driver >= 0);
11100 		mutex_exit(SD_MUTEX(un));
11101 	}
11102 
11103 	/*
11104 	 * Write requests are restricted to multiples of the system block size.
11105 	 */
11106 	secmask = un->un_sys_blocksize - 1;
11107 
11108 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11109 		SD_ERROR(SD_LOG_READ_WRITE, un,
11110 		    "sdawrite: file offset not modulo %d\n",
11111 		    un->un_sys_blocksize);
11112 		err = EINVAL;
11113 	} else if (uio->uio_iov->iov_len & (secmask)) {
11114 		SD_ERROR(SD_LOG_READ_WRITE, un,
11115 		    "sdawrite: transfer length not modulo %d\n",
11116 		    un->un_sys_blocksize);
11117 		err = EINVAL;
11118 	} else {
11119 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
11120 	}
11121 	return (err);
11122 }
11123 
11124 
11125 
11126 
11127 
11128 /*
11129  * Driver IO processing follows the following sequence:
11130  *
11131  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
11132  *         |                |                     ^
11133  *         v                v                     |
11134  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
11135  *         |                |                     |                   |
11136  *         v                |                     |                   |
11137  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
11138  *         |                |                     ^                   ^
11139  *         v                v                     |                   |
11140  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
11141  *         |                |                     |                   |
11142  *     +---+                |                     +------------+      +-------+
11143  *     |                    |                                  |              |
11144  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11145  *     |                    v                                  |              |
11146  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
11147  *     |                    |                                  ^              |
11148  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11149  *     |                    v                                  |              |
11150  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
11151  *     |                    |                                  ^              |
11152  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11153  *     |                    v                                  |              |
11154  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
11155  *     |                    |                                  ^              |
11156  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
11157  *     |                    v                                  |              |
11158  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
11159  *     |                    |                                  ^              |
11160  *     |                    |                                  |              |
11161  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
11162  *                          |                           ^
11163  *                          v                           |
11164  *                   sd_core_iostart()                  |
11165  *                          |                           |
11166  *                          |                           +------>(*destroypkt)()
11167  *                          +-> sd_start_cmds() <-+     |           |
11168  *                          |                     |     |           v
11169  *                          |                     |     |  scsi_destroy_pkt(9F)
11170  *                          |                     |     |
11171  *                          +->(*initpkt)()       +- sdintr()
11172  *                          |  |                        |  |
11173  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
11174  *                          |  +-> scsi_setup_cdb(9F)   |
11175  *                          |                           |
11176  *                          +--> scsi_transport(9F)     |
11177  *                                     |                |
11178  *                                     +----> SCSA ---->+
11179  *
11180  *
11181  * This code is based upon the following presumptions:
11182  *
11183  *   - iostart and iodone functions operate on buf(9S) structures. These
11184  *     functions perform the necessary operations on the buf(9S) and pass
11185  *     them along to the next function in the chain by using the macros
11186  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
11187  *     (for iodone side functions).
11188  *
11189  *   - The iostart side functions may sleep. The iodone side functions
11190  *     are called under interrupt context and may NOT sleep. Therefore
11191  *     iodone side functions also may not call iostart side functions.
11192  *     (NOTE: iostart side functions should NOT sleep for memory, as
11193  *     this could result in deadlock.)
11194  *
11195  *   - An iostart side function may call its corresponding iodone side
11196  *     function directly (if necessary).
11197  *
11198  *   - In the event of an error, an iostart side function can return a buf(9S)
11199  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
11200  *     b_error in the usual way of course).
11201  *
11202  *   - The taskq mechanism may be used by the iodone side functions to dispatch
11203  *     requests to the iostart side functions.  The iostart side functions in
11204  *     this case would be called under the context of a taskq thread, so it's
11205  *     OK for them to block/sleep/spin in this case.
11206  *
11207  *   - iostart side functions may allocate "shadow" buf(9S) structs and
11208  *     pass them along to the next function in the chain.  The corresponding
11209  *     iodone side functions must coalesce the "shadow" bufs and return
11210  *     the "original" buf to the next higher layer.
11211  *
11212  *   - The b_private field of the buf(9S) struct holds a pointer to
11213  *     an sd_xbuf struct, which contains information needed to
11214  *     construct the scsi_pkt for the command.
11215  *
11216  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
11217  *     layer must acquire & release the SD_MUTEX(un) as needed.
11218  */
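/*
 * To make the iostart/iodone contract above concrete, a minimal sketch
 * of one layer is shown below. The sd_hypothetical_* names are not real
 * functions in this driver; actual layers such as
 * sd_mapblockaddr_iostart()/sd_mapblockaddr_iodone() follow this shape.
 *
 *	static void
 *	sd_hypothetical_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		if (preprocessing_fails) {
 *			bioerror(bp, EIO);
 *			bp->b_resid = bp->b_bcount;
 *			SD_BEGIN_IODONE(index, un, bp);	// unwind on error
 *			return;
 *		}
 *		SD_NEXT_IOSTART(index, un, bp);		// pass the buf down
 *	}
 *
 *	static void
 *	sd_hypothetical_iodone(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		// undo the iostart-side transformation, then pass up
 *		SD_NEXT_IODONE(index, un, bp);
 *	}
 */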
11219 
11220 
11221 /*
11222  * Create taskq for all targets in the system. This is created at
11223  * _init(9E) and destroyed at _fini(9E).
11224  *
11225  * Note: here we set the minalloc to a reasonably high number to ensure that
11226  * we will have an adequate supply of task entries available at interrupt time.
11227  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
11228  * sd_taskq_create().  Since we do not want to sleep for allocations at
11229  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
11230  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
11231  * requests at any one instant in time.
11232  */
11233 #define	SD_TASKQ_NUMTHREADS	8
11234 #define	SD_TASKQ_MINALLOC	256
11235 #define	SD_TASKQ_MAXALLOC	256
11236 
11237 static taskq_t	*sd_tq = NULL;
11238 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
11239 
11240 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
11241 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
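/*
 * Illustrative dispatch against these limits (the callback name is
 * hypothetical): because maxalloc equals minalloc and the queue is
 * prepopulated, a KM_NOSLEEP dispatch fails, rather than sleeping,
 * once SD_TASKQ_MAXALLOC entries are outstanding.
 *
 *	if (taskq_dispatch(sd_tq, sd_hypothetical_task, un,
 *	    KM_NOSLEEP) == NULL) {
 *		// no task entry available: fail the request
 *	}
 */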
11242 
11243 /*
11244  * The following task queue is created for the write part of
11245  * read-modify-write on non-512 block size devices.
11246  * Limit the number of threads to 1 for now; this number was chosen
11247  * because the queue currently applies only to DVD-RAM and MO drives,
11248  * for which performance is not the main criterion at this stage.
11249  * Note: whether a single taskq can be used in future remains to be explored.
11250  */
11251 #define	SD_WMR_TASKQ_NUMTHREADS	1
11252 static taskq_t	*sd_wmr_tq = NULL;
11253 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
11254 
11255 /*
11256  *    Function: sd_taskq_create
11257  *
11258  * Description: Create taskq thread(s) and preallocate task entries
11259  *
11260  * Return Code: Returns a pointer to the allocated taskq_t.
11261  *
11262  *     Context: Can sleep. Requires blockable context.
11263  *
11264  *       Notes: - The taskq() facility currently is NOT part of the DDI.
11265  *		  (definitely NOT recommended for 3rd-party drivers!) :-)
11266  *		- taskq_create() will block for memory, also it will panic
11267  *		  if it cannot create the requested number of threads.
11268  *		- Currently taskq_create() creates threads that cannot be
11269  *		  swapped.
11270  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
11271  *		  supply of taskq entries at interrupt time (i.e., so that we
11272  *		  do not have to sleep for memory).
11273  */
11274 
11275 static void
11276 sd_taskq_create(void)
11277 {
11278 	char	taskq_name[TASKQ_NAMELEN];
11279 
11280 	ASSERT(sd_tq == NULL);
11281 	ASSERT(sd_wmr_tq == NULL);
11282 
11283 	(void) snprintf(taskq_name, sizeof (taskq_name),
11284 	    "%s_drv_taskq", sd_label);
11285 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
11286 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11287 	    TASKQ_PREPOPULATE));
11288 
11289 	(void) snprintf(taskq_name, sizeof (taskq_name),
11290 	    "%s_rmw_taskq", sd_label);
11291 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
11292 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11293 	    TASKQ_PREPOPULATE));
11294 }
11295 
11296 
11297 /*
11298  *    Function: sd_taskq_delete
11299  *
11300  * Description: Complementary cleanup routine for sd_taskq_create().
11301  *
11302  *     Context: Kernel thread context.
11303  */
11304 
11305 static void
11306 sd_taskq_delete(void)
11307 {
11308 	ASSERT(sd_tq != NULL);
11309 	ASSERT(sd_wmr_tq != NULL);
11310 	taskq_destroy(sd_tq);
11311 	taskq_destroy(sd_wmr_tq);
11312 	sd_tq = NULL;
11313 	sd_wmr_tq = NULL;
11314 }
11315 
11316 
11317 /*
11318  *    Function: sdstrategy
11319  *
11320  * Description: Driver's strategy (9E) entry point function.
11321  *
11322  *   Arguments: bp - pointer to buf(9S)
11323  *
11324  * Return Code: Always returns zero
11325  *
11326  *     Context: Kernel thread context.
11327  */
11328 
11329 static int
11330 sdstrategy(struct buf *bp)
11331 {
11332 	struct sd_lun *un;
11333 
11334 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11335 	if (un == NULL) {
11336 		bioerror(bp, EIO);
11337 		bp->b_resid = bp->b_bcount;
11338 		biodone(bp);
11339 		return (0);
11340 	}
11341 	/* As was done in the past, fail new cmds. if state is dumping. */
11342 	if (un->un_state == SD_STATE_DUMPING) {
11343 		bioerror(bp, ENXIO);
11344 		bp->b_resid = bp->b_bcount;
11345 		biodone(bp);
11346 		return (0);
11347 	}
11348 
11349 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11350 
11351 	/*
11352 	 * Commands may sneak in while we released the mutex in
11353 	 * DDI_SUSPEND; we should block new commands. However, old
11354 	 * commands that are still in the driver at this point should
11355 	 * still be allowed to drain.
11356 	 */
11357 	mutex_enter(SD_MUTEX(un));
11358 	/*
11359 	 * Must wait here if either the device is suspended or
11360 	 * if its power level is changing.
11361 	 */
11362 	while ((un->un_state == SD_STATE_SUSPENDED) ||
11363 	    (un->un_state == SD_STATE_PM_CHANGING)) {
11364 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11365 	}
11366 
11367 	un->un_ncmds_in_driver++;
11368 
11369 	/*
11370 	 * atapi: Since we are running the CD in PIO mode for now, we need to
11371 	 * call bp_mapin here to avoid bp_mapin being called in interrupt
11372 	 * context under the HBA's init_pkt routine.
11373 	 */
11374 	if (un->un_f_cfg_is_atapi == TRUE) {
11375 		mutex_exit(SD_MUTEX(un));
11376 		bp_mapin(bp);
11377 		mutex_enter(SD_MUTEX(un));
11378 	}
11379 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
11380 	    un->un_ncmds_in_driver);
11381 
11382 	mutex_exit(SD_MUTEX(un));
11383 
11384 	/*
11385 	 * This will (eventually) allocate the sd_xbuf area and
11386 	 * call sd_xbuf_strategy().  We just want to return the
11387 	 * result of ddi_xbuf_qstrategy so that we have an opt-
11388 	 * imized tail call which saves us a stack frame.
11389 	 */
11390 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
11391 }
11392 
11393 
11394 /*
11395  *    Function: sd_xbuf_strategy
11396  *
11397  * Description: Function for initiating IO operations via the
11398  *		ddi_xbuf_qstrategy() mechanism.
11399  *
11400  *     Context: Kernel thread context.
11401  */
11402 
11403 static void
11404 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11405 {
11406 	struct sd_lun *un = arg;
11407 
11408 	ASSERT(bp != NULL);
11409 	ASSERT(xp != NULL);
11410 	ASSERT(un != NULL);
11411 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11412 
11413 	/*
11414 	 * Initialize the fields in the xbuf and save a pointer to the
11415 	 * xbuf in bp->b_private.
11416 	 */
11417 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11418 
11419 	/* Send the buf down the iostart chain */
11420 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11421 }
11422 
11423 
11424 /*
11425  *    Function: sd_xbuf_init
11426  *
11427  * Description: Prepare the given sd_xbuf struct for use.
11428  *
11429  *   Arguments: un - ptr to softstate
11430  *		bp - ptr to associated buf(9S)
11431  *		xp - ptr to associated sd_xbuf
11432  *		chain_type - IO chain type to use:
11433  *			SD_CHAIN_NULL
11434  *			SD_CHAIN_BUFIO
11435  *			SD_CHAIN_USCSI
11436  *			SD_CHAIN_DIRECT
11437  *			SD_CHAIN_DIRECT_PRIORITY
11438  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11439  *			initialization; may be NULL if none.
11440  *
11441  *     Context: Kernel thread context
11442  */
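/*
 * Typical usage, mirroring sd_uscsi_strategy() below: allocate the
 * xbuf, initialize it for the desired chain, then start the I/O at
 * the iostart index recorded by sd_xbuf_init().
 *
 *	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
 *	sd_xbuf_init(un, bp, xp, chain_type, pktinfop);
 *	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
 */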
11443 
11444 static void
11445 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
11446 	uchar_t chain_type, void *pktinfop)
11447 {
11448 	int index;
11449 
11450 	ASSERT(un != NULL);
11451 	ASSERT(bp != NULL);
11452 	ASSERT(xp != NULL);
11453 
11454 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
11455 	    bp, chain_type);
11456 
11457 	xp->xb_un	= un;
11458 	xp->xb_pktp	= NULL;
11459 	xp->xb_pktinfo	= pktinfop;
11460 	xp->xb_private	= bp->b_private;
11461 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
11462 
11463 	/*
11464 	 * Set up the iostart and iodone chain indexes in the xbuf, based
11465 	 * upon the specified chain type to use.
11466 	 */
11467 	switch (chain_type) {
11468 	case SD_CHAIN_NULL:
11469 		/*
11470 		 * Fall thru to just use the values for the buf type, even
11471 		 * tho for the NULL chain these values will never be used.
11472 		 */
11473 		/* FALLTHRU */
11474 	case SD_CHAIN_BUFIO:
11475 		index = un->un_buf_chain_type;
11476 		break;
11477 	case SD_CHAIN_USCSI:
11478 		index = un->un_uscsi_chain_type;
11479 		break;
11480 	case SD_CHAIN_DIRECT:
11481 		index = un->un_direct_chain_type;
11482 		break;
11483 	case SD_CHAIN_DIRECT_PRIORITY:
11484 		index = un->un_priority_chain_type;
11485 		break;
11486 	default:
11487 		/* We're really broken if we ever get here... */
11488 		panic("sd_xbuf_init: illegal chain type!");
11489 		/*NOTREACHED*/
11490 	}
11491 
11492 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
11493 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
11494 
11495 	/*
11496 	 * It might be a bit easier to simply bzero the entire xbuf above,
11497 	 * but it turns out that since we init a fair number of members anyway,
11498 	 * we save a fair number of cycles by assigning zero explicitly.
11499 	 */
11500 	xp->xb_pkt_flags	= 0;
11501 	xp->xb_dma_resid	= 0;
11502 	xp->xb_retry_count	= 0;
11503 	xp->xb_victim_retry_count = 0;
11504 	xp->xb_ua_retry_count	= 0;
11505 	xp->xb_sense_bp		= NULL;
11506 	xp->xb_sense_status	= 0;
11507 	xp->xb_sense_state	= 0;
11508 	xp->xb_sense_resid	= 0;
11509 
11510 	bp->b_private	= xp;
11511 	bp->b_flags	&= ~(B_DONE | B_ERROR);
11512 	bp->b_resid	= 0;
11513 	bp->av_forw	= NULL;
11514 	bp->av_back	= NULL;
11515 	bioerror(bp, 0);
11516 
11517 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
11518 }
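
/*
 * Illustrative sketch (not driver code): a typical caller allocates an
 * sd_xbuf, initializes it against its buf, and then enters the iostart
 * chain at the index computed by sd_xbuf_init(), much as
 * sd_uscsi_strategy() does below for the USCSI chain:
 *
 *	struct sd_xbuf *xp;
 *
 *	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
 *	sd_xbuf_init(un, bp, xp, SD_CHAIN_USCSI, NULL);
 *	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
 */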
11519 
11520 
11521 /*
11522  *    Function: sd_uscsi_strategy
11523  *
11524  * Description: Wrapper for calling into the USCSI chain via physio(9F)
11525  *
11526  *   Arguments: bp - buf struct ptr
11527  *
11528  * Return Code: Always returns 0
11529  *
11530  *     Context: Kernel thread context
11531  */
11532 
11533 static int
11534 sd_uscsi_strategy(struct buf *bp)
11535 {
11536 	struct sd_lun		*un;
11537 	struct sd_uscsi_info	*uip;
11538 	struct sd_xbuf		*xp;
11539 	uchar_t			chain_type;
11540 
11541 	ASSERT(bp != NULL);
11542 
11543 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11544 	if (un == NULL) {
11545 		bioerror(bp, EIO);
11546 		bp->b_resid = bp->b_bcount;
11547 		biodone(bp);
11548 		return (0);
11549 	}
11550 
11551 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11552 
11553 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
11554 
11555 	mutex_enter(SD_MUTEX(un));
11556 	/*
11557 	 * atapi: Since we currently run the CD in PIO mode, we need to call
11558 	 * bp_mapin here to avoid having bp_mapin called from interrupt
11559 	 * context under the HBA's init_pkt routine.
11560 	 */
11561 	if (un->un_f_cfg_is_atapi == TRUE) {
11562 		mutex_exit(SD_MUTEX(un));
11563 		bp_mapin(bp);
11564 		mutex_enter(SD_MUTEX(un));
11565 	}
11566 	un->un_ncmds_in_driver++;
11567 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
11568 	    un->un_ncmds_in_driver);
11569 	mutex_exit(SD_MUTEX(un));
11570 
11571 	/*
11572 	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
11573 	 */
11574 	ASSERT(bp->b_private != NULL);
11575 	uip = (struct sd_uscsi_info *)bp->b_private;
11576 
11577 	switch (uip->ui_flags) {
11578 	case SD_PATH_DIRECT:
11579 		chain_type = SD_CHAIN_DIRECT;
11580 		break;
11581 	case SD_PATH_DIRECT_PRIORITY:
11582 		chain_type = SD_CHAIN_DIRECT_PRIORITY;
11583 		break;
11584 	default:
11585 		chain_type = SD_CHAIN_USCSI;
11586 		break;
11587 	}
11588 
11589 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
11590 	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
11591 
11592 	/* Use the index obtained within xbuf_init */
11593 	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
11594 
11595 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
11596 
11597 	return (0);
11598 }
11599 
11600 
11601 /*
11602  * These routines perform raw i/o operations.
11603  */
11604 /*ARGSUSED*/
11605 static void
11606 sduscsimin(struct buf *bp)
11607 {
11608 	/*
11609 	 * Do not break up the transfer: the CDB count would then
11610 	 * be incorrect and data underruns would result (incomplete
11611 	 * read/writes, which would be retried and then fail; see
11612 	 * sdintr()).
11613 	 */
11614 }
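
/*
 * For contrast (illustrative only, not driver code): a conventional
 * minphys routine clamps the transfer length, which is exactly what
 * must NOT happen for USCSI requests:
 *
 *	bp->b_bcount = min(bp->b_bcount, maxphys);
 */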
11615 
11616 
11617 
11618 /*
11619  *    Function: sd_send_scsi_cmd
11620  *
11621  * Description: Runs a USCSI command for user (when called thru sdioctl),
11622  *		or for the driver
11623  *
11624  *   Arguments: dev - the dev_t for the device
11625  *		incmd - ptr to a valid uscsi_cmd struct
11626  *		cdbspace - UIO_USERSPACE or UIO_SYSSPACE
11627  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11628  *		rqbufspace - UIO_USERSPACE or UIO_SYSSPACE
11629  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11630  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11631  *			to use the USCSI "direct" chain and bypass the normal
11632  *			command waitq.
11633  *
11634  * Return Code: 0 -  successful completion of the given command
11635  *		EIO - scsi_reset() failed, or see biowait()/physio() codes.
11636  *		ENXIO  - soft state not found for specified dev
11637  *		EINVAL
11638  *		EFAULT - copyin/copyout error
11639  *		return code of biowait(9F) or physio(9F):
11640  *			EIO - IO error, caller may check incmd->uscsi_status
11641  *			ENXIO
11642  *			EACCES - reservation conflict
11643  *
11644  *     Context: Waits for command to complete. Can sleep.
11645  */
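
/*
 * Usage sketch (illustrative only; error handling omitted): issuing a
 * TEST UNIT READY from kernel context with no data transfer.  The CDB
 * layout follows the standard SCSI definitions in <sys/scsi/scsi.h>.
 *
 *	struct uscsi_cmd	ucmd;
 *	union scsi_cdb		cdb;
 *
 *	bzero(&cdb, sizeof (union scsi_cdb));
 *	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
 *	bzero(&ucmd, sizeof (struct uscsi_cmd));
 *	ucmd.uscsi_cdb	  = (caddr_t)&cdb;
 *	ucmd.uscsi_cdblen = CDB_GROUP0;
 *	ucmd.uscsi_flags  = USCSI_SILENT;
 *	(void) sd_send_scsi_cmd(dev, &ucmd, UIO_SYSSPACE, UIO_SYSSPACE,
 *	    UIO_SYSSPACE, SD_PATH_DIRECT);
 */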
11646 
11647 static int
11648 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
11649 	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
11650 	int path_flag)
11651 {
11652 	struct sd_uscsi_info	*uip;
11653 	struct uscsi_cmd	*uscmd;
11654 	struct sd_lun	*un;
11655 	struct buf	*bp;
11656 	int	rval;
11657 	int	flags;
11658 
11659 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11660 	if (un == NULL) {
11661 		return (ENXIO);
11662 	}
11663 
11664 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11665 
11666 #ifdef SDDEBUG
11667 	switch (dataspace) {
11668 	case UIO_USERSPACE:
11669 		SD_TRACE(SD_LOG_IO, un,
11670 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
11671 		break;
11672 	case UIO_SYSSPACE:
11673 		SD_TRACE(SD_LOG_IO, un,
11674 		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
11675 		break;
11676 	default:
11677 		SD_TRACE(SD_LOG_IO, un,
11678 		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
11679 		break;
11680 	}
11681 #endif
11682 
11683 	/*
11684 	 * Perform resets directly; no need to generate a command to do it.
11685 	 */
11686 	if (incmd->uscsi_flags & (USCSI_RESET | USCSI_RESET_ALL)) {
11687 		flags = ((incmd->uscsi_flags & USCSI_RESET_ALL) != 0) ?
11688 		    RESET_ALL : RESET_TARGET;
11689 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: Issuing reset\n");
11690 		if (scsi_reset(SD_ADDRESS(un), flags) == 0) {
11691 			/* Reset attempt was unsuccessful */
11692 			SD_TRACE(SD_LOG_IO, un,
11693 			    "sd_send_scsi_cmd: reset: failure\n");
11694 			return (EIO);
11695 		}
11696 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: reset: success\n");
11697 		return (0);
11698 	}
11699 
11700 	/* Perfunctory sanity check... */
11701 	if (incmd->uscsi_cdblen <= 0) {
11702 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11703 		    "invalid uscsi_cdblen, returning EINVAL\n");
11704 		return (EINVAL);
11705 	}
11706 
11707 	/*
11708 	 * So that we need not worry about where the uscsi structure came from
11709 	 * (or where the cdb it points to came from) we're going to make
11710 	 * kmem_alloc'd copies of them here. This will also allow reference
11711 	 * to the data they contain long after this process has gone to
11712 	 * sleep and its kernel stack has been unmapped, etc.
11713 	 *
11714 	 * First get some memory for the uscsi_cmd struct and copy the
11715 	 * contents of the given uscsi_cmd struct into it.
11716 	 */
11717 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
11718 	bcopy(incmd, uscmd, sizeof (struct uscsi_cmd));
11719 
11720 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: uscsi_cmd",
11721 	    (uchar_t *)uscmd, sizeof (struct uscsi_cmd), SD_LOG_HEX);
11722 
11723 	/*
11724 	 * Now get some space for the CDB, and copy the given CDB into
11725 	 * it. Use ddi_copyin() in case the data is in user space.
11726 	 */
11727 	uscmd->uscsi_cdb = kmem_zalloc((size_t)incmd->uscsi_cdblen, KM_SLEEP);
11728 	flags = (cdbspace == UIO_SYSSPACE) ? FKIOCTL : 0;
11729 	if (ddi_copyin(incmd->uscsi_cdb, uscmd->uscsi_cdb,
11730 	    (uint_t)incmd->uscsi_cdblen, flags) != 0) {
11731 		kmem_free(uscmd->uscsi_cdb, (size_t)incmd->uscsi_cdblen);
11732 		kmem_free(uscmd, sizeof (struct uscsi_cmd));
11733 		return (EFAULT);
11734 	}
11735 
11736 	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: CDB",
11737 	    (uchar_t *)uscmd->uscsi_cdb, incmd->uscsi_cdblen, SD_LOG_HEX);
11738 
11739 	bp = getrbuf(KM_SLEEP);
11740 
11741 	/*
11742 	 * Allocate an sd_uscsi_info struct and fill it with the info
11743 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
11744 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
11745 	 * since we allocate the buf here in this function, we do not
11746 	 * need to preserve the prior contents of b_private.
11747 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
11748 	 */
11749 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
11750 	uip->ui_flags = path_flag;
11751 	uip->ui_cmdp  = uscmd;
11752 	bp->b_private = uip;
11753 
11754 	/*
11755 	 * Initialize Request Sense buffering, if requested.
11756 	 */
11757 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
11758 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
11759 		/*
11760 		 * Here uscmd->uscsi_rqbuf currently points to the caller's
11761 		 * buffer, but we replace this with a kernel buffer that
11762 		 * we allocate to use with the sense data. The sense data
11763 		 * (if present) gets copied into this new buffer before the
11764 		 * command is completed.  Then we copy the sense data from
11765 		 * our allocated buf into the caller's buffer below. Note
11766 		 * that incmd->uscsi_rqbuf and incmd->uscsi_rqlen are used
11767 		 * below to perform the copy back to the caller's buf.
11768 		 */
11769 		uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
11770 		if (rqbufspace == UIO_USERSPACE) {
11771 			uscmd->uscsi_rqlen   = SENSE_LENGTH;
11772 			uscmd->uscsi_rqresid = SENSE_LENGTH;
11773 		} else {
11774 			uchar_t rlen = min(SENSE_LENGTH, uscmd->uscsi_rqlen);
11775 			uscmd->uscsi_rqlen   = rlen;
11776 			uscmd->uscsi_rqresid = rlen;
11777 		}
11778 	} else {
11779 		uscmd->uscsi_rqbuf = NULL;
11780 		uscmd->uscsi_rqlen   = 0;
11781 		uscmd->uscsi_rqresid = 0;
11782 	}
11783 
11784 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: rqbuf:0x%p  rqlen:%d\n",
11785 	    uscmd->uscsi_rqbuf, uscmd->uscsi_rqlen);
11786 
11787 	if (un->un_f_is_fibre == FALSE) {
11788 		/*
11789 		 * Force asynchronous mode, if necessary.  Doing this here
11790 		 * has the unfortunate effect of running other queued
11791 		 * commands async also, but since the main purpose of this
11792 		 * capability is downloading new drive firmware, we can
11793 		 * probably live with it.
11794 		 */
11795 		if ((uscmd->uscsi_flags & USCSI_ASYNC) != 0) {
11796 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
11797 				== 1) {
11798 				if (scsi_ifsetcap(SD_ADDRESS(un),
11799 					    "synchronous", 0, 1) == 1) {
11800 					SD_TRACE(SD_LOG_IO, un,
11801 					"sd_send_scsi_cmd: forced async ok\n");
11802 				} else {
11803 					SD_TRACE(SD_LOG_IO, un,
11804 					    "sd_send_scsi_cmd: "
11805 					    "forced async failed\n");
11806 					rval = EINVAL;
11807 					goto done;
11808 				}
11809 			}
11810 		}
11811 
11812 		/*
11813 		 * Re-enable synchronous mode, if requested
11814 		 */
11815 		if (uscmd->uscsi_flags & USCSI_SYNC) {
11816 			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
11817 				== 0) {
11818 				int i = scsi_ifsetcap(SD_ADDRESS(un),
11819 						"synchronous", 1, 1);
11820 				SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11821 					"re-enabled sync %s\n",
11822 					(i == 1) ? "ok" : "failed");
11823 			}
11824 		}
11825 	}
11826 
11827 	/*
11828 	 * Commands sent with priority are intended for error recovery
11829 	 * situations, and do not have retries performed.
11830 	 */
11831 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
11832 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
11833 	}
11834 
11835 	/*
11836 	 * If we're going to do actual I/O, let physio do all the right things
11837 	 */
11838 	if (uscmd->uscsi_buflen != 0) {
11839 		struct iovec	aiov;
11840 		struct uio	auio;
11841 		struct uio	*uio = &auio;
11842 
11843 		bzero(&auio, sizeof (struct uio));
11844 		bzero(&aiov, sizeof (struct iovec));
11845 		aiov.iov_base = uscmd->uscsi_bufaddr;
11846 		aiov.iov_len  = uscmd->uscsi_buflen;
11847 		uio->uio_iov  = &aiov;
11848 
11849 		uio->uio_iovcnt  = 1;
11850 		uio->uio_resid   = uscmd->uscsi_buflen;
11851 		uio->uio_segflg  = dataspace;
11852 
11853 		/*
11854 		 * physio() will block here until the command completes....
11855 		 */
11856 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling physio.\n");
11857 
11858 		rval = physio(sd_uscsi_strategy, bp, dev,
11859 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE),
11860 		    sduscsimin, uio);
11861 
11862 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11863 		    "returned from physio with 0x%x\n", rval);
11864 
11865 	} else {
11866 		/*
11867 		 * We have to mimic what physio would do here! Argh!
11868 		 */
11869 		bp->b_flags  = B_BUSY |
11870 		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE);
11871 		bp->b_edev   = dev;
11872 		bp->b_dev    = cmpdev(dev);	/* maybe unnecessary? */
11873 		bp->b_bcount = 0;
11874 		bp->b_blkno  = 0;
11875 
11876 		SD_TRACE(SD_LOG_IO, un,
11877 		    "sd_send_scsi_cmd: calling sd_uscsi_strategy...\n");
11878 
11879 		(void) sd_uscsi_strategy(bp);
11880 
11881 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling biowait\n");
11882 
11883 		rval = biowait(bp);
11884 
11885 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11886 		    "returned from biowait with 0x%x\n", rval);
11887 	}
11888 
11889 done:
11890 
11891 #ifdef SDDEBUG
11892 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11893 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
11894 	    uscmd->uscsi_status, uscmd->uscsi_resid);
11895 	if (uscmd->uscsi_bufaddr != NULL) {
11896 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11897 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
11898 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
11899 		if (dataspace == UIO_SYSSPACE) {
11900 			SD_DUMP_MEMORY(un, SD_LOG_IO,
11901 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
11902 			    uscmd->uscsi_buflen, SD_LOG_HEX);
11903 		}
11904 	}
11905 #endif
11906 
11907 	/*
11908 	 * Get the status and residual to return to the caller.
11909 	 */
11910 	incmd->uscsi_status = uscmd->uscsi_status;
11911 	incmd->uscsi_resid  = uscmd->uscsi_resid;
11912 
11913 	/*
11914 	 * If the caller wants sense data, copy back whatever sense data
11915 	 * we may have gotten, and update the relevant rqsense info.
11916 	 */
11917 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
11918 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
11919 
11920 		int rqlen = uscmd->uscsi_rqlen - uscmd->uscsi_rqresid;
11921 		rqlen = min(((int)incmd->uscsi_rqlen), rqlen);
11922 
11923 		/* Update the Request Sense status and resid */
11924 		incmd->uscsi_rqresid  = incmd->uscsi_rqlen - rqlen;
11925 		incmd->uscsi_rqstatus = uscmd->uscsi_rqstatus;
11926 
11927 		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11928 		    "uscsi_rqstatus: 0x%02x  uscsi_rqresid:0x%x\n",
11929 		    incmd->uscsi_rqstatus, incmd->uscsi_rqresid);
11930 
11931 		/* Copy out the sense data for user processes */
11932 		if ((incmd->uscsi_rqbuf != NULL) && (rqlen != 0)) {
11933 			int flags =
11934 			    (rqbufspace == UIO_USERSPACE) ? 0 : FKIOCTL;
11935 			if (ddi_copyout(uscmd->uscsi_rqbuf, incmd->uscsi_rqbuf,
11936 			    rqlen, flags) != 0) {
11937 				rval = EFAULT;
11938 			}
11939 			/*
11940 			 * Note: Can't touch incmd->uscsi_rqbuf so use
11941 			 * uscmd->uscsi_rqbuf instead. They're the same.
11942 			 */
11943 			SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
11944 			    "incmd->uscsi_rqbuf: 0x%p  rqlen:%d\n",
11945 			    incmd->uscsi_rqbuf, rqlen);
11946 			SD_DUMP_MEMORY(un, SD_LOG_IO, "rq",
11947 			    (uchar_t *)uscmd->uscsi_rqbuf, rqlen, SD_LOG_HEX);
11948 		}
11949 	}
11950 
11951 	/*
11952 	 * Free allocated resources and return; mapout the buf in case it was
11953 	 * mapped in by a lower layer.
11954 	 */
11955 	bp_mapout(bp);
11956 	freerbuf(bp);
11957 	kmem_free(uip, sizeof (struct sd_uscsi_info));
11958 	if (uscmd->uscsi_rqbuf != NULL) {
11959 		kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
11960 	}
11961 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
11962 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
11963 
11964 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: exit\n");
11965 
11966 	return (rval);
11967 }
11968 
11969 
11970 /*
11971  *    Function: sd_buf_iodone
11972  *
11973  * Description: Frees the sd_xbuf & returns the buf to its originator.
11974  *
11975  *     Context: May be called from interrupt context.
11976  */
11977 /* ARGSUSED */
11978 static void
11979 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
11980 {
11981 	struct sd_xbuf *xp;
11982 
11983 	ASSERT(un != NULL);
11984 	ASSERT(bp != NULL);
11985 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11986 
11987 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
11988 
11989 	xp = SD_GET_XBUF(bp);
11990 	ASSERT(xp != NULL);
11991 
11992 	mutex_enter(SD_MUTEX(un));
11993 
11994 	/*
11995 	 * Record the time at which the command completed.
11996 	 * This is used to determine whether the device has been
11997 	 * idle long enough to be considered idle by the PM framework.
11998 	 * This lowers overhead, and therefore improves performance
11999 	 * per I/O operation.
12000 	 */
12001 	un->un_pm_idle_time = ddi_get_time();
12002 
12003 	un->un_ncmds_in_driver--;
12004 	ASSERT(un->un_ncmds_in_driver >= 0);
12005 	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
12006 	    un->un_ncmds_in_driver);
12007 
12008 	mutex_exit(SD_MUTEX(un));
12009 
12010 	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
12011 	biodone(bp);				/* bp is gone after this */
12012 
12013 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
12014 }
12015 
12016 
12017 /*
12018  *    Function: sd_uscsi_iodone
12019  *
12020  * Description: Frees the sd_xbuf & returns the buf to its originator.
12021  *
12022  *     Context: May be called from interrupt context.
12023  */
12024 /* ARGSUSED */
12025 static void
12026 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12027 {
12028 	struct sd_xbuf *xp;
12029 
12030 	ASSERT(un != NULL);
12031 	ASSERT(bp != NULL);
12032 
12033 	xp = SD_GET_XBUF(bp);
12034 	ASSERT(xp != NULL);
12035 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12036 
12037 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
12038 
12039 	bp->b_private = xp->xb_private;
12040 
12041 	mutex_enter(SD_MUTEX(un));
12042 
12043 	/*
12044 	 * Record the time at which the command completed.
12045 	 * This is used to determine whether the device has been
12046 	 * idle long enough to be considered idle by the PM framework.
12047 	 * This lowers overhead, and therefore improves performance
12048 	 * per I/O operation.
12049 	 */
12050 	un->un_pm_idle_time = ddi_get_time();
12051 
12052 	un->un_ncmds_in_driver--;
12053 	ASSERT(un->un_ncmds_in_driver >= 0);
12054 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
12055 	    un->un_ncmds_in_driver);
12056 
12057 	mutex_exit(SD_MUTEX(un));
12058 
12059 	kmem_free(xp, sizeof (struct sd_xbuf));
12060 	biodone(bp);
12061 
12062 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
12063 }
12064 
12065 
12066 /*
12067  *    Function: sd_mapblockaddr_iostart
12068  *
12069  * Description: Verify the request lies within the partition limits for
12070  *		the indicated minor device.  Issue "overrun" buf if
12071  *		request would exceed partition range.  Converts
12072  *		partition-relative block address to absolute.
12073  *
12074  *     Context: Can sleep
12075  *
12076  *      Issues: This follows what the old code did, in terms of accessing
12077  *		some of the partition info in the unit struct without holding
12078  *		the mutex.  This is a general issue: if the partition info
12079  *		can be altered while IO is in progress... as soon as we send
12080  *		a buf, its partitioning can be invalid before it gets to the
12081  *		device.  Probably the right fix is to move partitioning out
12082  *		of the driver entirely.
12083  */
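
/*
 * Worked example (illustrative numbers): for a partition that starts at
 * absolute block 1000 and contains 500 blocks, a request for 8 blocks
 * at partition-relative block 496 overruns the partition by 4 blocks:
 *
 *	requested_nblocks = 8;
 *	available_nblocks = 500 - 496;		(= 4)
 *
 * An overrun buf is cloned to carry the first 4 blocks, the remaining
 * 4 blocks are reflected in b_resid at completion, and the start block
 * is made absolute:
 *
 *	xp->xb_blkno = 496 + 1000;		(= 1496)
 */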
12084 
12085 static void
12086 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
12087 {
12088 	daddr_t	nblocks;	/* #blocks in the given partition */
12089 	daddr_t	blocknum;	/* Block number specified by the buf */
12090 	size_t	requested_nblocks;
12091 	size_t	available_nblocks;
12092 	int	partition;
12093 	diskaddr_t	partition_offset;
12094 	struct sd_xbuf *xp;
12095 
12096 
12097 	ASSERT(un != NULL);
12098 	ASSERT(bp != NULL);
12099 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12100 
12101 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12102 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
12103 
12104 	xp = SD_GET_XBUF(bp);
12105 	ASSERT(xp != NULL);
12106 
12107 	/*
12108 	 * If the geometry is not indicated as valid, attempt to access
12109 	 * the unit & verify the geometry/label. This can be the case for
12110 	 * removable-media devices, or if the device was opened in
12111 	 * NDELAY/NONBLOCK mode.
12112 	 */
12113 	if ((un->un_f_geometry_is_valid != TRUE) &&
12114 	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
12115 		/*
12116 		 * For removable devices it is possible to start an I/O
12117 		 * without a media by opening the device in nodelay mode.
12118 		 * Also for writable CDs there can be many scenarios where
12119 		 * there is no geometry yet but volume manager is trying to
12120 		 * issue a read() just because it can see TOC on the CD. So
12121 		 * do not print a message for removables.
12122 		 */
12123 		if (!un->un_f_has_removable_media) {
12124 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12125 			    "i/o to invalid geometry\n");
12126 		}
12127 		bioerror(bp, EIO);
12128 		bp->b_resid = bp->b_bcount;
12129 		SD_BEGIN_IODONE(index, un, bp);
12130 		return;
12131 	}
12132 
12133 	partition = SDPART(bp->b_edev);
12134 
12135 	/* #blocks in partition */
12136 	nblocks = un->un_map[partition].dkl_nblk;
12137 
12138 	/* Use of a local variable potentially improves performance slightly */
12139 	partition_offset = un->un_offset[partition];
12140 
12141 	/*
12142 	 * blocknum is the starting block number of the request. At this
12143 	 * point it is still relative to the start of the minor device.
12144 	 */
12145 	blocknum = xp->xb_blkno;
12146 
12147 	/*
12148 	 * Legacy: If the starting block number is one past the last block
12149 	 * in the partition, do not set B_ERROR in the buf.
12150 	 */
12151 	if (blocknum == nblocks)  {
12152 		goto error_exit;
12153 	}
12154 
12155 	/*
12156 	 * Confirm that the first block of the request lies within the
12157 	 * partition limits. Also the requested number of bytes must be
12158 	 * a multiple of the system block size.
12159 	 */
12160 	if ((blocknum < 0) || (blocknum >= nblocks) ||
12161 	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
12162 		bp->b_flags |= B_ERROR;
12163 		goto error_exit;
12164 	}
12165 
12166 	/*
12167 	 * If the requested # blocks exceeds the available # blocks, that
12168 	 * is an overrun of the partition.
12169 	 */
12170 	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
12171 	available_nblocks = (size_t)(nblocks - blocknum);
12172 	ASSERT(nblocks >= blocknum);
12173 
12174 	if (requested_nblocks > available_nblocks) {
12175 		/*
12176 		 * Allocate an "overrun" buf to allow the request to proceed
12177 		 * for the amount of space available in the partition. The
12178 		 * amount not transferred will be added into the b_resid
12179 		 * when the operation is complete. The overrun buf
12180 		 * replaces the original buf here, and the original buf
12181 		 * is saved inside the overrun buf, for later use.
12182 		 */
12183 		size_t resid = SD_SYSBLOCKS2BYTES(un,
12184 		    (offset_t)(requested_nblocks - available_nblocks));
12185 		size_t count = bp->b_bcount - resid;
12186 		/*
12187 		 * Note: count is an unsigned quantity, and thus can NEVER
12188 		 * be less than 0, so ASSERT that the original values are
12189 		 * correct.
12190 		 */
12191 		ASSERT(bp->b_bcount >= resid);
12192 
12193 		bp = sd_bioclone_alloc(bp, count, blocknum,
12194 			(int (*)(struct buf *)) sd_mapblockaddr_iodone);
12195 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
12196 		ASSERT(xp != NULL);
12197 	}
12198 
12199 	/* At this point there should be no residual for this buf. */
12200 	ASSERT(bp->b_resid == 0);
12201 
12202 	/* Convert the block number to an absolute address. */
12203 	xp->xb_blkno += partition_offset;
12204 
12205 	SD_NEXT_IOSTART(index, un, bp);
12206 
12207 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12208 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
12209 
12210 	return;
12211 
12212 error_exit:
12213 	bp->b_resid = bp->b_bcount;
12214 	SD_BEGIN_IODONE(index, un, bp);
12215 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12216 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
12217 }
12218 
12219 
12220 /*
12221  *    Function: sd_mapblockaddr_iodone
12222  *
12223  * Description: Completion-side processing for partition management.
12224  *
12225  *     Context: May be called under interrupt context
12226  */
12227 
12228 static void
12229 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
12230 {
12231 	/* int	partition; */	/* Not used, see below. */
12232 	ASSERT(un != NULL);
12233 	ASSERT(bp != NULL);
12234 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12235 
12236 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12237 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
12238 
12239 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
12240 		/*
12241 		 * We have an "overrun" buf to deal with...
12242 		 */
12243 		struct sd_xbuf	*xp;
12244 		struct buf	*obp;	/* ptr to the original buf */
12245 
12246 		xp = SD_GET_XBUF(bp);
12247 		ASSERT(xp != NULL);
12248 
12249 		/* Retrieve the pointer to the original buf */
12250 		obp = (struct buf *)xp->xb_private;
12251 		ASSERT(obp != NULL);
12252 
12253 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
12254 		bioerror(obp, bp->b_error);
12255 
12256 		sd_bioclone_free(bp);
12257 
12258 		/*
12259 		 * Get back the original buf.
12260 		 * Note that since the restoration of xb_blkno below
12261 		 * was removed, the sd_xbuf is not needed.
12262 		 */
12263 		bp = obp;
12264 		/*
12265 		 * xp = SD_GET_XBUF(bp);
12266 		 * ASSERT(xp != NULL);
12267 		 */
12268 	}
12269 
12270 	/*
12271 	 * Convert xp->xb_blkno back to a minor-device relative value.
12272 	 * Note: this has been commented out, as it is not needed in the
12273 	 * current implementation of the driver (ie, since this function
12274 	 * is at the top of the layering chains, so the info will be
12275 	 * discarded) and it is in the "hot" IO path.
12276 	 *
12277 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
12278 	 * xp->xb_blkno -= un->un_offset[partition];
12279 	 */
12280 
12281 	SD_NEXT_IODONE(index, un, bp);
12282 
12283 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12284 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
12285 }
12286 
12287 
12288 /*
12289  *    Function: sd_mapblocksize_iostart
12290  *
12291  * Description: Convert between system block size (un->un_sys_blocksize)
12292  *		and target block size (un->un_tgt_blocksize).
12293  *
12294  *     Context: Can sleep to allocate resources.
12295  *
12296  * Assumptions: A higher layer has already performed any partition validation,
12297  *		and converted the xp->xb_blkno to an absolute value relative
12298  *		to the start of the device.
12299  *
12300  *		It is also assumed that the higher layer has implemented
12301  *		an "overrun" mechanism for the case where the request would
12302  *		read/write beyond the end of a partition.  In this case we
12303  *		assume (and ASSERT) that bp->b_resid == 0.
12304  *
12305  *		Note: The implementation for this routine assumes the target
12306  *		block size remains constant between allocation and transport.
12307  */
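
/*
 * Worked example (illustrative numbers): with un->un_sys_blocksize = 512
 * and un->un_tgt_blocksize = 2048, a 1024-byte request starting at
 * system block 7 converts as follows:
 *
 *	first_byte    = 7 * 512;			(= 3584)
 *	start_block   = 3584 / 2048;			(= 1, target blocks)
 *	end_block     = (3584 + 1024 + 2047) / 2048;	(= 3)
 *	request_bytes = (3 - 1) * 2048;			(= 4096)
 *
 * Since 3584 is not a multiple of 2048 the request is unaligned, so a
 * shadow buf is allocated and the user data begins at
 *
 *	copy_offset   = 3584 - (1 * 2048);		(= 1536)
 *
 * within the first target block returned by the READ.
 */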
12308 
12309 static void
12310 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
12311 {
12312 	struct sd_mapblocksize_info	*bsp;
12313 	struct sd_xbuf			*xp;
12314 	offset_t first_byte;
12315 	daddr_t	start_block, end_block;
12316 	daddr_t	request_bytes;
12317 	ushort_t is_aligned = FALSE;
12318 
12319 	ASSERT(un != NULL);
12320 	ASSERT(bp != NULL);
12321 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12322 	ASSERT(bp->b_resid == 0);
12323 
12324 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12325 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
12326 
12327 	/*
12328 	 * For a non-writable CD, a write request is an error
12329 	 */
12330 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
12331 	    (un->un_f_mmc_writable_media == FALSE)) {
12332 		bioerror(bp, EIO);
12333 		bp->b_resid = bp->b_bcount;
12334 		SD_BEGIN_IODONE(index, un, bp);
12335 		return;
12336 	}
12337 
12338 	/*
12339 	 * We do not need a shadow buf if the device is using
12340 	 * un->un_sys_blocksize as its block size or if bcount == 0.
12341 	 * In this case there is no layer-private data block allocated.
12342 	 */
12343 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12344 	    (bp->b_bcount == 0)) {
12345 		goto done;
12346 	}
12347 
12348 #if defined(__i386) || defined(__amd64)
12349 	/* We do not support non-block-aligned transfers for ROD devices */
12350 	ASSERT(!ISROD(un));
12351 #endif
12352 
12353 	xp = SD_GET_XBUF(bp);
12354 	ASSERT(xp != NULL);
12355 
12356 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12357 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
12358 	    un->un_tgt_blocksize, un->un_sys_blocksize);
12359 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12360 	    "request start block:0x%x\n", xp->xb_blkno);
12361 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12362 	    "request len:0x%x\n", bp->b_bcount);
12363 
12364 	/*
12365 	 * Allocate the layer-private data area for the mapblocksize layer.
12366 	 * Layers are allowed to use the xb_private member of the sd_xbuf
12367 	 * struct to store the pointer to their layer-private data block, but
12368 	 * each layer also has the responsibility of restoring the prior
12369 	 * contents of xb_private before returning the buf/xbuf to the
12370 	 * higher layer that sent it.
12371 	 *
12372 	 * Here we save the prior contents of xp->xb_private into the
12373 	 * bsp->mbs_oprivate field of our layer-private data area. This value
12374 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
12375 	 * the layer-private area and returning the buf/xbuf to the layer
12376 	 * that sent it.
12377 	 *
12378 	 * Note that here we use kmem_zalloc for the allocation as there are
12379 	 * parts of the mapblocksize code that expect certain fields to be
12380 	 * zero unless explicitly set to a required value.
12381 	 */
12382 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12383 	bsp->mbs_oprivate = xp->xb_private;
12384 	xp->xb_private = bsp;
12385 
12386 	/*
12387 	 * This treats the data on the disk (target) as an array of bytes.
12388 	 * first_byte is the byte offset, from the beginning of the device,
12389 	 * to the location of the request. This is converted from a
12390 	 * un->un_sys_blocksize block address to a byte offset, and then back
12391 	 * to a block address based upon a un->un_tgt_blocksize block size.
12392 	 *
12393 	 * xp->xb_blkno should be absolute upon entry into this function,
12394 	 * but it is based upon partitions that use the "system"
12395 	 * block size. It must be adjusted to reflect the block size of
12396 	 * the target.
12397 	 *
12398 	 * Note that end_block is actually the block that follows the last
12399 	 * block of the request, but that's what is needed for the computation.
12400 	 */
12401 	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12402 	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
12403 	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
12404 	    un->un_tgt_blocksize;
12405 
12406 	/* request_bytes is rounded up to a multiple of the target block size */
12407 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
12408 
12409 	/*
12410 	 * See if the starting address of the request and the request
12411 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
12412 	 * then we do not need to allocate a shadow buf to handle the request.
12413 	 */
12414 	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
12415 	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
12416 		is_aligned = TRUE;
12417 	}
12418 
12419 	if ((bp->b_flags & B_READ) == 0) {
12420 		/*
12421 		 * Lock the range for a write operation. An aligned request is
12422 		 * considered a simple write; otherwise the request must be a
12423 		 * read-modify-write.
12424 		 */
12425 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
12426 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
12427 	}
12428 
12429 	/*
12430 	 * Alloc a shadow buf if the request is not aligned. Also, this is
12431 	 * where the READ command is generated for a read-modify-write. (The
12432 	 * write phase is deferred until after the read completes.)
12433 	 */
12434 	if (is_aligned == FALSE) {
12435 
12436 		struct sd_mapblocksize_info	*shadow_bsp;
12437 		struct sd_xbuf	*shadow_xp;
12438 		struct buf	*shadow_bp;
12439 
12440 		/*
12441 		 * Allocate the shadow buf and its associated xbuf. Note that
12442 		 * after this call the xb_blkno value in both the original
12443 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
12444 		 * same: absolute (relative to the start of the device) and
12445 		 * adjusted for the target block size. The b_blkno in the
12446 		 * shadow buf will also be set to this value. We should never
12447 		 * change b_blkno in the original bp however.
12448 		 *
12449 		 * Note also that the shadow buf will always need to be a
12450 		 * READ command, regardless of whether the incoming command
12451 		 * is a READ or a WRITE.
12452 		 */
12453 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
12454 		    xp->xb_blkno,
12455 		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
12456 
12457 		shadow_xp = SD_GET_XBUF(shadow_bp);
12458 
12459 		/*
12460 		 * Allocate the layer-private data for the shadow buf.
12461 		 * (No need to preserve xb_private in the shadow xbuf.)
12462 		 */
12463 		shadow_xp->xb_private = shadow_bsp =
12464 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12465 
12466 		/*
12467 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
12468 		 * to figure out where the start of the user data is (based upon
12469 		 * the system block size) in the data returned by the READ
12470 		 * command (which will be based upon the target blocksize). Note
12471 		 * that this is only really used if the request is unaligned.
12472 		 */
12473 		bsp->mbs_copy_offset = (ssize_t)(first_byte -
12474 		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
12475 		ASSERT((bsp->mbs_copy_offset >= 0) &&
12476 		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
12477 
12478 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
12479 
12480 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
12481 
12482 		/* Transfer the wmap (if any) to the shadow buf */
12483 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
12484 		bsp->mbs_wmp = NULL;
12485 
12486 		/*
12487 		 * The shadow buf goes on from here in place of the
12488 		 * original buf.
12489 		 */
12490 		shadow_bsp->mbs_orig_bp = bp;
12491 		bp = shadow_bp;
12492 	}
12493 
12494 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12495 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
12496 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12497 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
12498 	    request_bytes);
12499 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12500 	    "sd_mapblocksize_iostart: shadow buf:0x%p\n", bp);
12501 
12502 done:
12503 	SD_NEXT_IOSTART(index, un, bp);
12504 
12505 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12506 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
12507 }
12508 
12509 
12510 /*
12511  *    Function: sd_mapblocksize_iodone
12512  *
12513  * Description: Completion side processing for block-size mapping.
12514  *
12515  *     Context: May be called under interrupt context
12516  */
12517 
12518 static void
12519 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
12520 {
12521 	struct sd_mapblocksize_info	*bsp;
12522 	struct sd_xbuf	*xp;
12523 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
12524 	struct buf	*orig_bp;	/* ptr to the original buf */
12525 	offset_t	shadow_end;
12526 	offset_t	request_end;
12527 	offset_t	shadow_start;
12528 	ssize_t		copy_offset;
12529 	size_t		copy_length;
12530 	size_t		shortfall;
12531 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
12532 	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
12533 
12534 	ASSERT(un != NULL);
12535 	ASSERT(bp != NULL);
12536 
12537 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12538 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
12539 
12540 	/*
12541 	 * There is no shadow buf or layer-private data if the target is
12542 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
12543 	 */
12544 	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
12545 	    (bp->b_bcount == 0)) {
12546 		goto exit;
12547 	}
12548 
12549 	xp = SD_GET_XBUF(bp);
12550 	ASSERT(xp != NULL);
12551 
12552 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
12553 	bsp = xp->xb_private;
12554 
12555 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
12556 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
12557 
12558 	if (is_write) {
12559 		/*
12560 		 * For a WRITE request we must free up the block range that
12561 		 * we have locked up.  This holds regardless of whether this is
12562 		 * an aligned write request or a read-modify-write request.
12563 		 */
12564 		sd_range_unlock(un, bsp->mbs_wmp);
12565 		bsp->mbs_wmp = NULL;
12566 	}
12567 
12568 	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
12569 		/*
12570 		 * An aligned read or write command will have no shadow buf;
12571 		 * there is not much else to do with it.
12572 		 */
12573 		goto done;
12574 	}
12575 
12576 	orig_bp = bsp->mbs_orig_bp;
12577 	ASSERT(orig_bp != NULL);
12578 	orig_xp = SD_GET_XBUF(orig_bp);
12579 	ASSERT(orig_xp != NULL);
12580 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12581 
12582 	if (!is_write && has_wmap) {
12583 		/*
12584 		 * A READ with a wmap means this is the READ phase of a
12585 		 * read-modify-write. If an error occurred on the READ then
12586 		 * we do not proceed with the WRITE phase or copy any data.
12587 		 * Just release the write maps and return with an error.
12588 		 */
12589 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
12590 			orig_bp->b_resid = orig_bp->b_bcount;
12591 			bioerror(orig_bp, bp->b_error);
12592 			sd_range_unlock(un, bsp->mbs_wmp);
12593 			goto freebuf_done;
12594 		}
12595 	}
12596 
12597 	/*
12598 	 * Here is where we set up to copy the data from the shadow buf
12599 	 * into the space associated with the original buf.
12600 	 *
12601 	 * To deal with the conversion between block sizes, these
12602 	 * computations treat the data as an array of bytes, with the
12603 	 * first byte (byte 0) corresponding to the first byte in the
12604 	 * first block on the disk.
12605 	 */
12606 
12607 	/*
12608 	 * shadow_start and shadow_end delimit the byte range of the
12609 	 * data returned with the shadow IO request.
12610 	 */
12611 	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
12612 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
12613 
12614 	/*
12615 	 * copy_offset gives the offset (in bytes) from the start of the first
12616 	 * block of the READ request to the beginning of the data.  We retrieve
12617 	 * this value from mbs_copy_offset in the layer-private data area,
12618 	 * where it was saved by sd_mapblocksize_iostart(). copy_length
12619 	 * gives the amount of data to be copied (in bytes).
12620 	 */
12621 	copy_offset  = bsp->mbs_copy_offset;
12622 	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
12623 	copy_length  = orig_bp->b_bcount;
12624 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
12625 
12626 	/*
12627 	 * Set up the resid and error fields of orig_bp as appropriate.
12628 	 */
12629 	if (shadow_end >= request_end) {
12630 		/* We got all the requested data; set resid to zero */
12631 		orig_bp->b_resid = 0;
12632 	} else {
12633 		/*
12634 		 * We failed to get enough data to fully satisfy the original
12635 		 * request. Just copy back whatever data we got and set
12636 		 * up the residual and error code as required.
12637 		 *
12638 		 * 'shortfall' is the amount by which the data received with the
12639 		 * shadow buf has "fallen short" of the requested amount.
12640 		 */
12641 		shortfall = (size_t)(request_end - shadow_end);
12642 
12643 		if (shortfall > orig_bp->b_bcount) {
12644 			/*
12645 			 * We did not get enough data to even partially
12646 			 * fulfill the original request.  The residual is
12647 			 * equal to the amount requested.
12648 			 */
12649 			orig_bp->b_resid = orig_bp->b_bcount;
12650 		} else {
12651 			/*
12652 			 * We did not get all the data that we requested
12653 			 * from the device, but we will try to return what
12654 			 * portion we did get.
12655 			 */
12656 			orig_bp->b_resid = shortfall;
12657 		}
12658 		ASSERT(copy_length >= orig_bp->b_resid);
12659 		copy_length  -= orig_bp->b_resid;
12660 	}
12661 
12662 	/* Propagate the error code from the shadow buf to the original buf */
12663 	bioerror(orig_bp, bp->b_error);
12664 
12665 	if (is_write) {
12666 		goto freebuf_done;	/* No data copying for a WRITE */
12667 	}
12668 
12669 	if (has_wmap) {
12670 		/*
12671 		 * This is a READ command from the READ phase of a
12672 		 * read-modify-write request. We have to copy the data given
12673 		 * by the user OVER the data returned by the READ command,
12674 		 * then convert the command from a READ to a WRITE and send
12675 		 * it back to the target.
12676 		 */
12677 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
12678 		    copy_length);
12679 
12680 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
12681 
12682 		/*
12683 		 * Dispatch the WRITE command to the taskq thread, which
12684 		 * will in turn send the command to the target. When the
12685 		 * WRITE command completes, we (sd_mapblocksize_iodone())
12686 		 * will get called again as part of the iodone chain
12687 		 * processing for it. Note that we will still be dealing
12688 		 * with the shadow buf at that point.
12689 		 */
12690 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
12691 		    KM_NOSLEEP) != 0) {
12692 			/*
12693 			 * Dispatch was successful so we are done. Return
12694 			 * without going any higher up the iodone chain. Do
12695 			 * not free up any layer-private data until after the
12696 			 * WRITE completes.
12697 			 */
12698 			return;
12699 		}
12700 
12701 		/*
12702 		 * Dispatch of the WRITE command failed; set up the error
12703 		 * condition and send this IO back up the iodone chain.
12704 		 */
12705 		bioerror(orig_bp, EIO);
12706 		orig_bp->b_resid = orig_bp->b_bcount;
12707 
12708 	} else {
12709 		/*
12710 		 * This is a regular READ request (ie, not a RMW). Copy the
12711 		 * data from the shadow buf into the original buf. The
12712 		 * copy_offset compensates for any "misalignment" between the
12713 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
12714 		 * original buf (with its un->un_sys_blocksize blocks).
12715 		 */
12716 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
12717 		    copy_length);
12718 	}
12719 
12720 freebuf_done:
12721 
12722 	/*
12723 	 * At this point we still have both the shadow buf AND the original
12724 	 * buf to deal with, as well as the layer-private data area in each.
12725 	 * Local variables are as follows:
12726 	 *
12727 	 * bp -- points to shadow buf
12728 	 * xp -- points to xbuf of shadow buf
12729 	 * bsp -- points to layer-private data area of shadow buf
12730 	 * orig_bp -- points to original buf
12731 	 *
12732 	 * First free the shadow buf and its associated xbuf, then free the
12733 	 * layer-private data area from the shadow buf. There is no need to
12734 	 * restore xb_private in the shadow xbuf.
12735 	 */
12736 	sd_shadow_buf_free(bp);
12737 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12738 
12739 	/*
12740 	 * Now update the local variables to point to the original buf, xbuf,
12741 	 * and layer-private area.
12742 	 */
12743 	bp = orig_bp;
12744 	xp = SD_GET_XBUF(bp);
12745 	ASSERT(xp != NULL);
12746 	ASSERT(xp == orig_xp);
12747 	bsp = xp->xb_private;
12748 	ASSERT(bsp != NULL);
12749 
12750 done:
12751 	/*
12752 	 * Restore xb_private to whatever it was set to by the next higher
12753 	 * layer in the chain, then free the layer-private data area.
12754 	 */
12755 	xp->xb_private = bsp->mbs_oprivate;
12756 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
12757 
12758 exit:
12759 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
12760 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
12761 
12762 	SD_NEXT_IODONE(index, un, bp);
12763 }
12764 
12765 
12766 /*
12767  *    Function: sd_checksum_iostart
12768  *
12769  * Description: A stub function for a layer that's currently not used.
12770  *		For now just a placeholder.
12771  *
12772  *     Context: Kernel thread context
12773  */
12774 
12775 static void
12776 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
12777 {
12778 	ASSERT(un != NULL);
12779 	ASSERT(bp != NULL);
12780 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12781 	SD_NEXT_IOSTART(index, un, bp);
12782 }
12783 
12784 
12785 /*
12786  *    Function: sd_checksum_iodone
12787  *
12788  * Description: A stub function for a layer that's currently not used.
12789  *		For now just a placeholder.
12790  *
12791  *     Context: May be called under interrupt context
12792  */
12793 
12794 static void
12795 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
12796 {
12797 	ASSERT(un != NULL);
12798 	ASSERT(bp != NULL);
12799 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12800 	SD_NEXT_IODONE(index, un, bp);
12801 }
12802 
12803 
12804 /*
12805  *    Function: sd_checksum_uscsi_iostart
12806  *
12807  * Description: A stub function for a layer that's currently not used.
12808  *		For now just a placeholder.
12809  *
12810  *     Context: Kernel thread context
12811  */
12812 
12813 static void
12814 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
12815 {
12816 	ASSERT(un != NULL);
12817 	ASSERT(bp != NULL);
12818 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12819 	SD_NEXT_IOSTART(index, un, bp);
12820 }
12821 
12822 
12823 /*
12824  *    Function: sd_checksum_uscsi_iodone
12825  *
12826  * Description: A stub function for a layer that's currently not used.
12827  *		For now just a placeholder.
12828  *
12829  *     Context: May be called under interrupt context
12830  */
12831 
12832 static void
12833 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12834 {
12835 	ASSERT(un != NULL);
12836 	ASSERT(bp != NULL);
12837 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12838 	SD_NEXT_IODONE(index, un, bp);
12839 }
12840 
12841 
12842 /*
12843  *    Function: sd_pm_iostart
12844  *
12845  * Description: iostart-side routine for power management.
12846  *
12847  *     Context: Kernel thread context
12848  */
12849 
12850 static void
12851 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
12852 {
12853 	ASSERT(un != NULL);
12854 	ASSERT(bp != NULL);
12855 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12856 	ASSERT(!mutex_owned(&un->un_pm_mutex));
12857 
12858 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
12859 
12860 	if (sd_pm_entry(un) != DDI_SUCCESS) {
12861 		/*
12862 		 * Set up to return the failed buf back up the 'iodone'
12863 		 * side of the calling chain.
12864 		 */
12865 		bioerror(bp, EIO);
12866 		bp->b_resid = bp->b_bcount;
12867 
12868 		SD_BEGIN_IODONE(index, un, bp);
12869 
12870 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12871 		return;
12872 	}
12873 
12874 	SD_NEXT_IOSTART(index, un, bp);
12875 
12876 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12877 }
12878 
12879 
12880 /*
12881  *    Function: sd_pm_iodone
12882  *
12883  * Description: iodone-side routine for power management.
12884  *
12885  *     Context: may be called from interrupt context
12886  */
12887 
12888 static void
12889 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
12890 {
12891 	ASSERT(un != NULL);
12892 	ASSERT(bp != NULL);
12893 	ASSERT(!mutex_owned(&un->un_pm_mutex));
12894 
12895 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
12896 
12897 	/*
12898 	 * After attach the following flag is only read, so don't
12899 	 * take the penalty of acquiring a mutex for it.
12900 	 */
12901 	if (un->un_f_pm_is_enabled == TRUE) {
12902 		sd_pm_exit(un);
12903 	}
12904 
12905 	SD_NEXT_IODONE(index, un, bp);
12906 
12907 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
12908 }
12909 
12910 
12911 /*
12912  *    Function: sd_core_iostart
12913  *
12914  * Description: Primary driver function for enqueuing buf(9S) structs from
12915  *		the system and initiating IO to the target device
12916  *
12917  *     Context: Kernel thread context. Can sleep.
12918  *
12919  * Assumptions:  - The given xp->xb_blkno is absolute
12920  *		   (ie, relative to the start of the device).
12921  *		 - The IO is to be done using the native blocksize of
12922  *		   the device, as specified in un->un_tgt_blocksize.
12923  */
12924 /* ARGSUSED */
12925 static void
12926 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
12927 {
12928 	struct sd_xbuf *xp;
12929 
12930 	ASSERT(un != NULL);
12931 	ASSERT(bp != NULL);
12932 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12933 	ASSERT(bp->b_resid == 0);
12934 
12935 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
12936 
12937 	xp = SD_GET_XBUF(bp);
12938 	ASSERT(xp != NULL);
12939 
12940 	mutex_enter(SD_MUTEX(un));
12941 
12942 	/*
12943 	 * If we are currently in the failfast state, fail any new IO
12944 	 * that has B_FAILFAST set, then return.
12945 	 */
12946 	if ((bp->b_flags & B_FAILFAST) &&
12947 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
12948 		mutex_exit(SD_MUTEX(un));
12949 		bioerror(bp, EIO);
12950 		bp->b_resid = bp->b_bcount;
12951 		SD_BEGIN_IODONE(index, un, bp);
12952 		return;
12953 	}
12954 
12955 	if (SD_IS_DIRECT_PRIORITY(xp)) {
12956 		/*
12957 		 * Priority command -- transport it immediately.
12958 		 *
12959 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
12960 		 * because all direct priority commands should be associated
12961 		 * with error recovery actions which we don't want to retry.
12962 		 */
12963 		sd_start_cmds(un, bp);
12964 	} else {
12965 		/*
12966 		 * Normal command -- add it to the wait queue, then start
12967 		 * transporting commands from the wait queue.
12968 		 */
12969 		sd_add_buf_to_waitq(un, bp);
12970 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
12971 		sd_start_cmds(un, NULL);
12972 	}
12973 
12974 	mutex_exit(SD_MUTEX(un));
12975 
12976 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
12977 }
12978 
12979 
12980 /*
12981  *    Function: sd_init_cdb_limits
12982  *
12983  * Description: This is to handle scsi_pkt initialization differences
12984  *		between the driver platforms.
12985  *
12986  *		Legacy behaviors:
12987  *
12988  *		If the block number or the sector count exceeds the
12989  *		capabilities of a Group 0 command, shift over to a
12990  *		Group 1 command. We don't blindly use Group 1
12991  *		commands because a) some drives (CDC Wren IVs) get a
12992  *		bit confused, and b) there is probably a fair amount
12993  *		of speed difference for a target to receive and decode
12994  *		a 10 byte command instead of a 6 byte command.
12995  *
12996  *		The xfer time difference of 6 vs 10 byte CDBs is
12997  *		still significant so this code is still worthwhile.
12998  *		10 byte CDBs are very inefficient with the fas HBA driver
12999  *		and older disks. Each CDB byte took 1 usec with some
13000  *		popular disks.
13001  *
13002  *     Context: Must be called at attach time
13003  */
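
/*
 * For reference (standard SCSI limits; the check is an illustrative
 * sketch, not driver code): a Group 0 (6-byte) READ/WRITE CDB carries a
 * 21-bit LBA and an 8-bit block count, so a transfer must shift to a
 * Group 1 (10-byte) CDB when either limit is exceeded:
 *
 *	int need_group1 = (startblock > 0x1FFFFF) || (blockcount > 0xFF);
 */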
13004 
13005 static void
13006 sd_init_cdb_limits(struct sd_lun *un)
13007 {
13008 	/*
13009 	 * Use CDB_GROUP1 commands for most devices except for
13010 	 * parallel SCSI fixed drives in which case we get better
13011 	 * performance using CDB_GROUP0 commands (where applicable).
13012 	 */
13013 	un->un_mincdb = SD_CDB_GROUP1;
13014 #if !defined(__fibre)
13015 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
13016 	    !un->un_f_has_removable_media) {
13017 		un->un_mincdb = SD_CDB_GROUP0;
13018 	}
13019 #endif
13020 
13021 	/*
13022 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
13023 	 * commands for fixed disks unless we are building for a 32 bit
13024 	 * kernel.
13025 	 */
13026 #ifdef _LP64
13027 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13028 	    SD_CDB_GROUP4;
13029 #else
13030 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13031 	    SD_CDB_GROUP1;
13032 #endif
13033 
13034 	/*
13035 	 * x86 systems require the PKT_DMA_PARTIAL flag
13036 	 */
13037 #if defined(__x86)
13038 	un->un_pkt_flags = PKT_DMA_PARTIAL;
13039 #else
13040 	un->un_pkt_flags = 0;
13041 #endif
13042 
13043 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13044 	    ? sizeof (struct scsi_arq_status) : 1);
13045 	un->un_cmd_timeout = (ushort_t)sd_io_time;
13046 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13047 }
13048 
13049 
13050 /*
13051  *    Function: sd_initpkt_for_buf
13052  *
13053  * Description: Allocate and initialize for transport a scsi_pkt struct,
13054  *		based upon the info specified in the given buf struct.
13055  *
13056  *		Assumes the xb_blkno in the request is absolute (ie,
13057  *		relative to the start of the device (NOT partition!).
13058  *		Also assumes that the request is using the native block
13059  *		size of the device (as returned by the READ CAPACITY
13060  *		command).
13061  *
13062  * Return Code: SD_PKT_ALLOC_SUCCESS
13063  *		SD_PKT_ALLOC_FAILURE
13064  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13065  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13066  *
13067  *     Context: Kernel thread and may be called from software interrupt context
13068  *		as part of a sdrunout callback. This function may not block or
13069  *		call routines that block
13070  */
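
/*
 * Illustrative sketch (hypothetical caller, not driver code): how a
 * caller might dispatch on the return codes listed above:
 *
 *	switch (sd_initpkt_for_buf(bp, &pktp)) {
 *	case SD_PKT_ALLOC_SUCCESS:
 *		(void) scsi_transport(pktp);	... transport the command
 *		break;
 *	case SD_PKT_ALLOC_FAILURE:
 *		... out of resources: leave bp queued; sdrunout() will
 *		... restart command processing when resources free up
 *		break;
 *	default:
 *		... unrecoverable: fail bp back up the iodone chain
 *		break;
 *	}
 */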
13071 
13072 static int
13073 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
13074 {
13075 	struct sd_xbuf	*xp;
13076 	struct scsi_pkt *pktp = NULL;
13077 	struct sd_lun	*un;
13078 	size_t		blockcount;
13079 	daddr_t		startblock;
13080 	int		rval;
13081 	int		cmd_flags;
13082 
13083 	ASSERT(bp != NULL);
13084 	ASSERT(pktpp != NULL);
13085 	xp = SD_GET_XBUF(bp);
13086 	ASSERT(xp != NULL);
13087 	un = SD_GET_UN(bp);
13088 	ASSERT(un != NULL);
13089 	ASSERT(mutex_owned(SD_MUTEX(un)));
13090 	ASSERT(bp->b_resid == 0);
13091 
13092 	SD_TRACE(SD_LOG_IO_CORE, un,
13093 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
13094 
13095 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13096 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
13097 		/*
13098 		 * Already have a scsi_pkt -- just need DMA resources.
13099 		 * We must recompute the CDB in case the mapping returns
13100 		 * a nonzero pkt_resid.
13101 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
13102 		 * that is being retried, the unmap/remap of the DMA resources
13103 		 * will result in the entire transfer starting over again
13104 		 * from the very first block.
13105 		 */
13106 		ASSERT(xp->xb_pktp != NULL);
13107 		pktp = xp->xb_pktp;
13108 	} else {
13109 		pktp = NULL;
13110 	}
13111 #endif /* __i386 || __amd64 */
13112 
13113 	startblock = xp->xb_blkno;	/* Absolute block num. */
13114 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
13115 
13116 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13117 
13118 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
13119 
13120 #else
13121 
13122 	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
13123 
13124 #endif
13125 
13126 	/*
13127 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
13128 	 * call scsi_init_pkt, and build the CDB.
13129 	 */
13130 	rval = sd_setup_rw_pkt(un, &pktp, bp,
13131 	    cmd_flags, sdrunout, (caddr_t)un,
13132 	    startblock, blockcount);
13133 
13134 	if (rval == 0) {
13135 		/*
13136 		 * Success.
13137 		 *
13138 		 * If partial DMA is being used and required for this transfer,
13139 		 * set it up here.
13140 		 */
13141 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
13142 		    (pktp->pkt_resid != 0)) {
13143 
13144 			/*
13145 			 * Save the pkt_resid so the next portion of the
13146 			 * transfer can resume where this one leaves off
13147 			 */
13148 			xp->xb_dma_resid = pktp->pkt_resid;
13149 
13150 			/* rezero resid */
13151 			pktp->pkt_resid = 0;
13152 
13153 		} else {
13154 			xp->xb_dma_resid = 0;
13155 		}
13156 
13157 		pktp->pkt_flags = un->un_tagflags;
13158 		pktp->pkt_time  = un->un_cmd_timeout;
13159 		pktp->pkt_comp  = sdintr;
13160 
13161 		pktp->pkt_private = bp;
13162 		*pktpp = pktp;
13163 
13164 		SD_TRACE(SD_LOG_IO_CORE, un,
13165 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
13166 
13167 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13168 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
13169 #endif
13170 
13171 		return (SD_PKT_ALLOC_SUCCESS);
13172 
13173 	}
13174 
13175 	/*
13176 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
13177 	 * from sd_setup_rw_pkt.
13178 	 */
13179 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
13180 
13181 	if (rval == SD_PKT_ALLOC_FAILURE) {
13182 		*pktpp = NULL;
13183 		/*
13184 		 * Set the driver state to RWAIT to indicate the driver
13185 		 * is waiting on resource allocations. The driver will not
13186 		 * suspend, pm_suspend, or detach while the state is RWAIT.
13187 		 */
13188 		New_state(un, SD_STATE_RWAIT);
13189 
13190 		SD_ERROR(SD_LOG_IO_CORE, un,
13191 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
13192 
13193 		if ((bp->b_flags & B_ERROR) != 0) {
13194 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13195 		}
13196 		return (SD_PKT_ALLOC_FAILURE);
13197 	} else {
13198 		/*
13199 		 * SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13200 		 *
13201 		 * This should never happen.  Maybe someone messed with the
13202 		 * kernel's minphys?
13203 		 */
13204 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13205 		    "Request rejected: too large for CDB: "
13206 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
13207 		SD_ERROR(SD_LOG_IO_CORE, un,
13208 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
13209 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13210 
13211 	}
13212 }
13213 
13214 
13215 /*
13216  *    Function: sd_destroypkt_for_buf
13217  *
13218  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13219  *
13220  *     Context: Kernel thread or interrupt context
13221  */
13222 
13223 static void
13224 sd_destroypkt_for_buf(struct buf *bp)
13225 {
13226 	ASSERT(bp != NULL);
13227 	ASSERT(SD_GET_UN(bp) != NULL);
13228 
13229 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13230 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13231 
13232 	ASSERT(SD_GET_PKTP(bp) != NULL);
13233 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13234 
13235 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13236 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13237 }
13238 
13239 /*
13240  *    Function: sd_setup_rw_pkt
13241  *
13242  * Description: Determines appropriate CDB group for the requested LBA
13243  *		and transfer length, calls scsi_init_pkt, and builds
13244  *		the CDB.  Do not use for partial DMA transfers except
13245  *		for the initial transfer since the CDB size must
13246  *		remain constant.
13247  *
13248  *     Context: Kernel thread and may be called from software interrupt
13249  *		context as part of a sdrunout callback. This function may not
13250  *		block or call routines that block.
13251  */
13252 
13253 
13254 int
13255 sd_setup_rw_pkt(struct sd_lun *un,
13256     struct scsi_pkt **pktpp, struct buf *bp, int flags,
13257     int (*callback)(caddr_t), caddr_t callback_arg,
13258     diskaddr_t lba, uint32_t blockcount)
13259 {
13260 	struct scsi_pkt *return_pktp;
13261 	union scsi_cdb *cdbp;
13262 	struct sd_cdbinfo *cp = NULL;
13263 	int i;
13264 
13265 	/*
13266 	 * See which size CDB to use, based upon the request.
13267 	 */
13268 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
13269 
13270 		/*
13271 		 * Check lba and block count against sd_cdbtab limits.
13272 		 * In the partial DMA case, we have to use the same size
13273 		 * CDB for all the transfers.  Check lba + blockcount
13274 		 * against the max LBA so we know that segment of the
13275 		 * transfer can use the CDB we select.
13276 		 */
13277 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
13278 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
13279 
13280 			/*
13281 			 * The command will fit into the CDB type
13282 			 * specified by sd_cdbtab[i].
13283 			 */
13284 			cp = sd_cdbtab + i;
13285 
13286 			/*
13287 			 * Call scsi_init_pkt so we can fill in the
13288 			 * CDB.
13289 			 */
13290 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
13291 			    bp, cp->sc_grpcode, un->un_status_len, 0,
13292 			    flags, callback, callback_arg);
13293 
13294 			if (return_pktp != NULL) {
13295 
13296 				/*
13297 				 * Return new value of pkt
13298 				 */
13299 				*pktpp = return_pktp;
13300 
13301 				/*
13302 				 * To be safe, zero the CDB, ensuring there is
13303 				 * no leftover data from a previous command.
13304 				 */
13305 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
13306 
13307 				/*
13308 				 * Handle partial DMA mapping
13309 				 */
13310 				if (return_pktp->pkt_resid != 0) {
13311 
13312 					/*
13313 					 * Not going to xfer as many blocks as
13314 					 * originally expected
13315 					 */
13316 					blockcount -=
13317 					    SD_BYTES2TGTBLOCKS(un,
13318 						return_pktp->pkt_resid);
13319 				}
13320 
13321 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
13322 
13323 				/*
13324 				 * Set command byte based on the CDB
13325 				 * type we matched.
13326 				 */
13327 				cdbp->scc_cmd = cp->sc_grpmask |
13328 				    ((bp->b_flags & B_READ) ?
13329 					SCMD_READ : SCMD_WRITE);
13330 
13331 				SD_FILL_SCSI1_LUN(un, return_pktp);
13332 
13333 				/*
13334 				 * Fill in LBA and length
13335 				 */
13336 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
13337 				    (cp->sc_grpcode == CDB_GROUP4) ||
13338 				    (cp->sc_grpcode == CDB_GROUP0) ||
13339 				    (cp->sc_grpcode == CDB_GROUP5));
13340 
13341 				if (cp->sc_grpcode == CDB_GROUP1) {
13342 					FORMG1ADDR(cdbp, lba);
13343 					FORMG1COUNT(cdbp, blockcount);
13344 					return (0);
13345 				} else if (cp->sc_grpcode == CDB_GROUP4) {
13346 					FORMG4LONGADDR(cdbp, lba);
13347 					FORMG4COUNT(cdbp, blockcount);
13348 					return (0);
13349 				} else if (cp->sc_grpcode == CDB_GROUP0) {
13350 					FORMG0ADDR(cdbp, lba);
13351 					FORMG0COUNT(cdbp, blockcount);
13352 					return (0);
13353 				} else if (cp->sc_grpcode == CDB_GROUP5) {
13354 					FORMG5ADDR(cdbp, lba);
13355 					FORMG5COUNT(cdbp, blockcount);
13356 					return (0);
13357 				}
13358 
13359 				/*
13360 				 * It should be impossible to not match one
13361 				 * of the CDB types above, so we should never
13362 				 * reach this point.  Set the CDB command byte
13363 				 * to test-unit-ready to avoid writing
13364 				 * to somewhere we don't intend.
13365 				 */
13366 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
13367 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13368 			} else {
13369 				/*
13370 				 * Couldn't get scsi_pkt
13371 				 */
13372 				return (SD_PKT_ALLOC_FAILURE);
13373 			}
13374 		}
13375 	}
13376 
13377 	/*
13378 	 * None of the available CDB types were suitable.  This really
13379 	 * should never happen:  on a 64-bit system we support
13380 	 * READ16/WRITE16, which will hold an entire 64-bit disk address,
13381 	 * and on a 32-bit system we will refuse to bind to a device
13382 	 * larger than 2TB, so addresses will never be larger than 32 bits.
13383 	 */
13384 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13385 }
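
/*
 * Illustrative sketch (not part of the driver): the selection loop in
 * sd_setup_rw_pkt() picks the first (smallest) CDB group whose
 * addressing limits cover both the last LBA and the block count of the
 * request.  The limits below are the standard SCSI READ/WRITE limits
 * per CDB group; the table layout and names are hypothetical stand-ins
 * for sd_cdbtab.
 */
#ifdef SD_CDB_SKETCH			/* never defined; sketch only */
static const struct sd_sketch_cdb_limit {
	uint64_t	scl_maxlba;	/* largest addressable LBA */
	uint32_t	scl_maxlen;	/* largest block count */
} sd_sketch_cdb_limits[] = {
	{ 0x1FFFFFULL,		0xFF },		/* Group 0:  6-byte CDB */
	{ 0xFFFFFFFFULL,	0xFFFF },	/* Group 1: 10-byte CDB */
	{ 0xFFFFFFFFULL,	0xFFFFFFFF },	/* Group 5: 12-byte CDB */
	{ UINT64_MAX,		0xFFFFFFFF },	/* Group 4: 16-byte CDB */
};

static int
sd_sketch_pick_cdb(uint64_t lba, uint32_t nblks)
{
	int i;

	/* The first entry covering both the last LBA and the length wins. */
	for (i = 0; i < 4; i++) {
		if ((lba + nblks - 1 <= sd_sketch_cdb_limits[i].scl_maxlba) &&
		    (nblks <= sd_sketch_cdb_limits[i].scl_maxlen)) {
			return (i);
		}
	}
	return (-1);		/* no CDB group is large enough */
}
#endif	/* SD_CDB_SKETCH */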
13386 
13387 #if defined(__i386) || defined(__amd64)
13388 /*
13389  *    Function: sd_setup_next_rw_pkt
13390  *
13391  * Description: Setup packet for partial DMA transfers, except for the
13392  * Description: Set up a packet for partial DMA transfers, except for the
13393  *		the initial transfer.
13394  *
13395  *     Context: Kernel thread and may be called from interrupt context.
13396  */
13397 
13398 int
13399 sd_setup_next_rw_pkt(struct sd_lun *un,
13400     struct scsi_pkt *pktp, struct buf *bp,
13401     diskaddr_t lba, uint32_t blockcount)
13402 {
13403 	uchar_t com;
13404 	union scsi_cdb *cdbp;
13405 	uchar_t cdb_group_id;
13406 
13407 	ASSERT(pktp != NULL);
13408 	ASSERT(pktp->pkt_cdbp != NULL);
13409 
13410 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
13411 	com = cdbp->scc_cmd;
13412 	cdb_group_id = CDB_GROUPID(com);
13413 
13414 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
13415 	    (cdb_group_id == CDB_GROUPID_1) ||
13416 	    (cdb_group_id == CDB_GROUPID_4) ||
13417 	    (cdb_group_id == CDB_GROUPID_5));
13418 
13419 	/*
13420 	 * Move pkt to the next portion of the xfer.
13421 	 * func is NULL_FUNC so we do not have to release
13422 	 * the disk mutex here.
13423 	 */
13424 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
13425 	    NULL_FUNC, NULL) == pktp) {
13426 		/* Success.  Handle partial DMA */
13427 		if (pktp->pkt_resid != 0) {
13428 			blockcount -=
13429 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
13430 		}
13431 
13432 		cdbp->scc_cmd = com;
13433 		SD_FILL_SCSI1_LUN(un, pktp);
13434 		if (cdb_group_id == CDB_GROUPID_1) {
13435 			FORMG1ADDR(cdbp, lba);
13436 			FORMG1COUNT(cdbp, blockcount);
13437 			return (0);
13438 		} else if (cdb_group_id == CDB_GROUPID_4) {
13439 			FORMG4LONGADDR(cdbp, lba);
13440 			FORMG4COUNT(cdbp, blockcount);
13441 			return (0);
13442 		} else if (cdb_group_id == CDB_GROUPID_0) {
13443 			FORMG0ADDR(cdbp, lba);
13444 			FORMG0COUNT(cdbp, blockcount);
13445 			return (0);
13446 		} else if (cdb_group_id == CDB_GROUPID_5) {
13447 			FORMG5ADDR(cdbp, lba);
13448 			FORMG5COUNT(cdbp, blockcount);
13449 			return (0);
13450 		}
13451 
13452 		/* Unreachable */
13453 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13454 	}
13455 
13456 	/*
13457 	 * Error setting up next portion of cmd transfer.
13458 	 * Something is definitely very wrong and this
13459 	 * should not happen.
13460 	 */
13461 	return (SD_PKT_ALLOC_FAILURE);
13462 }
13463 #endif /* defined(__i386) || defined(__amd64) */
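
/*
 * Illustrative sketch (not part of the driver): sd_setup_next_rw_pkt()
 * recovers the CDB group from the opcode already in the packet.  Per
 * the SCSI spec, the group code is carried in the top three bits of
 * the opcode byte, which is what the CDB_GROUPID() macro extracts:
 */
#ifdef SD_GROUPID_SKETCH		/* never defined; sketch only */
#define	SD_SKETCH_GROUPID(op)	(((op) >> 5) & 0x7)
#endif	/* SD_GROUPID_SKETCH */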
13464 
13465 /*
13466  *    Function: sd_initpkt_for_uscsi
13467  *
13468  * Description: Allocate and initialize for transport a scsi_pkt struct,
13469  *		based upon the info specified in the given uscsi_cmd struct.
13470  *
13471  * Return Code: SD_PKT_ALLOC_SUCCESS
13472  *		SD_PKT_ALLOC_FAILURE
13473  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13474  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13475  *
13476  *     Context: Kernel thread and may be called from software interrupt context
13477  *		as part of a sdrunout callback. This function may not block or
13478  *		call routines that block.
13479  */
13480 
13481 static int
13482 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
13483 {
13484 	struct uscsi_cmd *uscmd;
13485 	struct sd_xbuf	*xp;
13486 	struct scsi_pkt	*pktp;
13487 	struct sd_lun	*un;
13488 	uint32_t	flags = 0;
13489 
13490 	ASSERT(bp != NULL);
13491 	ASSERT(pktpp != NULL);
13492 	xp = SD_GET_XBUF(bp);
13493 	ASSERT(xp != NULL);
13494 	un = SD_GET_UN(bp);
13495 	ASSERT(un != NULL);
13496 	ASSERT(mutex_owned(SD_MUTEX(un)));
13497 
13498 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13499 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13500 	ASSERT(uscmd != NULL);
13501 
13502 	SD_TRACE(SD_LOG_IO_CORE, un,
13503 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
13504 
13505 	/*
13506 	 * Allocate the scsi_pkt for the command.
13507 	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
13508 	 *	 during scsi_init_pkt time and will continue to use the
13509 	 *	 same path as long as the same scsi_pkt is used without
13510 	 *	 intervening scsi_dma_free(). Since uscsi command does
13511 	 *	 intervening scsi_dmafree(). Since a uscsi command does
13512 	 *	 not call scsi_dmafree() before retrying a failed command,
13513 	 *	 it is necessary to make sure the PKT_DMA_PARTIAL flag is
13514 	 *	 NOT set, so that scsi_vhci can use another available path
13515 	 *	 for the retry. Besides, a uscsi command does not allow
13516 	 *	 DMA breakup, so there is no need to set PKT_DMA_PARTIAL.
13517 	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
13518 	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
13519 	    sizeof (struct scsi_arq_status), 0,
13520 	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
13521 	    sdrunout, (caddr_t)un);
13522 
13523 	if (pktp == NULL) {
13524 		*pktpp = NULL;
13525 		/*
13526 		 * Set the driver state to RWAIT to indicate the driver
13527 		 * is waiting on resource allocations. The driver will not
13528 		 * suspend, pm_suspend, or detach while the state is RWAIT.
13529 		 */
13530 		New_state(un, SD_STATE_RWAIT);
13531 
13532 		SD_ERROR(SD_LOG_IO_CORE, un,
13533 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
13534 
13535 		if ((bp->b_flags & B_ERROR) != 0) {
13536 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13537 		}
13538 		return (SD_PKT_ALLOC_FAILURE);
13539 	}
13540 
13541 	/*
13542 	 * We do not do DMA breakup for USCSI commands, so return failure
13543 	 * here if all the needed DMA resources were not allocated.
13544 	 */
13545 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
13546 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
13547 		scsi_destroy_pkt(pktp);
13548 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
13549 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
13550 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
13551 	}
13552 
13553 	/* Init the cdb from the given uscsi struct */
13554 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
13555 	    uscmd->uscsi_cdb[0], 0, 0, 0);
13556 
13557 	SD_FILL_SCSI1_LUN(un, pktp);
13558 
13559 	/*
13560 	 * Set up the optional USCSI flags. See the uscsi(7I) man page
13561 	 * for listing of the supported flags.
13562 	 */
13563 
13564 	if (uscmd->uscsi_flags & USCSI_SILENT) {
13565 		flags |= FLAG_SILENT;
13566 	}
13567 
13568 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
13569 		flags |= FLAG_DIAGNOSE;
13570 	}
13571 
13572 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
13573 		flags |= FLAG_ISOLATE;
13574 	}
13575 
13576 	if (un->un_f_is_fibre == FALSE) {
13577 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
13578 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
13579 		}
13580 	}
13581 
13582 	/*
13583 	 * Set the pkt flags here so we save time later.
13584 	 * Note: These flags are NOT in the uscsi man page!!!
13585 	 */
13586 	if (uscmd->uscsi_flags & USCSI_HEAD) {
13587 		flags |= FLAG_HEAD;
13588 	}
13589 
13590 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
13591 		flags |= FLAG_NOINTR;
13592 	}
13593 
13594 	/*
13595 	 * For tagged queueing, things get a bit complicated.
13596 	 * Check first for head of queue and last for ordered queue.
13597 	 * If neither head nor ordered, use the default driver tag flags.
13598 	 */
13599 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
13600 		if (uscmd->uscsi_flags & USCSI_HTAG) {
13601 			flags |= FLAG_HTAG;
13602 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
13603 			flags |= FLAG_OTAG;
13604 		} else {
13605 			flags |= un->un_tagflags & FLAG_TAGMASK;
13606 		}
13607 	}
13608 
13609 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
13610 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
13611 	}
13612 
13613 	pktp->pkt_flags = flags;
13614 
13615 	/* Copy the caller's CDB into the pkt... */
13616 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
13617 
13618 	if (uscmd->uscsi_timeout == 0) {
13619 		pktp->pkt_time = un->un_uscsi_timeout;
13620 	} else {
13621 		pktp->pkt_time = uscmd->uscsi_timeout;
13622 	}
13623 
13624 	/* Needed later to identify a USCSI request in sdintr() */
13625 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
13626 
13627 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
13628 
13629 	pktp->pkt_private = bp;
13630 	pktp->pkt_comp = sdintr;
13631 	*pktpp = pktp;
13632 
13633 	SD_TRACE(SD_LOG_IO_CORE, un,
13634 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
13635 
13636 	return (SD_PKT_ALLOC_SUCCESS);
13637 }
13638 
13639 
13640 /*
13641  *    Function: sd_destroypkt_for_uscsi
13642  *
13643  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
13644  *		IOs. Also saves relevant info into the associated uscsi_cmd
13645  *		struct.
13646  *
13647  *     Context: May be called under interrupt context
13648  */
13649 
13650 static void
13651 sd_destroypkt_for_uscsi(struct buf *bp)
13652 {
13653 	struct uscsi_cmd *uscmd;
13654 	struct sd_xbuf	*xp;
13655 	struct scsi_pkt	*pktp;
13656 	struct sd_lun	*un;
13657 
13658 	ASSERT(bp != NULL);
13659 	xp = SD_GET_XBUF(bp);
13660 	ASSERT(xp != NULL);
13661 	un = SD_GET_UN(bp);
13662 	ASSERT(un != NULL);
13663 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13664 	pktp = SD_GET_PKTP(bp);
13665 	ASSERT(pktp != NULL);
13666 
13667 	SD_TRACE(SD_LOG_IO_CORE, un,
13668 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
13669 
13670 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13671 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13672 	ASSERT(uscmd != NULL);
13673 
13674 	/* Save the status and the residual into the uscsi_cmd struct */
13675 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
13676 	uscmd->uscsi_resid  = bp->b_resid;
13677 
13678 	/*
13679 	 * If enabled, copy any saved sense data into the area specified
13680 	 * by the uscsi command.
13681 	 */
13682 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
13683 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
13684 		/*
13685 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
13686 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
13687 		 */
13688 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
13689 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
13690 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
13691 	}
13692 
13693 	/* We are done with the scsi_pkt; free it now */
13694 	ASSERT(SD_GET_PKTP(bp) != NULL);
13695 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13696 
13697 	SD_TRACE(SD_LOG_IO_CORE, un,
13698 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
13699 }
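
/*
 * Illustrative sketch (not part of the driver): the init/destroy pair
 * above services USCSICMD ioctls.  A minimal user-level caller, per
 * uscsi(7I), looks roughly like the following; error handling is
 * elided and the function name is hypothetical.
 */
#ifdef SD_USCSI_USER_SKETCH		/* never defined; userland sketch */
#include <sys/scsi/impl/uscsi.h>
#include <string.h>
#include <unistd.h>

static int
send_test_unit_ready(int fd)
{
	struct uscsi_cmd ucmd;
	char cdb[6];				/* Group 0 TEST UNIT READY */

	(void) memset(&ucmd, 0, sizeof (ucmd));
	(void) memset(cdb, 0, sizeof (cdb));	/* opcode 0x00 */

	ucmd.uscsi_cdb = cdb;
	ucmd.uscsi_cdblen = sizeof (cdb);
	ucmd.uscsi_flags = USCSI_SILENT;	/* maps to FLAG_SILENT above */
	ucmd.uscsi_timeout = 30;		/* seconds */

	/*
	 * sd_initpkt_for_uscsi() builds and transports the scsi_pkt;
	 * sd_destroypkt_for_uscsi() copies status, resid, and sense
	 * data back into this uscsi_cmd on completion.
	 */
	return (ioctl(fd, USCSICMD, &ucmd));
}
#endif	/* SD_USCSI_USER_SKETCH */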
13700 
13701 
13702 /*
13703  *    Function: sd_bioclone_alloc
13704  *
13705  * Description: Allocate a buf(9S) and init it as per the given buf
13706  *		and the various arguments.  The associated sd_xbuf
13707  *		struct is (nearly) duplicated.  The struct buf *bp
13708  *		argument is saved in new_xp->xb_private.
13709  *
13710  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
13711  *		datalen - size of data area for the shadow bp
13712  *		blkno - starting LBA
13713  *		func - function pointer for b_iodone in the shadow buf. (May
13714  *			be NULL if none.)
13715  *
13716  * Return Code: Pointer to the allocated buf(9S) struct
13717  *
13718  *     Context: Can sleep.
13719  */
13720 
13721 static struct buf *
13722 sd_bioclone_alloc(struct buf *bp, size_t datalen,
13723 	daddr_t blkno, int (*func)(struct buf *))
13724 {
13725 	struct	sd_lun	*un;
13726 	struct	sd_xbuf	*xp;
13727 	struct	sd_xbuf	*new_xp;
13728 	struct	buf	*new_bp;
13729 
13730 	ASSERT(bp != NULL);
13731 	xp = SD_GET_XBUF(bp);
13732 	ASSERT(xp != NULL);
13733 	un = SD_GET_UN(bp);
13734 	ASSERT(un != NULL);
13735 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13736 
13737 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
13738 	    NULL, KM_SLEEP);
13739 
13740 	new_bp->b_lblkno	= blkno;
13741 
13742 	/*
13743 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13744 	 * original xbuf into it.
13745 	 */
13746 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13747 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13748 
13749 	/*
13750 	 * The given bp is automatically saved in the xb_private member
13751 	 * of the new xbuf.  Callers are allowed to depend on this.
13752 	 */
13753 	new_xp->xb_private = bp;
13754 
13755 	new_bp->b_private  = new_xp;
13756 
13757 	return (new_bp);
13758 }
13759 
13760 /*
13761  *    Function: sd_shadow_buf_alloc
13762  *
13763  * Description: Allocate a buf(9S) and init it as per the given buf
13764  *		and the various arguments.  The associated sd_xbuf
13765  *		struct is (nearly) duplicated.  The struct buf *bp
13766  *		argument is saved in new_xp->xb_private.
13767  *
13768  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
13769  *		datalen - size of data area for the shadow bp
13770  *		bflags - B_READ or B_WRITE (pseudo flag)
13771  *		blkno - starting LBA
13772  *		func - function pointer for b_iodone in the shadow buf. (May
13773  *			be NULL if none.)
13774  *
13775  * Return Code: Pointer to the allocated buf(9S) struct
13776  *
13777  *     Context: Can sleep.
13778  */
13779 
13780 static struct buf *
13781 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
13782 	daddr_t blkno, int (*func)(struct buf *))
13783 {
13784 	struct	sd_lun	*un;
13785 	struct	sd_xbuf	*xp;
13786 	struct	sd_xbuf	*new_xp;
13787 	struct	buf	*new_bp;
13788 
13789 	ASSERT(bp != NULL);
13790 	xp = SD_GET_XBUF(bp);
13791 	ASSERT(xp != NULL);
13792 	un = SD_GET_UN(bp);
13793 	ASSERT(un != NULL);
13794 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13795 
13796 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
13797 		bp_mapin(bp);
13798 	}
13799 
13800 	bflags &= (B_READ | B_WRITE);
13801 #if defined(__i386) || defined(__amd64)
13802 	new_bp = getrbuf(KM_SLEEP);
13803 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
13804 	new_bp->b_bcount = datalen;
13805 	new_bp->b_flags	= bp->b_flags | bflags;
13806 #else
13807 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
13808 	    datalen, bflags, SLEEP_FUNC, NULL);
13809 #endif
13810 	new_bp->av_forw	= NULL;
13811 	new_bp->av_back	= NULL;
13812 	new_bp->b_dev	= bp->b_dev;
13813 	new_bp->b_blkno	= blkno;
13814 	new_bp->b_iodone = func;
13815 	new_bp->b_edev	= bp->b_edev;
13816 	new_bp->b_resid	= 0;
13817 
13818 	/* We need to preserve the B_FAILFAST flag */
13819 	if (bp->b_flags & B_FAILFAST) {
13820 		new_bp->b_flags |= B_FAILFAST;
13821 	}
13822 
13823 	/*
13824 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13825 	 * original xbuf into it.
13826 	 */
13827 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13828 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13829 
13830 	/* Needed later to copy data between the shadow buf and original buf */
13831 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
13832 
13833 	/*
13834 	 * The given bp is automatically saved in the xb_private member
13835 	 * of the new xbuf.  Callers are allowed to depend on this.
13836 	 */
13837 	new_xp->xb_private = bp;
13838 
13839 	new_bp->b_private  = new_xp;
13840 
13841 	return (new_bp);
13842 }
13843 
13844 /*
13845  *    Function: sd_bioclone_free
13846  *
13847  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
13848  *		in larger-than-partition operations.
13849  *
13850  *     Context: May be called under interrupt context
13851  */
13852 
13853 static void
13854 sd_bioclone_free(struct buf *bp)
13855 {
13856 	struct sd_xbuf	*xp;
13857 
13858 	ASSERT(bp != NULL);
13859 	xp = SD_GET_XBUF(bp);
13860 	ASSERT(xp != NULL);
13861 
13862 	/*
13863 	 * Call bp_mapout() before freeing the buf, in case a lower
13864 	 * layer or HBA had done a bp_mapin().  We must do this here
13865 	 * as we are the "originator" of the shadow buf.
13866 	 */
13867 	bp_mapout(bp);
13868 
13869 	/*
13870 	 * Null out b_iodone before freeing the bp, to ensure that the driver
13871 	 * never gets confused by a stale value in this field. (Just a little
13872 	 * extra defensiveness here.)
13873 	 */
13874 	bp->b_iodone = NULL;
13875 
13876 	freerbuf(bp);
13877 
13878 	kmem_free(xp, sizeof (struct sd_xbuf));
13879 }
13880 
13881 /*
13882  *    Function: sd_shadow_buf_free
13883  *
13884  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
13885  *
13886  *     Context: May be called under interrupt context
13887  */
13888 
13889 static void
13890 sd_shadow_buf_free(struct buf *bp)
13891 {
13892 	struct sd_xbuf	*xp;
13893 
13894 	ASSERT(bp != NULL);
13895 	xp = SD_GET_XBUF(bp);
13896 	ASSERT(xp != NULL);
13897 
13898 #if defined(__sparc)
13899 	/*
13900 	 * Call bp_mapout() before freeing the buf, in case a lower
13901 	 * layer or HBA had done a bp_mapin().  We must do this here
13902 	 * as we are the "originator" of the shadow buf.
13903 	 */
13904 	bp_mapout(bp);
13905 #endif
13906 
13907 	/*
13908 	 * Null out b_iodone before freeing the bp, to ensure that the driver
13909 	 * never gets confused by a stale value in this field. (Just a little
13910 	 * extra defensiveness here.)
13911 	 */
13912 	bp->b_iodone = NULL;
13913 
13914 #if defined(__i386) || defined(__amd64)
13915 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
13916 	freerbuf(bp);
13917 #else
13918 	scsi_free_consistent_buf(bp);
13919 #endif
13920 
13921 	kmem_free(xp, sizeof (struct sd_xbuf));
13922 }
13923 
13924 
13925 /*
13926  *    Function: sd_print_transport_rejected_message
13927  *
13928  * Description: This implements the ludicrously complex rules for printing
13929  *		a "transport rejected" message.  This is to address the
13930  *		specific problem of having a flood of this error message
13931  *		produced when a failover occurs.
13932  *
13933  *     Context: Any.
13934  */
13935 
13936 static void
13937 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
13938 	int code)
13939 {
13940 	ASSERT(un != NULL);
13941 	ASSERT(mutex_owned(SD_MUTEX(un)));
13942 	ASSERT(xp != NULL);
13943 
13944 	/*
13945 	 * Print the "transport rejected" message under the following
13946 	 * conditions:
13947 	 *
13948 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
13949 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
13950 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
13951 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
13952 	 *   scsi_transport(9F) (which indicates that the target might have
13953 	 *   gone off-line).  This uses the un->un_tran_fatal_count
13954 	 *   counter, which is incremented whenever a TRAN_FATAL_ERROR is
13955 	 *   received, and reset to zero whenever a TRAN_ACCEPT is returned
13956 	 *   from scsi_transport().
13957 	 *
13958 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
13959 	 * the preceding cases in order for the message to be printed.
13960 	 */
13961 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
13962 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
13963 		    (code != TRAN_FATAL_ERROR) ||
13964 		    (un->un_tran_fatal_count == 1)) {
13965 			switch (code) {
13966 			case TRAN_BADPKT:
13967 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13968 				    "transport rejected bad packet\n");
13969 				break;
13970 			case TRAN_FATAL_ERROR:
13971 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13972 				    "transport rejected fatal error\n");
13973 				break;
13974 			default:
13975 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13976 				    "transport rejected (%d)\n", code);
13977 				break;
13978 			}
13979 		}
13980 	}
13981 }
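
/*
 * Illustrative sketch (not part of the driver): the gating above,
 * reduced to a predicate.  A rejection is logged only when FLAG_SILENT
 * is clear AND (diagnostic logging is forced on, OR the error is
 * non-fatal, OR this is the first fatal error of the current streak).
 * The function name and argument list are hypothetical.
 */
#ifdef SD_TRAN_LOG_SKETCH		/* never defined; sketch only */
static int
sd_sketch_should_log_rejection(int pkt_flags, int level_mask, int code,
    int fatal_streak)
{
	if (pkt_flags & FLAG_SILENT) {
		return (0);
	}
	return ((level_mask & SD_LOGMASK_DIAG) ||
	    (code != TRAN_FATAL_ERROR) ||
	    (fatal_streak == 1));
}
#endif	/* SD_TRAN_LOG_SKETCH */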
13982 
13983 
13984 /*
13985  *    Function: sd_add_buf_to_waitq
13986  *
13987  * Description: Add the given buf(9S) struct to the wait queue for the
13988  *		instance.  If sorting is enabled, then the buf is added
13989  *		to the queue via an elevator sort algorithm (a la
13990  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
13991  *		If sorting is not enabled, then the buf is just added
13992  *		to the end of the wait queue.
13993  *
13994  * Return Code: void
13995  *
13996  *     Context: Does not sleep/block, therefore technically can be called
13997  *		from any context.  However if sorting is enabled then the
13998  *		execution time is indeterminate, and may take long if
13999  *		the wait queue grows large.
14000  */
14001 
14002 static void
14003 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
14004 {
14005 	struct buf *ap;
14006 
14007 	ASSERT(bp != NULL);
14008 	ASSERT(un != NULL);
14009 	ASSERT(mutex_owned(SD_MUTEX(un)));
14010 
14011 	/* If the queue is empty, add the buf as the only entry & return. */
14012 	if (un->un_waitq_headp == NULL) {
14013 		ASSERT(un->un_waitq_tailp == NULL);
14014 		un->un_waitq_headp = un->un_waitq_tailp = bp;
14015 		bp->av_forw = NULL;
14016 		return;
14017 	}
14018 
14019 	ASSERT(un->un_waitq_tailp != NULL);
14020 
14021 	/*
14022 	 * If sorting is disabled, just add the buf to the tail end of
14023 	 * the wait queue and return.
14024 	 */
14025 	if (un->un_f_disksort_disabled) {
14026 		un->un_waitq_tailp->av_forw = bp;
14027 		un->un_waitq_tailp = bp;
14028 		bp->av_forw = NULL;
14029 		return;
14030 	}
14031 
14032 	/*
14033 	 * Sort through the list of requests currently on the wait queue
14034 	 * and add the new buf request at the appropriate position.
14035 	 *
14036 	 * The un->un_waitq_headp is an activity chain pointer on which
14037 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
14038 	 * first queue holds those requests which are positioned after
14039 	 * the current SD_GET_BLKNO() (in the first request); the second holds
14040 	 * requests which came in after their SD_GET_BLKNO() number was passed.
14041 	 * Thus we implement a one-way scan, retracting after reaching
14042 	 * the end of the drive to the first request on the second
14043 	 * queue, at which time it becomes the first queue.
14044 	 * A one-way scan is natural because of the way UNIX read-ahead
14045 	 * blocks are allocated.
14046 	 *
14047 	 * If we lie after the first request, then we must locate the
14048 	 * second request list and add ourselves to it.
14049 	 */
14050 	ap = un->un_waitq_headp;
14051 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
14052 		while (ap->av_forw != NULL) {
14053 			/*
14054 			 * Look for an "inversion" in the (normally
14055 			 * ascending) block numbers. This indicates
14056 			 * the start of the second request list.
14057 			 */
14058 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
14059 				/*
14060 				 * Search the second request list for the
14061 				 * first request at a larger block number.
14062 				 * We go before that; however if there is
14063 				 * no such request, we go at the end.
14064 				 */
14065 				do {
14066 					if (SD_GET_BLKNO(bp) <
14067 					    SD_GET_BLKNO(ap->av_forw)) {
14068 						goto insert;
14069 					}
14070 					ap = ap->av_forw;
14071 				} while (ap->av_forw != NULL);
14072 				goto insert;		/* after last */
14073 			}
14074 			ap = ap->av_forw;
14075 		}
14076 
14077 		/*
14078 		 * No inversions... we will go after the last, and
14079 		 * be the first request in the second request list.
14080 		 */
14081 		goto insert;
14082 	}
14083 
14084 	/*
14085 	 * Request is at/after the current request...
14086 	 * sort in the first request list.
14087 	 */
14088 	while (ap->av_forw != NULL) {
14089 		/*
14090 		 * We want to go after the current request (1) if
14091 		 * there is an inversion after it (i.e. it is the end
14092 		 * of the first request list), or (2) if the next
14093 		 * request is a larger block no. than our request.
14094 		 */
14095 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
14096 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
14097 			goto insert;
14098 		}
14099 		ap = ap->av_forw;
14100 	}
14101 
14102 	/*
14103 	 * Neither a second list nor a larger request, therefore
14104 	 * we go at the end of the first list (which is the same
14105 	 * as the end of the whole shebang).
14106 	 */
14107 insert:
14108 	bp->av_forw = ap->av_forw;
14109 	ap->av_forw = bp;
14110 
14111 	/*
14112 	 * If we inserted onto the tail end of the waitq, make sure the
14113 	 * tail pointer is updated.
14114 	 */
14115 	if (ap == un->un_waitq_tailp) {
14116 		un->un_waitq_tailp = bp;
14117 	}
14118 }
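
/*
 * Illustrative sketch (not part of the driver): the one-way elevator
 * above, restated over a plain singly linked list.  The queue holds
 * two runs, each sorted ascending by block number; a new request joins
 * the first run if its block is at or past the head's, otherwise the
 * second run.  Types and names are hypothetical, and the driver's
 * tail-pointer maintenance is omitted.
 */
#ifdef SD_DISKSORT_SKETCH		/* never defined; sketch only */
struct sd_sketch_req {
	struct sd_sketch_req	*sr_next;
	uint64_t		sr_blkno;
};

static void
sd_sketch_elevator_insert(struct sd_sketch_req **headp,
    struct sd_sketch_req *rp)
{
	struct sd_sketch_req *ap = *headp;

	if (ap == NULL) {			/* empty queue */
		rp->sr_next = NULL;
		*headp = rp;
		return;
	}
	if (rp->sr_blkno < ap->sr_blkno) {
		/* Second run: first skip to the inversion (if any)... */
		while ((ap->sr_next != NULL) &&
		    (ap->sr_next->sr_blkno >= ap->sr_blkno)) {
			ap = ap->sr_next;
		}
		/* ...then stop before the first larger block number. */
		while ((ap->sr_next != NULL) &&
		    (ap->sr_next->sr_blkno <= rp->sr_blkno)) {
			ap = ap->sr_next;
		}
	} else {
		/* First run: stop at the inversion or a larger block. */
		while ((ap->sr_next != NULL) &&
		    (ap->sr_next->sr_blkno >= ap->sr_blkno) &&
		    (ap->sr_next->sr_blkno <= rp->sr_blkno)) {
			ap = ap->sr_next;
		}
	}
	rp->sr_next = ap->sr_next;
	ap->sr_next = rp;
}
#endif	/* SD_DISKSORT_SKETCH */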
14119 
14120 
14121 /*
14122  *    Function: sd_start_cmds
14123  *
14124  * Description: Remove and transport cmds from the driver queues.
14125  *
14126  *   Arguments: un - pointer to the unit (soft state) struct for the target.
14127  *
14128  *		immed_bp - ptr to a buf to be transported immediately. Only
14129  *		the immed_bp is transported; bufs on the waitq are not
14130  *		processed and the un_retry_bp is not checked.  If immed_bp is
14131  *		NULL, then normal queue processing is performed.
14132  *
14133  *     Context: May be called from kernel thread context, interrupt context,
14134  *		or runout callback context. This function may not block or
14135  *		call routines that block.
14136  */
14137 
14138 static void
14139 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
14140 {
14141 	struct	sd_xbuf	*xp;
14142 	struct	buf	*bp;
14143 	void	(*statp)(kstat_io_t *);
14144 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14145 	void	(*saved_statp)(kstat_io_t *);
14146 #endif
14147 	int	rval;
14148 
14149 	ASSERT(un != NULL);
14150 	ASSERT(mutex_owned(SD_MUTEX(un)));
14151 	ASSERT(un->un_ncmds_in_transport >= 0);
14152 	ASSERT(un->un_throttle >= 0);
14153 
14154 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
14155 
14156 	do {
14157 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14158 		saved_statp = NULL;
14159 #endif
14160 
14161 		/*
14162 		 * If we are syncing or dumping, fail the command to
14163 		 * avoid recursively calling back into scsi_transport().
14164 		 * The dump I/O itself uses a separate code path so this
14165 		 * only prevents non-dump I/O from being sent while dumping.
14166 		 * File system sync takes place before dumping begins.
14167 		 * During panic, filesystem I/O is allowed provided
14168 		 * un_in_callback is <= 1.  This is to prevent recursion
14169 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
14170 		 * sd_start_cmds and so on.  See panic.c for more information
14171 		 * about the states the system can be in during panic.
14172 		 */
14173 		if ((un->un_state == SD_STATE_DUMPING) ||
14174 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
14175 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14176 			    "sd_start_cmds: panicking\n");
14177 			goto exit;
14178 		}
14179 
14180 		if ((bp = immed_bp) != NULL) {
14181 			/*
14182 			 * We have a bp that must be transported immediately.
14183 			 * It's OK to transport the immed_bp here without doing
14184 			 * the throttle limit check because the immed_bp is
14185 			 * always used in a retry/recovery case. This means
14186 			 * that we know we are not at the throttle limit by
14187 			 * virtue of the fact that to get here we must have
14188 			 * already gotten a command back via sdintr(). This also
14189 			 * relies on (1) the command on un_retry_bp preventing
14190 			 * further commands from the waitq from being issued;
14191 			 * and (2) the code in sd_retry_command checking the
14192 			 * throttle limit before issuing a delayed or immediate
14193 			 * retry. This holds even if the throttle limit is
14194 			 * currently ratcheted down from its maximum value.
14195 			 */
14196 			statp = kstat_runq_enter;
14197 			if (bp == un->un_retry_bp) {
14198 				ASSERT((un->un_retry_statp == NULL) ||
14199 				    (un->un_retry_statp == kstat_waitq_enter) ||
14200 				    (un->un_retry_statp ==
14201 				    kstat_runq_back_to_waitq));
14202 				/*
14203 				 * If the waitq kstat was incremented when
14204 				 * sd_set_retry_bp() queued this bp for a retry,
14205 				 * then we must set up statp so that the waitq
14206 				 * count will get decremented correctly below.
14207 				 * Also we must clear un->un_retry_statp to
14208 				 * ensure that we do not act on a stale value
14209 				 * in this field.
14210 				 */
14211 				if ((un->un_retry_statp == kstat_waitq_enter) ||
14212 				    (un->un_retry_statp ==
14213 				    kstat_runq_back_to_waitq)) {
14214 					statp = kstat_waitq_to_runq;
14215 				}
14216 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14217 				saved_statp = un->un_retry_statp;
14218 #endif
14219 				un->un_retry_statp = NULL;
14220 
14221 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14222 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
14223 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
14224 				    un, un->un_retry_bp, un->un_throttle,
14225 				    un->un_ncmds_in_transport);
14226 			} else {
14227 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
14228 				    "processing priority bp:0x%p\n", bp);
14229 			}
14230 
14231 		} else if ((bp = un->un_waitq_headp) != NULL) {
14232 			/*
14233 			 * A command on the waitq is ready to go, but do not
14234 			 * send it if:
14235 			 *
14236 			 * (1) the throttle limit has been reached, or
14237 			 * (2) a retry is pending, or
14238 			 * (3) a START_STOP_UNIT callback is pending, or
14239 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
14240 			 *	command is pending.
14241 			 *
14242 			 * For all of these conditions, IO processing will
14243 			 * restart after the condition is cleared.
14244 			 */
14245 			if (un->un_ncmds_in_transport >= un->un_throttle) {
14246 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14247 				    "sd_start_cmds: exiting, "
14248 				    "throttle limit reached!\n");
14249 				goto exit;
14250 			}
14251 			if (un->un_retry_bp != NULL) {
14252 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14253 				    "sd_start_cmds: exiting, retry pending!\n");
14254 				goto exit;
14255 			}
14256 			if (un->un_startstop_timeid != NULL) {
14257 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14258 				    "sd_start_cmds: exiting, "
14259 				    "START_STOP pending!\n");
14260 				goto exit;
14261 			}
14262 			if (un->un_direct_priority_timeid != NULL) {
14263 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14264 				    "sd_start_cmds: exiting, "
14265 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
14266 				goto exit;
14267 			}
14268 
14269 			/* Dequeue the command */
14270 			un->un_waitq_headp = bp->av_forw;
14271 			if (un->un_waitq_headp == NULL) {
14272 				un->un_waitq_tailp = NULL;
14273 			}
14274 			bp->av_forw = NULL;
14275 			statp = kstat_waitq_to_runq;
14276 			SD_TRACE(SD_LOG_IO_CORE, un,
14277 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
14278 
14279 		} else {
14280 			/* No work to do so bail out now */
14281 			SD_TRACE(SD_LOG_IO_CORE, un,
14282 			    "sd_start_cmds: no more work, exiting!\n");
14283 			goto exit;
14284 		}
14285 
14286 		/*
14287 		 * Reset the state to normal. This is the mechanism by which
14288 		 * the state transitions from either SD_STATE_RWAIT or
14289 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
14290 		 * If state is SD_STATE_PM_CHANGING then this command is
14291 		 * part of the device power control and the state must
14292 		 * not be put back to normal. Doing so would
14293 		 * allow new commands to proceed when they shouldn't;
14294 		 * the device may be going off.
14295 		 */
14296 		if ((un->un_state != SD_STATE_SUSPENDED) &&
14297 		    (un->un_state != SD_STATE_PM_CHANGING)) {
14298 			New_state(un, SD_STATE_NORMAL);
14299 		}
14300 
14301 		xp = SD_GET_XBUF(bp);
14302 		ASSERT(xp != NULL);
14303 
14304 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14305 		/*
14306 		 * Allocate the scsi_pkt if we need one, or attach DMA
14307 		 * resources if we have a scsi_pkt that needs them. The
14308 		 * latter should only occur for commands that are being
14309 		 * retried.
14310 		 */
14311 		if ((xp->xb_pktp == NULL) ||
14312 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
14313 #else
14314 		if (xp->xb_pktp == NULL) {
14315 #endif
14316 			/*
14317 			 * There is no scsi_pkt allocated for this buf. Call
14318 			 * the initpkt function to allocate & init one.
14319 			 *
14320 			 * The scsi_init_pkt runout callback functionality is
14321 			 * implemented as follows:
14322 			 *
14323 			 * 1) The initpkt function always calls
14324 			 *    scsi_init_pkt(9F) with sdrunout specified as the
14325 			 *    callback routine.
14326 			 * 2) A successful packet allocation is initialized and
14327 			 *    the I/O is transported.
14328 			 * 3) The I/O associated with an allocation resource
14329 			 *    failure is left on its queue to be retried via
14330 			 *    runout or the next I/O.
14331 			 * 4) The I/O associated with a DMA error is removed
14332 			 *    from the queue and failed with EIO. Processing of
14333 			 *    the transport queues is also halted to be
14334 			 *    restarted via runout or the next I/O.
14335 			 * 5) The I/O associated with a CDB size or packet
14336 			 *    size error is removed from the queue and failed
14337 			 *    with EIO. Processing of the transport queues is
14338 			 *    continued.
14339 			 *
14340 			 * Note: there is no interface for canceling a runout
14341 			 * callback. To prevent the driver from detaching or
14342 			 * suspending while a runout is pending the driver
14343 			 * state is set to SD_STATE_RWAIT
14344 			 *
14345 			 * Note: using the scsi_init_pkt callback facility can
14346 			 * result in an I/O request persisting at the head of
14347 			 * the list which cannot be satisfied even after
14348 			 * multiple retries. In the future the driver may
14349 			 * implement some kind of maximum runout count before
14350 			 * failing an I/O.
14351 			 *
14352 			 * Note: the use of funcp below may seem superfluous,
14353 			 * but it helps warlock figure out the correct
14354 			 * initpkt function calls (see [s]sd.wlcmd).
14355 			 */
14356 			struct scsi_pkt	*pktp;
14357 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
14358 
14359 			ASSERT(bp != un->un_rqs_bp);
14360 
14361 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
14362 			switch ((*funcp)(bp, &pktp)) {
14363 			case  SD_PKT_ALLOC_SUCCESS:
14364 				xp->xb_pktp = pktp;
14365 				SD_TRACE(SD_LOG_IO_CORE, un,
14366 				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
14367 				    pktp);
14368 				goto got_pkt;
14369 
14370 			case SD_PKT_ALLOC_FAILURE:
14371 				/*
14372 				 * Temporary (hopefully) resource depletion.
14373 				 * Since retries and RQS commands always have a
14374 				 * scsi_pkt allocated, these cases should never
14375 				 * get here. So the only cases this needs to
14376 				 * handle is a bp from the waitq (which we put
14377 				 * back onto the waitq for sdrunout), or a bp
14378 				 * sent as an immed_bp (which we just fail).
14379 				 */
14380 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14381 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
14382 
14383 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14384 
14385 				if (bp == immed_bp) {
14386 					/*
14387 					 * If SD_XB_DMA_FREED is clear, then
14388 					 * this is a failure to allocate a
14389 					 * scsi_pkt, and we must fail the
14390 					 * command.
14391 					 */
14392 					if ((xp->xb_pkt_flags &
14393 					    SD_XB_DMA_FREED) == 0) {
14394 						break;
14395 					}
14396 
14397 					/*
14398 					 * If this immediate command is NOT our
14399 					 * un_retry_bp, then we must fail it.
14400 					 */
14401 					if (bp != un->un_retry_bp) {
14402 						break;
14403 					}
14404 
14405 					/*
14406 					 * We get here if this cmd is our
14407 					 * un_retry_bp that was DMAFREED, but
14408 					 * scsi_init_pkt() failed to reallocate
14409 					 * DMA resources when we attempted to
14410 					 * retry it. This can happen when an
14411 					 * mpxio failover is in progress, but
14412 					 * we don't want to just fail the
14413 					 * command in this case.
14414 					 *
14415 					 * Use timeout(9F) to restart it after
14416 					 * a 100ms delay.  We don't want to
14417 					 * let sdrunout() restart it, because
14418 					 * sdrunout() is just supposed to start
14419 					 * commands that are sitting on the
14420 					 * wait queue.  The un_retry_bp stays
14421 					 * set until the command completes, but
14422 					 * sdrunout can be called many times
14423 					 * before that happens.  Since sdrunout
14424 					 * cannot tell if the un_retry_bp is
14425 					 * already in the transport, it could
14426 					 * end up calling scsi_transport() for
14427 					 * the un_retry_bp multiple times.
14428 					 *
14429 					 * Also: don't schedule the callback
14430 					 * if some other callback is already
14431 					 * pending.
14432 					 */
14433 					if (un->un_retry_statp == NULL) {
14434 						/*
14435 						 * Restore the kstat pointer to
14436 						 * keep kstat counts coherent
14437 						 * when we do retry the command.
14438 						 */
14439 						un->un_retry_statp =
14440 						    saved_statp;
14441 					}
14442 
14443 					if ((un->un_startstop_timeid == NULL) &&
14444 					    (un->un_retry_timeid == NULL) &&
14445 					    (un->un_direct_priority_timeid ==
14446 					    NULL)) {
14447 
14448 						un->un_retry_timeid =
14449 						    timeout(
14450 						    sd_start_retry_command,
14451 						    un, SD_RESTART_TIMEOUT);
14452 					}
14453 					goto exit;
14454 				}
14455 
14456 #else
14457 				if (bp == immed_bp) {
14458 					break;	/* Just fail the command */
14459 				}
14460 #endif
14461 
14462 				/* Add the buf back to the head of the waitq */
14463 				bp->av_forw = un->un_waitq_headp;
14464 				un->un_waitq_headp = bp;
14465 				if (un->un_waitq_tailp == NULL) {
14466 					un->un_waitq_tailp = bp;
14467 				}
14468 				goto exit;
14469 
14470 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
14471 				/*
14472 				 * HBA DMA resource failure. Fail the command
14473 				 * and continue processing of the queues.
14474 				 */
14475 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14476 				    "sd_start_cmds: "
14477 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
14478 				break;
14479 
14480 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
14481 				/*
14482 				 * Note:x86: Partial DMA mapping not supported
14483 				 * for USCSI commands, and all the needed DMA
14484 				 * resources were not allocated.
14485 				 */
14486 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14487 				    "sd_start_cmds: "
14488 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
14489 				break;
14490 
14491 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
14492 				/*
14493 				 * Note:x86: Request cannot fit into CDB based
14494 				 * on lba and len.
14495 				 */
14496 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14497 				    "sd_start_cmds: "
14498 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
14499 				break;
14500 
14501 			default:
14502 				/* Should NEVER get here! */
14503 				panic("scsi_initpkt error");
14504 				/*NOTREACHED*/
14505 			}
14506 
14507 			/*
14508 			 * Fatal error in allocating a scsi_pkt for this buf.
14509 			 * Update kstats & return the buf with an error code.
14510 			 * We must use sd_return_failed_command_no_restart() to
14511 			 * avoid a recursive call back into sd_start_cmds().
14512 			 * However this also means that we must keep processing
14513 			 * the waitq here in order to avoid stalling.
14514 			 */
14515 			if (statp == kstat_waitq_to_runq) {
14516 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
14517 			}
14518 			sd_return_failed_command_no_restart(un, bp, EIO);
14519 			if (bp == immed_bp) {
14520 				/* immed_bp is gone by now, so clear this */
14521 				immed_bp = NULL;
14522 			}
14523 			continue;
14524 		}
14525 got_pkt:
14526 		if (bp == immed_bp) {
14527 			/* goto the head of the class.... */
14528 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
14529 		}
14530 
14531 		un->un_ncmds_in_transport++;
14532 		SD_UPDATE_KSTATS(un, statp, bp);
14533 
14534 		/*
14535 		 * Call scsi_transport() to send the command to the target.
14536 		 * According to SCSA architecture, we must drop the mutex here
14537 		 * before calling scsi_transport() in order to avoid deadlock.
14538 		 * Note that the scsi_pkt's completion routine can be executed
14539 		 * (from interrupt context) even before the call to
14540 		 * scsi_transport() returns.
14541 		 */
14542 		SD_TRACE(SD_LOG_IO_CORE, un,
14543 		    "sd_start_cmds: calling scsi_transport()\n");
14544 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
14545 
14546 		mutex_exit(SD_MUTEX(un));
14547 		rval = scsi_transport(xp->xb_pktp);
14548 		mutex_enter(SD_MUTEX(un));
14549 
14550 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14551 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
14552 
14553 		switch (rval) {
14554 		case TRAN_ACCEPT:
14555 			/* Clear this with every pkt accepted by the HBA */
14556 			un->un_tran_fatal_count = 0;
14557 			break;	/* Success; try the next cmd (if any) */
14558 
14559 		case TRAN_BUSY:
14560 			un->un_ncmds_in_transport--;
14561 			ASSERT(un->un_ncmds_in_transport >= 0);
14562 
14563 			/*
14564 			 * Don't retry request sense, the sense data
14565 			 * is lost when another request is sent.
14566 			 * Free up the rqs buf and retry
14567 			 * the original failed cmd.  Update kstat.
14568 			 */
14569 			if (bp == un->un_rqs_bp) {
14570 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14571 				bp = sd_mark_rqs_idle(un, xp);
14572 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
14573 					NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
14574 					kstat_waitq_enter);
14575 				goto exit;
14576 			}
14577 
14578 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14579 			/*
14580 			 * Free the DMA resources for the scsi_pkt. This will
14581 			 * allow mpxio to select another path the next time
14582 			 * we call scsi_transport() with this scsi_pkt.
14583 			 * See sdintr() for the rationale behind this.
14584 			 */
14585 			if ((un->un_f_is_fibre == TRUE) &&
14586 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
14587 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
14588 				scsi_dmafree(xp->xb_pktp);
14589 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
14590 			}
14591 #endif
14592 
14593 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
14594 				/*
14595 				 * Commands that are SD_PATH_DIRECT_PRIORITY
14596 				 * are for error recovery situations. These do
14597 				 * not use the normal command waitq, so if they
14598 				 * get a TRAN_BUSY we cannot put them back onto
14599 				 * the waitq for later retry. One possible
14600 				 * problem is that there could already be some
14601 				 * other command on un_retry_bp that is waiting
14602 				 * for this one to complete, so we would be
14603 				 * deadlocked if we put this command back onto
14604 				 * the waitq for later retry (since un_retry_bp
14605 				 * must complete before the driver gets back to
14606 				 * commands on the waitq).
14607 				 *
14608 				 * To avoid deadlock we must schedule a callback
14609 				 * that will restart this command after a set
14610 				 * interval.  This should keep retrying for as
14611 				 * long as the underlying transport keeps
14612 				 * returning TRAN_BUSY (just like for other
14613 				 * commands).  Use the same timeout interval as
14614 				 * for the ordinary TRAN_BUSY retry.
14615 				 */
14616 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14617 				    "sd_start_cmds: scsi_transport() returned "
14618 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
14619 
14620 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14621 				un->un_direct_priority_timeid =
14622 				    timeout(sd_start_direct_priority_command,
14623 				    bp, SD_BSY_TIMEOUT / 500);
14624 
14625 				goto exit;
14626 			}
14627 
14628 			/*
14629 			 * For TRAN_BUSY, we want to reduce the throttle value,
14630 			 * unless we are retrying a command.
14631 			 */
14632 			if (bp != un->un_retry_bp) {
14633 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
14634 			}
14635 
14636 			/*
14637 			 * Set up the bp to be tried again 10 ms later.
14638 			 * Note:x86: Is there a timeout value in the sd_lun
14639 			 * for this condition?
14640 			 */
14641 			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
14642 				kstat_runq_back_to_waitq);
14643 			goto exit;
14644 
14645 		case TRAN_FATAL_ERROR:
14646 			un->un_tran_fatal_count++;
14647 			/* FALLTHRU */
14648 
14649 		case TRAN_BADPKT:
14650 		default:
14651 			un->un_ncmds_in_transport--;
14652 			ASSERT(un->un_ncmds_in_transport >= 0);
14653 
14654 			/*
14655 			 * If this is our REQUEST SENSE command with a
14656 			 * transport error, we must get back the pointers
14657 			 * to the original buf, and mark the REQUEST
14658 			 * SENSE command as "available".
14659 			 */
14660 			if (bp == un->un_rqs_bp) {
14661 				bp = sd_mark_rqs_idle(un, xp);
14662 				xp = SD_GET_XBUF(bp);
14663 			} else {
14664 				/*
14665 				 * Legacy behavior: do not update transport
14666 				 * error count for request sense commands.
14667 				 */
14668 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
14669 			}
14670 
14671 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
14672 			sd_print_transport_rejected_message(un, xp, rval);
14673 
14674 			/*
14675 			 * We must use sd_return_failed_command_no_restart() to
14676 			 * avoid a recursive call back into sd_start_cmds().
14677 			 * However this also means that we must keep processing
14678 			 * the waitq here in order to avoid stalling.
14679 			 */
14680 			sd_return_failed_command_no_restart(un, bp, EIO);
14681 
14682 			/*
14683 			 * Notify any threads waiting in sd_ddi_suspend() that
14684 			 * a command completion has occurred.
14685 			 */
14686 			if (un->un_state == SD_STATE_SUSPENDED) {
14687 				cv_broadcast(&un->un_disk_busy_cv);
14688 			}
14689 
14690 			if (bp == immed_bp) {
14691 				/* immed_bp is gone by now, so clear this */
14692 				immed_bp = NULL;
14693 			}
14694 			break;
14695 		}
14696 
14697 	} while (immed_bp == NULL);
14698 
14699 exit:
14700 	ASSERT(mutex_owned(SD_MUTEX(un)));
14701 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
14702 }
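
/*
 * Illustrative sketch (not part of the driver): the scsi_transport()
 * dispositions handled above, reduced to a table.  TRAN_ACCEPT lets
 * the loop move on to the next command; TRAN_BUSY requeues the command
 * for a delayed retry (cutting the throttle unless the command is
 * itself a retry); everything else fails the command with EIO while
 * the waitq continues to drain.  Names are hypothetical.
 */
#ifdef SD_TRAN_SKETCH			/* never defined; sketch only */
enum sd_sketch_action {
	SD_SKETCH_NEXT_CMD,		/* try the next queued command */
	SD_SKETCH_RETRY_LATER,		/* requeue with a short delay */
	SD_SKETCH_FAIL_EIO		/* fail cmd, keep draining waitq */
};

static enum sd_sketch_action
sd_sketch_tran_disposition(int rval)
{
	switch (rval) {
	case TRAN_ACCEPT:
		return (SD_SKETCH_NEXT_CMD);
	case TRAN_BUSY:
		return (SD_SKETCH_RETRY_LATER);
	case TRAN_FATAL_ERROR:
	case TRAN_BADPKT:
	default:
		return (SD_SKETCH_FAIL_EIO);
	}
}
#endif	/* SD_TRAN_SKETCH */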
14703 
14704 
14705 /*
14706  *    Function: sd_return_command
14707  *
14708  * Description: Returns a command to its originator (with or without an
14709  *		error).  Also starts commands waiting to be transported
14710  *		to the target.
14711  *
14712  *     Context: May be called from interrupt, kernel, or timeout context
14713  */
14714 
14715 static void
14716 sd_return_command(struct sd_lun *un, struct buf *bp)
14717 {
14718 	struct sd_xbuf *xp;
14719 #if defined(__i386) || defined(__amd64)
14720 	struct scsi_pkt *pktp;
14721 #endif
14722 
14723 	ASSERT(bp != NULL);
14724 	ASSERT(un != NULL);
14725 	ASSERT(mutex_owned(SD_MUTEX(un)));
14726 	ASSERT(bp != un->un_rqs_bp);
14727 	xp = SD_GET_XBUF(bp);
14728 	ASSERT(xp != NULL);
14729 
14730 #if defined(__i386) || defined(__amd64)
14731 	pktp = SD_GET_PKTP(bp);
14732 #endif
14733 
14734 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
14735 
14736 #if defined(__i386) || defined(__amd64)
14737 	/*
14738 	 * Note:x86: check for the "sdrestart failed" case.
14739 	 */
14740 	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
14741 		(geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
14742 		(xp->xb_pktp->pkt_resid == 0)) {
14743 
14744 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
14745 			/*
14746 			 * Successfully set up next portion of cmd
14747 			 * transfer, try sending it
14748 			 */
14749 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
14750 			    NULL, NULL, 0, (clock_t)0, NULL);
14751 			sd_start_cmds(un, NULL);
14752 			return;	/* Note:x86: need a return here? */
14753 		}
14754 	}
14755 #endif
14756 
14757 	/*
14758 	 * If this is the failfast bp, clear it from un_failfast_bp. This
14759 	 * can happen if upon being re-tried the failfast bp either
14760 	 * succeeded or encountered another error (possibly even a different
14761 	 * error than the one that precipitated the failfast state, but in
14762 	 * that case it would have had to exhaust retries as well). Regardless,
14763 	 * this should not occur whenever the instance is in the active
14764 	 * this should never occur while the instance is in the active
14765 	 */
14766 	if (bp == un->un_failfast_bp) {
14767 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
14768 		un->un_failfast_bp = NULL;
14769 	}
14770 
14771 	/*
14772 	 * Clear the failfast state upon successful completion of ANY cmd.
14773 	 */
14774 	if (bp->b_error == 0) {
14775 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
14776 	}
14777 
14778 	/*
14779 	 * This is used if the command was retried one or more times. Show that
14780 	 * we are done with it, and allow processing of the waitq to resume.
14781 	 */
14782 	if (bp == un->un_retry_bp) {
14783 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14784 		    "sd_return_command: un:0x%p: "
14785 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
14786 		un->un_retry_bp = NULL;
14787 		un->un_retry_statp = NULL;
14788 	}
14789 
14790 	SD_UPDATE_RDWR_STATS(un, bp);
14791 	SD_UPDATE_PARTITION_STATS(un, bp);
14792 
14793 	switch (un->un_state) {
14794 	case SD_STATE_SUSPENDED:
14795 		/*
14796 		 * Notify any threads waiting in sd_ddi_suspend() that
14797 		 * a command completion has occurred.
14798 		 */
14799 		cv_broadcast(&un->un_disk_busy_cv);
14800 		break;
14801 	default:
14802 		sd_start_cmds(un, NULL);
14803 		break;
14804 	}
14805 
14806 	/* Return this command up the iodone chain to its originator. */
14807 	mutex_exit(SD_MUTEX(un));
14808 
14809 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
14810 	xp->xb_pktp = NULL;
14811 
14812 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
14813 
14814 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14815 	mutex_enter(SD_MUTEX(un));
14816 
14817 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
14818 }
14819 
14820 
14821 /*
14822  *    Function: sd_return_failed_command
14823  *
14824  * Description: Command completion when an error occurred.
14825  *
14826  *     Context: May be called from interrupt context
14827  */
14828 
14829 static void
14830 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
14831 {
14832 	ASSERT(bp != NULL);
14833 	ASSERT(un != NULL);
14834 	ASSERT(mutex_owned(SD_MUTEX(un)));
14835 
14836 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14837 	    "sd_return_failed_command: entry\n");
14838 
14839 	/*
14840 	 * b_resid could already be nonzero due to a partial data
14841 	 * transfer, so do not change it here.
14842 	 */
14843 	SD_BIOERROR(bp, errcode);
14844 
14845 	sd_return_command(un, bp);
14846 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14847 	    "sd_return_failed_command: exit\n");
14848 }
14849 
14850 
14851 /*
14852  *    Function: sd_return_failed_command_no_restart
14853  *
14854  * Description: Same as sd_return_failed_command, but ensures that no
14855  *		call back into sd_start_cmds will be issued.
14856  *
14857  *     Context: May be called from interrupt context
14858  */
14859 
14860 static void
14861 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
14862 	int errcode)
14863 {
14864 	struct sd_xbuf *xp;
14865 
14866 	ASSERT(bp != NULL);
14867 	ASSERT(un != NULL);
14868 	ASSERT(mutex_owned(SD_MUTEX(un)));
14869 	xp = SD_GET_XBUF(bp);
14870 	ASSERT(xp != NULL);
14871 	ASSERT(errcode != 0);
14872 
14873 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14874 	    "sd_return_failed_command_no_restart: entry\n");
14875 
14876 	/*
14877 	 * b_resid could already be nonzero due to a partial data
14878 	 * transfer, so do not change it here.
14879 	 */
14880 	SD_BIOERROR(bp, errcode);
14881 
14882 	/*
14883 	 * If this is the failfast bp, clear it. This can happen if the
14884 	 * failfast bp encountered a fatal error when we attempted to
14885 	 * re-try it (such as a scsi_transport(9F) failure).  However
14886 	 * we should NOT be in an active failfast state if the failfast
14887 	 * bp is not NULL.
14888 	 */
14889 	if (bp == un->un_failfast_bp) {
14890 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
14891 		un->un_failfast_bp = NULL;
14892 	}
14893 
14894 	if (bp == un->un_retry_bp) {
14895 		/*
14896 		 * This command was retried one or more times. Show that we are
14897 		 * done with it, and allow processing of the waitq to resume.
14898 		 */
14899 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14900 		    "sd_return_failed_command_no_restart: "
14901 		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
14902 		un->un_retry_bp = NULL;
14903 		un->un_retry_statp = NULL;
14904 	}
14905 
14906 	SD_UPDATE_RDWR_STATS(un, bp);
14907 	SD_UPDATE_PARTITION_STATS(un, bp);
14908 
14909 	mutex_exit(SD_MUTEX(un));
14910 
14911 	if (xp->xb_pktp != NULL) {
14912 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
14913 		xp->xb_pktp = NULL;
14914 	}
14915 
14916 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
14917 
14918 	mutex_enter(SD_MUTEX(un));
14919 
14920 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14921 	    "sd_return_failed_command_no_restart: exit\n");
14922 }
14923 
14924 
14925 /*
14926  *    Function: sd_retry_command
14927  *
14928  * Description: queue up a command for retry, or (optionally) fail it
14929  *		if retry counts are exhausted.
14930  *
14931  *   Arguments: un - Pointer to the sd_lun struct for the target.
14932  *
14933  *		bp - Pointer to the buf for the command to be retried.
14934  *
14935  *		retry_check_flag - Flag to see which (if any) of the retry
14936  *		   counts should be decremented/checked. If the indicated
14937  *		   retry count is exhausted, then the command will not be
14938  *		   retried; it will be failed instead. This should use a
14939  *		   value equal to one of the following:
14940  *
14941  *			SD_RETRIES_NOCHECK
14942  *			SD_RETRIES_STANDARD
14943  *			SD_RETRIES_VICTIM
14944  *
14945  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
14946  *		   if the check should be made to see if FLAG_ISOLATE is set
14947  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
14948  *		   not retried, it is simply failed.
14949  *
14950  *		user_funcp - Ptr to function to call before dispatching the
14951  *		   command. May be NULL if no action needs to be performed.
14952  *		   (Primarily intended for printing messages.)
14953  *
14954  *		user_arg - Optional argument to be passed along to
14955  *		   the user_funcp call.
14956  *
14957  *		failure_code - errno return code to set in the bp if the
14958  *		   command is going to be failed.
14959  *
14960  *		retry_delay - Retry delay interval in (clock_t) units. May
14961  *		   be zero, which indicates that the command should be retried
14962  *		   immediately (ie, without an intervening delay).
14963  *
14964  *		statp - Ptr to kstat function to be updated if the command
14965  *		   is queued for a delayed retry. May be NULL if no kstat
14966  *		   update is desired.
14967  *
14968  *     Context: May be called from interrupt context.
14969  */
14970 
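/*
 * Illustrative usage (a sketch drawn from call sites elsewhere in this
 * file, not new driver logic). An immediate retry with no count check
 * and no message callback:
 *
 *	sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
 *	    NULL, NULL, 0, (clock_t)0, NULL);
 *
 * A delayed retry that charges the waitq kstat while the command waits:
 *
 *	sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
 *	    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
 */
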
14971 static void
14972 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
14973 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
14974 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
14975 	int code), void *user_arg, int failure_code, clock_t retry_delay,
14976 {
14977 	struct sd_xbuf	*xp;
14978 	struct scsi_pkt	*pktp;
14979 
14980 	ASSERT(un != NULL);
14981 	ASSERT(mutex_owned(SD_MUTEX(un)));
14982 	ASSERT(bp != NULL);
14983 	xp = SD_GET_XBUF(bp);
14984 	ASSERT(xp != NULL);
14985 	pktp = SD_GET_PKTP(bp);
14986 	ASSERT(pktp != NULL);
14987 
14988 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14989 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
14990 
14991 	/*
14992 	 * If we are syncing or dumping, fail the command to avoid
14993 	 * recursively calling back into scsi_transport().
14994 	 */
14995 	if (ddi_in_panic()) {
14996 		goto fail_command_no_log;
14997 	}
14998 
14999 	/*
15000 	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
15001 	 * log an error and fail the command.
15002 	 */
15003 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
15004 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15005 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
15006 		sd_dump_memory(un, SD_LOG_IO, "CDB",
15007 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15008 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15009 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15010 		goto fail_command;
15011 	}
15012 
15013 	/*
15014 	 * If we are suspended, then put the command onto head of the
15015 	 * wait queue since we don't want to start more commands.
15016 	 */
15017 	switch (un->un_state) {
15018 	case SD_STATE_SUSPENDED:
15019 	case SD_STATE_DUMPING:
15020 		bp->av_forw = un->un_waitq_headp;
15021 		un->un_waitq_headp = bp;
15022 		if (un->un_waitq_tailp == NULL) {
15023 			un->un_waitq_tailp = bp;
15024 		}
15025 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
15026 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
15027 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
15028 		return;
15029 	default:
15030 		break;
15031 	}
15032 
15033 	/*
15034 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
15035 	 * is set; if it is then we do not want to retry the command.
15036 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
15037 	 */
15038 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
15039 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
15040 			goto fail_command;
15041 		}
15042 	}
15043 
15044 
15045 	/*
15046 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
15047 	 * command timeout or a selection timeout has occurred. This means
15048 	 * that we were unable to establish any kind of communication with
15049 	 * the target, and subsequent retries and/or commands are likely
15050 	 * to encounter similar results and take a long time to complete.
15051 	 *
15052 	 * If this is a failfast error condition, we need to update the
15053 	 * failfast state, even if this bp does not have B_FAILFAST set.
15054 	 */
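	/*
	 * Sketch of the state transitions implemented below (the "pending"
	 * state is implicit, tracked via a non-NULL un_failfast_bp):
	 *
	 *	INACTIVE --failfast error--------------> pending
	 *	pending  --same bp fails again---------> ACTIVE (flush queues)
	 *	pending  --different bp fails----------> pending (normal retry)
	 *	any      --retry without failfast err--> INACTIVE
	 */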
15055 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
15056 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
15057 			ASSERT(un->un_failfast_bp == NULL);
15058 			/*
15059 			 * If we are already in the active failfast state, and
15060 			 * another failfast error condition has been detected,
15061 			 * then fail this command if it has B_FAILFAST set.
15062 			 * If B_FAILFAST is clear, then maintain the legacy
15063 			 * behavior of retrying heroically, even though this will
15064 			 * take a lot more time to fail the command.
15065 			 */
15066 			if (bp->b_flags & B_FAILFAST) {
15067 				goto fail_command;
15068 			}
15069 		} else {
15070 			/*
15071 			 * We're not in the active failfast state, but we
15072 			 * have a failfast error condition, so we must begin
15073 			 * transition to the next state. We do this regardless
15074 			 * of whether or not this bp has B_FAILFAST set.
15075 			 */
15076 			if (un->un_failfast_bp == NULL) {
15077 				/*
15078 				 * This is the first bp to meet a failfast
15079 				 * condition so save it on un_failfast_bp &
15080 				 * do normal retry processing. Do not enter
15081 				 * active failfast state yet. This marks
15082 				 * entry into the "failfast pending" state.
15083 				 */
15084 				un->un_failfast_bp = bp;
15085 
15086 			} else if (un->un_failfast_bp == bp) {
15087 				/*
15088 				 * This is the second time *this* bp has
15089 				 * encountered a failfast error condition,
15090 				 * so enter active failfast state & flush
15091 				 * queues as appropriate.
15092 				 */
15093 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
15094 				un->un_failfast_bp = NULL;
15095 				sd_failfast_flushq(un);
15096 
15097 				/*
15098 				 * Fail this bp now if B_FAILFAST set;
15099 				 * otherwise continue with retries. (It would
15100 				 * be pretty ironic if this bp succeeded on a
15101 				 * subsequent retry after we just flushed all
15102 				 * the queues).
15103 				 */
15104 				if (bp->b_flags & B_FAILFAST) {
15105 					goto fail_command;
15106 				}
15107 
15108 #if !defined(lint) && !defined(__lint)
15109 			} else {
15110 				/*
15111 				 * If neither of the preceding conditionals
15112 				 * was true, it means that there is some
15113 				 * *other* bp that has met an initial failfast
15114 				 * condition and is currently either being
15115 				 * retried or is waiting to be retried. In
15116 				 * that case we should perform normal retry
15117 				 * processing on *this* bp, since there is a
15118 				 * chance that the current failfast condition
15119 				 * is transient and recoverable. If that does
15120 				 * not turn out to be the case, then retries
15121 				 * will be cleared when the wait queue is
15122 				 * flushed anyway.
15123 				 */
15124 #endif
15125 			}
15126 		}
15127 	} else {
15128 		/*
15129 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
15130 		 * likely were able to at least establish some level of
15131 		 * communication with the target and subsequent commands
15132 		 * and/or retries are likely to get through to the target.
15133 		 * In this case we want to be aggressive about clearing
15134 		 * the failfast state. Note that this does not affect
15135 		 * the "failfast pending" condition.
15136 		 */
15137 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15138 	}
15139 
15140 
15141 	/*
15142 	 * Check the specified retry count to see if we can still do
15143 	 * any retries with this pkt before we should fail it.
15144 	 */
15145 	switch (retry_check_flag & SD_RETRIES_MASK) {
15146 	case SD_RETRIES_VICTIM:
15147 		/*
15148 		 * Check the victim retry count. If exhausted, then fall
15149 		 * thru & check against the standard retry count.
15150 		 */
15151 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
15152 			/* Increment count & proceed with the retry */
15153 			xp->xb_victim_retry_count++;
15154 			break;
15155 		}
15156 		/* Victim retries exhausted, fall back to std. retries... */
15157 		/* FALLTHRU */
15158 
15159 	case SD_RETRIES_STANDARD:
15160 		if (xp->xb_retry_count >= un->un_retry_count) {
15161 			/* Retries exhausted, fail the command */
15162 			SD_TRACE(SD_LOG_IO_CORE, un,
15163 			    "sd_retry_command: retries exhausted!\n");
15164 			/*
15165 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
15166 			 * commands with nonzero pkt_resid.
15167 			 */
15168 			if ((pktp->pkt_reason == CMD_CMPLT) &&
15169 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
15170 			    (pktp->pkt_resid != 0)) {
15171 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
15172 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
15173 					SD_UPDATE_B_RESID(bp, pktp);
15174 				}
15175 			}
15176 			goto fail_command;
15177 		}
15178 		xp->xb_retry_count++;
15179 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15180 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15181 		break;
15182 
15183 	case SD_RETRIES_UA:
15184 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
15185 			/* Retries exhausted, fail the command */
15186 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15187 			    "Unit Attention retries exhausted. "
15188 			    "Check the target.\n");
15189 			goto fail_command;
15190 		}
15191 		xp->xb_ua_retry_count++;
15192 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15193 		    "sd_retry_command: retry count:%d\n",
15194 		    xp->xb_ua_retry_count);
15195 		break;
15196 
15197 	case SD_RETRIES_BUSY:
15198 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
15199 			/* Retries exhausted, fail the command */
15200 			SD_TRACE(SD_LOG_IO_CORE, un,
15201 			    "sd_retry_command: retries exhausted!\n");
15202 			goto fail_command;
15203 		}
15204 		xp->xb_retry_count++;
15205 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15206 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15207 		break;
15208 
15209 	case SD_RETRIES_NOCHECK:
15210 	default:
15211 		/* No retry count to check. Just proceed with the retry */
15212 		break;
15213 	}
15214 
15215 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15216 
15217 	/*
15218 	 * If we were given a zero timeout, we must attempt to retry the
15219 	 * command immediately (ie, without a delay).
15220 	 */
15221 	if (retry_delay == 0) {
15222 		/*
15223 		 * Check some limiting conditions to see if we can actually
15224 		 * do the immediate retry.  If we cannot, then we must
15225 		 * fall back to queueing up a delayed retry.
15226 		 */
15227 		if (un->un_ncmds_in_transport >= un->un_throttle) {
15228 			/*
15229 			 * We are at the throttle limit for the target,
15230 			 * fall back to delayed retry.
15231 			 */
15232 			retry_delay = SD_BSY_TIMEOUT;
15233 			statp = kstat_waitq_enter;
15234 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15235 			    "sd_retry_command: immed. retry hit "
15236 			    "throttle!\n");
15237 		} else {
15238 			/*
15239 			 * We're clear to proceed with the immediate retry.
15240 			 * First call the user-provided function (if any)
15241 			 */
15242 			if (user_funcp != NULL) {
15243 				(*user_funcp)(un, bp, user_arg,
15244 				    SD_IMMEDIATE_RETRY_ISSUED);
15245 #ifdef __lock_lint
15246 				sd_print_incomplete_msg(un, bp, user_arg,
15247 				    SD_IMMEDIATE_RETRY_ISSUED);
15248 				sd_print_cmd_incomplete_msg(un, bp, user_arg,
15249 				    SD_IMMEDIATE_RETRY_ISSUED);
15250 				sd_print_sense_failed_msg(un, bp, user_arg,
15251 				    SD_IMMEDIATE_RETRY_ISSUED);
15252 #endif
15253 			}
15254 
15255 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15256 			    "sd_retry_command: issuing immediate retry\n");
15257 
15258 			/*
15259 			 * Call sd_start_cmds() to transport the command to
15260 			 * the target.
15261 			 */
15262 			sd_start_cmds(un, bp);
15263 
15264 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15265 			    "sd_retry_command exit\n");
15266 			return;
15267 		}
15268 	}
15269 
15270 	/*
15271 	 * Set up to retry the command after a delay.
15272 	 * First call the user-provided function (if any)
15273 	 */
15274 	if (user_funcp != NULL) {
15275 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
15276 	}
15277 
15278 	sd_set_retry_bp(un, bp, retry_delay, statp);
15279 
15280 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15281 	return;
15282 
15283 fail_command:
15284 
15285 	if (user_funcp != NULL) {
15286 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
15287 	}
15288 
15289 fail_command_no_log:
15290 
15291 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15292 	    "sd_retry_command: returning failed command\n");
15293 
15294 	sd_return_failed_command(un, bp, failure_code);
15295 
15296 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15297 }
15298 
15299 
15300 /*
15301  *    Function: sd_set_retry_bp
15302  *
15303  * Description: Set up the given bp for retry.
15304  *
15305  *   Arguments: un - ptr to associated softstate
15306  *		bp - ptr to buf(9S) for the command
15307  *		retry_delay - time interval before issuing retry (may be 0)
15308  *		statp - optional pointer to kstat function
15309  *
15310  *     Context: May be called under interrupt context
15311  */
15312 
15313 static void
15314 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
15315 	void (*statp)(kstat_io_t *))
15316 {
15317 	ASSERT(un != NULL);
15318 	ASSERT(mutex_owned(SD_MUTEX(un)));
15319 	ASSERT(bp != NULL);
15320 
15321 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15322 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
15323 
15324 	/*
15325 	 * Indicate that the command is being retried. This will not allow any
15326 	 * other commands on the wait queue to be transported to the target
15327 	 * until this command has been completed (success or failure). The
15328 	 * "retry command" is not transported to the target until the given
15329 	 * time delay expires, unless the user specified a 0 retry_delay.
15330 	 *
15331 	 * Note: the timeout(9F) callback routine is what actually calls
15332 	 * sd_start_cmds() to transport the command, with the exception of a
15333 	 * zero retry_delay. The only current implementor of a zero retry delay
15334 	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
15335 	 */
15336 	if (un->un_retry_bp == NULL) {
15337 		ASSERT(un->un_retry_statp == NULL);
15338 		un->un_retry_bp = bp;
15339 
15340 		/*
15341 		 * If the user has not specified a delay the command should
15342 		 * be queued and no timeout should be scheduled.
15343 		 */
15344 		if (retry_delay == 0) {
15345 			/*
15346 			 * Save the kstat pointer that will be used in the
15347 			 * call to SD_UPDATE_KSTATS() below, so that
15348 			 * sd_start_cmds() can correctly decrement the waitq
15349 			 * count when it is time to transport this command.
15350 			 */
15351 			un->un_retry_statp = statp;
15352 			goto done;
15353 		}
15354 	}
15355 
15356 	if (un->un_retry_bp == bp) {
15357 		/*
15358 		 * Save the kstat pointer that will be used in the call to
15359 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
15360 		 * correctly decrement the waitq count when it is time to
15361 		 * transport this command.
15362 		 */
15363 		un->un_retry_statp = statp;
15364 
15365 		/*
15366 		 * Schedule a timeout if:
15367 		 *   1) The user has specified a delay.
15368 		 *   2) There is not a START_STOP_UNIT callback pending.
15369 		 *
15370 		 * If no delay has been specified, then it is up to the caller
15371 		 * to ensure that IO processing continues without stalling.
15372 		 * Effectively, this means that the caller will issue the
15373 		 * required call to sd_start_cmds(). The START_STOP_UNIT
15374 		 * callback does this after the START STOP UNIT command has
15375 		 * completed. In either of these cases we should not schedule
15376 		 * a timeout callback here.  Also don't schedule the timeout if
15377 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
15378 		 */
15379 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
15380 		    (un->un_direct_priority_timeid == NULL)) {
15381 			un->un_retry_timeid =
15382 			    timeout(sd_start_retry_command, un, retry_delay);
15383 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15384 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
15385 			    " bp:0x%p un_retry_timeid:0x%p\n",
15386 			    un, bp, un->un_retry_timeid);
15387 		}
15388 	} else {
15389 		/*
15390 		 * We only get in here if there is already another command
15391 		 * waiting to be retried.  In this case, we just put the
15392 		 * given command onto the wait queue, so it can be transported
15393 		 * after the current retry command has completed.
15394 		 *
15395 		 * Also we have to make sure that if the command at the head
15396 		 * of the wait queue is the un_failfast_bp, that we do not
15397 		 * put ahead of it any other commands that are to be retried.
15398 		 */
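		/*
		 * Queue shapes, as a sketch: with the failfast bp "F" at
		 * the head and a new retry bp "B":
		 *
		 *	before: F -> x -> y	after: F -> B -> x -> y
		 *
		 * In all other cases B is placed at the head of the waitq.
		 */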
15399 		if ((un->un_failfast_bp != NULL) &&
15400 		    (un->un_failfast_bp == un->un_waitq_headp)) {
15401 			/*
15402 			 * Enqueue this command AFTER the first command on
15403 			 * the wait queue (which is also un_failfast_bp).
15404 			 */
15405 			bp->av_forw = un->un_waitq_headp->av_forw;
15406 			un->un_waitq_headp->av_forw = bp;
15407 			if (un->un_waitq_headp == un->un_waitq_tailp) {
15408 				un->un_waitq_tailp = bp;
15409 			}
15410 		} else {
15411 			/* Enqueue this command at the head of the waitq. */
15412 			bp->av_forw = un->un_waitq_headp;
15413 			un->un_waitq_headp = bp;
15414 			if (un->un_waitq_tailp == NULL) {
15415 				un->un_waitq_tailp = bp;
15416 			}
15417 		}
15418 
15419 		if (statp == NULL) {
15420 			statp = kstat_waitq_enter;
15421 		}
15422 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15423 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
15424 	}
15425 
15426 done:
15427 	if (statp != NULL) {
15428 		SD_UPDATE_KSTATS(un, statp, bp);
15429 	}
15430 
15431 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15432 	    "sd_set_retry_bp: exit un:0x%p\n", un);
15433 }
15434 
15435 
15436 /*
15437  *    Function: sd_start_retry_command
15438  *
15439  * Description: Start the command that has been waiting on the target's
15440  *		retry queue.  Called from timeout(9F) context after the
15441  *		retry delay interval has expired.
15442  *
15443  *   Arguments: arg - pointer to associated softstate for the device.
15444  *
15445  *     Context: timeout(9F) thread context.  May not sleep.
15446  */
15447 
15448 static void
15449 sd_start_retry_command(void *arg)
15450 {
15451 	struct sd_lun *un = arg;
15452 
15453 	ASSERT(un != NULL);
15454 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15455 
15456 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15457 	    "sd_start_retry_command: entry\n");
15458 
15459 	mutex_enter(SD_MUTEX(un));
15460 
15461 	un->un_retry_timeid = NULL;
15462 
15463 	if (un->un_retry_bp != NULL) {
15464 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15465 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
15466 		    un, un->un_retry_bp);
15467 		sd_start_cmds(un, un->un_retry_bp);
15468 	}
15469 
15470 	mutex_exit(SD_MUTEX(un));
15471 
15472 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15473 	    "sd_start_retry_command: exit\n");
15474 }
15475 
15476 
15477 /*
15478  *    Function: sd_start_direct_priority_command
15479  *
15480  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
15481  *		received TRAN_BUSY when we called scsi_transport() to send it
15482  *		to the underlying HBA. This function is called from timeout(9F)
15483  *		context after the delay interval has expired.
15484  *
15485  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
15486  *
15487  *     Context: timeout(9F) thread context.  May not sleep.
15488  */
15489 
15490 static void
15491 sd_start_direct_priority_command(void *arg)
15492 {
15493 	struct buf	*priority_bp = arg;
15494 	struct sd_lun	*un;
15495 
15496 	ASSERT(priority_bp != NULL);
15497 	un = SD_GET_UN(priority_bp);
15498 	ASSERT(un != NULL);
15499 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15500 
15501 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15502 	    "sd_start_direct_priority_command: entry\n");
15503 
15504 	mutex_enter(SD_MUTEX(un));
15505 	un->un_direct_priority_timeid = NULL;
15506 	sd_start_cmds(un, priority_bp);
15507 	mutex_exit(SD_MUTEX(un));
15508 
15509 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15510 	    "sd_start_direct_priority_command: exit\n");
15511 }
15512 
15513 
15514 /*
15515  *    Function: sd_send_request_sense_command
15516  *
15517  * Description: Sends a REQUEST SENSE command to the target
15518  *
15519  *     Context: May be called from interrupt context.
15520  */
15521 
15522 static void
15523 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
15524 	struct scsi_pkt *pktp)
15525 {
15526 	ASSERT(bp != NULL);
15527 	ASSERT(un != NULL);
15528 	ASSERT(mutex_owned(SD_MUTEX(un)));
15529 
15530 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
15531 	    "entry: buf:0x%p\n", bp);
15532 
15533 	/*
15534 	 * If we are syncing or dumping, then fail the command to avoid a
15535 	 * recursive callback into scsi_transport(). Also fail the command
15536 	 * if we are suspended (legacy behavior).
15537 	 */
15538 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
15539 	    (un->un_state == SD_STATE_DUMPING)) {
15540 		sd_return_failed_command(un, bp, EIO);
15541 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15542 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
15543 		return;
15544 	}
15545 
15546 	/*
15547 	 * Retry the failed command and don't issue the request sense if:
15548 	 *    1) the sense buf is busy
15549 	 *    2) we have 1 or more outstanding commands on the target
15550 	 *    (the sense data will be cleared or invalidated anyway)
15551 	 *
15552 	 * Note: There could be an issue with not checking a retry limit here;
15553 	 * the problem is determining which retry limit to check.
15554 	 */
15555 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
15556 		/* Don't retry if the command is flagged as non-retryable */
15557 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15558 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15559 			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
15560 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15561 			    "sd_send_request_sense_command: "
15562 			    "at full throttle, retrying exit\n");
15563 		} else {
15564 			sd_return_failed_command(un, bp, EIO);
15565 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15566 			    "sd_send_request_sense_command: "
15567 			    "at full throttle, non-retryable exit\n");
15568 		}
15569 		return;
15570 	}
15571 
15572 	sd_mark_rqs_busy(un, bp);
15573 	sd_start_cmds(un, un->un_rqs_bp);
15574 
15575 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15576 	    "sd_send_request_sense_command: exit\n");
15577 }
15578 
15579 
15580 /*
15581  *    Function: sd_mark_rqs_busy
15582  *
15583  * Description: Indicate that the request sense bp for this instance is
15584  *		in use.
15585  *
15586  *     Context: May be called under interrupt context
15587  */
15588 
15589 static void
15590 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
15591 {
15592 	struct sd_xbuf	*sense_xp;
15593 
15594 	ASSERT(un != NULL);
15595 	ASSERT(bp != NULL);
15596 	ASSERT(mutex_owned(SD_MUTEX(un)));
15597 	ASSERT(un->un_sense_isbusy == 0);
15598 
15599 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
15600 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
15601 
15602 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
15603 	ASSERT(sense_xp != NULL);
15604 
15605 	SD_INFO(SD_LOG_IO, un,
15606 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
15607 
15608 	ASSERT(sense_xp->xb_pktp != NULL);
15609 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
15610 	    == (FLAG_SENSING | FLAG_HEAD));
15611 
15612 	un->un_sense_isbusy = 1;
15613 	un->un_rqs_bp->b_resid = 0;
15614 	sense_xp->xb_pktp->pkt_resid  = 0;
15615 	sense_xp->xb_pktp->pkt_reason = 0;
15616 
15617 	/* So we can get back the bp at interrupt time! */
15618 	sense_xp->xb_sense_bp = bp;
15619 
15620 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
15621 
15622 	/*
15623 	 * Mark this buf as awaiting sense data. (This is already set in
15624 	 * the pkt_flags for the RQS packet.)
15625 	 */
15626 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
15627 
15628 	sense_xp->xb_retry_count	= 0;
15629 	sense_xp->xb_victim_retry_count = 0;
15630 	sense_xp->xb_ua_retry_count	= 0;
15631 	sense_xp->xb_dma_resid  = 0;
15632 
15633 	/* Clean up the fields for auto-request sense */
15634 	sense_xp->xb_sense_status = 0;
15635 	sense_xp->xb_sense_state  = 0;
15636 	sense_xp->xb_sense_resid  = 0;
15637 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
15638 
15639 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
15640 }
15641 
15642 
15643 /*
15644  *    Function: sd_mark_rqs_idle
15645  *
15646  * Description: SD_MUTEX must be held continuously through this routine
15647  *		to prevent reuse of the rqs struct before the caller can
15648  *		complete its processing.
15649  *
15650  * Return Code: Pointer to the RQS buf
15651  *
15652  *     Context: May be called under interrupt context
15653  */
15654 
15655 static struct buf *
15656 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
15657 {
15658 	struct buf *bp;
15659 	ASSERT(un != NULL);
15660 	ASSERT(sense_xp != NULL);
15661 	ASSERT(mutex_owned(SD_MUTEX(un)));
15662 	ASSERT(un->un_sense_isbusy != 0);
15663 
15664 	un->un_sense_isbusy = 0;
15665 	bp = sense_xp->xb_sense_bp;
15666 	sense_xp->xb_sense_bp = NULL;
15667 
15668 	/* This pkt is no longer interested in getting sense data */
15669 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
15670 
15671 	return (bp);
15672 }
15673 
15674 
15675 
15676 /*
15677  *    Function: sd_alloc_rqs
15678  *
15679  * Description: Set up the unit to receive auto request sense data
15680  *
15681  * Return Code: DDI_SUCCESS or DDI_FAILURE
15682  *
15683  *     Context: Called under attach(9E) context
15684  */
15685 
15686 static int
15687 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
15688 {
15689 	struct sd_xbuf *xp;
15690 
15691 	ASSERT(un != NULL);
15692 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15693 	ASSERT(un->un_rqs_bp == NULL);
15694 	ASSERT(un->un_rqs_pktp == NULL);
15695 
15696 	/*
15697 	 * First allocate the required buf and scsi_pkt structs, then set up
15698 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
15699 	 */
15700 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
15701 	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
15702 	if (un->un_rqs_bp == NULL) {
15703 		return (DDI_FAILURE);
15704 	}
15705 
15706 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
15707 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
15708 
15709 	if (un->un_rqs_pktp == NULL) {
15710 		sd_free_rqs(un);
15711 		return (DDI_FAILURE);
15712 	}
15713 
15714 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
15715 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
15716 	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);
15717 
15718 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
15719 
15720 	/* Set up the other needed members in the ARQ scsi_pkt. */
15721 	un->un_rqs_pktp->pkt_comp   = sdintr;
15722 	un->un_rqs_pktp->pkt_time   = sd_io_time;
15723 	un->un_rqs_pktp->pkt_flags |=
15724 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
15725 
15726 	/*
15727 	 * Allocate  & init the sd_xbuf struct for the RQS command. Do not
15728 	 * provide any intpkt, destroypkt routines as we take care of
15729 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
15730 	 */
15731 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
15732 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
15733 	xp->xb_pktp = un->un_rqs_pktp;
15734 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
15735 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
15736 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
15737 
15738 	/*
15739 	 * Save the pointer to the request sense private bp so it can
15740 	 * be retrieved in sdintr.
15741 	 */
15742 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
15743 	ASSERT(un->un_rqs_bp->b_private == xp);
15744 
15745 	/*
15746 	 * See if the HBA supports auto-request sense for the specified
15747 	 * target/lun. If it does, then try to enable it (if not already
15748 	 * enabled).
15749 	 *
15750 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
15751 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
15752 	 * return success.  However, in both of these cases ARQ is always
15753 	 * enabled and scsi_ifgetcap will always return true. The best approach
15754 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
15755 	 *
15756 	 * The third case is an HBA (adp) that always returns enabled on
15757 	 * scsi_ifgetcap even when ARQ is not enabled; the best approach
15758 	 * there is to issue a scsi_ifsetcap followed by a scsi_ifgetcap.
15759 	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
15760 	 */
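	/*
	 * As a sketch, the probe below interprets the "auto-rqsense"
	 * capability as follows (mirroring the switch statement):
	 *
	 *	scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)
	 *		== 1:  ARQ already enabled
	 *		== 0:  supported but disabled; try scsi_ifsetcap()
	 *		else:  ARQ unsupported; run with ARQ disabled
	 */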
15761 
15762 	if (un->un_f_is_fibre == TRUE) {
15763 		un->un_f_arq_enabled = TRUE;
15764 	} else {
15765 #if defined(__i386) || defined(__amd64)
15766 		/*
15767 		 * Circumvent the Adaptec bug, remove this code when
15768 		 * the bug is fixed
15769 		 */
15770 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
15771 #endif
15772 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
15773 		case 0:
15774 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15775 			    "sd_alloc_rqs: HBA supports ARQ\n");
15776 			/*
15777 			 * ARQ is supported by this HBA but currently is not
15778 			 * enabled. Attempt to enable it and if successful then
15779 			 * mark this instance as ARQ enabled.
15780 			 */
15781 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
15782 			    == 1) {
15783 				/* Successfully enabled ARQ in the HBA */
15784 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
15785 				    "sd_alloc_rqs: ARQ enabled\n");
15786 				un->un_f_arq_enabled = TRUE;
15787 			} else {
15788 				/* Could not enable ARQ in the HBA */
15789 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
15790 				    "sd_alloc_rqs: failed ARQ enable\n");
15791 				un->un_f_arq_enabled = FALSE;
15792 			}
15793 			break;
15794 		case 1:
15795 			/*
15796 			 * ARQ is supported by this HBA and is already enabled.
15797 			 * Just mark ARQ as enabled for this instance.
15798 			 */
15799 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15800 			    "sd_alloc_rqs: ARQ already enabled\n");
15801 			un->un_f_arq_enabled = TRUE;
15802 			break;
15803 		default:
15804 			/*
15805 			 * ARQ is not supported by this HBA; disable it for this
15806 			 * instance.
15807 			 */
15808 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
15809 			    "sd_alloc_rqs: HBA does not support ARQ\n");
15810 			un->un_f_arq_enabled = FALSE;
15811 			break;
15812 		}
15813 	}
15814 
15815 	return (DDI_SUCCESS);
15816 }
15817 
15818 
15819 /*
15820  *    Function: sd_free_rqs
15821  *
15822  * Description: Cleanup for the pre-instance RQS command.
15823  *
15824  *     Context: Kernel thread context
15825  */
15826 
15827 static void
15828 sd_free_rqs(struct sd_lun *un)
15829 {
15830 	ASSERT(un != NULL);
15831 
15832 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
15833 
15834 	/*
15835 	 * If consistent memory is bound to a scsi_pkt, the pkt
15836 	 * has to be destroyed *before* freeing the consistent memory.
15837 	 * Don't change the sequence of these operations.
15838 	 * scsi_destroy_pkt() might access memory, which isn't allowed,
15839 	 * after it was freed in scsi_free_consistent_buf().
15840 	 */
15841 	if (un->un_rqs_pktp != NULL) {
15842 		scsi_destroy_pkt(un->un_rqs_pktp);
15843 		un->un_rqs_pktp = NULL;
15844 	}
15845 
15846 	if (un->un_rqs_bp != NULL) {
15847 		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
15848 		scsi_free_consistent_buf(un->un_rqs_bp);
15849 		un->un_rqs_bp = NULL;
15850 	}
15851 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
15852 }
15853 
15854 
15855 
15856 /*
15857  *    Function: sd_reduce_throttle
15858  *
15859  * Description: Reduces the maximum # of outstanding commands on a
15860  *		target to the current number of outstanding commands.
15861  *		Queues a timeout(9F) callback to restore the limit
15862  *		after a specified interval has elapsed.
15863  *		Typically used when we get a TRAN_BUSY return code
15864  *		back from scsi_transport().
15865  *
15866  *   Arguments: un - ptr to the sd_lun softstate struct
15867  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
15868  *
15869  *     Context: May be called from interrupt context
15870  */
15871 
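/*
 * Worked example (illustrative numbers only): with un_throttle == 16 and
 * un_ncmds_in_transport == 5, a SD_THROTTLE_TRAN_BUSY call under the
 * adaptive policy saves 16 in un_busy_throttle and drops un_throttle to
 * 5; the sd_restore_throttle() timeout later reverts to the saved value.
 */
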
15872 static void
15873 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
15874 {
15875 	ASSERT(un != NULL);
15876 	ASSERT(mutex_owned(SD_MUTEX(un)));
15877 	ASSERT(un->un_ncmds_in_transport >= 0);
15878 
15879 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15880 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
15881 	    un, un->un_throttle, un->un_ncmds_in_transport);
15882 
15883 	if (un->un_throttle > 1) {
15884 		if (un->un_f_use_adaptive_throttle == TRUE) {
15885 			switch (throttle_type) {
15886 			case SD_THROTTLE_TRAN_BUSY:
15887 				if (un->un_busy_throttle == 0) {
15888 					un->un_busy_throttle = un->un_throttle;
15889 				}
15890 				break;
15891 			case SD_THROTTLE_QFULL:
15892 				un->un_busy_throttle = 0;
15893 				break;
15894 			default:
15895 				ASSERT(FALSE);
15896 			}
15897 
15898 			if (un->un_ncmds_in_transport > 0) {
15899 				un->un_throttle = un->un_ncmds_in_transport;
15900 			}
15901 
15902 		} else {
15903 			if (un->un_ncmds_in_transport == 0) {
15904 				un->un_throttle = 1;
15905 			} else {
15906 				un->un_throttle = un->un_ncmds_in_transport;
15907 			}
15908 		}
15909 	}
15910 
15911 	/* Reschedule the timeout if none is currently active */
15912 	if (un->un_reset_throttle_timeid == NULL) {
15913 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
15914 		    un, SD_THROTTLE_RESET_INTERVAL);
15915 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15916 		    "sd_reduce_throttle: timeout scheduled!\n");
15917 	}
15918 
15919 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15920 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
15921 }
15922 
15923 
15924 
15925 /*
15926  *    Function: sd_restore_throttle
15927  *
15928  * Description: Callback function for timeout(9F).  Resets the current
15929  *		value of un->un_throttle to its default.
15930  *
15931  *   Arguments: arg - pointer to associated softstate for the device.
15932  *
15933  *     Context: May be called from interrupt context
15934  */
15935 
15936 static void
15937 sd_restore_throttle(void *arg)
15938 {
15939 	struct sd_lun	*un = arg;
15940 
15941 	ASSERT(un != NULL);
15942 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15943 
15944 	mutex_enter(SD_MUTEX(un));
15945 
15946 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
15947 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
15948 
15949 	un->un_reset_throttle_timeid = NULL;
15950 
15951 	if (un->un_f_use_adaptive_throttle == TRUE) {
15952 		/*
15953 		 * If un_busy_throttle is nonzero, then it contains the
15954 		 * value that un_throttle was when we got a TRAN_BUSY back
15955 		 * from scsi_transport(). We want to revert back to this
15956 		 * value.
15957 		 *
15958 		 * In the QFULL case, the throttle limit will incrementally
15959 		 * increase until it reaches max throttle.
15960 		 */
15961 		if (un->un_busy_throttle > 0) {
15962 			un->un_throttle = un->un_busy_throttle;
15963 			un->un_busy_throttle = 0;
15964 		} else {
15965 			/*
15966 			 * Increase the throttle by 10% to open the gate
15967 			 * slowly, and schedule another restore if the saved
15968 			 * throttle has not yet been reached.
15969 			 */
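			/*
			 * Worked example (illustrative numbers): with
			 * un_throttle == 20, each pass adds
			 * max(20 / 10, 1) == 2, so the throttle climbs
			 * 20 -> 22 -> 24 -> ... until un_saved_throttle
			 * caps it and no further timeout is scheduled.
			 */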
15970 			short throttle;
15971 			if (sd_qfull_throttle_enable) {
15972 				throttle = un->un_throttle +
15973 				    max((un->un_throttle / 10), 1);
15974 				un->un_throttle =
15975 				    (throttle < un->un_saved_throttle) ?
15976 				    throttle : un->un_saved_throttle;
15977 				if (un->un_throttle < un->un_saved_throttle) {
15978 					un->un_reset_throttle_timeid =
15979 					    timeout(sd_restore_throttle, un,
15980 					    SD_QFULL_THROTTLE_RESET_INTERVAL);
15981 				}
15982 			}
15983 		}
15984 
15985 		/*
15986 		 * If un_throttle has fallen below the low-water mark, we
15987 		 * restore the maximum value here (and allow it to ratchet
15988 		 * down again if necessary).
15989 		 */
15990 		if (un->un_throttle < un->un_min_throttle) {
15991 			un->un_throttle = un->un_saved_throttle;
15992 		}
15993 	} else {
15994 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
15995 		    "restoring limit from 0x%x to 0x%x\n",
15996 		    un->un_throttle, un->un_saved_throttle);
15997 		un->un_throttle = un->un_saved_throttle;
15998 	}
15999 
16000 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16001 	    "sd_restore_throttle: calling sd_start_cmds!\n");
16002 
16003 	sd_start_cmds(un, NULL);
16004 
16005 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16006 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
16007 	    un, un->un_throttle);
16008 
16009 	mutex_exit(SD_MUTEX(un));
16010 
16011 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
16012 }
16013 
16014 /*
16015  *    Function: sdrunout
16016  *
16017  * Description: Callback routine for scsi_init_pkt when a resource allocation
16018  *		fails.
16019  *
16020  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
16021  *		soft state instance.
16022  *
16023  * Return Code: The scsi_init_pkt routine allows for the callback function to
16024  *		return a 0 indicating the callback should be rescheduled or a 1
16025  *		indicating not to reschedule. This routine always returns 1
16026  *		because the driver always provides a callback function to
16027  *		scsi_init_pkt. This results in a callback always being scheduled
16028  *		(via the scsi_init_pkt callback implementation) if a resource
16029  *		failure occurs.
16030  *
16031  *     Context: This callback function may not block or call routines that block
16032  *
16033  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16034  *		request persisting at the head of the list which cannot be
16035  *		satisfied even after multiple retries. In the future the driver
16036  *		may implement some type of maximum runout count before failing
16037  *		an I/O.
16038  */
16039 
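/*
 * Hypothetical fragment (a sketch, not the driver's actual allocation
 * path) showing how a callback such as sdrunout is registered with
 * scsi_init_pkt(9F); when allocation fails and a callback is supplied,
 * the framework invokes it once resources may be available again:
 *
 *	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, bp, CDB_GROUP0,
 *	    1, 0, 0, sdrunout, (caddr_t)un);
 *	if (pktp == NULL)
 *		... sdrunout() will restart the waitq later ...
 */
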
16040 static int
16041 sdrunout(caddr_t arg)
16042 {
16043 	struct sd_lun	*un = (struct sd_lun *)arg;
16044 
16045 	ASSERT(un != NULL);
16046 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16047 
16048 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16049 
16050 	mutex_enter(SD_MUTEX(un));
16051 	sd_start_cmds(un, NULL);
16052 	mutex_exit(SD_MUTEX(un));
16053 	/*
16054 	 * This callback routine always returns 1 (i.e. do not reschedule)
16055 	 * because we always specify sdrunout as the callback handler for
16056 	 * scsi_init_pkt inside the call to sd_start_cmds.
16057 	 */
16058 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16059 	return (1);
16060 }
16061 
16062 
16063 /*
16064  *    Function: sdintr
16065  *
16066  * Description: Completion callback routine for scsi_pkt(9S) structs
16067  *		sent to the HBA driver via scsi_transport(9F).
16068  *
16069  *     Context: Interrupt context
16070  */
16071 
16072 static void
16073 sdintr(struct scsi_pkt *pktp)
16074 {
16075 	struct buf	*bp;
16076 	struct sd_xbuf	*xp;
16077 	struct sd_lun	*un;
16078 
16079 	ASSERT(pktp != NULL);
16080 	bp = (struct buf *)pktp->pkt_private;
16081 	ASSERT(bp != NULL);
16082 	xp = SD_GET_XBUF(bp);
16083 	ASSERT(xp != NULL);
16084 	ASSERT(xp->xb_pktp != NULL);
16085 	un = SD_GET_UN(bp);
16086 	ASSERT(un != NULL);
16087 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16088 
16089 #ifdef SD_FAULT_INJECTION
16090 
16091 	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
16092 	/* SD FaultInjection */
16093 	sd_faultinjection(pktp);
16094 
16095 #endif /* SD_FAULT_INJECTION */
16096 
16097 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
16098 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
16099 
16100 	mutex_enter(SD_MUTEX(un));
16101 
16102 	/* Reduce the count of the #commands currently in transport */
16103 	un->un_ncmds_in_transport--;
16104 	ASSERT(un->un_ncmds_in_transport >= 0);
16105 
16106 	/* Increment counter to indicate that the callback routine is active */
16107 	un->un_in_callback++;
16108 
16109 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
16110 
16111 #ifdef	SDDEBUG
16112 	if (bp == un->un_retry_bp) {
16113 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
16114 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
16115 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
16116 	}
16117 #endif
16118 
16119 	/*
16120 	 * If pkt_reason is CMD_DEV_GONE, just fail the command
16121 	 */
16122 	if (pktp->pkt_reason == CMD_DEV_GONE) {
16123 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16124 		    "Device is gone\n");
16125 		sd_return_failed_command(un, bp, EIO);
16126 		goto exit;
16127 	}
16128 
16129 	/*
16130 	 * First see if the pkt has auto-request sense data with it....
16131 	 * Look at the packet state first so we don't take a performance
16132 	 * hit looking at the arq enabled flag unless absolutely necessary.
16133 	 */
16134 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
16135 	    (un->un_f_arq_enabled == TRUE)) {
16136 		/*
16137 		 * The HBA did an auto request sense for this command so check
16138 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16139 		 * driver command that should not be retried.
16140 		 */
16141 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16142 			/*
16143 			 * Save the relevant sense info into the xp for the
16144 			 * original cmd.
16145 			 */
16146 			struct scsi_arq_status *asp;
16147 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16148 			xp->xb_sense_status =
16149 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
16150 			xp->xb_sense_state  = asp->sts_rqpkt_state;
16151 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16152 			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16153 			    min(sizeof (struct scsi_extended_sense),
16154 			    SENSE_LENGTH));
16155 
16156 			/* fail the command */
16157 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16158 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
16159 			sd_return_failed_command(un, bp, EIO);
16160 			goto exit;
16161 		}
16162 
16163 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16164 		/*
16165 		 * We want to either retry or fail this command, so free
16166 		 * the DMA resources here.  If we retry the command then
16167 		 * the DMA resources will be reallocated in sd_start_cmds().
16168 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
16169 		 * causes the *entire* transfer to start over again from the
16170 		 * beginning of the request, even for PARTIAL chunks that
16171 		 * have already transferred successfully.
16172 		 */
16173 		if ((un->un_f_is_fibre == TRUE) &&
16174 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16175 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16176 			scsi_dmafree(pktp);
16177 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16178 		}
16179 #endif
16180 
16181 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16182 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
16183 
16184 		sd_handle_auto_request_sense(un, bp, xp, pktp);
16185 		goto exit;
16186 	}
16187 
16188 	/* Next see if this is the REQUEST SENSE pkt for the instance */
16189 	if (pktp->pkt_flags & FLAG_SENSING)  {
16190 		/* This pktp is from the unit's REQUEST_SENSE command */
16191 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16192 		    "sdintr: sd_handle_request_sense\n");
16193 		sd_handle_request_sense(un, bp, xp, pktp);
16194 		goto exit;
16195 	}
16196 
16197 	/*
16198 	 * Check to see if the command successfully completed as requested;
16199 	 * this is the most common case (and also the hot performance path).
16200 	 *
16201 	 * Requirements for successful completion are:
16202 	 * pkt_reason is CMD_CMPLT and packet status is status good.
16203 	 * In addition:
16204 	 * - A residual of zero indicates successful completion no matter what
16205 	 *   the command is.
16206 	 * - If the residual is not zero and the command is not a read or
16207 	 *   write, then it's still defined as successful completion. In other
16208 	 *   words, if the command is a read or write the residual must be
16209 	 *   zero for successful completion.
16210 	 * - If the residual is not zero and the command is a read or
16211 	 *   write, and it's a USCSICMD, then it's still defined as
16212 	 *   successful completion.
16213 	 */
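	/*
	 * The same rules, condensed (derived from the comment above):
	 *
	 *	resid == 0, any command			-> success
	 *	resid != 0, not a read/write		-> success
	 *	resid != 0, read/write, USCSI		-> success
	 *	resid != 0, read/write, non-USCSI	-> not_successful
	 */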
16214 	if ((pktp->pkt_reason == CMD_CMPLT) &&
16215 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
16216 
16217 		/*
16218 		 * Since this command is returned with a good status, we
16219 		 * can reset the count for Sonoma failover.
16220 		 */
16221 		un->un_sonoma_failure_count = 0;
16222 
16223 		/*
16224 		 * Return all USCSI commands on good status
16225 		 */
16226 		if (pktp->pkt_resid == 0) {
16227 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16228 			    "sdintr: returning command for resid == 0\n");
16229 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
16230 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
16231 			SD_UPDATE_B_RESID(bp, pktp);
16232 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16233 			    "sdintr: returning command for resid != 0\n");
16234 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
16235 			SD_UPDATE_B_RESID(bp, pktp);
16236 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16237 			    "sdintr: returning uscsi command\n");
16238 		} else {
16239 			goto not_successful;
16240 		}
16241 		sd_return_command(un, bp);
16242 
16243 		/*
16244 		 * Decrement counter to indicate that the callback routine
16245 		 * is done.
16246 		 */
16247 		un->un_in_callback--;
16248 		ASSERT(un->un_in_callback >= 0);
16249 		mutex_exit(SD_MUTEX(un));
16250 
16251 		return;
16252 	}
16253 
16254 not_successful:
16255 
16256 #if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
16257 	/*
16258 	 * The following is based upon knowledge of the underlying transport
16259 	 * and its use of DMA resources.  This code should be removed when
16260 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
16261 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
16262 	 * and sd_start_cmds().
16263 	 *
16264 	 * Free any DMA resources associated with this command if there
16265 	 * is a chance it could be retried or enqueued for later retry.
16266 	 * If we keep the DMA binding then mpxio cannot reissue the
16267 	 * command on another path whenever a path failure occurs.
16268 	 *
16269 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
16270 	 * causes the *entire* transfer to start over again from the
16271 	 * beginning of the request, even for PARTIAL chunks that
16272 	 * have already transferred successfully.
16273 	 *
16274 	 * This is only done for non-uscsi commands (and also skipped for the
16275 	 * driver's internal RQS command). Also just do this for Fibre Channel
16276 	 * devices as these are the only ones that support mpxio.
16277 	 */
16278 	if ((un->un_f_is_fibre == TRUE) &&
16279 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
16280 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
16281 		scsi_dmafree(pktp);
16282 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
16283 	}
16284 #endif
16285 
16286 	/*
16287 	 * The command did not successfully complete as requested so check
16288 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
16289 	 * driver command that should not be retried so just return. If
16290 	 * FLAG_DIAGNOSE is not set the error will be processed below.
16291 	 */
16292 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
16293 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16294 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
16295 		/*
16296 		 * Issue a request sense if a check condition caused the error
16297 		 * (we handle the auto request sense case above), otherwise
16298 		 * just fail the command.
16299 		 */
16300 		if ((pktp->pkt_reason == CMD_CMPLT) &&
16301 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
16302 			sd_send_request_sense_command(un, bp, pktp);
16303 		} else {
16304 			sd_return_failed_command(un, bp, EIO);
16305 		}
16306 		goto exit;
16307 	}
16308 
16309 	/*
16310 	 * The command did not successfully complete as requested so process
16311 	 * the error, retry, and/or attempt recovery.
16312 	 */
16313 	switch (pktp->pkt_reason) {
16314 	case CMD_CMPLT:
16315 		switch (SD_GET_PKT_STATUS(pktp)) {
16316 		case STATUS_GOOD:
16317 			/*
16318 			 * The command completed successfully with a non-zero
16319 			 * residual
16320 			 */
16321 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16322 			    "sdintr: STATUS_GOOD \n");
16323 			sd_pkt_status_good(un, bp, xp, pktp);
16324 			break;
16325 
16326 		case STATUS_CHECK:
16327 		case STATUS_TERMINATED:
16328 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16329 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
16330 			sd_pkt_status_check_condition(un, bp, xp, pktp);
16331 			break;
16332 
16333 		case STATUS_BUSY:
16334 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16335 			    "sdintr: STATUS_BUSY\n");
16336 			sd_pkt_status_busy(un, bp, xp, pktp);
16337 			break;
16338 
16339 		case STATUS_RESERVATION_CONFLICT:
16340 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16341 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
16342 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16343 			break;
16344 
16345 		case STATUS_QFULL:
16346 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16347 			    "sdintr: STATUS_QFULL\n");
16348 			sd_pkt_status_qfull(un, bp, xp, pktp);
16349 			break;
16350 
16351 		case STATUS_MET:
16352 		case STATUS_INTERMEDIATE:
16353 		case STATUS_SCSI2:
16354 		case STATUS_INTERMEDIATE_MET:
16355 		case STATUS_ACA_ACTIVE:
16356 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16357 			    "Unexpected SCSI status received: 0x%x\n",
16358 			    SD_GET_PKT_STATUS(pktp));
16359 			sd_return_failed_command(un, bp, EIO);
16360 			break;
16361 
16362 		default:
16363 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
16364 			    "Invalid SCSI status received: 0x%x\n",
16365 			    SD_GET_PKT_STATUS(pktp));
16366 			sd_return_failed_command(un, bp, EIO);
16367 			break;
16368 
16369 		}
16370 		break;
16371 
16372 	case CMD_INCOMPLETE:
16373 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16374 		    "sdintr:  CMD_INCOMPLETE\n");
16375 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
16376 		break;
16377 	case CMD_TRAN_ERR:
16378 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16379 		    "sdintr: CMD_TRAN_ERR\n");
16380 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
16381 		break;
16382 	case CMD_RESET:
16383 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16384 		    "sdintr: CMD_RESET \n");
16385 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
16386 		break;
16387 	case CMD_ABORTED:
16388 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16389 		    "sdintr: CMD_ABORTED \n");
16390 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
16391 		break;
16392 	case CMD_TIMEOUT:
16393 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16394 		    "sdintr: CMD_TIMEOUT\n");
16395 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
16396 		break;
16397 	case CMD_UNX_BUS_FREE:
16398 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16399 		    "sdintr: CMD_UNX_BUS_FREE \n");
16400 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
16401 		break;
16402 	case CMD_TAG_REJECT:
16403 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16404 		    "sdintr: CMD_TAG_REJECT\n");
16405 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
16406 		break;
16407 	default:
16408 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16409 		    "sdintr: default\n");
16410 		sd_pkt_reason_default(un, bp, xp, pktp);
16411 		break;
16412 	}
16413 
16414 exit:
16415 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
16416 
16417 	/* Decrement counter to indicate that the callback routine is done. */
16418 	un->un_in_callback--;
16419 	ASSERT(un->un_in_callback >= 0);
16420 
16421 	/*
16422 	 * At this point, the pkt has been dispatched, ie, it is either
16423 	 * being re-tried or has been returned to its caller and should
16424 	 * not be referenced.
16425 	 */
16426 
16427 	mutex_exit(SD_MUTEX(un));
16428 }
16429 
16430 
16431 /*
16432  *    Function: sd_print_incomplete_msg
16433  *
16434  * Description: Prints the error message for a CMD_INCOMPLETE error.
16435  *
16436  *   Arguments: un - ptr to associated softstate for the device.
16437  *		bp - ptr to the buf(9S) for the command.
16438  *		arg - message string ptr
16439  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
16440  *			or SD_NO_RETRY_ISSUED.
16441  *
16442  *     Context: May be called under interrupt context
16443  */
16444 
16445 static void
16446 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
16447 {
16448 	struct scsi_pkt	*pktp;
16449 	char	*msgp;
16450 	char	*cmdp = arg;
16451 
16452 	ASSERT(un != NULL);
16453 	ASSERT(mutex_owned(SD_MUTEX(un)));
16454 	ASSERT(bp != NULL);
16455 	ASSERT(arg != NULL);
16456 	pktp = SD_GET_PKTP(bp);
16457 	ASSERT(pktp != NULL);
16458 
16459 	switch (code) {
16460 	case SD_DELAYED_RETRY_ISSUED:
16461 	case SD_IMMEDIATE_RETRY_ISSUED:
16462 		msgp = "retrying";
16463 		break;
16464 	case SD_NO_RETRY_ISSUED:
16465 	default:
16466 		msgp = "giving up";
16467 		break;
16468 	}
16469 
16470 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16471 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16472 		    "incomplete %s - %s\n", cmdp, msgp);
16473 	}
16474 }
16475 
16476 
16477 
16478 /*
16479  *    Function: sd_pkt_status_good
16480  *
16481  * Description: Processing for a STATUS_GOOD code in pkt_status.
16482  *
16483  *     Context: May be called under interrupt context
16484  */
16485 
16486 static void
16487 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
16488 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16489 {
16490 	char	*cmdp;
16491 
16492 	ASSERT(un != NULL);
16493 	ASSERT(mutex_owned(SD_MUTEX(un)));
16494 	ASSERT(bp != NULL);
16495 	ASSERT(xp != NULL);
16496 	ASSERT(pktp != NULL);
16497 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
16498 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
16499 	ASSERT(pktp->pkt_resid != 0);
16500 
16501 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
16502 
16503 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16504 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
16505 	case SCMD_READ:
16506 		cmdp = "read";
16507 		break;
16508 	case SCMD_WRITE:
16509 		cmdp = "write";
16510 		break;
16511 	default:
16512 		SD_UPDATE_B_RESID(bp, pktp);
16513 		sd_return_command(un, bp);
16514 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16515 		return;
16516 	}
16517 
16518 	/*
16519 	 * See if we can retry the read/write, preferably immediately.
16520 	 * If retries are exhausted, then sd_retry_command() will update
16521 	 * the b_resid count.
16522 	 */
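	/*
	 * Illustrative (hypothetical) example: a READ of 128 blocks that
	 * transfers only 120 blocks can complete with STATUS_GOOD and a
	 * non-zero pkt_resid of 8 * 512 bytes; the retry below re-issues
	 * the command in the hope that the full transfer completes.
	 */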
16523 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
16524 	    cmdp, EIO, (clock_t)0, NULL);
16525 
16526 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16527 }
16528 
16529 
16530 
16531 
16532 
16533 /*
16534  *    Function: sd_handle_request_sense
16535  *
16536  * Description: Processing for non-auto Request Sense command.
16537  *
16538  *   Arguments: un - ptr to associated softstate
16539  *		sense_bp - ptr to buf(9S) for the RQS command
16540  *		sense_xp - ptr to the sd_xbuf for the RQS command
16541  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
16542  *
16543  *     Context: May be called under interrupt context
16544  */
16545 
16546 static void
16547 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
16548 	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
16549 {
16550 	struct buf	*cmd_bp;	/* buf for the original command */
16551 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
16552 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
16553 
16554 	ASSERT(un != NULL);
16555 	ASSERT(mutex_owned(SD_MUTEX(un)));
16556 	ASSERT(sense_bp != NULL);
16557 	ASSERT(sense_xp != NULL);
16558 	ASSERT(sense_pktp != NULL);
16559 
16560 	/*
16561 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
16562 	 * RQS command and not the original command.
16563 	 */
16564 	ASSERT(sense_pktp == un->un_rqs_pktp);
16565 	ASSERT(sense_bp   == un->un_rqs_bp);
16566 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
16567 	    (FLAG_SENSING | FLAG_HEAD));
16568 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
16569 	    FLAG_SENSING) == FLAG_SENSING);
16570 
16571 	/* These are the bp, xp, and pktp for the original command */
16572 	cmd_bp = sense_xp->xb_sense_bp;
16573 	cmd_xp = SD_GET_XBUF(cmd_bp);
16574 	cmd_pktp = SD_GET_PKTP(cmd_bp);
16575 
16576 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
16577 		/*
16578 		 * The REQUEST SENSE command failed.  Release the REQUEST
16579 		 * SENSE command for re-use, get back the bp for the original
16580 		 * command, and attempt to re-try the original command if
16581 		 * FLAG_DIAGNOSE is not set in the original packet.
16582 		 */
16583 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
16584 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16585 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
16586 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
16587 			    NULL, NULL, EIO, (clock_t)0, NULL);
16588 			return;
16589 		}
16590 	}
16591 
16592 	/*
16593 	 * Save the relevant sense info into the xp for the original cmd.
16594 	 *
16595 	 * Note: if the request sense failed the state info will be zero
16596 	 * as set in sd_mark_rqs_busy()
16597 	 */
16598 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
16599 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
16600 	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
16601 	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);
16602 
16603 	/*
16604 	 *  Free up the RQS command....
16605 	 *  NOTE:
16606 	 *	Must do this BEFORE calling sd_validate_sense_data!
16607 	 *	sd_validate_sense_data may return the original command in
16608 	 *	which case the pkt will be freed and the flags can no
16609 	 *	longer be touched.
16610 	 *	SD_MUTEX is held through this process until the command
16611 	 *	is dispatched based upon the sense data, so there are
16612 	 *	no race conditions.
16613 	 */
16614 	(void) sd_mark_rqs_idle(un, sense_xp);
16615 
16616 	/*
16617 	 * For a retryable command see if we have valid sense data, if so then
16618 	 * turn it over to sd_decode_sense() to figure out the right course of
16619 	 * action. Just fail a non-retryable command.
16620 	 */
16621 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16622 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
16623 		    SD_SENSE_DATA_IS_VALID) {
16624 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
16625 		}
16626 	} else {
16627 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
16628 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
16629 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
16630 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
16631 		sd_return_failed_command(un, cmd_bp, EIO);
16632 	}
16633 }
16634 
16635 
16636 
16637 
16638 /*
16639  *    Function: sd_handle_auto_request_sense
16640  *
16641  * Description: Processing for auto-request sense information.
16642  *
16643  *   Arguments: un - ptr to associated softstate
16644  *		bp - ptr to buf(9S) for the command
16645  *		xp - ptr to the sd_xbuf for the command
16646  *		pktp - ptr to the scsi_pkt(9S) for the command
16647  *
16648  *     Context: May be called under interrupt context
16649  */
16650 
16651 static void
16652 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
16653 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16654 {
16655 	struct scsi_arq_status *asp;
16656 
16657 	ASSERT(un != NULL);
16658 	ASSERT(mutex_owned(SD_MUTEX(un)));
16659 	ASSERT(bp != NULL);
16660 	ASSERT(xp != NULL);
16661 	ASSERT(pktp != NULL);
16662 	ASSERT(pktp != un->un_rqs_pktp);
16663 	ASSERT(bp   != un->un_rqs_bp);
16664 
16665 	/*
16666 	 * For auto-request sense, we get a scsi_arq_status back from
16667 	 * the HBA, with the sense data in the sts_sensedata member.
16668 	 * The pkt_scbp of the packet points to this scsi_arq_status.
16669 	 */
16670 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16671 
16672 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
16673 		/*
16674 		 * The auto REQUEST SENSE failed; see if we can re-try
16675 		 * the original command.
16676 		 */
16677 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16678 		    "auto request sense failed (reason=%s)\n",
16679 		    scsi_rname(asp->sts_rqpkt_reason));
16680 
16681 		sd_reset_target(un, pktp);
16682 
16683 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16684 		    NULL, NULL, EIO, (clock_t)0, NULL);
16685 		return;
16686 	}
16687 
16688 	/* Save the relevant sense info into the xp for the original cmd. */
16689 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
16690 	xp->xb_sense_state  = asp->sts_rqpkt_state;
16691 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16692 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16693 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
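	/*
	 * The min() above guards against overrunning xb_sense_data if
	 * sizeof (struct scsi_extended_sense) and SENSE_LENGTH ever differ.
	 */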
16694 
16695 	/*
16696 	 * See if we have valid sense data, if so then turn it over to
16697 	 * sd_decode_sense() to figure out the right course of action.
16698 	 */
16699 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
16700 		sd_decode_sense(un, bp, xp, pktp);
16701 	}
16702 }
16703 
16704 
16705 /*
16706  *    Function: sd_print_sense_failed_msg
16707  *
16708  * Description: Print log message when RQS has failed.
16709  *
16710  *   Arguments: un - ptr to associated softstate
16711  *		bp - ptr to buf(9S) for the command
16712  *		arg - generic message string ptr
16713  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16714  *			or SD_NO_RETRY_ISSUED
16715  *
16716  *     Context: May be called from interrupt context
16717  */
16718 
16719 static void
16720 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
16721 	int code)
16722 {
16723 	char	*msgp = arg;
16724 
16725 	ASSERT(un != NULL);
16726 	ASSERT(mutex_owned(SD_MUTEX(un)));
16727 	ASSERT(bp != NULL);
16728 
16729 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
16730 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
16731 	}
16732 }
16733 
16734 
16735 /*
16736  *    Function: sd_validate_sense_data
16737  *
16738  * Description: Check the given sense data for validity.
16739  *		If the sense data is not valid, the command will
16740  *		be either failed or retried!
16741  *
16742  * Return Code: SD_SENSE_DATA_IS_INVALID
16743  *		SD_SENSE_DATA_IS_VALID
16744  *
16745  *     Context: May be called from interrupt context
16746  */
16747 
16748 static int
16749 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
16750 {
16751 	struct scsi_extended_sense *esp;
16752 	struct	scsi_pkt *pktp;
16753 	size_t	actual_len;
16754 	char	*msgp = NULL;
16755 
16756 	ASSERT(un != NULL);
16757 	ASSERT(mutex_owned(SD_MUTEX(un)));
16758 	ASSERT(bp != NULL);
16759 	ASSERT(bp != un->un_rqs_bp);
16760 	ASSERT(xp != NULL);
16761 
16762 	pktp = SD_GET_PKTP(bp);
16763 	ASSERT(pktp != NULL);
16764 
16765 	/*
16766 	 * Check the status of the RQS command (auto or manual).
16767 	 */
16768 	switch (xp->xb_sense_status & STATUS_MASK) {
16769 	case STATUS_GOOD:
16770 		break;
16771 
16772 	case STATUS_RESERVATION_CONFLICT:
16773 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
16774 		return (SD_SENSE_DATA_IS_INVALID);
16775 
16776 	case STATUS_BUSY:
16777 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16778 		    "Busy Status on REQUEST SENSE\n");
16779 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
16780 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
16781 		return (SD_SENSE_DATA_IS_INVALID);
16782 
16783 	case STATUS_QFULL:
16784 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16785 		    "QFULL Status on REQUEST SENSE\n");
16786 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
16787 		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
16788 		return (SD_SENSE_DATA_IS_INVALID);
16789 
16790 	case STATUS_CHECK:
16791 	case STATUS_TERMINATED:
16792 		msgp = "Check Condition on REQUEST SENSE\n";
16793 		goto sense_failed;
16794 
16795 	default:
16796 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
16797 		goto sense_failed;
16798 	}
16799 
16800 	/*
16801 	 * See if we got the minimum required amount of sense data.
16802 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
16803 	 * or less.
16804 	 */
16805 	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
16806 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
16807 	    (actual_len == 0)) {
16808 		msgp = "Request Sense couldn't get sense data\n";
16809 		goto sense_failed;
16810 	}
16811 
16812 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
16813 		msgp = "Not enough sense information\n";
16814 		goto sense_failed;
16815 	}
16816 
16817 	/*
16818 	 * We require the extended sense data
16819 	 */
16820 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
16821 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
16822 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16823 			static char tmp[8];
16824 			static char buf[148];
16825 			char *p = (char *)(xp->xb_sense_data);
16826 			int i;
16827 
16828 			mutex_enter(&sd_sense_mutex);
16829 			(void) strcpy(buf, "undecodable sense information:");
16830 			for (i = 0; i < actual_len; i++) {
16831 				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
16832 				(void) strcpy(&buf[strlen(buf)], tmp);
16833 			}
16834 			i = strlen(buf);
16835 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
16836 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, buf);
16837 			mutex_exit(&sd_sense_mutex);
16838 		}
16839 		/* Note: Legacy behavior, fail the command with no retry */
16840 		sd_return_failed_command(un, bp, EIO);
16841 		return (SD_SENSE_DATA_IS_INVALID);
16842 	}
16843 
16844 	/*
16845 	 * Check that es_code is valid (es_class concatenated with es_code
16846 	 * makes up the "response code" field).  es_class will always be 7,
16847 	 * so make sure es_code is 0, 1, 2, 3 or 0xf.  es_code indicates the
16848 	 * sense data format.
16849 	 */
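	/*
	 * For reference (per the SCSI spec): with es_class == 7, the
	 * combined "response code" byte is 0x70 (fixed, current),
	 * 0x71 (fixed, deferred), 0x72 (descriptor, current),
	 * 0x73 (descriptor, deferred), or 0x7F (vendor specific).
	 */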
16850 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
16851 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
16852 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
16853 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
16854 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
16855 		goto sense_failed;
16856 	}
16857 
16858 	return (SD_SENSE_DATA_IS_VALID);
16859 
16860 sense_failed:
16861 	/*
16862 	 * If the request sense failed (for whatever reason), attempt
16863 	 * to retry the original command.
16864 	 */
16865 #if defined(__i386) || defined(__amd64)
16866 	/*
16867 	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in
16868 	 * sddef.h for the SPARC platform, while x86 uses one binary
16869 	 * for both SCSI and FC.
16870 	 * The SD_RETRY_DELAY value used here needs to be adjusted
16871 	 * whenever SD_RETRY_DELAY changes in sddef.h.
16872 	 */
16873 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16874 	    sd_print_sense_failed_msg, msgp, EIO,
16875 	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
16876 #else
16877 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16878 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
16879 #endif
16880 
16881 	return (SD_SENSE_DATA_IS_INVALID);
16882 }
16883 
16884 
16885 
16886 /*
16887  *    Function: sd_decode_sense
16888  *
16889  * Description: Take recovery action(s) when SCSI Sense Data is received.
16890  *
16891  *     Context: Interrupt context.
16892  */
16893 
16894 static void
16895 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
16896 	struct scsi_pkt *pktp)
16897 {
16898 	struct scsi_extended_sense *esp;
16899 	struct scsi_descr_sense_hdr *sdsp;
16900 	uint8_t asc, ascq, sense_key;
16901 
16902 	ASSERT(un != NULL);
16903 	ASSERT(mutex_owned(SD_MUTEX(un)));
16904 	ASSERT(bp != NULL);
16905 	ASSERT(bp != un->un_rqs_bp);
16906 	ASSERT(xp != NULL);
16907 	ASSERT(pktp != NULL);
16908 
16909 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
16910 
16911 	switch (esp->es_code) {
16912 	case CODE_FMT_DESCR_CURRENT:
16913 	case CODE_FMT_DESCR_DEFERRED:
16914 		sdsp = (struct scsi_descr_sense_hdr *)xp->xb_sense_data;
16915 		sense_key = sdsp->ds_key;
16916 		asc = sdsp->ds_add_code;
16917 		ascq = sdsp->ds_qual_code;
16918 		break;
16919 	case CODE_FMT_VENDOR_SPECIFIC:
16920 	case CODE_FMT_FIXED_CURRENT:
16921 	case CODE_FMT_FIXED_DEFERRED:
16922 	default:
16923 		sense_key = esp->es_key;
16924 		asc = esp->es_add_code;
16925 		ascq = esp->es_qual_code;
16926 		break;
16927 	}
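	/*
	 * For reference (per the SCSI spec): in fixed format sense data the
	 * sense key is in byte 2 and asc/ascq in bytes 12-13; in descriptor
	 * format they are in header bytes 1-3, which is why the two formats
	 * are decoded separately above.
	 */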
16928 
16929 	switch (sense_key) {
16930 	case KEY_NO_SENSE:
16931 		sd_sense_key_no_sense(un, bp, xp, pktp);
16932 		break;
16933 	case KEY_RECOVERABLE_ERROR:
16934 		sd_sense_key_recoverable_error(un, asc, bp, xp, pktp);
16935 		break;
16936 	case KEY_NOT_READY:
16937 		sd_sense_key_not_ready(un, asc, ascq, bp, xp, pktp);
16938 		break;
16939 	case KEY_MEDIUM_ERROR:
16940 	case KEY_HARDWARE_ERROR:
16941 		sd_sense_key_medium_or_hardware_error(un,
16942 		    sense_key, asc, bp, xp, pktp);
16943 		break;
16944 	case KEY_ILLEGAL_REQUEST:
16945 		sd_sense_key_illegal_request(un, bp, xp, pktp);
16946 		break;
16947 	case KEY_UNIT_ATTENTION:
16948 		sd_sense_key_unit_attention(un, asc, bp, xp, pktp);
16949 		break;
16950 	case KEY_WRITE_PROTECT:
16951 	case KEY_VOLUME_OVERFLOW:
16952 	case KEY_MISCOMPARE:
16953 		sd_sense_key_fail_command(un, bp, xp, pktp);
16954 		break;
16955 	case KEY_BLANK_CHECK:
16956 		sd_sense_key_blank_check(un, bp, xp, pktp);
16957 		break;
16958 	case KEY_ABORTED_COMMAND:
16959 		sd_sense_key_aborted_command(un, bp, xp, pktp);
16960 		break;
16961 	case KEY_VENDOR_UNIQUE:
16962 	case KEY_COPY_ABORTED:
16963 	case KEY_EQUAL:
16964 	case KEY_RESERVED:
16965 	default:
16966 		sd_sense_key_default(un, sense_key, bp, xp, pktp);
16967 		break;
16968 	}
16969 }
16970 
16971 
16972 /*
16973  *    Function: sd_dump_memory
16974  *
16975  * Description: Debug logging routine to print the contents of a user provided
16976  *		buffer. The output of the buffer is broken up into 256 byte
16977  *		segments due to a size constraint of the scsi_log
16978  *		implementation.
16979  *
16980  *   Arguments: un - ptr to softstate
16981  *		comp - component mask
16982  *		title - "title" string to precede data when printed
16983  *		data - ptr to data block to be printed
16984  *		len - size of data block to be printed
16985  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
16986  *
16987  *     Context: May be called from interrupt context
16988  */
16989 
16990 #define	SD_DUMP_MEMORY_BUF_SIZE	256
16991 
16992 static char *sd_dump_format_string[] = {
16993 		" 0x%02x",
16994 		" %c"
16995 };
16996 
16997 static void
16998 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
16999     int len, int fmt)
17000 {
17001 	int	i, j;
17002 	int	avail_count;
17003 	int	start_offset;
17004 	int	end_offset;
17005 	size_t	entry_len;
17006 	char	*bufp;
17007 	char	*local_buf;
17008 	char	*format_string;
17009 
17010 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
17011 
17012 	/*
17013 	 * In the debug version of the driver, this function is called from a
17014 	 * number of places which are NOPs in the release driver.
17015 	 * The debug driver therefore has additional methods of filtering
17016 	 * debug output.
17017 	 */
17018 #ifdef SDDEBUG
17019 	/*
17020 	 * In the debug version of the driver we can reduce the amount of debug
17021 	 * messages by setting sd_error_level to something other than
17022 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
17023 	 * sd_component_mask.
17024 	 */
17025 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
17026 	    (sd_error_level != SCSI_ERR_ALL)) {
17027 		return;
17028 	}
17029 	if (((sd_component_mask & comp) == 0) ||
17030 	    (sd_error_level != SCSI_ERR_ALL)) {
17031 		return;
17032 	}
17033 #else
17034 	if (sd_error_level != SCSI_ERR_ALL) {
17035 		return;
17036 	}
17037 #endif
17038 
17039 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
17040 	bufp = local_buf;
17041 	/*
17042 	 * Available length is the length of local_buf[], minus the
17043 	 * length of the title string, minus one for the ":", minus
17044 	 * one for the newline, minus one for the NULL terminator.
17045 	 * This gives the #bytes available for holding the printed
17046 	 * values from the given data buffer.
17047 	 */
17048 	if (fmt == SD_LOG_HEX) {
17049 		format_string = sd_dump_format_string[0];
17050 	} else /* SD_LOG_CHAR */ {
17051 		format_string = sd_dump_format_string[1];
17052 	}
17053 	/*
17054 	 * Available count is the number of elements from the given
17055 	 * data buffer that we can fit into the available length.
17056 	 * This is based upon the size of the format string used.
17057 	 * Make one entry and find its size.
17058 	 */
17059 	(void) sprintf(bufp, format_string, data[0]);
17060 	entry_len = strlen(bufp);
17061 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
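	/*
	 * Worked example (illustrative): with title "Sense Data" (10 chars)
	 * and SD_LOG_HEX, each " 0x%02x" entry expands to 5 chars, so
	 * avail_count = (256 - 10 - 3) / 5 = 48 data bytes per log line.
	 */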
17062 
17063 	j = 0;
17064 	while (j < len) {
17065 		bufp = local_buf;
17066 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
17067 		start_offset = j;
17068 
17069 		end_offset = start_offset + avail_count;
17070 
17071 		(void) sprintf(bufp, "%s:", title);
17072 		bufp += strlen(bufp);
17073 		for (i = start_offset; ((i < end_offset) && (j < len));
17074 		    i++, j++) {
17075 			(void) sprintf(bufp, format_string, data[i]);
17076 			bufp += entry_len;
17077 		}
17078 		(void) sprintf(bufp, "\n");
17079 
17080 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
17081 	}
17082 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
17083 }
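
/*
 * Illustrative (hypothetical) usage of sd_dump_memory(), assuming a
 * 4-byte buffer and hex format:
 *
 *	uchar_t data[] = { 0x70, 0x00, 0x05, 0x20 };
 *	sd_dump_memory(un, SD_LOG_IO, "Sense Data", data, 4, SD_LOG_HEX);
 *
 * logs (when sd_error_level == SCSI_ERR_ALL) a single line of the form:
 *
 *	Sense Data: 0x70 0x00 0x05 0x20
 */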
17084 
17085 /*
17086  *    Function: sd_print_sense_msg
17087  *
17088  * Description: Log a message based upon the given sense data.
17089  *
17090  *   Arguments: un - ptr to associated softstate
17091  *		bp - ptr to buf(9S) for the command
17092  *		arg - ptr to associated sd_sense_info struct
17093  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17094  *			or SD_NO_RETRY_ISSUED
17095  *
17096  *     Context: May be called from interrupt context
17097  */
17098 
17099 static void
17100 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
17101 {
17102 	struct sd_xbuf	*xp;
17103 	struct scsi_pkt	*pktp;
17104 	struct scsi_extended_sense *sensep;
17105 	daddr_t request_blkno;
17106 	diskaddr_t err_blkno;
17107 	int severity;
17108 	int pfa_flag;
17109 	int fixed_format = TRUE;
17110 	extern struct scsi_key_strings scsi_cmds[];
17111 
17112 	ASSERT(un != NULL);
17113 	ASSERT(mutex_owned(SD_MUTEX(un)));
17114 	ASSERT(bp != NULL);
17115 	xp = SD_GET_XBUF(bp);
17116 	ASSERT(xp != NULL);
17117 	pktp = SD_GET_PKTP(bp);
17118 	ASSERT(pktp != NULL);
17119 	ASSERT(arg != NULL);
17120 
17121 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
17122 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
17123 
17124 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
17125 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
17126 		severity = SCSI_ERR_RETRYABLE;
17127 	}
17128 
17129 	/* Use absolute block number for the request block number */
17130 	request_blkno = xp->xb_blkno;
17131 
17132 	/*
17133 	 * Now try to get the error block number from the sense data
17134 	 */
17135 	sensep = (struct scsi_extended_sense *)xp->xb_sense_data;
17136 	switch (sensep->es_code) {
17137 	case CODE_FMT_DESCR_CURRENT:
17138 	case CODE_FMT_DESCR_DEFERRED:
17139 		err_blkno =
17140 		    sd_extract_sense_info_descr(
17141 			(struct scsi_descr_sense_hdr *)sensep);
17142 		fixed_format = FALSE;
17143 		break;
17144 	case CODE_FMT_FIXED_CURRENT:
17145 	case CODE_FMT_FIXED_DEFERRED:
17146 	case CODE_FMT_VENDOR_SPECIFIC:
17147 	default:
17148 		/*
17149 		 * With the es_valid bit set, we assume that the error
17150 		 * blkno is in the sense data.  Also, if xp->xb_blkno is
17151 		 * greater than 0xffffffff then the target *should* have used
17152 		 * a descriptor sense format (or it shouldn't have set
17153 		 * the es_valid bit), and we may as well ignore the
17154 		 * 32-bit value.
17155 		 */
17156 		if ((sensep->es_valid != 0) && (xp->xb_blkno <= 0xffffffff)) {
17157 			err_blkno = (diskaddr_t)
17158 			    ((sensep->es_info_1 << 24) |
17159 			    (sensep->es_info_2 << 16) |
17160 			    (sensep->es_info_3 << 8)  |
17161 			    (sensep->es_info_4));
17162 		} else {
17163 			err_blkno = (diskaddr_t)-1;
17164 		}
17165 		break;
17166 	}
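	/*
	 * Worked example (illustrative): fixed format sense data with
	 * es_info_1..es_info_4 of 0x00 0x01 0x00 0x00 yields an
	 * err_blkno of 0x00010000 (block 65536).
	 */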
17167 
17168 	if (err_blkno == (diskaddr_t)-1) {
17169 		/*
17170 		 * Without the es_valid bit set (for fixed format) or an
17171 		 * information descriptor (for descriptor format) we cannot
17172 		 * be certain of the error blkno, so just use the
17173 		 * request_blkno.
17174 		 */
17175 		err_blkno = (diskaddr_t)request_blkno;
17176 	} else {
17177 		/*
17178 		 * We retrieved the error block number from the information
17179 		 * portion of the sense data.
17180 		 *
17181 		 * For USCSI commands we are better off using the error
17182 		 * block no. as the requested block no. (This is the best
17183 		 * we can estimate.)
17184 		 */
17185 		if ((SD_IS_BUFIO(xp) == FALSE) &&
17186 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
17187 			request_blkno = err_blkno;
17188 		}
17189 	}
17190 
17191 	/*
17192 	 * The following will log the buffer contents for the release driver
17193 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
17194 	 * level is set to verbose.
17195 	 */
17196 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
17197 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
17198 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
17199 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
17200 
17201 	if (pfa_flag == FALSE) {
17202 		/* This is normally only set for USCSI */
17203 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
17204 			return;
17205 		}
17206 
17207 		if ((SD_IS_BUFIO(xp) == TRUE) &&
17208 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
17209 		    (severity < sd_error_level))) {
17210 			return;
17211 		}
17212 	}
17213 
17214 	/*
17215 	 * If the data is fixed format then check for Sonoma Failover,
17216 	 * and keep a count of how many I/Os have failed.  We should not
17217 	 * have to worry about Sonoma returning descriptor format sense
17218 	 * data; asc/ascq are in a different location in that format anyway.
17219 	 */
17220 	if (fixed_format &&
17221 	    (SD_IS_LSI(un)) && (sensep->es_key == KEY_ILLEGAL_REQUEST) &&
17222 	    (sensep->es_add_code == 0x94) && (sensep->es_qual_code == 0x01)) {
17223 		un->un_sonoma_failure_count++;
17224 		if (un->un_sonoma_failure_count > 1) {
17225 			return;
17226 		}
17227 	}
17228 
17229 	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
17230 	    request_blkno, err_blkno, scsi_cmds, sensep,
17231 	    un->un_additional_codes, NULL);
17232 }
17233 
17234 /*
17235  *    Function: sd_extract_sense_info_descr
17236  *
17237  * Description: Retrieve "information" field from descriptor format
17238  *              sense data.  Iterates through each sense descriptor
17239  *              looking for the information descriptor and returns
17240  *              the information field from that descriptor.
17241  *
17242  *     Context: May be called from interrupt context
17243  */
17244 
17245 static diskaddr_t
17246 sd_extract_sense_info_descr(struct scsi_descr_sense_hdr *sdsp)
17247 {
17248 	diskaddr_t result;
17249 	uint8_t *descr_offset;
17250 	int valid_sense_length;
17251 	struct scsi_information_sense_descr *isd;
17252 
17253 	/*
17254 	 * Initialize result to -1 indicating there is no information
17255 	 * descriptor
17256 	 */
17257 	result = (diskaddr_t)-1;
17258 
17259 	/*
17260 	 * The first descriptor will immediately follow the header
17261 	 */
17262 	descr_offset = (uint8_t *)(sdsp+1); /* Pointer arithmetic */
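	/*
	 * For reference (per the SCSI spec): an information sense
	 * descriptor is 12 bytes, where byte 0 is the descriptor type
	 * (0x00), byte 1 is the additional length (0x0A), and bytes
	 * 4-11 hold the 8-byte information field in big-endian order.
	 */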
17263 
17264 	/*
17265 	 * Calculate the amount of valid sense data
17266 	 */
17267 	valid_sense_length =
17268 	    min((sizeof (struct scsi_descr_sense_hdr) +
17269 	    sdsp->ds_addl_sense_length),
17270 	    SENSE_LENGTH);
17271 
17272 	/*
17273 	 * Iterate through the list of descriptors, stopping when we
17274 	 * run out of sense data
17275 	 */
17276 	while ((descr_offset + sizeof (struct scsi_information_sense_descr)) <=
17277 	    (uint8_t *)sdsp + valid_sense_length) {
17278 		/*
17279 		 * Check if this is an information descriptor.  We can
17280 		 * use the scsi_information_sense_descr structure as a
17281 		 * template sense the first two fields are always the
17282 		 * template since the first two fields are always the
17283 		 */
17284 		isd = (struct scsi_information_sense_descr *)descr_offset;
17285 		if (isd->isd_descr_type == DESCR_INFORMATION) {
17286 			/*
17287 			 * Found an information descriptor.  Copy the
17288 			 * information field.  There will only be one
17289 			 * information descriptor so we can stop looking.
17290 			 */
17291 			result =
17292 			    (((diskaddr_t)isd->isd_information[0] << 56) |
17293 				((diskaddr_t)isd->isd_information[1] << 48) |
17294 				((diskaddr_t)isd->isd_information[2] << 40) |
17295 				((diskaddr_t)isd->isd_information[3] << 32) |
17296 				((diskaddr_t)isd->isd_information[4] << 24) |
17297 				((diskaddr_t)isd->isd_information[5] << 16) |
17298 				((diskaddr_t)isd->isd_information[6] << 8)  |
17299 				((diskaddr_t)isd->isd_information[7]));
17300 			break;
17301 		}
17302 
17303 		/*
17304 		 * Get pointer to the next descriptor.  The "additional
17305 		 * length" field holds the length of the descriptor except
17306 		 * for the "type" and "additional length" fields, so
17307 		 * we need to add 2 to get the total length.
17308 		 */
17309 		descr_offset += (isd->isd_addl_length + 2);
17310 	}
17311 
17312 	return (result);
17313 }
17314 
17315 /*
17316  *    Function: sd_sense_key_no_sense
17317  *
17318  * Description: Recovery action when sense data was not received.
17319  *
17320  *     Context: May be called from interrupt context
17321  */
17322 
17323 static void
17324 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
17325 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17326 {
17327 	struct sd_sense_info	si;
17328 
17329 	ASSERT(un != NULL);
17330 	ASSERT(mutex_owned(SD_MUTEX(un)));
17331 	ASSERT(bp != NULL);
17332 	ASSERT(xp != NULL);
17333 	ASSERT(pktp != NULL);
17334 
17335 	si.ssi_severity = SCSI_ERR_FATAL;
17336 	si.ssi_pfa_flag = FALSE;
17337 
17338 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17339 
17340 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17341 	    &si, EIO, (clock_t)0, NULL);
17342 }
17343 
17344 
17345 /*
17346  *    Function: sd_sense_key_recoverable_error
17347  *
17348  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17349  *
17350  *     Context: May be called from interrupt context
17351  */
17352 
17353 static void
17354 sd_sense_key_recoverable_error(struct sd_lun *un,
17355 	uint8_t asc,
17356 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17357 {
17358 	struct sd_sense_info	si;
17359 
17360 	ASSERT(un != NULL);
17361 	ASSERT(mutex_owned(SD_MUTEX(un)));
17362 	ASSERT(bp != NULL);
17363 	ASSERT(xp != NULL);
17364 	ASSERT(pktp != NULL);
17365 
17366 	/*
17367 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17368 	 */
17369 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17370 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17371 		si.ssi_severity = SCSI_ERR_INFO;
17372 		si.ssi_pfa_flag = TRUE;
17373 	} else {
17374 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
17375 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
17376 		si.ssi_severity = SCSI_ERR_RECOVERED;
17377 		si.ssi_pfa_flag = FALSE;
17378 	}
17379 
17380 	if (pktp->pkt_resid == 0) {
17381 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17382 		sd_return_command(un, bp);
17383 		return;
17384 	}
17385 
17386 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17387 	    &si, EIO, (clock_t)0, NULL);
17388 }
17389 
17390 
17391 
17392 
17393 /*
17394  *    Function: sd_sense_key_not_ready
17395  *
17396  * Description: Recovery actions for a SCSI "Not Ready" sense key.
17397  *
17398  *     Context: May be called from interrupt context
17399  */
17400 
17401 static void
17402 sd_sense_key_not_ready(struct sd_lun *un,
17403 	uint8_t asc, uint8_t ascq,
17404 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17405 {
17406 	struct sd_sense_info	si;
17407 
17408 	ASSERT(un != NULL);
17409 	ASSERT(mutex_owned(SD_MUTEX(un)));
17410 	ASSERT(bp != NULL);
17411 	ASSERT(xp != NULL);
17412 	ASSERT(pktp != NULL);
17413 
17414 	si.ssi_severity = SCSI_ERR_FATAL;
17415 	si.ssi_pfa_flag = FALSE;
17416 
17417 	/*
17418 	 * Update error stats after first NOT READY error. Disks may have
17419 	 * been powered down and may need to be restarted.  For CDROMs,
17420 	 * report NOT READY errors only if media is present.
17421 	 */
17422 	if ((ISCD(un) && (un->un_f_geometry_is_valid == TRUE)) ||
17423 	    (xp->xb_retry_count > 0)) {
17424 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17425 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
17426 	}
17427 
17428 	/*
17429 	 * Just fail if the "not ready" retry limit has been reached.
17430 	 */
17431 	if (xp->xb_retry_count >= un->un_notready_retry_count) {
17432 		/* Special check for error message printing for removables. */
17433 		if (un->un_f_has_removable_media && (asc == 0x04) &&
17434 		    (ascq >= 0x04)) {
17435 			si.ssi_severity = SCSI_ERR_ALL;
17436 		}
17437 		goto fail_command;
17438 	}
17439 
17440 	/*
17441 	 * Check the ASC and ASCQ in the sense data as needed, to determine
17442 	 * what to do.
17443 	 */
17444 	switch (asc) {
17445 	case 0x04:	/* LOGICAL UNIT NOT READY */
17446 		/*
17447 		 * Disk drives that don't spin up result in a very long delay
17448 		 * in format without warning messages. We will log a message
17449 		 * if the error level is set to verbose.
17450 		 */
17451 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17452 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17453 			    "logical unit not ready, resetting disk\n");
17454 		}
17455 
17456 		/*
17457 		 * There are different requirements for CDROMs and disks for
17458 		 * the number of retries.  If a CD-ROM is giving this, it is
17459 		 * probably reading TOC and is in the process of getting
17460 		 * ready, so we should keep on trying for a long time to make
17461 		 * sure that all types of media are taken into account (for
17462 		 * some media the drive takes a long time to read TOC).  For
17463 		 * disks we do not want to retry this too many times as this
17464 		 * can cause a long hang in format when the drive refuses to
17465 		 * spin up (a very common failure).
17466 		 */
17467 		switch (ascq) {
17468 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
17469 			/*
17470 			 * Disk drives frequently refuse to spin up which
17471 			 * results in a very long hang in format without
17472 			 * warning messages.
17473 			 *
17474 			 * Note: This code preserves the legacy behavior of
17475 			 * comparing xb_retry_count against zero for fibre
17476 			 * channel targets instead of comparing against the
17477 			 * un_reset_retry_count value.  The reason for this
17478 			 * discrepancy has been so utterly lost beneath the
17479 			 * Sands of Time that even Indiana Jones could not
17480 			 * find it.
17481 			 */
17482 			if (un->un_f_is_fibre == TRUE) {
17483 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17484 				    (xp->xb_retry_count > 0)) &&
17485 				    (un->un_startstop_timeid == NULL)) {
17486 					scsi_log(SD_DEVINFO(un), sd_label,
17487 					    CE_WARN, "logical unit not ready, "
17488 					    "resetting disk\n");
17489 					sd_reset_target(un, pktp);
17490 				}
17491 			} else {
17492 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17493 				    (xp->xb_retry_count >
17494 				    un->un_reset_retry_count)) &&
17495 				    (un->un_startstop_timeid == NULL)) {
17496 					scsi_log(SD_DEVINFO(un), sd_label,
17497 					    CE_WARN, "logical unit not ready, "
17498 					    "resetting disk\n");
17499 					sd_reset_target(un, pktp);
17500 				}
17501 			}
17502 			break;
17503 
17504 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
17505 			/*
17506 			 * If the target is in the process of becoming
17507 			 * ready, just proceed with the retry. This can
17508 			 * happen with CD-ROMs that take a long time to
17509 			 * read TOC after a power cycle or reset.
17510 			 */
17511 			goto do_retry;
17512 
17513 		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
17514 			break;
17515 
17516 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
17517 			/*
17518 			 * Retries cannot help here so just fail right away.
17519 			 */
17520 			goto fail_command;
17521 
17522 		case 0x88:
17523 			/*
17524 			 * Vendor-unique code for T3/T4: it indicates a
17525 			 * path problem in a multipathed config, but as far as
17526 			 * the target driver is concerned it equates to a fatal
17527 			 * error, so we should just fail the command right away
17528 			 * (without printing anything to the console). If this
17529 			 * is not a T3/T4, fall thru to the default recovery
17530 			 * action.
17531 			 * T3/T4 is FC only, don't need to check is_fibre
17532 			 */
17533 			if (SD_IS_T3(un) || SD_IS_T4(un)) {
17534 				sd_return_failed_command(un, bp, EIO);
17535 				return;
17536 			}
17537 			/* FALLTHRU */
17538 
17539 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
17540 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
17541 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
17542 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
17543 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
17544 		default:    /* Possible future codes in SCSI spec? */
17545 			/*
17546 			 * For removable-media devices, do not retry if
17547 			 * ASCQ > 2 as these result mostly from USCSI commands
17548 			 * on MMC devices issued to check status of an
17549 			 * operation initiated in immediate mode.  Also for
17550 			 * ASCQ >= 4 do not print console messages as these
17551 			 * mainly represent a user-initiated operation
17552 			 * instead of a system failure.
17553 			 */
17554 			if (un->un_f_has_removable_media) {
17555 				si.ssi_severity = SCSI_ERR_ALL;
17556 				goto fail_command;
17557 			}
17558 			break;
17559 		}
17560 
17561 		/*
17562 		 * As part of our recovery attempt for the NOT READY
17563 		 * condition, we issue a START STOP UNIT command. However
17564 		 * we want to wait for a short delay before attempting this
17565 		 * as there may still be more commands coming back from the
17566 		 * target with the check condition. To do this we use
17567 		 * timeout(9F) to call sd_start_stop_unit_callback() after
17568 		 * the delay interval expires. (sd_start_stop_unit_callback()
17569 		 * dispatches sd_start_stop_unit_task(), which will issue
17570 		 * the actual START STOP UNIT command.)  The delay interval
17571 		 * is one-half of the delay that we will use to retry the
17572 		 * command that generated the NOT READY condition.
17573 		 *
17574 		 * Note that we could just dispatch sd_start_stop_unit_task()
17575 		 * from here and allow it to sleep for the delay interval,
17576 		 * but then we would be tying up the taskq thread
17577 		 * unnecessarily for the duration of the delay.
17578 		 *
17579 		 * Do not issue the START STOP UNIT if the current command
17580 		 * is already a START STOP UNIT.
17581 		 */
17582 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
17583 			break;
17584 		}
17585 
17586 		/*
17587 		 * Do not schedule the timeout if one is already pending.
17588 		 */
17589 		if (un->un_startstop_timeid != NULL) {
17590 			SD_INFO(SD_LOG_ERROR, un,
17591 			    "sd_sense_key_not_ready: restart already issued to"
17592 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
17593 			    ddi_get_instance(SD_DEVINFO(un)));
17594 			break;
17595 		}
17596 
17597 		/*
17598 		 * Schedule the START STOP UNIT command, then queue the command
17599 		 * for a retry.
17600 		 *
17601 		 * Note: A timeout is not scheduled for this retry because we
17602 		 * want the retry to be serial with the START_STOP_UNIT. The
17603 		 * retry will be started when the START_STOP_UNIT is completed
17604 		 * in sd_start_stop_unit_task.
17605 		 */
17606 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
17607 		    un, SD_BSY_TIMEOUT / 2);
17608 		xp->xb_retry_count++;
17609 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
17610 		return;
17611 
17612 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
17613 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17614 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17615 			    "unit does not respond to selection\n");
17616 		}
17617 		break;
17618 
17619 	case 0x3A:	/* MEDIUM NOT PRESENT */
17620 		if (sd_error_level >= SCSI_ERR_FATAL) {
17621 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17622 			    "Caddy not inserted in drive\n");
17623 		}
17624 
17625 		sr_ejected(un);
17626 		un->un_mediastate = DKIO_EJECTED;
17627 		/* The state has changed, inform the media watch routines */
17628 		cv_broadcast(&un->un_state_cv);
17629 		/* Just fail if no media is present in the drive. */
17630 		goto fail_command;
17631 
17632 	default:
17633 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17634 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
17635 			    "Unit not Ready. Additional sense code 0x%x\n",
17636 			    asc);
17637 		}
17638 		break;
17639 	}
17640 
17641 do_retry:
17642 
17643 	/*
17644 	 * Retry the command, as some targets may report NOT READY for
17645 	 * several seconds after being reset.
17646 	 */
17647 	xp->xb_retry_count++;
17648 	si.ssi_severity = SCSI_ERR_RETRYABLE;
17649 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17650 	    &si, EIO, SD_BSY_TIMEOUT, NULL);
17651 
17652 	return;
17653 
17654 fail_command:
17655 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17656 	sd_return_failed_command(un, bp, EIO);
17657 }
17658 
17659 
17660 
17661 /*
17662  *    Function: sd_sense_key_medium_or_hardware_error
17663  *
17664  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
17665  *		sense key.
17666  *
17667  *     Context: May be called from interrupt context
17668  */
17669 
17670 static void
17671 sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
17672 	int sense_key, uint8_t asc,
17673 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17674 {
17675 	struct sd_sense_info	si;
17676 
17677 	ASSERT(un != NULL);
17678 	ASSERT(mutex_owned(SD_MUTEX(un)));
17679 	ASSERT(bp != NULL);
17680 	ASSERT(xp != NULL);
17681 	ASSERT(pktp != NULL);
17682 
17683 	si.ssi_severity = SCSI_ERR_FATAL;
17684 	si.ssi_pfa_flag = FALSE;
17685 
17686 	if (sense_key == KEY_MEDIUM_ERROR) {
17687 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
17688 	}
17689 
17690 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17691 
17692 	if ((un->un_reset_retry_count != 0) &&
17693 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
17694 		mutex_exit(SD_MUTEX(un));
17695 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
17696 		if (un->un_f_allow_bus_device_reset == TRUE) {
17697 
17698 			boolean_t try_resetting_target = B_TRUE;
17699 
17700 			/*
17701 			 * We need to be able to handle specific ASC when we are
17702 			 * handling a KEY_HARDWARE_ERROR. In particular
17703 			 * taking the default action of resetting the target may
17704 			 * not be the appropriate way to attempt recovery.
17705 			 * Resetting a target because of a single LUN failure
17706 			 * victimizes all LUNs on that target.
17707 			 *
17708 			 * This is true for the LSI arrays, if an LSI
17709 			 * array controller returns an ASC of 0x84 (LUN Dead) we
17710 			 * should trust it.
17711 			 */
17712 
17713 			if (sense_key == KEY_HARDWARE_ERROR) {
17714 				switch (asc) {
17715 				case 0x84:
17716 					if (SD_IS_LSI(un)) {
17717 						try_resetting_target = B_FALSE;
17718 					}
17719 					break;
17720 				default:
17721 					break;
17722 				}
17723 			}
17724 
17725 			if (try_resetting_target == B_TRUE) {
17726 				int reset_retval = 0;
17727 				if (un->un_f_lun_reset_enabled == TRUE) {
17728 					SD_TRACE(SD_LOG_IO_CORE, un,
17729 					    "sd_sense_key_medium_or_hardware_"
17730 					    "error: issuing RESET_LUN\n");
17731 					reset_retval =
17732 					    scsi_reset(SD_ADDRESS(un),
17733 					    RESET_LUN);
17734 				}
17735 				if (reset_retval == 0) {
17736 					SD_TRACE(SD_LOG_IO_CORE, un,
17737 					    "sd_sense_key_medium_or_hardware_"
17738 					    "error: issuing RESET_TARGET\n");
17739 					(void) scsi_reset(SD_ADDRESS(un),
17740 					    RESET_TARGET);
17741 				}
17742 			}
17743 		}
17744 		mutex_enter(SD_MUTEX(un));
17745 	}
17746 
17747 	/*
17748 	 * This really ought to be a fatal error, but we will retry anyway
17749 	 * as some drives report this as a spurious error.
17750 	 */
17751 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17752 	    &si, EIO, (clock_t)0, NULL);
17753 }
17754 
17755 
17756 
17757 /*
17758  *    Function: sd_sense_key_illegal_request
17759  *
17760  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
17761  *
17762  *     Context: May be called from interrupt context
17763  */
17764 
17765 static void
17766 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
17767 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17768 {
17769 	struct sd_sense_info	si;
17770 
17771 	ASSERT(un != NULL);
17772 	ASSERT(mutex_owned(SD_MUTEX(un)));
17773 	ASSERT(bp != NULL);
17774 	ASSERT(xp != NULL);
17775 	ASSERT(pktp != NULL);
17776 
17777 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17778 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
17779 
17780 	si.ssi_severity = SCSI_ERR_INFO;
17781 	si.ssi_pfa_flag = FALSE;
17782 
17783 	/* Pointless to retry if the target thinks it's an illegal request */
17784 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17785 	sd_return_failed_command(un, bp, EIO);
17786 }
17787 
17788 
17789 
17790 
17791 /*
17792  *    Function: sd_sense_key_unit_attention
17793  *
17794  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
17795  *
17796  *     Context: May be called from interrupt context
17797  */
17798 
17799 static void
17800 sd_sense_key_unit_attention(struct sd_lun *un,
17801 	uint8_t asc,
17802 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17803 {
17804 	/*
17805 	 * For UNIT ATTENTION we allow retries for one minute. Devices
17806 	 * like Sonoma can keep returning UNIT ATTENTION for close to
17807 	 * a minute under certain conditions.
17808 	 */
17809 	int	retry_check_flag = SD_RETRIES_UA;
17810 	boolean_t	kstat_updated = B_FALSE;
17811 	struct	sd_sense_info		si;
17812 
17813 	ASSERT(un != NULL);
17814 	ASSERT(mutex_owned(SD_MUTEX(un)));
17815 	ASSERT(bp != NULL);
17816 	ASSERT(xp != NULL);
17817 	ASSERT(pktp != NULL);
17818 
17819 	si.ssi_severity = SCSI_ERR_INFO;
17820 	si.ssi_pfa_flag = FALSE;
17821 
17822 
17823 	switch (asc) {
17824 	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
17825 		if (sd_report_pfa != 0) {
17826 			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17827 			si.ssi_pfa_flag = TRUE;
17828 			retry_check_flag = SD_RETRIES_STANDARD;
17829 			goto do_retry;
17830 		}
17831 		break;
17832 
17833 	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
17834 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
17835 			un->un_resvd_status |=
17836 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
17837 		}
17838 		/* FALLTHRU */
17839 
17840 	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
17841 		if (!un->un_f_has_removable_media) {
17842 			break;
17843 		}
17844 
17845 		/*
17846 		 * When we get a unit attention from a removable-media device,
17847 		 * it may be in a state that will take a long time to recover
17848 		 * (e.g., from a reset).  Since we are executing in interrupt
17849 		 * context here, we cannot wait around for the device to come
17850 		 * back. So hand this command off to sd_media_change_task()
17851 		 * for deferred processing under taskq thread context. (Note
17852 		 * that the command still may be failed if a problem is
17853 		 * encountered at a later time.)
17854 		 */
17855 		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
17856 		    KM_NOSLEEP) == 0) {
17857 			/*
17858 			 * Cannot dispatch the request so fail the command.
17859 			 */
17860 			SD_UPDATE_ERRSTATS(un, sd_harderrs);
17861 			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17862 			si.ssi_severity = SCSI_ERR_FATAL;
17863 			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17864 			sd_return_failed_command(un, bp, EIO);
17865 		}
17866 
17867 		/*
17868 		 * If we failed to dispatch sd_media_change_task(), we have
17869 		 * already updated kstat. If the dispatch succeeded, kstat
17870 		 * will be updated later if an error is encountered. Either
17871 		 * way, set the kstat_updated flag here.
17872 		 */
17873 		kstat_updated = B_TRUE;
17874 
17875 		/*
17876 		 * Either the command has been successfully dispatched to a
17877 		 * task Q for retrying, or the dispatch failed. In either case
17878 		 * do NOT retry again by calling sd_retry_command. This sets up
17879 		 * two retries of the same command and when one completes and
17880 		 * frees the resources the other will access freed memory,
17881 		 * a bad thing.
17882 		 */
17883 		return;
17884 
17885 	default:
17886 		break;
17887 	}
17888 
17889 	/*
17890 	 * Update kstat if we haven't done so already.
17891 	 */
17892 	if (!kstat_updated) {
17893 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17894 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
17895 	}
17896 
17897 do_retry:
17898 	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
17899 	    EIO, SD_UA_RETRY_DELAY, NULL);
17900 }
17901 
17902 
17903 
17904 /*
17905  *    Function: sd_sense_key_fail_command
17906  *
17907  * Description: Used to fail a command when we don't like the sense key that
17908  *		was returned.
17909  *
17910  *     Context: May be called from interrupt context
17911  */
17912 
17913 static void
17914 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
17915 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17916 {
17917 	struct sd_sense_info	si;
17918 
17919 	ASSERT(un != NULL);
17920 	ASSERT(mutex_owned(SD_MUTEX(un)));
17921 	ASSERT(bp != NULL);
17922 	ASSERT(xp != NULL);
17923 	ASSERT(pktp != NULL);
17924 
17925 	si.ssi_severity = SCSI_ERR_FATAL;
17926 	si.ssi_pfa_flag = FALSE;
17927 
17928 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17929 	sd_return_failed_command(un, bp, EIO);
17930 }
17931 
17932 
17933 
17934 /*
17935  *    Function: sd_sense_key_blank_check
17936  *
17937  * Description: Recovery actions for a SCSI "Blank Check" sense key.
17938  *		Has no monetary connotation.
17939  *
17940  *     Context: May be called from interrupt context
17941  */
17942 
17943 static void
17944 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
17945 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17946 {
17947 	struct sd_sense_info	si;
17948 
17949 	ASSERT(un != NULL);
17950 	ASSERT(mutex_owned(SD_MUTEX(un)));
17951 	ASSERT(bp != NULL);
17952 	ASSERT(xp != NULL);
17953 	ASSERT(pktp != NULL);
17954 
17955 	/*
17956 	 * Blank check is not fatal for removable devices, therefore
17957 	 * it does not require a console message.
17958 	 */
17959 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
17960 	    SCSI_ERR_FATAL;
17961 	si.ssi_pfa_flag = FALSE;
17962 
17963 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17964 	sd_return_failed_command(un, bp, EIO);
17965 }
17966 
17967 
17968 
17969 
17970 /*
17971  *    Function: sd_sense_key_aborted_command
17972  *
17973  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
17974  *
17975  *     Context: May be called from interrupt context
17976  */
17977 
17978 static void
17979 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
17980 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17981 {
17982 	struct sd_sense_info	si;
17983 
17984 	ASSERT(un != NULL);
17985 	ASSERT(mutex_owned(SD_MUTEX(un)));
17986 	ASSERT(bp != NULL);
17987 	ASSERT(xp != NULL);
17988 	ASSERT(pktp != NULL);
17989 
17990 	si.ssi_severity = SCSI_ERR_FATAL;
17991 	si.ssi_pfa_flag = FALSE;
17992 
17993 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17994 
17995 	/*
17996 	 * This really ought to be a fatal error, but we will retry anyway
17997 	 * as some drives report this as a spurious error.
17998 	 */
17999 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18000 	    &si, EIO, (clock_t)0, NULL);
18001 }
18002 
18003 
18004 
18005 /*
18006  *    Function: sd_sense_key_default
18007  *
18008  * Description: Default recovery action for several SCSI sense keys (basically
18009  *		attempts a retry).
18010  *
18011  *     Context: May be called from interrupt context
18012  */
18013 
18014 static void
18015 sd_sense_key_default(struct sd_lun *un,
18016 	int sense_key,
18017 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18018 {
18019 	struct sd_sense_info	si;
18020 
18021 	ASSERT(un != NULL);
18022 	ASSERT(mutex_owned(SD_MUTEX(un)));
18023 	ASSERT(bp != NULL);
18024 	ASSERT(xp != NULL);
18025 	ASSERT(pktp != NULL);
18026 
18027 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18028 
18029 	/*
18030 	 * Undecoded sense key.	Attempt retries and hope that will fix
18031 	 * the problem.  Otherwise, we're dead.
18032 	 */
18033 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
18034 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18035 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
18036 	}
18037 
18038 	si.ssi_severity = SCSI_ERR_FATAL;
18039 	si.ssi_pfa_flag = FALSE;
18040 
18041 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18042 	    &si, EIO, (clock_t)0, NULL);
18043 }
18044 
18045 
18046 
18047 /*
18048  *    Function: sd_print_retry_msg
18049  *
18050  * Description: Print a message indicating the retry action being taken.
18051  *
18052  *   Arguments: un - ptr to associated softstate
18053  *		bp - ptr to buf(9S) for the command
18054  *		arg - not used.
18055  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18056  *			or SD_NO_RETRY_ISSUED
18057  *
18058  *     Context: May be called from interrupt context
18059  */
18060 /* ARGSUSED */
18061 static void
18062 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
18063 {
18064 	struct sd_xbuf	*xp;
18065 	struct scsi_pkt *pktp;
18066 	char *reasonp;
18067 	char *msgp;
18068 
18069 	ASSERT(un != NULL);
18070 	ASSERT(mutex_owned(SD_MUTEX(un)));
18071 	ASSERT(bp != NULL);
18072 	pktp = SD_GET_PKTP(bp);
18073 	ASSERT(pktp != NULL);
18074 	xp = SD_GET_XBUF(bp);
18075 	ASSERT(xp != NULL);
18076 
18077 	ASSERT(!mutex_owned(&un->un_pm_mutex));
18078 	mutex_enter(&un->un_pm_mutex);
18079 	if ((un->un_state == SD_STATE_SUSPENDED) ||
18080 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
18081 	    (pktp->pkt_flags & FLAG_SILENT)) {
18082 		mutex_exit(&un->un_pm_mutex);
18083 		goto update_pkt_reason;
18084 	}
18085 	mutex_exit(&un->un_pm_mutex);
18086 
18087 	/*
18088 	 * Suppress messages if they are all the same pkt_reason; with
18089 	 * TQ, many (up to 256) are returned with the same pkt_reason.
18090 	 * If we are in panic, then suppress the retry messages.
18091 	 */
18092 	switch (flag) {
18093 	case SD_NO_RETRY_ISSUED:
18094 		msgp = "giving up";
18095 		break;
18096 	case SD_IMMEDIATE_RETRY_ISSUED:
18097 	case SD_DELAYED_RETRY_ISSUED:
18098 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
18099 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
18100 		    (sd_error_level != SCSI_ERR_ALL))) {
18101 			return;
18102 		}
18103 		msgp = "retrying command";
18104 		break;
18105 	default:
18106 		goto update_pkt_reason;
18107 	}
18108 
18109 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
18110 	    scsi_rname(pktp->pkt_reason));
18111 
18112 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18113 	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
18114 
18115 update_pkt_reason:
18116 	/*
18117 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
18118 	 * This is to prevent multiple console messages for the same failure
18119 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if
18120 	 * and when the command is retried successfully, as there may still be
18121 	 * more commands coming back with the same value of pktp->pkt_reason.
18122 	 */
18123 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
18124 		un->un_last_pkt_reason = pktp->pkt_reason;
18125 	}
18126 }
18127 
18128 
18129 /*
18130  *    Function: sd_print_cmd_incomplete_msg
18131  *
18132  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18133  *
18134  *   Arguments: un - ptr to associated softstate
18135  *		bp - ptr to buf(9S) for the command
18136  *		arg - passed to sd_print_retry_msg()
18137  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18138  *			or SD_NO_RETRY_ISSUED
18139  *
18140  *     Context: May be called from interrupt context
18141  */
18142 
18143 static void
18144 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
18145 	int code)
18146 {
18147 	dev_info_t	*dip;
18148 
18149 	ASSERT(un != NULL);
18150 	ASSERT(mutex_owned(SD_MUTEX(un)));
18151 	ASSERT(bp != NULL);
18152 
18153 	switch (code) {
18154 	case SD_NO_RETRY_ISSUED:
18155 		/* The command was failed.  Did someone turn off this target? */
18156 		if (un->un_state != SD_STATE_OFFLINE) {
18157 			/*
18158 			 * Suppress the message if we are detaching and the
18159 			 * device has been disconnected.
18160 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation-
18161 			 * private interface and is not part of the DDI.
18162 			 */
18163 			dip = un->un_sd->sd_dev;
18164 			if (!(DEVI_IS_DETACHING(dip) &&
18165 			    DEVI_IS_DEVICE_REMOVED(dip))) {
18166 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18167 				    "disk not responding to selection\n");
18168 			}
18169 			New_state(un, SD_STATE_OFFLINE);
18170 		}
18171 		break;
18172 
18173 	case SD_DELAYED_RETRY_ISSUED:
18174 	case SD_IMMEDIATE_RETRY_ISSUED:
18175 	default:
18176 		/* Command was successfully queued for retry */
18177 		sd_print_retry_msg(un, bp, arg, code);
18178 		break;
18179 	}
18180 }
18181 
18182 
18183 /*
18184  *    Function: sd_pkt_reason_cmd_incomplete
18185  *
18186  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18187  *
18188  *     Context: May be called from interrupt context
18189  */
18190 
18191 static void
18192 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
18193 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18194 {
18195 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
18196 
18197 	ASSERT(un != NULL);
18198 	ASSERT(mutex_owned(SD_MUTEX(un)));
18199 	ASSERT(bp != NULL);
18200 	ASSERT(xp != NULL);
18201 	ASSERT(pktp != NULL);
18202 
18203 	/* Do not do a reset if selection did not complete */
18204 	/* Note: Should this not just check the bit? */
18205 	if (pktp->pkt_state != STATE_GOT_BUS) {
18206 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18207 		sd_reset_target(un, pktp);
18208 	}
18209 
18210 	/*
18211 	 * If the target was not successfully selected, then set
18212 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
18213 	 * with the target, and further retries and/or commands are
18214 	 * likely to take a long time.
18215 	 */
18216 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18217 		flag |= SD_RETRIES_FAILFAST;
18218 	}
18219 
18220 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18221 
18222 	sd_retry_command(un, bp, flag,
18223 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18224 }
18225 
18226 
18227 
18228 /*
18229  *    Function: sd_pkt_reason_cmd_tran_err
18230  *
18231  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18232  *
18233  *     Context: May be called from interrupt context
18234  */
18235 
18236 static void
18237 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18238 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18239 {
18240 	ASSERT(un != NULL);
18241 	ASSERT(mutex_owned(SD_MUTEX(un)));
18242 	ASSERT(bp != NULL);
18243 	ASSERT(xp != NULL);
18244 	ASSERT(pktp != NULL);
18245 
18246 	/*
18247 	 * Do not reset if we got a parity error, or if
18248 	 * selection did not complete.
18249 	 */
18250 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18251 	/* Note: Should this not just check the bit for pkt_state? */
18252 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
18253 	    (pktp->pkt_state != STATE_GOT_BUS)) {
18254 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18255 		sd_reset_target(un, pktp);
18256 	}
18257 
18258 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18259 
18260 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18261 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18262 }
18263 
18264 
18265 
18266 /*
18267  *    Function: sd_pkt_reason_cmd_reset
18268  *
18269  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18270  *
18271  *     Context: May be called from interrupt context
18272  */
18273 
18274 static void
18275 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
18276 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18277 {
18278 	ASSERT(un != NULL);
18279 	ASSERT(mutex_owned(SD_MUTEX(un)));
18280 	ASSERT(bp != NULL);
18281 	ASSERT(xp != NULL);
18282 	ASSERT(pktp != NULL);
18283 
18284 	/* The target may still be running the command, so try to reset. */
18285 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18286 	sd_reset_target(un, pktp);
18287 
18288 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18289 
18290 	/*
18291 	 * If pkt_reason is CMD_RESET, chances are that this pkt got
18292 	 * reset because another target on this bus caused it. The target
18293 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18294 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18295 	 */
18296 
18297 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18298 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18299 }
18300 
18301 
18302 
18303 
18304 /*
18305  *    Function: sd_pkt_reason_cmd_aborted
18306  *
18307  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18308  *
18309  *     Context: May be called from interrupt context
18310  */
18311 
18312 static void
18313 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
18314 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18315 {
18316 	ASSERT(un != NULL);
18317 	ASSERT(mutex_owned(SD_MUTEX(un)));
18318 	ASSERT(bp != NULL);
18319 	ASSERT(xp != NULL);
18320 	ASSERT(pktp != NULL);
18321 
18322 	/* The target may still be running the command, so try to reset. */
18323 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18324 	sd_reset_target(un, pktp);
18325 
18326 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18327 
18328 	/*
18329 	 * If pkt_reason is CMD_ABORTED, chances are that this pkt got
18330 	 * aborted because another target on this bus caused it. The target
18331 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18332 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18333 	 */
18334 
18335 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18336 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18337 }
18338 
18339 
18340 
18341 /*
18342  *    Function: sd_pkt_reason_cmd_timeout
18343  *
18344  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
18345  *
18346  *     Context: May be called from interrupt context
18347  */
18348 
18349 static void
18350 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
18351 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18352 {
18353 	ASSERT(un != NULL);
18354 	ASSERT(mutex_owned(SD_MUTEX(un)));
18355 	ASSERT(bp != NULL);
18356 	ASSERT(xp != NULL);
18357 	ASSERT(pktp != NULL);
18358 
18359 
18360 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18361 	sd_reset_target(un, pktp);
18362 
18363 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18364 
18365 	/*
18366 	 * A command timeout indicates that we could not establish
18367 	 * communication with the target, so set SD_RETRIES_FAILFAST
18368 	 * as further retries/commands are likely to take a long time.
18369 	 */
18370 	sd_retry_command(un, bp,
18371 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
18372 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18373 }
18374 
18375 
18376 
18377 /*
18378  *    Function: sd_pkt_reason_cmd_unx_bus_free
18379  *
18380  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
18381  *
18382  *     Context: May be called from interrupt context
18383  */
18384 
18385 static void
18386 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
18387 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18388 {
18389 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
18390 
18391 	ASSERT(un != NULL);
18392 	ASSERT(mutex_owned(SD_MUTEX(un)));
18393 	ASSERT(bp != NULL);
18394 	ASSERT(xp != NULL);
18395 	ASSERT(pktp != NULL);
18396 
18397 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18398 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18399 
18400 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
18401 	    sd_print_retry_msg : NULL;
18402 
18403 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18404 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18405 }
18406 
18407 
18408 /*
18409  *    Function: sd_pkt_reason_cmd_tag_reject
18410  *
18411  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
18412  *
18413  *     Context: May be called from interrupt context
18414  */
18415 
18416 static void
18417 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
18418 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18419 {
18420 	ASSERT(un != NULL);
18421 	ASSERT(mutex_owned(SD_MUTEX(un)));
18422 	ASSERT(bp != NULL);
18423 	ASSERT(xp != NULL);
18424 	ASSERT(pktp != NULL);
18425 
18426 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18427 	pktp->pkt_flags = 0;
18428 	un->un_tagflags = 0;
18429 	if (un->un_f_opt_queueing == TRUE) {
18430 		un->un_throttle = min(un->un_throttle, 3);
18431 	} else {
18432 		un->un_throttle = 1;
18433 	}
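	/*
	 * Drop the softstate mutex around the scsi_ifsetcap(9F) call below;
	 * presumably the HBA capability call must not be made while
	 * SD_MUTEX is held.
	 */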
18434 	mutex_exit(SD_MUTEX(un));
18435 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
18436 	mutex_enter(SD_MUTEX(un));
18437 
18438 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18439 
18440 	/* Legacy behavior: do not check retry counts here. */
18441 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
18442 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18443 }
18444 
18445 
18446 /*
18447  *    Function: sd_pkt_reason_default
18448  *
18449  * Description: Default recovery actions for SCSA pkt_reason values that
18450  *		do not have more explicit recovery actions.
18451  *
18452  *     Context: May be called from interrupt context
18453  */
18454 
18455 static void
18456 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
18457 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18458 {
18459 	ASSERT(un != NULL);
18460 	ASSERT(mutex_owned(SD_MUTEX(un)));
18461 	ASSERT(bp != NULL);
18462 	ASSERT(xp != NULL);
18463 	ASSERT(pktp != NULL);
18464 
18465 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18466 	sd_reset_target(un, pktp);
18467 
18468 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18469 
18470 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18471 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18472 }
18473 
18474 
18475 
18476 /*
18477  *    Function: sd_pkt_status_check_condition
18478  *
18479  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
18480  *
18481  *     Context: May be called from interrupt context
18482  */
18483 
18484 static void
18485 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
18486 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18487 {
18488 	ASSERT(un != NULL);
18489 	ASSERT(mutex_owned(SD_MUTEX(un)));
18490 	ASSERT(bp != NULL);
18491 	ASSERT(xp != NULL);
18492 	ASSERT(pktp != NULL);
18493 
18494 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
18495 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
18496 
18497 	/*
18498 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
18499 	 * command will be retried after the request sense). Otherwise, retry
18500 	 * the command. Note: we are issuing the request sense even though the
18501 	 * retry limit may have been reached for the failed command.
18502 	 */
18503 	if (un->un_f_arq_enabled == FALSE) {
18504 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18505 		    "no ARQ, sending request sense command\n");
18506 		sd_send_request_sense_command(un, bp, pktp);
18507 	} else {
18508 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
18509 		    "ARQ,retrying request sense command\n");
18510 #if defined(__i386) || defined(__amd64)
18511 		/*
18512 		 * The SD_RETRY_DELAY value needs to be adjusted here whenever
18513 		 * SD_RETRY_DELAY changes in sddef.h.
18514 		 */
18515 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18516 		    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0,
18517 		    NULL);
18518 #else
18519 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
18520 		    EIO, SD_RETRY_DELAY, NULL);
18521 #endif
18522 	}
18523 
18524 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
18525 }
18526 
18527 
18528 /*
18529  *    Function: sd_pkt_status_busy
18530  *
18531  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
18532  *
18533  *     Context: May be called from interrupt context
18534  */
18535 
18536 static void
18537 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18538 	struct scsi_pkt *pktp)
18539 {
18540 	ASSERT(un != NULL);
18541 	ASSERT(mutex_owned(SD_MUTEX(un)));
18542 	ASSERT(bp != NULL);
18543 	ASSERT(xp != NULL);
18544 	ASSERT(pktp != NULL);
18545 
18546 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18547 	    "sd_pkt_status_busy: entry\n");
18548 
18549 	/* If retries are exhausted, just fail the command. */
18550 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
18551 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18552 		    "device busy too long\n");
18553 		sd_return_failed_command(un, bp, EIO);
18554 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18555 		    "sd_pkt_status_busy: exit\n");
18556 		return;
18557 	}
18558 	xp->xb_retry_count++;
18559 
18560 	/*
18561 	 * Try to reset the target. However, we do not want to perform
18562 	 * more than one reset if the device continues to fail. The reset
18563 	 * will be performed when the retry count reaches the reset
18564 	 * threshold.  This threshold should be set such that at least
18565 	 * one retry is issued before the reset is performed.
18566 	 */
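	/*
	 * For example (illustrative values only): with un_reset_retry_count
	 * of 5 the reset is attempted on the fifth retry; the floor of 2
	 * below guarantees at least one plain retry before the reset.
	 */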
18567 	if (xp->xb_retry_count ==
18568 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
18569 		int rval = 0;
18570 		mutex_exit(SD_MUTEX(un));
18571 		if (un->un_f_allow_bus_device_reset == TRUE) {
18572 			/*
18573 			 * First try to reset the LUN; if we cannot then
18574 			 * try to reset the target.
18575 			 */
18576 			if (un->un_f_lun_reset_enabled == TRUE) {
18577 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18578 				    "sd_pkt_status_busy: RESET_LUN\n");
18579 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18580 			}
18581 			if (rval == 0) {
18582 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18583 				    "sd_pkt_status_busy: RESET_TARGET\n");
18584 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18585 			}
18586 		}
18587 		if (rval == 0) {
18588 			/*
18589 			 * If the RESET_LUN and/or RESET_TARGET failed,
18590 			 * try RESET_ALL
18591 			 */
18592 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18593 			    "sd_pkt_status_busy: RESET_ALL\n");
18594 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
18595 		}
18596 		mutex_enter(SD_MUTEX(un));
18597 		if (rval == 0) {
18598 			/*
18599 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
18600 			 * At this point we give up & fail the command.
18601 			 */
18602 			sd_return_failed_command(un, bp, EIO);
18603 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18604 			    "sd_pkt_status_busy: exit (failed cmd)\n");
18605 			return;
18606 		}
18607 	}
18608 
18609 	/*
18610 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
18611 	 * we have already checked the retry counts above.
18612 	 */
18613 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
18614 	    EIO, SD_BSY_TIMEOUT, NULL);
18615 
18616 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18617 	    "sd_pkt_status_busy: exit\n");
18618 }
18619 
18620 
18621 /*
18622  *    Function: sd_pkt_status_reservation_conflict
18623  *
18624  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
18625  *		command status.
18626  *
18627  *     Context: May be called from interrupt context
18628  */
18629 
18630 static void
18631 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
18632 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18633 {
18634 	ASSERT(un != NULL);
18635 	ASSERT(mutex_owned(SD_MUTEX(un)));
18636 	ASSERT(bp != NULL);
18637 	ASSERT(xp != NULL);
18638 	ASSERT(pktp != NULL);
18639 
18640 	/*
18641 	 * If the command was PERSISTENT_RESERVE_[IN|OUT] then the reservation
18642 	 * conflict could be due to various reasons, such as incorrect keys or
18643 	 * not being registered or reserved, so we return EACCES to the caller.
18644 	 */
18645 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
18646 		int cmd = SD_GET_PKT_OPCODE(pktp);
18647 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
18648 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
18649 			sd_return_failed_command(un, bp, EACCES);
18650 			return;
18651 		}
18652 	}
18653 
18654 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
18655 
18656 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
18657 		if (sd_failfast_enable != 0) {
18658 			/* By definition, we must panic here.... */
18659 			sd_panic_for_res_conflict(un);
18660 			/*NOTREACHED*/
18661 		}
18662 		SD_ERROR(SD_LOG_IO, un,
18663 		    "sd_handle_resv_conflict: Disk Reserved\n");
18664 		sd_return_failed_command(un, bp, EACCES);
18665 		return;
18666 	}
18667 
18668 	/*
18669 	 * 1147670: retry only if sd_retry_on_reservation_conflict
18670 	 * property is set (default is 1). Retries will not succeed
18671 	 * on a disk reserved by another initiator. HA systems
18672 	 * may reset this via sd.conf to avoid these retries.
18673 	 *
18674 	 * Note: The legacy return code for this failure is EIO, however EACCES
18675 	 * seems more appropriate for a reservation conflict.
18676 	 */
18677 	if (sd_retry_on_reservation_conflict == 0) {
18678 		SD_ERROR(SD_LOG_IO, un,
18679 		    "sd_handle_resv_conflict: Device Reserved\n");
18680 		sd_return_failed_command(un, bp, EIO);
18681 		return;
18682 	}
18683 
18684 	/*
18685 	 * Retry the command if we can.
18686 	 *
18687 	 * Note: The legacy return code for this failure is EIO, however EACCES
18688 	 * seems more appropriate for a reservation conflict.
18689 	 */
18690 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
18691 	    (clock_t)2, NULL);
18692 }
18693 
18694 
18695 
18696 /*
18697  *    Function: sd_pkt_status_qfull
18698  *
18699  * Description: Handle a QUEUE FULL condition from the target.  This can
18700  *		occur if the HBA does not handle the queue full condition.
18701  *		(Basically this means third-party HBAs, as Sun HBAs will
18702  *		handle the queue full condition.)  Note that if there are
18703  *		some commands already in the transport, then the queue full
18704  *		has occurred because the queue for this nexus is actually
18705  *		full. If there are no commands in the transport, then the
18706  *		queue full is resulting from some other initiator or lun
18707  *		consuming all the resources at the target.
18708  *
18709  *     Context: May be called from interrupt context
18710  */
18711 
18712 static void
18713 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
18714 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18715 {
18716 	ASSERT(un != NULL);
18717 	ASSERT(mutex_owned(SD_MUTEX(un)));
18718 	ASSERT(bp != NULL);
18719 	ASSERT(xp != NULL);
18720 	ASSERT(pktp != NULL);
18721 
18722 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18723 	    "sd_pkt_status_qfull: entry\n");
18724 
18725 	/*
18726 	 * Just lower the QFULL throttle and retry the command.  Note that
18727 	 * we do not limit the number of retries here.
18728 	 */
18729 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
18730 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
18731 	    SD_RESTART_TIMEOUT, NULL);
18732 
18733 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18734 	    "sd_pkt_status_qfull: exit\n");
18735 }
18736 
18737 
18738 /*
18739  *    Function: sd_reset_target
18740  *
18741  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
18742  *		RESET_TARGET, or RESET_ALL.
18743  *
18744  *     Context: May be called under interrupt context.
18745  */
18746 
18747 static void
18748 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
18749 {
18750 	int rval = 0;
18751 
18752 	ASSERT(un != NULL);
18753 	ASSERT(mutex_owned(SD_MUTEX(un)));
18754 	ASSERT(pktp != NULL);
18755 
18756 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
18757 
18758 	/*
18759 	 * No need to reset if the transport layer has already done so.
18760 	 */
18761 	if ((pktp->pkt_statistics &
18762 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
18763 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18764 		    "sd_reset_target: no reset\n");
18765 		return;
18766 	}
18767 
18768 	mutex_exit(SD_MUTEX(un));
18769 
18770 	if (un->un_f_allow_bus_device_reset == TRUE) {
18771 		if (un->un_f_lun_reset_enabled == TRUE) {
18772 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18773 			    "sd_reset_target: RESET_LUN\n");
18774 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18775 		}
18776 		if (rval == 0) {
18777 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18778 			    "sd_reset_target: RESET_TARGET\n");
18779 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18780 		}
18781 	}
18782 
18783 	if (rval == 0) {
18784 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18785 		    "sd_reset_target: RESET_ALL\n");
18786 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
18787 	}
18788 
18789 	mutex_enter(SD_MUTEX(un));
18790 
18791 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
18792 }
18793 
18794 
18795 /*
18796  *    Function: sd_media_change_task
18797  *
18798  * Description: Recovery action for CDROM to become available.
18799  *
18800  *     Context: Executes in a taskq() thread context
18801  */
18802 
18803 static void
18804 sd_media_change_task(void *arg)
18805 {
18806 	struct	scsi_pkt	*pktp = arg;
18807 	struct	sd_lun		*un;
18808 	struct	buf		*bp;
18809 	struct	sd_xbuf		*xp;
18810 	int	err		= 0;
18811 	int	retry_count	= 0;
18812 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
18813 	struct	sd_sense_info	si;
18814 
18815 	ASSERT(pktp != NULL);
18816 	bp = (struct buf *)pktp->pkt_private;
18817 	ASSERT(bp != NULL);
18818 	xp = SD_GET_XBUF(bp);
18819 	ASSERT(xp != NULL);
18820 	un = SD_GET_UN(bp);
18821 	ASSERT(un != NULL);
18822 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18823 	ASSERT(un->un_f_monitor_media_state);
18824 
18825 	si.ssi_severity = SCSI_ERR_INFO;
18826 	si.ssi_pfa_flag = FALSE;
18827 
18828 	/*
18829 	 * When a reset is issued on a CDROM, it takes a long time to
18830 	 * When a reset is issued on a CDROM, it takes a long time to
18831 	 * recover.  The first few attempts to read the capacity and other
18832 	 * things related to handling the unit attention fail (with an ASC
18833 	 * of 0x4 and an ASCQ of 0x1).  In that case we want to do enough
18834 	 * retries, but we want to limit the retries in other cases of
18835 	 * genuine failure, such as no media in the drive.
18836 	while (retry_count++ < retry_limit) {
18837 		if ((err = sd_handle_mchange(un)) == 0) {
18838 			break;
18839 		}
18840 		if (err == EAGAIN) {
18841 			retry_limit = SD_UNIT_ATTENTION_RETRY;
18842 		}
18843 		/* Sleep for 0.5 sec. & try again */
18844 		delay(drv_usectohz(500000));
18845 	}
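	/*
	 * Illustrative arithmetic: each failed attempt above sleeps for
	 * 0.5 sec, so the loop waits at most retry_limit / 2 seconds; an
	 * EAGAIN raises retry_limit from SD_UNIT_ATTENTION_RETRY/10 to the
	 * full SD_UNIT_ATTENTION_RETRY.
	 */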
18846 
18847 	/*
18848 	 * Dispatch (retry or fail) the original command here,
18849 	 * along with appropriate console messages....
18850 	 *
18851 	 * Must grab the mutex before calling sd_retry_command,
18852 	 * sd_print_sense_msg and sd_return_failed_command.
18853 	 */
18854 	mutex_enter(SD_MUTEX(un));
18855 	if (err != SD_CMD_SUCCESS) {
18856 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18857 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18858 		si.ssi_severity = SCSI_ERR_FATAL;
18859 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18860 		sd_return_failed_command(un, bp, EIO);
18861 	} else {
18862 		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
18863 		    &si, EIO, (clock_t)0, NULL);
18864 	}
18865 	mutex_exit(SD_MUTEX(un));
18866 }
18867 
18868 
18869 
18870 /*
18871  *    Function: sd_handle_mchange
18872  *
18873  * Description: Perform geometry validation and other recovery when the
18874  *		medium in the CDROM drive has been changed.
18875  *
18876  * Return Code: 0 for success
18877  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
18878  *		sd_send_scsi_READ_CAPACITY()
18879  *
18880  *     Context: Executes in a taskq() thread context
18881  */
18882 
18883 static int
18884 sd_handle_mchange(struct sd_lun *un)
18885 {
18886 	uint64_t	capacity;
18887 	uint32_t	lbasize;
18888 	int		rval;
18889 
18890 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18891 	ASSERT(un->un_f_monitor_media_state);
18892 
18893 	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
18894 	    SD_PATH_DIRECT_PRIORITY)) != 0) {
18895 		return (rval);
18896 	}
18897 
18898 	mutex_enter(SD_MUTEX(un));
18899 	sd_update_block_info(un, lbasize, capacity);
18900 
18901 	if (un->un_errstats != NULL) {
18902 		struct	sd_errstats *stp =
18903 		    (struct sd_errstats *)un->un_errstats->ks_data;
18904 		stp->sd_capacity.value.ui64 = (uint64_t)
18905 		    ((uint64_t)un->un_blockcount *
18906 		    (uint64_t)un->un_tgt_blocksize);
18907 	}
18908 
18909 	/*
18910 	 * Note: Maybe let the strategy/partitioning chain worry about getting
18911 	 * valid geometry.
18912 	 */
18913 	un->un_f_geometry_is_valid = FALSE;
18914 	(void) sd_validate_geometry(un, SD_PATH_DIRECT_PRIORITY);
18915 	if (un->un_f_geometry_is_valid == FALSE) {
18916 		mutex_exit(SD_MUTEX(un));
18917 		return (EIO);
18918 	}
18919 
18920 	mutex_exit(SD_MUTEX(un));
18921 
18922 	/*
18923 	 * Try to lock the door
18924 	 */
18925 	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
18926 	    SD_PATH_DIRECT_PRIORITY));
18927 }
18928 
18929 
18930 /*
18931  *    Function: sd_send_scsi_DOORLOCK
18932  *
18933  * Description: Issue the scsi DOOR LOCK command
18934  *
18935  *   Arguments: un    - pointer to driver soft state (unit) structure for
18936  *			this target.
18937  *		flag  - SD_REMOVAL_ALLOW
18938  *			SD_REMOVAL_PREVENT
18939  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18940  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18941  *			to use the USCSI "direct" chain and bypass the normal
18942  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
18943  *			command is issued as part of an error recovery action.
18944  *
18945  * Return Code: 0   - Success
18946  *		errno return code from sd_send_scsi_cmd()
18947  *
18948  *     Context: Can sleep.
18949  */
18950 
18951 static int
18952 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
18953 {
18954 	union scsi_cdb		cdb;
18955 	struct uscsi_cmd	ucmd_buf;
18956 	struct scsi_extended_sense	sense_buf;
18957 	int			status;
18958 
18959 	ASSERT(un != NULL);
18960 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18961 
18962 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
18963 
18964 	/* Already determined that doorlock is not supported; fake success */
18965 	if (un->un_f_doorlock_supported == FALSE) {
18966 		return (0);
18967 	}
18968 
18969 	bzero(&cdb, sizeof (cdb));
18970 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18971 
18972 	cdb.scc_cmd = SCMD_DOORLOCK;
18973 	cdb.cdb_opaque[4] = (uchar_t)flag;
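	/*
	 * For PREVENT ALLOW MEDIUM REMOVAL, CDB byte 4 bit 0 is the
	 * Prevent bit; SD_REMOVAL_PREVENT and SD_REMOVAL_ALLOW are assumed
	 * to map directly onto the 1/0 values the command expects.
	 */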
18974 
18975 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18976 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
18977 	ucmd_buf.uscsi_bufaddr	= NULL;
18978 	ucmd_buf.uscsi_buflen	= 0;
18979 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18980 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
18981 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
18982 	ucmd_buf.uscsi_timeout	= 15;
18983 
18984 	SD_TRACE(SD_LOG_IO, un,
18985 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
18986 
18987 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
18988 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
18989 
18990 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
18991 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18992 	    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
18993 		/* fake success and skip subsequent doorlock commands */
18994 		un->un_f_doorlock_supported = FALSE;
18995 		return (0);
18996 	}
18997 
18998 	return (status);
18999 }
19000 
19001 /*
19002  *    Function: sd_send_scsi_READ_CAPACITY
19003  *
19004  * Description: This routine uses the scsi READ CAPACITY command to determine
19005  *		the device capacity in number of blocks and the device native
19006  *		block size. If this function returns a failure, then the
19007  *		values in *capp and *lbap are undefined.  If the capacity
19008  *		returned is 0xffffffff then the lun is too large for a
19009  *		normal READ CAPACITY command and the results of a
19010  *		READ CAPACITY 16 will be used instead.
19011  *
19012  *   Arguments: un   - ptr to soft state struct for the target
19013  *		capp - ptr to unsigned 64-bit variable to receive the
19014  *			capacity value from the command.
19015  *		lbap - ptr to unsigned 32-bit variable to receive the
19016  *			block size value from the command
19017  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19018  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19019  *			to use the USCSI "direct" chain and bypass the normal
19020  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19021  *			command is issued as part of an error recovery action.
19022  *
19023  * Return Code: 0   - Success
19024  *		EIO - IO error
19025  *		EACCES - Reservation conflict detected
19026  *		EAGAIN - Device is becoming ready
19027  *		errno return code from sd_send_scsi_cmd()
19028  *
19029  *     Context: Can sleep.  Blocks until command completes.
19030  */
19031 
19032 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
19033 
19034 static int
19035 sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
19036 	int path_flag)
19037 {
19038 	struct	scsi_extended_sense	sense_buf;
19039 	struct	uscsi_cmd	ucmd_buf;
19040 	union	scsi_cdb	cdb;
19041 	uint32_t		*capacity_buf;
19042 	uint64_t		capacity;
19043 	uint32_t		lbasize;
19044 	int			status;
19045 
19046 	ASSERT(un != NULL);
19047 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19048 	ASSERT(capp != NULL);
19049 	ASSERT(lbap != NULL);
19050 
19051 	SD_TRACE(SD_LOG_IO, un,
19052 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19053 
19054 	/*
19055 	 * First send a READ_CAPACITY command to the target.
19056 	 * (This command is mandatory under SCSI-2.)
19057 	 *
19058 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
19059 	 * Medium Indicator bit is cleared.  The address field must be
19060 	 * zero if the PMI bit is zero.
19061 	 */
19062 	bzero(&cdb, sizeof (cdb));
19063 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19064 
19065 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
19066 
19067 	cdb.scc_cmd = SCMD_READ_CAPACITY;
19068 
19069 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19070 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19071 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
19072 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
19073 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19074 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19075 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19076 	ucmd_buf.uscsi_timeout	= 60;
19077 
19078 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19079 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19080 
19081 	switch (status) {
19082 	case 0:
19083 		/* Return failure if we did not get valid capacity data. */
19084 		if (ucmd_buf.uscsi_resid != 0) {
19085 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19086 			return (EIO);
19087 		}
19088 
19089 		/*
19090 		 * Read capacity and block size from the READ CAPACITY 10 data.
19091 		 * This data may be adjusted later due to device specific
19092 		 * issues.
19093 		 *
19094 		 * According to the SCSI spec, the READ CAPACITY 10
19095 		 * command returns the following:
19096 		 *
19097 		 *  bytes 0-3: Maximum logical block address available.
19098 		 *		(MSB in byte:0 & LSB in byte:3)
19099 		 *
19100 		 *  bytes 4-7: Block length in bytes
19101 		 *		(MSB in byte:4 & LSB in byte:7)
19102 		 *
19103 		 */
19104 		capacity = BE_32(capacity_buf[0]);
19105 		lbasize = BE_32(capacity_buf[1]);
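		/*
		 * Illustrative sketch (not part of the driver logic): the
		 * same fields could be decoded byte-by-byte from a uchar_t
		 * view of the data, per the layout described above:
		 *
		 *	uchar_t *p = (uchar_t *)capacity_buf;
		 *	uint32_t max_lba = (p[0] << 24) | (p[1] << 16) |
		 *	    (p[2] << 8) | p[3];
		 *	uint32_t blklen = (p[4] << 24) | (p[5] << 16) |
		 *	    (p[6] << 8) | p[7];
		 *
		 * BE_32() performs the equivalent big-endian conversion.
		 */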
19106 
19107 		/*
19108 		 * Done with capacity_buf
19109 		 */
19110 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19111 
19112 		/*
19113 		 * If the reported capacity is set to all 0xf's, then
19114 		 * this disk is too large and requires SBC-2 commands.
19115 		 * Reissue the request using READ CAPACITY 16.
19116 		 */
19117 		if (capacity == 0xffffffff) {
19118 			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
19119 			    &lbasize, path_flag);
19120 			if (status != 0) {
19121 				return (status);
19122 			}
19123 		}
19124 		break;	/* Success! */
19125 	case EIO:
19126 		switch (ucmd_buf.uscsi_status) {
19127 		case STATUS_RESERVATION_CONFLICT:
19128 			status = EACCES;
19129 			break;
19130 		case STATUS_CHECK:
19131 			/*
19132 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19133 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19134 			 */
19135 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19136 			    (sense_buf.es_add_code  == 0x04) &&
19137 			    (sense_buf.es_qual_code == 0x01)) {
19138 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19139 				return (EAGAIN);
19140 			}
19141 			break;
19142 		default:
19143 			break;
19144 		}
19145 		/* FALLTHRU */
19146 	default:
19147 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
19148 		return (status);
19149 	}
19150 
19151 	/*
19152 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
19153 	 * (2352 and 0 are common) so for these devices always force the value
19154 	 * to 2048 as required by the ATAPI specs.
19155 	 */
19156 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
19157 		lbasize = 2048;
19158 	}
19159 
19160 	/*
19161 	 * Get the maximum LBA value from the READ CAPACITY data.
19162 	 * Here we assume that the Partial Medium Indicator (PMI) bit
19163 	 * was cleared when issuing the command. This means that the LBA
19164 	 * returned from the device is the LBA of the last logical block
19165 	 * on the logical unit.  The actual logical block count will be
19166 	 * this value plus one.
19167 	 *
19168 	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
19169 	 * so scale the capacity value to reflect this.
19170 	 */
19171 	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
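	/*
	 * Worked example (hypothetical values): a device reporting a
	 * maximum LBA of 0x3FFFFF with a 2048-byte lbasize and a 512-byte
	 * un_sys_blocksize yields (0x3FFFFF + 1) * (2048 / 512) =
	 * 0x1000000 system blocks.
	 */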
19172 
19173 #if defined(__i386) || defined(__amd64)
19174 	/*
19175 	 * On x86, compensate for off-by-1 error (number of sectors on
19176 	 * media)  (1175930)
19177 	 */
19178 	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
19179 	    (lbasize == un->un_sys_blocksize)) {
19180 		capacity -= 1;
19181 	}
19182 #endif
19183 
19184 	/*
19185 	 * Copy the values from the READ CAPACITY command into the space
19186 	 * provided by the caller.
19187 	 */
19188 	*capp = capacity;
19189 	*lbap = lbasize;
19190 
19191 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
19192 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19193 
19194 	/*
19195 	 * Both the lbasize and capacity from the device must be nonzero,
19196 	 * otherwise we assume that the values are not valid and return
19197 	 * failure to the caller. (4203735)
19198 	 */
19199 	if ((capacity == 0) || (lbasize == 0)) {
19200 		return (EIO);
19201 	}
19202 
19203 	return (0);
19204 }
19205 
19206 /*
19207  *    Function: sd_send_scsi_READ_CAPACITY_16
19208  *
19209  * Description: This routine uses the scsi READ CAPACITY 16 command to
19210  *		determine the device capacity in number of blocks and the
19211  *		device native block size.  If this function returns a failure,
19212  *		then the values in *capp and *lbap are undefined.
19213  *		This routine should always be called by
19214  *		sd_send_scsi_READ_CAPACITY, which will apply any device-
19215  *		specific adjustments to capacity and lbasize.
19216  *
19217  *   Arguments: un   - ptr to soft state struct for the target
19218  *		capp - ptr to unsigned 64-bit variable to receive the
19219  *			capacity value from the command.
19220  *		lbap - ptr to unsigned 32-bit variable to receive the
19221  *			block size value from the command
19222  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19223  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19224  *			to use the USCSI "direct" chain and bypass the normal
19225  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
19226  *			this command is issued as part of an error recovery
19227  *			action.
19228  *
19229  * Return Code: 0   - Success
19230  *		EIO - IO error
19231  *		EACCES - Reservation conflict detected
19232  *		EAGAIN - Device is becoming ready
19233  *		errno return code from sd_send_scsi_cmd()
19234  *
19235  *     Context: Can sleep.  Blocks until command completes.
19236  */
19237 
19238 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
19239 
19240 static int
19241 sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
19242 	uint32_t *lbap, int path_flag)
19243 {
19244 	struct	scsi_extended_sense	sense_buf;
19245 	struct	uscsi_cmd	ucmd_buf;
19246 	union	scsi_cdb	cdb;
19247 	uint64_t		*capacity16_buf;
19248 	uint64_t		capacity;
19249 	uint32_t		lbasize;
19250 	int			status;
19251 
19252 	ASSERT(un != NULL);
19253 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19254 	ASSERT(capp != NULL);
19255 	ASSERT(lbap != NULL);
19256 
19257 	SD_TRACE(SD_LOG_IO, un,
19258 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19259 
19260 	/*
19261 	 * First send a READ_CAPACITY_16 command to the target.
19262 	 *
19263 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
19264 	 * Medium Indicator bit is cleared.  The address field must be
19265 	 * zero if the PMI bit is zero.
19266 	 */
19267 	bzero(&cdb, sizeof (cdb));
19268 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19269 
19270 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
19271 
19272 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19273 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
19274 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
19275 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
19276 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19277 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19278 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19279 	ucmd_buf.uscsi_timeout	= 60;
19280 
19281 	/*
19282 	 * Read Capacity (16) is a Service Action In command.  One
19283 	 * command byte (0x9E) is overloaded for multiple operations,
19284 	 * with the second CDB byte specifying the desired operation
19285 	 */
19286 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
19287 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
19288 
19289 	/*
19290 	 * Fill in allocation length field
19291 	 */
19292 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
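	/*
	 * Note: for a Group 4 CDB the allocation length is a 32-bit
	 * field, so the full SD_CAPACITY_16_SIZE value fits.
	 */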
19293 
19294 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19295 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19296 
19297 	switch (status) {
19298 	case 0:
19299 		/* Return failure if we did not get valid capacity data. */
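		/*
		 * (Only the first 12 bytes, the capacity and block length,
		 * are needed, hence a residual of up to 20 of the 32
		 * returned bytes is tolerated.)
		 */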
19300 		if (ucmd_buf.uscsi_resid > 20) {
19301 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19302 			return (EIO);
19303 		}
19304 
19305 		/*
19306 		 * Read capacity and block size from the READ CAPACITY 16 data.
19307 		 * This data may be adjusted later due to device specific
19308 		 * issues.
19309 		 *
19310 		 * According to the SCSI spec, the READ CAPACITY 16
19311 		 * command returns the following:
19312 		 *
19313 		 *  bytes 0-7: Maximum logical block address available.
19314 		 *		(MSB in byte:0 & LSB in byte:7)
19315 		 *
19316 		 *  bytes 8-11: Block length in bytes
19317 		 *		(MSB in byte:8 & LSB in byte:11)
19318 		 *
19319 		 */
19320 		capacity = BE_64(capacity16_buf[0]);
19321 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
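		/*
		 * Note: capacity16_buf is declared as uint64_t[], so
		 * &capacity16_buf[1] is byte offset 8 of the returned data;
		 * the uint32_t cast therefore reads bytes 8-11 (the block
		 * length field).
		 */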
19322 
19323 		/*
19324 		 * Done with capacity16_buf
19325 		 */
19326 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19327 
19328 		/*
19329 		 * If the reported capacity is set to all 0xf's, then
19330 		 * this disk is too large.  This could only happen with
19331 		 * a device that supports LBAs larger than 64 bits, which
19332 		 * are not defined by any current T10 standards.
19333 		 */
19334 		if (capacity == 0xffffffffffffffff) {
19335 			return (EIO);
19336 		}
19337 		break;	/* Success! */
19338 	case EIO:
19339 		switch (ucmd_buf.uscsi_status) {
19340 		case STATUS_RESERVATION_CONFLICT:
19341 			status = EACCES;
19342 			break;
19343 		case STATUS_CHECK:
19344 			/*
19345 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19346 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19347 			 */
19348 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19349 			    (sense_buf.es_add_code  == 0x04) &&
19350 			    (sense_buf.es_qual_code == 0x01)) {
19351 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19352 				return (EAGAIN);
19353 			}
19354 			break;
19355 		default:
19356 			break;
19357 		}
19358 		/* FALLTHRU */
19359 	default:
19360 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19361 		return (status);
19362 	}
19363 
19364 	*capp = capacity;
19365 	*lbap = lbasize;
19366 
19367 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
19368 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19369 
19370 	return (0);
19371 }
19372 
19373 
19374 /*
19375  *    Function: sd_send_scsi_START_STOP_UNIT
19376  *
19377  * Description: Issue a scsi START STOP UNIT command to the target.
19378  *
19379  *   Arguments: un    - pointer to driver soft state (unit) structure for
19380  *			this target.
19381  *		flag  - SD_TARGET_START
19382  *			SD_TARGET_STOP
19383  *			SD_TARGET_EJECT
19384  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19385  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19386  *			to use the USCSI "direct" chain and bypass the normal
19387  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19388  *			command is issued as part of an error recovery action.
19389  *
19390  * Return Code: 0   - Success
19391  *		EIO - IO error
19392  *		EACCES - Reservation conflict detected
19393  *		ENXIO  - Not Ready, medium not present
19394  *		errno return code from sd_send_scsi_cmd()
19395  *
19396  *     Context: Can sleep.
19397  */
19398 
19399 static int
19400 sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
19401 {
19402 	struct	scsi_extended_sense	sense_buf;
19403 	union scsi_cdb		cdb;
19404 	struct uscsi_cmd	ucmd_buf;
19405 	int			status;
19406 
19407 	ASSERT(un != NULL);
19408 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19409 
19410 	SD_TRACE(SD_LOG_IO, un,
19411 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
19412 
19413 	if (un->un_f_check_start_stop &&
19414 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
19415 	    (un->un_f_start_stop_supported != TRUE)) {
19416 		return (0);
19417 	}
19418 
19419 	bzero(&cdb, sizeof (cdb));
19420 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19421 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19422 
19423 	cdb.scc_cmd = SCMD_START_STOP;
19424 	cdb.cdb_opaque[4] = (uchar_t)flag;
19425 
19426 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19427 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19428 	ucmd_buf.uscsi_bufaddr	= NULL;
19429 	ucmd_buf.uscsi_buflen	= 0;
19430 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19431 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19432 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19433 	ucmd_buf.uscsi_timeout	= 200;
19434 
19435 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19436 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19437 
19438 	switch (status) {
19439 	case 0:
19440 		break;	/* Success! */
19441 	case EIO:
19442 		switch (ucmd_buf.uscsi_status) {
19443 		case STATUS_RESERVATION_CONFLICT:
19444 			status = EACCES;
19445 			break;
19446 		case STATUS_CHECK:
19447 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
19448 				switch (sense_buf.es_key) {
19449 				case KEY_ILLEGAL_REQUEST:
19450 					status = ENOTSUP;
19451 					break;
19452 				case KEY_NOT_READY:
19453 					if (sense_buf.es_add_code == 0x3A) {
19454 						status = ENXIO;
19455 					}
19456 					break;
19457 				default:
19458 					break;
19459 				}
19460 			}
19461 			break;
19462 		default:
19463 			break;
19464 		}
19465 		break;
19466 	default:
19467 		break;
19468 	}
19469 
19470 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
19471 
19472 	return (status);
19473 }
19474 
19475 
19476 /*
19477  *    Function: sd_start_stop_unit_callback
19478  *
19479  * Description: timeout(9F) callback to begin recovery process for a
19480  *		device that has spun down.
19481  *
19482  *   Arguments: arg - pointer to associated softstate struct.
19483  *
19484  *     Context: Executes in a timeout(9F) thread context
19485  */
19486 
19487 static void
19488 sd_start_stop_unit_callback(void *arg)
19489 {
19490 	struct sd_lun	*un = arg;
19491 	ASSERT(un != NULL);
19492 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19493 
19494 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
19495 
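	/*
	 * KM_NOSLEEP is used because a timeout(9F) callback must not
	 * block; the dispatch can therefore fail under memory pressure,
	 * and the (void) cast discards any such failure.
	 */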
19496 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
19497 }
19498 
19499 
19500 /*
19501  *    Function: sd_start_stop_unit_task
19502  *
19503  * Description: Recovery procedure when a drive is spun down.
19504  *
19505  *   Arguments: arg - pointer to associated softstate struct.
19506  *
19507  *     Context: Executes in a taskq() thread context
19508  */
19509 
19510 static void
19511 sd_start_stop_unit_task(void *arg)
19512 {
19513 	struct sd_lun	*un = arg;
19514 
19515 	ASSERT(un != NULL);
19516 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19517 
19518 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
19519 
19520 	/*
19521 	 * Some unformatted drives report a "not ready" error; there is no
19522 	 * need to restart if a format has been initiated.
19523 	 */
19524 	mutex_enter(SD_MUTEX(un));
19525 	if (un->un_f_format_in_progress == TRUE) {
19526 		mutex_exit(SD_MUTEX(un));
19527 		return;
19528 	}
19529 	mutex_exit(SD_MUTEX(un));
19530 
19531 	/*
19532 	 * When a START STOP command is issued from here, it is part of a
19533 	 * failure recovery operation and must be issued before any other
19534 	 * commands, including any pending retries. Thus it must be sent
19535 	 * using SD_PATH_DIRECT_PRIORITY.  Whether or not the spin-up
19536 	 * succeeds, we will start I/O after the attempt.
19537 	 */
19538 	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
19539 	    SD_PATH_DIRECT_PRIORITY);
19540 
19541 	/*
19542 	 * The above call blocks until the START_STOP_UNIT command completes.
19543 	 * Now that it has completed, we must re-try the original IO that
19544 	 * received the NOT READY condition in the first place. There are
19545 	 * three possible conditions here:
19546 	 *
19547 	 *  (1) The original IO is on un_retry_bp.
19548 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
19549 	 *	is NULL.
19550 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
19551 	 *	points to some other, unrelated bp.
19552 	 *
19553 	 * For each case, we must call sd_start_cmds() with un_retry_bp
19554 	 * as the argument. If un_retry_bp is NULL, this will initiate
19555 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
19556 	 * then this will process the bp on un_retry_bp. That may or may not
19557 	 * be the original IO, but that does not matter: the important thing
19558 	 * is to keep the IO processing going at this point.
19559 	 *
19560 	 * Note: This is a very specific error recovery sequence associated
19561 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
19562 	 * serialize the I/O with completion of the spin-up.
19563 	 */
19564 	mutex_enter(SD_MUTEX(un));
19565 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19566 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
19567 	    un, un->un_retry_bp);
19568 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
19569 	sd_start_cmds(un, un->un_retry_bp);
19570 	mutex_exit(SD_MUTEX(un));
19571 
19572 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
19573 }
19574 
19575 
19576 /*
19577  *    Function: sd_send_scsi_INQUIRY
19578  *
19579  * Description: Issue the scsi INQUIRY command.
19580  *
19581  *   Arguments: un
19582  *		bufaddr
19583  *		buflen
19584  *		evpd
19585  *		page_code
19586  *		page_length
19587  *
19588  * Return Code: 0   - Success
19589  *		errno return code from sd_send_scsi_cmd()
19590  *
19591  *     Context: Can sleep. Does not return until command is completed.
19592  */
19593 
19594 static int
19595 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
19596 	uchar_t evpd, uchar_t page_code, size_t *residp)
19597 {
19598 	union scsi_cdb		cdb;
19599 	struct uscsi_cmd	ucmd_buf;
19600 	int			status;
19601 
19602 	ASSERT(un != NULL);
19603 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19604 	ASSERT(bufaddr != NULL);
19605 
19606 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
19607 
19608 	bzero(&cdb, sizeof (cdb));
19609 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19610 	bzero(bufaddr, buflen);
19611 
19612 	cdb.scc_cmd = SCMD_INQUIRY;
19613 	cdb.cdb_opaque[1] = evpd;
19614 	cdb.cdb_opaque[2] = page_code;
19615 	FORMG0COUNT(&cdb, buflen);
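	/*
	 * Note: with the Group 0 (6-byte) CDB used here, the allocation
	 * length set above is a single byte, so buflen is assumed to be
	 * at most 255.
	 */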
19616 
19617 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19618 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19619 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19620 	ucmd_buf.uscsi_buflen	= buflen;
19621 	ucmd_buf.uscsi_rqbuf	= NULL;
19622 	ucmd_buf.uscsi_rqlen	= 0;
19623 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
19624 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
19625 
19626 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19627 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_DIRECT);
19628 
19629 	if ((status == 0) && (residp != NULL)) {
19630 		*residp = ucmd_buf.uscsi_resid;
19631 	}
19632 
19633 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
19634 
19635 	return (status);
19636 }
19637 
19638 
19639 /*
19640  *    Function: sd_send_scsi_TEST_UNIT_READY
19641  *
19642  * Description: Issue the scsi TEST UNIT READY command.
19643  *		This routine can be told to set the flag USCSI_DIAGNOSE to
19644  *		prevent retrying failed commands. Use this when the intent
19645  *		is either to check for device readiness, to clear a Unit
19646  *		Attention, or to clear any outstanding sense data.
19647  *		However, under specific conditions the expected behavior
19648  *		is for retries to bring a device ready, so use the flag
19649  *		with caution.
19650  *
19651  *   Arguments: un
19652  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
19653  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
19654  *			0: don't check for media present; do retries on cmd.
19655  *
19656  * Return Code: 0   - Success
19657  *		EIO - IO error
19658  *		EACCES - Reservation conflict detected
19659  *		ENXIO  - Not Ready, medium not present
19660  *		errno return code from sd_send_scsi_cmd()
19661  *
19662  *     Context: Can sleep. Does not return until command is completed.
19663  */
19664 
19665 static int
19666 sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
19667 {
19668 	struct	scsi_extended_sense	sense_buf;
19669 	union scsi_cdb		cdb;
19670 	struct uscsi_cmd	ucmd_buf;
19671 	int			status;
19672 
19673 	ASSERT(un != NULL);
19674 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19675 
19676 	SD_TRACE(SD_LOG_IO, un,
19677 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
19678 
19679 	/*
19680 	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
19681 	 * timeouts when they receive a TUR and the queue is not empty. Check
19682 	 * the configuration flag set during attach (indicating the drive has
19683 	 * this firmware bug) and un_ncmds_in_transport before issuing the
19684 	 * TUR.  If there are pending commands, we return success; this
19685 	 * is a bit arbitrary, but it is acceptable for non-removables
19686 	 * (i.e. the eliteI disks) and for non-clustering
19687 	 * configurations.
19688 	 */
19689 	if (un->un_f_cfg_tur_check == TRUE) {
19690 		mutex_enter(SD_MUTEX(un));
19691 		if (un->un_ncmds_in_transport != 0) {
19692 			mutex_exit(SD_MUTEX(un));
19693 			return (0);
19694 		}
19695 		mutex_exit(SD_MUTEX(un));
19696 	}
19697 
19698 	bzero(&cdb, sizeof (cdb));
19699 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19700 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19701 
19702 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
19703 
19704 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19705 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19706 	ucmd_buf.uscsi_bufaddr	= NULL;
19707 	ucmd_buf.uscsi_buflen	= 0;
19708 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19709 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19710 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19711 
19712 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
19713 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
19714 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
19715 	}
19716 	ucmd_buf.uscsi_timeout	= 60;
19717 
19718 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19719 	    UIO_SYSSPACE, UIO_SYSSPACE,
19720 	    ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT : SD_PATH_STANDARD));
19721 
19722 	switch (status) {
19723 	case 0:
19724 		break;	/* Success! */
19725 	case EIO:
19726 		switch (ucmd_buf.uscsi_status) {
19727 		case STATUS_RESERVATION_CONFLICT:
19728 			status = EACCES;
19729 			break;
19730 		case STATUS_CHECK:
19731 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
19732 				break;
19733 			}
19734 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19735 			    (sense_buf.es_key == KEY_NOT_READY) &&
19736 			    (sense_buf.es_add_code == 0x3A)) {
19737 				status = ENXIO;
19738 			}
19739 			break;
19740 		default:
19741 			break;
19742 		}
19743 		break;
19744 	default:
19745 		break;
19746 	}
19747 
19748 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
19749 
19750 	return (status);
19751 }
19752 
19753 
19754 /*
19755  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
19756  *
19757  * Description: Issue the scsi PERSISTENT RESERVE IN command.
19758  *
19759  *   Arguments: un
19760  *
19761  * Return Code: 0   - Success
19762  *		EACCES
19763  *		ENOTSUP
19764  *		errno return code from sd_send_scsi_cmd()
19765  *
19766  *     Context: Can sleep. Does not return until command is completed.
19767  */
19768 
19769 static int
19770 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
19771 	uint16_t data_len, uchar_t *data_bufp)
19772 {
19773 	struct scsi_extended_sense	sense_buf;
19774 	union scsi_cdb		cdb;
19775 	struct uscsi_cmd	ucmd_buf;
19776 	int			status;
19777 	int			no_caller_buf = FALSE;
19778 
19779 	ASSERT(un != NULL);
19780 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19781 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
19782 
19783 	SD_TRACE(SD_LOG_IO, un,
19784 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
19785 
19786 	bzero(&cdb, sizeof (cdb));
19787 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19788 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19789 	if (data_bufp == NULL) {
19790 		/* Allocate a default buf if the caller did not give one */
19791 		ASSERT(data_len == 0);
19792 		data_len  = MHIOC_RESV_KEY_SIZE;
19793 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
19794 		no_caller_buf = TRUE;
19795 	}
19796 
19797 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
19798 	cdb.cdb_opaque[1] = usr_cmd;
19799 	FORMG1COUNT(&cdb, data_len);
19800 
19801 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19802 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19803 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
19804 	ucmd_buf.uscsi_buflen	= data_len;
19805 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19806 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19807 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19808 	ucmd_buf.uscsi_timeout	= 60;
19809 
19810 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19811 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
19812 
19813 	switch (status) {
19814 	case 0:
19815 		break;	/* Success! */
19816 	case EIO:
19817 		switch (ucmd_buf.uscsi_status) {
19818 		case STATUS_RESERVATION_CONFLICT:
19819 			status = EACCES;
19820 			break;
19821 		case STATUS_CHECK:
19822 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19823 			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
19824 				status = ENOTSUP;
19825 			}
19826 			break;
19827 		default:
19828 			break;
19829 		}
19830 		break;
19831 	default:
19832 		break;
19833 	}
19834 
19835 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
19836 
19837 	if (no_caller_buf == TRUE) {
19838 		kmem_free(data_bufp, data_len);
19839 	}
19840 
19841 	return (status);
19842 }
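
/*
 * Usage sketch (hypothetical, for illustration only; the buffer size is an
 * arbitrary choice): read the reservation keys registered with the target.
 *
 *	uchar_t	keybuf[MHIOC_RESV_KEY_SIZE * 8];
 *	int	status;
 *
 *	status = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
 *	    sizeof (keybuf), keybuf);
 *	if (status == ENOTSUP) {
 *		... target does not support SCSI-3 persistent reservations ...
 *	}
 */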
19843 
19844 
19845 /*
19846  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
19847  *
 * Description: This routine is the driver entry point for handling the
 *		multi-host persistent reservation requests (MHIOCGRP_REGISTER,
 *		MHIOCGRP_RESERVE, MHIOCGRP_PREEMPTANDABORT,
 *		MHIOCGRP_REGISTERANDIGNOREKEY) by sending the SCSI-3 PROUT
 *		commands to the device.
 *
 *   Arguments: un  -   Pointer to soft state struct for the target.
 *		usr_cmd SCSI-3 reservation facility command (one of
 *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
 *			SD_SCSI3_PREEMPTANDABORT,
 *			SD_SCSI3_REGISTERANDIGNOREKEY)
 *		usr_bufp - user-provided pointer to a register, reserve
 *			descriptor, or preempt-and-abort structure
 *			(mhioc_register_t, mhioc_resv_desc_t,
 *			mhioc_preemptandabort_t)
19860  *
19861  * Return Code: 0   - Success
19862  *		EACCES
19863  *		ENOTSUP
19864  *		errno return code from sd_send_scsi_cmd()
19865  *
19866  *     Context: Can sleep. Does not return until command is completed.
19867  */
19868 
19869 static int
19870 sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
19871 	uchar_t	*usr_bufp)
19872 {
19873 	struct scsi_extended_sense	sense_buf;
19874 	union scsi_cdb		cdb;
19875 	struct uscsi_cmd	ucmd_buf;
19876 	int			status;
19877 	uchar_t			data_len = sizeof (sd_prout_t);
19878 	sd_prout_t		*prp;
19879 
19880 	ASSERT(un != NULL);
19881 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19882 	ASSERT(data_len == 24);	/* required by scsi spec */
19883 
19884 	SD_TRACE(SD_LOG_IO, un,
19885 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
19886 
19887 	if (usr_bufp == NULL) {
19888 		return (EINVAL);
19889 	}
19890 
19891 	bzero(&cdb, sizeof (cdb));
19892 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19893 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19894 	prp = kmem_zalloc(data_len, KM_SLEEP);
19895 
19896 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
19897 	cdb.cdb_opaque[1] = usr_cmd;
19898 	FORMG1COUNT(&cdb, data_len);
19899 
19900 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19901 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19902 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
19903 	ucmd_buf.uscsi_buflen	= data_len;
19904 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19905 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19906 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
19907 	ucmd_buf.uscsi_timeout	= 60;
19908 
19909 	switch (usr_cmd) {
19910 	case SD_SCSI3_REGISTER: {
19911 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
19912 
19913 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
19914 		bcopy(ptr->newkey.key, prp->service_key,
19915 		    MHIOC_RESV_KEY_SIZE);
19916 		prp->aptpl = ptr->aptpl;
19917 		break;
19918 	}
19919 	case SD_SCSI3_RESERVE:
19920 	case SD_SCSI3_RELEASE: {
19921 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
19922 
19923 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
19924 		prp->scope_address = BE_32(ptr->scope_specific_addr);
19925 		cdb.cdb_opaque[2] = ptr->type;
19926 		break;
19927 	}
19928 	case SD_SCSI3_PREEMPTANDABORT: {
19929 		mhioc_preemptandabort_t *ptr =
19930 		    (mhioc_preemptandabort_t *)usr_bufp;
19931 
19932 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
19933 		bcopy(ptr->victim_key.key, prp->service_key,
19934 		    MHIOC_RESV_KEY_SIZE);
19935 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
19936 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
19937 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
19938 		break;
19939 	}
19940 	case SD_SCSI3_REGISTERANDIGNOREKEY:
19941 	{
19942 		mhioc_registerandignorekey_t *ptr;
19943 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
19944 		bcopy(ptr->newkey.key,
19945 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
19946 		prp->aptpl = ptr->aptpl;
19947 		break;
19948 	}
19949 	default:
19950 		ASSERT(FALSE);
19951 		break;
19952 	}
19953 
19954 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19955 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
19956 
19957 	switch (status) {
19958 	case 0:
19959 		break;	/* Success! */
19960 	case EIO:
19961 		switch (ucmd_buf.uscsi_status) {
19962 		case STATUS_RESERVATION_CONFLICT:
19963 			status = EACCES;
19964 			break;
19965 		case STATUS_CHECK:
19966 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19967 			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
19968 				status = ENOTSUP;
19969 			}
19970 			break;
19971 		default:
19972 			break;
19973 		}
19974 		break;
19975 	default:
19976 		break;
19977 	}
19978 
19979 	kmem_free(prp, data_len);
19980 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
19981 	return (status);
19982 }
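
/*
 * Usage sketch (hypothetical, for illustration only; the key value is made
 * up): register a reservation key, as the MHIOCGRP_REGISTER case of
 * sdioctl() below does with a user-supplied descriptor.
 *
 *	mhioc_register_t reg;
 *
 *	bzero(&reg, sizeof (reg));
 *	bcopy("KEY00001", reg.newkey.key, MHIOC_RESV_KEY_SIZE);
 *	if (sd_send_scsi_PERSISTENT_RESERVE_OUT(un, SD_SCSI3_REGISTER,
 *	    (uchar_t *)&reg) == EACCES) {
 *		... reservation conflict ...
 *	}
 */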
19983 
19984 
19985 /*
19986  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
19987  *
19988  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
19989  *
19990  *   Arguments: un - pointer to the target's soft state struct
19991  *
19992  * Return Code: 0 - success
19993  *		errno-type error code
19994  *
19995  *     Context: kernel thread context only.
19996  */
19997 
19998 static int
19999 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
20000 {
20001 	struct sd_uscsi_info	*uip;
20002 	struct uscsi_cmd	*uscmd;
20003 	union scsi_cdb		*cdb;
20004 	struct buf		*bp;
20005 	int			rval = 0;
20006 
20007 	SD_TRACE(SD_LOG_IO, un,
20008 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
20009 
20010 	ASSERT(un != NULL);
20011 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20012 
20013 	/*
20014 	 * First get some memory for the cdb and the uscsi_cmd struct,
20015 	 * and initialize them for the SYNCHRONIZE_CACHE cmd.
20016 	 */
20017 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
20018 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
20019 
20020 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
20021 	uscmd->uscsi_cdblen = CDB_GROUP1;
20022 	uscmd->uscsi_cdb = (caddr_t)cdb;
20023 	uscmd->uscsi_bufaddr = NULL;
20024 	uscmd->uscsi_buflen = 0;
20025 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20026 	uscmd->uscsi_rqlen = SENSE_LENGTH;
20027 	uscmd->uscsi_rqresid = SENSE_LENGTH;
20028 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
20029 	uscmd->uscsi_timeout = sd_io_time;
20030 
20031 	/*
20032 	 * Allocate an sd_uscsi_info struct and fill it with the info
20033 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
20034 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
20035 	 * since we allocate the buf here in this function, we do not
20036 	 * need to preserve the prior contents of b_private.
20037 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
20038 	 */
20039 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
20040 	uip->ui_flags = SD_PATH_DIRECT;
20041 	uip->ui_cmdp  = uscmd;
20042 
20043 	bp = getrbuf(KM_SLEEP);
20044 	bp->b_private = uip;
20045 
20046 	/*
20047 	 * Setup buffer to carry uscsi request.
20048 	 */
20049 	bp->b_flags  = B_BUSY;
20050 	bp->b_bcount = 0;
20051 	bp->b_blkno  = 0;
20052 
20053 	if (dkc != NULL) {
20054 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
20055 		uip->ui_dkc = *dkc;
20056 	}
20057 
20058 	bp->b_edev = SD_GET_DEV(un);
20059 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
20060 
20061 	(void) sd_uscsi_strategy(bp);
20062 
20063 	/*
20064 	 * If this is a synchronous request, wait for completion.
20065 	 * If it is async, just return and let the b_iodone
20066 	 * callback clean up.
20067 	 * NOTE: On return, un_ncmds_in_driver will be decremented,
20068 	 * but it was also incremented in sd_uscsi_strategy(), so
20069 	 * we should be OK.
20070 	 */
20071 	if (dkc == NULL) {
20072 		(void) biowait(bp);
20073 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
20074 	}
20075 
20076 	return (rval);
20077 }
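
/*
 * Usage sketch (hypothetical, for illustration only; the callback and
 * cookie are placeholders): flush the write cache asynchronously and get
 * notified on completion.
 *
 *	struct dk_callback dkc;
 *
 *	dkc.dkc_callback = my_flush_done;	(hypothetical callback)
 *	dkc.dkc_cookie   = my_cookie;		(hypothetical cookie)
 *	(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, &dkc);
 *
 * Passing a NULL dkc instead makes the call synchronous: the routine then
 * waits in biowait() and returns the command status directly.
 */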
20078 
20079 
20080 static int
20081 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
20082 {
20083 	struct sd_uscsi_info *uip;
20084 	struct uscsi_cmd *uscmd;
20085 	struct scsi_extended_sense *sense_buf;
20086 	struct sd_lun *un;
20087 	int status;
20088 
20089 	uip = (struct sd_uscsi_info *)(bp->b_private);
20090 	ASSERT(uip != NULL);
20091 
20092 	uscmd = uip->ui_cmdp;
20093 	ASSERT(uscmd != NULL);
20094 
20095 	sense_buf = (struct scsi_extended_sense *)uscmd->uscsi_rqbuf;
20096 	ASSERT(sense_buf != NULL);
20097 
20098 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
20099 	ASSERT(un != NULL);
20100 
20101 	status = geterror(bp);
20102 	switch (status) {
20103 	case 0:
20104 		break;	/* Success! */
20105 	case EIO:
20106 		switch (uscmd->uscsi_status) {
20107 		case STATUS_RESERVATION_CONFLICT:
20108 			/* Ignore reservation conflict */
20109 			status = 0;
20110 			goto done;
20111 
20112 		case STATUS_CHECK:
20113 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
20114 			    (sense_buf->es_key == KEY_ILLEGAL_REQUEST)) {
20115 				/* Ignore Illegal Request error */
20116 				mutex_enter(SD_MUTEX(un));
20117 				un->un_f_sync_cache_supported = FALSE;
20118 				mutex_exit(SD_MUTEX(un));
20119 				status = ENOTSUP;
20120 				goto done;
20121 			}
20122 			break;
20123 		default:
20124 			break;
20125 		}
20126 		/* FALLTHRU */
20127 	default:
20128 		/* Ignore error if the media is not present */
20129 		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
20130 			status = 0;
20131 			goto done;
20132 		}
20133 		/* If we reach this, we had an error */
20134 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
20135 		    "SYNCHRONIZE CACHE command failed (%d)\n", status);
20136 		break;
20137 	}
20138 
20139 done:
20140 	if (uip->ui_dkc.dkc_callback != NULL) {
20141 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
20142 	}
20143 
20144 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
20145 	freerbuf(bp);
20146 	kmem_free(uip, sizeof (struct sd_uscsi_info));
20147 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
20148 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
20149 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
20150 
20151 	return (status);
20152 }
20153 
20154 
20155 /*
20156  *    Function: sd_send_scsi_GET_CONFIGURATION
20157  *
20158  * Description: Issues the get configuration command to the device.
20159  *		Called from sd_check_for_writable_cd & sd_get_media_info.
20160  *		The caller must ensure that buflen = SD_PROFILE_HEADER_LEN.
20161  *   Arguments: un - pointer to the soft state struct for the target
20162  *		ucmdbuf - uscsi command buffer to be filled in and issued
20163  *		rqbuf - buffer for the request sense data
20164  *		rqbuflen - length of the request sense buffer
20165  *		bufaddr - buffer to receive the configuration data
20166  *		buflen - length of the data buffer
20167  *
20168  * Return Code: 0   - Success
20169  *		errno return code from sd_send_scsi_cmd()
20170  *
20171  *     Context: Can sleep. Does not return until command is completed.
20172  *
20173  */
20174 
20175 static int
20176 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
20177 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen)
20178 {
20179 	char	cdb[CDB_GROUP1];
20180 	int	status;
20181 
20182 	ASSERT(un != NULL);
20183 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20184 	ASSERT(bufaddr != NULL);
20185 	ASSERT(ucmdbuf != NULL);
20186 	ASSERT(rqbuf != NULL);
20187 
20188 	SD_TRACE(SD_LOG_IO, un,
20189 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
20190 
20191 	bzero(cdb, sizeof (cdb));
20192 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20193 	bzero(rqbuf, rqbuflen);
20194 	bzero(bufaddr, buflen);
20195 
20196 	/*
20197 	 * Set up cdb field for the get configuration command.
20198 	 */
20199 	cdb[0] = SCMD_GET_CONFIGURATION;
20200 	cdb[1] = 0x02;  /* Requested Type */
20201 	cdb[8] = SD_PROFILE_HEADER_LEN;
20202 	ucmdbuf->uscsi_cdb = cdb;
20203 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20204 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20205 	ucmdbuf->uscsi_buflen = buflen;
20206 	ucmdbuf->uscsi_timeout = sd_io_time;
20207 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20208 	ucmdbuf->uscsi_rqlen = rqbuflen;
20209 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20210 
20211 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20212 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20213 
20214 	switch (status) {
20215 	case 0:
20216 		break;  /* Success! */
20217 	case EIO:
20218 		switch (ucmdbuf->uscsi_status) {
20219 		case STATUS_RESERVATION_CONFLICT:
20220 			status = EACCES;
20221 			break;
20222 		default:
20223 			break;
20224 		}
20225 		break;
20226 	default:
20227 		break;
20228 	}
20229 
20230 	if (status == 0) {
20231 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20232 		    "sd_send_scsi_GET_CONFIGURATION: data",
20233 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20234 	}
20235 
20236 	SD_TRACE(SD_LOG_IO, un,
20237 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
20238 
20239 	return (status);
20240 }
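
/*
 * Usage sketch (hypothetical, for illustration only; SENSE_LENGTH for the
 * request sense buffer is an arbitrary choice): fetch the feature header,
 * whose bytes 6-7 report the device's current profile (per MMC).
 *
 *	struct uscsi_cmd com;
 *	uchar_t	rqbuf[SENSE_LENGTH];
 *	uchar_t	hdr[SD_PROFILE_HEADER_LEN];
 *
 *	if (sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
 *	    hdr, SD_PROFILE_HEADER_LEN) == 0) {
 *		... (hdr[6] << 8) | hdr[7] is the current profile ...
 *	}
 */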
20241 
20242 /*
20243  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
20244  *
20245  * Description: Issues the get configuration command to the device to
20246  *              retrieve a specific feature. Called from
20247  *		sd_check_for_writable_cd & sd_set_mmc_caps.
20248  *   Arguments: un - pointer to the soft state struct for the target
20249  *              ucmdbuf - uscsi command buffer to be filled in and issued
20250  *              rqbuf - buffer for the request sense data
20251  *              rqbuflen - length of the request sense buffer
20252  *              bufaddr - buffer to receive the feature data
20253  *              buflen - length of the data buffer
20254  *		feature - feature code of the feature to retrieve
20255  *
20256  * Return Code: 0   - Success
20257  *              errno return code from sd_send_scsi_cmd()
20258  *
20259  *     Context: Can sleep. Does not return until command is completed.
20260  *
20261  */
20262 static int
20263 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
20264 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
20265 	uchar_t *bufaddr, uint_t buflen, char feature)
20266 {
20267 	char    cdb[CDB_GROUP1];
20268 	int	status;
20269 
20270 	ASSERT(un != NULL);
20271 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20272 	ASSERT(bufaddr != NULL);
20273 	ASSERT(ucmdbuf != NULL);
20274 	ASSERT(rqbuf != NULL);
20275 
20276 	SD_TRACE(SD_LOG_IO, un,
20277 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
20278 
20279 	bzero(cdb, sizeof (cdb));
20280 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20281 	bzero(rqbuf, rqbuflen);
20282 	bzero(bufaddr, buflen);
20283 
20284 	/*
20285 	 * Set up cdb field for the get configuration command.
20286 	 */
20287 	cdb[0] = SCMD_GET_CONFIGURATION;
20288 	cdb[1] = 0x02;  /* Requested Type */
20289 	cdb[3] = feature;
20290 	cdb[8] = buflen;
20291 	ucmdbuf->uscsi_cdb = cdb;
20292 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20293 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20294 	ucmdbuf->uscsi_buflen = buflen;
20295 	ucmdbuf->uscsi_timeout = sd_io_time;
20296 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20297 	ucmdbuf->uscsi_rqlen = rqbuflen;
20298 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20299 
20300 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20301 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20302 
20303 	switch (status) {
20304 	case 0:
20305 		break;  /* Success! */
20306 	case EIO:
20307 		switch (ucmdbuf->uscsi_status) {
20308 		case STATUS_RESERVATION_CONFLICT:
20309 			status = EACCES;
20310 			break;
20311 		default:
20312 			break;
20313 		}
20314 		break;
20315 	default:
20316 		break;
20317 	}
20318 
20319 	if (status == 0) {
20320 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20321 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
20322 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20323 	}
20324 
20325 	SD_TRACE(SD_LOG_IO, un,
20326 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
20327 
20328 	return (status);
20329 }
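
/*
 * Usage sketch (hypothetical, for illustration only; feature code 0x1F,
 * "DVD Read", and the buffer size are assumptions): query one feature
 * descriptor rather than the whole configuration.
 *
 *	struct uscsi_cmd com;
 *	uchar_t	rqbuf[SENSE_LENGTH];
 *	uchar_t	buf[SD_PROFILE_HEADER_LEN];
 *
 *	if (sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf,
 *	    SENSE_LENGTH, buf, SD_PROFILE_HEADER_LEN, 0x1F) == 0) {
 *		... buf holds the header plus the requested descriptor ...
 *	}
 */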
20330 
20331 
20332 /*
20333  *    Function: sd_send_scsi_MODE_SENSE
20334  *
20335  * Description: Utility function for issuing a scsi MODE SENSE command.
20336  *		Note: This routine uses a consistent implementation for Group0,
20337  *		Group1, and Group2 commands across all platforms. ATAPI devices
20338  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
20339  *
20340  *   Arguments: un - pointer to the softstate struct for the target.
20341  *		cdbsize - size of the CDB to be used (CDB_GROUP0 (6 byte)
20342  *			  or CDB_GROUP[1|2] (10 byte)).
20343  *		bufaddr - buffer for page data retrieved from the target.
20344  *		buflen - size of page to be retrieved.
20345  *		page_code - page code of data to be retrieved from the target.
20346  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20347  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20348  *			to use the USCSI "direct" chain and bypass the normal
20349  *			command waitq.
20350  *
20351  * Return Code: 0   - Success
20352  *		errno return code from sd_send_scsi_cmd()
20353  *
20354  *     Context: Can sleep. Does not return until command is completed.
20355  */
20356 
20357 static int
20358 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20359 	size_t buflen,  uchar_t page_code, int path_flag)
20360 {
20361 	struct	scsi_extended_sense	sense_buf;
20362 	union scsi_cdb		cdb;
20363 	struct uscsi_cmd	ucmd_buf;
20364 	int			status;
20365 
20366 	ASSERT(un != NULL);
20367 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20368 	ASSERT(bufaddr != NULL);
20369 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20370 	    (cdbsize == CDB_GROUP2));
20371 
20372 	SD_TRACE(SD_LOG_IO, un,
20373 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
20374 
20375 	bzero(&cdb, sizeof (cdb));
20376 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20377 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20378 	bzero(bufaddr, buflen);
20379 
20380 	if (cdbsize == CDB_GROUP0) {
20381 		cdb.scc_cmd = SCMD_MODE_SENSE;
20382 		cdb.cdb_opaque[2] = page_code;
20383 		FORMG0COUNT(&cdb, buflen);
20384 	} else {
20385 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
20386 		cdb.cdb_opaque[2] = page_code;
20387 		FORMG1COUNT(&cdb, buflen);
20388 	}
20389 
20390 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20391 
20392 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20393 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20394 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20395 	ucmd_buf.uscsi_buflen	= buflen;
20396 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20397 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20398 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20399 	ucmd_buf.uscsi_timeout	= 60;
20400 
20401 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20402 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20403 
20404 	switch (status) {
20405 	case 0:
20406 		break;	/* Success! */
20407 	case EIO:
20408 		switch (ucmd_buf.uscsi_status) {
20409 		case STATUS_RESERVATION_CONFLICT:
20410 			status = EACCES;
20411 			break;
20412 		default:
20413 			break;
20414 		}
20415 		break;
20416 	default:
20417 		break;
20418 	}
20419 
20420 	if (status == 0) {
20421 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
20422 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20423 	}
20424 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
20425 
20426 	return (status);
20427 }
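
/*
 * Usage sketch (hypothetical, for illustration only; the buffer size is an
 * arbitrary choice): read the caching mode page (0x08) with a Group 1 CDB.
 * The returned data is preceded by the mode parameter header and any block
 * descriptor, which the caller must skip over.
 *
 *	uchar_t	page[64];
 *	int	status;
 *
 *	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, page,
 *	    sizeof (page), MODEPAGE_CACHING, SD_PATH_DIRECT);
 */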
20428 
20429 
20430 /*
20431  *    Function: sd_send_scsi_MODE_SELECT
20432  *
20433  * Description: Utility function for issuing a scsi MODE SELECT command.
20434  *		Note: This routine uses a consistent implementation for Group0,
20435  *		Group1, and Group2 commands across all platforms. ATAPI devices
20436  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
20437  *
20438  *   Arguments: un - pointer to the softstate struct for the target.
20439  *		cdbsize - size of the CDB to be used (CDB_GROUP0 (6 byte)
20440  *			  or CDB_GROUP[1|2] (10 byte)).
20441  *		bufaddr - buffer with the page data to be sent to the target.
20442  *		buflen - size of the page data to be transferred.
20443  *		save_page - boolean to determine if the SP bit should be set.
20444  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20445  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20446  *			to use the USCSI "direct" chain and bypass the normal
20447  *			command waitq.
20448  *
20449  * Return Code: 0   - Success
20450  *		errno return code from sd_send_scsi_cmd()
20451  *
20452  *     Context: Can sleep. Does not return until command is completed.
20453  */
20454 
20455 static int
20456 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20457 	size_t buflen,  uchar_t save_page, int path_flag)
20458 {
20459 	struct	scsi_extended_sense	sense_buf;
20460 	union scsi_cdb		cdb;
20461 	struct uscsi_cmd	ucmd_buf;
20462 	int			status;
20463 
20464 	ASSERT(un != NULL);
20465 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20466 	ASSERT(bufaddr != NULL);
20467 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20468 	    (cdbsize == CDB_GROUP2));
20469 
20470 	SD_TRACE(SD_LOG_IO, un,
20471 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
20472 
20473 	bzero(&cdb, sizeof (cdb));
20474 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20475 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20476 
20477 	/* Set the PF bit for many third party drives */
20478 	cdb.cdb_opaque[1] = 0x10;
20479 
20480 	/* Set the savepage(SP) bit if given */
20481 	if (save_page == SD_SAVE_PAGE) {
20482 		cdb.cdb_opaque[1] |= 0x01;
20483 	}
20484 
20485 	if (cdbsize == CDB_GROUP0) {
20486 		cdb.scc_cmd = SCMD_MODE_SELECT;
20487 		FORMG0COUNT(&cdb, buflen);
20488 	} else {
20489 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
20490 		FORMG1COUNT(&cdb, buflen);
20491 	}
20492 
20493 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20494 
20495 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20496 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20497 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20498 	ucmd_buf.uscsi_buflen	= buflen;
20499 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20500 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20501 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20502 	ucmd_buf.uscsi_timeout	= 60;
20503 
20504 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20505 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20506 
20507 	switch (status) {
20508 	case 0:
20509 		break;	/* Success! */
20510 	case EIO:
20511 		switch (ucmd_buf.uscsi_status) {
20512 		case STATUS_RESERVATION_CONFLICT:
20513 			status = EACCES;
20514 			break;
20515 		default:
20516 			break;
20517 		}
20518 		break;
20519 	default:
20520 		break;
20521 	}
20522 
20523 	if (status == 0) {
20524 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
20525 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20526 	}
20527 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
20528 
20529 	return (status);
20530 }
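
/*
 * Usage sketch (hypothetical, for illustration only; the buffer size is an
 * arbitrary choice): a MODE SELECT buffer is normally built by first
 * fetching the page with sd_send_scsi_MODE_SENSE(), updating the
 * changeable fields in place, and sending it back. SD_SAVE_PAGE asks the
 * device to persist the change across power cycles:
 *
 *	uchar_t	page[64];
 *	int	status;
 *
 *	... fetch page via sd_send_scsi_MODE_SENSE() and modify it ...
 *
 *	status = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, page,
 *	    sizeof (page), SD_SAVE_PAGE, SD_PATH_DIRECT);
 */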
20531 
20532 
20533 /*
20534  *    Function: sd_send_scsi_RDWR
20535  *
20536  * Description: Issue a scsi READ or WRITE command with the given parameters.
20537  *
20538  *   Arguments: un:      Pointer to the sd_lun struct for the target.
20539  *		cmd:	 SCMD_READ or SCMD_WRITE
20540  *		bufaddr: Address of the caller's buffer holding the RDWR
20541  *			 data (filled in on a read, supplied on a write).
20542  *		buflen:  Length of the caller's buffer.
20543  *		start_block: Block number for the start of the RDWR operation.
20544  *			 (Assumes target-native block size.)
20546  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20547  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20548  *			to use the USCSI "direct" chain and bypass the normal
20549  *			command waitq.
20550  *
20551  * Return Code: 0   - Success
20552  *		errno return code from sd_send_scsi_cmd()
20553  *
20554  *     Context: Can sleep. Does not return until command is completed.
20555  */
20556 
20557 static int
20558 sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
20559 	size_t buflen, daddr_t start_block, int path_flag)
20560 {
20561 	struct	scsi_extended_sense	sense_buf;
20562 	union scsi_cdb		cdb;
20563 	struct uscsi_cmd	ucmd_buf;
20564 	uint32_t		block_count;
20565 	int			status;
20566 	int			cdbsize;
20567 	uchar_t			flag;
20568 
20569 	ASSERT(un != NULL);
20570 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20571 	ASSERT(bufaddr != NULL);
20572 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
20573 
20574 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
20575 
20576 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
20577 		return (EINVAL);
20578 	}
20579 
20580 	mutex_enter(SD_MUTEX(un));
20581 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
20582 	mutex_exit(SD_MUTEX(un));
20583 
20584 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
20585 
20586 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
20587 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
20588 	    bufaddr, buflen, start_block, block_count);
20589 
20590 	bzero(&cdb, sizeof (cdb));
20591 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20592 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20593 
	/*
	 * Compute the CDB size to use: a block address beyond 32 bits needs
	 * a Group 4 (16-byte) CDB; an address that does not fit in the
	 * 21-bit Group 0 address field (or any ATAPI device, which does not
	 * accept 6-byte commands) needs a Group 1 (10-byte) CDB; anything
	 * else can use a Group 0 (6-byte) CDB.
	 */
20595 	if (start_block > 0xffffffff)
20596 		cdbsize = CDB_GROUP4;
20597 	else if ((start_block & 0xFFE00000) ||
20598 	    (un->un_f_cfg_is_atapi == TRUE))
20599 		cdbsize = CDB_GROUP1;
20600 	else
20601 		cdbsize = CDB_GROUP0;
20602 
20603 	switch (cdbsize) {
20604 	case CDB_GROUP0:	/* 6-byte CDBs */
20605 		cdb.scc_cmd = cmd;
20606 		FORMG0ADDR(&cdb, start_block);
20607 		FORMG0COUNT(&cdb, block_count);
20608 		break;
20609 	case CDB_GROUP1:	/* 10-byte CDBs */
20610 		cdb.scc_cmd = cmd | SCMD_GROUP1;
20611 		FORMG1ADDR(&cdb, start_block);
20612 		FORMG1COUNT(&cdb, block_count);
20613 		break;
20614 	case CDB_GROUP4:	/* 16-byte CDBs */
20615 		cdb.scc_cmd = cmd | SCMD_GROUP4;
20616 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
20617 		FORMG4COUNT(&cdb, block_count);
20618 		break;
20619 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
20620 	default:
20621 		/* All others reserved */
20622 		return (EINVAL);
20623 	}
20624 
20625 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
20626 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20627 
20628 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20629 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20630 	ucmd_buf.uscsi_bufaddr	= bufaddr;
20631 	ucmd_buf.uscsi_buflen	= buflen;
20632 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20633 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20634 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
20635 	ucmd_buf.uscsi_timeout	= 60;
20636 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20637 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20638 	switch (status) {
20639 	case 0:
20640 		break;	/* Success! */
20641 	case EIO:
20642 		switch (ucmd_buf.uscsi_status) {
20643 		case STATUS_RESERVATION_CONFLICT:
20644 			status = EACCES;
20645 			break;
20646 		default:
20647 			break;
20648 		}
20649 		break;
20650 	default:
20651 		break;
20652 	}
20653 
20654 	if (status == 0) {
20655 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
20656 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20657 	}
20658 
20659 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
20660 
20661 	return (status);
20662 }
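
/*
 * Usage sketch (hypothetical, for illustration only; assumes a 512-byte
 * target block size): read the first block of the media through the USCSI
 * "direct" chain.
 *
 *	uchar_t	blk0[DEV_BSIZE];
 *	int	status;
 *
 *	status = sd_send_scsi_RDWR(un, SCMD_READ, blk0, sizeof (blk0),
 *	    (daddr_t)0, SD_PATH_DIRECT);
 */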
20663 
20664 
20665 /*
20666  *    Function: sd_send_scsi_LOG_SENSE
20667  *
20668  * Description: Issue a scsi LOG_SENSE command with the given parameters.
20669  *
 *   Arguments: un:      Pointer to the sd_lun struct for the target.
 *		bufaddr: Buffer to receive the log page data.
 *		buflen:  Length of the caller's buffer.
 *		page_code: Code of the log page to retrieve.
 *		page_control: Page control field (e.g. 1 = cumulative values).
 *		param_ptr: Parameter pointer for the first parameter returned.
 *		path_flag: SD_PATH_DIRECT or SD_PATH_STANDARD chain selection.
20671  *
20672  * Return Code: 0   - Success
20673  *		errno return code from sd_send_scsi_cmd()
20674  *
20675  *     Context: Can sleep. Does not return until command is completed.
20676  */
20677 
20678 static int
20679 sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
20680 	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
20681 	int path_flag)
20683 {
20684 	struct	scsi_extended_sense	sense_buf;
20685 	union scsi_cdb		cdb;
20686 	struct uscsi_cmd	ucmd_buf;
20687 	int			status;
20688 
20689 	ASSERT(un != NULL);
20690 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20691 
20692 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
20693 
20694 	bzero(&cdb, sizeof (cdb));
20695 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20696 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20697 
20698 	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
20699 	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
20700 	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
20701 	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
20702 	FORMG1COUNT(&cdb, buflen);
20703 
20704 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20705 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20706 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20707 	ucmd_buf.uscsi_buflen	= buflen;
20708 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20709 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20710 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20711 	ucmd_buf.uscsi_timeout	= 60;
20712 
20713 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20714 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20715 
20716 	switch (status) {
20717 	case 0:
20718 		break;
20719 	case EIO:
20720 		switch (ucmd_buf.uscsi_status) {
20721 		case STATUS_RESERVATION_CONFLICT:
20722 			status = EACCES;
20723 			break;
20724 		case STATUS_CHECK:
20725 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20726 			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST) &&
20727 			    (sense_buf.es_add_code == 0x24)) {
20728 				/*
20729 				 * ASC 0x24: INVALID FIELD IN CDB
20730 				 */
20731 				switch (page_code) {
20732 				case START_STOP_CYCLE_PAGE:
20733 					/*
20734 					 * The start stop cycle counter is
20735 					 * implemented as page 0x31 in earlier
20736 					 * generation disks. In new generation
20737 					 * disks the start stop cycle counter is
20738 					 * implemented as page 0xE. To properly
20739 					 * handle this case if an attempt for
20740 					 * log page 0xE is made and fails we
20741 					 * will try again using page 0x31.
20742 					 *
20743 					 * Network storage BU committed to
20744 					 * maintain the page 0x31 for this
20745 					 * purpose and will not have any other
20746 					 * page implemented with page code 0x31
20747 					 * until all disks transition to the
20748 					 * standard page.
20749 					 */
20750 					mutex_enter(SD_MUTEX(un));
20751 					un->un_start_stop_cycle_page =
20752 					    START_STOP_CYCLE_VU_PAGE;
20753 					cdb.cdb_opaque[2] =
20754 					    (char)(page_control << 6) |
20755 					    un->un_start_stop_cycle_page;
20756 					mutex_exit(SD_MUTEX(un));
20757 					status = sd_send_scsi_cmd(
20758 					    SD_GET_DEV(un), &ucmd_buf,
20759 					    UIO_SYSSPACE, UIO_SYSSPACE,
20760 					    UIO_SYSSPACE, path_flag);
20761 
20762 					break;
20763 				case TEMPERATURE_PAGE:
20764 					status = ENOTTY;
20765 					break;
20766 				default:
20767 					break;
20768 				}
20769 			}
20770 			break;
20771 		default:
20772 			break;
20773 		}
20774 		break;
20775 	default:
20776 		break;
20777 	}
20778 
20779 	if (status == 0) {
20780 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
20781 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20782 	}
20783 
20784 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
20785 
20786 	return (status);
20787 }
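
/*
 * Usage sketch (hypothetical, for illustration only; the buffer size is an
 * arbitrary choice): read the temperature log page with cumulative values
 * (page control 1), starting at parameter 0.
 *
 *	uchar_t	logpage[256];
 *	int	status;
 *
 *	status = sd_send_scsi_LOG_SENSE(un, logpage, sizeof (logpage),
 *	    TEMPERATURE_PAGE, 1, 0, SD_PATH_DIRECT);
 *	if (status == ENOTTY) {
 *		... target does not implement the temperature page ...
 *	}
 */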
20788 
20789 
20790 /*
20791  *    Function: sdioctl
20792  *
20793  * Description: Driver's ioctl(9e) entry point function.
20794  *
20795  *   Arguments: dev     - device number
20796  *		cmd     - ioctl operation to be performed
20797  *		arg     - user argument, contains data to be set or reference
20798  *			  parameter for get
20799  *		flag    - bit flag, indicating open settings, 32/64 bit type
20800  *		cred_p  - user credential pointer
20801  *		rval_p  - calling process return value (OPT)
20802  *
20803  * Return Code: EINVAL
20804  *		ENOTTY
20805  *		ENXIO
20806  *		EIO
20807  *		EFAULT
20808  *		ENOTSUP
20809  *		EPERM
20810  *
20811  *     Context: Called from the device switch at normal priority.
20812  */
20813 
20814 static int
20815 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
20816 {
20817 	struct sd_lun	*un = NULL;
20818 	int		geom_validated = FALSE;
20819 	int		err = 0;
20820 	int		i = 0;
20821 	cred_t		*cr;
20822 
20823 	/*
20824 	 * All device accesses go through sdstrategy, where we check the
20825 	 * suspend status.
20826 	 */
20827 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20828 		return (ENXIO);
20829 	}
20830 
20831 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20832 
20833 	/*
20834 	 * Moved this wait from sd_uscsi_strategy to here for
20835 	 * reasons of deadlock prevention. Internal driver commands,
20836 	 * specifically those to change a device's power level, result
20837 	 * in a call to sd_uscsi_strategy.
20838 	 */
20839 	mutex_enter(SD_MUTEX(un));
20840 	while ((un->un_state == SD_STATE_SUSPENDED) ||
20841 	    (un->un_state == SD_STATE_PM_CHANGING)) {
20842 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
20843 	}
20844 	/*
20845 	 * Twiddling the counter here protects commands from now
20846 	 * through to the top of sd_uscsi_strategy. Without the
20847 	 * counter increment, a power down, for example, could get in
20848 	 * after the above check for state is made and before
20849 	 * execution gets to the top of sd_uscsi_strategy.
20850 	 * That would cause problems.
20851 	 */
20852 	un->un_ncmds_in_driver++;
20853 
20854 	if ((un->un_f_geometry_is_valid == FALSE) &&
20855 	    (flag & (FNDELAY | FNONBLOCK))) {
20856 		switch (cmd) {
20857 		case CDROMPAUSE:
20858 		case CDROMRESUME:
20859 		case CDROMPLAYMSF:
20860 		case CDROMPLAYTRKIND:
20861 		case CDROMREADTOCHDR:
20862 		case CDROMREADTOCENTRY:
20863 		case CDROMSTOP:
20864 		case CDROMSTART:
20865 		case CDROMVOLCTRL:
20866 		case CDROMSUBCHNL:
20867 		case CDROMREADMODE2:
20868 		case CDROMREADMODE1:
20869 		case CDROMREADOFFSET:
20870 		case CDROMSBLKMODE:
20871 		case CDROMGBLKMODE:
20872 		case CDROMGDRVSPEED:
20873 		case CDROMSDRVSPEED:
20874 		case CDROMCDDA:
20875 		case CDROMCDXA:
20876 		case CDROMSUBCODE:
20877 			if (!ISCD(un)) {
20878 				un->un_ncmds_in_driver--;
20879 				ASSERT(un->un_ncmds_in_driver >= 0);
20880 				mutex_exit(SD_MUTEX(un));
20881 				return (ENOTTY);
20882 			}
20883 			break;
20884 		case FDEJECT:
20885 		case DKIOCEJECT:
20886 		case CDROMEJECT:
20887 			if (!un->un_f_eject_media_supported) {
20888 				un->un_ncmds_in_driver--;
20889 				ASSERT(un->un_ncmds_in_driver >= 0);
20890 				mutex_exit(SD_MUTEX(un));
20891 				return (ENOTTY);
20892 			}
20893 			break;
20894 		case DKIOCSVTOC:
20895 		case DKIOCSETEFI:
20896 		case DKIOCSMBOOT:
20897 		case DKIOCFLUSHWRITECACHE:
20898 			mutex_exit(SD_MUTEX(un));
20899 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
20900 			if (err != 0) {
20901 				mutex_enter(SD_MUTEX(un));
20902 				un->un_ncmds_in_driver--;
20903 				ASSERT(un->un_ncmds_in_driver >= 0);
20904 				mutex_exit(SD_MUTEX(un));
20905 				return (EIO);
20906 			}
20907 			mutex_enter(SD_MUTEX(un));
20908 			/* FALLTHROUGH */
20909 		case DKIOCREMOVABLE:
20910 		case DKIOCHOTPLUGGABLE:
20911 		case DKIOCINFO:
20912 		case DKIOCGMEDIAINFO:
20913 		case MHIOCENFAILFAST:
20914 		case MHIOCSTATUS:
20915 		case MHIOCTKOWN:
20916 		case MHIOCRELEASE:
20917 		case MHIOCGRP_INKEYS:
20918 		case MHIOCGRP_INRESV:
20919 		case MHIOCGRP_REGISTER:
20920 		case MHIOCGRP_RESERVE:
20921 		case MHIOCGRP_PREEMPTANDABORT:
20922 		case MHIOCGRP_REGISTERANDIGNOREKEY:
20923 		case CDROMCLOSETRAY:
20924 		case USCSICMD:
20925 			goto skip_ready_valid;
20926 		default:
20927 			break;
20928 		}
20929 
20930 		mutex_exit(SD_MUTEX(un));
20931 		err = sd_ready_and_valid(un);
20932 		mutex_enter(SD_MUTEX(un));
20933 		if (err == SD_READY_NOT_VALID) {
20934 			switch (cmd) {
20935 			case DKIOCGAPART:
20936 			case DKIOCGGEOM:
20937 			case DKIOCSGEOM:
20938 			case DKIOCGVTOC:
20939 			case DKIOCSVTOC:
20940 			case DKIOCSAPART:
20941 			case DKIOCG_PHYGEOM:
20942 			case DKIOCG_VIRTGEOM:
20943 				err = ENOTSUP;
20944 				un->un_ncmds_in_driver--;
20945 				ASSERT(un->un_ncmds_in_driver >= 0);
20946 				mutex_exit(SD_MUTEX(un));
20947 				return (err);
20948 			}
20949 		}
20950 		if (err != SD_READY_VALID) {
20951 			switch (cmd) {
20952 			case DKIOCSTATE:
20953 			case CDROMGDRVSPEED:
20954 			case CDROMSDRVSPEED:
20955 			case FDEJECT:	/* for eject command */
20956 			case DKIOCEJECT:
20957 			case CDROMEJECT:
20958 			case DKIOCGETEFI:
20959 			case DKIOCSGEOM:
20960 			case DKIOCREMOVABLE:
20961 			case DKIOCHOTPLUGGABLE:
20962 			case DKIOCSAPART:
20963 			case DKIOCSETEFI:
20964 				break;
20965 			default:
20966 				if (un->un_f_has_removable_media) {
20967 					err = ENXIO;
20968 				} else {
20969 					/* Do not map EACCES to EIO */
20970 					if (err != EACCES)
20971 						err = EIO;
20972 				}
20973 				un->un_ncmds_in_driver--;
20974 				ASSERT(un->un_ncmds_in_driver >= 0);
20975 				mutex_exit(SD_MUTEX(un));
20976 				return (err);
20977 			}
20978 		}
20979 		geom_validated = TRUE;
20980 	}
20981 	if ((un->un_f_geometry_is_valid == TRUE) &&
20982 	    (un->un_solaris_size > 0)) {
20983 		/*
20984 		 * the "geometry_is_valid" flag could be true if we
20985 		 * have an fdisk table but no Solaris partition
20986 		 */
20987 		if (un->un_vtoc.v_sanity != VTOC_SANE) {
20988 			/* it is EFI, so return ENOTSUP for these */
20989 			switch (cmd) {
20990 			case DKIOCGAPART:
20991 			case DKIOCGGEOM:
20992 			case DKIOCGVTOC:
20993 			case DKIOCSVTOC:
20994 			case DKIOCSAPART:
20995 				err = ENOTSUP;
20996 				un->un_ncmds_in_driver--;
20997 				ASSERT(un->un_ncmds_in_driver >= 0);
20998 				mutex_exit(SD_MUTEX(un));
20999 				return (err);
21000 			}
21001 		}
21002 	}
21003 
21004 skip_ready_valid:
21005 	mutex_exit(SD_MUTEX(un));
21006 
21007 	switch (cmd) {
21008 	case DKIOCINFO:
21009 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
21010 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
21011 		break;
21012 
21013 	case DKIOCGMEDIAINFO:
21014 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
21015 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
21016 		break;
21017 
21018 	case DKIOCGGEOM:
21019 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGGEOM\n");
21020 		err = sd_dkio_get_geometry(dev, (caddr_t)arg, flag,
21021 		    geom_validated);
21022 		break;
21023 
21024 	case DKIOCSGEOM:
21025 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSGEOM\n");
21026 		err = sd_dkio_set_geometry(dev, (caddr_t)arg, flag);
21027 		break;
21028 
21029 	case DKIOCGAPART:
21030 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGAPART\n");
21031 		err = sd_dkio_get_partition(dev, (caddr_t)arg, flag,
21032 		    geom_validated);
21033 		break;
21034 
21035 	case DKIOCSAPART:
21036 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSAPART\n");
21037 		err = sd_dkio_set_partition(dev, (caddr_t)arg, flag);
21038 		break;
21039 
21040 	case DKIOCGVTOC:
21041 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGVTOC\n");
21042 		err = sd_dkio_get_vtoc(dev, (caddr_t)arg, flag,
21043 		    geom_validated);
21044 		break;
21045 
21046 	case DKIOCGETEFI:
21047 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGETEFI\n");
21048 		err = sd_dkio_get_efi(dev, (caddr_t)arg, flag);
21049 		break;
21050 
21051 	case DKIOCPARTITION:
21052 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTITION\n");
21053 		err = sd_dkio_partition(dev, (caddr_t)arg, flag);
21054 		break;
21055 
21056 	case DKIOCSVTOC:
21057 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSVTOC\n");
21058 		err = sd_dkio_set_vtoc(dev, (caddr_t)arg, flag);
21059 		break;
21060 
21061 	case DKIOCSETEFI:
21062 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSETEFI\n");
21063 		err = sd_dkio_set_efi(dev, (caddr_t)arg, flag);
21064 		break;
21065 
21066 	case DKIOCGMBOOT:
21067 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMBOOT\n");
21068 		err = sd_dkio_get_mboot(dev, (caddr_t)arg, flag);
21069 		break;
21070 
21071 	case DKIOCSMBOOT:
21072 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSMBOOT\n");
21073 		err = sd_dkio_set_mboot(dev, (caddr_t)arg, flag);
21074 		break;
21075 
21076 	case DKIOCLOCK:
21077 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
21078 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
21079 		    SD_PATH_STANDARD);
21080 		break;
21081 
21082 	case DKIOCUNLOCK:
21083 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
21084 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
21085 		    SD_PATH_STANDARD);
21086 		break;
21087 
21088 	case DKIOCSTATE: {
21089 		enum dkio_state		state;
21090 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
21091 
21092 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
21093 			err = EFAULT;
21094 		} else {
21095 			err = sd_check_media(dev, state);
21096 			if (err == 0) {
21097 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
21098 				    sizeof (int), flag) != 0)
21099 					err = EFAULT;
21100 			}
21101 		}
21102 		break;
21103 	}
21104 
21105 	case DKIOCREMOVABLE:
21106 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
21107 		/*
21108 		 * At present, vold only does automount for removable-media
21109 		 * devices. In order not to break current applications, we
21110 		 * still let hotpluggable devices pretend to be removable media
21111 		 * devices for vold. In the near future, once vold is EOL'ed,
21112 		 * we should remove this workaround.
21113 		 */
21114 		if (un->un_f_has_removable_media || un->un_f_is_hotpluggable) {
21115 			i = 1;
21116 		} else {
21117 			i = 0;
21118 		}
21119 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21120 			err = EFAULT;
21121 		} else {
21122 			err = 0;
21123 		}
21124 		break;
21125 
21126 	case DKIOCHOTPLUGGABLE:
21127 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
21128 		if (un->un_f_is_hotpluggable) {
21129 			i = 1;
21130 		} else {
21131 			i = 0;
21132 		}
21133 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21134 			err = EFAULT;
21135 		} else {
21136 			err = 0;
21137 		}
21138 		break;
21139 
21140 	case DKIOCGTEMPERATURE:
21141 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
21142 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
21143 		break;
21144 
21145 	case MHIOCENFAILFAST:
21146 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
21147 		if ((err = drv_priv(cred_p)) == 0) {
21148 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
21149 		}
21150 		break;
21151 
21152 	case MHIOCTKOWN:
21153 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
21154 		if ((err = drv_priv(cred_p)) == 0) {
21155 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
21156 		}
21157 		break;
21158 
21159 	case MHIOCRELEASE:
21160 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
21161 		if ((err = drv_priv(cred_p)) == 0) {
21162 			err = sd_mhdioc_release(dev);
21163 		}
21164 		break;
21165 
21166 	case MHIOCSTATUS:
21167 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
21168 		if ((err = drv_priv(cred_p)) == 0) {
21169 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
21170 			case 0:
21171 				err = 0;
21172 				break;
21173 			case EACCES:
21174 				*rval_p = 1;
21175 				err = 0;
21176 				break;
21177 			default:
21178 				err = EIO;
21179 				break;
21180 			}
21181 		}
21182 		break;
21183 
21184 	case MHIOCQRESERVE:
21185 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
21186 		if ((err = drv_priv(cred_p)) == 0) {
21187 			err = sd_reserve_release(dev, SD_RESERVE);
21188 		}
21189 		break;
21190 
21191 	case MHIOCREREGISTERDEVID:
21192 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
21193 		if (drv_priv(cred_p) == EPERM) {
21194 			err = EPERM;
21195 		} else if (!un->un_f_devid_supported) {
21196 			err = ENOTTY;
21197 		} else {
21198 			err = sd_mhdioc_register_devid(dev);
21199 		}
21200 		break;
21201 
21202 	case MHIOCGRP_INKEYS:
21203 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
21204 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21205 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21206 				err = ENOTSUP;
21207 			} else {
21208 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
21209 				    flag);
21210 			}
21211 		}
21212 		break;
21213 
21214 	case MHIOCGRP_INRESV:
21215 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
21216 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21217 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21218 				err = ENOTSUP;
21219 			} else {
21220 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
21221 			}
21222 		}
21223 		break;
21224 
21225 	case MHIOCGRP_REGISTER:
21226 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
21227 		if ((err = drv_priv(cred_p)) != EPERM) {
21228 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21229 				err = ENOTSUP;
21230 			} else if (arg != NULL) {
21231 				mhioc_register_t reg;
21232 				if (ddi_copyin((void *)arg, &reg,
21233 				    sizeof (mhioc_register_t), flag) != 0) {
21234 					err = EFAULT;
21235 				} else {
21236 					err =
21237 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21238 					    un, SD_SCSI3_REGISTER,
21239 					    (uchar_t *)&reg);
21240 				}
21241 			}
21242 		}
21243 		break;
21244 
21245 	case MHIOCGRP_RESERVE:
21246 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
21247 		if ((err = drv_priv(cred_p)) != EPERM) {
21248 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21249 				err = ENOTSUP;
21250 			} else if (arg != NULL) {
21251 				mhioc_resv_desc_t resv_desc;
21252 				if (ddi_copyin((void *)arg, &resv_desc,
21253 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
21254 					err = EFAULT;
21255 				} else {
21256 					err =
21257 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21258 					    un, SD_SCSI3_RESERVE,
21259 					    (uchar_t *)&resv_desc);
21260 				}
21261 			}
21262 		}
21263 		break;
21264 
21265 	case MHIOCGRP_PREEMPTANDABORT:
21266 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
21267 		if ((err = drv_priv(cred_p)) != EPERM) {
21268 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21269 				err = ENOTSUP;
21270 			} else if (arg != NULL) {
21271 				mhioc_preemptandabort_t preempt_abort;
21272 				if (ddi_copyin((void *)arg, &preempt_abort,
21273 				    sizeof (mhioc_preemptandabort_t),
21274 				    flag) != 0) {
21275 					err = EFAULT;
21276 				} else {
21277 					err =
21278 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21279 					    un, SD_SCSI3_PREEMPTANDABORT,
21280 					    (uchar_t *)&preempt_abort);
21281 				}
21282 			}
21283 		}
21284 		break;
21285 
21286 	case MHIOCGRP_REGISTERANDIGNOREKEY:
21287 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
21288 		if ((err = drv_priv(cred_p)) != EPERM) {
21289 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21290 				err = ENOTSUP;
21291 			} else if (arg != NULL) {
21292 				mhioc_registerandignorekey_t r_and_i;
21293 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
21294 				    sizeof (mhioc_registerandignorekey_t),
21295 				    flag) != 0) {
21296 					err = EFAULT;
21297 				} else {
21298 					err =
21299 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21300 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
21301 					    (uchar_t *)&r_and_i);
21302 				}
21303 			}
21304 		}
21305 		break;
21306 
21307 	case USCSICMD:
21308 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
21309 		cr = ddi_get_cred();
21310 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
21311 			err = EPERM;
21312 		} else {
21313 			err = sd_uscsi_ioctl(dev, (caddr_t)arg, flag);
21314 		}
21315 		break;
21316 
21317 	case CDROMPAUSE:
21318 	case CDROMRESUME:
21319 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
21320 		if (!ISCD(un)) {
21321 			err = ENOTTY;
21322 		} else {
21323 			err = sr_pause_resume(dev, cmd);
21324 		}
21325 		break;
21326 
21327 	case CDROMPLAYMSF:
21328 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
21329 		if (!ISCD(un)) {
21330 			err = ENOTTY;
21331 		} else {
21332 			err = sr_play_msf(dev, (caddr_t)arg, flag);
21333 		}
21334 		break;
21335 
21336 	case CDROMPLAYTRKIND:
21337 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
21338 #if defined(__i386) || defined(__amd64)
21339 		/*
21340 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
21341 		 */
21342 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21343 #else
21344 		if (!ISCD(un)) {
21345 #endif
21346 			err = ENOTTY;
21347 		} else {
21348 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
21349 		}
21350 		break;
21351 
21352 	case CDROMREADTOCHDR:
21353 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
21354 		if (!ISCD(un)) {
21355 			err = ENOTTY;
21356 		} else {
21357 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
21358 		}
21359 		break;
21360 
21361 	case CDROMREADTOCENTRY:
21362 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
21363 		if (!ISCD(un)) {
21364 			err = ENOTTY;
21365 		} else {
21366 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
21367 		}
21368 		break;
21369 
21370 	case CDROMSTOP:
21371 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
21372 		if (!ISCD(un)) {
21373 			err = ENOTTY;
21374 		} else {
21375 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
21376 			    SD_PATH_STANDARD);
21377 		}
21378 		break;
21379 
21380 	case CDROMSTART:
21381 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
21382 		if (!ISCD(un)) {
21383 			err = ENOTTY;
21384 		} else {
21385 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
21386 			    SD_PATH_STANDARD);
21387 		}
21388 		break;
21389 
21390 	case CDROMCLOSETRAY:
21391 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
21392 		if (!ISCD(un)) {
21393 			err = ENOTTY;
21394 		} else {
21395 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
21396 			    SD_PATH_STANDARD);
21397 		}
21398 		break;
21399 
21400 	case FDEJECT:	/* for eject command */
21401 	case DKIOCEJECT:
21402 	case CDROMEJECT:
21403 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
21404 		if (!un->un_f_eject_media_supported) {
21405 			err = ENOTTY;
21406 		} else {
21407 			err = sr_eject(dev);
21408 		}
21409 		break;
21410 
21411 	case CDROMVOLCTRL:
21412 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
21413 		if (!ISCD(un)) {
21414 			err = ENOTTY;
21415 		} else {
21416 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
21417 		}
21418 		break;
21419 
21420 	case CDROMSUBCHNL:
21421 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
21422 		if (!ISCD(un)) {
21423 			err = ENOTTY;
21424 		} else {
21425 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
21426 		}
21427 		break;
21428 
21429 	case CDROMREADMODE2:
21430 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
21431 		if (!ISCD(un)) {
21432 			err = ENOTTY;
21433 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21434 			/*
21435 			 * If the drive supports READ CD, use that instead of
21436 			 * switching the LBA size via a MODE SELECT
21437 			 * Block Descriptor
21438 			 */
21439 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
21440 		} else {
21441 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
21442 		}
21443 		break;
21444 
21445 	case CDROMREADMODE1:
21446 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
21447 		if (!ISCD(un)) {
21448 			err = ENOTTY;
21449 		} else {
21450 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
21451 		}
21452 		break;
21453 
21454 	case CDROMREADOFFSET:
21455 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
21456 		if (!ISCD(un)) {
21457 			err = ENOTTY;
21458 		} else {
21459 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
21460 			    flag);
21461 		}
21462 		break;
21463 
21464 	case CDROMSBLKMODE:
21465 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
21466 		/*
21467 		 * There is no means of changing the block size on ATAPI
21468 		 * drives, so return ENOTTY if the drive type is ATAPI.
21469 		 */
21470 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21471 			err = ENOTTY;
21472 		} else if (un->un_f_mmc_cap == TRUE) {
21473 
21474 			/*
21475 			 * MMC Devices do not support changing the
21476 			 * logical block size
21477 			 *
21478 			 * Note: EINVAL is being returned instead of ENOTTY to
21479 			 * maintain consistency with the original mmc
21480 			 * driver update.
21481 			 */
21482 			err = EINVAL;
21483 		} else {
21484 			mutex_enter(SD_MUTEX(un));
21485 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
21486 			    (un->un_ncmds_in_transport > 0)) {
21487 				mutex_exit(SD_MUTEX(un));
21488 				err = EINVAL;
21489 			} else {
21490 				mutex_exit(SD_MUTEX(un));
21491 				err = sr_change_blkmode(dev, cmd, arg, flag);
21492 			}
21493 		}
21494 		break;
21495 
21496 	case CDROMGBLKMODE:
21497 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
21498 		if (!ISCD(un)) {
21499 			err = ENOTTY;
21500 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
21501 		    (un->un_f_blockcount_is_valid != FALSE)) {
21502 			/*
21503 			 * The drive is an ATAPI drive, so return the target
21504 			 * block size, since the blocksize cannot be changed
21505 			 * on ATAPI drives. Used primarily to detect whether
21506 			 * an ATAPI cdrom is present.
21507 			 */
21508 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
21509 			    sizeof (int), flag) != 0) {
21510 				err = EFAULT;
21511 			} else {
21512 				err = 0;
21513 			}
21514 
21515 		} else {
21516 			/*
21517 			 * Drive supports changing block sizes via a Mode
21518 			 * Select.
21519 			 */
21520 			err = sr_change_blkmode(dev, cmd, arg, flag);
21521 		}
21522 		break;
21523 
21524 	case CDROMGDRVSPEED:
21525 	case CDROMSDRVSPEED:
21526 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
21527 		if (!ISCD(un)) {
21528 			err = ENOTTY;
21529 		} else if (un->un_f_mmc_cap == TRUE) {
21530 			/*
21531 			 * Note: In the future the driver implementation
21532 			 * for getting and setting cd speed should entail:
21533 			 * 1) If non-mmc, try the Toshiba mode page
21534 			 *    (sr_change_speed)
21535 			 * 2) If mmc but no support for Real Time Streaming,
21536 			 *    try the SET CD SPEED (0xBB) command
21537 			 *    (sr_atapi_change_speed)
21539 			 * 3) If mmc and support for Real Time Streaming
21540 			 *    try the GET PERFORMANCE and SET STREAMING
21541 			 *    commands (not yet implemented, 4380808)
21542 			 */
21543 			/*
21544 			 * As per the recent MMC spec, CD-ROM speed is variable
21545 			 * and changes with LBA. Since there is no such
21546 			 * thing as drive speed now, fail this ioctl.
21547 			 *
21548 			 * Note: EINVAL is returned for consistency with the
21549 			 * original implementation, which included support for
21550 			 * getting the drive speed of mmc devices but not
21551 			 * setting the drive speed. Thus EINVAL would be
21552 			 * returned if a set request was made for an mmc
21553 			 * device. We no longer support get or set speed
21554 			 * for mmc, but we need to remain consistent with
21555 			 * regard to the error code returned.
21556 			 */
21557 			err = EINVAL;
21558 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21559 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
21560 		} else {
21561 			err = sr_change_speed(dev, cmd, arg, flag);
21562 		}
21563 		break;
21564 
21565 	case CDROMCDDA:
21566 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
21567 		if (!ISCD(un)) {
21568 			err = ENOTTY;
21569 		} else {
21570 			err = sr_read_cdda(dev, (void *)arg, flag);
21571 		}
21572 		break;
21573 
21574 	case CDROMCDXA:
21575 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
21576 		if (!ISCD(un)) {
21577 			err = ENOTTY;
21578 		} else {
21579 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
21580 		}
21581 		break;
21582 
21583 	case CDROMSUBCODE:
21584 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
21585 		if (!ISCD(un)) {
21586 			err = ENOTTY;
21587 		} else {
21588 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
21589 		}
21590 		break;
21591 
21592 	case DKIOCPARTINFO: {
21593 		/*
21594 		 * Return parameters describing the selected disk slice.
21595 		 * Note: this ioctl is for the Intel (x86) platform only.
21596 		 */
21597 #if defined(__i386) || defined(__amd64)
21598 		int part;
21599 
21600 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21601 		part = SDPART(dev);
21602 
21603 		/* don't check un_solaris_size for pN */
21604 		if (part < P0_RAW_DISK && un->un_solaris_size == 0) {
21605 			err = EIO;
21606 		} else {
21607 			struct part_info p;
21608 
21609 			p.p_start = (daddr_t)un->un_offset[part];
21610 			p.p_length = (int)un->un_map[part].dkl_nblk;
21611 #ifdef _MULTI_DATAMODEL
21612 			switch (ddi_model_convert_from(flag & FMODELS)) {
21613 			case DDI_MODEL_ILP32:
21614 			{
21615 				struct part_info32 p32;
21616 
21617 				p32.p_start = (daddr32_t)p.p_start;
21618 				p32.p_length = p.p_length;
21619 				if (ddi_copyout(&p32, (void *)arg,
21620 				    sizeof (p32), flag))
21621 					err = EFAULT;
21622 				break;
21623 			}
21624 
21625 			case DDI_MODEL_NONE:
21626 			{
21627 				if (ddi_copyout(&p, (void *)arg, sizeof (p),
21628 				    flag))
21629 					err = EFAULT;
21630 				break;
21631 			}
21632 			}
21633 #else /* ! _MULTI_DATAMODEL */
21634 			if (ddi_copyout(&p, (void *)arg, sizeof (p), flag))
21635 				err = EFAULT;
21636 #endif /* _MULTI_DATAMODEL */
21637 		}
21638 #else
21639 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21640 		err = ENOTTY;
21641 #endif
21642 		break;
21643 	}
21644 
21645 	case DKIOCG_PHYGEOM: {
21646 		/* Return the driver's notion of the media physical geometry */
21647 #if defined(__i386) || defined(__amd64)
21648 		struct dk_geom	disk_geom;
21649 		struct dk_geom	*dkgp = &disk_geom;
21650 
21651 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21652 		mutex_enter(SD_MUTEX(un));
21653 
21654 		if (un->un_g.dkg_nhead != 0 &&
21655 		    un->un_g.dkg_nsect != 0) {
21656 			/*
21657 			 * We succeeded in getting a geometry, but
21658 			 * right now it is being reported as just the
21659 			 * Solaris fdisk partition, just like for
21660 			 * DKIOCGGEOM. We need to change that to be
21661 			 * correct for the entire disk now.
21662 			 */
21663 			bcopy(&un->un_g, dkgp, sizeof (*dkgp));
21664 			dkgp->dkg_acyl = 0;
21665 			dkgp->dkg_ncyl = un->un_blockcount /
21666 			    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21667 		} else {
21668 			bzero(dkgp, sizeof (struct dk_geom));
21669 			/*
21670 			 * This disk does not have a Solaris VTOC,
21671 			 * so we must present a physical geometry
21672 			 * that will remain consistent regardless
21673 			 * of how the disk is used. This ensures
21674 			 * that the geometry does not change with
21675 			 * the fdisk partition type (i.e. EFI, FAT32,
21676 			 * Solaris, etc.).
21677 			 */
21678 			if (ISCD(un)) {
21679 				dkgp->dkg_nhead = un->un_pgeom.g_nhead;
21680 				dkgp->dkg_nsect = un->un_pgeom.g_nsect;
21681 				dkgp->dkg_ncyl = un->un_pgeom.g_ncyl;
21682 				dkgp->dkg_acyl = un->un_pgeom.g_acyl;
21683 			} else {
21684 				/*
21685 				 * An invalid un_blockcount can generate an
21686 				 * invalid dk_geom and may result in a
21687 				 * divide-by-zero system failure, so make sure
21688 				 * the blockcount is valid before using it.
21689 				 */
21690 				if (un->un_f_blockcount_is_valid == FALSE) {
21691 					mutex_exit(SD_MUTEX(un));
21692 					err = EIO;
21693 
21694 					break;
21695 				}
21696 				sd_convert_geometry(un->un_blockcount, dkgp);
21697 				dkgp->dkg_acyl = 0;
21698 				dkgp->dkg_ncyl = un->un_blockcount /
21699 				    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21700 			}
21701 		}
21702 		dkgp->dkg_pcyl = dkgp->dkg_ncyl + dkgp->dkg_acyl;
21703 
21704 		if (ddi_copyout(dkgp, (void *)arg,
21705 		    sizeof (struct dk_geom), flag)) {
21706 			mutex_exit(SD_MUTEX(un));
21707 			err = EFAULT;
21708 		} else {
21709 			mutex_exit(SD_MUTEX(un));
21710 			err = 0;
21711 		}
21712 #else
21713 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21714 		err = ENOTTY;
21715 #endif
21716 		break;
21717 	}
21718 
21719 	case DKIOCG_VIRTGEOM: {
21720 		/* Return the driver's notion of the media's logical geometry */
21721 #if defined(__i386) || defined(__amd64)
21722 		struct dk_geom	disk_geom;
21723 		struct dk_geom	*dkgp = &disk_geom;
21724 
21725 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21726 		mutex_enter(SD_MUTEX(un));
21727 		/*
21728 		 * If there is no HBA geometry available, or
21729 		 * if the HBA returned us something that doesn't
21730 		 * really fit into an Int 13/function 8 geometry
21731 		 * result, just fail the ioctl.  See PSARC 1998/313.
21732 		 */
21733 		if (un->un_lgeom.g_nhead == 0 ||
21734 		    un->un_lgeom.g_nsect == 0 ||
21735 		    un->un_lgeom.g_ncyl > 1024) {
21736 			mutex_exit(SD_MUTEX(un));
21737 			err = EINVAL;
21738 		} else {
21739 			dkgp->dkg_ncyl	= un->un_lgeom.g_ncyl;
21740 			dkgp->dkg_acyl	= un->un_lgeom.g_acyl;
21741 			dkgp->dkg_pcyl	= dkgp->dkg_ncyl + dkgp->dkg_acyl;
21742 			dkgp->dkg_nhead	= un->un_lgeom.g_nhead;
21743 			dkgp->dkg_nsect	= un->un_lgeom.g_nsect;
21744 
21745 			if (ddi_copyout(dkgp, (void *)arg,
21746 			    sizeof (struct dk_geom), flag)) {
21747 				mutex_exit(SD_MUTEX(un));
21748 				err = EFAULT;
21749 			} else {
21750 				mutex_exit(SD_MUTEX(un));
21751 				err = 0;
21752 			}
21753 		}
21754 #else
21755 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21756 		err = ENOTTY;
21757 #endif
21758 		break;
21759 	}
21760 #ifdef SDDEBUG
21761 /* RESET/ABORTS testing ioctls */
21762 	case DKIOCRESET: {
21763 		int	reset_level;
21764 
21765 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
21766 			err = EFAULT;
21767 		} else {
21768 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
21769 			    "reset_level = 0x%x\n", reset_level);
21770 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
21771 				err = 0;
21772 			} else {
21773 				err = EIO;
21774 			}
21775 		}
21776 		break;
21777 	}
21778 
21779 	case DKIOCABORT:
21780 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
21781 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
21782 			err = 0;
21783 		} else {
21784 			err = EIO;
21785 		}
21786 		break;
21787 #endif
21788 
21789 #ifdef SD_FAULT_INJECTION
21790 /* SDIOC FaultInjection testing ioctls */
21791 	case SDIOCSTART:
21792 	case SDIOCSTOP:
21793 	case SDIOCINSERTPKT:
21794 	case SDIOCINSERTXB:
21795 	case SDIOCINSERTUN:
21796 	case SDIOCINSERTARQ:
21797 	case SDIOCPUSH:
21798 	case SDIOCRETRIEVE:
21799 	case SDIOCRUN:
21800 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl: "
21801 		    "SDIOC detected cmd:0x%X:\n", cmd);
21802 		/* call error generator */
21803 		sd_faultinjection_ioctl(cmd, arg, un);
21804 		err = 0;
21805 		break;
21806 
21807 #endif /* SD_FAULT_INJECTION */
21808 
21809 	case DKIOCFLUSHWRITECACHE:
21810 		{
21811 			struct dk_callback *dkc = (struct dk_callback *)arg;
21812 
21813 			mutex_enter(SD_MUTEX(un));
21814 			if (!un->un_f_sync_cache_supported ||
21815 			    !un->un_f_write_cache_enabled) {
21816 				err = un->un_f_sync_cache_supported ?
21817 					0 : ENOTSUP;
21818 				mutex_exit(SD_MUTEX(un));
21819 				if ((flag & FKIOCTL) && dkc != NULL &&
21820 				    dkc->dkc_callback != NULL) {
21821 					(*dkc->dkc_callback)(dkc->dkc_cookie,
21822 					    err);
21823 					/*
21824 					 * The error was reported via the
21825 					 * callback; since a callback was
21826 					 * made, the ioctl should return 0.
21827 					 */
21828 					err = 0;
21829 				}
21830 				break;
21831 			}
21832 			mutex_exit(SD_MUTEX(un));
21833 
21834 			if ((flag & FKIOCTL) && dkc != NULL &&
21835 			    dkc->dkc_callback != NULL) {
21836 				/* async SYNC CACHE request */
21837 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
21838 			} else {
21839 				/* synchronous SYNC CACHE request */
21840 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
21841 			}
21842 		}
21843 		break;
21844 
21845 	case DKIOCGETWCE: {
21846 
21847 		int wce;
21848 
21849 		if ((err = sd_get_write_cache_enabled(un, &wce)) != 0) {
21850 			break;
21851 		}
21852 
21853 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
21854 			err = EFAULT;
21855 		}
21856 		break;
21857 	}
21858 
21859 	case DKIOCSETWCE: {
21860 
21861 		int wce, sync_supported;
21862 
21863 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
21864 			err = EFAULT;
21865 			break;
21866 		}
21867 
21868 		/*
21869 		 * Synchronize multiple threads trying to enable
21870 		 * or disable the cache via the un_f_wcc_cv
21871 		 * condition variable.
21872 		 */
21873 		mutex_enter(SD_MUTEX(un));
21874 
21875 		/*
21876 		 * Don't allow the cache to be enabled if the
21877 		 * config file has it disabled.
21878 		 */
21879 		if (un->un_f_opt_disable_cache && wce) {
21880 			mutex_exit(SD_MUTEX(un));
21881 			err = EINVAL;
21882 			break;
21883 		}
21884 
21885 		/*
21886 		 * Wait for write cache change in progress
21887 		 * bit to be clear before proceeding.
21888 		 */
21889 		while (un->un_f_wcc_inprog)
21890 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
21891 
21892 		un->un_f_wcc_inprog = 1;
21893 
21894 		if (un->un_f_write_cache_enabled && wce == 0) {
21895 			/*
21896 			 * Disable the write cache.  Don't clear
21897 			 * un_f_write_cache_enabled until after
21898 			 * the mode select and flush are complete.
21899 			 */
21900 			sync_supported = un->un_f_sync_cache_supported;
21901 			mutex_exit(SD_MUTEX(un));
21902 			if ((err = sd_cache_control(un, SD_CACHE_NOCHANGE,
21903 			    SD_CACHE_DISABLE)) == 0 && sync_supported) {
21904 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
21905 			}
21906 
21907 			mutex_enter(SD_MUTEX(un));
21908 			if (err == 0) {
21909 				un->un_f_write_cache_enabled = 0;
21910 			}
21911 
21912 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
21913 			/*
21914 			 * Set un_f_write_cache_enabled first, so there is
21915 			 * no window where the cache is enabled, but the
21916 			 * bit says it isn't.
21917 			 */
21918 			un->un_f_write_cache_enabled = 1;
21919 			mutex_exit(SD_MUTEX(un));
21920 
21921 			err = sd_cache_control(un, SD_CACHE_NOCHANGE,
21922 				SD_CACHE_ENABLE);
21923 
21924 			mutex_enter(SD_MUTEX(un));
21925 
21926 			if (err) {
21927 				un->un_f_write_cache_enabled = 0;
21928 			}
21929 		}
21930 
21931 		un->un_f_wcc_inprog = 0;
21932 		cv_broadcast(&un->un_wcc_cv);
21933 		mutex_exit(SD_MUTEX(un));
21934 		break;
21935 	}
21936 
21937 	default:
21938 		err = ENOTTY;
21939 		break;
21940 	}
21941 	mutex_enter(SD_MUTEX(un));
21942 	un->un_ncmds_in_driver--;
21943 	ASSERT(un->un_ncmds_in_driver >= 0);
21944 	mutex_exit(SD_MUTEX(un));
21945 
21946 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
21947 	return (err);
21948 }
21949 
21950 
21951 /*
21952  *    Function: sd_uscsi_ioctl
21953  *
21954  * Description: This routine is the driver entry point for handling USCSI ioctl
21955  *		requests (USCSICMD).
21956  *
21957  *   Arguments: dev	- the device number
21958  *		arg	- user provided scsi command
21959  *		flag	- this argument is a pass through to ddi_copyxxx()
21960  *			  directly from the mode argument of ioctl().
21961  *
21962  * Return Code: code returned by sd_send_scsi_cmd
21963  *		ENXIO
21964  *		EFAULT
21965  *		EAGAIN
21966  */
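
/*
 * Illustrative sketch (not part of the driver): a userland caller can
 * reach this entry point through ioctl(2) roughly as below.  The device
 * path is hypothetical; the all-zero 6-byte CDB is a TEST UNIT READY.
 *
 *	#include <sys/scsi/impl/uscsi.h>
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <stdio.h>
 *
 *	struct uscsi_cmd ucmd;
 *	char cdb[6];
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);
 *
 *	(void) memset(&ucmd, 0, sizeof (ucmd));
 *	(void) memset(cdb, 0, sizeof (cdb));
 *	ucmd.uscsi_cdb = cdb;
 *	ucmd.uscsi_cdblen = 6;
 *	ucmd.uscsi_timeout = 30;
 *	if (ioctl(fd, USCSICMD, &ucmd) != 0)
 *		perror("USCSICMD");
 */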
21967 
21968 static int
21969 sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag)
21970 {
21971 #ifdef _MULTI_DATAMODEL
21972 	/*
21973 	 * For use when a 32-bit app makes a call into a
21974 	 * 64-bit ioctl.
21975 	 */
21976 	struct uscsi_cmd32	uscsi_cmd_32_for_64;
21977 	struct uscsi_cmd32	*ucmd32 = &uscsi_cmd_32_for_64;
21978 	model_t			model;
21979 #endif /* _MULTI_DATAMODEL */
21980 	struct uscsi_cmd	*scmd = NULL;
21981 	struct sd_lun		*un = NULL;
21982 	enum uio_seg		uioseg;
21983 	char			cdb[CDB_GROUP0];
21984 	int			rval = 0;
21985 
21986 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
21987 		return (ENXIO);
21988 	}
21989 
21990 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: entry: un:0x%p\n", un);
21991 
21992 	scmd = (struct uscsi_cmd *)
21993 	    kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
21994 
21995 #ifdef _MULTI_DATAMODEL
21996 	switch (model = ddi_model_convert_from(flag & FMODELS)) {
21997 	case DDI_MODEL_ILP32:
21998 	{
21999 		if (ddi_copyin((void *)arg, ucmd32, sizeof (*ucmd32), flag)) {
22000 			rval = EFAULT;
22001 			goto done;
22002 		}
22003 		/*
22004 		 * Convert the ILP32 uscsi data from the
22005 		 * application to LP64 for internal use.
22006 		 */
22007 		uscsi_cmd32touscsi_cmd(ucmd32, scmd);
22008 		break;
22009 	}
22010 	case DDI_MODEL_NONE:
22011 		if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22012 			rval = EFAULT;
22013 			goto done;
22014 		}
22015 		break;
22016 	}
22017 #else /* ! _MULTI_DATAMODEL */
22018 	if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
22019 		rval = EFAULT;
22020 		goto done;
22021 	}
22022 #endif /* _MULTI_DATAMODEL */
22023 
22024 	scmd->uscsi_flags &= ~USCSI_NOINTR;
22025 	uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE;
22026 	if (un->un_f_format_in_progress == TRUE) {
22027 		rval = EAGAIN;
22028 		goto done;
22029 	}
22030 
22031 	/*
22032 	 * We must do the ddi_copyin() on the uscsi_cdb here so that
22033 	 * we will have a valid cdb[0] to test.
22034 	 */
22035 	if ((ddi_copyin(scmd->uscsi_cdb, cdb, CDB_GROUP0, flag) == 0) &&
22036 	    (cdb[0] == SCMD_FORMAT)) {
22037 		SD_TRACE(SD_LOG_IOCTL, un,
22038 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22039 		mutex_enter(SD_MUTEX(un));
22040 		un->un_f_format_in_progress = TRUE;
22041 		mutex_exit(SD_MUTEX(un));
22042 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22043 		    SD_PATH_STANDARD);
22044 		mutex_enter(SD_MUTEX(un));
22045 		un->un_f_format_in_progress = FALSE;
22046 		mutex_exit(SD_MUTEX(un));
22047 	} else {
22048 		SD_TRACE(SD_LOG_IOCTL, un,
22049 		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
22050 		/*
22051 		 * It's OK to fall into here even if the ddi_copyin()
22052 		 * on the uscsi_cdb above fails, because sd_send_scsi_cmd()
22053 		 * does this same copyin and will return EFAULT
22054 		 * if it fails.
22055 		 */
22056 		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
22057 		    SD_PATH_STANDARD);
22058 	}
22059 #ifdef _MULTI_DATAMODEL
22060 	switch (model) {
22061 	case DDI_MODEL_ILP32:
22062 		/*
22063 		 * Convert back to ILP32 before copyout to the
22064 		 * application
22065 		 */
22066 		uscsi_cmdtouscsi_cmd32(scmd, ucmd32);
22067 		if (ddi_copyout(ucmd32, (void *)arg, sizeof (*ucmd32), flag)) {
22068 			if (rval != 0) {
22069 				rval = EFAULT;
22070 			}
22071 		}
22072 		break;
22073 	case DDI_MODEL_NONE:
22074 		if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22075 			if (rval != 0) {
22076 				rval = EFAULT;
22077 			}
22078 		}
22079 		break;
22080 	}
22081 #else /* ! _MULTI_DATAMODEL */
22082 	if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
22083 		if (rval != 0) {
22084 			rval = EFAULT;
22085 		}
22086 	}
22087 #endif /* _MULTI_DATAMODEL */
22088 done:
22089 	kmem_free(scmd, sizeof (struct uscsi_cmd));
22090 
22091 	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: exit: un:0x%p\n", un);
22092 
22093 	return (rval);
22094 }
22095 
22096 
22097 /*
22098  *    Function: sd_dkio_ctrl_info
22099  *
22100  * Description: This routine is the driver entry point for handling controller
22101  *		information ioctl requests (DKIOCINFO).
22102  *
22103  *   Arguments: dev  - the device number
22104  *		arg  - pointer to user provided dk_cinfo structure
22105  *		       specifying the controller type and attributes.
22106  *		flag - this argument is a pass through to ddi_copyxxx()
22107  *		       directly from the mode argument of ioctl().
22108  *
22109  * Return Code: 0
22110  *		EFAULT
22111  *		ENXIO
22112  */
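
/*
 * Illustrative sketch (not part of the driver): DKIOCINFO is consumed
 * from userland roughly as below; the device path is hypothetical.
 *
 *	#include <sys/dkio.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	struct dk_cinfo ci;
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);
 *
 *	if (ioctl(fd, DKIOCINFO, &ci) == 0)
 *		(void) printf("ctrl %s%u, unit %u, maxxfer %u blks\n",
 *		    ci.dki_cname, ci.dki_cnum, ci.dki_unit,
 *		    ci.dki_maxtransfer);
 */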
22113 
22114 static int
22115 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
22116 {
22117 	struct sd_lun	*un = NULL;
22118 	struct dk_cinfo	*info;
22119 	dev_info_t	*pdip;
22120 	int		lun, tgt;
22121 
22122 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22123 		return (ENXIO);
22124 	}
22125 
22126 	info = (struct dk_cinfo *)
22127 		kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
22128 
22129 	switch (un->un_ctype) {
22130 	case CTYPE_CDROM:
22131 		info->dki_ctype = DKC_CDROM;
22132 		break;
22133 	default:
22134 		info->dki_ctype = DKC_SCSI_CCS;
22135 		break;
22136 	}
22137 	pdip = ddi_get_parent(SD_DEVINFO(un));
22138 	info->dki_cnum = ddi_get_instance(pdip);
22139 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
22140 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
22141 	} else {
22142 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
22143 		    DK_DEVLEN - 1);
22144 	}
22145 
22146 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22147 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
22148 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22149 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
22150 
22151 	/* Unit Information */
22152 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
22153 	info->dki_slave = ((tgt << 3) | lun);
22154 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
22155 	    DK_DEVLEN - 1);
22156 	info->dki_flags = DKI_FMTVOL;
22157 	info->dki_partition = SDPART(dev);
22158 
22159 	/* Max Transfer size of this device in blocks */
22160 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
22161 	info->dki_addr = 0;
22162 	info->dki_space = 0;
22163 	info->dki_prio = 0;
22164 	info->dki_vec = 0;
22165 
22166 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
22167 		kmem_free(info, sizeof (struct dk_cinfo));
22168 		return (EFAULT);
22169 	} else {
22170 		kmem_free(info, sizeof (struct dk_cinfo));
22171 		return (0);
22172 	}
22173 }
22174 
22175 
22176 /*
22177  *    Function: sd_get_media_info
22178  *
22179  * Description: This routine is the driver entry point for handling ioctl
22180  *		requests for the media type or command set profile used by the
22181  *		drive to operate on the media (DKIOCGMEDIAINFO).
22182  *
22183  *   Arguments: dev	- the device number
22184  *		arg	- pointer to user provided dk_minfo structure
22185  *			  specifying the media type, logical block size and
22186  *			  drive capacity.
22187  *		flag	- this argument is a pass through to ddi_copyxxx()
22188  *			  directly from the mode argument of ioctl().
22189  *
22190  * Return Code: 0
22191  *		EACCES
22192  *		EFAULT
22193  *		ENXIO
22194  *		EIO
22195  */
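
/*
 * Illustrative sketch (not part of the driver): given a descriptor fd
 * opened on the raw device, a userland consumer can size raw I/O from
 * the returned dk_minfo.
 *
 *	struct dk_minfo mi;
 *
 *	if (ioctl(fd, DKIOCGMEDIAINFO, &mi) == 0)
 *		(void) printf("media type 0x%x, lbsize %u, cap %llu\n",
 *		    mi.dki_media_type, mi.dki_lbsize,
 *		    (u_longlong_t)mi.dki_capacity);
 */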
22196 
22197 static int
22198 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
22199 {
22200 	struct sd_lun		*un = NULL;
22201 	struct uscsi_cmd	com;
22202 	struct scsi_inquiry	*sinq;
22203 	struct dk_minfo		media_info;
22204 	u_longlong_t		media_capacity;
22205 	uint64_t		capacity;
22206 	uint_t			lbasize;
22207 	uchar_t			*out_data;
22208 	uchar_t			*rqbuf;
22209 	int			rval = 0;
22210 	int			rtn;
22211 
22212 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
22213 	    (un->un_state == SD_STATE_OFFLINE)) {
22214 		return (ENXIO);
22215 	}
22216 
22217 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");
22218 
22219 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
22220 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
22221 
22222 	/* Issue a TUR to determine if the drive is ready with media present */
22223 	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
22224 	if (rval == ENXIO) {
22225 		goto done;
22226 	}
22227 
22228 	/* Now get configuration data */
22229 	if (ISCD(un)) {
22230 		media_info.dki_media_type = DK_CDROM;
22231 
22232 		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
22233 		if (un->un_f_mmc_cap == TRUE) {
22234 			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
22235 				SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN);
22236 
22237 			if (rtn) {
22238 				/*
22239 				 * Failed for other than an illegal request
22240 				 * or command not supported
22241 				 */
22242 				if ((com.uscsi_status == STATUS_CHECK) &&
22243 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
22244 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
22245 					    (rqbuf[12] != 0x20)) {
22246 						rval = EIO;
22247 						goto done;
22248 					}
22249 				}
22250 			} else {
22251 				/*
22252 				 * The GET CONFIGURATION command succeeded
22253 				 * so set the media type according to the
22254 				 * returned data
22255 				 */
22256 				media_info.dki_media_type = out_data[6];
22257 				media_info.dki_media_type <<= 8;
22258 				media_info.dki_media_type |= out_data[7];
22259 			}
22260 		}
22261 	} else {
22262 		/*
22263 		 * The profile list is not available, so we attempt to identify
22264 		 * the media type based on the inquiry data
22265 		 */
22266 		sinq = un->un_sd->sd_inq;
22267 		if (sinq->inq_qual == 0) {
22268 			/* This is a direct access device */
22269 			media_info.dki_media_type = DK_FIXED_DISK;
22270 
22271 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
22272 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
22273 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
22274 					media_info.dki_media_type = DK_ZIP;
22275 				} else if (
22276 				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
22277 					media_info.dki_media_type = DK_JAZ;
22278 				}
22279 			}
22280 		} else {
22281 			/* Not a CD or direct access so return unknown media */
22282 			media_info.dki_media_type = DK_UNKNOWN;
22283 		}
22284 	}
22285 
22286 	/* Now read the capacity so we can provide the lbasize and capacity */
22287 	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
22288 	    SD_PATH_DIRECT)) {
22289 	case 0:
22290 		break;
22291 	case EACCES:
22292 		rval = EACCES;
22293 		goto done;
22294 	default:
22295 		rval = EIO;
22296 		goto done;
22297 	}
22298 
22299 	media_info.dki_lbsize = lbasize;
22300 	media_capacity = capacity;
22301 
22302 	/*
22303 	 * sd_send_scsi_READ_CAPACITY() reports the capacity in
22304 	 * un->un_sys_blocksize chunks, so we need to convert it
22305 	 * into lbasize chunks.
22306 	 */
22307 	media_capacity *= un->un_sys_blocksize;
22308 	media_capacity /= lbasize;
22309 	media_info.dki_capacity = media_capacity;
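	/*
	 * Worked example (illustrative numbers): with un_sys_blocksize of
	 * 512 bytes, a reported capacity of 1000000 system blocks on media
	 * with an lbasize of 2048 converts to 1000000 * 512 / 2048 =
	 * 250000 media blocks.
	 */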
22310 
22311 	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
22312 		rval = EFAULT;
22313 		/* Use a goto so any code added below in future isn't skipped */
22314 		goto done;
22315 	}
22316 done:
22317 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
22318 	kmem_free(rqbuf, SENSE_LENGTH);
22319 	return (rval);
22320 }
22321 
22322 
22323 /*
22324  *    Function: sd_dkio_get_geometry
22325  *
22326  * Description: This routine is the driver entry point for handling user
22327  *		requests to get the device geometry (DKIOCGGEOM).
22328  *
22329  *   Arguments: dev  - the device number
22330  *		arg  - pointer to user provided dk_geom structure specifying
22331  *			the controller's notion of the current geometry.
22332  *		flag - this argument is a pass through to ddi_copyxxx()
22333  *		       directly from the mode argument of ioctl().
22334  *		geom_validated - flag indicating if the device geometry has been
22335  *				 previously validated in the sdioctl routine.
22336  *
22337  * Return Code: 0
22338  *		EFAULT
22339  *		ENXIO
22340  *		EIO
22341  */
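
/*
 * Illustrative sketch (not part of the driver): given a descriptor fd
 * on the raw device, DKIOCGGEOM is consumed from userland as below.
 *
 *	struct dk_geom g;
 *
 *	if (ioctl(fd, DKIOCGGEOM, &g) == 0)
 *		(void) printf("%u cyl, %u head, %u sect\n",
 *		    g.dkg_ncyl, g.dkg_nhead, g.dkg_nsect);
 */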
22342 
22343 static int
22344 sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag, int geom_validated)
22345 {
22346 	struct sd_lun	*un = NULL;
22347 	struct dk_geom	*tmp_geom = NULL;
22348 	int		rval = 0;
22349 
22350 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22351 		return (ENXIO);
22352 	}
22353 
22354 	if (geom_validated == FALSE) {
22355 		/*
22356 		 * sd_validate_geometry does not spin a disk up
22357 		 * if it was spun down. We need to make sure it
22358 		 * is ready.
22359 		 */
22360 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22361 			return (rval);
22362 		}
22363 		mutex_enter(SD_MUTEX(un));
22364 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
22365 		mutex_exit(SD_MUTEX(un));
22366 	}
22367 	if (rval)
22368 		return (rval);
22369 
22370 	/*
22371 	 * It is possible that un_solaris_size is 0 (uninitialized)
22372 	 * after sd_unit_attach; a reservation conflict may cause
22373 	 * this. Thus, the zero check of un_solaris_size should
22374 	 * occur after the sd_validate_geometry() call.
22375 	 */
22376 #if defined(__i386) || defined(__amd64)
22377 	if (un->un_solaris_size == 0) {
22378 		return (EIO);
22379 	}
22380 #endif
22381 
22382 	/*
22383 	 * Make a local copy of the soft state geometry to avoid some potential
22384 	 * race conditions associated with holding the mutex and updating the
22385 	 * write_reinstruct value.
22386 	 */
22387 	tmp_geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22388 	mutex_enter(SD_MUTEX(un));
22389 	bcopy(&un->un_g, tmp_geom, sizeof (struct dk_geom));
22390 	mutex_exit(SD_MUTEX(un));
22391 
22392 	if (tmp_geom->dkg_write_reinstruct == 0) {
22393 		tmp_geom->dkg_write_reinstruct =
22394 		    (int)((int)(tmp_geom->dkg_nsect * tmp_geom->dkg_rpm *
22395 		    sd_rot_delay) / (int)60000);
22396 	}
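	/*
	 * Worked example (illustrative numbers, assuming sd_rot_delay is
	 * in milliseconds): dkg_nsect * dkg_rpm is sectors swept per
	 * minute, and dividing by 60000 scales that to sectors per
	 * millisecond.  For nsect = 128, rpm = 7200 and sd_rot_delay = 4,
	 * dkg_write_reinstruct = (128 * 7200 * 4) / 60000 = 61 sectors.
	 */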
22397 
22398 	rval = ddi_copyout(tmp_geom, (void *)arg, sizeof (struct dk_geom),
22399 	    flag);
22400 	if (rval != 0) {
22401 		rval = EFAULT;
22402 	}
22403 
22404 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22405 	return (rval);
22406 
22407 }
22408 
22409 
22410 /*
22411  *    Function: sd_dkio_set_geometry
22412  *
22413  * Description: This routine is the driver entry point for handling user
22414  *		requests to set the device geometry (DKIOCSGEOM). The actual
22415  *		device geometry is not updated, just the driver "notion" of it.
22416  *
22417  *   Arguments: dev  - the device number
22418  *		arg  - pointer to user provided dk_geom structure used to set
22419  *			the controller's notion of the current geometry.
22420  *		flag - this argument is a pass through to ddi_copyxxx()
22421  *		       directly from the mode argument of ioctl().
22422  *
22423  * Return Code: 0
22424  *		EFAULT
22425  *		ENXIO
22426  *		EIO
22427  */
22428 
22429 static int
22430 sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag)
22431 {
22432 	struct sd_lun	*un = NULL;
22433 	struct dk_geom	*tmp_geom;
22434 	struct dk_map	*lp;
22435 	int		rval = 0;
22436 	int		i;
22437 
22438 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22439 		return (ENXIO);
22440 	}
22441 
22442 	/*
22443 	 * Make sure the geometry is valid before setting the geometry.
22444 	 */
22445 	if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22446 		return (rval);
22447 	}
22448 	mutex_enter(SD_MUTEX(un));
22449 
22450 	if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22451 		mutex_exit(SD_MUTEX(un));
22452 		return (rval);
22453 	}
22454 	mutex_exit(SD_MUTEX(un));
22455 
22456 #if defined(__i386) || defined(__amd64)
22457 	if (un->un_solaris_size == 0) {
22458 		return (EIO);
22459 	}
22460 #endif
22461 
22462 	/*
22463 	 * We need to copy the user-specified geometry into local
22464 	 * storage and then update the soft state. We don't want to hold
22465 	 * the mutex and copyin directly from the user to the soft state.
22466 	 */
22467 	tmp_geom = (struct dk_geom *)
22468 	    kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22469 	rval = ddi_copyin(arg, tmp_geom, sizeof (struct dk_geom), flag);
22470 	if (rval != 0) {
22471 		kmem_free(tmp_geom, sizeof (struct dk_geom));
22472 		return (EFAULT);
22473 	}
22474 
22475 	mutex_enter(SD_MUTEX(un));
22476 	bcopy(tmp_geom, &un->un_g, sizeof (struct dk_geom));
22477 	for (i = 0; i < NDKMAP; i++) {
22478 		lp  = &un->un_map[i];
22479 		un->un_offset[i] =
22480 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22481 #if defined(__i386) || defined(__amd64)
22482 		un->un_offset[i] += un->un_solaris_offset;
22483 #endif
22484 	}
22485 	un->un_f_geometry_is_valid = FALSE;
22486 	mutex_exit(SD_MUTEX(un));
22487 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22488 
22489 	return (rval);
22490 }
22491 
22492 
22493 /*
22494  *    Function: sd_dkio_get_partition
22495  *
22496  * Description: This routine is the driver entry point for handling user
22497  *		requests to get the partition table (DKIOCGAPART).
22498  *
22499  *   Arguments: dev  - the device number
22500  *		arg  - pointer to user provided dk_allmap structure specifying
22501  *			the controller's notion of the current partition table.
22502  *		flag - this argument is a pass through to ddi_copyxxx()
22503  *		       directly from the mode argument of ioctl().
22504  *		geom_validated - flag indicating if the device geometry has been
22505  *				 previously validated in the sdioctl routine.
22506  *
22507  * Return Code: 0
22508  *		EFAULT
22509  *		ENXIO
22510  *		EIO
22511  */
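
/*
 * Illustrative sketch (not part of the driver): given a descriptor fd
 * on the raw device, DKIOCGAPART returns the whole slice map in one
 * call.
 *
 *	struct dk_allmap map;
 *	int i;
 *
 *	if (ioctl(fd, DKIOCGAPART, &map) == 0)
 *		for (i = 0; i < NDKMAP; i++)
 *			(void) printf("slice %d: cyl %ld, %ld blocks\n", i,
 *			    (long)map.dka_map[i].dkl_cylno,
 *			    (long)map.dka_map[i].dkl_nblk);
 */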
22512 
22513 static int
22514 sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag, int geom_validated)
22515 {
22516 	struct sd_lun	*un = NULL;
22517 	int		rval = 0;
22518 	int		size;
22519 
22520 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22521 		return (ENXIO);
22522 	}
22523 
22524 	/*
22525 	 * Make sure the geometry is valid before getting the partition
22526 	 * information.
22527 	 */
22528 	mutex_enter(SD_MUTEX(un));
22529 	if (geom_validated == FALSE) {
22530 		/*
22531 		 * sd_validate_geometry does not spin a disk up
22532 		 * if it was spun down. We need to make sure it
22533 		 * is ready before validating the geometry.
22534 		 */
22535 		mutex_exit(SD_MUTEX(un));
22536 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22537 			return (rval);
22538 		}
22539 		mutex_enter(SD_MUTEX(un));
22540 
22541 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22542 			mutex_exit(SD_MUTEX(un));
22543 			return (rval);
22544 		}
22545 	}
22546 	mutex_exit(SD_MUTEX(un));
22547 
22548 	/*
22549 	 * It is possible that un_solaris_size is 0 (uninitialized)
22550 	 * after sd_unit_attach; a reservation conflict may cause
22551 	 * this. Thus, the zero check of un_solaris_size should
22552 	 * occur after the sd_validate_geometry() call.
22553 	 */
22554 #if defined(__i386) || defined(__amd64)
22555 	if (un->un_solaris_size == 0) {
22556 		return (EIO);
22557 	}
22558 #endif
22559 
22560 #ifdef _MULTI_DATAMODEL
22561 	switch (ddi_model_convert_from(flag & FMODELS)) {
22562 	case DDI_MODEL_ILP32: {
22563 		struct dk_map32 dk_map32[NDKMAP];
22564 		int		i;
22565 
22566 		for (i = 0; i < NDKMAP; i++) {
22567 			dk_map32[i].dkl_cylno = un->un_map[i].dkl_cylno;
22568 			dk_map32[i].dkl_nblk  = un->un_map[i].dkl_nblk;
22569 		}
22570 		size = NDKMAP * sizeof (struct dk_map32);
22571 		rval = ddi_copyout(dk_map32, (void *)arg, size, flag);
22572 		if (rval != 0) {
22573 			rval = EFAULT;
22574 		}
22575 		break;
22576 	}
22577 	case DDI_MODEL_NONE:
22578 		size = NDKMAP * sizeof (struct dk_map);
22579 		rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22580 		if (rval != 0) {
22581 			rval = EFAULT;
22582 		}
22583 		break;
22584 	}
22585 #else /* ! _MULTI_DATAMODEL */
22586 	size = NDKMAP * sizeof (struct dk_map);
22587 	rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
22588 	if (rval != 0) {
22589 		rval = EFAULT;
22590 	}
22591 #endif /* _MULTI_DATAMODEL */
22592 	return (rval);
22593 }
22594 
22595 
22596 /*
22597  *    Function: sd_dkio_set_partition
22598  *
22599  * Description: This routine is the driver entry point for handling user
22600  *		requests to set the partition table (DKIOCSAPART). The actual
22601  *		device partition is not updated.
22602  *
22603  *   Arguments: dev  - the device number
22604  *		arg  - pointer to user provided dk_allmap structure used to set
22605  *			the controller's notion of the partition table.
22606  *		flag - this argument is a pass through to ddi_copyxxx()
22607  *		       directly from the mode argument of ioctl().
22608  *
22609  * Return Code: 0
22610  *		EINVAL
22611  *		EFAULT
22612  *		ENXIO
22613  *		EIO
22614  */
22615 
22616 static int
22617 sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag)
22618 {
22619 	struct sd_lun	*un = NULL;
22620 	struct dk_map	dk_map[NDKMAP];
22621 	struct dk_map	*lp;
22622 	int		rval = 0;
22623 	int		size;
22624 	int		i;
22625 #if defined(_SUNOS_VTOC_16)
22626 	struct dkl_partition	*vp;
22627 #endif
22628 
22629 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22630 		return (ENXIO);
22631 	}
22632 
22633 	/*
22634 	 * Set the map for all logical partitions.  We hold
22635 	 * the mutex just to make sure the map is never seen
22636 	 * while it is half updated.
22637 	 */
22638 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_solaris_size))
22639 	mutex_enter(SD_MUTEX(un));
22640 	if (un->un_blockcount > DK_MAX_BLOCKS) {
22641 		mutex_exit(SD_MUTEX(un));
22642 		return (ENOTSUP);
22643 	}
22644 	mutex_exit(SD_MUTEX(un));
22645 
22646 	/*
22647 	 * Make sure the geometry is valid before setting the partitions.
22648 	 */
22649 	if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22650 		return (rval);
22651 	}
22652 	mutex_enter(SD_MUTEX(un));
22653 
22654 	if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22655 		mutex_exit(SD_MUTEX(un));
22656 		return (rval);
22657 	}
22658 	mutex_exit(SD_MUTEX(un));
22659 
22660 #if defined(__i386) || defined(__amd64)
22661 	if (un->un_solaris_size == 0) {
22662 		return (EIO);
22663 	}
22664 #endif
22665 
22666 #ifdef _MULTI_DATAMODEL
22667 	switch (ddi_model_convert_from(flag & FMODELS)) {
22668 	case DDI_MODEL_ILP32: {
22669 		struct dk_map32 dk_map32[NDKMAP];
22670 
22671 		size = NDKMAP * sizeof (struct dk_map32);
22672 		rval = ddi_copyin((void *)arg, dk_map32, size, flag);
22673 		if (rval != 0) {
22674 			return (EFAULT);
22675 		}
22676 		for (i = 0; i < NDKMAP; i++) {
22677 			dk_map[i].dkl_cylno = dk_map32[i].dkl_cylno;
22678 			dk_map[i].dkl_nblk  = dk_map32[i].dkl_nblk;
22679 		}
22680 		break;
22681 	}
22682 	case DDI_MODEL_NONE:
22683 		size = NDKMAP * sizeof (struct dk_map);
22684 		rval = ddi_copyin((void *)arg, dk_map, size, flag);
22685 		if (rval != 0) {
22686 			return (EFAULT);
22687 		}
22688 		break;
22689 	}
22690 #else /* ! _MULTI_DATAMODEL */
22691 	size = NDKMAP * sizeof (struct dk_map);
22692 	rval = ddi_copyin((void *)arg, dk_map, size, flag);
22693 	if (rval != 0) {
22694 		return (EFAULT);
22695 	}
22696 #endif /* _MULTI_DATAMODEL */
22697 
22698 	mutex_enter(SD_MUTEX(un));
22699 	/* Note: The size used in this bcopy is set based upon the data model */
22700 	bcopy(dk_map, un->un_map, size);
22701 #if defined(_SUNOS_VTOC_16)
22702 	vp = (struct dkl_partition *)&(un->un_vtoc);
22703 #endif	/* defined(_SUNOS_VTOC_16) */
22704 	for (i = 0; i < NDKMAP; i++) {
22705 		lp  = &un->un_map[i];
22706 		un->un_offset[i] =
22707 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22708 #if defined(_SUNOS_VTOC_16)
22709 		vp->p_start = un->un_offset[i];
22710 		vp->p_size = lp->dkl_nblk;
22711 		vp++;
22712 #endif	/* defined(_SUNOS_VTOC_16) */
22713 #if defined(__i386) || defined(__amd64)
22714 		un->un_offset[i] += un->un_solaris_offset;
22715 #endif
22716 	}
22717 	mutex_exit(SD_MUTEX(un));
22718 	return (rval);
22719 }
22720 
22721 
22722 /*
22723  *    Function: sd_dkio_get_vtoc
22724  *
22725  * Description: This routine is the driver entry point for handling user
22726  *		requests to get the current volume table of contents
22727  *		(DKIOCGVTOC).
22728  *
22729  *   Arguments: dev  - the device number
22730  *		arg  - pointer to user provided vtoc structure specifying
22731  *			the current vtoc.
22732  *		flag - this argument is a pass through to ddi_copyxxx()
22733  *		       directly from the mode argument of ioctl().
22734  *		geom_validated - flag indicating if the device geometry has been
22735  *				 previously validated in the sdioctl routine.
22736  *
22737  * Return Code: 0
22738  *		EFAULT
22739  *		ENXIO
22740  *		EIO
22741  */
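
/*
 * Illustrative sketch (not part of the driver): given a descriptor fd
 * on the raw device, the vtoc is read and sanity-checked from userland
 * as below.
 *
 *	#include <sys/vtoc.h>
 *
 *	struct vtoc vt;
 *
 *	if (ioctl(fd, DKIOCGVTOC, &vt) == 0 && vt.v_sanity == VTOC_SANE)
 *		(void) printf("slice 0: tag %u, %ld sectors\n",
 *		    vt.v_part[0].p_tag, (long)vt.v_part[0].p_size);
 */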
22742 
22743 static int
22744 sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag, int geom_validated)
22745 {
22746 	struct sd_lun	*un = NULL;
22747 #if defined(_SUNOS_VTOC_8)
22748 	struct vtoc	user_vtoc;
22749 #endif	/* defined(_SUNOS_VTOC_8) */
22750 	int		rval = 0;
22751 
22752 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22753 		return (ENXIO);
22754 	}
22755 
22756 	mutex_enter(SD_MUTEX(un));
22757 	if (geom_validated == FALSE) {
22758 		/*
22759 		 * sd_validate_geometry does not spin a disk up
22760 		 * if it was spun down. We need to make sure it
22761 		 * is ready.
22762 		 */
22763 		mutex_exit(SD_MUTEX(un));
22764 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22765 			return (rval);
22766 		}
22767 		mutex_enter(SD_MUTEX(un));
22768 		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
22769 			mutex_exit(SD_MUTEX(un));
22770 			return (rval);
22771 		}
22772 	}
22773 
22774 #if defined(_SUNOS_VTOC_8)
22775 	sd_build_user_vtoc(un, &user_vtoc);
22776 	mutex_exit(SD_MUTEX(un));
22777 
22778 #ifdef _MULTI_DATAMODEL
22779 	switch (ddi_model_convert_from(flag & FMODELS)) {
22780 	case DDI_MODEL_ILP32: {
22781 		struct vtoc32 user_vtoc32;
22782 
22783 		vtoctovtoc32(user_vtoc, user_vtoc32);
22784 		if (ddi_copyout(&user_vtoc32, (void *)arg,
22785 		    sizeof (struct vtoc32), flag)) {
22786 			return (EFAULT);
22787 		}
22788 		break;
22789 	}
22790 
22791 	case DDI_MODEL_NONE:
22792 		if (ddi_copyout(&user_vtoc, (void *)arg,
22793 		    sizeof (struct vtoc), flag)) {
22794 			return (EFAULT);
22795 		}
22796 		break;
22797 	}
22798 #else /* ! _MULTI_DATAMODEL */
22799 	if (ddi_copyout(&user_vtoc, (void *)arg, sizeof (struct vtoc), flag)) {
22800 		return (EFAULT);
22801 	}
22802 #endif /* _MULTI_DATAMODEL */
22803 
22804 #elif defined(_SUNOS_VTOC_16)
22805 	mutex_exit(SD_MUTEX(un));
22806 
22807 #ifdef _MULTI_DATAMODEL
22808 	/*
22809 	 * The un_vtoc structure is a "struct dk_vtoc"  which is always
22810 	 * 32-bit to maintain compatibility with existing on-disk
22811 	 * structures.  Thus, we need to convert the structure when copying
22812 	 * it out to a datamodel-dependent "struct vtoc" in a 64-bit
22813 	 * program.  If the target is a 32-bit program, then no conversion
22814 	 * is necessary.
22815 	 */
22816 	/* LINTED: logical expression always true: op "||" */
22817 	ASSERT(sizeof (un->un_vtoc) == sizeof (struct vtoc32));
22818 	switch (ddi_model_convert_from(flag & FMODELS)) {
22819 	case DDI_MODEL_ILP32:
22820 		if (ddi_copyout(&(un->un_vtoc), (void *)arg,
22821 		    sizeof (un->un_vtoc), flag)) {
22822 			return (EFAULT);
22823 		}
22824 		break;
22825 
22826 	case DDI_MODEL_NONE: {
22827 		struct vtoc user_vtoc;
22828 
22829 		vtoc32tovtoc(un->un_vtoc, user_vtoc);
22830 		if (ddi_copyout(&user_vtoc, (void *)arg,
22831 		    sizeof (struct vtoc), flag)) {
22832 			return (EFAULT);
22833 		}
22834 		break;
22835 	}
22836 	}
22837 #else /* ! _MULTI_DATAMODEL */
22838 	if (ddi_copyout(&(un->un_vtoc), (void *)arg, sizeof (un->un_vtoc),
22839 	    flag)) {
22840 		return (EFAULT);
22841 	}
22842 #endif /* _MULTI_DATAMODEL */
22843 #else
22844 #error "No VTOC format defined."
22845 #endif
22846 
22847 	return (rval);
22848 }
22849 
22850 static int
22851 sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag)
22852 {
22853 	struct sd_lun	*un = NULL;
22854 	dk_efi_t	user_efi;
22855 	int		rval = 0;
22856 	void		*buffer;
22857 
22858 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
22859 		return (ENXIO);
22860 
22861 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
22862 		return (EFAULT);
22863 
22864 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
22865 
22866 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
22867 	    (user_efi.dki_length > un->un_max_xfer_size))
22868 		return (EINVAL);
22869 
22870 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
22871 	rval = sd_send_scsi_READ(un, buffer, user_efi.dki_length,
22872 	    user_efi.dki_lba, SD_PATH_DIRECT);
22873 	if (rval == 0 && ddi_copyout(buffer, user_efi.dki_data,
22874 	    user_efi.dki_length, flag) != 0)
22875 		rval = EFAULT;
22876 
22877 	kmem_free(buffer, user_efi.dki_length);
22878 	return (rval);
22879 }
22880 
22881 /*
22882  *    Function: sd_build_user_vtoc
22883  *
22884  * Description: This routine populates a pass by reference variable with the
22885  *		current volume table of contents.
22886  *
22887  *   Arguments: un - driver soft state (unit) structure
22888  *		user_vtoc - pointer to vtoc structure to be populated
22889  */
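
/*
 * Worked example (illustrative numbers): with dkg_nhead = 16 and
 * dkg_nsect = 63, nblks below is 16 * 63 = 1008 sectors per cylinder,
 * so a slice with dkl_cylno = 10 is returned with
 * p_start = 10 * 1008 = 10080.
 */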
22890 
22891 static void
22892 sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
22893 {
22894 	struct dk_map2		*lpart;
22895 	struct dk_map		*lmap;
22896 	struct partition	*vpart;
22897 	int			nblks;
22898 	int			i;
22899 
22900 	ASSERT(mutex_owned(SD_MUTEX(un)));
22901 
22902 	/*
22903 	 * Return vtoc structure fields in the provided VTOC area, addressed
22904 	 * by *user_vtoc.
22905 	 */
22906 	bzero(user_vtoc, sizeof (struct vtoc));
22907 	user_vtoc->v_bootinfo[0] = un->un_vtoc.v_bootinfo[0];
22908 	user_vtoc->v_bootinfo[1] = un->un_vtoc.v_bootinfo[1];
22909 	user_vtoc->v_bootinfo[2] = un->un_vtoc.v_bootinfo[2];
22910 	user_vtoc->v_sanity	= VTOC_SANE;
22911 	user_vtoc->v_version	= un->un_vtoc.v_version;
22912 	bcopy(un->un_vtoc.v_volume, user_vtoc->v_volume, LEN_DKL_VVOL);
22913 	user_vtoc->v_sectorsz = un->un_sys_blocksize;
22914 	user_vtoc->v_nparts = un->un_vtoc.v_nparts;
22915 	bcopy(un->un_vtoc.v_reserved, user_vtoc->v_reserved,
22916 	    sizeof (un->un_vtoc.v_reserved));
22917 	/*
22918 	 * Convert partitioning information.
22919 	 *
22920 	 * Note the conversion from starting cylinder number
22921 	 * to starting sector number.
22922 	 */
22923 	lmap = un->un_map;
22924 	lpart = (struct dk_map2 *)un->un_vtoc.v_part;
22925 	vpart = user_vtoc->v_part;
22926 
22927 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
22928 
22929 	for (i = 0; i < V_NUMPAR; i++) {
22930 		vpart->p_tag	= lpart->p_tag;
22931 		vpart->p_flag	= lpart->p_flag;
22932 		vpart->p_start	= lmap->dkl_cylno * nblks;
22933 		vpart->p_size	= lmap->dkl_nblk;
22934 		lmap++;
22935 		lpart++;
22936 		vpart++;
22937 
22938 		/* (4364927) */
22939 		user_vtoc->timestamp[i] = (time_t)un->un_vtoc.v_timestamp[i];
22940 	}
22941 
22942 	bcopy(un->un_asciilabel, user_vtoc->v_asciilabel, LEN_DKL_ASCII);
22943 }
22944 
22945 static int
22946 sd_dkio_partition(dev_t dev, caddr_t arg, int flag)
22947 {
22948 	struct sd_lun		*un = NULL;
22949 	struct partition64	p64;
22950 	int			rval = 0;
22951 	uint_t			nparts;
22952 	efi_gpe_t		*partitions;
22953 	efi_gpt_t		*buffer;
22954 	diskaddr_t		gpe_lba;
22955 
22956 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22957 		return (ENXIO);
22958 	}
22959 
22960 	if (ddi_copyin((const void *)arg, &p64,
22961 	    sizeof (struct partition64), flag)) {
22962 		return (EFAULT);
22963 	}
22964 
22965 	buffer = kmem_alloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
22966 	rval = sd_send_scsi_READ(un, buffer, DEV_BSIZE,
22967 		1, SD_PATH_DIRECT);
22968 	if (rval != 0)
22969 		goto done_error;
22970 
22971 	sd_swap_efi_gpt(buffer);
22972 
22973 	if ((rval = sd_validate_efi(buffer)) != 0)
22974 		goto done_error;
22975 
22976 	nparts = buffer->efi_gpt_NumberOfPartitionEntries;
22977 	gpe_lba = buffer->efi_gpt_PartitionEntryLBA;
22978 	if (p64.p_partno > nparts) {
22979 		/* couldn't find it */
22980 		rval = ESRCH;
22981 		goto done_error;
22982 	}
22983 	/*
22984 	 * If we're dealing with a partition that's out of the normal
22985 	 * 16K block, adjust accordingly.
22986 	 */
22987 	gpe_lba += p64.p_partno / sizeof (efi_gpe_t);
22988 	rval = sd_send_scsi_READ(un, buffer, EFI_MIN_ARRAY_SIZE,
22989 			gpe_lba, SD_PATH_DIRECT);
22990 	if (rval) {
22991 		goto done_error;
22992 	}
22993 	partitions = (efi_gpe_t *)buffer;
22994 
22995 	sd_swap_efi_gpe(nparts, partitions);
22996 
22997 	partitions += p64.p_partno;
22998 	bcopy(&partitions->efi_gpe_PartitionTypeGUID, &p64.p_type,
22999 	    sizeof (struct uuid));
23000 	p64.p_start = partitions->efi_gpe_StartingLBA;
23001 	p64.p_size = partitions->efi_gpe_EndingLBA -
23002 			p64.p_start + 1;
23003 
23004 	if (ddi_copyout(&p64, (void *)arg, sizeof (struct partition64), flag))
23005 		rval = EFAULT;
23006 
23007 done_error:
23008 	kmem_free(buffer, EFI_MIN_ARRAY_SIZE);
23009 	return (rval);
23010 }
23011 
23012 
23013 /*
23014  *    Function: sd_dkio_set_vtoc
23015  *
23016  * Description: This routine is the driver entry point for handling user
23017  *		requests to set the current volume table of contents
23018  *		(DKIOCSVTOC).
23019  *
23020  *   Arguments: dev  - the device number
23021  *		arg  - pointer to user provided vtoc structure used to set the
23022  *			current vtoc.
23023  *		flag - this argument is a pass through to ddi_copyxxx()
23024  *		       directly from the mode argument of ioctl().
23025  *
23026  * Return Code: 0
23027  *		EFAULT
23028  *		ENXIO
23029  *		EINVAL
23030  *		ENOTSUP
23031  */
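
/*
 * Illustrative sketch (not part of the driver): DKIOCSVTOC is normally
 * used in a read-modify-write sequence so unrelated fields are
 * preserved; the slice edit shown is hypothetical.
 *
 *	struct vtoc vt;
 *
 *	if (ioctl(fd, DKIOCGVTOC, &vt) == 0) {
 *		vt.v_part[0].p_tag = V_USR;
 *		if (ioctl(fd, DKIOCSVTOC, &vt) != 0)
 *			perror("DKIOCSVTOC");
 *	}
 */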
23032 
23033 static int
23034 sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag)
23035 {
23036 	struct sd_lun	*un = NULL;
23037 	struct vtoc	user_vtoc;
23038 	int		rval = 0;
23039 
23040 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23041 		return (ENXIO);
23042 	}
23043 
23044 #if defined(__i386) || defined(__amd64)
23045 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
23046 		return (EINVAL);
23047 	}
23048 #endif
23049 
23050 #ifdef _MULTI_DATAMODEL
23051 	switch (ddi_model_convert_from(flag & FMODELS)) {
23052 	case DDI_MODEL_ILP32: {
23053 		struct vtoc32 user_vtoc32;
23054 
23055 		if (ddi_copyin((const void *)arg, &user_vtoc32,
23056 		    sizeof (struct vtoc32), flag)) {
23057 			return (EFAULT);
23058 		}
23059 		vtoc32tovtoc(user_vtoc32, user_vtoc);
23060 		break;
23061 	}
23062 
23063 	case DDI_MODEL_NONE:
23064 		if (ddi_copyin((const void *)arg, &user_vtoc,
23065 		    sizeof (struct vtoc), flag)) {
23066 			return (EFAULT);
23067 		}
23068 		break;
23069 	}
23070 #else /* ! _MULTI_DATAMODEL */
23071 	if (ddi_copyin((const void *)arg, &user_vtoc,
23072 	    sizeof (struct vtoc), flag)) {
23073 		return (EFAULT);
23074 	}
23075 #endif /* _MULTI_DATAMODEL */
23076 
23077 	mutex_enter(SD_MUTEX(un));
23078 	if (un->un_blockcount > DK_MAX_BLOCKS) {
23079 		mutex_exit(SD_MUTEX(un));
23080 		return (ENOTSUP);
23081 	}
23082 	if (un->un_g.dkg_ncyl == 0) {
23083 		mutex_exit(SD_MUTEX(un));
23084 		return (EINVAL);
23085 	}
23086 
23087 	mutex_exit(SD_MUTEX(un));
23088 	sd_clear_efi(un);
23089 	ddi_remove_minor_node(SD_DEVINFO(un), "wd");
23090 	ddi_remove_minor_node(SD_DEVINFO(un), "wd,raw");
23091 	(void) ddi_create_minor_node(SD_DEVINFO(un), "h",
23092 	    S_IFBLK, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23093 	    un->un_node_type, NULL);
23094 	(void) ddi_create_minor_node(SD_DEVINFO(un), "h,raw",
23095 	    S_IFCHR, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23096 	    un->un_node_type, NULL);
23097 	mutex_enter(SD_MUTEX(un));
23098 
23099 	if ((rval = sd_build_label_vtoc(un, &user_vtoc)) == 0) {
23100 		if ((rval = sd_write_label(dev)) == 0) {
23101 			if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT))
23102 			    != 0) {
23103 				SD_ERROR(SD_LOG_IOCTL_DKIO, un,
23104 				    "sd_dkio_set_vtoc: "
23105 				    "Failed validate geometry\n");
23106 			}
23107 		}
23108 	}
23109 
23110 	/*
23111 	 * Even if sd_build_label_vtoc() or sd_write_label() failed above,
23112 	 * write the devid anyway; it can't hurt. Also preserve the device
23113 	 * id by writing it to the disk acyl for the case where a devid
23114 	 * has been fabricated.
23115 	 */
23116 	if (un->un_f_devid_supported &&
23117 	    (un->un_f_opt_fab_devid == TRUE)) {
23118 		if (un->un_devid == NULL) {
23119 			sd_register_devid(un, SD_DEVINFO(un),
23120 			    SD_TARGET_IS_UNRESERVED);
23121 		} else {
23122 			/*
23123 			 * The device id for this disk has been
23124 			 * fabricated. Fabricated device id's are
23125 			 * managed by storing them in the last 2
23126 			 * available sectors on the drive. The device
23127 			 * id must be preserved by writing it back out
23128 			 * to this location.
23129 			 */
23130 			if (sd_write_deviceid(un) != 0) {
23131 				ddi_devid_free(un->un_devid);
23132 				un->un_devid = NULL;
23133 			}
23134 		}
23135 	}
23136 	mutex_exit(SD_MUTEX(un));
23137 	return (rval);
23138 }
23139 
23140 
23141 /*
23142  *    Function: sd_build_label_vtoc
23143  *
23144  * Description: This routine updates the driver soft state current volume table
23145  *		of contents based on a user specified vtoc.
23146  *
23147  *   Arguments: un - driver soft state (unit) structure
23148  *		user_vtoc - pointer to vtoc structure specifying vtoc to be used
23149  *			    to update the driver soft state.
23150  *
23151  * Return Code: 0
23152  *		EINVAL
23153  */
23154 
23155 static int
23156 sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
23157 {
23158 	struct dk_map		*lmap;
23159 	struct partition	*vpart;
23160 	int			nblks;
23161 #if defined(_SUNOS_VTOC_8)
23162 	int			ncyl;
23163 	struct dk_map2		*lpart;
23164 #endif	/* defined(_SUNOS_VTOC_8) */
23165 	int			i;
23166 
23167 	ASSERT(mutex_owned(SD_MUTEX(un)));
23168 
23169 	/* Sanity-check the vtoc */
23170 	if (user_vtoc->v_sanity != VTOC_SANE ||
23171 	    user_vtoc->v_sectorsz != un->un_sys_blocksize ||
23172 	    user_vtoc->v_nparts != V_NUMPAR) {
23173 		return (EINVAL);
23174 	}
23175 
23176 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
23177 	if (nblks == 0) {
23178 		return (EINVAL);
23179 	}
23180 
23181 #if defined(_SUNOS_VTOC_8)
23182 	vpart = user_vtoc->v_part;
23183 	for (i = 0; i < V_NUMPAR; i++) {
23184 		if ((vpart->p_start % nblks) != 0) {
23185 			return (EINVAL);
23186 		}
23187 		ncyl = vpart->p_start / nblks;
23188 		ncyl += vpart->p_size / nblks;
23189 		if ((vpart->p_size % nblks) != 0) {
23190 			ncyl++;
23191 		}
23192 		if (ncyl > (int)un->un_g.dkg_ncyl) {
23193 			return (EINVAL);
23194 		}
23195 		vpart++;
23196 	}
23197 #endif	/* defined(_SUNOS_VTOC_8) */
23198 
23199 	/* Put appropriate vtoc structure fields into the disk label */
23200 #if defined(_SUNOS_VTOC_16)
23201 	/*
23202 	 * The vtoc is always a 32-bit data structure to maintain the
23203 	 * on-disk format. Convert "in place" instead of bcopying it.
23204 	 */
23205 	vtoctovtoc32((*user_vtoc), (*((struct vtoc32 *)&(un->un_vtoc))));
23206 
23207 	/*
23208 	 * In the 16-slice vtoc, starting sectors are expressed in
23209 	 * numbers *relative* to the start of the Solaris fdisk partition.
23210 	 */
23211 	lmap = un->un_map;
23212 	vpart = user_vtoc->v_part;
23213 
23214 	for (i = 0; i < (int)user_vtoc->v_nparts; i++, lmap++, vpart++) {
23215 		lmap->dkl_cylno = vpart->p_start / nblks;
23216 		lmap->dkl_nblk = vpart->p_size;
23217 	}
23218 
23219 #elif defined(_SUNOS_VTOC_8)
23220 
23221 	un->un_vtoc.v_bootinfo[0] = (uint32_t)user_vtoc->v_bootinfo[0];
23222 	un->un_vtoc.v_bootinfo[1] = (uint32_t)user_vtoc->v_bootinfo[1];
23223 	un->un_vtoc.v_bootinfo[2] = (uint32_t)user_vtoc->v_bootinfo[2];
23224 
23225 	un->un_vtoc.v_sanity = (uint32_t)user_vtoc->v_sanity;
23226 	un->un_vtoc.v_version = (uint32_t)user_vtoc->v_version;
23227 
23228 	bcopy(user_vtoc->v_volume, un->un_vtoc.v_volume, LEN_DKL_VVOL);
23229 
23230 	un->un_vtoc.v_nparts = user_vtoc->v_nparts;
23231 
23232 	bcopy(user_vtoc->v_reserved, un->un_vtoc.v_reserved,
23233 	    sizeof (un->un_vtoc.v_reserved));
23234 
23235 	/*
23236 	 * Note the conversion from starting sector number
23237 	 * to starting cylinder number.
23238 	 * Return error if division results in a remainder.
23239 	 */
23240 	lmap = un->un_map;
23241 	lpart = un->un_vtoc.v_part;
23242 	vpart = user_vtoc->v_part;
23243 
23244 	for (i = 0; i < (int)user_vtoc->v_nparts; i++) {
23245 		lpart->p_tag  = vpart->p_tag;
23246 		lpart->p_flag = vpart->p_flag;
23247 		lmap->dkl_cylno = vpart->p_start / nblks;
23248 		lmap->dkl_nblk = vpart->p_size;
23249 
23250 		lmap++;
23251 		lpart++;
23252 		vpart++;
23253 
23254 		/* (4387723) */
23255 #ifdef _LP64
23256 		if (user_vtoc->timestamp[i] > TIME32_MAX) {
23257 			un->un_vtoc.v_timestamp[i] = TIME32_MAX;
23258 		} else {
23259 			un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23260 		}
23261 #else
23262 		un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
23263 #endif
23264 	}
23265 
23266 	bcopy(user_vtoc->v_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
23267 #else
23268 #error "No VTOC format defined."
23269 #endif
23270 	return (0);
23271 }
23272 
23273 /*
23274  *    Function: sd_clear_efi
23275  *
23276  * Description: This routine clears all EFI labels.
23277  *
23278  *   Arguments: un - driver soft state (unit) structure
23279  *
23280  * Return Code: void
23281  */
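
/*
 * For reference: in the GPT layout this routine assumes, the primary
 * header lives at LBA 1 and the backup at the last LBA, which is why
 * the reads and writes below target block 1 and (capacity - 1).
 */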
23282 
23283 static void
23284 sd_clear_efi(struct sd_lun *un)
23285 {
23286 	efi_gpt_t	*gpt;
23287 	uint_t		lbasize;
23288 	uint64_t	cap;
23289 	int rval;
23290 
23291 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23292 
23293 	gpt = kmem_alloc(sizeof (efi_gpt_t), KM_SLEEP);
23294 
23295 	if (sd_send_scsi_READ(un, gpt, DEV_BSIZE, 1, SD_PATH_DIRECT) != 0) {
23296 		goto done;
23297 	}
23298 
23299 	sd_swap_efi_gpt(gpt);
23300 	rval = sd_validate_efi(gpt);
23301 	if (rval == 0) {
23302 		/* clear primary */
23303 		bzero(gpt, sizeof (efi_gpt_t));
23304 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE, 1,
23305 			SD_PATH_DIRECT))) {
23306 			SD_INFO(SD_LOG_IO_PARTITION, un,
23307 				"sd_clear_efi: clear primary label failed\n");
23308 		}
23309 	}
23310 	/* the backup */
23311 	rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
23312 	    SD_PATH_DIRECT);
23313 	if (rval) {
23314 		goto done;
23315 	}
23316 	/*
23317 	 * The MMC standard allows READ CAPACITY to be
23318 	 * inaccurate by a bounded amount (in the interest of
23319 	 * response latency).  As a result, failed READs are
23320 	 * commonplace (due to the reading of metadata and not
23321 	 * data). Depending on the per-Vendor/drive Sense data,
23322 	 * the failed READ can cause many (unnecessary) retries.
23323 	 */
23324 	if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
23325 	    cap - 1, ISCD(un) ? SD_PATH_DIRECT_PRIORITY :
23326 		SD_PATH_DIRECT)) != 0) {
23327 		goto done;
23328 	}
23329 	sd_swap_efi_gpt(gpt);
23330 	rval = sd_validate_efi(gpt);
23331 	if (rval == 0) {
23332 		/* clear backup */
23333 		SD_TRACE(SD_LOG_IOCTL, un, "sd_clear_efi clear backup@%lu\n",
23334 			cap-1);
23335 		bzero(gpt, sizeof (efi_gpt_t));
23336 		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
23337 		    cap-1, SD_PATH_DIRECT))) {
23338 			SD_INFO(SD_LOG_IO_PARTITION, un,
23339 				"sd_clear_efi: clear backup label failed\n");
23340 		}
23341 	}
23342 
23343 done:
23344 	kmem_free(gpt, sizeof (efi_gpt_t));
23345 }
23346 
23347 /*
23348  *    Function: sd_set_vtoc
23349  *
23350  * Description: This routine writes the primary and backup disk labels
23351  *
23352  *   Arguments: un - driver soft state (unit) structure
23353  *              dkl  - the data to be written
23354  *
23355  * Return Code: 0, or the error code from a failed read or write
23356  */
23357 
23358 static int
23359 sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl)
23360 {
23361 	void			*shadow_buf;
23362 	uint_t			label_addr;
23363 	int			sec;
23364 	int			blk;
23365 	int			head;
23366 	int			cyl;
23367 	int			rval;
23368 
23369 #if defined(__i386) || defined(__amd64)
23370 	label_addr = un->un_solaris_offset + DK_LABEL_LOC;
23371 #else
23372 	/* Write the primary label at block 0 of the solaris partition. */
23373 	label_addr = 0;
23374 #endif
23375 
23376 	if (NOT_DEVBSIZE(un)) {
23377 		shadow_buf = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
23378 		/*
23379 		 * Read the target's first block.
23380 		 */
23381 		if ((rval = sd_send_scsi_READ(un, shadow_buf,
23382 		    un->un_tgt_blocksize, label_addr,
23383 		    SD_PATH_STANDARD)) != 0) {
23384 			goto exit;
23385 		}
23386 		/*
23387 		 * Copy the contents of the label into the shadow buffer
23388 		 * which is of the size of target block size.
23389 		 */
23390 		bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23391 	}
23392 
23393 	/* Write the primary label */
23394 	if (NOT_DEVBSIZE(un)) {
23395 		rval = sd_send_scsi_WRITE(un, shadow_buf, un->un_tgt_blocksize,
23396 		    label_addr, SD_PATH_STANDARD);
23397 	} else {
23398 		rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23399 		    label_addr, SD_PATH_STANDARD);
23400 	}
23401 	if (rval != 0) {
23402 		goto exit;
23403 	}
23404 
23405 	/*
23406 	 * Calculate where the backup labels go. They are always on
23407 	 * the last alternate cylinder, but some older drives put them
23408 	 * on head 2 instead of the last head. They are always on the
23409 	 * first 5 odd sectors of the appropriate track.
23410 	 *
23411 	 * We have no choice at this point but to believe that the
23412 	 * disk label is valid. Use the geometry of the disk as
23413 	 * described in the label.
23414 	 */
23415 	cyl  = dkl->dkl_ncyl  + dkl->dkl_acyl - 1;
23416 	head = dkl->dkl_nhead - 1;
23417 
23418 	/*
23419 	 * Write the backup labels. Make sure we don't try to write
23420 	 * past the end of the track.
23421 	 */
23422 	for (sec = 1; ((sec < 5 * 2 + 1) && (sec < dkl->dkl_nsect)); sec += 2) {
23423 		blk = (daddr_t)(
23424 		    (cyl * ((dkl->dkl_nhead * dkl->dkl_nsect) - dkl->dkl_apc)) +
23425 		    (head * dkl->dkl_nsect) + sec);
23426 #if defined(__i386) || defined(__amd64)
23427 		blk += un->un_solaris_offset;
23428 #endif
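		/*
		 * blk is now the absolute sector of this backup label:
		 * whole cylinders (less any alternate sectors reserved
		 * per cylinder), plus the track offset for the chosen
		 * head, plus the odd sector number within the track.
		 */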
23429 		if (NOT_DEVBSIZE(un)) {
23430 			uint64_t	tblk;
23431 			/*
23432 			 * Need to read the block first for read modify write.
23433 			 */
23434 			tblk = (uint64_t)blk;
23435 			blk = (int)((tblk * un->un_sys_blocksize) /
23436 			    un->un_tgt_blocksize);
23437 			if ((rval = sd_send_scsi_READ(un, shadow_buf,
23438 			    un->un_tgt_blocksize, blk,
23439 			    SD_PATH_STANDARD)) != 0) {
23440 				goto exit;
23441 			}
23442 			/*
23443 			 * Modify the shadow buffer with the label.
23444 			 */
23445 			bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23446 			rval = sd_send_scsi_WRITE(un, shadow_buf,
23447 			    un->un_tgt_blocksize, blk, SD_PATH_STANDARD);
23448 		} else {
23449 			rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23450 			    blk, SD_PATH_STANDARD);
23451 			SD_INFO(SD_LOG_IO_PARTITION, un,
23452 			    "sd_set_vtoc: wrote backup label %d\n", blk);
23453 		}
23454 		if (rval != 0) {
23455 			goto exit;
23456 		}
23457 	}
23458 exit:
23459 	if (NOT_DEVBSIZE(un)) {
23460 		kmem_free(shadow_buf, un->un_tgt_blocksize);
23461 	}
23462 	return (rval);
23463 }
23464 
23465 /*
23466  *    Function: sd_clear_vtoc
23467  *
23468  * Description: This routine clears out the VTOC labels.
23469  *
23470  *   Arguments: un - driver soft state (unit) structure
23471  *
23472  * Return: void
23473  */
23474 
23475 static void
23476 sd_clear_vtoc(struct sd_lun *un)
23477 {
23478 	struct dk_label		*dkl;
23479 
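	/*
	 * Drop the softstate mutex across the KM_SLEEP allocation, which
	 * may block.
	 */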
23480 	mutex_exit(SD_MUTEX(un));
23481 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23482 	mutex_enter(SD_MUTEX(un));
23483 	/*
23484 	 * sd_set_vtoc uses these fields in order to figure out
23485 	 * where to overwrite the backup labels
23486 	 */
23487 	dkl->dkl_apc    = un->un_g.dkg_apc;
23488 	dkl->dkl_ncyl   = un->un_g.dkg_ncyl;
23489 	dkl->dkl_acyl   = un->un_g.dkg_acyl;
23490 	dkl->dkl_nhead  = un->un_g.dkg_nhead;
23491 	dkl->dkl_nsect  = un->un_g.dkg_nsect;
23492 	mutex_exit(SD_MUTEX(un));
23493 	(void) sd_set_vtoc(un, dkl);
23494 	kmem_free(dkl, sizeof (struct dk_label));
23495 
23496 	mutex_enter(SD_MUTEX(un));
23497 }
23498 
23499 /*
23500  *    Function: sd_write_label
23501  *
23502  * Description: This routine will validate and write the driver soft state vtoc
23503  *		contents to the device.
23504  *
23505  *   Arguments: dev - the device number
23506  *
23507  * Return Code: the code returned by sd_send_scsi_cmd()
23508  *		0
23509  *		EINVAL
23510  *		ENXIO
23511  *		ENOMEM
23512  */
23513 
23514 static int
23515 sd_write_label(dev_t dev)
23516 {
23517 	struct sd_lun		*un;
23518 	struct dk_label		*dkl;
23519 	short			sum;
23520 	short			*sp;
23521 	int			i;
23522 	int			rval;
23523 
23524 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23525 	    (un->un_state == SD_STATE_OFFLINE)) {
23526 		return (ENXIO);
23527 	}
23528 	ASSERT(mutex_owned(SD_MUTEX(un)));
23529 	mutex_exit(SD_MUTEX(un));
23530 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23531 	mutex_enter(SD_MUTEX(un));
23532 
23533 	bcopy(&un->un_vtoc, &dkl->dkl_vtoc, sizeof (struct dk_vtoc));
23534 	dkl->dkl_rpm	= un->un_g.dkg_rpm;
23535 	dkl->dkl_pcyl	= un->un_g.dkg_pcyl;
23536 	dkl->dkl_apc	= un->un_g.dkg_apc;
23537 	dkl->dkl_intrlv = un->un_g.dkg_intrlv;
23538 	dkl->dkl_ncyl	= un->un_g.dkg_ncyl;
23539 	dkl->dkl_acyl	= un->un_g.dkg_acyl;
23540 	dkl->dkl_nhead	= un->un_g.dkg_nhead;
23541 	dkl->dkl_nsect	= un->un_g.dkg_nsect;
23542 
23543 #if defined(_SUNOS_VTOC_8)
23544 	dkl->dkl_obs1	= un->un_g.dkg_obs1;
23545 	dkl->dkl_obs2	= un->un_g.dkg_obs2;
23546 	dkl->dkl_obs3	= un->un_g.dkg_obs3;
23547 	for (i = 0; i < NDKMAP; i++) {
23548 		dkl->dkl_map[i].dkl_cylno = un->un_map[i].dkl_cylno;
23549 		dkl->dkl_map[i].dkl_nblk  = un->un_map[i].dkl_nblk;
23550 	}
23551 	bcopy(un->un_asciilabel, dkl->dkl_asciilabel, LEN_DKL_ASCII);
23552 #elif defined(_SUNOS_VTOC_16)
23553 	dkl->dkl_skew	= un->un_dkg_skew;
23554 #else
23555 #error "No VTOC format defined."
23556 #endif
23557 
23558 	dkl->dkl_magic			= DKL_MAGIC;
23559 	dkl->dkl_write_reinstruct	= un->un_g.dkg_write_reinstruct;
23560 	dkl->dkl_read_reinstruct	= un->un_g.dkg_read_reinstruct;
23561 
23562 	/* Construct checksum for the new disk label */
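	/*
	 * The label is checksummed by XORing it as an array of shorts;
	 * with the result stored in dkl_cksum, the XOR of the entire
	 * label (including dkl_cksum) comes out to zero, which is what
	 * label readers verify.
	 */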
23563 	sum = 0;
23564 	sp = (short *)dkl;
23565 	i = sizeof (struct dk_label) / sizeof (short);
23566 	while (i--) {
23567 		sum ^= *sp++;
23568 	}
23569 	dkl->dkl_cksum = sum;
23570 
23571 	mutex_exit(SD_MUTEX(un));
23572 
23573 	rval = sd_set_vtoc(un, dkl);
23575 	kmem_free(dkl, sizeof (struct dk_label));
23576 	mutex_enter(SD_MUTEX(un));
23577 	return (rval);
23578 }
23579 
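/*
 *    Function: sd_dkio_set_efi
 *
 * Description: This routine is the driver entry point for handling user
 *		requests to set the device EFI label (DKIOCSETEFI).
 *
 *   Arguments: dev  - the device number
 *		arg  - pointer to a user provided dk_efi structure specifying
 *			the EFI label data to be written to the device.
 *		flag - this argument is a pass through to ddi_copyxxx()
 *		       directly from the mode argument of ioctl().
 *
 * Return Code: 0
 *		EINVAL
 *		EFAULT
 *		ENXIO
 */
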
23580 static int
23581 sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag)
23582 {
23583 	struct sd_lun	*un = NULL;
23584 	dk_efi_t	user_efi;
23585 	int		rval = 0;
23586 	void		*buffer;
23587 
23588 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
23589 		return (ENXIO);
23590 
23591 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
23592 		return (EFAULT);
23593 
23594 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
23595 
23596 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
23597 	    (user_efi.dki_length > un->un_max_xfer_size))
23598 		return (EINVAL);
23599 
23600 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
23601 	if (ddi_copyin(user_efi.dki_data, buffer, user_efi.dki_length, flag)) {
23602 		rval = EFAULT;
23603 	} else {
23604 		/*
23605 		 * Clear the VTOC labels on disk and the soft state
23606 		 * copy of the VTOC.
23607 		 */
23608 		mutex_enter(SD_MUTEX(un));
23609 		if (un->un_vtoc.v_sanity == VTOC_SANE) {
23610 			SD_TRACE(SD_LOG_IO_PARTITION, un,
23611 				"sd_dkio_set_efi: CLEAR VTOC\n");
23612 			sd_clear_vtoc(un);
23613 			bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
23614 			mutex_exit(SD_MUTEX(un));
23615 			ddi_remove_minor_node(SD_DEVINFO(un), "h");
23616 			ddi_remove_minor_node(SD_DEVINFO(un), "h,raw");
23617 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd",
23618 			    S_IFBLK,
23619 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23620 			    un->un_node_type, NULL);
23621 			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd,raw",
23622 			    S_IFCHR,
23623 			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
23624 			    un->un_node_type, NULL);
23625 		} else
23626 			mutex_exit(SD_MUTEX(un));
23627 		rval = sd_send_scsi_WRITE(un, buffer, user_efi.dki_length,
23628 		    user_efi.dki_lba, SD_PATH_DIRECT);
23629 		if (rval == 0) {
23630 			mutex_enter(SD_MUTEX(un));
23631 			un->un_f_geometry_is_valid = FALSE;
23632 			mutex_exit(SD_MUTEX(un));
23633 		}
23634 	}
23635 	kmem_free(buffer, user_efi.dki_length);
23636 	return (rval);
23637 }
23638 
23639 /*
23640  *    Function: sd_dkio_get_mboot
23641  *
23642  * Description: This routine is the driver entry point for handling user
23643  *		requests to get the current device mboot (DKIOCGMBOOT)
23644  *
23645  *   Arguments: dev  - the device number
23646  *		arg  - pointer to a user provided mboot structure in which
23647  *			the current mboot is returned.
23648  *		flag - this argument is a pass through to ddi_copyxxx()
23649  *		       directly from the mode argument of ioctl().
23650  *
23651  * Return Code: 0
23652  *		EINVAL
23653  *		EFAULT
23654  *		ENXIO
23655  */
23656 
23657 static int
23658 sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag)
23659 {
23660 	struct sd_lun	*un;
23661 	struct mboot	*mboot;
23662 	int		rval;
23663 	size_t		buffer_size;
23664 
23665 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23666 	    (un->un_state == SD_STATE_OFFLINE)) {
23667 		return (ENXIO);
23668 	}
23669 
23670 	if (!un->un_f_mboot_supported || arg == NULL) {
23671 		return (EINVAL);
23672 	}
23673 
23674 	/*
23675 	 * Read the mboot block, located at absolute block 0 on the target.
23676 	 */
23677 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct mboot));
23678 
23679 	SD_TRACE(SD_LOG_IO_PARTITION, un,
23680 	    "sd_dkio_get_mboot: allocation size: 0x%x\n", buffer_size);
23681 
23682 	mboot = kmem_zalloc(buffer_size, KM_SLEEP);
23683 	if ((rval = sd_send_scsi_READ(un, mboot, buffer_size, 0,
23684 	    SD_PATH_STANDARD)) == 0) {
23685 		if (ddi_copyout(mboot, (void *)arg,
23686 		    sizeof (struct mboot), flag) != 0) {
23687 			rval = EFAULT;
23688 		}
23689 	}
23690 	kmem_free(mboot, buffer_size);
23691 	return (rval);
23692 }
23693 
23694 
23695 /*
23696  *    Function: sd_dkio_set_mboot
23697  *
23698  * Description: This routine is the driver entry point for handling user
23699  *		requests to validate and set the device master boot
23700  *		(DKIOCSMBOOT).
23701  *
23702  *   Arguments: dev  - the device number
23703  *		arg  - pointer to user provided mboot structure used to set the
23704  *			master boot.
23705  *		flag - this argument is a pass through to ddi_copyxxx()
23706  *		       directly from the mode argument of ioctl().
23707  *
23708  * Return Code: 0
23709  *		EINVAL
23710  *		EFAULT
23711  *		ENXIO
23712  */
23713 
23714 static int
23715 sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag)
23716 {
23717 	struct sd_lun	*un = NULL;
23718 	struct mboot	*mboot = NULL;
23719 	int		rval;
23720 	ushort_t	magic;
23721 
23722 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23723 		return (ENXIO);
23724 	}
23725 
23726 	ASSERT(!mutex_owned(SD_MUTEX(un)));
23727 
23728 	if (!un->un_f_mboot_supported) {
23729 		return (EINVAL);
23730 	}
23731 
23732 	if (arg == NULL) {
23733 		return (EINVAL);
23734 	}
23735 
23736 	mboot = kmem_zalloc(sizeof (struct mboot), KM_SLEEP);
23737 
23738 	if (ddi_copyin((const void *)arg, mboot,
23739 	    sizeof (struct mboot), flag) != 0) {
23740 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23741 		return (EFAULT);
23742 	}
23743 
23744 	/* Is this really a master boot record? */
23745 	magic = LE_16(mboot->signature);
23746 	if (magic != MBB_MAGIC) {
23747 		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23748 		return (EINVAL);
23749 	}
23750 
23751 	rval = sd_send_scsi_WRITE(un, mboot, un->un_sys_blocksize, 0,
23752 	    SD_PATH_STANDARD);
23753 
23754 	mutex_enter(SD_MUTEX(un));
23755 #if defined(__i386) || defined(__amd64)
23756 	if (rval == 0) {
23757 		/*
23758 		 * mboot has been written successfully.
23759 		 * update the fdisk and vtoc tables in memory
23760 		 */
23761 		rval = sd_update_fdisk_and_vtoc(un);
23762 		if ((un->un_f_geometry_is_valid == FALSE) || (rval != 0)) {
23763 			mutex_exit(SD_MUTEX(un));
23764 			kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23765 			return (rval);
23766 		}
23767 	}
23768 
23769 	/*
23770 	 * Even if the mboot write fails, write the devid anyway; it can't hurt.
23771 	 * Also preserve the device id by writing it to the disk acyl for the case
23772 	 * where a devid has been fabricated.
23773 	 */
23774 	if (un->un_f_devid_supported && un->un_f_opt_fab_devid) {
23775 		if (un->un_devid == NULL) {
23776 			sd_register_devid(un, SD_DEVINFO(un),
23777 			    SD_TARGET_IS_UNRESERVED);
23778 		} else {
23779 			/*
23780 			 * The device id for this disk has been
23781 			 * fabricated. Fabricated device id's are
23782 			 * managed by storing them in the last 2
23783 			 * available sectors on the drive. The device
23784 			 * id must be preserved by writing it back out
23785 			 * to this location.
23786 			 */
23787 			if (sd_write_deviceid(un) != 0) {
23788 				ddi_devid_free(un->un_devid);
23789 				un->un_devid = NULL;
23790 			}
23791 		}
23792 	}
23793 
23794 #ifdef __lock_lint
23795 	sd_setup_default_geometry(un);
23796 #endif
23797 
23798 #else
23799 	if (rval == 0) {
23800 		/*
23801 		 * mboot has been written successfully.
23802 		 * set up the default geometry and VTOC
23803 		 */
23804 		if (un->un_blockcount <= DK_MAX_BLOCKS)
23805 			sd_setup_default_geometry(un);
23806 	}
23807 #endif
23808 	mutex_exit(SD_MUTEX(un));
23809 	kmem_free(mboot, (size_t)(sizeof (struct mboot)));
23810 	return (rval);
23811 }
23812 
23813 
23814 /*
23815  *    Function: sd_setup_default_geometry
23816  *
23817  * Description: This local utility routine sets the default geometry as part of
23818  *		setting the device mboot.
23819  *
23820  *   Arguments: un - driver soft state (unit) structure
23821  *
23822  * Note: This may be redundant with sd_build_default_label.
23823  */
23824 
23825 static void
23826 sd_setup_default_geometry(struct sd_lun *un)
23827 {
23828 	/* zero out the soft state geometry and partition table. */
23829 	bzero(&un->un_g, sizeof (struct dk_geom));
23830 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
23831 	bzero(un->un_map, NDKMAP * (sizeof (struct dk_map)));
23832 	un->un_asciilabel[0] = '\0';
23833 
23834 	/*
23835 	 * For the rpm, we use the minimum for the disk.
23836 	 * For the head, cyl and number of sectors per track:
23837 	 * if the capacity <= 1GB, head = 64 and sect = 32;
23838 	 * else head = 255 and sect = 63.
23839 	 * Note: the capacity should equal the C*H*S product;
23840 	 * rounding will cause some truncation of the size.
23841 	 * For CD-ROMs this truncation can have adverse side
23842 	 * effects, so we return ncyl and nhead as 1. (nsect
23843 	 * would also overflow for most CD-ROMs, since nsect
23844 	 * is of type ushort.)
23845 	 */
23846 	if (ISCD(un)) {
23847 		un->un_g.dkg_ncyl = 1;
23848 		un->un_g.dkg_nhead = 1;
23849 		un->un_g.dkg_nsect = un->un_blockcount;
23850 	} else {
23851 		if (un->un_blockcount <= 0x1000) {
23852 			/* Needed for unlabeled SCSI floppies. */
23853 			un->un_g.dkg_nhead = 2;
23854 			un->un_g.dkg_ncyl = 80;
23855 			un->un_g.dkg_pcyl = 80;
23856 			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
23857 		} else if (un->un_blockcount <= 0x200000) {
23858 			un->un_g.dkg_nhead = 64;
23859 			un->un_g.dkg_nsect = 32;
23860 			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
23861 		} else {
23862 			un->un_g.dkg_nhead = 255;
23863 			un->un_g.dkg_nsect = 63;
23864 			un->un_g.dkg_ncyl = un->un_blockcount / (255 * 63);
23865 		}
23866 		un->un_blockcount = un->un_g.dkg_ncyl *
23867 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
23868 	}
23869 	un->un_g.dkg_acyl = 0;
23870 	un->un_g.dkg_bcyl = 0;
23871 	un->un_g.dkg_intrlv = 1;
23872 	un->un_g.dkg_rpm = 200;
23873 	un->un_g.dkg_read_reinstruct = 0;
23874 	un->un_g.dkg_write_reinstruct = 0;
23875 	if (un->un_g.dkg_pcyl == 0) {
23876 		un->un_g.dkg_pcyl = un->un_g.dkg_ncyl + un->un_g.dkg_acyl;
23877 	}
23878 
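	/* Slices 'a' and 'c' both default to covering the whole device. */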
23879 	un->un_map['a'-'a'].dkl_cylno = 0;
23880 	un->un_map['a'-'a'].dkl_nblk = un->un_blockcount;
23881 	un->un_map['c'-'a'].dkl_cylno = 0;
23882 	un->un_map['c'-'a'].dkl_nblk = un->un_blockcount;
23883 	un->un_f_geometry_is_valid = FALSE;
23884 }
23885 
23886 
23887 #if defined(__i386) || defined(__amd64)
23888 /*
23889  *    Function: sd_update_fdisk_and_vtoc
23890  *
23891  * Description: This local utility routine updates the device fdisk and vtoc
23892  *		as part of setting the device mboot.
23893  *
23894  *   Arguments: un - driver soft state (unit) structure
23895  *
23896  * Return Code: 0 for success or errno-type return code.
23897  *
23898  *    Note: x86: This looks like a duplicate of sd_validate_geometry(), but
23899  *		these did exist separately in x86 sd.c!
23900  */
23901 
23902 static int
23903 sd_update_fdisk_and_vtoc(struct sd_lun *un)
23904 {
23905 	static char	labelstring[128];
23906 	static char	buf[256];
23907 	char		*label = 0;
23908 	int		count;
23909 	int		label_rc = 0;
23910 	int		gvalid = un->un_f_geometry_is_valid;
23911 	int		fdisk_rval;
23912 	int		lbasize;
23913 	int		capacity;
23914 
23915 	ASSERT(mutex_owned(SD_MUTEX(un)));
23916 
23917 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
23918 		return (EINVAL);
23919 	}
23920 
23921 	if (un->un_f_blockcount_is_valid == FALSE) {
23922 		return (EINVAL);
23923 	}
23924 
23925 #if defined(_SUNOS_VTOC_16)
23926 	/*
23927 	 * Set up the "whole disk" fdisk partition; this should always
23928 	 * exist, regardless of whether the disk contains an fdisk table
23929 	 * or vtoc.
23930 	 */
23931 	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
23932 	un->un_map[P0_RAW_DISK].dkl_nblk = un->un_blockcount;
23933 #endif	/* defined(_SUNOS_VTOC_16) */
23934 
23935 	/*
23936 	 * copy the lbasize and capacity so that if they're
23937 	 * reset while we're not holding the SD_MUTEX(un), we will
23938 	 * continue to use valid values after the SD_MUTEX(un) is
23939 	 * reacquired.
23940 	 */
23941 	lbasize  = un->un_tgt_blocksize;
23942 	capacity = un->un_blockcount;
23943 
23944 	/*
23945 	 * Refresh the logical and physical geometry caches
23946 	 * (data from the mode sense format/rigid disk geometry pages,
23947 	 * and from scsi_ifgetcap("geometry")).
23948 	 */
23949 	sd_resync_geom_caches(un, capacity, lbasize, SD_PATH_DIRECT);
23950 
23951 	/*
23952 	 * Only DIRECT ACCESS devices will have Sun labels.
23953 	 * (CDs supposedly can have a Sun label, too.)
23954 	 */
23955 	if (un->un_f_vtoc_label_supported) {
23956 		fdisk_rval = sd_read_fdisk(un, capacity, lbasize,
23957 		    SD_PATH_DIRECT);
23958 		if (fdisk_rval == SD_CMD_FAILURE) {
23959 			ASSERT(mutex_owned(SD_MUTEX(un)));
23960 			return (EIO);
23961 		}
23962 
23963 		if (fdisk_rval == SD_CMD_RESERVATION_CONFLICT) {
23964 			ASSERT(mutex_owned(SD_MUTEX(un)));
23965 			return (EACCES);
23966 		}
23967 
23968 		if (un->un_solaris_size <= DK_LABEL_LOC) {
23969 			/*
23970 			 * Found fdisk table but no Solaris partition entry,
23971 			 * so don't call sd_uselabel() and don't create
23972 			 * a default label.
23973 			 */
23974 			label_rc = 0;
23975 			un->un_f_geometry_is_valid = TRUE;
23976 			goto no_solaris_partition;
23977 		}
23978 
23979 #if defined(_SUNOS_VTOC_8)
23980 		label = (char *)un->un_asciilabel;
23981 #elif defined(_SUNOS_VTOC_16)
23982 		label = (char *)un->un_vtoc.v_asciilabel;
23983 #else
23984 #error "No VTOC format defined."
23985 #endif
23986 	} else if (capacity < 0) {
23987 		ASSERT(mutex_owned(SD_MUTEX(un)));
23988 		return (EINVAL);
23989 	}
23990 
23991 	/*
23992 	 * For removable media we reach here if we have found a
23993 	 * Solaris partition.
23994 	 * If un_f_geometry_is_valid is FALSE, the Solaris partition
23995 	 * has changed from the previous one, so we set up a default
23996 	 * VTOC in this case.
23997 	 */
23998 	if (un->un_f_geometry_is_valid == FALSE) {
23999 		sd_build_default_label(un);
24000 		label_rc = 0;
24001 	}
24002 
24003 no_solaris_partition:
24004 	if ((!un->un_f_has_removable_media ||
24005 	    (un->un_f_has_removable_media &&
24006 	    un->un_mediastate == DKIO_EJECTED)) &&
24007 		(un->un_state == SD_STATE_NORMAL && !gvalid)) {
24008 		/*
24009 		 * Print out a message indicating who and what we are.
24010 		 * We do this only when we happen to really validate the
24011 		 * geometry. We may call sd_validate_geometry() at other
24012 		 * times, e.g. for ioctl()s like Get VTOC, in which case we
24013 		 * don't want to print the label.
24014 		 * If the geometry is valid, print the label string;
24015 		 * else print vendor and product info, if available.
24016 		 */
24017 		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
24018 			SD_INFO(SD_LOG_IOCTL_DKIO, un, "?<%s>\n", label);
24019 		} else {
24020 			mutex_enter(&sd_label_mutex);
24021 			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
24022 			    labelstring);
24023 			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
24024 			    &labelstring[64]);
24025 			(void) sprintf(buf, "?Vendor '%s', product '%s'",
24026 			    labelstring, &labelstring[64]);
24027 			if (un->un_f_blockcount_is_valid == TRUE) {
24028 				(void) sprintf(&buf[strlen(buf)],
24029 				    ", %" PRIu64 " %u byte blocks\n",
24030 				    un->un_blockcount,
24031 				    un->un_tgt_blocksize);
24032 			} else {
24033 				(void) sprintf(&buf[strlen(buf)],
24034 				    ", (unknown capacity)\n");
24035 			}
24036 			SD_INFO(SD_LOG_IOCTL_DKIO, un, buf);
24037 			mutex_exit(&sd_label_mutex);
24038 		}
24039 	}
24040 
24041 #if defined(_SUNOS_VTOC_16)
24042 	/*
24043 	 * If we have valid geometry, set up the remaining fdisk partitions.
24044 	 * Note that dkl_cylno is not used for the fdisk map entries, so
24045 	 * we set it to an entirely bogus value.
24046 	 */
24047 	for (count = 0; count < FD_NUMPART; count++) {
24048 		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
24049 		un->un_map[FDISK_P1 + count].dkl_nblk =
24050 		    un->un_fmap[count].fmap_nblk;
24051 		un->un_offset[FDISK_P1 + count] =
24052 		    un->un_fmap[count].fmap_start;
24053 	}
24054 #endif
24055 
24056 	for (count = 0; count < NDKMAP; count++) {
24057 #if defined(_SUNOS_VTOC_8)
24058 		struct dk_map *lp  = &un->un_map[count];
24059 		un->un_offset[count] =
24060 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
24061 #elif defined(_SUNOS_VTOC_16)
24062 		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
24063 		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
24064 #else
24065 #error "No VTOC format defined."
24066 #endif
24067 	}
24068 
24069 	ASSERT(mutex_owned(SD_MUTEX(un)));
24070 	return (label_rc);
24071 }
24072 #endif
24073 
24074 
24075 /*
24076  *    Function: sd_check_media
24077  *
24078  * Description: This utility routine implements the functionality for the
24079  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
24080  *		driver state changes from that specified by the user
24081  *		(inserted or ejected). For example, if the user specifies
24082  *		DKIO_EJECTED and the current media state is inserted this
24083  *		routine will immediately return DKIO_INSERTED. However, if the
24084  *		current media state is not inserted the user thread will be
24085  *		blocked until the drive state changes. If DKIO_NONE is specified
24086  *		the user thread will block until a drive state change occurs.
24087  *
24088  *   Arguments: dev  - the device number
24089  *		state  - user pointer to a dkio_state, updated with the current
24090  *			drive state at return.
24091  *
24092  * Return Code: ENXIO
24093  *		EIO
24094  *		EAGAIN
24095  *		EINTR
24096  */
24097 
24098 static int
24099 sd_check_media(dev_t dev, enum dkio_state state)
24100 {
24101 	struct sd_lun		*un = NULL;
24102 	enum dkio_state		prev_state;
24103 	opaque_t		token = NULL;
24104 	int			rval = 0;
24105 
24106 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24107 		return (ENXIO);
24108 	}
24109 
24110 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
24111 
24112 	mutex_enter(SD_MUTEX(un));
24113 
24114 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
24115 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
24116 
24117 	prev_state = un->un_mediastate;
24118 
24119 	/* is there anything to do? */
24120 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
24121 		/*
24122 		 * submit the request to the scsi_watch service;
24123 		 * scsi_media_watch_cb() does the real work
24124 		 */
24125 		mutex_exit(SD_MUTEX(un));
24126 
24127 		/*
24128 		 * This change handles the case where a scsi watch request is
24129 		 * added to a device that is powered down. To accomplish this
24130 		 * we power up the device before adding the scsi watch request,
24131 		 * since the scsi watch sends a TUR directly to the device
24132 		 * which the device cannot handle if it is powered down.
24133 		 */
24134 		if (sd_pm_entry(un) != DDI_SUCCESS) {
24135 			mutex_enter(SD_MUTEX(un));
24136 			goto done;
24137 		}
24138 
24139 		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
24140 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
24141 		    (caddr_t)dev);
24142 
24143 		sd_pm_exit(un);
24144 
24145 		mutex_enter(SD_MUTEX(un));
24146 		if (token == NULL) {
24147 			rval = EAGAIN;
24148 			goto done;
24149 		}
24150 
24151 		/*
24152 		 * This is a special case IOCTL that doesn't return
24153 		 * until the media state changes. Routine sdpower
24154 		 * knows about and handles this so don't count it
24155 		 * as an active cmd in the driver, which would
24156 		 * keep the device busy to the pm framework.
24157 		 * If the count isn't decremented the device can't
24158 		 * be powered down.
24159 		 */
24160 		un->un_ncmds_in_driver--;
24161 		ASSERT(un->un_ncmds_in_driver >= 0);
24162 
24163 		/*
24164 		 * if a prior request had been made, this will be the same
24165 		 * token, as scsi_watch was designed that way.
24166 		 */
24167 		un->un_swr_token = token;
24168 		un->un_specified_mediastate = state;
24169 
24170 		/*
24171 		 * Now wait for the media to change. We will not be signalled
24172 		 * until mediastate != state, but it is still better to test
24173 		 * for this condition, since there is a 2 sec cv_broadcast
24174 		 * delay when mediastate == DKIO_INSERTED.
24175 		 */
24176 		SD_TRACE(SD_LOG_COMMON, un,
24177 		    "sd_check_media: waiting for media state change\n");
24178 		while (un->un_mediastate == state) {
24179 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
24180 				SD_TRACE(SD_LOG_COMMON, un,
24181 				    "sd_check_media: waiting for media state "
24182 				    "was interrupted\n");
24183 				un->un_ncmds_in_driver++;
24184 				rval = EINTR;
24185 				goto done;
24186 			}
24187 			SD_TRACE(SD_LOG_COMMON, un,
24188 			    "sd_check_media: received signal, state=%x\n",
24189 			    un->un_mediastate);
24190 		}
24191 		/*
24192 		 * Inc the counter to indicate the device once again
24193 		 * has an active outstanding cmd.
24194 		 */
24195 		un->un_ncmds_in_driver++;
24196 	}
24197 
24198 	/* invalidate geometry */
24199 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
24200 		sr_ejected(un);
24201 	}
24202 
24203 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
24204 		uint64_t	capacity;
24205 		uint_t		lbasize;
24206 
24207 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
24208 		mutex_exit(SD_MUTEX(un));
24209 		/*
24210 		 * Since the following routines use SD_PATH_DIRECT, we must
24211 		 * call PM directly before the upcoming disk accesses. This
24212 		 * may cause the disk to be powered up and spun up.
24213 		 */
24214 
24215 		if (sd_pm_entry(un) == DDI_SUCCESS) {
24216 			rval = sd_send_scsi_READ_CAPACITY(un,
24217 			    &capacity,
24218 			    &lbasize, SD_PATH_DIRECT);
24219 			if (rval != 0) {
24220 				sd_pm_exit(un);
24221 				mutex_enter(SD_MUTEX(un));
24222 				goto done;
24223 			}
24224 		} else {
24225 			rval = EIO;
24226 			mutex_enter(SD_MUTEX(un));
24227 			goto done;
24228 		}
24229 		mutex_enter(SD_MUTEX(un));
24230 
24231 		sd_update_block_info(un, lbasize, capacity);
24232 
24233 		un->un_f_geometry_is_valid	= FALSE;
24234 		(void) sd_validate_geometry(un, SD_PATH_DIRECT);
24235 
24236 		mutex_exit(SD_MUTEX(un));
24237 		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
24238 		    SD_PATH_DIRECT);
24239 		sd_pm_exit(un);
24240 
24241 		mutex_enter(SD_MUTEX(un));
24242 	}
24243 done:
24244 	un->un_f_watcht_stopped = FALSE;
24245 	if (un->un_swr_token) {
24246 		/*
24247 		 * Use of this local token and the mutex ensures that we avoid
24248 		 * some race conditions associated with terminating the
24249 		 * scsi watch.
24250 		 */
24251 		token = un->un_swr_token;
24252 		un->un_swr_token = (opaque_t)NULL;
24253 		mutex_exit(SD_MUTEX(un));
24254 		(void) scsi_watch_request_terminate(token,
24255 		    SCSI_WATCH_TERMINATE_WAIT);
24256 		mutex_enter(SD_MUTEX(un));
24257 	}
24258 
24259 	/*
24260 	 * Update the capacity kstat value, if no media previously
24261 	 * (capacity kstat is 0) and a media has been inserted
24262 	 * (un_f_blockcount_is_valid == TRUE)
24263 	 */
24264 	if (un->un_errstats) {
24265 		struct sd_errstats	*stp = NULL;
24266 
24267 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
24268 		if ((stp->sd_capacity.value.ui64 == 0) &&
24269 		    (un->un_f_blockcount_is_valid == TRUE)) {
24270 			stp->sd_capacity.value.ui64 =
24271 			    (uint64_t)((uint64_t)un->un_blockcount *
24272 			    un->un_sys_blocksize);
24273 		}
24274 	}
24275 	mutex_exit(SD_MUTEX(un));
24276 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
24277 	return (rval);
24278 }
24279 
24280 
24281 /*
24282  *    Function: sd_delayed_cv_broadcast
24283  *
24284  * Description: Delayed cv_broadcast to allow for target to recover from media
24285  *		insertion.
24286  *
24287  *   Arguments: arg - driver soft state (unit) structure
24288  */
24289 
24290 static void
24291 sd_delayed_cv_broadcast(void *arg)
24292 {
24293 	struct sd_lun *un = arg;
24294 
24295 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
24296 
24297 	mutex_enter(SD_MUTEX(un));
24298 	un->un_dcvb_timeid = NULL;
24299 	cv_broadcast(&un->un_state_cv);
24300 	mutex_exit(SD_MUTEX(un));
24301 }
24302 
24303 
24304 /*
24305  *    Function: sd_media_watch_cb
24306  *
24307  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
24308  *		routine processes the TUR sense data and updates the driver
24309  *		state if a transition has occurred. The user thread
24310  *		(sd_check_media) is then signalled.
24311  *
24312  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24313  *			among multiple watches that share this callback function
24314  *		resultp - scsi watch facility result packet containing scsi
24315  *			  packet, status byte and sense data
24316  *
24317  * Return Code: 0 for success, -1 for failure
24318  */
24319 
24320 static int
24321 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
24322 {
24323 	struct sd_lun			*un;
24324 	struct scsi_status		*statusp = resultp->statusp;
24325 	struct scsi_extended_sense	*sensep = resultp->sensep;
24326 	enum dkio_state			state = DKIO_NONE;
24327 	dev_t				dev = (dev_t)arg;
24328 	uchar_t				actual_sense_length;
24329 
24330 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24331 		return (-1);
24332 	}
24333 	actual_sense_length = resultp->actual_sense_length;
24334 
24335 	mutex_enter(SD_MUTEX(un));
24336 	SD_TRACE(SD_LOG_COMMON, un,
24337 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
24338 	    *((char *)statusp), (void *)sensep, actual_sense_length);
24339 
24340 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
24341 		un->un_mediastate = DKIO_DEV_GONE;
24342 		cv_broadcast(&un->un_state_cv);
24343 		mutex_exit(SD_MUTEX(un));
24344 
24345 		return (0);
24346 	}
24347 
24348 	/*
24349 	 * If there was a check condition then sensep points to valid sense
24350 	 * data. If status was not a check condition but a reservation or
24351 	 * busy status then the new state is DKIO_NONE.
24352 	 */
24353 	if (sensep != NULL) {
24354 		SD_INFO(SD_LOG_COMMON, un,
24355 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
24356 		    sensep->es_key, sensep->es_add_code, sensep->es_qual_code);
24357 		/* This routine only uses up to 13 bytes of sense data. */
24358 		if (actual_sense_length >= 13) {
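			/*
			 * ASC 0x28 ("not ready to ready change,
			 * medium may have changed") under a unit
			 * attention key indicates newly inserted
			 * media.
			 */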
24359 			if (sensep->es_key == KEY_UNIT_ATTENTION) {
24360 				if (sensep->es_add_code == 0x28) {
24361 					state = DKIO_INSERTED;
24362 				}
24363 			} else {
24364 				/*
24365 				 * Sense 02/04/02 means that the host
24366 				 * should send a start command.
24367 				 * Explicitly leave the media state as
24368 				 * is (inserted), since the media is
24369 				 * present and the host has stopped the
24370 				 * device for PM reasons. The next true
24371 				 * read/write to this media will bring
24372 				 * the device to the right state for
24373 				 * media access.
24374 				 */
24375 				if ((sensep->es_key == KEY_NOT_READY) &&
24376 				    (sensep->es_add_code == 0x3a)) {
24377 					state = DKIO_EJECTED;
24378 				}
24379 
24380 				/*
24381 				 * If the drive is busy with an operation
24382 				 * or long write, keep the media in an
24383 				 * inserted state.
24384 				 */
24385 
24386 				if ((sensep->es_key == KEY_NOT_READY) &&
24387 				    (sensep->es_add_code == 0x04) &&
24388 				    ((sensep->es_qual_code == 0x02) ||
24389 				    (sensep->es_qual_code == 0x07) ||
24390 				    (sensep->es_qual_code == 0x08))) {
24391 					state = DKIO_INSERTED;
24392 				}
24393 			}
24394 		}
24395 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
24396 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
24397 		state = DKIO_INSERTED;
24398 	}
24399 
24400 	SD_TRACE(SD_LOG_COMMON, un,
24401 	    "sd_media_watch_cb: state=%x, specified=%x\n",
24402 	    state, un->un_specified_mediastate);
24403 
24404 	/*
24405 	 * now signal the waiting thread if this is *not* the specified state;
24406 	 * delay the signal if the state is DKIO_INSERTED to allow the target
24407 	 * to recover
24408 	 */
24409 	if (state != un->un_specified_mediastate) {
24410 		un->un_mediastate = state;
24411 		if (state == DKIO_INSERTED) {
24412 			/*
24413 			 * delay the signal to give the drive a chance
24414 			 * to do what it apparently needs to do
24415 			 */
24416 			SD_TRACE(SD_LOG_COMMON, un,
24417 			    "sd_media_watch_cb: delayed cv_broadcast\n");
24418 			if (un->un_dcvb_timeid == NULL) {
24419 				un->un_dcvb_timeid =
24420 				    timeout(sd_delayed_cv_broadcast, un,
24421 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
24422 			}
24423 		} else {
24424 			SD_TRACE(SD_LOG_COMMON, un,
24425 			    "sd_media_watch_cb: immediate cv_broadcast\n");
24426 			cv_broadcast(&un->un_state_cv);
24427 		}
24428 	}
24429 	mutex_exit(SD_MUTEX(un));
24430 	return (0);
24431 }
24432 
24433 
24434 /*
24435  *    Function: sd_dkio_get_temp
24436  *
24437  * Description: This routine is the driver entry point for handling ioctl
24438  *		requests to get the disk temperature.
24439  *
24440  *   Arguments: dev  - the device number
24441  *		arg  - pointer to user provided dk_temperature structure.
24442  *		flag - this argument is a pass through to ddi_copyxxx()
24443  *		       directly from the mode argument of ioctl().
24444  *
24445  * Return Code: 0
24446  *		EFAULT
24447  *		ENXIO
24448  *		EAGAIN
24449  */
24450 
24451 static int
24452 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
24453 {
24454 	struct sd_lun		*un = NULL;
24455 	struct dk_temperature	*dktemp = NULL;
24456 	uchar_t			*temperature_page;
24457 	int			rval = 0;
24458 	int			path_flag = SD_PATH_STANDARD;
24459 
24460 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24461 		return (ENXIO);
24462 	}
24463 
24464 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
24465 
24466 	/* copyin the disk temp argument to get the user flags */
24467 	if (ddi_copyin((void *)arg, dktemp,
24468 	    sizeof (struct dk_temperature), flag) != 0) {
24469 		rval = EFAULT;
24470 		goto done;
24471 	}
24472 
24473 	/* Initialize the temperature to invalid. */
24474 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24475 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24476 
24477 	/*
24478 	 * Note: Investigate removing the "bypass pm" semantic.
24479 	 * Can we just bypass PM always?
24480 	 */
24481 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
24482 		path_flag = SD_PATH_DIRECT;
24483 		ASSERT(!mutex_owned(&un->un_pm_mutex));
24484 		mutex_enter(&un->un_pm_mutex);
24485 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
24486 			/*
24487 			 * If DKT_BYPASS_PM is set, and the drive happens to be
24488 			 * in low power mode, we cannot wake it up; we need
24489 			 * to return EAGAIN.
24490 			 */
24491 			mutex_exit(&un->un_pm_mutex);
24492 			rval = EAGAIN;
24493 			goto done;
24494 		} else {
24495 			/*
24496 			 * Indicate to PM the device is busy. This is required
24497 			 * to avoid a race - i.e. the ioctl is issuing a
24498 			 * command and the pm framework brings down the device
24499 			 * to low power mode (possible power cut-off on some
24500 			 * platforms).
24501 			 */
24502 			mutex_exit(&un->un_pm_mutex);
24503 			if (sd_pm_entry(un) != DDI_SUCCESS) {
24504 				rval = EAGAIN;
24505 				goto done;
24506 			}
24507 		}
24508 	}
24509 
24510 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
24511 
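	/*
	 * Fetch the temperature log page. Parameter code 0x0000 is the
	 * current temperature and 0x0001 the reference temperature (one
	 * byte each, in degrees Celsius); a value of 0xFF is treated as
	 * "not available" by the checks below.
	 */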
24512 	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
24513 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
24514 		goto done2;
24515 	}
24516 
24517 	/*
24518 	 * For the current temperature verify that the parameter length is 0x02
24519 	 * and the parameter code is 0x00
24520 	 */
24521 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
24522 	    (temperature_page[5] == 0x00)) {
24523 		if (temperature_page[9] == 0xFF) {
24524 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24525 		} else {
24526 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
24527 		}
24528 	}
24529 
24530 	/*
24531 	 * For the reference temperature verify that the parameter
24532 	 * length is 0x02 and the parameter code is 0x01
24533 	 */
24534 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
24535 	    (temperature_page[11] == 0x01)) {
24536 		if (temperature_page[15] == 0xFF) {
24537 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24538 		} else {
24539 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
24540 		}
24541 	}
24542 
24543 	/* Do the copyout regardless of the temperature command's status. */
24544 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
24545 	    flag) != 0) {
24546 		rval = EFAULT;
24547 	}
24548 
24549 done2:
24550 	if (path_flag == SD_PATH_DIRECT) {
24551 		sd_pm_exit(un);
24552 	}
24553 
24554 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
24555 done:
24556 	if (dktemp != NULL) {
24557 		kmem_free(dktemp, sizeof (struct dk_temperature));
24558 	}
24559 
24560 	return (rval);
24561 }
24562 
24563 
24564 /*
24565  *    Function: sd_log_page_supported
24566  *
24567  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
24568  *		supported log pages.
24569  *
24570  *   Arguments: un - driver soft state (unit) structure
24571  *		log_page - the log page code to look for
24572  *
24573  * Return Code: -1 - on error (log sense is optional and may not be supported).
24574  *		0  - log page not found.
24575  *		1  - log page found.
24576  */
24577 
24578 static int
24579 sd_log_page_supported(struct sd_lun *un, int log_page)
24580 {
24581 	uchar_t *log_page_data;
24582 	int	i;
24583 	int	match = 0;
24584 	int	log_size;
24585 
24586 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
24587 
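	/*
	 * Request log page 0x00, the "supported log pages" page, which
	 * returns the list of log pages the device implements.
	 */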
24588 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
24589 	    SD_PATH_DIRECT) != 0) {
24590 		SD_ERROR(SD_LOG_COMMON, un,
24591 		    "sd_log_page_supported: failed log page retrieval\n");
24592 		kmem_free(log_page_data, 0xFF);
24593 		return (-1);
24594 	}
24595 	log_size = log_page_data[3];
24596 
24597 	/*
24598 	 * The list of supported log pages starts at the fourth byte. Check
24599 	 * until we run out of log pages or a match is found.
24600 	 */
24601 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
24602 		if (log_page_data[i] == log_page) {
24603 			match++;
24604 		}
24605 	}
24606 	kmem_free(log_page_data, 0xFF);
24607 	return (match);
24608 }
24609 
24610 
24611 /*
24612  *    Function: sd_mhdioc_failfast
24613  *
24614  * Description: This routine is the driver entry point for handling ioctl
24615  *		requests to enable/disable the multihost failfast option.
24616  *		(MHIOCENFAILFAST)
24617  *
24618  *   Arguments: dev	- the device number
24619  *		arg	- user specified probing interval.
24620  *		flag	- this argument is a pass through to ddi_copyxxx()
24621  *			  directly from the mode argument of ioctl().
24622  *
24623  * Return Code: 0
24624  *		EFAULT
24625  *		ENXIO
24626  */
24627 
24628 static int
24629 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
24630 {
24631 	struct sd_lun	*un = NULL;
24632 	int		mh_time;
24633 	int		rval = 0;
24634 
24635 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24636 		return (ENXIO);
24637 	}
24638 
24639 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
24640 		return (EFAULT);
24641 
24642 	if (mh_time) {
24643 		mutex_enter(SD_MUTEX(un));
24644 		un->un_resvd_status |= SD_FAILFAST;
24645 		mutex_exit(SD_MUTEX(un));
24646 		/*
24647 		 * If mh_time is INT_MAX, then this ioctl is being used for
24648 		 * SCSI-3 PGR purposes, and we don't need to spawn a watch thread.
24649 		 */
24650 		if (mh_time != INT_MAX) {
24651 			rval = sd_check_mhd(dev, mh_time);
24652 		}
24653 	} else {
24654 		(void) sd_check_mhd(dev, 0);
24655 		mutex_enter(SD_MUTEX(un));
24656 		un->un_resvd_status &= ~SD_FAILFAST;
24657 		mutex_exit(SD_MUTEX(un));
24658 	}
24659 	return (rval);
24660 }
24661 
24662 
24663 /*
24664  *    Function: sd_mhdioc_takeown
24665  *
24666  * Description: This routine is the driver entry point for handling ioctl
24667  *		requests to forcefully acquire exclusive access rights to the
24668  *		multihost disk (MHIOCTKOWN).
24669  *
24670  *   Arguments: dev	- the device number
24671  *		arg	- user provided structure specifying the delay
24672  *			  parameters in milliseconds
24673  *		flag	- this argument is a pass through to ddi_copyxxx()
24674  *			  directly from the mode argument of ioctl().
24675  *
24676  * Return Code: 0
24677  *		EFAULT
24678  *		ENXIO
24679  */
24680 
24681 static int
24682 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
24683 {
24684 	struct sd_lun		*un = NULL;
24685 	struct mhioctkown	*tkown = NULL;
24686 	int			rval = 0;
24687 
24688 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24689 		return (ENXIO);
24690 	}
24691 
24692 	if (arg != NULL) {
24693 		tkown = (struct mhioctkown *)
24694 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
24695 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
24696 		if (rval != 0) {
24697 			rval = EFAULT;
24698 			goto error;
24699 		}
24700 	}
24701 
24702 	rval = sd_take_ownership(dev, tkown);
24703 	mutex_enter(SD_MUTEX(un));
24704 	if (rval == 0) {
24705 		un->un_resvd_status |= SD_RESERVE;
24706 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
24707 			sd_reinstate_resv_delay =
24708 			    tkown->reinstate_resv_delay * 1000;
24709 		} else {
24710 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
24711 		}
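		/*
		 * (reinstate_resv_delay is supplied in milliseconds and
		 * kept in sd_reinstate_resv_delay in microseconds.)
		 */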
24712 		/*
24713 		 * Give the scsi_watch routine interval set by
24714 		 * the MHIOCENFAILFAST ioctl precedence here.
24715 		 */
24716 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
24717 			mutex_exit(SD_MUTEX(un));
24718 			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
24719 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
24720 			    "sd_mhdioc_takeown : %d\n",
24721 			    sd_reinstate_resv_delay);
24722 		} else {
24723 			mutex_exit(SD_MUTEX(un));
24724 		}
24725 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
24726 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24727 	} else {
24728 		un->un_resvd_status &= ~SD_RESERVE;
24729 		mutex_exit(SD_MUTEX(un));
24730 	}
24731 
24732 error:
24733 	if (tkown != NULL) {
24734 		kmem_free(tkown, sizeof (struct mhioctkown));
24735 	}
24736 	return (rval);
24737 }
24738 
24739 
24740 /*
24741  *    Function: sd_mhdioc_release
24742  *
24743  * Description: This routine is the driver entry point for handling ioctl
24744  *		requests to release exclusive access rights to the multihost
24745  *		disk (MHIOCRELEASE).
24746  *
24747  *   Arguments: dev	- the device number
24748  *
24749  * Return Code: 0
24750  *		ENXIO
24751  */
24752 
24753 static int
24754 sd_mhdioc_release(dev_t dev)
24755 {
24756 	struct sd_lun		*un = NULL;
24757 	timeout_id_t		resvd_timeid_save;
24758 	int			resvd_status_save;
24759 	int			rval = 0;
24760 
24761 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24762 		return (ENXIO);
24763 	}
24764 
24765 	mutex_enter(SD_MUTEX(un));
24766 	resvd_status_save = un->un_resvd_status;
24767 	un->un_resvd_status &=
24768 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
24769 	if (un->un_resvd_timeid) {
24770 		resvd_timeid_save = un->un_resvd_timeid;
24771 		un->un_resvd_timeid = NULL;
24772 		mutex_exit(SD_MUTEX(un));
24773 		(void) untimeout(resvd_timeid_save);
24774 	} else {
24775 		mutex_exit(SD_MUTEX(un));
24776 	}
24777 
24778 	/*
24779 	 * destroy any pending timeout thread that may be attempting to
24780 	 * reinstate reservation on this device.
24781 	 */
24782 	sd_rmv_resv_reclaim_req(dev);
24783 
24784 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
24785 		mutex_enter(SD_MUTEX(un));
24786 		if ((un->un_mhd_token) &&
24787 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
24788 			mutex_exit(SD_MUTEX(un));
24789 			(void) sd_check_mhd(dev, 0);
24790 		} else {
24791 			mutex_exit(SD_MUTEX(un));
24792 		}
24793 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
24794 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24795 	} else {
24796 		/*
24797 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
24798 		 */
24799 		mutex_enter(SD_MUTEX(un));
24800 		un->un_resvd_status = resvd_status_save;
24801 		mutex_exit(SD_MUTEX(un));
24802 	}
24803 	return (rval);
24804 }
24805 
24806 
24807 /*
24808  *    Function: sd_mhdioc_register_devid
24809  *
24810  * Description: This routine is the driver entry point for handling ioctl
24811  *		requests to register the device id (MHIOCREREGISTERDEVID).
24812  *
24813  *		Note: The implementation for this ioctl has been updated to
24814  *		be consistent with the original PSARC case (1999/357)
24815  *		(4375899, 4241671, 4220005)
24816  *
24817  *   Arguments: dev	- the device number
24818  *
24819  * Return Code: 0
24820  *		ENXIO
24821  */
24822 
24823 static int
24824 sd_mhdioc_register_devid(dev_t dev)
24825 {
24826 	struct sd_lun	*un = NULL;
24827 	int		rval = 0;
24828 
24829 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24830 		return (ENXIO);
24831 	}
24832 
24833 	ASSERT(!mutex_owned(SD_MUTEX(un)));
24834 
24835 	mutex_enter(SD_MUTEX(un));
24836 
24837 	/* If a devid already exists, de-register it */
24838 	if (un->un_devid != NULL) {
24839 		ddi_devid_unregister(SD_DEVINFO(un));
24840 		/*
24841 		 * After unregistering the devid, free the devid memory.
24842 		 */
24843 		ddi_devid_free(un->un_devid);
24844 		un->un_devid = NULL;
24845 	}
24846 
24847 	/* Check for reservation conflict */
24848 	mutex_exit(SD_MUTEX(un));
24849 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
24850 	mutex_enter(SD_MUTEX(un));
24851 
24852 	switch (rval) {
24853 	case 0:
24854 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
24855 		break;
24856 	case EACCES:
24857 		break;
24858 	default:
24859 		rval = EIO;
24860 	}
24861 
24862 	mutex_exit(SD_MUTEX(un));
24863 	return (rval);
24864 }
24865 
24866 
24867 /*
24868  *    Function: sd_mhdioc_inkeys
24869  *
24870  * Description: This routine is the driver entry point for handling ioctl
24871  *		requests to issue the SCSI-3 Persistent In Read Keys command
24872  *		to the device (MHIOCGRP_INKEYS).
24873  *
24874  *   Arguments: dev	- the device number
24875  *		arg	- user provided in_keys structure
24876  *		flag	- this argument is a pass through to ddi_copyxxx()
24877  *			  directly from the mode argument of ioctl().
24878  *
24879  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
24880  *		ENXIO
24881  *		EFAULT
24882  */
24883 
24884 static int
24885 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
24886 {
24887 	struct sd_lun		*un;
24888 	mhioc_inkeys_t		inkeys;
24889 	int			rval = 0;
24890 
24891 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24892 		return (ENXIO);
24893 	}
24894 
24895 #ifdef _MULTI_DATAMODEL
24896 	switch (ddi_model_convert_from(flag & FMODELS)) {
24897 	case DDI_MODEL_ILP32: {
24898 		struct mhioc_inkeys32	inkeys32;
24899 
24900 		if (ddi_copyin(arg, &inkeys32,
24901 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
24902 			return (EFAULT);
24903 		}
24904 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
24905 		if ((rval = sd_persistent_reservation_in_read_keys(un,
24906 		    &inkeys, flag)) != 0) {
24907 			return (rval);
24908 		}
24909 		inkeys32.generation = inkeys.generation;
24910 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
24911 		    flag) != 0) {
24912 			return (EFAULT);
24913 		}
24914 		break;
24915 	}
24916 	case DDI_MODEL_NONE:
24917 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
24918 		    flag) != 0) {
24919 			return (EFAULT);
24920 		}
24921 		if ((rval = sd_persistent_reservation_in_read_keys(un,
24922 		    &inkeys, flag)) != 0) {
24923 			return (rval);
24924 		}
24925 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
24926 		    flag) != 0) {
24927 			return (EFAULT);
24928 		}
24929 		break;
24930 	}
24931 
24932 #else /* ! _MULTI_DATAMODEL */
24933 
24934 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
24935 		return (EFAULT);
24936 	}
24937 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
24938 	if (rval != 0) {
24939 		return (rval);
24940 	}
24941 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
24942 		return (EFAULT);
24943 	}
24944 
24945 #endif /* _MULTI_DATAMODEL */
24946 
24947 	return (rval);
24948 }
24949 
24950 
24951 /*
24952  *    Function: sd_mhdioc_inresv
24953  *
24954  * Description: This routine is the driver entry point for handling ioctl
24955  *		requests to issue the SCSI-3 Persistent In Read Reservations
24956  *		command to the device (MHIOCGRP_INKEYS).
24957  *		command to the device (MHIOCGRP_INRESV).
24958  *   Arguments: dev	- the device number
24959  *		arg	- user provided in_resv structure
24960  *		flag	- this argument is a pass through to ddi_copyxxx()
24961  *			  directly from the mode argument of ioctl().
24962  *
24963  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
24964  *		ENXIO
24965  *		EFAULT
24966  */
24967 
24968 static int
24969 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
24970 {
24971 	struct sd_lun		*un;
24972 	mhioc_inresvs_t		inresvs;
24973 	int			rval = 0;
24974 
24975 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24976 		return (ENXIO);
24977 	}
24978 
24979 #ifdef _MULTI_DATAMODEL
24980 
24981 	switch (ddi_model_convert_from(flag & FMODELS)) {
24982 	case DDI_MODEL_ILP32: {
24983 		struct mhioc_inresvs32	inresvs32;
24984 
24985 		if (ddi_copyin(arg, &inresvs32,
24986 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
24987 			return (EFAULT);
24988 		}
24989 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
24990 		if ((rval = sd_persistent_reservation_in_read_resv(un,
24991 		    &inresvs, flag)) != 0) {
24992 			return (rval);
24993 		}
24994 		inresvs32.generation = inresvs.generation;
24995 		if (ddi_copyout(&inresvs32, arg,
24996 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
24997 			return (EFAULT);
24998 		}
24999 		break;
25000 	}
25001 	case DDI_MODEL_NONE:
25002 		if (ddi_copyin(arg, &inresvs,
25003 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25004 			return (EFAULT);
25005 		}
25006 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25007 		    &inresvs, flag)) != 0) {
25008 			return (rval);
25009 		}
25010 		if (ddi_copyout(&inresvs, arg,
25011 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25012 			return (EFAULT);
25013 		}
25014 		break;
25015 	}
25016 
25017 #else /* ! _MULTI_DATAMODEL */
25018 
25019 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
25020 		return (EFAULT);
25021 	}
25022 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
25023 	if (rval != 0) {
25024 		return (rval);
25025 	}
25026 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
25027 		return (EFAULT);
25028 	}
25029 
25030 #endif /* _MULTI_DATAMODEL */
25031 
25032 	return (rval);
25033 }
25034 
25035 
25036 /*
25037  * The following routines support the clustering functionality described below
25038  * and implement lost reservation reclaim functionality.
25039  *
25040  * Clustering
25041  * ----------
25042  * The clustering code uses two different, independent forms of SCSI
25043  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
25044  * Persistent Group Reservations. For any particular disk, it will use either
25045  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
25046  *
25047  * SCSI-2
25048  * The cluster software takes ownership of a multi-hosted disk by issuing the
25049  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
25050  * MHIOCRELEASE ioctl. Closely related is the MHIOCENFAILFAST ioctl: a
25051  * cluster, just after taking ownership of the disk with the MHIOCTKOWN
25052  * ioctl, then issues MHIOCENFAILFAST, which "enables failfast" in the driver.
25053  * The meaning of failfast is that if the driver (on this host) encounters the
25054  * scsi error return code RESERVATION_CONFLICT from the device, it should
25055  * immediately panic the host. The motivation for this ioctl is that if this
25056  * host does encounter reservation conflict, the underlying cause is that some
25057  * other host of the cluster has decided that this host is no longer in the
25058  * cluster and has seized control of the disks for itself. Since this host is no
25059  * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
25060  * does two things:
25061  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
25062  *      error to panic the host
25063  *      (b) it sets up a periodic timer to test whether this host still has
25064  *      "access" (in that no other host has reserved the device):  if the
25065  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
25066  *      purpose of that periodic timer is to handle scenarios where the host is
25067  *      otherwise temporarily quiescent, temporarily doing no real i/o.
25068  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
25069  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
25070  * the device itself.
25071  *
25072  * SCSI-3 PGR
25073  * A direct semantic implementation of the SCSI-3 Persistent Reservation
25074  * facility is supported through the shared multihost disk ioctls
25075  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
25076  * MHIOCGRP_PREEMPTANDABORT)
25077  *
25078  * Reservation Reclaim:
25079  * --------------------
25080  * To support the lost reservation reclaim operations this driver creates a
25081  * single thread to handle reinstating reservations on all devices that have
25082  * lost reservations. sd_resv_reclaim_requests are logged for all devices
25083  * that have LOST RESERVATIONS when the scsi watch facility calls back
25084  * sd_mhd_watch_cb, and the reservation reclaim thread loops through the
25085  * requests to regain the lost reservations.
25086  */
25087 
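/*
 * A hypothetical user-level sketch (not part of this driver) of how a
 * cluster node might drive these ioctls: take ownership, arm failfast
 * with a probe interval in milliseconds, and later release ownership:
 *
 *	struct mhioctkown tkown;
 *	int mh_time = 1000;		(probe once per second)
 *
 *	bzero(&tkown, sizeof (tkown));
 *	(void) ioctl(fd, MHIOCTKOWN, &tkown);
 *	(void) ioctl(fd, MHIOCENFAILFAST, &mh_time);
 *	...
 *	(void) ioctl(fd, MHIOCRELEASE, 0);
 */
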
25088 /*
25089  *    Function: sd_check_mhd()
25090  *
25091  * Description: This function sets up and submits a scsi watch request or
25092  *		terminates an existing watch request. This routine is used in
25093  *		support of reservation reclaim.
25094  *
25095  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
25096  *			 among multiple watches that share the callback function
25097  *		interval - the number of milliseconds specifying the watch
25098  *			   interval for issuing TEST UNIT READY commands. If
25099  *			   set to 0 the watch should be terminated. If the
25100  *			   interval is set to 0 and if the device is required
25101  *			   to hold reservation while disabling failfast, the
25102  *			   watch is restarted with an interval of
25103  *			   reinstate_resv_delay.
25104  *
25105  * Return Code: 0	   - Successful submit/terminate of scsi watch request
25106  *		ENXIO      - Indicates an invalid device was specified
25107  *		EAGAIN     - Unable to submit the scsi watch request
25108  */
25109 
25110 static int
25111 sd_check_mhd(dev_t dev, int interval)
25112 {
25113 	struct sd_lun	*un;
25114 	opaque_t	token;
25115 
25116 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25117 		return (ENXIO);
25118 	}
25119 
25120 	/* is this a watch termination request? */
25121 	if (interval == 0) {
25122 		mutex_enter(SD_MUTEX(un));
25123 		/* if there is an existing watch task then terminate it */
25124 		if (un->un_mhd_token) {
25125 			token = un->un_mhd_token;
25126 			un->un_mhd_token = NULL;
25127 			mutex_exit(SD_MUTEX(un));
25128 			(void) scsi_watch_request_terminate(token,
25129 			    SCSI_WATCH_TERMINATE_WAIT);
25130 			mutex_enter(SD_MUTEX(un));
25131 		} else {
25132 			mutex_exit(SD_MUTEX(un));
25133 			/*
25134 			 * Note: If we return here we don't check for the
25135 			 * failfast case. This is the original legacy
25136 			 * implementation but perhaps we should be checking
25137 			 * the failfast case.
25138 			 */
25139 			return (0);
25140 		}
25141 		/*
25142 		 * If the device is required to hold reservation while
25143 		 * disabling failfast, we need to restart the scsi_watch
25144 		 * routine with an interval of reinstate_resv_delay.
25145 		 */
25146 		if (un->un_resvd_status & SD_RESERVE) {
25147 			interval = sd_reinstate_resv_delay/1000;
25148 		} else {
			/* no reservation to hold, so bail */
25150 			mutex_exit(SD_MUTEX(un));
25151 			return (0);
25152 		}
25153 		mutex_exit(SD_MUTEX(un));
25154 	}
25155 
25156 	/*
25157 	 * adjust minimum time interval to 1 second,
25158 	 * and convert from msecs to usecs
25159 	 */
25160 	if (interval > 0 && interval < 1000) {
25161 		interval = 1000;
25162 	}
25163 	interval *= 1000;
25164 
25165 	/*
25166 	 * submit the request to the scsi_watch service
25167 	 */
25168 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
25169 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
25170 	if (token == NULL) {
25171 		return (EAGAIN);
25172 	}
25173 
25174 	/*
25175 	 * save token for termination later on
25176 	 */
25177 	mutex_enter(SD_MUTEX(un));
25178 	un->un_mhd_token = token;
25179 	mutex_exit(SD_MUTEX(un));
25180 	return (0);
25181 }
25182 
25183 
25184 /*
25185  *    Function: sd_mhd_watch_cb()
25186  *
25187  * Description: This function is the call back function used by the scsi watch
25188  *		facility. The scsi watch facility sends the "Test Unit Ready"
25189  *		and processes the status. If applicable (i.e. a "Unit Attention"
25190  *		status and automatic "Request Sense" not used) the scsi watch
25191  *		facility will send a "Request Sense" and retrieve the sense data
 *		to be passed to this callback function. In either case
 *		(automatic "Request Sense" or the facility submitting one),
 *		this callback is passed the status and sense data.
25195  *
25196  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25197  *			among multiple watches that share this callback function
25198  *		resultp - scsi watch facility result packet containing scsi
25199  *			  packet, status byte and sense data
25200  *
25201  * Return Code: 0 - continue the watch task
25202  *		non-zero - terminate the watch task
25203  */
25204 
25205 static int
25206 sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
25207 {
25208 	struct sd_lun			*un;
25209 	struct scsi_status		*statusp;
25210 	struct scsi_extended_sense	*sensep;
25211 	struct scsi_pkt			*pkt;
25212 	uchar_t				actual_sense_length;
25213 	dev_t  				dev = (dev_t)arg;
25214 
25215 	ASSERT(resultp != NULL);
25216 	statusp			= resultp->statusp;
25217 	sensep			= resultp->sensep;
25218 	pkt			= resultp->pkt;
25219 	actual_sense_length	= resultp->actual_sense_length;
25220 
25221 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25222 		return (ENXIO);
25223 	}
25224 
25225 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
25226 	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
25227 	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
25228 
25229 	/* Begin processing of the status and/or sense data */
25230 	if (pkt->pkt_reason != CMD_CMPLT) {
25231 		/* Handle the incomplete packet */
25232 		sd_mhd_watch_incomplete(un, pkt);
25233 		return (0);
25234 	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
25235 		if (*((unsigned char *)statusp)
25236 		    == STATUS_RESERVATION_CONFLICT) {
25237 			/*
25238 			 * Handle a reservation conflict by panicking if
25239 			 * configured for failfast or by logging the conflict
25240 			 * and updating the reservation status
25241 			 */
25242 			mutex_enter(SD_MUTEX(un));
25243 			if ((un->un_resvd_status & SD_FAILFAST) &&
25244 			    (sd_failfast_enable)) {
25245 				sd_panic_for_res_conflict(un);
25246 				/*NOTREACHED*/
25247 			}
25248 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25249 			    "sd_mhd_watch_cb: Reservation Conflict\n");
25250 			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
25251 			mutex_exit(SD_MUTEX(un));
25252 		}
25253 	}
25254 
25255 	if (sensep != NULL) {
25256 		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
25257 			mutex_enter(SD_MUTEX(un));
25258 			if ((sensep->es_add_code == SD_SCSI_RESET_SENSE_CODE) &&
25259 			    (un->un_resvd_status & SD_RESERVE)) {
25260 				/*
25261 				 * The additional sense code indicates a power
25262 				 * on or bus device reset has occurred; update
25263 				 * the reservation status.
25264 				 */
25265 				un->un_resvd_status |=
25266 				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25267 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25268 				    "sd_mhd_watch_cb: Lost Reservation\n");
25269 			}
25270 		} else {
25271 			return (0);
25272 		}
25273 	} else {
25274 		mutex_enter(SD_MUTEX(un));
25275 	}
25276 
25277 	if ((un->un_resvd_status & SD_RESERVE) &&
25278 	    (un->un_resvd_status & SD_LOST_RESERVE)) {
25279 		if (un->un_resvd_status & SD_WANT_RESERVE) {
25280 			/*
25281 			 * A reset occurred in between the last probe and this
25282 			 * one so if a timeout is pending cancel it.
25283 			 */
25284 			if (un->un_resvd_timeid) {
25285 				timeout_id_t temp_id = un->un_resvd_timeid;
25286 				un->un_resvd_timeid = NULL;
25287 				mutex_exit(SD_MUTEX(un));
25288 				(void) untimeout(temp_id);
25289 				mutex_enter(SD_MUTEX(un));
25290 			}
25291 			un->un_resvd_status &= ~SD_WANT_RESERVE;
25292 		}
25293 		if (un->un_resvd_timeid == 0) {
25294 			/* Schedule a timeout to handle the lost reservation */
25295 			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
25296 			    (void *)dev,
25297 			    drv_usectohz(sd_reinstate_resv_delay));
25298 		}
25299 	}
25300 	mutex_exit(SD_MUTEX(un));
25301 	return (0);
25302 }
25303 
25304 
25305 /*
25306  *    Function: sd_mhd_watch_incomplete()
25307  *
25308  * Description: This function is used to find out why a scsi pkt sent by the
 *		scsi watch facility was not completed. In some scenarios this
 *		routine simply returns; otherwise it issues a lun, target,
 *		and/or bus reset to see if the drive is still responding.
25312  *
25313  *   Arguments: un  - driver soft state (unit) structure
25314  *		pkt - incomplete scsi pkt
25315  */
25316 
25317 static void
25318 sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
25319 {
25320 	int	be_chatty;
25321 	int	perr;
25322 
25323 	ASSERT(pkt != NULL);
25324 	ASSERT(un != NULL);
25325 	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
25326 	perr		= (pkt->pkt_statistics & STAT_PERR);
25327 
25328 	mutex_enter(SD_MUTEX(un));
25329 	if (un->un_state == SD_STATE_DUMPING) {
25330 		mutex_exit(SD_MUTEX(un));
25331 		return;
25332 	}
25333 
25334 	switch (pkt->pkt_reason) {
25335 	case CMD_UNX_BUS_FREE:
25336 		/*
25337 		 * If we had a parity error that caused the target to drop BSY*,
25338 		 * don't be chatty about it.
25339 		 */
25340 		if (perr && be_chatty) {
25341 			be_chatty = 0;
25342 		}
25343 		break;
25344 	case CMD_TAG_REJECT:
25345 		/*
25346 		 * The SCSI-2 spec states that a tag reject will be sent by the
25347 		 * target if tagged queuing is not supported. A tag reject may
25348 		 * also be sent during certain initialization periods or to
25349 		 * control internal resources. For the latter case the target
25350 		 * may also return Queue Full.
25351 		 *
25352 		 * If this driver receives a tag reject from a target that is
25353 		 * going through an init period or controlling internal
25354 		 * resources tagged queuing will be disabled. This is a less
25355 		 * than optimal behavior but the driver is unable to determine
		 * the target state and assumes tagged queueing is not supported.
25357 		 */
25358 		pkt->pkt_flags = 0;
25359 		un->un_tagflags = 0;
25360 
25361 		if (un->un_f_opt_queueing == TRUE) {
25362 			un->un_throttle = min(un->un_throttle, 3);
25363 		} else {
25364 			un->un_throttle = 1;
25365 		}
25366 		mutex_exit(SD_MUTEX(un));
25367 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
25368 		mutex_enter(SD_MUTEX(un));
25369 		break;
25370 	case CMD_INCOMPLETE:
25371 		/*
		 * The transport stopped with an abnormal state; fall through
		 * and reset the target and/or bus, unless selection did not
		 * complete (indicated by STATE_GOT_BUS), in which case we
		 * don't want to go through a target/bus reset.
25376 		 */
25377 		if (pkt->pkt_state == STATE_GOT_BUS) {
25378 			break;
25379 		}
25380 		/*FALLTHROUGH*/
25381 
25382 	case CMD_TIMEOUT:
25383 	default:
25384 		/*
25385 		 * The lun may still be running the command, so a lun reset
25386 		 * should be attempted. If the lun reset fails or cannot be
		 * issued, then try a target reset. Lastly, try a bus reset.
25388 		 */
25389 		if ((pkt->pkt_statistics &
25390 		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
25391 			int reset_retval = 0;
25392 			mutex_exit(SD_MUTEX(un));
25393 			if (un->un_f_allow_bus_device_reset == TRUE) {
25394 				if (un->un_f_lun_reset_enabled == TRUE) {
25395 					reset_retval =
25396 					    scsi_reset(SD_ADDRESS(un),
25397 					    RESET_LUN);
25398 				}
25399 				if (reset_retval == 0) {
25400 					reset_retval =
25401 					    scsi_reset(SD_ADDRESS(un),
25402 					    RESET_TARGET);
25403 				}
25404 			}
25405 			if (reset_retval == 0) {
25406 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
25407 			}
25408 			mutex_enter(SD_MUTEX(un));
25409 		}
25410 		break;
25411 	}
25412 
25413 	/* A device/bus reset has occurred; update the reservation status. */
25414 	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
25415 	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
25416 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25417 			un->un_resvd_status |=
25418 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25419 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25420 			    "sd_mhd_watch_incomplete: Lost Reservation\n");
25421 		}
25422 	}
25423 
25424 	/*
	 * The disk has been turned off; update the device state.
25426 	 *
25427 	 * Note: Should we be offlining the disk here?
25428 	 */
25429 	if (pkt->pkt_state == STATE_GOT_BUS) {
25430 		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
25431 		    "Disk not responding to selection\n");
25432 		if (un->un_state != SD_STATE_OFFLINE) {
25433 			New_state(un, SD_STATE_OFFLINE);
25434 		}
25435 	} else if (be_chatty) {
25436 		/*
25437 		 * suppress messages if they are all the same pkt reason;
25438 		 * with TQ, many (up to 256) are returned with the same
25439 		 * pkt_reason
25440 		 */
25441 		if (pkt->pkt_reason != un->un_last_pkt_reason) {
25442 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25443 			    "sd_mhd_watch_incomplete: "
25444 			    "SCSI transport failed: reason '%s'\n",
25445 			    scsi_rname(pkt->pkt_reason));
25446 		}
25447 	}
25448 	un->un_last_pkt_reason = pkt->pkt_reason;
25449 	mutex_exit(SD_MUTEX(un));
25450 }
25451 
25452 
25453 /*
25454  *    Function: sd_sname()
25455  *
25456  * Description: This is a simple little routine to return a string containing
 *		a printable description of the command status byte for use in
25458  *		logging.
25459  *
 *   Arguments: status - the command status byte
25461  *
25462  * Return Code: char * - string containing status description.
25463  */
25464 
25465 static char *
25466 sd_sname(uchar_t status)
25467 {
25468 	switch (status & STATUS_MASK) {
25469 	case STATUS_GOOD:
25470 		return ("good status");
25471 	case STATUS_CHECK:
25472 		return ("check condition");
25473 	case STATUS_MET:
25474 		return ("condition met");
25475 	case STATUS_BUSY:
25476 		return ("busy");
25477 	case STATUS_INTERMEDIATE:
25478 		return ("intermediate");
25479 	case STATUS_INTERMEDIATE_MET:
25480 		return ("intermediate - condition met");
25481 	case STATUS_RESERVATION_CONFLICT:
25482 		return ("reservation_conflict");
25483 	case STATUS_TERMINATED:
25484 		return ("command terminated");
25485 	case STATUS_QFULL:
25486 		return ("queue full");
25487 	default:
25488 		return ("<unknown status>");
25489 	}
25490 }
25491 
25492 
25493 /*
25494  *    Function: sd_mhd_resvd_recover()
25495  *
25496  * Description: This function adds a reservation entry to the
25497  *		sd_resv_reclaim_request list and signals the reservation
25498  *		reclaim thread that there is work pending. If the reservation
25499  *		reclaim thread has not been previously created this function
25500  *		will kick it off.
25501  *
25502  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25503  *			among multiple watches that share this callback function
25504  *
25505  *     Context: This routine is called by timeout() and is run in interrupt
25506  *		context. It must not sleep or call other functions which may
25507  *		sleep.
25508  */
25509 
25510 static void
25511 sd_mhd_resvd_recover(void *arg)
25512 {
25513 	dev_t			dev = (dev_t)arg;
25514 	struct sd_lun		*un;
25515 	struct sd_thr_request	*sd_treq = NULL;
25516 	struct sd_thr_request	*sd_cur = NULL;
25517 	struct sd_thr_request	*sd_prev = NULL;
25518 	int			already_there = 0;
25519 
25520 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25521 		return;
25522 	}
25523 
25524 	mutex_enter(SD_MUTEX(un));
25525 	un->un_resvd_timeid = NULL;
25526 	if (un->un_resvd_status & SD_WANT_RESERVE) {
25527 		/*
25528 		 * There was a reset so don't issue the reserve, allow the
25529 		 * sd_mhd_watch_cb callback function to notice this and
25530 		 * reschedule the timeout for reservation.
25531 		 */
25532 		mutex_exit(SD_MUTEX(un));
25533 		return;
25534 	}
25535 	mutex_exit(SD_MUTEX(un));
25536 
25537 	/*
25538 	 * Add this device to the sd_resv_reclaim_request list and the
25539 	 * sd_resv_reclaim_thread should take care of the rest.
25540 	 *
25541 	 * Note: We can't sleep in this context so if the memory allocation
25542 	 * fails allow the sd_mhd_watch_cb callback function to notice this and
25543 	 * reschedule the timeout for reservation.  (4378460)
25544 	 */
25545 	sd_treq = (struct sd_thr_request *)
25546 	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
25547 	if (sd_treq == NULL) {
25548 		return;
25549 	}
25550 
25551 	sd_treq->sd_thr_req_next = NULL;
25552 	sd_treq->dev = dev;
25553 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25554 	if (sd_tr.srq_thr_req_head == NULL) {
25555 		sd_tr.srq_thr_req_head = sd_treq;
25556 	} else {
25557 		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
25558 		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
25559 			if (sd_cur->dev == dev) {
25560 				/*
				 * already in the queue, so don't log
				 * another request for the device
25563 				 */
25564 				already_there = 1;
25565 				break;
25566 			}
25567 			sd_prev = sd_cur;
25568 		}
25569 		if (!already_there) {
25570 			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
25571 			    "logging request for %lx\n", dev);
25572 			sd_prev->sd_thr_req_next = sd_treq;
25573 		} else {
25574 			kmem_free(sd_treq, sizeof (struct sd_thr_request));
25575 		}
25576 	}
25577 
25578 	/*
	 * Create a kernel thread to do the reservation reclaim and free up
	 * this thread; we cannot block this (interrupt-context) thread while
	 * we go away to do the reservation reclaim.
25582 	 */
25583 	if (sd_tr.srq_resv_reclaim_thread == NULL)
25584 		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
25585 		    sd_resv_reclaim_thread, NULL,
25586 		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
25587 
25588 	/* Tell the reservation reclaim thread that it has work to do */
25589 	cv_signal(&sd_tr.srq_resv_reclaim_cv);
25590 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25591 }
25592 
25593 /*
25594  *    Function: sd_resv_reclaim_thread()
25595  *
25596  * Description: This function implements the reservation reclaim operations
25597  *
 *   Arguments: none; requests are dequeued from the global sd_tr
 *		reservation reclaim request list.
25600  */
25601 
25602 static void
25603 sd_resv_reclaim_thread()
25604 {
25605 	struct sd_lun		*un;
25606 	struct sd_thr_request	*sd_mhreq;
25607 
25608 	/* Wait for work */
25609 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25610 	if (sd_tr.srq_thr_req_head == NULL) {
25611 		cv_wait(&sd_tr.srq_resv_reclaim_cv,
25612 		    &sd_tr.srq_resv_reclaim_mutex);
25613 	}
25614 
25615 	/* Loop while we have work */
25616 	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
25617 		un = ddi_get_soft_state(sd_state,
25618 		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
25619 		if (un == NULL) {
25620 			/*
25621 			 * softstate structure is NULL so just
25622 			 * dequeue the request and continue
25623 			 */
25624 			sd_tr.srq_thr_req_head =
25625 			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25626 			kmem_free(sd_tr.srq_thr_cur_req,
25627 			    sizeof (struct sd_thr_request));
25628 			continue;
25629 		}
25630 
25631 		/* dequeue the request */
25632 		sd_mhreq = sd_tr.srq_thr_cur_req;
25633 		sd_tr.srq_thr_req_head =
25634 		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25635 		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25636 
25637 		/*
25638 		 * Reclaim reservation only if SD_RESERVE is still set. There
25639 		 * may have been a call to MHIOCRELEASE before we got here.
25640 		 */
25641 		mutex_enter(SD_MUTEX(un));
25642 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25643 			/*
25644 			 * Note: The SD_LOST_RESERVE flag is cleared before
25645 			 * reclaiming the reservation. If this is done after the
25646 			 * call to sd_reserve_release a reservation loss in the
25647 			 * window between pkt completion of reserve cmd and
25648 			 * mutex_enter below may not be recognized
25649 			 */
25650 			un->un_resvd_status &= ~SD_LOST_RESERVE;
25651 			mutex_exit(SD_MUTEX(un));
25652 
25653 			if (sd_reserve_release(sd_mhreq->dev,
25654 			    SD_RESERVE) == 0) {
25655 				mutex_enter(SD_MUTEX(un));
25656 				un->un_resvd_status |= SD_RESERVE;
25657 				mutex_exit(SD_MUTEX(un));
25658 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25659 				    "sd_resv_reclaim_thread: "
25660 				    "Reservation Recovered\n");
25661 			} else {
25662 				mutex_enter(SD_MUTEX(un));
25663 				un->un_resvd_status |= SD_LOST_RESERVE;
25664 				mutex_exit(SD_MUTEX(un));
25665 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25666 				    "sd_resv_reclaim_thread: Failed "
25667 				    "Reservation Recovery\n");
25668 			}
25669 		} else {
25670 			mutex_exit(SD_MUTEX(un));
25671 		}
25672 		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25673 		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
25674 		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25675 		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
25676 		/*
25677 		 * wakeup the destroy thread if anyone is waiting on
25678 		 * us to complete.
25679 		 */
25680 		cv_signal(&sd_tr.srq_inprocess_cv);
25681 		SD_TRACE(SD_LOG_IOCTL_MHD, un,
25682 		    "sd_resv_reclaim_thread: cv_signalling current request \n");
25683 	}
25684 
25685 	/*
	 * Clean up the sd_tr structure now that this thread is exiting.
25687 	 */
25688 	ASSERT(sd_tr.srq_thr_req_head == NULL);
25689 	ASSERT(sd_tr.srq_thr_cur_req == NULL);
25690 	sd_tr.srq_resv_reclaim_thread = NULL;
25691 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25692 	thread_exit();
25693 }
25694 
25695 
25696 /*
25697  *    Function: sd_rmv_resv_reclaim_req()
25698  *
25699  * Description: This function removes any pending reservation reclaim requests
25700  *		for the specified device.
25701  *
25702  *   Arguments: dev - the device 'dev_t'
25703  */
25704 
25705 static void
25706 sd_rmv_resv_reclaim_req(dev_t dev)
25707 {
25708 	struct sd_thr_request *sd_mhreq;
25709 	struct sd_thr_request *sd_prev;
25710 
25711 	/* Remove a reservation reclaim request from the list */
25712 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25713 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
25714 		/*
25715 		 * We are attempting to reinstate reservation for
25716 		 * this device. We wait for sd_reserve_release()
25717 		 * to return before we return.
25718 		 */
25719 		cv_wait(&sd_tr.srq_inprocess_cv,
25720 		    &sd_tr.srq_resv_reclaim_mutex);
25721 	} else {
25722 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
25723 		if (sd_mhreq && sd_mhreq->dev == dev) {
25724 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
25725 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25726 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25727 			return;
25728 		}
25729 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
25730 			if (sd_mhreq && sd_mhreq->dev == dev) {
25731 				break;
25732 			}
25733 			sd_prev = sd_mhreq;
25734 		}
25735 		if (sd_mhreq != NULL) {
25736 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
25737 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25738 		}
25739 	}
25740 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25741 }
25742 
25743 
25744 /*
25745  *    Function: sd_mhd_reset_notify_cb()
25746  *
 * Description: This is a callback function for scsi_reset_notify. This
25748  *		function updates the softstate reserved status and logs the
25749  *		reset. The driver scsi watch facility callback function
25750  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
25751  *		will reclaim the reservation.
25752  *
25753  *   Arguments: arg  - driver soft state (unit) structure
25754  */
25755 
25756 static void
25757 sd_mhd_reset_notify_cb(caddr_t arg)
25758 {
25759 	struct sd_lun *un = (struct sd_lun *)arg;
25760 
25761 	mutex_enter(SD_MUTEX(un));
25762 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25763 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
25764 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25765 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
25766 	}
25767 	mutex_exit(SD_MUTEX(un));
25768 }
25769 
25770 
25771 /*
25772  *    Function: sd_take_ownership()
25773  *
25774  * Description: This routine implements an algorithm to achieve a stable
25775  *		reservation on disks which don't implement priority reserve,
 *		and makes sure that other hosts lose re-reservation attempts.
 *		The algorithm consists of a loop that keeps issuing the
 *		RESERVE for some period of time (min_ownership_delay, default
 *		6 seconds). During that loop, it checks whether there has been
 *		a bus device reset or bus reset (both of which cause an
 *		existing reservation to be lost). If the reservation is lost,
 *		RESERVE is reissued until a period of min_ownership_delay with
 *		no resets has gone by, or until max_ownership_delay has
 *		expired. This loop ensures that
25784  *		the host really did manage to reserve the device, in spite of
25785  *		resets. The looping for min_ownership_delay (default six
25786  *		seconds) is important to early generation clustering products,
25787  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
25788  *		MHIOCENFAILFAST periodic timer of two seconds. By having
25789  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
25790  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
25791  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
25792  *		have already noticed, via the MHIOCENFAILFAST polling, that it
25793  *		no longer "owns" the disk and will have panicked itself.  Thus,
25794  *		the host issuing the MHIOCTKOWN is assured (with timing
25795  *		dependencies) that by the time it actually starts to use the
25796  *		disk for real work, the old owner is no longer accessing it.
25797  *
25798  *		min_ownership_delay is the minimum amount of time for which the
25799  *		disk must be reserved continuously devoid of resets before the
25800  *		MHIOCTKOWN ioctl will return success.
25801  *
 *		max_ownership_delay indicates the amount of time within which
 *		the take-ownership attempt must either succeed or time out
 *		with an error.
25804  *
25805  *   Arguments: dev - the device 'dev_t'
25806  *		*p  - struct containing timing info.
25807  *
25808  * Return Code: 0 for success or error code
25809  */
25810 
25811 static int
25812 sd_take_ownership(dev_t dev, struct mhioctkown *p)
25813 {
25814 	struct sd_lun	*un;
25815 	int		rval;
25816 	int		err;
25817 	int		reservation_count   = 0;
25818 	int		min_ownership_delay =  6000000; /* in usec */
25819 	int		max_ownership_delay = 30000000; /* in usec */
25820 	clock_t		start_time;	/* starting time of this algorithm */
25821 	clock_t		end_time;	/* time limit for giving up */
25822 	clock_t		ownership_time;	/* time limit for stable ownership */
25823 	clock_t		current_time;
25824 	clock_t		previous_current_time;
25825 
25826 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25827 		return (ENXIO);
25828 	}
25829 
25830 	/*
25831 	 * Attempt a device reservation. A priority reservation is requested.
25832 	 */
25833 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
25834 	    != SD_SUCCESS) {
25835 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
25836 		    "sd_take_ownership: return(1)=%d\n", rval);
25837 		return (rval);
25838 	}
25839 
25840 	/* Update the softstate reserved status to indicate the reservation */
25841 	mutex_enter(SD_MUTEX(un));
25842 	un->un_resvd_status |= SD_RESERVE;
25843 	un->un_resvd_status &=
25844 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
25845 	mutex_exit(SD_MUTEX(un));
25846 
25847 	if (p != NULL) {
25848 		if (p->min_ownership_delay != 0) {
25849 			min_ownership_delay = p->min_ownership_delay * 1000;
25850 		}
25851 		if (p->max_ownership_delay != 0) {
25852 			max_ownership_delay = p->max_ownership_delay * 1000;
25853 		}
25854 	}
25855 	SD_INFO(SD_LOG_IOCTL_MHD, un,
25856 	    "sd_take_ownership: min, max delays: %d, %d\n",
25857 	    min_ownership_delay, max_ownership_delay);
25858 
25859 	start_time = ddi_get_lbolt();
25860 	current_time	= start_time;
25861 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
25862 	end_time	= start_time + drv_usectohz(max_ownership_delay);
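	/*
	 * With the default delays (an illustration, not a guarantee): the
	 * loop below waits 500 msec per pass, so ownership_time (6 sec out)
	 * is reached after roughly twelve passes; success additionally
	 * requires at least four consecutive conflict-free reservations
	 * (reservation_count >= 4) with no intervening resets.
	 */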
25863 
25864 	while (current_time - end_time < 0) {
25865 		delay(drv_usectohz(500000));
25866 
25867 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
25868 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
25869 				mutex_enter(SD_MUTEX(un));
25870 				rval = (un->un_resvd_status &
25871 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
25872 				mutex_exit(SD_MUTEX(un));
25873 				break;
25874 			}
25875 		}
25876 		previous_current_time = current_time;
25877 		current_time = ddi_get_lbolt();
25878 		mutex_enter(SD_MUTEX(un));
25879 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
25880 			ownership_time = ddi_get_lbolt() +
25881 			    drv_usectohz(min_ownership_delay);
25882 			reservation_count = 0;
25883 		} else {
25884 			reservation_count++;
25885 		}
25886 		un->un_resvd_status |= SD_RESERVE;
25887 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
25888 		mutex_exit(SD_MUTEX(un));
25889 
25890 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25891 		    "sd_take_ownership: ticks for loop iteration=%ld, "
25892 		    "reservation=%s\n", (current_time - previous_current_time),
25893 		    reservation_count ? "ok" : "reclaimed");
25894 
25895 		if (current_time - ownership_time >= 0 &&
25896 		    reservation_count >= 4) {
25897 			rval = 0; /* Achieved a stable ownership */
25898 			break;
25899 		}
25900 		if (current_time - end_time >= 0) {
25901 			rval = EACCES; /* No ownership in max possible time */
25902 			break;
25903 		}
25904 	}
25905 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
25906 	    "sd_take_ownership: return(2)=%d\n", rval);
25907 	return (rval);
25908 }
25909 
25910 
25911 /*
25912  *    Function: sd_reserve_release()
25913  *
25914  * Description: This function builds and sends scsi RESERVE, RELEASE, and
25915  *		PRIORITY RESERVE commands based on a user specified command type
25916  *
25917  *   Arguments: dev - the device 'dev_t'
25918  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
25919  *		      SD_RESERVE, SD_RELEASE
25920  *
25921  * Return Code: 0 or Error Code
25922  */
25923 
25924 static int
25925 sd_reserve_release(dev_t dev, int cmd)
25926 {
25927 	struct uscsi_cmd	*com = NULL;
25928 	struct sd_lun		*un = NULL;
25929 	char			cdb[CDB_GROUP0];
25930 	int			rval;
25931 
25932 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
25933 	    (cmd == SD_PRIORITY_RESERVE));
25934 
25935 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25936 		return (ENXIO);
25937 	}
25938 
25939 	/* instantiate and initialize the command and cdb */
25940 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25941 	bzero(cdb, CDB_GROUP0);
25942 	com->uscsi_flags   = USCSI_SILENT;
25943 	com->uscsi_timeout = un->un_reserve_release_time;
25944 	com->uscsi_cdblen  = CDB_GROUP0;
25945 	com->uscsi_cdb	   = cdb;
25946 	if (cmd == SD_RELEASE) {
25947 		cdb[0] = SCMD_RELEASE;
25948 	} else {
25949 		cdb[0] = SCMD_RESERVE;
25950 	}
25951 
25952 	/* Send the command. */
25953 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
25954 	    UIO_SYSSPACE, SD_PATH_STANDARD);
25955 
25956 	/*
25957 	 * "break" a reservation that is held by another host, by issuing a
25958 	 * reset if priority reserve is desired, and we could not get the
25959 	 * device.
25960 	 */
25961 	if ((cmd == SD_PRIORITY_RESERVE) &&
25962 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
25963 		/*
25964 		 * First try to reset the LUN. If we cannot, then try a target
25965 		 * reset, followed by a bus reset if the target reset fails.
25966 		 */
25967 		int reset_retval = 0;
25968 		if (un->un_f_lun_reset_enabled == TRUE) {
25969 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
25970 		}
25971 		if (reset_retval == 0) {
25972 			/* The LUN reset either failed or was not issued */
25973 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
25974 		}
25975 		if ((reset_retval == 0) &&
25976 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
25977 			rval = EIO;
25978 			kmem_free(com, sizeof (*com));
25979 			return (rval);
25980 		}
25981 
25982 		bzero(com, sizeof (struct uscsi_cmd));
25983 		com->uscsi_flags   = USCSI_SILENT;
25984 		com->uscsi_cdb	   = cdb;
25985 		com->uscsi_cdblen  = CDB_GROUP0;
25986 		com->uscsi_timeout = 5;
25987 
25988 		/*
25989 		 * Reissue the last reserve command, this time without request
25990 		 * sense.  Assume that it is just a regular reserve command.
25991 		 */
25992 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
25993 		    UIO_SYSSPACE, SD_PATH_STANDARD);
25994 	}
25995 
25996 	/* Return an error if still getting a reservation conflict. */
25997 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
25998 		rval = EACCES;
25999 	}
26000 
26001 	kmem_free(com, sizeof (*com));
26002 	return (rval);
26003 }
26004 
26005 
26006 #define	SD_NDUMP_RETRIES	12
26007 /*
26008  *	System Crash Dump routine
26009  */
26010 
26011 static int
26012 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
26013 {
26014 	int		instance;
26015 	int		partition;
26016 	int		i;
26017 	int		err;
26018 	struct sd_lun	*un;
26019 	struct dk_map	*lp;
26020 	struct scsi_pkt *wr_pktp;
26021 	struct buf	*wr_bp;
26022 	struct buf	wr_buf;
26023 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
26024 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
26025 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
26026 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
26027 	size_t		io_start_offset;
26028 	int		doing_rmw = FALSE;
26029 	int		rval;
26030 #if defined(__i386) || defined(__amd64)
26031 	ssize_t dma_resid;
26032 	daddr_t oblkno;
26033 #endif
26034 
26035 	instance = SDUNIT(dev);
26036 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
26037 	    (!un->un_f_geometry_is_valid) || ISCD(un)) {
26038 		return (ENXIO);
26039 	}
26040 
26041 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
26042 
26043 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
26044 
26045 	partition = SDPART(dev);
26046 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
26047 
	/* Validate the blocks to dump against the partition size. */
26049 	lp = &un->un_map[partition];
26050 	if ((blkno + nblk) > lp->dkl_nblk) {
26051 		SD_TRACE(SD_LOG_DUMP, un,
26052 		    "sddump: dump range larger than partition: "
26053 		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
26054 		    blkno, nblk, lp->dkl_nblk);
26055 		return (EINVAL);
26056 	}
26057 
26058 	mutex_enter(&un->un_pm_mutex);
26059 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
26060 		struct scsi_pkt *start_pktp;
26061 
26062 		mutex_exit(&un->un_pm_mutex);
26063 
26064 		/*
26065 		 * use pm framework to power on HBA 1st
26066 		 */
26067 		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
26068 
26069 		/*
		 * Dump no longer uses sdpower to power on a device; it is
		 * done in-line here so that it can be performed in polled
		 * mode.
26072 		 */
26073 
26074 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
26075 
26076 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
26077 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
26078 
26079 		if (start_pktp == NULL) {
26080 			/* We were not given a SCSI packet, fail. */
26081 			return (EIO);
26082 		}
26083 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
26084 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
26085 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
26086 		start_pktp->pkt_flags = FLAG_NOINTR;
26087 
26088 		mutex_enter(SD_MUTEX(un));
26089 		SD_FILL_SCSI1_LUN(un, start_pktp);
26090 		mutex_exit(SD_MUTEX(un));
26091 		/*
26092 		 * Scsi_poll returns 0 (success) if the command completes and
26093 		 * the status block is STATUS_GOOD.
26094 		 */
26095 		if (sd_scsi_poll(un, start_pktp) != 0) {
26096 			scsi_destroy_pkt(start_pktp);
26097 			return (EIO);
26098 		}
26099 		scsi_destroy_pkt(start_pktp);
26100 		(void) sd_ddi_pm_resume(un);
26101 	} else {
26102 		mutex_exit(&un->un_pm_mutex);
26103 	}
26104 
26105 	mutex_enter(SD_MUTEX(un));
26106 	un->un_throttle = 0;
26107 
26108 	/*
	 * The first time through, reset the specific target device.
	 * However, when cpr calls sddump we know that sd is in a
	 * good state, so no bus reset is required.
	 * Clear sense data via a Request Sense cmd.
	 * In sddump we don't care about allow_bus_device_reset anymore.
26114 	 */
26115 
26116 	if ((un->un_state != SD_STATE_SUSPENDED) &&
26117 	    (un->un_state != SD_STATE_DUMPING)) {
26118 
26119 		New_state(un, SD_STATE_DUMPING);
26120 
26121 		if (un->un_f_is_fibre == FALSE) {
26122 			mutex_exit(SD_MUTEX(un));
26123 			/*
26124 			 * Attempt a bus reset for parallel scsi.
26125 			 *
26126 			 * Note: A bus reset is required because on some host
26127 			 * systems (i.e. E420R) a bus device reset is
26128 			 * insufficient to reset the state of the target.
26129 			 *
26130 			 * Note: Don't issue the reset for fibre-channel,
26131 			 * because this tends to hang the bus (loop) for
26132 			 * too long while everyone is logging out and in
26133 			 * and the deadman timer for dumping will fire
26134 			 * before the dump is complete.
26135 			 */
26136 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
26137 				mutex_enter(SD_MUTEX(un));
26138 				Restore_state(un);
26139 				mutex_exit(SD_MUTEX(un));
26140 				return (EIO);
26141 			}
26142 
26143 			/* Delay to give the device some recovery time. */
26144 			drv_usecwait(10000);
26145 
26146 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
26147 				SD_INFO(SD_LOG_DUMP, un,
26148 					"sddump: sd_send_polled_RQS failed\n");
26149 			}
26150 			mutex_enter(SD_MUTEX(un));
26151 		}
26152 	}
26153 
26154 	/*
26155 	 * Convert the partition-relative block number to a
26156 	 * disk physical block number.
26157 	 */
26158 	blkno += un->un_offset[partition];
26159 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
26160 
26161 
26162 	/*
26163 	 * Check if the device has a non-512 block size.
26164 	 */
26165 	wr_bp = NULL;
26166 	if (NOT_DEVBSIZE(un)) {
26167 		tgt_byte_offset = blkno * un->un_sys_blocksize;
26168 		tgt_byte_count = nblk * un->un_sys_blocksize;
26169 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
26170 		    (tgt_byte_count % un->un_tgt_blocksize)) {
26171 			doing_rmw = TRUE;
26172 			/*
			 * Calculate the block number and the number of
			 * blocks in terms of the media block size.
26175 			 */
26176 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26177 			tgt_nblk =
26178 			    ((tgt_byte_offset + tgt_byte_count +
26179 				(un->un_tgt_blocksize - 1)) /
26180 				un->un_tgt_blocksize) - tgt_blkno;
26181 
26182 			/*
			 * Invoke the routine which is going to do the read
			 * part of the read-modify-write.
26185 			 * Note that this routine returns a pointer to
26186 			 * a valid bp in wr_bp.
26187 			 */
26188 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
26189 			    &wr_bp);
26190 			if (err) {
26191 				mutex_exit(SD_MUTEX(un));
26192 				return (err);
26193 			}
26194 			/*
			 * The copy-in offset is calculated as:
			 *   (original block # * system block size) -
			 *   (new block # * target block size)
26198 			 */
26199 			io_start_offset =
26200 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
26201 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
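			/*
			 * Worked example (hypothetical sizes): with 512-byte
			 * system blocks and a 2048-byte target block size,
			 * blkno = 10 gives tgt_byte_offset = 5120, hence
			 * tgt_blkno = 2 and io_start_offset = 5120 - 4096 =
			 * 1024; the caller's data is copied 1024 bytes into
			 * the block(s) read back for the RMW.
			 */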
26202 
26203 			ASSERT((io_start_offset >= 0) &&
26204 			    (io_start_offset < un->un_tgt_blocksize));
26205 			/*
26206 			 * Do the modify portion of read modify write.
26207 			 */
26208 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
26209 			    (size_t)nblk * un->un_sys_blocksize);
26210 		} else {
26211 			doing_rmw = FALSE;
26212 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26213 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
26214 		}
26215 
26216 		/* Convert blkno and nblk to target blocks */
26217 		blkno = tgt_blkno;
26218 		nblk = tgt_nblk;
26219 	} else {
26220 		wr_bp = &wr_buf;
26221 		bzero(wr_bp, sizeof (struct buf));
26222 		wr_bp->b_flags		= B_BUSY;
26223 		wr_bp->b_un.b_addr	= addr;
26224 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
26225 		wr_bp->b_resid		= 0;
26226 	}
26227 
26228 	mutex_exit(SD_MUTEX(un));
26229 
26230 	/*
26231 	 * Obtain a SCSI packet for the write command.
26232 	 * It should be safe to call the allocator here without
26233 	 * worrying about being locked for DVMA mapping because
	 * the address we're passed is already a DVMA mapping.
26235 	 *
26236 	 * We are also not going to worry about semaphore ownership
26237 	 * in the dump buffer. Dumping is single threaded at present.
26238 	 */
26239 
26240 	wr_pktp = NULL;
26241 
26242 #if defined(__i386) || defined(__amd64)
26243 	dma_resid = wr_bp->b_bcount;
26244 	oblkno = blkno;
26245 	while (dma_resid != 0) {
26246 #endif
26247 
26248 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26249 		wr_bp->b_flags &= ~B_ERROR;
26250 
26251 #if defined(__i386) || defined(__amd64)
26252 		blkno = oblkno +
26253 			((wr_bp->b_bcount - dma_resid) /
26254 			    un->un_tgt_blocksize);
26255 		nblk = dma_resid / un->un_tgt_blocksize;
26256 
26257 		if (wr_pktp) {
26258 			/* Partial DMA transfers after initial transfer */
26259 			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
26260 			    blkno, nblk);
26261 		} else {
26262 			/* Initial transfer */
26263 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26264 			    un->un_pkt_flags, NULL_FUNC, NULL,
26265 			    blkno, nblk);
26266 		}
26267 #else
26268 		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26269 		    0, NULL_FUNC, NULL, blkno, nblk);
26270 #endif
26271 
26272 		if (rval == 0) {
26273 			/* We were given a SCSI packet, continue. */
26274 			break;
26275 		}
26276 
26277 		if (i == 0) {
26278 			if (wr_bp->b_flags & B_ERROR) {
26279 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26280 				    "no resources for dumping; "
26281 				    "error code: 0x%x, retrying",
26282 				    geterror(wr_bp));
26283 			} else {
26284 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26285 				    "no resources for dumping; retrying");
26286 			}
26287 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
26288 			if (wr_bp->b_flags & B_ERROR) {
26289 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26290 				    "no resources for dumping; error code: "
26291 				    "0x%x, retrying\n", geterror(wr_bp));
26292 			}
26293 		} else {
26294 			if (wr_bp->b_flags & B_ERROR) {
26295 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26296 				    "no resources for dumping; "
26297 				    "error code: 0x%x, retries failed, "
26298 				    "giving up.\n", geterror(wr_bp));
26299 			} else {
26300 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26301 				    "no resources for dumping; "
26302 				    "retries failed, giving up.\n");
26303 			}
26304 			mutex_enter(SD_MUTEX(un));
26305 			Restore_state(un);
26306 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
26307 				mutex_exit(SD_MUTEX(un));
26308 				scsi_free_consistent_buf(wr_bp);
26309 			} else {
26310 				mutex_exit(SD_MUTEX(un));
26311 			}
26312 			return (EIO);
26313 		}
26314 		drv_usecwait(10000);
26315 	}
26316 
26317 #if defined(__i386) || defined(__amd64)
26318 	/*
26319 	 * save the resid from PARTIAL_DMA
26320 	 */
26321 	dma_resid = wr_pktp->pkt_resid;
26322 	if (dma_resid != 0)
26323 		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
26324 	wr_pktp->pkt_resid = 0;
26325 #endif
26326 
26327 	/* SunBug 1222170 */
26328 	wr_pktp->pkt_flags = FLAG_NOINTR;
26329 
26330 	err = EIO;
26331 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26332 
26333 		/*
26334 		 * Scsi_poll returns 0 (success) if the command completes and
26335 		 * the status block is STATUS_GOOD.  We should only check
26336 		 * errors if this condition is not true.  Even then we should
26337 		 * send our own request sense packet only if we have a check
26338 		 * condition and auto request sense has not been performed by
26339 		 * the hba.
26340 		 */
26341 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
26342 
26343 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
26344 		    (wr_pktp->pkt_resid == 0)) {
26345 			err = SD_SUCCESS;
26346 			break;
26347 		}
26348 
26349 		/*
26350 		 * Check CMD_DEV_GONE 1st, give up if device is gone.
26351 		 */
26352 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
26353 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26354 			    "Device is gone\n");
26355 			break;
26356 		}
26357 
26358 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
26359 			SD_INFO(SD_LOG_DUMP, un,
26360 			    "sddump: write failed with CHECK, try # %d\n", i);
26361 			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
26362 				(void) sd_send_polled_RQS(un);
26363 			}
26364 
26365 			continue;
26366 		}
26367 
26368 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
26369 			int reset_retval = 0;
26370 
26371 			SD_INFO(SD_LOG_DUMP, un,
26372 			    "sddump: write failed with BUSY, try # %d\n", i);
26373 
26374 			if (un->un_f_lun_reset_enabled == TRUE) {
26375 				reset_retval = scsi_reset(SD_ADDRESS(un),
26376 				    RESET_LUN);
26377 			}
26378 			if (reset_retval == 0) {
26379 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26380 			}
26381 			(void) sd_send_polled_RQS(un);
26382 
26383 		} else {
26384 			SD_INFO(SD_LOG_DUMP, un,
26385 			    "sddump: write failed with 0x%x, try # %d\n",
26386 			    SD_GET_PKT_STATUS(wr_pktp), i);
26387 			mutex_enter(SD_MUTEX(un));
26388 			sd_reset_target(un, wr_pktp);
26389 			mutex_exit(SD_MUTEX(un));
26390 		}
26391 
26392 		/*
26393 		 * If we are not getting anywhere with lun/target resets,
26394 		 * let's reset the bus.
26395 		 */
26396 		if (i == SD_NDUMP_RETRIES/2) {
26397 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26398 			(void) sd_send_polled_RQS(un);
26399 		}
26400 
26401 	}
26402 #if defined(__i386) || defined(__amd64)
26403 	}	/* dma_resid */
26404 #endif
26405 
26406 	scsi_destroy_pkt(wr_pktp);
26407 	mutex_enter(SD_MUTEX(un));
26408 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
26409 		mutex_exit(SD_MUTEX(un));
26410 		scsi_free_consistent_buf(wr_bp);
26411 	} else {
26412 		mutex_exit(SD_MUTEX(un));
26413 	}
26414 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
26415 	return (err);
26416 }
26417 
26418 /*
26419  *    Function: sd_scsi_poll()
26420  *
26421  * Description: This is a wrapper for the scsi_poll call.
26422  *
26423  *   Arguments: sd_lun - The unit structure
26424  *              scsi_pkt - The scsi packet being sent to the device.
26425  *
26426  * Return Code: 0 - Command completed successfully with good status
26427  *             -1 - Command failed.  This could indicate a check condition
26428  *                  or other status value requiring recovery action.
26429  *
26430  */
26431 
26432 static int
26433 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
26434 {
26435 	int status;
26436 
26437 	ASSERT(un != NULL);
26438 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26439 	ASSERT(pktp != NULL);
26440 
26441 	status = SD_SUCCESS;
26442 
26443 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
26444 		pktp->pkt_flags |= un->un_tagflags;
26445 		pktp->pkt_flags &= ~FLAG_NODISCON;
26446 	}
26447 
26448 	status = sd_ddi_scsi_poll(pktp);
26449 	/*
26450 	 * Scsi_poll returns 0 (success) if the command completes and the
26451 	 * status block is STATUS_GOOD.  We should only check errors if this
26452 	 * condition is not true.  Even then we should send our own request
26453 	 * sense packet only if we have a check condition and auto
26454 	 * request sense has not been performed by the hba.
26455 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
26456 	 */
26457 	if ((status != SD_SUCCESS) &&
26458 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
26459 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
26460 	    (pktp->pkt_reason != CMD_DEV_GONE))
26461 		(void) sd_send_polled_RQS(un);
26462 
26463 	return (status);
26464 }
26465 
26466 /*
26467  *    Function: sd_send_polled_RQS()
26468  *
26469  * Description: This sends the request sense command to a device.
26470  *
26471  *   Arguments: sd_lun - The unit structure
26472  *
26473  * Return Code: 0 - Command completed successfully with good status
26474  *             -1 - Command failed.
26475  *
26476  */
26477 
26478 static int
26479 sd_send_polled_RQS(struct sd_lun *un)
26480 {
26481 	int	ret_val;
26482 	struct	scsi_pkt	*rqs_pktp;
26483 	struct	buf		*rqs_bp;
26484 
26485 	ASSERT(un != NULL);
26486 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26487 
26488 	ret_val = SD_SUCCESS;
26489 
26490 	rqs_pktp = un->un_rqs_pktp;
26491 	rqs_bp	 = un->un_rqs_bp;
26492 
26493 	mutex_enter(SD_MUTEX(un));
26494 
26495 	if (un->un_sense_isbusy) {
26496 		ret_val = SD_FAILURE;
26497 		mutex_exit(SD_MUTEX(un));
26498 		return (ret_val);
26499 	}
26500 
26501 	/*
26502 	 * If the request sense buffer (and packet) is not in use,
26503 	 * let's set the un_sense_isbusy and send our packet
26504 	 */
26505 	un->un_sense_isbusy 	= 1;
26506 	rqs_pktp->pkt_resid  	= 0;
26507 	rqs_pktp->pkt_reason 	= 0;
26508 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
26509 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
26510 
26511 	mutex_exit(SD_MUTEX(un));
26512 
26513 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
26514 	    " 0x%p\n", rqs_bp->b_un.b_addr);
26515 
26516 	/*
26517 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
26518 	 * axle - it has a call into us!
26519 	 */
26520 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
26521 		SD_INFO(SD_LOG_COMMON, un,
26522 		    "sd_send_polled_RQS: RQS failed\n");
26523 	}
26524 
26525 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
26526 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
26527 
26528 	mutex_enter(SD_MUTEX(un));
26529 	un->un_sense_isbusy = 0;
26530 	mutex_exit(SD_MUTEX(un));
26531 
26532 	return (ret_val);
26533 }
26534 
26535 /*
26536  * Defines needed for localized version of the scsi_poll routine.
26537  */
26538 #define	SD_CSEC		10000			/* usecs */
26539 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
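/*
 * For example (illustrative arithmetic): with the 60-second default
 * applied in sd_ddi_scsi_poll(), the retry budget is
 * 60 * SD_SEC_TO_CSEC = 6000 loop iterations, each charged at least one
 * SD_CSEC (10 msec); busy and not-ready retries charge a full second.
 */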
26540 
26541 
26542 /*
26543  *    Function: sd_ddi_scsi_poll()
26544  *
26545  * Description: Localized version of the scsi_poll routine.  The purpose is to
26546  *		send a scsi_pkt to a device as a polled command.  This version
 *		ensures more robust handling of transport errors.
 *		Specifically, this routine handles the not-ready to ready
 *		transition for power-up and reset of Sonoma arrays; this can
 *		take up to 45 seconds for power-on and 20 seconds for reset
 *		of a Sonoma lun.
26552  *
26553  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
26554  *
26555  * Return Code: 0 - Command completed successfully with good status
26556  *             -1 - Command failed.
26557  *
26558  */
26559 
26560 static int
26561 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
26562 {
26563 	int busy_count;
26564 	int timeout;
26565 	int rval = SD_FAILURE;
26566 	int savef;
26567 	struct scsi_extended_sense *sensep;
26568 	long savet;
26569 	void (*savec)();
26570 	/*
26571 	 * The following is defined in machdep.c and is used in determining if
26572 	 * the scsi transport system will do polled I/O instead of interrupt
26573 	 * I/O when called from xx_dump().
26574 	 */
26575 	extern int do_polled_io;
26576 
26577 	/*
26578 	 * save old flags in pkt, to restore at end
26579 	 */
26580 	savef = pkt->pkt_flags;
26581 	savec = pkt->pkt_comp;
26582 	savet = pkt->pkt_time;
26583 
26584 	pkt->pkt_flags |= FLAG_NOINTR;
26585 
26586 	/*
26587 	 * XXX there is nothing in the SCSA spec that states that we should not
26588 	 * do a callback for polled cmds; however, removing this will break sd
26589 	 * and probably other target drivers
26590 	 */
26591 	pkt->pkt_comp = NULL;
26592 
26593 	/*
26594 	 * we don't like a polled command without timeout.
26595 	 * 60 seconds seems long enough.
26596 	 */
26597 	if (pkt->pkt_time == 0) {
26598 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
26599 	}
26600 
26601 	/*
26602 	 * Send polled cmd.
26603 	 *
26604 	 * We do some error recovery for various errors.  Tran_busy,
26605 	 * queue full, and non-dispatched commands are retried every 10 msec.
26606 	 * as they are typically transient failures.  Busy status and Not
26607 	 * Ready are retried every second as this status takes a while to
26608 	 * change.  Unit attention is retried for pkt_time (60) times
26609 	 * with no delay.
26610 	 */
26611 	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;
26612 
26613 	for (busy_count = 0; busy_count < timeout; busy_count++) {
26614 		int rc;
26615 		int poll_delay;
26616 
26617 		/*
26618 		 * Initialize pkt status variables.
26619 		 */
26620 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
26621 
26622 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
26623 			if (rc != TRAN_BUSY) {
26624 				/* Transport failed - give up. */
26625 				break;
26626 			} else {
26627 				/* Transport busy - try again. */
26628 				poll_delay = 1 * SD_CSEC; /* 10 msec */
26629 			}
26630 		} else {
26631 			/*
26632 			 * Transport accepted - check pkt status.
26633 			 */
26634 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
26635 			if (pkt->pkt_reason == CMD_CMPLT &&
26636 			    rc == STATUS_CHECK &&
26637 			    pkt->pkt_state & STATE_ARQ_DONE) {
26638 				struct scsi_arq_status *arqstat =
26639 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
26640 
26641 				sensep = &arqstat->sts_sensedata;
26642 			} else {
26643 				sensep = NULL;
26644 			}
26645 
26646 			if ((pkt->pkt_reason == CMD_CMPLT) &&
26647 			    (rc == STATUS_GOOD)) {
26648 				/* No error - we're done */
26649 				rval = SD_SUCCESS;
26650 				break;
26651 
26652 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
26653 				/* Lost connection - give up */
26654 				break;
26655 
26656 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
26657 			    (pkt->pkt_state == 0)) {
26658 				/* Pkt not dispatched - try again. */
26659 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26660 
26661 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26662 			    (rc == STATUS_QFULL)) {
26663 				/* Queue full - try again. */
26664 				poll_delay = 1 * SD_CSEC; /* 10 msec. */
26665 
26666 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26667 			    (rc == STATUS_BUSY)) {
26668 				/* Busy - try again. */
26669 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
26670 				busy_count += (SD_SEC_TO_CSEC - 1);
26671 
26672 			} else if ((sensep != NULL) &&
26673 			    (sensep->es_key == KEY_UNIT_ATTENTION)) {
26674 				/* Unit Attention - try again */
				busy_count += (SD_SEC_TO_CSEC - 1); /* counts as 1 sec */
26676 				continue;
26677 
26678 			} else if ((sensep != NULL) &&
26679 			    (sensep->es_key == KEY_NOT_READY) &&
26680 			    (sensep->es_add_code == 0x04) &&
26681 			    (sensep->es_qual_code == 0x01)) {
26682 				/* Not ready -> ready - try again. */
26683 				poll_delay = 100 * SD_CSEC; /* 1 sec. */
26684 				busy_count += (SD_SEC_TO_CSEC - 1);
26685 
26686 			} else {
26687 				/* BAD status - give up. */
26688 				break;
26689 			}
26690 		}
26691 
26692 		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
26693 		    !do_polled_io) {
26694 			delay(drv_usectohz(poll_delay));
26695 		} else {
26696 			/* we busy wait during cpr_dump or interrupt threads */
26697 			drv_usecwait(poll_delay);
26698 		}
26699 	}
26700 
26701 	pkt->pkt_flags = savef;
26702 	pkt->pkt_comp = savec;
26703 	pkt->pkt_time = savet;
26704 	return (rval);
26705 }
26706 
26707 
26708 /*
26709  *    Function: sd_persistent_reservation_in_read_keys
26710  *
 * Description: This routine is the driver entry point for handling
26712  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
26713  *		by sending the SCSI-3 PRIN commands to the device.
26714  *		Processes the read keys command response by copying the
26715  *		reservation key information into the user provided buffer.
26716  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
26717  *
26718  *   Arguments: un   -  Pointer to soft state struct for the target.
26719  *		usrp -	user provided pointer to multihost Persistent In Read
26720  *			Keys structure (mhioc_inkeys_t)
26721  *		flag -	this argument is a pass through to ddi_copyxxx()
26722  *			directly from the mode argument of ioctl().
26723  *
26724  * Return Code: 0   - Success
26725  *		EACCES
26726  *		ENOTSUP
26727  *		errno return code from sd_send_scsi_cmd()
26728  *
26729  *     Context: Can sleep. Does not return until command is completed.
26730  */
26731 
26732 static int
26733 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
26734     mhioc_inkeys_t *usrp, int flag)
26735 {
26736 #ifdef _MULTI_DATAMODEL
26737 	struct mhioc_key_list32	li32;
26738 #endif
26739 	sd_prin_readkeys_t	*in;
26740 	mhioc_inkeys_t		*ptr;
26741 	mhioc_key_list_t	li;
26742 	uchar_t			*data_bufp;
26743 	int 			data_len;
26744 	int			rval;
26745 	size_t			copysz;
26746 
26747 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
26748 		return (EINVAL);
26749 	}
26750 	bzero(&li, sizeof (mhioc_key_list_t));
26751 
26752 	/*
26753 	 * Get the listsize from user
26754 	 */
26755 #ifdef _MULTI_DATAMODEL
26756 
26757 	switch (ddi_model_convert_from(flag & FMODELS)) {
26758 	case DDI_MODEL_ILP32:
26759 		copysz = sizeof (struct mhioc_key_list32);
26760 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
26761 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26762 			    "sd_persistent_reservation_in_read_keys: "
26763 			    "failed ddi_copyin: mhioc_key_list32_t\n");
26764 			rval = EFAULT;
26765 			goto done;
26766 		}
26767 		li.listsize = li32.listsize;
26768 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
26769 		break;
26770 
26771 	case DDI_MODEL_NONE:
26772 		copysz = sizeof (mhioc_key_list_t);
26773 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26774 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26775 			    "sd_persistent_reservation_in_read_keys: "
26776 			    "failed ddi_copyin: mhioc_key_list_t\n");
26777 			rval = EFAULT;
26778 			goto done;
26779 		}
26780 		break;
26781 	}
26782 
26783 #else /* ! _MULTI_DATAMODEL */
26784 	copysz = sizeof (mhioc_key_list_t);
26785 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26786 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26787 		    "sd_persistent_reservation_in_read_keys: "
26788 		    "failed ddi_copyin: mhioc_key_list_t\n");
26789 		rval = EFAULT;
26790 		goto done;
26791 	}
26792 #endif
26793 
26794 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
26795 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
26796 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26797 
26798 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
26799 	    data_len, data_bufp)) != 0) {
26800 		goto done;
26801 	}
26802 	in = (sd_prin_readkeys_t *)data_bufp;
26803 	ptr->generation = BE_32(in->generation);
26804 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
26805 
26806 	/*
26807 	 * Return the min(listsize, listlen) keys
26808 	 */
26809 #ifdef _MULTI_DATAMODEL
26810 
26811 	switch (ddi_model_convert_from(flag & FMODELS)) {
26812 	case DDI_MODEL_ILP32:
26813 		li32.listlen = li.listlen;
26814 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
26815 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26816 			    "sd_persistent_reservation_in_read_keys: "
26817 			    "failed ddi_copyout: mhioc_key_list32_t\n");
26818 			rval = EFAULT;
26819 			goto done;
26820 		}
26821 		break;
26822 
26823 	case DDI_MODEL_NONE:
26824 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26825 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26826 			    "sd_persistent_reservation_in_read_keys: "
26827 			    "failed ddi_copyout: mhioc_key_list_t\n");
26828 			rval = EFAULT;
26829 			goto done;
26830 		}
26831 		break;
26832 	}
26833 
26834 #else /* ! _MULTI_DATAMODEL */
26835 
26836 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26837 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26838 		    "sd_persistent_reservation_in_read_keys: "
26839 		    "failed ddi_copyout: mhioc_key_list_t\n");
26840 		rval = EFAULT;
26841 		goto done;
26842 	}
26843 
26844 #endif /* _MULTI_DATAMODEL */
26845 
26846 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
26847 	    li.listsize * MHIOC_RESV_KEY_SIZE);
26848 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
26849 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26850 		    "sd_persistent_reservation_in_read_keys: "
26851 		    "failed ddi_copyout: keylist\n");
26852 		rval = EFAULT;
26853 	}
26854 done:
26855 	if (data_bufp != NULL)
		kmem_free(data_bufp, data_len);
26856 	return (rval);
26857 }
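
/*
 * Illustrative example (not part of the driver): a minimal user-level
 * caller of MHIOCGRP_INKEYS. The device path and the key-array size are
 * hypothetical; a real caller would retry with a larger list whenever
 * kl.listlen comes back greater than kl.listsize.
 *
 *	#include <sys/mhd.h>
 *	#include <fcntl.h>
 *
 *	mhioc_resv_key_t	keys[4];
 *	mhioc_key_list_t	kl;
 *	mhioc_inkeys_t		ik;
 *	int			fd;
 *
 *	fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *	kl.listsize = 4;
 *	kl.list = keys;
 *	ik.li = &kl;
 *	if (ioctl(fd, MHIOCGRP_INKEYS, &ik) == 0)
 *		(void) printf("generation %u, %u keys registered\n",
 *		    ik.generation, kl.listlen);
 */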
26858 
26859 
26860 /*
26861  *    Function: sd_persistent_reservation_in_read_resv
26862  *
26863  * Description: This routine is the driver entry point for handling CD-ROM
26864  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
26865  *		by sending the SCSI-3 PRIN commands to the device.
26866  *		Process the read persistent reservations command response by
26867  *		copying the reservation information into the user provided
26868  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
26869  *
26870  *   Arguments: un   -  Pointer to soft state struct for the target.
26871  *		usrp -	user provided pointer to multihost Persistent In Read
26872  *			Reservations structure (mhioc_inresvs_t)
26873  *		flag -	this argument is a pass through to ddi_copyxxx()
26874  *			directly from the mode argument of ioctl().
26875  *
26876  * Return Code: 0   - Success
26877  *		EACCES
26878  *		ENOTSUP
26879  *		errno return code from sd_send_scsi_cmd()
26880  *
26881  *     Context: Can sleep. Does not return until command is completed.
26882  */
26883 
26884 static int
26885 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
26886     mhioc_inresvs_t *usrp, int flag)
26887 {
26888 #ifdef _MULTI_DATAMODEL
26889 	struct mhioc_resv_desc_list32 resvlist32;
26890 #endif
26891 	sd_prin_readresv_t	*in;
26892 	mhioc_inresvs_t		*ptr;
26893 	sd_readresv_desc_t	*readresv_ptr;
26894 	mhioc_resv_desc_list_t	resvlist;
26895 	mhioc_resv_desc_t 	resvdesc;
26896 	uchar_t			*data_bufp = NULL;
26897 	int 			data_len = 0;
26898 	int			rval = 0;
26899 	int			i;
26900 	size_t			copysz;
26901 	mhioc_resv_desc_t	*bufp;
26902 
26903 	if ((ptr = usrp) == NULL) {
26904 		return (EINVAL);
26905 	}
26906 
26907 	/*
26908 	 * Get the listsize from user
26909 	 */
26910 #ifdef _MULTI_DATAMODEL
26911 	switch (ddi_model_convert_from(flag & FMODELS)) {
26912 	case DDI_MODEL_ILP32:
26913 		copysz = sizeof (struct mhioc_resv_desc_list32);
26914 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
26915 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26916 			    "sd_persistent_reservation_in_read_resv: "
26917 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26918 			rval = EFAULT;
26919 			goto done;
26920 		}
26921 		resvlist.listsize = resvlist32.listsize;
26922 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
26923 		break;
26924 
26925 	case DDI_MODEL_NONE:
26926 		copysz = sizeof (mhioc_resv_desc_list_t);
26927 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
26928 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26929 			    "sd_persistent_reservation_in_read_resv: "
26930 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26931 			rval = EFAULT;
26932 			goto done;
26933 		}
26934 		break;
26935 	}
26936 #else /* ! _MULTI_DATAMODEL */
26937 	copysz = sizeof (mhioc_resv_desc_list_t);
26938 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
26939 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26940 		    "sd_persistent_reservation_in_read_resv: "
26941 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26942 		rval = EFAULT;
26943 		goto done;
26944 	}
26945 #endif /* ! _MULTI_DATAMODEL */
26946 
26947 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
26948 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
26949 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26950 
26951 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
26952 	    data_len, data_bufp)) != 0) {
26953 		goto done;
26954 	}
26955 	in = (sd_prin_readresv_t *)data_bufp;
26956 	ptr->generation = BE_32(in->generation);
26957 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
26958 
26959 	/*
26960 	 * Return the min(listsize, listlen) keys
26961 	 */
26962 #ifdef _MULTI_DATAMODEL
26963 
26964 	switch (ddi_model_convert_from(flag & FMODELS)) {
26965 	case DDI_MODEL_ILP32:
26966 		resvlist32.listlen = resvlist.listlen;
26967 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
26968 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26969 			    "sd_persistent_reservation_in_read_resv: "
26970 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26971 			rval = EFAULT;
26972 			goto done;
26973 		}
26974 		break;
26975 
26976 	case DDI_MODEL_NONE:
26977 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
26978 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26979 			    "sd_persistent_reservation_in_read_resv: "
26980 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26981 			rval = EFAULT;
26982 			goto done;
26983 		}
26984 		break;
26985 	}
26986 
26987 #else /* ! _MULTI_DATAMODEL */
26988 
26989 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
26990 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26991 		    "sd_persistent_reservation_in_read_resv: "
26992 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26993 		rval = EFAULT;
26994 		goto done;
26995 	}
26996 
26997 #endif /* ! _MULTI_DATAMODEL */
26998 
26999 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
27000 	bufp = resvlist.list;
27001 	copysz = sizeof (mhioc_resv_desc_t);
27002 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
27003 	    i++, readresv_ptr++, bufp++) {
27004 
27005 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
27006 		    MHIOC_RESV_KEY_SIZE);
27007 		resvdesc.type  = readresv_ptr->type;
27008 		resvdesc.scope = readresv_ptr->scope;
27009 		resvdesc.scope_specific_addr =
27010 		    BE_32(readresv_ptr->scope_specific_addr);
27011 
27012 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
27013 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27014 			    "sd_persistent_reservation_in_read_resv: "
27015 			    "failed ddi_copyout: resvlist\n");
27016 			rval = EFAULT;
27017 			goto done;
27018 		}
27019 	}
27020 done:
27021 	if (data_bufp != NULL)
		kmem_free(data_bufp, data_len);
27022 	return (rval);
27023 }
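
/*
 * Illustrative example (not part of the driver): reading the active
 * SCSI-3 reservations with MHIOCGRP_INRESV. Field usage mirrors the
 * MHIOCGRP_INKEYS example above (fd is an open descriptor for the raw
 * device node); the rd[] sizing is arbitrary.
 *
 *	mhioc_resv_desc_t	rd[4];
 *	mhioc_resv_desc_list_t	rdl;
 *	mhioc_inresvs_t		ir;
 *
 *	rdl.listsize = 4;
 *	rdl.list = rd;
 *	ir.li = &rdl;
 *	if (ioctl(fd, MHIOCGRP_INRESV, &ir) == 0 && rdl.listlen > 0)
 *		(void) printf("type %d scope %d\n",
 *		    rd[0].type, rd[0].scope);
 */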
27024 
27025 
27026 /*
27027  *    Function: sr_change_blkmode()
27028  *
27029  * Description: This routine is the driver entry point for handling CD-ROM
27030  *		block mode ioctl requests. Support for returning and changing
27031  *		the current block size in use by the device is implemented. The
27032  *		LBA size is changed via a MODE SELECT Block Descriptor.
27033  *
27034  *		This routine issues a mode sense with an allocation length of
27035  *		12 bytes for the mode header and a single block descriptor.
27036  *
27037  *   Arguments: dev - the device 'dev_t'
27038  *		cmd - the request type; one of CDROMGBLKMODE (get) or
27039  *		      CDROMSBLKMODE (set)
27040  *		data - current block size or requested block size
27041  *		flag - this argument is a pass through to ddi_copyxxx() directly
27042  *		       from the mode argument of ioctl().
27043  *
27044  * Return Code: the code returned by sd_send_scsi_cmd()
27045  *		EINVAL if invalid arguments are provided
27046  *		EFAULT if ddi_copyxxx() fails
27047  *		ENXIO if fail ddi_get_soft_state
27048  *		EIO if invalid mode sense block descriptor length
27049  *
27050  */
27051 
27052 static int
27053 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
27054 {
27055 	struct sd_lun			*un = NULL;
27056 	struct mode_header		*sense_mhp, *select_mhp;
27057 	struct block_descriptor		*sense_desc, *select_desc;
27058 	int				current_bsize;
27059 	int				rval = EINVAL;
27060 	uchar_t				*sense = NULL;
27061 	uchar_t				*select = NULL;
27062 
27063 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
27064 
27065 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27066 		return (ENXIO);
27067 	}
27068 
27069 	/*
27070 	 * The block length is changed via the Mode Select block descriptor;
27071 	 * the "Read/Write Error Recovery" mode page (0x1) contents are not
27072 	 * actually required by this routine. Therefore the mode sense
27073 	 * allocation length is specified to be the length of a mode header
27074 	 * and a block descriptor.
27075 	 */
27076 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27077 
27078 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27079 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
27080 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27081 		    "sr_change_blkmode: Mode Sense Failed\n");
27082 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27083 		return (rval);
27084 	}
27085 
27086 	/* Check the block descriptor len to handle only 1 block descriptor */
27087 	sense_mhp = (struct mode_header *)sense;
27088 	if ((sense_mhp->bdesc_length == 0) ||
27089 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
27090 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27091 		    "sr_change_blkmode: Mode Sense returned invalid block"
27092 		    " descriptor length\n");
27093 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27094 		return (EIO);
27095 	}
27096 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
27097 	current_bsize = ((sense_desc->blksize_hi << 16) |
27098 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
27099 
27100 	/* Process command */
27101 	switch (cmd) {
27102 	case CDROMGBLKMODE:
27103 		/* Return the block size obtained during the mode sense */
27104 		if (ddi_copyout(&current_bsize, (void *)data,
27105 		    sizeof (int), flag) != 0)
27106 			rval = EFAULT;
27107 		break;
27108 	case CDROMSBLKMODE:
27109 		/* Validate the requested block size */
27110 		switch (data) {
27111 		case CDROM_BLK_512:
27112 		case CDROM_BLK_1024:
27113 		case CDROM_BLK_2048:
27114 		case CDROM_BLK_2056:
27115 		case CDROM_BLK_2336:
27116 		case CDROM_BLK_2340:
27117 		case CDROM_BLK_2352:
27118 		case CDROM_BLK_2368:
27119 		case CDROM_BLK_2448:
27120 		case CDROM_BLK_2646:
27121 		case CDROM_BLK_2647:
27122 			break;
27123 		default:
27124 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27125 			    "sr_change_blkmode: "
27126 			    "Block Size '%ld' Not Supported\n", data);
27127 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27128 			return (EINVAL);
27129 		}
27130 
27131 		/*
27132 		 * The current block size matches the requested block size so
27133 		 * there is no need to send the mode select to change the size
27134 		 */
27135 		if (current_bsize == data) {
27136 			break;
27137 		}
27138 
27139 		/* Build the select data for the requested block size */
27140 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27141 		select_mhp = (struct mode_header *)select;
27142 		select_desc =
27143 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
27144 		/*
27145 		 * The LBA size is changed via the block descriptor, so the
27146 		 * descriptor is built according to the user data
27147 		 */
27148 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
27149 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
27150 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
27151 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
27152 
27153 		/* Send the mode select for the requested block size */
27154 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
27155 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27156 		    SD_PATH_STANDARD)) != 0) {
27157 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27158 			    "sr_change_blkmode: Mode Select Failed\n");
27159 			/*
27160 			 * The mode select failed for the requested block size,
27161 			 * so reset the data for the original block size and
27162 			 * send it to the target. The error is indicated by the
27163 			 * return value for the failed mode select.
27164 			 */
27165 			select_desc->blksize_hi  = sense_desc->blksize_hi;
27166 			select_desc->blksize_mid = sense_desc->blksize_mid;
27167 			select_desc->blksize_lo  = sense_desc->blksize_lo;
27168 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
27169 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27170 			    SD_PATH_STANDARD);
27171 		} else {
27172 			ASSERT(!mutex_owned(SD_MUTEX(un)));
27173 			mutex_enter(SD_MUTEX(un));
27174 			sd_update_block_info(un, (uint32_t)data, 0);
27175 
27176 			mutex_exit(SD_MUTEX(un));
27177 		}
27178 		break;
27179 	default:
27180 		/* should not reach here, but check anyway */
27181 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27182 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
27183 		rval = EINVAL;
27184 		break;
27185 	}
27186 
27187 	if (select) {
27188 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
27189 	}
27190 	if (sense) {
27191 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27192 	}
27193 	return (rval);
27194 }
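
/*
 * Illustrative example (not part of the driver): the block mode ioctls
 * handled above. Note the asymmetry visible in the code: CDROMGBLKMODE
 * copies the current LBA size out through a pointer, while CDROMSBLKMODE
 * takes the requested size as the argument value itself. The device path
 * is hypothetical.
 *
 *	int fd = open("/dev/rdsk/c0t6d0s2", O_RDONLY);
 *	int bsize;
 *
 *	if (ioctl(fd, CDROMGBLKMODE, &bsize) == 0)
 *		(void) printf("current block size %d\n", bsize);
 *	(void) ioctl(fd, CDROMSBLKMODE, CDROM_BLK_2048);
 */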
27195 
27196 
27197 /*
27198  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
27199  * implement driver support for getting and setting the CD speed. The command
27200  * set used will be based on the device type. If the device has not been
27201  * identified as MMC, the Toshiba vendor-specific mode page will be used. If
27202  * the device is MMC but does not support the Real Time Streaming feature,
27203  * the SET CD SPEED command will be used to set the speed and mode page
27204  * 0x2A will be used to read it.
27205  */
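
/*
 * Illustrative example (not part of the driver) of the speed ioctls
 * handled by the two routines below, independent of which backend is
 * selected. As with the block mode ioctls, CDROMGDRVSPEED copies an int
 * back through a pointer, while CDROMSDRVSPEED takes the requested speed
 * code as the argument value. fd is an open descriptor for the raw
 * CD-ROM device node.
 *
 *	int speed;
 *
 *	if (ioctl(fd, CDROMGDRVSPEED, &speed) == 0)
 *		(void) printf("current speed %d\n", speed);
 *	(void) ioctl(fd, CDROMSDRVSPEED, CDROM_QUAD_SPEED);
 */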
27206 
27207 /*
27208  *    Function: sr_change_speed()
27209  *
27210  * Description: This routine is the driver entry point for handling CD-ROM
27211  *		drive speed ioctl requests for devices supporting the Toshiba
27212  *		vendor specific drive speed mode page. Support for returning
27213  *		and changing the current drive speed in use by the device is
27214  *		implemented.
27215  *
27216  *   Arguments: dev - the device 'dev_t'
27217  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
27218  *		      CDROMSDRVSPEED (set)
27219  *		data - current drive speed or requested drive speed
27220  *		flag - this argument is a pass through to ddi_copyxxx() directly
27221  *		       from the mode argument of ioctl().
27222  *
27223  * Return Code: the code returned by sd_send_scsi_cmd()
27224  *		EINVAL if invalid arguments are provided
27225  *		EFAULT if ddi_copyxxx() fails
27226  *		ENXIO if fail ddi_get_soft_state
27227  *		EIO if invalid mode sense block descriptor length
27228  */
27229 
27230 static int
27231 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27232 {
27233 	struct sd_lun			*un = NULL;
27234 	struct mode_header		*sense_mhp, *select_mhp;
27235 	struct mode_speed		*sense_page, *select_page;
27236 	int				current_speed;
27237 	int				rval = EINVAL;
27238 	int				bd_len;
27239 	uchar_t				*sense = NULL;
27240 	uchar_t				*select = NULL;
27241 
27242 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27243 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27244 		return (ENXIO);
27245 	}
27246 
27247 	/*
27248 	 * Note: The drive speed is being modified here according to a Toshiba
27249 	 * vendor specific mode page (0x31).
27250 	 */
27251 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27252 
27253 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27254 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
27255 	    SD_PATH_STANDARD)) != 0) {
27256 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27257 		    "sr_change_speed: Mode Sense Failed\n");
27258 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27259 		return (rval);
27260 	}
27261 	sense_mhp  = (struct mode_header *)sense;
27262 
27263 	/* Check the block descriptor len to handle only 1 block descriptor */
27264 	bd_len = sense_mhp->bdesc_length;
27265 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27266 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27267 		    "sr_change_speed: Mode Sense returned invalid block "
27268 		    "descriptor length\n");
27269 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27270 		return (EIO);
27271 	}
27272 
27273 	sense_page = (struct mode_speed *)
27274 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
27275 	current_speed = sense_page->speed;
27276 
27277 	/* Process command */
27278 	switch (cmd) {
27279 	case CDROMGDRVSPEED:
27280 		/* Return the drive speed obtained during the mode sense */
27281 		if (current_speed == 0x2) {
27282 			current_speed = CDROM_TWELVE_SPEED;
27283 		}
27284 		if (ddi_copyout(&current_speed, (void *)data,
27285 		    sizeof (int), flag) != 0) {
27286 			rval = EFAULT;
27287 		}
27288 		break;
27289 	case CDROMSDRVSPEED:
27290 		/* Validate the requested drive speed */
27291 		switch ((uchar_t)data) {
27292 		case CDROM_TWELVE_SPEED:
27293 			data = 0x2;
27294 			/*FALLTHROUGH*/
27295 		case CDROM_NORMAL_SPEED:
27296 		case CDROM_DOUBLE_SPEED:
27297 		case CDROM_QUAD_SPEED:
27298 		case CDROM_MAXIMUM_SPEED:
27299 			break;
27300 		default:
27301 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27302 			    "sr_change_speed: "
27303 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
27304 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27305 			return (EINVAL);
27306 		}
27307 
27308 		/*
27309 		 * The current drive speed matches the requested drive speed so
27310 		 * there is no need to send the mode select to change the speed
27311 		 */
27312 		if (current_speed == data) {
27313 			break;
27314 		}
27315 
27316 		/* Build the select data for the requested drive speed */
27317 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27318 		select_mhp = (struct mode_header *)select;
27319 		select_mhp->bdesc_length = 0;
27320 		select_page =
27321 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27324 		select_page->mode_page.code = CDROM_MODE_SPEED;
27325 		select_page->mode_page.length = 2;
27326 		select_page->speed = (uchar_t)data;
27327 
27328 		/* Send the mode select for the requested drive speed */
27329 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27330 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27331 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
27332 			/*
27333 			 * The mode select failed for the requested drive speed,
27334 			 * so reset the data for the original drive speed and
27335 			 * send it to the target. The error is indicated by the
27336 			 * return value for the failed mode select.
27337 			 */
27338 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27339 			    "sr_change_speed: Mode Select Failed\n");
27340 			select_page->speed = sense_page->speed;
27341 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27342 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27343 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27344 		}
27345 		break;
27346 	default:
27347 		/* should not reach here, but check anyway */
27348 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27349 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
27350 		rval = EINVAL;
27351 		break;
27352 	}
27353 
27354 	if (select) {
27355 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
27356 	}
27357 	if (sense) {
27358 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27359 	}
27360 
27361 	return (rval);
27362 }
27363 
27364 
27365 /*
27366  *    Function: sr_atapi_change_speed()
27367  *
27368  * Description: This routine is the driver entry point for handling CD-ROM
27369  *		drive speed ioctl requests for MMC devices that do not support
27370  *		the Real Time Streaming feature (0x107).
27371  *
27372  *		Note: This routine will use the SET SPEED command, which may
27373  *		not be supported by all devices.
27374  *
27375  *   Arguments: dev - the device 'dev_t'
27376  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
27377  *		      CDROMSDRVSPEED (set)
27378  *		data - current drive speed or requested drive speed
27379  *		flag - this argument is a pass through to ddi_copyxxx()
27380  *		       directly from the mode argument of ioctl().
27381  *
27382  * Return Code: the code returned by sd_send_scsi_cmd()
27383  *		EINVAL if invalid arguments are provided
27384  *		EFAULT if ddi_copyxxx() fails
27385  *		ENXIO if fail ddi_get_soft_state
27386  *		EIO if invalid mode sense block descriptor length
27387  */
27388 
27389 static int
27390 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27391 {
27392 	struct sd_lun			*un;
27393 	struct uscsi_cmd		*com = NULL;
27394 	struct mode_header_grp2		*sense_mhp;
27395 	uchar_t				*sense_page;
27396 	uchar_t				*sense = NULL;
27397 	char				cdb[CDB_GROUP5];
27398 	int				bd_len;
27399 	int				current_speed = 0;
27400 	int				max_speed = 0;
27401 	int				rval;
27402 
27403 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27404 
27405 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27406 		return (ENXIO);
27407 	}
27408 
27409 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
27410 
27411 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
27412 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
27413 	    SD_PATH_STANDARD)) != 0) {
27414 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27415 		    "sr_atapi_change_speed: Mode Sense Failed\n");
27416 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27417 		return (rval);
27418 	}
27419 
27420 	/* Check the block descriptor len to handle only 1 block descriptor */
27421 	sense_mhp = (struct mode_header_grp2 *)sense;
27422 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
27423 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27424 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27425 		    "sr_atapi_change_speed: Mode Sense returned invalid "
27426 		    "block descriptor length\n");
27427 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27428 		return (EIO);
27429 	}
27430 
27431 	/* Calculate the current and maximum drive speeds */
27432 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
27433 	current_speed = (sense_page[14] << 8) | sense_page[15];
27434 	max_speed = (sense_page[8] << 8) | sense_page[9];
27435 
27436 	/* Process the command */
27437 	switch (cmd) {
27438 	case CDROMGDRVSPEED:
27439 		current_speed /= SD_SPEED_1X;
27440 		if (ddi_copyout(&current_speed, (void *)data,
27441 		    sizeof (int), flag) != 0)
27442 			rval = EFAULT;
27443 		break;
27444 	case CDROMSDRVSPEED:
27445 		/* Convert the speed code to KB/sec */
27446 		switch ((uchar_t)data) {
27447 		case CDROM_NORMAL_SPEED:
27448 			current_speed = SD_SPEED_1X;
27449 			break;
27450 		case CDROM_DOUBLE_SPEED:
27451 			current_speed = 2 * SD_SPEED_1X;
27452 			break;
27453 		case CDROM_QUAD_SPEED:
27454 			current_speed = 4 * SD_SPEED_1X;
27455 			break;
27456 		case CDROM_TWELVE_SPEED:
27457 			current_speed = 12 * SD_SPEED_1X;
27458 			break;
27459 		case CDROM_MAXIMUM_SPEED:
27460 			current_speed = 0xffff;
27461 			break;
27462 		default:
27463 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27464 			    "sr_atapi_change_speed: invalid drive speed %d\n",
27465 			    (uchar_t)data);
27466 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27467 			return (EINVAL);
27468 		}
27469 
27470 		/* Check the request against the drive's max speed. */
27471 		if (current_speed != 0xffff) {
27472 			if (current_speed > max_speed) {
27473 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27474 				return (EINVAL);
27475 			}
27476 		}
27477 
27478 		/*
27479 		 * Build and send the SET SPEED command
27480 		 *
27481 		 * Note: The SET SPEED (0xBB) command used in this routine is
27482 		 * obsolete per the SCSI MMC spec but still supported in the
27483 		 * MT FUJI vendor spec. Most equipment adheres to MT FUJI;
27484 		 * therefore the command is still implemented in this routine.
27485 		 */
27486 		bzero(cdb, sizeof (cdb));
27487 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
27488 		cdb[2] = (uchar_t)(current_speed >> 8);
27489 		cdb[3] = (uchar_t)current_speed;
27490 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27491 		com->uscsi_cdb	   = (caddr_t)cdb;
27492 		com->uscsi_cdblen  = CDB_GROUP5;
27493 		com->uscsi_bufaddr = NULL;
27494 		com->uscsi_buflen  = 0;
27495 		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
27496 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, 0,
27497 		    UIO_SYSSPACE, SD_PATH_STANDARD);
27498 		break;
27499 	default:
27500 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27501 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
27502 		rval = EINVAL;
27503 	}
27504 
27505 	if (sense) {
27506 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27507 	}
27508 	if (com) {
27509 		kmem_free(com, sizeof (*com));
27510 	}
27511 	return (rval);
27512 }
27513 
27514 
27515 /*
27516  *    Function: sr_pause_resume()
27517  *
27518  * Description: This routine is the driver entry point for handling CD-ROM
27519  *		pause/resume ioctl requests. This only affects the audio play
27520  *		operation.
27521  *
27522  *   Arguments: dev - the device 'dev_t'
27523  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
27524  *		      for setting the resume bit of the cdb.
27525  *
27526  * Return Code: the code returned by sd_send_scsi_cmd()
27527  *		EINVAL if invalid mode specified
27528  *
27529  */
27530 
27531 static int
27532 sr_pause_resume(dev_t dev, int cmd)
27533 {
27534 	struct sd_lun		*un;
27535 	struct uscsi_cmd	*com;
27536 	char			cdb[CDB_GROUP1];
27537 	int			rval;
27538 
27539 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27540 		return (ENXIO);
27541 	}
27542 
27543 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27544 	bzero(cdb, CDB_GROUP1);
27545 	cdb[0] = SCMD_PAUSE_RESUME;
27546 	switch (cmd) {
27547 	case CDROMRESUME:
27548 		cdb[8] = 1;
27549 		break;
27550 	case CDROMPAUSE:
27551 		cdb[8] = 0;
27552 		break;
27553 	default:
27554 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
27555 		    " Command '%x' Not Supported\n", cmd);
27556 		rval = EINVAL;
27557 		goto done;
27558 	}
27559 
27560 	com->uscsi_cdb    = cdb;
27561 	com->uscsi_cdblen = CDB_GROUP1;
27562 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27563 
27564 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27565 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27566 
27567 done:
27568 	kmem_free(com, sizeof (*com));
27569 	return (rval);
27570 }
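
/*
 * Illustrative example (not part of the driver): pausing and resuming
 * audio playback. Neither request carries any data; the command code
 * alone selects the resume bit built into the CDB above.
 *
 *	(void) ioctl(fd, CDROMPAUSE, 0);
 *	(void) ioctl(fd, CDROMRESUME, 0);
 */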
27571 
27572 
27573 /*
27574  *    Function: sr_play_msf()
27575  *
27576  * Description: This routine is the driver entry point for handling CD-ROM
27577  *		ioctl requests to output the audio signals at the specified
27578  *		starting address and continue the audio play until the specified
27579  *		ending address (CDROMPLAYMSF). The address is in Minute Second
27580  *		Frame (MSF) format.
27581  *
27582  *   Arguments: dev	- the device 'dev_t'
27583  *		data	- pointer to user provided audio msf structure,
27584  *		          specifying start/end addresses.
27585  *		flag	- this argument is a pass through to ddi_copyxxx()
27586  *		          directly from the mode argument of ioctl().
27587  *
27588  * Return Code: the code returned by sd_send_scsi_cmd()
27589  *		EFAULT if ddi_copyxxx() fails
27590  *		ENXIO if fail ddi_get_soft_state
27591  *		EINVAL if data pointer is NULL
27592  */
27593 
27594 static int
27595 sr_play_msf(dev_t dev, caddr_t data, int flag)
27596 {
27597 	struct sd_lun		*un;
27598 	struct uscsi_cmd	*com;
27599 	struct cdrom_msf	msf_struct;
27600 	struct cdrom_msf	*msf = &msf_struct;
27601 	char			cdb[CDB_GROUP1];
27602 	int			rval;
27603 
27604 	if (data == NULL) {
27605 		return (EINVAL);
27606 	}
27607 
27608 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27609 		return (ENXIO);
27610 	}
27611 
27612 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
27613 		return (EFAULT);
27614 	}
27615 
27616 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27617 	bzero(cdb, CDB_GROUP1);
27618 	cdb[0] = SCMD_PLAYAUDIO_MSF;
27619 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
27620 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
27621 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
27622 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
27623 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
27624 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
27625 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
27626 	} else {
27627 		cdb[3] = msf->cdmsf_min0;
27628 		cdb[4] = msf->cdmsf_sec0;
27629 		cdb[5] = msf->cdmsf_frame0;
27630 		cdb[6] = msf->cdmsf_min1;
27631 		cdb[7] = msf->cdmsf_sec1;
27632 		cdb[8] = msf->cdmsf_frame1;
27633 	}
27634 	com->uscsi_cdb    = cdb;
27635 	com->uscsi_cdblen = CDB_GROUP1;
27636 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27637 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27638 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27639 	kmem_free(com, sizeof (*com));
27640 	return (rval);
27641 }
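
/*
 * Illustrative example (not part of the driver): playing audio from
 * 00:02:00 through 05:00:00 with CDROMPLAYMSF. Any BCD conversion the
 * drive requires is handled above, so the caller always supplies plain
 * binary MSF values. The addresses are arbitrary.
 *
 *	struct cdrom_msf msf;
 *
 *	msf.cdmsf_min0 = 0;  msf.cdmsf_sec0 = 2;  msf.cdmsf_frame0 = 0;
 *	msf.cdmsf_min1 = 5;  msf.cdmsf_sec1 = 0;  msf.cdmsf_frame1 = 0;
 *	(void) ioctl(fd, CDROMPLAYMSF, &msf);
 */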
27642 
27643 
27644 /*
27645  *    Function: sr_play_trkind()
27646  *
27647  * Description: This routine is the driver entry point for handling CD-ROM
27648  *		ioctl requests to output the audio signals at the specified
27649  *		starting address and continue the audio play until the specified
27650  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
27651  *		format.
27652  *
27653  *   Arguments: dev	- the device 'dev_t'
27654  *		data	- pointer to user provided audio track/index structure,
27655  *		          specifying start/end addresses.
27656  *		flag	- this argument is a pass through to ddi_copyxxx()
27657  *		          directly from the mode argument of ioctl().
27658  *
27659  * Return Code: the code returned by sd_send_scsi_cmd()
27660  *		EFAULT if ddi_copyxxx() fails
27661  *		ENXIO if fail ddi_get_soft_state
27662  *		EINVAL if data pointer is NULL
27663  */
27664 
27665 static int
27666 sr_play_trkind(dev_t dev, caddr_t data, int flag)
27667 {
27668 	struct cdrom_ti		ti_struct;
27669 	struct cdrom_ti		*ti = &ti_struct;
27670 	struct uscsi_cmd	*com = NULL;
27671 	char			cdb[CDB_GROUP1];
27672 	int			rval;
27673 
27674 	if (data == NULL) {
27675 		return (EINVAL);
27676 	}
27677 
27678 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
27679 		return (EFAULT);
27680 	}
27681 
27682 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27683 	bzero(cdb, CDB_GROUP1);
27684 	cdb[0] = SCMD_PLAYAUDIO_TI;
27685 	cdb[4] = ti->cdti_trk0;
27686 	cdb[5] = ti->cdti_ind0;
27687 	cdb[7] = ti->cdti_trk1;
27688 	cdb[8] = ti->cdti_ind1;
27689 	com->uscsi_cdb    = cdb;
27690 	com->uscsi_cdblen = CDB_GROUP1;
27691 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27692 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27693 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27694 	kmem_free(com, sizeof (*com));
27695 	return (rval);
27696 }
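
/*
 * Illustrative example (not part of the driver): playing from track 2
 * index 1 through track 4 index 1 with CDROMPLAYTRKIND. The track and
 * index values are arbitrary.
 *
 *	struct cdrom_ti ti;
 *
 *	ti.cdti_trk0 = 2;  ti.cdti_ind0 = 1;
 *	ti.cdti_trk1 = 4;  ti.cdti_ind1 = 1;
 *	(void) ioctl(fd, CDROMPLAYTRKIND, &ti);
 */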
27697 
27698 
27699 /*
27700  *    Function: sr_read_all_subcodes()
27701  *
27702  * Description: This routine is the driver entry point for handling CD-ROM
27703  *		ioctl requests to return raw subcode data while the target is
27704  *		playing audio (CDROMSUBCODE).
27705  *
27706  *   Arguments: dev	- the device 'dev_t'
27707  *		data	- pointer to user provided cdrom subcode structure,
27708  *		          specifying the transfer length and address.
27709  *		flag	- this argument is a pass through to ddi_copyxxx()
27710  *		          directly from the mode argument of ioctl().
27711  *
27712  * Return Code: the code returned by sd_send_scsi_cmd()
27713  *		EFAULT if ddi_copyxxx() fails
27714  *		ENXIO if fail ddi_get_soft_state
27715  *		EINVAL if data pointer is NULL
27716  */
27717 
27718 static int
27719 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
27720 {
27721 	struct sd_lun		*un = NULL;
27722 	struct uscsi_cmd	*com = NULL;
27723 	struct cdrom_subcode	*subcode = NULL;
27724 	int			rval;
27725 	size_t			buflen;
27726 	char			cdb[CDB_GROUP5];
27727 
27728 #ifdef _MULTI_DATAMODEL
27729 	/* To support ILP32 applications in an LP64 world */
27730 	struct cdrom_subcode32		cdrom_subcode32;
27731 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
27732 #endif
27733 	if (data == NULL) {
27734 		return (EINVAL);
27735 	}
27736 
27737 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27738 		return (ENXIO);
27739 	}
27740 
27741 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
27742 
27743 #ifdef _MULTI_DATAMODEL
27744 	switch (ddi_model_convert_from(flag & FMODELS)) {
27745 	case DDI_MODEL_ILP32:
27746 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
27747 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27748 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27749 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27750 			return (EFAULT);
27751 		}
27752 		/* Convert the ILP32 cdrom_subcode from the application to LP64 */
27753 		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
27754 		break;
27755 	case DDI_MODEL_NONE:
27756 		if (ddi_copyin(data, subcode,
27757 		    sizeof (struct cdrom_subcode), flag)) {
27758 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27759 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27760 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27761 			return (EFAULT);
27762 		}
27763 		break;
27764 	}
27765 #else /* ! _MULTI_DATAMODEL */
27766 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
27767 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27768 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
27769 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27770 		return (EFAULT);
27771 	}
27772 #endif /* _MULTI_DATAMODEL */
27773 
27774 	/*
27775 	 * Since MMC-2 expects max 3 bytes for length, check if the
27776 	 * length input is greater than 3 bytes
27777 	 */
27778 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
27779 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27780 		    "sr_read_all_subcodes: "
27781 		    "cdrom transfer length too large: %d (limit %d)\n",
27782 		    subcode->cdsc_length, 0xFFFFFF);
27783 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27784 		return (EINVAL);
27785 	}
27786 
27787 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
27788 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27789 	bzero(cdb, CDB_GROUP5);
27790 
27791 	if (un->un_f_mmc_cap == TRUE) {
27792 		cdb[0] = (char)SCMD_READ_CD;
27793 		cdb[2] = (char)0xff;
27794 		cdb[3] = (char)0xff;
27795 		cdb[4] = (char)0xff;
27796 		cdb[5] = (char)0xff;
27797 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
27798 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
27799 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
27800 		cdb[10] = 1;
27801 	} else {
27802 		/*
27803 		 * Note: A vendor specific command (0xDF) is being used here to
27804 		 * request a read of all subcodes.
27805 		 */
27806 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
27807 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
27808 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
27809 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
27810 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
27811 	}
27812 	com->uscsi_cdb	   = cdb;
27813 	com->uscsi_cdblen  = CDB_GROUP5;
27814 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
27815 	com->uscsi_buflen  = buflen;
27816 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27817 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
27818 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27819 	kmem_free(subcode, sizeof (struct cdrom_subcode));
27820 	kmem_free(com, sizeof (*com));
27821 	return (rval);
27822 }
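
/*
 * Illustrative example (not part of the driver): reading raw subcode
 * data with CDROMSUBCODE. cdsc_length is a block count; as the routine
 * above shows, each block yields CDROM_BLK_SUBCODE bytes, so the buffer
 * is sized to match. The block count is arbitrary.
 *
 *	struct cdrom_subcode sc;
 *
 *	sc.cdsc_length = 8;
 *	sc.cdsc_addr = malloc(sc.cdsc_length * CDROM_BLK_SUBCODE);
 *	if (sc.cdsc_addr != NULL)
 *		(void) ioctl(fd, CDROMSUBCODE, &sc);
 */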
27823 
27824 
27825 /*
27826  *    Function: sr_read_subchannel()
27827  *
27828  * Description: This routine is the driver entry point for handling CD-ROM
27829  *		ioctl requests to return the Q sub-channel data of the CD
27830  *		current position block. (CDROMSUBCHNL) The data includes the
27831  *		current position block (CDROMSUBCHNL). The data includes the
27832  *		track number, index number, absolute CD-ROM address (LBA or MSF
27833  *		format per the user), track relative CD-ROM address (LBA or MSF
27834  *
27835  *   Arguments: dev	- the device 'dev_t'
27836  *		data	- pointer to user provided cdrom sub-channel structure
27837  *		flag	- this argument is a pass through to ddi_copyxxx()
27838  *		          directly from the mode argument of ioctl().
27839  *
27840  * Return Code: the code returned by sd_send_scsi_cmd()
27841  *		EFAULT if ddi_copyxxx() fails
27842  *		ENXIO if fail ddi_get_soft_state
27843  *		EINVAL if data pointer is NULL
27844  */
27845 
27846 static int
27847 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
27848 {
27849 	struct sd_lun		*un;
27850 	struct uscsi_cmd	*com;
27851 	struct cdrom_subchnl	subchannel;
27852 	struct cdrom_subchnl	*subchnl = &subchannel;
27853 	char			cdb[CDB_GROUP1];
27854 	caddr_t			buffer;
27855 	int			rval;
27856 
27857 	if (data == NULL) {
27858 		return (EINVAL);
27859 	}
27860 
27861 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27862 	    (un->un_state == SD_STATE_OFFLINE)) {
27863 		return (ENXIO);
27864 	}
27865 
27866 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
27867 		return (EFAULT);
27868 	}
27869 
27870 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
27871 	bzero(cdb, CDB_GROUP1);
27872 	cdb[0] = SCMD_READ_SUBCHANNEL;
27873 	/* Set the MSF bit based on the user requested address format */
27874 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
27875 	/*
27876 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
27877 	 * returned
27878 	 */
27879 	cdb[2] = 0x40;
27880 	/*
27881 	 * Set byte 3 to specify the return data format. A value of 0x01
27882 	 * indicates that the CD-ROM current position should be returned.
27883 	 */
27884 	cdb[3] = 0x01;
27885 	cdb[8] = 0x10;
27886 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27887 	com->uscsi_cdb	   = cdb;
27888 	com->uscsi_cdblen  = CDB_GROUP1;
27889 	com->uscsi_bufaddr = buffer;
27890 	com->uscsi_buflen  = 16;
27891 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27892 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27893 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27894 	if (rval != 0) {
27895 		kmem_free(buffer, 16);
27896 		kmem_free(com, sizeof (*com));
27897 		return (rval);
27898 	}
27899 
27900 	/* Process the returned Q sub-channel data */
27901 	subchnl->cdsc_audiostatus = buffer[1];
27902 	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
27903 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
27904 	subchnl->cdsc_trk	= buffer[6];
27905 	subchnl->cdsc_ind	= buffer[7];
27906 	if (subchnl->cdsc_format & CDROM_LBA) {
27907 		subchnl->cdsc_absaddr.lba =
27908 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
27909 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
27910 		subchnl->cdsc_reladdr.lba =
27911 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
27912 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
27913 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
27914 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
27915 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
27916 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
27917 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
27918 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
27919 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
27920 	} else {
27921 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
27922 		subchnl->cdsc_absaddr.msf.second = buffer[10];
27923 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
27924 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
27925 		subchnl->cdsc_reladdr.msf.second = buffer[14];
27926 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
27927 	}
27928 	kmem_free(buffer, 16);
27929 	kmem_free(com, sizeof (*com));
27930 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
27931 	    != 0) {
27932 		return (EFAULT);
27933 	}
27934 	return (rval);
27935 }
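
/*
 * Illustrative example (not part of the driver): polling the audio
 * status and current position with CDROMSUBCHNL. cdsc_format selects
 * the address format (CDROM_LBA or CDROM_MSF) of the returned position.
 *
 *	struct cdrom_subchnl sc;
 *
 *	sc.cdsc_format = CDROM_MSF;
 *	if (ioctl(fd, CDROMSUBCHNL, &sc) == 0)
 *		(void) printf("status 0x%x track %d index %d\n",
 *		    sc.cdsc_audiostatus, sc.cdsc_trk, sc.cdsc_ind);
 */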
27936 
27937 
27938 /*
27939  *    Function: sr_read_tocentry()
27940  *
27941  * Description: This routine is the driver entry point for handling CD-ROM
27942  *		ioctl requests to read from the Table of Contents (TOC)
27943  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
27944  *		fields, the starting address (LBA or MSF format per the user)
27945  *		and the data mode if the user specified track is a data track.
27946  *
27947  *		Note: The READ HEADER (0x44) command used in this routine is
27948  *		obsolete per the SCSI MMC spec but still supported in the
27949  *		MT FUJI vendor spec. Most equipment adheres to MT FUJI;
27950  *		therefore the command is still implemented in this routine.
27951  *
27952  *   Arguments: dev	- the device 'dev_t'
27953  *		data	- pointer to user provided toc entry structure,
27954  *			  specifying the track # and the address format
27955  *			  (LBA or MSF).
27956  *		flag	- this argument is a pass through to ddi_copyxxx()
27957  *		          directly from the mode argument of ioctl().
27958  *
27959  * Return Code: the code returned by sd_send_scsi_cmd()
27960  *		EFAULT if ddi_copyxxx() fails
27961  *		ENXIO if fail ddi_get_soft_state
27962  *		EINVAL if data pointer is NULL
27963  */
27964 
27965 static int
27966 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
27967 {
27968 	struct sd_lun		*un = NULL;
27969 	struct uscsi_cmd	*com;
27970 	struct cdrom_tocentry	toc_entry;
27971 	struct cdrom_tocentry	*entry = &toc_entry;
27972 	caddr_t			buffer;
27973 	int			rval;
27974 	char			cdb[CDB_GROUP1];
27975 
27976 	if (data == NULL) {
27977 		return (EINVAL);
27978 	}
27979 
27980 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27981 	    (un->un_state == SD_STATE_OFFLINE)) {
27982 		return (ENXIO);
27983 	}
27984 
27985 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
27986 		return (EFAULT);
27987 	}
27988 
27989 	/* Validate the requested track and address format */
27990 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
27991 		return (EINVAL);
27992 	}
27993 
27994 	if (entry->cdte_track == 0) {
27995 		return (EINVAL);
27996 	}
27997 
27998 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
27999 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28000 	bzero(cdb, CDB_GROUP1);
28001 
28002 	cdb[0] = SCMD_READ_TOC;
28003 	/* Set the MSF bit based on the user requested address format  */
28004 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
28005 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28006 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
28007 	} else {
28008 		cdb[6] = entry->cdte_track;
28009 	}
28010 
28011 	/*
28012 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
28013 	 * (4 byte TOC response header + 8 byte track descriptor)
28014 	 */
28015 	cdb[8] = 12;
28016 	com->uscsi_cdb	   = cdb;
28017 	com->uscsi_cdblen  = CDB_GROUP1;
28018 	com->uscsi_bufaddr = buffer;
28019 	com->uscsi_buflen  = 0x0C;
28020 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
28021 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28022 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28023 	if (rval != 0) {
28024 		kmem_free(buffer, 12);
28025 		kmem_free(com, sizeof (*com));
28026 		return (rval);
28027 	}
28028 
28029 	/* Process the toc entry */
28030 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
28031 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
28032 	if (entry->cdte_format & CDROM_LBA) {
28033 		entry->cdte_addr.lba =
28034 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28035 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28036 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
28037 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
28038 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
28039 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
28040 		/*
28041 		 * Send a READ TOC command using the LBA address format to get
28042 		 * the LBA for the track requested so it can be used in the
28043 		 * READ HEADER request
28044 		 *
28045 		 * Note: The MSF bit of the READ HEADER command specifies the
28046 		 * output format. The block address specified in that command
28047 		 * must be in LBA format.
28048 		 */
28049 		cdb[1] = 0;
28050 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28051 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28052 		if (rval != 0) {
28053 			kmem_free(buffer, 12);
28054 			kmem_free(com, sizeof (*com));
28055 			return (rval);
28056 		}
28057 	} else {
28058 		entry->cdte_addr.msf.minute	= buffer[9];
28059 		entry->cdte_addr.msf.second	= buffer[10];
28060 		entry->cdte_addr.msf.frame	= buffer[11];
28061 		/*
28062 		 * Send a READ TOC command using the LBA address format to get
28063 		 * the LBA for the track requested so it can be used in the
28064 		 * READ HEADER request
28065 		 *
28066 		 * Note: The MSF bit of the READ HEADER command specifies the
28067 		 * output format. The block address specified in that command
28068 		 * must be in LBA format.
28069 		 */
28070 		cdb[1] = 0;
28071 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28072 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28073 		if (rval != 0) {
28074 			kmem_free(buffer, 12);
28075 			kmem_free(com, sizeof (*com));
28076 			return (rval);
28077 		}
28078 	}
28079 
28080 	/*
28081 	 * Build and send the READ HEADER command to determine the data mode of
28082 	 * the user specified track.
28083 	 */
28084 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
28085 	    (entry->cdte_track != CDROM_LEADOUT)) {
28086 		bzero(cdb, CDB_GROUP1);
28087 		cdb[0] = SCMD_READ_HEADER;
28088 		cdb[2] = buffer[8];
28089 		cdb[3] = buffer[9];
28090 		cdb[4] = buffer[10];
28091 		cdb[5] = buffer[11];
28092 		cdb[8] = 0x08;
28093 		com->uscsi_buflen = 0x08;
28094 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28095 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28096 		if (rval == 0) {
28097 			entry->cdte_datamode = buffer[0];
28098 		} else {
28099 			/*
28100 			 * The READ HEADER command failed. Since this command
28101 			 * is obsolete in one spec, it is better to return
28102 			 * -1 for an invalid track so that we can still
28103 			 * receive the rest of the TOC data.
28104 			 */
28105 			entry->cdte_datamode = (uchar_t)-1;
28106 		}
28107 	} else {
28108 		entry->cdte_datamode = (uchar_t)-1;
28109 	}
28110 
28111 	kmem_free(buffer, 12);
28112 	kmem_free(com, sizeof (*com));
28113 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
28114 		return (EFAULT);
28115 
28116 	return (rval);
28117 }
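
/*
 * Illustrative example (not part of the driver): reading the TOC entry
 * for track 1 in LBA format with CDROMREADTOCENTRY. Per the routine
 * above, cdte_datamode comes back as (uchar_t)-1 when the track is not
 * a readable data track.
 *
 *	struct cdrom_tocentry te;
 *
 *	te.cdte_track = 1;
 *	te.cdte_format = CDROM_LBA;
 *	if (ioctl(fd, CDROMREADTOCENTRY, &te) == 0)
 *		(void) printf("track 1 starts at LBA %u\n",
 *		    te.cdte_addr.lba);
 */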
28118 
28119 
28120 /*
28121  *    Function: sr_read_tochdr()
28122  *
28123  * Description: This routine is the driver entry point for handling CD-ROM
28124  * 		ioctl requests to read the Table of Contents (TOC) header
28125  *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
28126  *		and ending track numbers.
28127  *
28128  *   Arguments: dev	- the device 'dev_t'
28129  *		data	- pointer to user provided toc header structure,
28130  *			  specifying the starting and ending track numbers.
28131  *		flag	- this argument is a pass through to ddi_copyxxx()
28132  *			  directly from the mode argument of ioctl().
28133  *
28134  * Return Code: the code returned by sd_send_scsi_cmd()
28135  *		EFAULT if ddi_copyxxx() fails
28136  *		ENXIO if fail ddi_get_soft_state
28137  *		EINVAL if data pointer is NULL
28138  */
28139 
28140 static int
28141 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
28142 {
28143 	struct sd_lun		*un;
28144 	struct uscsi_cmd	*com;
28145 	struct cdrom_tochdr	toc_header;
28146 	struct cdrom_tochdr	*hdr = &toc_header;
28147 	char			cdb[CDB_GROUP1];
28148 	int			rval;
28149 	caddr_t			buffer;
28150 
28151 	if (data == NULL) {
28152 		return (EINVAL);
28153 	}
28154 
28155 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28156 	    (un->un_state == SD_STATE_OFFLINE)) {
28157 		return (ENXIO);
28158 	}
28159 
28160 	buffer = kmem_zalloc(4, KM_SLEEP);
28161 	bzero(cdb, CDB_GROUP1);
28162 	cdb[0] = SCMD_READ_TOC;
28163 	/*
28164 	 * Specifying a track number of 0x00 in the READ TOC command indicates
28165 	 * that the TOC header should be returned
28166 	 */
28167 	cdb[6] = 0x00;
28168 	/*
28169 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
28170 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
28171 	 */
28172 	cdb[8] = 0x04;
28173 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28174 	com->uscsi_cdb	   = cdb;
28175 	com->uscsi_cdblen  = CDB_GROUP1;
28176 	com->uscsi_bufaddr = buffer;
28177 	com->uscsi_buflen  = 0x04;
28178 	com->uscsi_timeout = 300;
28179 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28180 
28181 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28182 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28183 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28184 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
28185 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
28186 	} else {
28187 		hdr->cdth_trk0 = buffer[2];
28188 		hdr->cdth_trk1 = buffer[3];
28189 	}
28190 	kmem_free(buffer, 4);
28191 	kmem_free(com, sizeof (*com));
28192 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
28193 		return (EFAULT);
28194 	}
28195 	return (rval);
28196 }
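
/*
 * Illustrative example (not part of the driver): fetching the first and
 * last track numbers with CDROMREADTOCHDR, typically the first step
 * before iterating CDROMREADTOCENTRY over each track.
 *
 *	struct cdrom_tochdr th;
 *
 *	if (ioctl(fd, CDROMREADTOCHDR, &th) == 0)
 *		(void) printf("tracks %d through %d\n",
 *		    th.cdth_trk0, th.cdth_trk1);
 */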
28197 
28198 
28199 /*
28200  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
28201  * sr_read_cdda(), and sr_read_cdxa() routines implement driver support for
28202  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
28203  * digital audio and extended architecture digital audio. These modes are
28204  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
28205  * MMC specs.
28206  *
28207  * In addition to support for the various data formats these routines also
28208  * include support for devices that implement only the direct access READ
28209  * commands (0x08, 0x28), devices that implement the READ_CD commands
28210  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
28211  * READ CDXA commands (0xD8, 0xDB)
28212  */
28213 
28214 /*
28215  *    Function: sr_read_mode1()
28216  *
28217  * Description: This routine is the driver entry point for handling CD-ROM
28218  *		ioctl read mode1 requests (CDROMREADMODE1).
28219  *
28220  *   Arguments: dev	- the device 'dev_t'
28221  *		data	- pointer to user provided cd read structure specifying
28222  *			  the lba buffer address and length.
28223  *		flag	- this argument is a pass through to ddi_copyxxx()
28224  *			  directly from the mode argument of ioctl().
28225  *
28226  * Return Code: the code returned by sd_send_scsi_cmd()
28227  *		EFAULT if ddi_copyxxx() fails
28228  *		ENXIO if fail ddi_get_soft_state
28229  *		EINVAL if data pointer is NULL
28230  */
28231 
28232 static int
28233 sr_read_mode1(dev_t dev, caddr_t data, int flag)
28234 {
28235 	struct sd_lun		*un;
28236 	struct cdrom_read	mode1_struct;
28237 	struct cdrom_read	*mode1 = &mode1_struct;
28238 	int			rval;
28239 #ifdef _MULTI_DATAMODEL
28240 	/* To support ILP32 applications in an LP64 world */
28241 	struct cdrom_read32	cdrom_read32;
28242 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28243 #endif /* _MULTI_DATAMODEL */
28244 
28245 	if (data == NULL) {
28246 		return (EINVAL);
28247 	}
28248 
28249 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28250 	    (un->un_state == SD_STATE_OFFLINE)) {
28251 		return (ENXIO);
28252 	}
28253 
28254 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28255 	    "sd_read_mode1: entry: un:0x%p\n", un);
28256 
28257 #ifdef _MULTI_DATAMODEL
28258 	switch (ddi_model_convert_from(flag & FMODELS)) {
28259 	case DDI_MODEL_ILP32:
28260 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28261 			return (EFAULT);
28262 		}
28263 		/* Convert the ILP32 uscsi data from the application to LP64 */
28264 		/* Convert the ILP32 cdrom_read from the application to LP64 */
28265 		break;
28266 	case DDI_MODEL_NONE:
28267 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28268 			return (EFAULT);
28269 		}
28270 	}
28271 #else /* ! _MULTI_DATAMODEL */
28272 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28273 		return (EFAULT);
28274 	}
28275 #endif /* _MULTI_DATAMODEL */
28276 
28277 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
28278 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
28279 
28280 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28281 	    "sd_read_mode1: exit: un:0x%p\n", un);
28282 
28283 	return (rval);
28284 }
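
/*
 * Illustrative example (not part of the driver): reading one 2048-byte
 * mode 1 sector at LBA 16 with CDROMREADMODE1. The LBA is arbitrary;
 * cdread_buflen should be a multiple of the 2048-byte sector size.
 *
 *	struct cdrom_read cr;
 *	char buf[CDROM_BLK_2048];
 *
 *	cr.cdread_lba = 16;
 *	cr.cdread_bufaddr = buf;
 *	cr.cdread_buflen = sizeof (buf);
 *	(void) ioctl(fd, CDROMREADMODE1, &cr);
 */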
28285 
28286 
28287 /*
28288  *    Function: sr_read_cd_mode2()
28289  *
28290  * Description: This routine is the driver entry point for handling CD-ROM
28291  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28292  *		support the READ CD (0xBE) command or the 1st generation
28293  *		READ CD (0xD4) command.
28294  *
28295  *   Arguments: dev	- the device 'dev_t'
28296  *		data	- pointer to user provided cd read structure specifying
28297  *			  the lba buffer address and length.
28298  *		flag	- this argument is a pass through to ddi_copyxxx()
28299  *			  directly from the mode argument of ioctl().
28300  *
28301  * Return Code: the code returned by sd_send_scsi_cmd()
28302  *		EFAULT if ddi_copyxxx() fails
28303  *		ENXIO if fail ddi_get_soft_state
28304  *		EINVAL if data pointer is NULL
28305  */
28306 
28307 static int
28308 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
28309 {
28310 	struct sd_lun		*un;
28311 	struct uscsi_cmd	*com;
28312 	struct cdrom_read	mode2_struct;
28313 	struct cdrom_read	*mode2 = &mode2_struct;
28314 	uchar_t			cdb[CDB_GROUP5];
28315 	int			nblocks;
28316 	int			rval;
28317 #ifdef _MULTI_DATAMODEL
28318 	/*  To support ILP32 applications in an LP64 world */
28319 	struct cdrom_read32	cdrom_read32;
28320 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28321 #endif /* _MULTI_DATAMODEL */
28322 
28323 	if (data == NULL) {
28324 		return (EINVAL);
28325 	}
28326 
28327 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28328 	    (un->un_state == SD_STATE_OFFLINE)) {
28329 		return (ENXIO);
28330 	}
28331 
28332 #ifdef _MULTI_DATAMODEL
28333 	switch (ddi_model_convert_from(flag & FMODELS)) {
28334 	case DDI_MODEL_ILP32:
28335 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28336 			return (EFAULT);
28337 		}
28338 		/* Convert the ILP32 uscsi data from the application to LP64 */
28339 		/* Convert the ILP32 cdrom_read from the application to LP64 */
28340 		break;
28341 	case DDI_MODEL_NONE:
28342 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28343 			return (EFAULT);
28344 		}
28345 		break;
28346 	}
28347 
28348 #else /* ! _MULTI_DATAMODEL */
28349 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28350 		return (EFAULT);
28351 	}
28352 #endif /* _MULTI_DATAMODEL */
28353 
28354 	bzero(cdb, sizeof (cdb));
28355 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
28356 		/* Read command supported by 1st generation atapi drives */
28357 		cdb[0] = SCMD_READ_CDD4;
28358 	} else {
28359 		/* Universal CD Access Command */
28360 		cdb[0] = SCMD_READ_CD;
28361 	}
28362 
28363 	/*
28364 	 * Set expected sector type to: 2336 byte, Mode 2 Yellow Book
28365 	 */
28366 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
28367 
28368 	/* set the start address */
28369 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0xFF);
28370 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0xFF);
28371 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28372 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
28373 
28374 	/* set the transfer length */
28375 	nblocks = mode2->cdread_buflen / 2336;
28376 	cdb[6] = (uchar_t)(nblocks >> 16);
28377 	cdb[7] = (uchar_t)(nblocks >> 8);
28378 	cdb[8] = (uchar_t)nblocks;
28379 
28380 	/* set the filter bits */
28381 	cdb[9] = CDROM_READ_CD_USERDATA;
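	/*
	 * Worked example with illustrative values: for cdread_lba =
	 * 0x00012345 and cdread_buflen = 74752 bytes, nblocks =
	 * 74752 / 2336 = 32 (0x20), so cdb[2..5] carry 00 01 23 45 and
	 * cdb[6..8] carry 00 00 20. Any remainder of cdread_buflen that is
	 * not a whole multiple of 2336 is dropped by the division.
	 */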
28382 
28383 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28384 	com->uscsi_cdb = (caddr_t)cdb;
28385 	com->uscsi_cdblen = sizeof (cdb);
28386 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28387 	com->uscsi_buflen = mode2->cdread_buflen;
28388 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28389 
28390 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28391 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28392 	kmem_free(com, sizeof (*com));
28393 	return (rval);
28394 }
28395 
28396 
28397 /*
28398  *    Function: sr_read_mode2()
28399  *
28400  * Description: This routine is the driver entry point for handling CD-ROM
28401  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28402  *		do not support the READ CD (0xBE) command.
28403  *
28404  *   Arguments: dev	- the device 'dev_t'
28405  *		data	- pointer to user provided cd read structure specifying
28406  *			  the lba, buffer address and length.
28407  *		flag	- this argument is a pass through to ddi_copyxxx()
28408  *			  directly from the mode argument of ioctl().
28409  *
28410  * Return Code: the code returned by sd_send_scsi_cmd()
28411  *		EFAULT if ddi_copyxxx() fails
28412  *		ENXIO if ddi_get_soft_state() fails
28413  *		EINVAL if data pointer is NULL
28414  *		EIO if restoring the original block size fails
28415  *		EAGAIN if commands are in progress in the driver
28416  */
28417 
28418 static int
28419 sr_read_mode2(dev_t dev, caddr_t data, int flag)
28420 {
28421 	struct sd_lun		*un;
28422 	struct cdrom_read	mode2_struct;
28423 	struct cdrom_read	*mode2 = &mode2_struct;
28424 	int			rval;
28425 	uint32_t		restore_blksize;
28426 	struct uscsi_cmd	*com;
28427 	uchar_t			cdb[CDB_GROUP0];
28428 	int			nblocks;
28429 
28430 #ifdef _MULTI_DATAMODEL
28431 	/* To support ILP32 applications in an LP64 world */
28432 	struct cdrom_read32	cdrom_read32;
28433 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28434 #endif /* _MULTI_DATAMODEL */
28435 
28436 	if (data == NULL) {
28437 		return (EINVAL);
28438 	}
28439 
28440 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28441 	    (un->un_state == SD_STATE_OFFLINE)) {
28442 		return (ENXIO);
28443 	}
28444 
28445 	/*
28446 	 * Because this routine will update the device and driver block size
28447 	 * being used we want to make sure there are no commands in progress.
28448 	 * If commands are in progress the user will have to try again.
28449 	 *
28450 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
28451 	 * in sdioctl to protect commands from sdioctl through to the top of
28452 	 * sd_uscsi_strategy. See sdioctl for details.
28453 	 */
28454 	mutex_enter(SD_MUTEX(un));
28455 	if (un->un_ncmds_in_driver != 1) {
28456 		mutex_exit(SD_MUTEX(un));
28457 		return (EAGAIN);
28458 	}
28459 	mutex_exit(SD_MUTEX(un));
28460 
28461 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28462 	    "sr_read_mode2: entry: un:0x%p\n", un);
28463 
28464 #ifdef _MULTI_DATAMODEL
28465 	switch (ddi_model_convert_from(flag & FMODELS)) {
28466 	case DDI_MODEL_ILP32:
28467 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28468 			return (EFAULT);
28469 		}
28470 		/* Convert the ILP32 cd read data from the application to LP64 */
28471 		cdrom_read32tocdrom_read(cdrd32, mode2);
28472 		break;
28473 	case DDI_MODEL_NONE:
28474 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28475 			return (EFAULT);
28476 		}
28477 		break;
28478 	}
28479 #else /* ! _MULTI_DATAMODEL */
28480 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
28481 		return (EFAULT);
28482 	}
28483 #endif /* _MULTI_DATAMODEL */
28484 
28485 	/* Store the current target block size for restoration later */
28486 	restore_blksize = un->un_tgt_blocksize;
28487 
28488 	/* Change the device and soft state target block size to 2336 */
28489 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
28490 		rval = EIO;
28491 		goto done;
28492 	}
28493 
28494 
28495 	bzero(cdb, sizeof (cdb));
28496 
28497 	/* set READ operation */
28498 	cdb[0] = SCMD_READ;
28499 
28500 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
28501 	mode2->cdread_lba >>= 2;
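	/*
	 * Illustrative example: an application supplied cdread_lba of 100
	 * (in 512 byte units) becomes 100 >> 2 = 25 in the 2 KB class
	 * addressing used once the drive is in mode 2 (2336 byte) blocks.
	 */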
28502 
28503 	/* set the start address */
28504 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0x1F);
28505 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28506 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
28507 
28508 	/* set the transfer length */
28509 	nblocks = mode2->cdread_buflen / 2336;
28510 	cdb[4] = (uchar_t)nblocks & 0xFF;
28511 
28512 	/* build command */
28513 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28514 	com->uscsi_cdb = (caddr_t)cdb;
28515 	com->uscsi_cdblen = sizeof (cdb);
28516 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28517 	com->uscsi_buflen = mode2->cdread_buflen;
28518 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28519 
28520 	/*
28521 	 * Issue SCSI command with user space address for read buffer.
28522 	 *
28523 	 * This sends the command through main channel in the driver.
28524 	 *
28525 	 * Since this is accessed via an IOCTL call, we go through the
28526 	 * standard path, so that if the device was powered down, then
28527 	 * it would be 'awakened' to handle the command.
28528 	 */
28529 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28530 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28531 
28532 	kmem_free(com, sizeof (*com));
28533 
28534 	/* Restore the device and soft state target block size */
28535 	if (sr_sector_mode(dev, restore_blksize) != 0) {
28536 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28537 		    "can't do switch back to mode 1\n");
28538 		/*
28539 		 * If sd_send_scsi_READ succeeded we still need to report
28540 		 * an error because we failed to reset the block size
28541 		 */
28542 		if (rval == 0) {
28543 			rval = EIO;
28544 		}
28545 	}
28546 
28547 done:
28548 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28549 	    "sr_read_mode2: exit: un:0x%p\n", un);
28550 
28551 	return (rval);
28552 }
28553 
28554 
28555 /*
28556  *    Function: sr_sector_mode()
28557  *
28558  * Description: This utility function is used by sr_read_mode2 to set the
28559  *		target block size based on the user specified size. This is
28560  *		a legacy implementation based upon a vendor specific mode
28561  *		page.
28562  *
28563  *   Arguments: dev	- the device 'dev_t'
28564  *		blksize	- the block size to be set, either 2336
28565  *			  (SD_MODE2_BLKSIZE) or 512.
28566  *
28567  * Return Code: the code returned by sd_send_scsi_MODE_SENSE() or
28568  *		sd_send_scsi_MODE_SELECT() if either command fails
28569  *		ENXIO if ddi_get_soft_state() fails
28570  */
28571 
28572 static int
28573 sr_sector_mode(dev_t dev, uint32_t blksize)
28574 {
28575 	struct sd_lun	*un;
28576 	uchar_t		*sense;
28577 	uchar_t		*select;
28578 	int		rval;
28579 
28580 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28581 	    (un->un_state == SD_STATE_OFFLINE)) {
28582 		return (ENXIO);
28583 	}
28584 
28585 	sense = kmem_zalloc(20, KM_SLEEP);
28586 
28587 	/* Note: This is a vendor specific mode page (0x81) */
28588 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
28589 	    SD_PATH_STANDARD)) != 0) {
28590 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28591 		    "sr_sector_mode: Mode Sense failed\n");
28592 		kmem_free(sense, 20);
28593 		return (rval);
28594 	}
28595 	select = kmem_zalloc(20, KM_SLEEP);
28596 	select[3] = 0x08;
28597 	select[10] = ((blksize >> 8) & 0xff);
28598 	select[11] = (blksize & 0xff);
28599 	select[12] = 0x01;
28600 	select[13] = 0x06;
28601 	select[14] = sense[14];
28602 	select[15] = sense[15];
28603 	if (blksize == SD_MODE2_BLKSIZE) {
28604 		select[14] |= 0x01;
28605 	}
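	/*
	 * Illustrative example: for blksize = 2336 (0x0920) the block
	 * length bytes set above become select[10] = 0x09 and
	 * select[11] = 0x20 (the high byte, select[9], stays zero), and
	 * bit 0 of select[14] is set to flag the mode 2 block size; for
	 * blksize = 512 (0x0200) they become 0x02 and 0x00 and select[14]
	 * is passed through as sensed. The select[12] value of 0x01
	 * appears to be the vendor page code (0x81) with the PS bit
	 * cleared for the mode select.
	 */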
28606 
28607 	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
28608 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
28609 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28610 		    "sr_sector_mode: Mode Select failed\n");
28611 	} else {
28612 		/*
28613 		 * Only update the softstate block size if we successfully
28614 		 * changed the device block mode.
28615 		 */
28616 		mutex_enter(SD_MUTEX(un));
28617 		sd_update_block_info(un, blksize, 0);
28618 		mutex_exit(SD_MUTEX(un));
28619 	}
28620 	kmem_free(sense, 20);
28621 	kmem_free(select, 20);
28622 	return (rval);
28623 }
28624 
28625 
28626 /*
28627  *    Function: sr_read_cdda()
28628  *
28629  * Description: This routine is the driver entry point for handling CD-ROM
28630  *		ioctl requests to return CD-DA or subcode data (CDROMCDDA). If
28631  *		the target supports CDDA these requests are handled via a vendor
28632  *		specific command (0xD8). If the target does not support CDDA
28633  *		these requests are handled via the READ CD command (0xBE).
28634  *
28635  *   Arguments: dev	- the device 'dev_t'
28636  *		data	- pointer to user provided CD-DA structure specifying
28637  *			  the track starting address, transfer length, and
28638  *			  subcode options.
28639  *		flag	- this argument is a pass through to ddi_copyxxx()
28640  *			  directly from the mode argument of ioctl().
28641  *
28642  * Return Code: the code returned by sd_send_scsi_cmd()
28643  *		EFAULT if ddi_copyxxx() fails
28644  *		ENXIO if ddi_get_soft_state() fails
28645  *		EINVAL if invalid arguments are provided
28646  *		ENOTTY
28647  */
28648 
28649 static int
28650 sr_read_cdda(dev_t dev, caddr_t data, int flag)
28651 {
28652 	struct sd_lun			*un;
28653 	struct uscsi_cmd		*com;
28654 	struct cdrom_cdda		*cdda;
28655 	int				rval;
28656 	size_t				buflen;
28657 	char				cdb[CDB_GROUP5];
28658 
28659 #ifdef _MULTI_DATAMODEL
28660 	/* To support ILP32 applications in an LP64 world */
28661 	struct cdrom_cdda32	cdrom_cdda32;
28662 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
28663 #endif /* _MULTI_DATAMODEL */
28664 
28665 	if (data == NULL) {
28666 		return (EINVAL);
28667 	}
28668 
28669 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28670 		return (ENXIO);
28671 	}
28672 
28673 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
28674 
28675 #ifdef _MULTI_DATAMODEL
28676 	switch (ddi_model_convert_from(flag & FMODELS)) {
28677 	case DDI_MODEL_ILP32:
28678 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
28679 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28680 			    "sr_read_cdda: ddi_copyin Failed\n");
28681 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28682 			return (EFAULT);
28683 		}
28684 		/* Convert the ILP32 CD-DA data from the application to LP64 */
28685 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
28686 		break;
28687 	case DDI_MODEL_NONE:
28688 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28689 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28690 			    "sr_read_cdda: ddi_copyin Failed\n");
28691 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28692 			return (EFAULT);
28693 		}
28694 		break;
28695 	}
28696 #else /* ! _MULTI_DATAMODEL */
28697 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28698 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28699 		    "sr_read_cdda: ddi_copyin Failed\n");
28700 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28701 		return (EFAULT);
28702 	}
28703 #endif /* _MULTI_DATAMODEL */
28704 
28705 	/*
28706 	 * Since MMC-2 expects max 3 bytes for length, check if the
28707 	 * length input is greater than 3 bytes
28708 	 */
28709 	if ((cdda->cdda_length & 0xFF000000) != 0) {
28710 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
28711 		    "cdrom transfer length too large: %d (limit %d)\n",
28712 		    cdda->cdda_length, 0xFFFFFF);
28713 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28714 		return (EINVAL);
28715 	}
28716 
28717 	switch (cdda->cdda_subcode) {
28718 	case CDROM_DA_NO_SUBCODE:
28719 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
28720 		break;
28721 	case CDROM_DA_SUBQ:
28722 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
28723 		break;
28724 	case CDROM_DA_ALL_SUBCODE:
28725 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
28726 		break;
28727 	case CDROM_DA_SUBCODE_ONLY:
28728 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
28729 		break;
28730 	default:
28731 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28732 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
28733 		    cdda->cdda_subcode);
28734 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28735 		return (EINVAL);
28736 	}
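	/*
	 * Illustrative example: a CDROM_DA_SUBQ request with
	 * cdda_length = 10 sectors yields buflen = 2368 * 10 = 23680
	 * bytes, i.e. 2352 bytes of audio data plus 16 bytes of Q
	 * subcode per sector.
	 */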
28737 
28738 	/* Build and send the command */
28739 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28740 	bzero(cdb, CDB_GROUP5);
28741 
28742 	if (un->un_f_cfg_cdda == TRUE) {
28743 		cdb[0] = (char)SCMD_READ_CD;
28744 		cdb[1] = 0x04;
28745 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28746 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28747 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28748 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28749 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28750 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28751 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
28752 		cdb[9] = 0x10;
28753 		switch (cdda->cdda_subcode) {
28754 		case CDROM_DA_NO_SUBCODE:
28755 			cdb[10] = 0x0;
28756 			break;
28757 		case CDROM_DA_SUBQ:
28758 			cdb[10] = 0x2;
28759 			break;
28760 		case CDROM_DA_ALL_SUBCODE:
28761 			cdb[10] = 0x1;
28762 			break;
28763 		case CDROM_DA_SUBCODE_ONLY:
28764 			/* FALLTHROUGH */
28765 		default:
28766 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28767 			kmem_free(com, sizeof (*com));
28768 			return (ENOTTY);
28769 		}
28770 	} else {
28771 		cdb[0] = (char)SCMD_READ_CDDA;
28772 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28773 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28774 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28775 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28776 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
28777 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28778 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28779 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
28780 		cdb[10] = cdda->cdda_subcode;
28781 	}
28782 
28783 	com->uscsi_cdb = cdb;
28784 	com->uscsi_cdblen = CDB_GROUP5;
28785 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
28786 	com->uscsi_buflen = buflen;
28787 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28788 
28789 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28790 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28791 
28792 	kmem_free(cdda, sizeof (struct cdrom_cdda));
28793 	kmem_free(com, sizeof (*com));
28794 	return (rval);
28795 }
28796 
28797 
28798 /*
28799  *    Function: sr_read_cdxa()
28800  *
28801  * Description: This routine is the driver entry point for handling CD-ROM
28802  *		ioctl requests to return CD-XA (Extended Architecture) data.
28803  *		(CDROMCDXA).
28804  *
28805  *   Arguments: dev	- the device 'dev_t'
28806  *		data	- pointer to user provided CD-XA structure specifying
28807  *			  the data starting address, transfer length, and format
28808  *		flag	- this argument is a pass through to ddi_copyxxx()
28809  *			  directly from the mode argument of ioctl().
28810  *
28811  * Return Code: the code returned by sd_send_scsi_cmd()
28812  *		EFAULT if ddi_copyxxx() fails
28813  *		ENXIO if ddi_get_soft_state() fails
28814  *		EINVAL if data pointer is NULL
28815  */
28816 
28817 static int
28818 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
28819 {
28820 	struct sd_lun		*un;
28821 	struct uscsi_cmd	*com;
28822 	struct cdrom_cdxa	*cdxa;
28823 	int			rval;
28824 	size_t			buflen;
28825 	char			cdb[CDB_GROUP5];
28826 	uchar_t			read_flags;
28827 
28828 #ifdef _MULTI_DATAMODEL
28829 	/* To support ILP32 applications in an LP64 world */
28830 	struct cdrom_cdxa32		cdrom_cdxa32;
28831 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
28832 #endif /* _MULTI_DATAMODEL */
28833 
28834 	if (data == NULL) {
28835 		return (EINVAL);
28836 	}
28837 
28838 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28839 		return (ENXIO);
28840 	}
28841 
28842 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
28843 
28844 #ifdef _MULTI_DATAMODEL
28845 	switch (ddi_model_convert_from(flag & FMODELS)) {
28846 	case DDI_MODEL_ILP32:
28847 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
28848 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28849 			return (EFAULT);
28850 		}
28851 		/*
28852 		 * Convert the ILP32 CD-XA data from the
28853 		 * application to LP64 for internal use.
28854 		 */
28855 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
28856 		break;
28857 	case DDI_MODEL_NONE:
28858 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
28859 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28860 			return (EFAULT);
28861 		}
28862 		break;
28863 	}
28864 #else /* ! _MULTI_DATAMODEL */
28865 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
28866 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28867 		return (EFAULT);
28868 	}
28869 #endif /* _MULTI_DATAMODEL */
28870 
28871 	/*
28872 	 * Since MMC-2 allows at most 3 bytes for the transfer length,
28873 	 * check that the length input fits in 3 bytes.
28874 	 */
28875 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
28876 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
28877 		    "cdrom transfer length too large: %d (limit %d)\n",
28878 		    cdxa->cdxa_length, 0xFFFFFF);
28879 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28880 		return (EINVAL);
28881 	}
28882 
28883 	switch (cdxa->cdxa_format) {
28884 	case CDROM_XA_DATA:
28885 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
28886 		read_flags = 0x10;
28887 		break;
28888 	case CDROM_XA_SECTOR_DATA:
28889 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
28890 		read_flags = 0xf8;
28891 		break;
28892 	case CDROM_XA_DATA_W_ERROR:
28893 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
28894 		read_flags = 0xfc;
28895 		break;
28896 	default:
28897 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28898 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
28899 		    cdxa->cdxa_format);
28900 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28901 		return (EINVAL);
28902 	}
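	/*
	 * A note on the read_flags values chosen above: they are placed in
	 * cdb[9] of the READ CD command built below. Per the MMC layout of
	 * that byte (an external reference, not restated here), 0x10
	 * requests user data only, 0xf8 additionally requests sync,
	 * headers, and EDC/ECC for a raw 2352 byte sector, and 0xfc also
	 * requests the error information that accounts for the larger
	 * 2646 byte per sector transfer.
	 */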
28903 
28904 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28905 	bzero(cdb, CDB_GROUP5);
28906 	if (un->un_f_mmc_cap == TRUE) {
28907 		cdb[0] = (char)SCMD_READ_CD;
28908 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
28909 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
28910 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
28911 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
28912 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
28913 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
28914 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
28915 		cdb[9] = (char)read_flags;
28916 	} else {
28917 		/*
28918 		 * Note: A vendor specific command (0xDB) is being used here to
28919 		 * request a read of all subcodes.
28920 		 */
28921 		cdb[0] = (char)SCMD_READ_CDXA;
28922 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
28923 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
28924 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
28925 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
28926 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
28927 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
28928 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
28929 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
28930 		cdb[10] = cdxa->cdxa_format;
28931 	}
28932 	com->uscsi_cdb	   = cdb;
28933 	com->uscsi_cdblen  = CDB_GROUP5;
28934 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
28935 	com->uscsi_buflen  = buflen;
28936 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28937 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28938 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28939 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28940 	kmem_free(com, sizeof (*com));
28941 	return (rval);
28942 }
28943 
28944 
28945 /*
28946  *    Function: sr_eject()
28947  *
28948  * Description: This routine is the driver entry point for handling CD-ROM
28949  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
28950  *
28951  *   Arguments: dev	- the device 'dev_t'
28952  *
28953  * Return Code: the code returned by sd_send_scsi_cmd()
28954  */
28955 
28956 static int
28957 sr_eject(dev_t dev)
28958 {
28959 	struct sd_lun	*un;
28960 	int		rval;
28961 
28962 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28963 	    (un->un_state == SD_STATE_OFFLINE)) {
28964 		return (ENXIO);
28965 	}
28966 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
28967 	    SD_PATH_STANDARD)) != 0) {
28968 		return (rval);
28969 	}
28970 
28971 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
28972 	    SD_PATH_STANDARD);
28973 
28974 	if (rval == 0) {
28975 		mutex_enter(SD_MUTEX(un));
28976 		sr_ejected(un);
28977 		un->un_mediastate = DKIO_EJECTED;
28978 		cv_broadcast(&un->un_state_cv);
28979 		mutex_exit(SD_MUTEX(un));
28980 	}
28981 	return (rval);
28982 }
28983 
28984 
28985 /*
28986  *    Function: sr_ejected()
28987  *
28988  * Description: This routine updates the soft state structure to invalidate the
28989  *		geometry information after the media has been ejected or a
28990  *		media eject has been detected.
28991  *
28992  *   Arguments: un - driver soft state (unit) structure
28993  */
28994 
28995 static void
28996 sr_ejected(struct sd_lun *un)
28997 {
28998 	struct sd_errstats *stp;
28999 
29000 	ASSERT(un != NULL);
29001 	ASSERT(mutex_owned(SD_MUTEX(un)));
29002 
29003 	un->un_f_blockcount_is_valid	= FALSE;
29004 	un->un_f_tgt_blocksize_is_valid	= FALSE;
29005 	un->un_f_geometry_is_valid	= FALSE;
29006 
29007 	if (un->un_errstats != NULL) {
29008 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
29009 		stp->sd_capacity.value.ui64 = 0;
29010 	}
29011 }
29012 
29013 
29014 /*
29015  *    Function: sr_check_wp()
29016  *
29017  * Description: This routine checks the write protection of a removable media
29018  *		disk via the write protect bit of the Mode Page Header device
29019  *		specific field.  This routine has been implemented to use the
29020  *		error recovery mode page for all device types.
29021  *		Note: In the future use a sd_send_scsi_MODE_SENSE() routine
29022  *
29023  *   Arguments: dev		- the device 'dev_t'
29024  *
29025  * Return Code: int indicating if the device is write protected (1) or not (0)
29026  *
29027  *     Context: Kernel thread.
29028  *
29029  */
29030 
29031 static int
29032 sr_check_wp(dev_t dev)
29033 {
29034 	struct sd_lun	*un;
29035 	uchar_t		device_specific;
29036 	uchar_t		*sense;
29037 	int		hdrlen;
29038 	int		rval;
29039 	int		retry_flag = FALSE;
29040 
29041 	/*
29042 	 * Note: The return codes for this routine should be reworked to
29043 	 * properly handle the case of a NULL softstate.
29044 	 */
29045 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29046 		return (FALSE);
29047 	}
29048 
29049 	if (un->un_f_cfg_is_atapi == TRUE) {
29050 		retry_flag = TRUE;
29051 	}
29052 
29053 retry:
29054 	if (un->un_f_cfg_is_atapi == TRUE) {
29055 		/*
29056 		 * The mode page contents are not required; set the allocation
29057 		 * length for the mode page header only
29058 		 */
29059 		hdrlen = MODE_HEADER_LENGTH_GRP2;
29060 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29061 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
29062 		    MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
29063 		device_specific =
29064 		    ((struct mode_header_grp2 *)sense)->device_specific;
29065 	} else {
29066 		hdrlen = MODE_HEADER_LENGTH;
29067 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29068 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
29069 		    MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
29070 		device_specific =
29071 		    ((struct mode_header *)sense)->device_specific;
29072 	}
29073 
29074 	if (rval != 0) {
29075 		if ((un->un_f_cfg_is_atapi == TRUE) && (retry_flag)) {
29076 			/*
29077 			 * An ATAPI Zip drive has been observed to report a
29078 			 * check condition on the first attempt, with sense
29079 			 * data indicating power on or bus device reset.
29080 			 * Hence, on failure, retry at least once for ATAPI
29081 			 * devices.
29082 			 */
29083 			retry_flag = FALSE;
29084 			kmem_free(sense, hdrlen);
29085 			goto retry;
29086 		} else {
29087 			/*
29088 			 * Write protect mode sense failed; not all disks
29089 			 * understand this query. Return FALSE, assuming that
29090 			 * these devices are not write protected.
29091 			 */
29092 			rval = FALSE;
29093 		}
29094 	} else {
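		/*
		 * The write protect indication lives in the device specific
		 * byte of the mode parameter header retrieved above; per
		 * the SCSI definition of that byte, the WRITE_PROTECT mask
		 * tested here corresponds to the WP bit (bit 7).
		 */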
29095 		if (device_specific & WRITE_PROTECT) {
29096 			rval = TRUE;
29097 		} else {
29098 			rval = FALSE;
29099 		}
29100 	}
29101 	kmem_free(sense, hdrlen);
29102 	return (rval);
29103 }
29104 
29105 
29106 /*
29107  *    Function: sr_volume_ctrl()
29108  *
29109  * Description: This routine is the driver entry point for handling CD-ROM
29110  *		audio output volume ioctl requests. (CDROMVOLCTRL)
29111  *
29112  *   Arguments: dev	- the device 'dev_t'
29113  *		data	- pointer to user audio volume control structure
29114  *		flag	- this argument is a pass through to ddi_copyxxx()
29115  *			  directly from the mode argument of ioctl().
29116  *
29117  * Return Code: the code returned by sd_send_scsi_cmd()
29118  *		EFAULT if ddi_copyxxx() fails
29119  *		ENXIO if ddi_get_soft_state() fails
29120  *		EINVAL if data pointer is NULL
29121  *
29122  */
29123 
29124 static int
29125 sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
29126 {
29127 	struct sd_lun		*un;
29128 	struct cdrom_volctrl    volume;
29129 	struct cdrom_volctrl    *vol = &volume;
29130 	uchar_t			*sense_page;
29131 	uchar_t			*select_page;
29132 	uchar_t			*sense;
29133 	uchar_t			*select;
29134 	int			sense_buflen;
29135 	int			select_buflen;
29136 	int			rval;
29137 
29138 	if (data == NULL) {
29139 		return (EINVAL);
29140 	}
29141 
29142 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29143 	    (un->un_state == SD_STATE_OFFLINE)) {
29144 		return (ENXIO);
29145 	}
29146 
29147 	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
29148 		return (EFAULT);
29149 	}
29150 
29151 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29152 		struct mode_header_grp2		*sense_mhp;
29153 		struct mode_header_grp2		*select_mhp;
29154 		int				bd_len;
29155 
29156 		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
29157 		select_buflen = MODE_HEADER_LENGTH_GRP2 +
29158 		    MODEPAGE_AUDIO_CTRL_LEN;
29159 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29160 		select = kmem_zalloc(select_buflen, KM_SLEEP);
29161 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
29162 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29163 		    SD_PATH_STANDARD)) != 0) {
29164 			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
29165 			    "sr_volume_ctrl: Mode Sense Failed\n");
29166 			kmem_free(sense, sense_buflen);
29167 			kmem_free(select, select_buflen);
29168 			return (rval);
29169 		}
29170 		sense_mhp = (struct mode_header_grp2 *)sense;
29171 		select_mhp = (struct mode_header_grp2 *)select;
29172 		bd_len = (sense_mhp->bdesc_length_hi << 8) |
29173 		    sense_mhp->bdesc_length_lo;
29174 		if (bd_len > MODE_BLK_DESC_LENGTH) {
29175 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29176 			    "sr_volume_ctrl: Mode Sense returned invalid "
29177 			    "block descriptor length\n");
29178 			kmem_free(sense, sense_buflen);
29179 			kmem_free(select, select_buflen);
29180 			return (EIO);
29181 		}
29182 		sense_page = (uchar_t *)
29183 		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
29184 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
29185 		select_mhp->length_msb = 0;
29186 		select_mhp->length_lsb = 0;
29187 		select_mhp->bdesc_length_hi = 0;
29188 		select_mhp->bdesc_length_lo = 0;
29189 	} else {
29190 		struct mode_header		*sense_mhp, *select_mhp;
29191 
29192 		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29193 		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29194 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29195 		select = kmem_zalloc(select_buflen, KM_SLEEP);
29196 		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
29197 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29198 		    SD_PATH_STANDARD)) != 0) {
29199 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29200 			    "sr_volume_ctrl: Mode Sense Failed\n");
29201 			kmem_free(sense, sense_buflen);
29202 			kmem_free(select, select_buflen);
29203 			return (rval);
29204 		}
29205 		sense_mhp  = (struct mode_header *)sense;
29206 		select_mhp = (struct mode_header *)select;
29207 		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
29208 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29209 			    "sr_volume_ctrl: Mode Sense returned invalid "
29210 			    "block descriptor length\n");
29211 			kmem_free(sense, sense_buflen);
29212 			kmem_free(select, select_buflen);
29213 			return (EIO);
29214 		}
29215 		sense_page = (uchar_t *)
29216 		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
29217 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
29218 		select_mhp->length = 0;
29219 		select_mhp->bdesc_length = 0;
29220 	}
29221 	/*
29222 	 * Note: An audio control data structure could be created and overlaid
29223 	 * on the following in place of the array indexing method implemented.
29224 	 */
29225 
29226 	/* Build the select data for the user volume data */
29227 	select_page[0] = MODEPAGE_AUDIO_CTRL;
29228 	select_page[1] = 0xE;
29229 	/* Set the immediate bit */
29230 	select_page[2] = 0x04;
29231 	/* Zero out reserved fields */
29232 	select_page[3] = 0x00;
29233 	select_page[4] = 0x00;
29234 	/* Return sense data for fields not to be modified */
29235 	select_page[5] = sense_page[5];
29236 	select_page[6] = sense_page[6];
29237 	select_page[7] = sense_page[7];
29238 	/* Set the user specified volume levels for channel 0 and 1 */
29239 	select_page[8] = 0x01;
29240 	select_page[9] = vol->channel0;
29241 	select_page[10] = 0x02;
29242 	select_page[11] = vol->channel1;
29243 	/* Channels 2 and 3 are currently unsupported; return the sense data */
29244 	select_page[12] = sense_page[12];
29245 	select_page[13] = sense_page[13];
29246 	select_page[14] = sense_page[14];
29247 	select_page[15] = sense_page[15];
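	/*
	 * The indices above follow the SCSI-2 CD-ROM audio control page
	 * layout: bytes 8/10 are the output port channel selection codes
	 * (0x01 routes channel 0 to port 0, 0x02 routes channel 1 to
	 * port 1) and bytes 9/11 are the corresponding volume levels,
	 * 0x00 (muted) through 0xFF (maximum). Bytes 12-15 are the
	 * selection/volume pairs for ports 2 and 3.
	 */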
29248 
29249 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29250 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
29251 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29252 	} else {
29253 		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
29254 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29255 	}
29256 
29257 	kmem_free(sense, sense_buflen);
29258 	kmem_free(select, select_buflen);
29259 	return (rval);
29260 }
29261 
29262 
29263 /*
29264  *    Function: sr_read_sony_session_offset()
29265  *
29266  * Description: This routine is the driver entry point for handling CD-ROM
29267  *		ioctl requests for session offset information. (CDROMREADOFFSET)
29268  *		The address of the first track in the last session of a
29269  *		multi-session CD-ROM is returned
29270  *		multi-session CD-ROM is returned.
29271  *		Note: This routine uses a vendor specific key value in the
29272  *		command control field without implementing any vendor check here
29273  *		or in the ioctl routine.
29274  *
29275  *   Arguments: dev	- the device 'dev_t'
29276  *		data	- pointer to an int to hold the requested address
29277  *		flag	- this argument is a pass through to ddi_copyxxx()
29278  *			  directly from the mode argument of ioctl().
29279  *
29280  * Return Code: the code returned by sd_send_scsi_cmd()
29281  *		EFAULT if ddi_copyxxx() fails
29282  *		ENXIO if ddi_get_soft_state() fails
29283  *		EINVAL if data pointer is NULL
29284  */
29285 
29286 static int
29287 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
29288 {
29289 	struct sd_lun		*un;
29290 	struct uscsi_cmd	*com;
29291 	caddr_t			buffer;
29292 	char			cdb[CDB_GROUP1];
29293 	int			session_offset = 0;
29294 	int			rval;
29295 
29296 	if (data == NULL) {
29297 		return (EINVAL);
29298 	}
29299 
29300 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29301 	    (un->un_state == SD_STATE_OFFLINE)) {
29302 		return (ENXIO);
29303 	}
29304 
29305 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
29306 	bzero(cdb, CDB_GROUP1);
29307 	cdb[0] = SCMD_READ_TOC;
29308 	/*
29309 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
29310 	 * (4 byte TOC response header + 8 byte response data)
29311 	 */
29312 	cdb[8] = SONY_SESSION_OFFSET_LEN;
29313 	/* Byte 9 is the control byte. A vendor specific value is used */
29314 	cdb[9] = SONY_SESSION_OFFSET_KEY;
29315 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29316 	com->uscsi_cdb = cdb;
29317 	com->uscsi_cdblen = CDB_GROUP1;
29318 	com->uscsi_bufaddr = buffer;
29319 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
29320 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29321 
29322 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
29323 	    UIO_SYSSPACE, SD_PATH_STANDARD);
29324 	if (rval != 0) {
29325 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29326 		kmem_free(com, sizeof (*com));
29327 		return (rval);
29328 	}
29329 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
29330 		session_offset =
29331 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
29332 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
29333 		/*
29334 		 * The offset is returned in current lbasize blocks. Convert
29335 		 * to 2k blocks before returning it to the user.
29336 		 */
29337 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
29338 			session_offset >>= 2;
29339 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
29340 			session_offset >>= 1;
29341 		}
29342 	}
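	/*
	 * Illustrative example: a device currently using 512 byte blocks
	 * that reports a session offset of 6000 returns 6000 >> 2 = 1500
	 * to the caller in 2k block units; a 1024 byte block device would
	 * return 6000 >> 1 = 3000.
	 */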
29343 
29344 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
29345 		rval = EFAULT;
29346 	}
29347 
29348 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29349 	kmem_free(com, sizeof (*com));
29350 	return (rval);
29351 }
29352 
29353 
29354 /*
29355  *    Function: sd_wm_cache_constructor()
29356  *
29357  * Description: Cache Constructor for the wmap cache for the read/modify/write
29358  * 		devices.
29359  *
29360  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29361  *		un	- sd_lun structure for the device.
29362  *		flags	- the km flags passed to the constructor
29363  *
29364  * Return Code: 0 on success.
29365  *		-1 on failure.
29366  */
29367 
29368 /*ARGSUSED*/
29369 static int
29370 sd_wm_cache_constructor(void *wm, void *un, int flags)
29371 {
29372 	bzero(wm, sizeof (struct sd_w_map));
29373 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
29374 	return (0);
29375 }
29376 
29377 
29378 /*
29379  *    Function: sd_wm_cache_destructor()
29380  *
29381  * Description: Cache destructor for the wmap cache for the read/modify/write
29382  * 		devices.
29383  *
29384  *   Arguments: wm      - A pointer to the sd_w_map to be destroyed.
29385  *		un	- sd_lun structure for the device.
29386  */
29387 /*ARGSUSED*/
29388 static void
29389 sd_wm_cache_destructor(void *wm, void *un)
29390 {
29391 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
29392 }
29393 
29394 
29395 /*
29396  *    Function: sd_range_lock()
29397  *
29398  * Description: Lock the range of blocks specified as parameter to ensure
29399  *		that a read-modify-write is atomic and no other i/o writes
29400  *		to the same location. The range is specified in terms
29401  *		of start and end blocks. Block numbers are the actual
29402  *		media block numbers and not system block numbers.
29403  *
29404  *   Arguments: un	- sd_lun structure for the device.
29405  *		startb - The starting block number
29406  *		endb - The end block number
29407  *		typ - type of i/o - simple/read_modify_write
29408  *
29409  * Return Code: wm  - pointer to the wmap structure.
29410  *
29411  *     Context: This routine can sleep.
29412  */
29413 
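/*
 * A sketch of the state machine implemented below (the states are the
 * wm_state values), as read from the code:
 *
 *	SD_WM_CHK_LIST --(range free)--> SD_WM_LOCK_RANGE --> SD_WM_DONE
 *	    ^   |
 *	    |   +------(range busy)----> SD_WM_WAIT_MAP
 *	    |                                 |
 *	    +------(woken up, recheck)--------+
 *
 * SD_WM_LOCK_RANGE also loops back to SD_WM_CHK_LIST if the wmap
 * allocation had to sleep, since the sd mutex is dropped around the
 * KM_SLEEP allocation and the list may have changed meanwhile.
 */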
29414 static struct sd_w_map *
29415 sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
29416 {
29417 	struct sd_w_map *wmp = NULL;
29418 	struct sd_w_map *sl_wmp = NULL;
29419 	struct sd_w_map *tmp_wmp;
29420 	wm_state state = SD_WM_CHK_LIST;
29421 
29422 
29423 	ASSERT(un != NULL);
29424 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29425 
29426 	mutex_enter(SD_MUTEX(un));
29427 
29428 	while (state != SD_WM_DONE) {
29429 
29430 		switch (state) {
29431 		case SD_WM_CHK_LIST:
29432 			/*
29433 			 * This is the starting state. Check the wmap list
29434 			 * to see if the range is currently available.
29435 			 */
29436 			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
29437 				/*
29438 				 * If this is a simple write and no rmw
29439 				 * i/o is pending then try to lock the
29440 				 * range as the range should be available.
29441 				 */
29442 				state = SD_WM_LOCK_RANGE;
29443 			} else {
29444 				tmp_wmp = sd_get_range(un, startb, endb);
29445 				if (tmp_wmp != NULL) {
29446 					if ((wmp != NULL) && ONLIST(un, wmp)) {
29447 						/*
29448 						 * Should not keep onlist wmps
29449 						 * while waiting; this macro
29450 						 * also does wmp = NULL.
29451 						 */
29452 						FREE_ONLIST_WMAP(un, wmp);
29453 					}
29454 					/*
29455 					 * sl_wmp is the wmap on which the wait
29456 					 * is done. Since tmp_wmp points to the
29457 					 * in-use wmap, set sl_wmp to tmp_wmp
29458 					 * and change the state to sleep.
29459 					 */
29460 					sl_wmp = tmp_wmp;
29461 					state = SD_WM_WAIT_MAP;
29462 				} else {
29463 					state = SD_WM_LOCK_RANGE;
29464 				}
29465 
29466 			}
29467 			break;
29468 
29469 		case SD_WM_LOCK_RANGE:
29470 			ASSERT(un->un_wm_cache);
29471 			/*
29472 			 * The range needs to be locked, so try to get a wmap.
29473 			 * First attempt it with KM_NOSLEEP; we want to avoid
29474 			 * sleeping if possible, as we would have to release the
29475 			 * sd mutex in order to sleep.
29476 			 */
29477 			if (wmp == NULL)
29478 				wmp = kmem_cache_alloc(un->un_wm_cache,
29479 				    KM_NOSLEEP);
29480 			if (wmp == NULL) {
29481 				mutex_exit(SD_MUTEX(un));
29482 				_NOTE(DATA_READABLE_WITHOUT_LOCK
29483 				    (sd_lun::un_wm_cache))
29484 				wmp = kmem_cache_alloc(un->un_wm_cache,
29485 				    KM_SLEEP);
29486 				mutex_enter(SD_MUTEX(un));
29487 				/*
29488 				 * we released the mutex so recheck and go to
29489 				 * check list state.
29490 				 */
29491 				state = SD_WM_CHK_LIST;
29492 			} else {
29493 				/*
29494 				 * We exit out of state machine since we
29495 				 * have the wmap. Do the housekeeping first.
29496 				 * place the wmap on the wmap list if it is not
29497 				 * on it already and then set the state to done.
29498 				 */
29499 				wmp->wm_start = startb;
29500 				wmp->wm_end = endb;
29501 				wmp->wm_flags = typ | SD_WM_BUSY;
29502 				if (typ & SD_WTYPE_RMW) {
29503 					un->un_rmw_count++;
29504 				}
29505 				/*
29506 				 * If not already on the list then link
29507 				 */
29508 				if (!ONLIST(un, wmp)) {
29509 					wmp->wm_next = un->un_wm;
29510 					wmp->wm_prev = NULL;
29511 					if (wmp->wm_next)
29512 						wmp->wm_next->wm_prev = wmp;
29513 					un->un_wm = wmp;
29514 				}
29515 				state = SD_WM_DONE;
29516 			}
29517 			break;
29518 
29519 		case SD_WM_WAIT_MAP:
29520 			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
29521 			/*
29522 			 * Wait is done on sl_wmp, which is set in the
29523 			 * check_list state.
29524 			 */
29525 			sl_wmp->wm_wanted_count++;
29526 			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
29527 			sl_wmp->wm_wanted_count--;
29528 			/*
29529 			 * We can reuse the memory from the completed sl_wmp
29530 			 * lock range for our new lock, but only if no one is
29531 			 * waiting for it.
29532 			 */
29533 			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
29534 			if (sl_wmp->wm_wanted_count == 0) {
29535 				if (wmp != NULL)
29536 					CHK_N_FREEWMP(un, wmp);
29537 				wmp = sl_wmp;
29538 			}
29539 			sl_wmp = NULL;
29540 			/*
29541 			 * After waking up, need to recheck for availability of
29542 			 * range.
29543 			 */
29544 			state = SD_WM_CHK_LIST;
29545 			break;
29546 
29547 		default:
29548 			panic("sd_range_lock: "
29549 			    "Unknown state %d in sd_range_lock", state);
29550 			/*NOTREACHED*/
29551 		} /* switch(state) */
29552 
29553 	} /* while(state != SD_WM_DONE) */
29554 
29555 	mutex_exit(SD_MUTEX(un));
29556 
29557 	ASSERT(wmp != NULL);
29558 
29559 	return (wmp);
29560 }
29561 
29562 
29563 /*
29564  *    Function: sd_get_range()
29565  *
29566  * Description: Find whether there is any I/O overlapping this one.
29567  *		Returns the write map of the 1st such I/O, NULL otherwise.
29568  *
29569  *   Arguments: un	- sd_lun structure for the device.
29570  *		startb - The starting block number
29571  *		endb - The end block number
29572  *
29573  * Return Code: wm  - pointer to the wmap structure.
29574  */
29575 
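/*
 * For example, with a busy map covering blocks [100, 199], a request for
 * [150, 250] is caught by the startb check and a request for [50, 120]
 * by the endb check, while a request for [200, 220] overlaps nothing
 * and NULL is returned.
 */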
29576 static struct sd_w_map *
29577 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
29578 {
29579 	struct sd_w_map *wmp;
29580 
29581 	ASSERT(un != NULL);
29582 
29583 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
29584 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
29585 			continue;
29586 		}
29587 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
29588 			break;
29589 		}
29590 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
29591 			break;
29592 		}
29593 	}
29594 
29595 	return (wmp);
29596 }
29597 
29598 
29599 /*
29600  *    Function: sd_free_inlist_wmap()
29601  *
29602  * Description: Unlink and free a write map struct.
29603  *
29604  *   Arguments: un      - sd_lun structure for the device.
29605  *		wmp	- sd_w_map which needs to be unlinked.
29606  */
29607 
29608 static void
29609 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
29610 {
29611 	ASSERT(un != NULL);
29612 
29613 	if (un->un_wm == wmp) {
29614 		un->un_wm = wmp->wm_next;
29615 	} else {
29616 		wmp->wm_prev->wm_next = wmp->wm_next;
29617 	}
29618 
29619 	if (wmp->wm_next) {
29620 		wmp->wm_next->wm_prev = wmp->wm_prev;
29621 	}
29622 
29623 	wmp->wm_next = wmp->wm_prev = NULL;
29624 
29625 	kmem_cache_free(un->un_wm_cache, wmp);
29626 }
29627 
29628 
29629 /*
29630  *    Function: sd_range_unlock()
29631  *
29632  * Description: Unlock the range locked by wm.
29633  *		Free write map if nobody else is waiting on it.
29634  *
29635  *   Arguments: un      - sd_lun structure for the device.
29636  *              wm      - sd_w_map covering the range to be unlocked.
29637  */
29638 
29639 static void
29640 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
29641 {
29642 	ASSERT(un != NULL);
29643 	ASSERT(wm != NULL);
29644 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29645 
29646 	mutex_enter(SD_MUTEX(un));
29647 
29648 	if (wm->wm_flags & SD_WTYPE_RMW) {
29649 		un->un_rmw_count--;
29650 	}
29651 
29652 	if (wm->wm_wanted_count) {
29653 		wm->wm_flags = 0;
29654 		/*
29655 		 * Broadcast that the wmap is available now.
29656 		 */
29657 		cv_broadcast(&wm->wm_avail);
29658 	} else {
29659 		/*
29660 		 * If no one is waiting on the map, it should be freed.
29661 		 */
29662 		sd_free_inlist_wmap(un, wm);
29663 	}
29664 
29665 	mutex_exit(SD_MUTEX(un));
29666 }
29667 
29668 
29669 /*
29670  *    Function: sd_read_modify_write_task
29671  *
29672  * Description: Called from a taskq thread to initiate the write phase of
29673  *		a read-modify-write request.  This is used for targets where
29674  *		un->un_sys_blocksize != un->un_tgt_blocksize.
29675  *
29676  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29677  *
29678  *     Context: Called under taskq thread context.
29679  */
29680 
29681 static void
29682 sd_read_modify_write_task(void *arg)
29683 {
29684 	struct sd_mapblocksize_info	*bsp;
29685 	struct buf	*bp;
29686 	struct sd_xbuf	*xp;
29687 	struct sd_lun	*un;
29688 
29689 	bp = arg;	/* The bp is given in arg */
29690 	ASSERT(bp != NULL);
29691 
29692 	/* Get the pointer to the layer-private data struct */
29693 	xp = SD_GET_XBUF(bp);
29694 	ASSERT(xp != NULL);
29695 	bsp = xp->xb_private;
29696 	ASSERT(bsp != NULL);
29697 
29698 	un = SD_GET_UN(bp);
29699 	ASSERT(un != NULL);
29700 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29701 
29702 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29703 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29704 
29705 	/*
29706 	 * This is the write phase of a read-modify-write request, called
29707 	 * under the context of a taskq thread in response to the completion
29708 	 * of the read portion of the rmw request completing under interrupt
29709 	 * context. The write request must be sent from here down the iostart
29710 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
29711 	 * we use the layer index saved in the layer-private data area.
29712 	 */
29713 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29714 
29715 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29716 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29717 }
29718 
29719 
29720 /*
29721  *    Function: sddump_do_read_of_rmw()
29722  *
29723  * Description: This routine will be called from sddump. If sddump is called
29724  *		with an I/O which is not aligned on a device blocksize
29725  *		boundary then the write has to be converted to a
29726  *		read-modify-write. Do the read part here in order to keep
29727  *		sddump simple. Note that the sd_mutex is held across the
29728  *		call to this routine.
29729  *
29730  *   Arguments: un	- sd_lun
29731  *		blkno	- block number in terms of media block size.
29732  *		nblk	- number of blocks.
29733  *		bpp	- pointer to pointer to the buf structure. On return
29734  *			from this function, *bpp points to the valid buffer
29735  *			to which the write has to be done.
29736  *
29737  * Return Code: 0 for success or errno-type return code
29738  */
29739 
29740 static int
29741 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
29742 	struct buf **bpp)
29743 {
29744 	int err;
29745 	int i;
29746 	int rval;
29747 	struct buf *bp;
29748 	struct scsi_pkt *pkt = NULL;
29749 	uint32_t target_blocksize;
29750 
29751 	ASSERT(un != NULL);
29752 	ASSERT(mutex_owned(SD_MUTEX(un)));
29753 
29754 	target_blocksize = un->un_tgt_blocksize;
29755 
29756 	mutex_exit(SD_MUTEX(un));
29757 
29758 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
29759 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
29760 	if (bp == NULL) {
29761 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29762 		    "no resources for dumping; giving up");
29763 		err = ENOMEM;
29764 		goto done;
29765 	}
29766 
29767 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
29768 	    blkno, nblk);
29769 	if (rval != 0) {
29770 		scsi_free_consistent_buf(bp);
29771 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29772 		    "no resources for dumping; giving up");
29773 		err = ENOMEM;
29774 		goto done;
29775 	}
29776 
29777 	pkt->pkt_flags |= FLAG_NOINTR;
29778 
29779 	err = EIO;
29780 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
29781 
29782 		/*
29783 		 * sd_scsi_poll() returns 0 (success) if the command completes and
29784 		 * the status block is STATUS_GOOD.  We should only check
29785 		 * errors if this condition is not true.  Even then we should
29786 		 * send our own request sense packet only if we have a check
29787 		 * condition and auto request sense has not been performed by
29788 		 * the hba.
29789 		 */
29790 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
29791 
29792 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
29793 			err = 0;
29794 			break;
29795 		}
29796 
29797 		/*
29798 		 * Check CMD_DEV_GONE 1st, give up if device is gone,
29799 		 * no need to read RQS data.
29800 		 */
29801 		if (pkt->pkt_reason == CMD_DEV_GONE) {
29802 			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
29803 			    "Device is gone\n");
29804 			break;
29805 		}
29806 
29807 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
29808 			SD_INFO(SD_LOG_DUMP, un,
29809 			    "sddump: read failed with CHECK, try # %d\n", i);
29810 			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
29811 				(void) sd_send_polled_RQS(un);
29812 			}
29813 
29814 			continue;
29815 		}
29816 
29817 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
29818 			int reset_retval = 0;
29819 
29820 			SD_INFO(SD_LOG_DUMP, un,
29821 			    "sddump: read failed with BUSY, try # %d\n", i);
29822 
29823 			if (un->un_f_lun_reset_enabled == TRUE) {
29824 				reset_retval = scsi_reset(SD_ADDRESS(un),
29825 				    RESET_LUN);
29826 			}
29827 			if (reset_retval == 0) {
29828 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
29829 			}
29830 			(void) sd_send_polled_RQS(un);
29831 
29832 		} else {
29833 			SD_INFO(SD_LOG_DUMP, un,
29834 			    "sddump: read failed with 0x%x, try # %d\n",
29835 			    SD_GET_PKT_STATUS(pkt), i);
29836 			mutex_enter(SD_MUTEX(un));
29837 			sd_reset_target(un, pkt);
29838 			mutex_exit(SD_MUTEX(un));
29839 		}
29840 
29841 		/*
29842 		 * If we are not getting anywhere with lun/target resets,
29843 		 * let's reset the bus.
29844 		 */
29845 		if (i > SD_NDUMP_RETRIES/2) {
29846 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
29847 			(void) sd_send_polled_RQS(un);
29848 		}
29849 
29850 	}
29851 	scsi_destroy_pkt(pkt);
29852 
29853 	if (err != 0) {
29854 		scsi_free_consistent_buf(bp);
29855 		*bpp = NULL;
29856 	} else {
29857 		*bpp = bp;
29858 	}
29859 
29860 done:
29861 	mutex_enter(SD_MUTEX(un));
29862 	return (err);
29863 }
29864 
29865 
29866 /*
29867  *    Function: sd_failfast_flushq
29868  *
29869  * Description: Take all bp's on the wait queue that have B_FAILFAST set
29870  *		in b_flags and move them onto the failfast queue, then kick
29871  *		off a thread to return all bp's on the failfast queue to
29872  *		their owners with an error set.
29873  *
29874  *   Arguments: un - pointer to the soft state struct for the instance.
29875  *
29876  *     Context: may execute in interrupt context.
29877  */
29878 
29879 static void
29880 sd_failfast_flushq(struct sd_lun *un)
29881 {
29882 	struct buf *bp;
29883 	struct buf *next_waitq_bp;
29884 	struct buf *prev_waitq_bp = NULL;
29885 
29886 	ASSERT(un != NULL);
29887 	ASSERT(mutex_owned(SD_MUTEX(un)));
29888 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
29889 	ASSERT(un->un_failfast_bp == NULL);
29890 
29891 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
29892 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
29893 
29894 	/*
29895 	 * Check if we should flush all bufs when entering failfast state, or
29896 	 * just those with B_FAILFAST set.
29897 	 */
29898 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
29899 		/*
29900 		 * Move *all* bp's on the wait queue to the failfast flush
29901 		 * queue, including those that do NOT have B_FAILFAST set.
29902 		 */
29903 		if (un->un_failfast_headp == NULL) {
29904 			ASSERT(un->un_failfast_tailp == NULL);
29905 			un->un_failfast_headp = un->un_waitq_headp;
29906 		} else {
29907 			ASSERT(un->un_failfast_tailp != NULL);
29908 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
29909 		}
29910 
29911 		un->un_failfast_tailp = un->un_waitq_tailp;
29912 
29913 		/* update kstat for each bp moved out of the waitq */
29914 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
29915 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
29916 		}
29917 
29918 		/* empty the waitq */
29919 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
29920 
29921 	} else {
29922 		/*
29923 		 * Go thru the wait queue, pick off all entries with
29924 		 * B_FAILFAST set, and move these onto the failfast queue.
29925 		 */
29926 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
29927 			/*
29928 			 * Save the pointer to the next bp on the wait queue,
29929 			 * so we get to it on the next iteration of this loop.
29930 			 */
29931 			next_waitq_bp = bp->av_forw;
29932 
29933 			/*
29934 			 * If this bp from the wait queue does NOT have
29935 			 * B_FAILFAST set, just move on to the next element
29936 			 * in the wait queue. Note, this is the only place
29937 			 * where it is correct to set prev_waitq_bp.
29938 			 */
29939 			if ((bp->b_flags & B_FAILFAST) == 0) {
29940 				prev_waitq_bp = bp;
29941 				continue;
29942 			}
29943 
29944 			/*
29945 			 * Remove the bp from the wait queue.
29946 			 */
29947 			if (bp == un->un_waitq_headp) {
29948 				/* The bp is the first element of the waitq. */
29949 				un->un_waitq_headp = next_waitq_bp;
29950 				if (un->un_waitq_headp == NULL) {
29951 					/* The wait queue is now empty */
29952 					un->un_waitq_tailp = NULL;
29953 				}
29954 			} else {
29955 				/*
29956 				 * The bp is either somewhere in the middle
29957 				 * or at the end of the wait queue.
29958 				 */
29959 				ASSERT(un->un_waitq_headp != NULL);
29960 				ASSERT(prev_waitq_bp != NULL);
29961 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
29962 				    == 0);
29963 				if (bp == un->un_waitq_tailp) {
29964 					/* bp is the last entry on the waitq. */
29965 					ASSERT(next_waitq_bp == NULL);
29966 					un->un_waitq_tailp = prev_waitq_bp;
29967 				}
29968 				prev_waitq_bp->av_forw = next_waitq_bp;
29969 			}
29970 			bp->av_forw = NULL;
29971 
29972 			/*
29973 			 * update kstat since the bp is moved out of
29974 			 * the waitq
29975 			 */
29976 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
29977 
29978 			/*
29979 			 * Now put the bp onto the failfast queue.
29980 			 */
29981 			if (un->un_failfast_headp == NULL) {
29982 				/* failfast queue is currently empty */
29983 				ASSERT(un->un_failfast_tailp == NULL);
29984 				un->un_failfast_headp =
29985 				    un->un_failfast_tailp = bp;
29986 			} else {
29987 				/* Add the bp to the end of the failfast q */
29988 				ASSERT(un->un_failfast_tailp != NULL);
29989 				ASSERT(un->un_failfast_tailp->b_flags &
29990 				    B_FAILFAST);
29991 				un->un_failfast_tailp->av_forw = bp;
29992 				un->un_failfast_tailp = bp;
29993 			}
29994 		}
29995 	}
29996 
29997 	/*
29998 	 * Now return all bp's on the failfast queue to their owners.
29999 	 */
30000 	while ((bp = un->un_failfast_headp) != NULL) {
30001 
30002 		un->un_failfast_headp = bp->av_forw;
30003 		if (un->un_failfast_headp == NULL) {
30004 			un->un_failfast_tailp = NULL;
30005 		}
30006 
30007 		/*
30008 		 * We want to return the bp with a failure error code, but
30009 		 * we do not want a call to sd_start_cmds() to occur here,
30010 		 * so use sd_return_failed_command_no_restart() instead of
30011 		 * sd_return_failed_command().
30012 		 */
30013 		sd_return_failed_command_no_restart(un, bp, EIO);
30014 	}
30015 
30016 	/* Flush the xbuf queues if required. */
30017 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
30018 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
30019 	}
30020 
30021 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
30022 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
30023 }
30024 
30025 
30026 /*
30027  *    Function: sd_failfast_flushq_callback
30028  *
30029  * Description: Return TRUE if the given bp meets the criteria for failfast
30030  *		flushing. Used with ddi_xbuf_flushq(9F).
30031  *
30032  *   Arguments: bp - ptr to buf struct to be examined.
30033  *
30034  *     Context: Any
30035  */
30036 
30037 static int
30038 sd_failfast_flushq_callback(struct buf *bp)
30039 {
30040 	/*
30041 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
30042 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
30043 	 */
30044 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
30045 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
30046 }
30047 
30048 
30049 
30050 #if defined(__i386) || defined(__amd64)
30051 /*
30052  * Function: sd_setup_next_xfer
30053  *
30054  * Description: Prepare next I/O operation using DMA_PARTIAL
30055  *
30056  */
30057 
30058 static int
30059 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
30060     struct scsi_pkt *pkt, struct sd_xbuf *xp)
30061 {
30062 	ssize_t	num_blks_not_xfered;
30063 	daddr_t	strt_blk_num;
30064 	ssize_t	bytes_not_xfered;
30065 	int	rval;
30066 
30067 	ASSERT(pkt->pkt_resid == 0);
30068 
30069 	/*
30070 	 * Calculate next block number and amount to be transferred.
30071 	 *
30072 	 * How much data NOT transfered to the HBA yet.
30073 	 */
30074 	bytes_not_xfered = xp->xb_dma_resid;
30075 
30076 	/*
30077 	 * figure how many blocks NOT transfered to the HBA yet.
30078 	 */
30079 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
30080 
30081 	/*
30082 	 * set starting block number to the end of what WAS transfered.
30083 	 */
30084 	strt_blk_num = xp->xb_blkno +
30085 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
30086 
30087 	/*
30088 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
30089 	 * will call scsi_initpkt with NULL_FUNC so we do not have to release
30090 	 * the disk mutex here.
30091 	 */
30092 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
30093 	    strt_blk_num, num_blks_not_xfered);
30094 
30095 	if (rval == 0) {
30096 
30097 		/*
30098 		 * Success.
30099 		 *
30100 		 * Adjust things if there are still more blocks to be
30101 		 * transferred.
30102 		 */
30103 		xp->xb_dma_resid = pkt->pkt_resid;
30104 		pkt->pkt_resid = 0;
30105 
30106 		return (1);
30107 	}
30108 
30109 	/*
30110 	 * There's really only one possible return value from
30111 	 * sd_setup_next_rw_pkt which occurs when scsi_init_pkt
30112 	 * returns NULL.
30113 	 */
30114 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
30115 
30116 	bp->b_resid = bp->b_bcount;
30117 	bp->b_flags |= B_ERROR;
30118 
30119 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30120 	    "Error setting up next portion of DMA transfer\n");
30121 
30122 	return (0);
30123 }
30124 #endif
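/*
 * A worked example of the arithmetic in sd_setup_next_xfer, with
 * hypothetical values and assuming 512-byte target blocks:
 *
 *	b_bcount            = 1048576	(1 MB requested)
 *	xb_dma_resid        =  262144	(256 KB not yet transferred)
 *	num_blks_not_xfered = 262144 / 512 = 512
 *	strt_blk_num        = xb_blkno + ((1048576 - 262144) / 512)
 *	                    = xb_blkno + 1536
 *
 * i.e. the next pkt covers the 512 blocks starting at xb_blkno + 1536.
 */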
30125 
30126 /*
30127  *    Function: sd_panic_for_res_conflict
30128  *
30129  * Description: Call panic with a string formatted with "Reservation Conflict"
30130  *		and a human-readable identifier indicating the SD instance
30131  *		that experienced the reservation conflict.
30132  *
30133  *   Arguments: un - pointer to the soft state struct for the instance.
30134  *
30135  *     Context: may execute in interrupt context.
30136  */
30137 
30138 #define	SD_RESV_CONFLICT_FMT_LEN 40
30139 void
30140 sd_panic_for_res_conflict(struct sd_lun *un)
30141 {
30142 	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
30143 	char path_str[MAXPATHLEN];
30144 
30145 	(void) snprintf(panic_str, sizeof (panic_str),
30146 	    "Reservation Conflict\nDisk: %s",
30147 	    ddi_pathname(SD_DEVINFO(un), path_str));
30148 
30149 	panic(panic_str);
30150 }
30151 
30152 /*
30153  * Note: The following sd_faultinjection_ioctl() routines implement
30154  * driver support for fault injection: for error analysis, faults can
30155  * be injected into multiple layers of the driver.
30156  *
30157  */
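/*
 * A minimal userland sketch (not part of the driver) of the intended
 * ioctl sequence, based on the handlers below.  The device path is
 * hypothetical, the fault payload is left zeroed, and the SDIOC* codes,
 * struct sd_fi_pkt, and SD_FI_MAX_BUF are assumed to come from the
 * driver's private headers.
 */
#if 0	/* illustrative sketch only; not compiled */
#include <fcntl.h>
#include <unistd.h>
#include <stropts.h>	/* ioctl(2) */

static void
example_fi_session(void)
{
	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);	/* hypothetical */
	struct sd_fi_pkt fi_pkt = { 0 };	/* fault payload to inject */
	uint_t npush = 1;
	char log[SD_FI_MAX_BUF];

	(void) ioctl(fd, SDIOCSTART);		/* reset injection session */
	(void) ioctl(fd, SDIOCINSERTPKT, &fi_pkt); /* stage a pkt fault */
	(void) ioctl(fd, SDIOCPUSH, &npush);	/* commit staged entries */
	(void) ioctl(fd, SDIOCRUN);		/* arm injection */
	/* ... issue I/O to the device to trigger the fault ... */
	(void) ioctl(fd, SDIOCRETRIEVE, log);	/* fetch the session log */
	(void) close(fd);
}
#endif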
30158 
30159 #ifdef SD_FAULT_INJECTION
30160 static uint_t   sd_fault_injection_on = 0;
30161 
30162 /*
30163  *    Function: sd_faultinjection_ioctl()
30164  *
30165  * Description: This routine is the driver entry point for handling
30166  *              fault injection ioctls, which inject errors into the
30167  *              layer model.
30168  *
30169  *   Arguments: cmd	- the ioctl cmd received
30170  *		arg	- the user argument; also used to return data
30171  */
30172 
30173 static void
30174 sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un)
30175 {
30176 	uint_t i;
30177 	uint_t rval;
30178 
30179 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
30180 
30181 	mutex_enter(SD_MUTEX(un));
30182 
30183 	switch (cmd) {
30184 	case SDIOCRUN:
30185 		/* Allow pushed faults to be injected */
30186 		SD_INFO(SD_LOG_SDTEST, un,
30187 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
30188 
30189 		sd_fault_injection_on = 1;
30190 
30191 		SD_INFO(SD_LOG_IOERR, un,
30192 		    "sd_faultinjection_ioctl: run finished\n");
30193 		break;
30194 
30195 	case SDIOCSTART:
30196 		/* Start Injection Session */
30197 		SD_INFO(SD_LOG_SDTEST, un,
30198 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
30199 
30200 		sd_fault_injection_on = 0;
30201 		un->sd_injection_mask = 0xFFFFFFFF;
30202 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30203 			un->sd_fi_fifo_pkt[i] = NULL;
30204 			un->sd_fi_fifo_xb[i] = NULL;
30205 			un->sd_fi_fifo_un[i] = NULL;
30206 			un->sd_fi_fifo_arq[i] = NULL;
30207 		}
30208 		un->sd_fi_fifo_start = 0;
30209 		un->sd_fi_fifo_end = 0;
30210 
30211 		mutex_enter(&(un->un_fi_mutex));
30212 		un->sd_fi_log[0] = '\0';
30213 		un->sd_fi_buf_len = 0;
30214 		mutex_exit(&(un->un_fi_mutex));
30215 
30216 		SD_INFO(SD_LOG_IOERR, un,
30217 		    "sd_faultinjection_ioctl: start finished\n");
30218 		break;
30219 
30220 	case SDIOCSTOP:
30221 		/* Stop Injection Session */
30222 		SD_INFO(SD_LOG_SDTEST, un,
30223 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
30224 		sd_fault_injection_on = 0;
30225 		un->sd_injection_mask = 0x0;
30226 
30227 		/* Empty stray or unused structs from fifo */
30228 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30229 			if (un->sd_fi_fifo_pkt[i] != NULL) {
30230 				kmem_free(un->sd_fi_fifo_pkt[i],
30231 				    sizeof (struct sd_fi_pkt));
30232 			}
30233 			if (un->sd_fi_fifo_xb[i] != NULL) {
30234 				kmem_free(un->sd_fi_fifo_xb[i],
30235 				    sizeof (struct sd_fi_xb));
30236 			}
30237 			if (un->sd_fi_fifo_un[i] != NULL) {
30238 				kmem_free(un->sd_fi_fifo_un[i],
30239 				    sizeof (struct sd_fi_un));
30240 			}
30241 			if (un->sd_fi_fifo_arq[i] != NULL) {
30242 				kmem_free(un->sd_fi_fifo_arq[i],
30243 				    sizeof (struct sd_fi_arq));
30244 			}
30245 			un->sd_fi_fifo_pkt[i] = NULL;
30246 			un->sd_fi_fifo_un[i] = NULL;
30247 			un->sd_fi_fifo_xb[i] = NULL;
30248 			un->sd_fi_fifo_arq[i] = NULL;
30249 		}
30250 		un->sd_fi_fifo_start = 0;
30251 		un->sd_fi_fifo_end = 0;
30252 
30253 		SD_INFO(SD_LOG_IOERR, un,
30254 		    "sd_faultinjection_ioctl: stop finished\n");
30255 		break;
30256 
30257 	case SDIOCINSERTPKT:
30258 		/* Store a packet struct to be pushed onto fifo */
30259 		SD_INFO(SD_LOG_SDTEST, un,
30260 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
30261 
30262 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30263 
30264 		sd_fault_injection_on = 0;
30265 
30266 		/* No more than SD_FI_MAX_ERROR entries allowed in the queue */
30267 		if (un->sd_fi_fifo_pkt[i] != NULL) {
30268 			kmem_free(un->sd_fi_fifo_pkt[i],
30269 			    sizeof (struct sd_fi_pkt));
30270 		}
30271 		if (arg != NULL) {
30272 			un->sd_fi_fifo_pkt[i] =
30273 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
30274 			if (un->sd_fi_fifo_pkt[i] == NULL) {
30275 				/* Alloc failed; don't store anything */
30276 				break;
30277 			}
30278 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
30279 			    sizeof (struct sd_fi_pkt), 0);
30280 			if (rval == -1) {
30281 				kmem_free(un->sd_fi_fifo_pkt[i],
30282 				    sizeof (struct sd_fi_pkt));
30283 				un->sd_fi_fifo_pkt[i] = NULL;
30284 			}
30285 		} else {
30286 			SD_INFO(SD_LOG_IOERR, un,
30287 			    "sd_faultinjection_ioctl: pkt null\n");
30288 		}
30289 		break;
30290 
30291 	case SDIOCINSERTXB:
30292 		/* Store an xb struct to be pushed onto fifo */
30293 		SD_INFO(SD_LOG_SDTEST, un,
30294 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
30295 
30296 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30297 
30298 		sd_fault_injection_on = 0;
30299 
30300 		if (un->sd_fi_fifo_xb[i] != NULL) {
30301 			kmem_free(un->sd_fi_fifo_xb[i],
30302 			    sizeof (struct sd_fi_xb));
30303 			un->sd_fi_fifo_xb[i] = NULL;
30304 		}
30305 		if (arg != NULL) {
30306 			un->sd_fi_fifo_xb[i] =
30307 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
30308 			if (un->sd_fi_fifo_xb[i] == NULL) {
30309 				/* Alloc failed; don't store anything */
30310 				break;
30311 			}
30312 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
30313 			    sizeof (struct sd_fi_xb), 0);
30314 
30315 			if (rval == -1) {
30316 				kmem_free(un->sd_fi_fifo_xb[i],
30317 				    sizeof (struct sd_fi_xb));
30318 				un->sd_fi_fifo_xb[i] = NULL;
30319 			}
30320 		} else {
30321 			SD_INFO(SD_LOG_IOERR, un,
30322 			    "sd_faultinjection_ioctl: xb null\n");
30323 		}
30324 		break;
30325 
30326 	case SDIOCINSERTUN:
30327 		/* Store a un struct to be pushed onto fifo */
30328 		SD_INFO(SD_LOG_SDTEST, un,
30329 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
30330 
30331 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30332 
30333 		sd_fault_injection_on = 0;
30334 
30335 		if (un->sd_fi_fifo_un[i] != NULL) {
30336 			kmem_free(un->sd_fi_fifo_un[i],
30337 			    sizeof (struct sd_fi_un));
30338 			un->sd_fi_fifo_un[i] = NULL;
30339 		}
30340 		if (arg != NULL) {
30341 			un->sd_fi_fifo_un[i] =
30342 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
30343 			if (un->sd_fi_fifo_un[i] == NULL) {
30344 				/* Alloc failed; don't store anything */
30345 				break;
30346 			}
30347 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
30348 			    sizeof (struct sd_fi_un), 0);
30349 			if (rval == -1) {
30350 				kmem_free(un->sd_fi_fifo_un[i],
30351 				    sizeof (struct sd_fi_un));
30352 				un->sd_fi_fifo_un[i] = NULL;
30353 			}
30354 
30355 		} else {
30356 			SD_INFO(SD_LOG_IOERR, un,
30357 			    "sd_faultinjection_ioctl: un null\n");
30358 		}
30359 
30360 		break;
30361 
30362 	case SDIOCINSERTARQ:
30363 		/* Store an arq struct to be pushed onto fifo */
30364 		SD_INFO(SD_LOG_SDTEST, un,
30365 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
30366 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30367 
30368 		sd_fault_injection_on = 0;
30369 
30370 		if (un->sd_fi_fifo_arq[i] != NULL) {
30371 			kmem_free(un->sd_fi_fifo_arq[i],
30372 			    sizeof (struct sd_fi_arq));
30373 			un->sd_fi_fifo_arq[i] = NULL;
30374 		}
30375 		if (arg != NULL) {
30376 			un->sd_fi_fifo_arq[i] =
30377 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
30378 			if (un->sd_fi_fifo_arq[i] == NULL) {
30379 				/* Alloc failed; don't store anything */
30380 				break;
30381 			}
30382 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
30383 			    sizeof (struct sd_fi_arq), 0);
30384 			if (rval == -1) {
30385 				kmem_free(un->sd_fi_fifo_arq[i],
30386 				    sizeof (struct sd_fi_arq));
30387 				un->sd_fi_fifo_arq[i] = NULL;
30388 			}
30389 
30390 		} else {
30391 			SD_INFO(SD_LOG_IOERR, un,
30392 			    "sd_faultinjection_ioctl: arq null\n");
30393 		}
30394 
30395 		break;
30396 
30397 	case SDIOCPUSH:
30398 		/* Push stored xb, pkt, un, and arq onto fifo */
30399 		sd_fault_injection_on = 0;
30400 
30401 		if (arg != NULL) {
30402 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
30403 			if (rval != -1 &&
30404 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30405 				un->sd_fi_fifo_end += i;
30406 			}
30407 		} else {
30408 			SD_INFO(SD_LOG_IOERR, un,
30409 			    "sd_faultinjection_ioctl: push arg null\n");
30410 			if (un->sd_fi_fifo_end + 1 < SD_FI_MAX_ERROR) {
30411 				un->sd_fi_fifo_end++;
30412 			}
30413 		}
30414 		SD_INFO(SD_LOG_IOERR, un,
30415 		    "sd_faultinjection_ioctl: push to end=%d\n",
30416 		    un->sd_fi_fifo_end);
30417 		break;
30418 
30419 	case SDIOCRETRIEVE:
30420 		/* Return buffer of log from Injection session */
30421 		SD_INFO(SD_LOG_SDTEST, un,
30422 		    "sd_faultinjection_ioctl: Injecting Fault Retrieve");
30423 
30424 		sd_fault_injection_on = 0;
30425 
30426 		mutex_enter(&(un->un_fi_mutex));
30427 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
30428 		    un->sd_fi_buf_len+1, 0);
30429 		mutex_exit(&(un->un_fi_mutex));
30430 
30431 		if (rval == -1) {
30432 			/*
30433 			 * arg is possibly invalid; set
30434 			 * it to NULL for the return
30435 			 */
30436 			arg = NULL;
30437 		}
30438 		break;
30439 	}
30440 
30441 	mutex_exit(SD_MUTEX(un));
30442 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: exit\n");
30444 }
30445 
30446 
30447 /*
30448  *    Function: sd_injection_log()
30449  *
30450  * Description: This routine appends buf to the existing injection log,
30451  *              for retrieval via sd_faultinjection_ioctl(), for use in
30452  *              fault detection and recovery.
30453  *
30454  *   Arguments: buf - the string to add to the log
30455  */
30456 
30457 static void
30458 sd_injection_log(char *buf, struct sd_lun *un)
30459 {
30460 	uint_t len;
30461 
30462 	ASSERT(un != NULL);
30463 	ASSERT(buf != NULL);
30464 
30465 	mutex_enter(&(un->un_fi_mutex));
30466 
30467 	len = min(strlen(buf), 255);
30468 	/* Add logged value to Injection log to be returned later */
30469 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
30470 		uint_t	offset = strlen((char *)un->sd_fi_log);
30471 		char *destp = (char *)un->sd_fi_log + offset;
30472 		int i;
30473 		for (i = 0; i < len; i++) {
30474 			*destp++ = *buf++;
30475 		}
30476 		un->sd_fi_buf_len += len;
30477 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
30478 	}
30479 
30480 	mutex_exit(&(un->un_fi_mutex));
30481 }
30482 
30483 
30484 /*
30485  *    Function: sd_faultinjection()
30486  *
30487  * Description: This routine takes the pkt and changes its
30488  *		content based on the error injection scenario.
30489  *
30490  *   Arguments: pktp	- packet to be changed
30491  */
30492 
30493 static void
30494 sd_faultinjection(struct scsi_pkt *pktp)
30495 {
30496 	uint_t i;
30497 	struct sd_fi_pkt *fi_pkt;
30498 	struct sd_fi_xb *fi_xb;
30499 	struct sd_fi_un *fi_un;
30500 	struct sd_fi_arq *fi_arq;
30501 	struct buf *bp;
30502 	struct sd_xbuf *xb;
30503 	struct sd_lun *un;
30504 
30505 	ASSERT(pktp != NULL);
30506 
30507 	/* pull bp, xb and un from pktp */
30508 	bp = (struct buf *)pktp->pkt_private;
30509 	xb = SD_GET_XBUF(bp);
30510 	un = SD_GET_UN(bp);
30511 
30512 	ASSERT(un != NULL);
30513 
30514 	mutex_enter(SD_MUTEX(un));
30515 
30516 	SD_TRACE(SD_LOG_SDTEST, un,
30517 	    "sd_faultinjection: entry Injection from sdintr\n");
30518 
30519 	/* if injection is off return */
30520 	if (sd_fault_injection_on == 0 ||
30521 	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
30522 		mutex_exit(SD_MUTEX(un));
30523 		return;
30524 	}
30525 
30526 
30527 	/* take next set off fifo */
30528 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
30529 
30530 	fi_pkt = un->sd_fi_fifo_pkt[i];
30531 	fi_xb = un->sd_fi_fifo_xb[i];
30532 	fi_un = un->sd_fi_fifo_un[i];
30533 	fi_arq = un->sd_fi_fifo_arq[i];
30534 
30535 
30536 	/* set variables accordingly */
30537 	/* set pkt if it was on fifo */
30538 	if (fi_pkt != NULL) {
30539 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
30540 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
30541 		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
30542 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
30543 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
30544 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
30545 
30546 	}
30547 
30548 	/* set xb if it was on fifo */
30549 	if (fi_xb != NULL) {
30550 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
30551 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
30552 		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
30553 		SD_CONDSET(xb, xb, xb_victim_retry_count,
30554 		    "xb_victim_retry_count");
30555 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
30556 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
30557 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
30558 
30559 		/* copy in block data from sense */
30560 		if (fi_xb->xb_sense_data[0] != -1) {
30561 			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
30562 			    SENSE_LENGTH);
30563 		}
30564 
30565 		/* copy in extended sense codes */
30566 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_code,
30567 		    "es_code");
30568 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_key,
30569 		    "es_key");
30570 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_add_code,
30571 		    "es_add_code");
30572 		SD_CONDSET(((struct scsi_extended_sense *)xb), xb,
30573 		    es_qual_code, "es_qual_code");
30574 	}
30575 
30576 	/* set un if it was on fifo */
30577 	if (fi_un != NULL) {
30578 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
30579 		SD_CONDSET(un, un, un_ctype, "un_ctype");
30580 		SD_CONDSET(un, un, un_reset_retry_count,
30581 		    "un_reset_retry_count");
30582 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
30583 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
30584 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
30585 		SD_CONDSET(un, un, un_f_geometry_is_valid,
30586 		    "un_f_geometry_is_valid");
30587 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
30588 		    "un_f_allow_bus_device_reset");
30589 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
30590 
30591 	}
30592 
30593 	/* copy in auto request sense if it was on fifo */
30594 	if (fi_arq != NULL) {
30595 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
30596 	}
30597 
30598 	/* free structs */
30599 	if (un->sd_fi_fifo_pkt[i] != NULL) {
30600 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
30601 	}
30602 	if (un->sd_fi_fifo_xb[i] != NULL) {
30603 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
30604 	}
30605 	if (un->sd_fi_fifo_un[i] != NULL) {
30606 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
30607 	}
30608 	if (un->sd_fi_fifo_arq[i] != NULL) {
30609 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
30610 	}
30611 
30612 	/*
30613 	 * kmem_free does not guarantee that the pointer is set to
30614 	 * NULL. Since we use these pointers to determine whether
30615 	 * values were set, make sure they are always NULL after
30616 	 * the free.
30617 	 */
30618 	un->sd_fi_fifo_pkt[i] = NULL;
30619 	un->sd_fi_fifo_un[i] = NULL;
30620 	un->sd_fi_fifo_xb[i] = NULL;
30621 	un->sd_fi_fifo_arq[i] = NULL;
30622 
30623 	un->sd_fi_fifo_start++;
30624 
30625 	mutex_exit(SD_MUTEX(un));
30626 
30627 	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
30628 }
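/*
 * Note on the fifo bookkeeping above: sd_fi_fifo_start and sd_fi_fifo_end
 * are free-running counters used with modular indexing.  A short worked
 * example (values hypothetical): after three SDIOCINSERT* calls and an
 * SDIOCPUSH of 3, sd_fi_fifo_end is 3; each injection at sdintr time
 * consumes the slot at sd_fi_fifo_start % SD_FI_MAX_ERROR and advances
 * sd_fi_fifo_start, so injection stops once sd_fi_fifo_start catches up
 * with sd_fi_fifo_end.
 */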
30629 
30630 #endif /* SD_FAULT_INJECTION */
30631 
30632 /*
30633  * This routine is invoked in sd_unit_attach(). Before it is called, the
30634  * properties in the conf file, including the "hotpluggable" property,
30635  * must already have been processed.
30636  *
30637  * The sd driver distinguishes 3 different types of devices: removable media,
30638  * non-removable media, and hotpluggable. The differences are defined below:
30639  *
30640  * 1. Device ID
30641  *
30642  *     The device ID of a device is used to identify this device. Refer to
30643  *     ddi_devid_register(9F).
30644  *
30645  *     For a non-removable media disk device that can provide the 0x80 or 0x83
30646  *     VPD page (refer to the INQUIRY command of the SCSI SPC specification),
30647  *     a unique device ID is created to identify the device. For other
30648  *     non-removable media devices, a default device ID is created only if the
30649  *     device has at least 2 alternate cylinders. Otherwise, it has no devid.
30650  *
30651  *     -------------------------------------------------------
30652  *     removable media   hotpluggable  | Can Have Device ID
30653  *     -------------------------------------------------------
30654  *         false             false     |     Yes
30655  *         false             true      |     Yes
30656  *         true                x       |     No
30657  *     ------------------------------------------------------
30658  *
30659  *
30660  * 2. SCSI group 4 commands
30661  *
30662  *     In the SCSI specs, only some commands in the group 4 command set can
30663  *     use 8-byte addresses, which are needed to access >2TB storage spaces.
30664  *     Other commands have no such capability. Without group 4 support, it
30665  *     is impossible to make full use of the storage space of a disk with a
30666  *     capacity larger than 2TB.
30667  *
30668  *     -----------------------------------------------
30669  *     removable media   hotpluggable   LP64  |  Group
30670  *     -----------------------------------------------
30671  *           false          false       false |   1
30672  *           false          false       true  |   4
30673  *           false          true        false |   1
30674  *           false          true        true  |   4
30675  *           true             x           x   |   5
30676  *     -----------------------------------------------
30677  *
30678  *
30679  * 3. Check for VTOC Label
30680  *
30681  *     If a direct-access disk has no EFI label, sd will check if it has a
30682  *     valid VTOC label. Now, sd also does that check for removable media
30683  *     and hotpluggable devices.
30684  *
30685  *     --------------------------------------------------------------
30686  *     Direct-Access   removable media    hotpluggable |  Check Label
30687  *     -------------------------------------------------------------
30688  *         false          false           false        |   No
30689  *         false          false           true         |   No
30690  *         false          true            false        |   Yes
30691  *         false          true            true         |   Yes
30692  *         true            x                x          |   Yes
30693  *     --------------------------------------------------------------
30694  *
30695  *
30696  * 4. Building default VTOC label
30697  *
30698  *     As section 3 says, sd checks whether some kinds of devices have a VTOC
30699  *     label. If those devices have no valid VTOC label, sd(7d) will attempt
30700  *     to create a default VTOC for them. Currently sd creates a default VTOC
30701  *     label for all devices on the x86 platform (VTOC_16), but only for
30702  *     removable media devices on SPARC (VTOC_8).
30703  *
30704  *     -----------------------------------------------------------
30705  *       removable media hotpluggable platform   |   Default Label
30706  *     -----------------------------------------------------------
30707  *             false          false    sparc     |     No
30708  *             false          true      x86      |     Yes
30709  *             false          true     sparc     |     Yes
30710  *             true             x        x       |     Yes
30711  *     ----------------------------------------------------------
30712  *
30713  *
30714  * 5. Supported blocksizes of target devices
30715  *
30716  *     Sd supports non-512-byte blocksize for removable media devices only.
30717  *     For other devices, only 512-byte blocksize is supported. This may be
30718  *     changed in the near future because some RAID devices require a
30719  *     non-512-byte blocksize.
30720  *
30721  *     -----------------------------------------------------------
30722  *     removable media    hotpluggable    | non-512-byte blocksize
30723  *     -----------------------------------------------------------
30724  *           false          false         |   No
30725  *           false          true          |   No
30726  *           true             x           |   Yes
30727  *     -----------------------------------------------------------
30728  *
30729  *
30730  * 6. Automatic mount & unmount (i.e. vold)
30731  *
30732  *     The sd(7d) driver provides the DKIOCREMOVABLE ioctl, which is used
30733  *     to query whether a device is a removable media device. It returns 1
30734  *     for removable media devices, and 0 for others.
30735  *
30736  *     Vold treats a device as a removable one only if DKIOCREMOVABLE
30737  *     returns 1, and it does automounting only for removable media devices.
30738  *     To preserve the user experience and let vold continue automounting
30739  *     USB disk devices, DKIOCREMOVABLE still returns 1 for USB/1394 disk
30740  *     devices (see the userland sketch after sd_set_unit_attributes below).
30741  *
30742  *      ------------------------------------------------------
30743  *       removable media    hotpluggable   |  automatic mount
30744  *      ------------------------------------------------------
30745  *             false          false        |   No
30746  *             false          true         |   Yes
30747  *             true             x          |   Yes
30748  *      ------------------------------------------------------
30749  *
30750  *
30751  * 7. fdisk partition management
30752  *
30753  *     Fdisk is the traditional partitioning method on the x86 platform. The
30754  *     sd(7d) driver supports fdisk partitions only on x86. On SPARC, sd
30755  *     doesn't support fdisk partitions at all. Note: pcfs(7fs) can recognize
30756  *     fdisk partitions on both the x86 and SPARC platforms.
30757  *
30758  *     -----------------------------------------------------------
30759  *       platform   removable media  USB/1394  |  fdisk supported
30760  *     -----------------------------------------------------------
30761  *        x86         X               X        |       true
30762  *     ------------------------------------------------------------
30763  *        sparc       X               X        |       false
30764  *     ------------------------------------------------------------
30765  *
30766  *
30767  * 8. MBOOT/MBR
30768  *
30769  *     Although sd(7d) doesn't support fdisk on the SPARC platform, it does
30770  *     support reading/writing the mboot for removable media devices on SPARC.
30771  *
30772  *     -----------------------------------------------------------
30773  *       platform   removable media  USB/1394  |  mboot supported
30774  *     -----------------------------------------------------------
30775  *        x86         X               X        |       true
30776  *     ------------------------------------------------------------
30777  *        sparc      false           false     |       false
30778  *        sparc      false           true      |       true
30779  *        sparc      true            false     |       true
30780  *        sparc      true            true      |       true
30781  *     ------------------------------------------------------------
30782  *
30783  *
30784  * 9.  error handling during opening device
30785  *
30786  *     If opening a disk device fails, an errno is returned. For some kinds
30787  *     of errors, a different errno is returned depending on whether the
30788  *     device is a removable media device. This brings USB/1394 hard disks
30789  *     in line with expected hard disk behavior. It is not expected that this
30790  *     breaks any application.
30791  *
30792  *     ------------------------------------------------------
30793  *       removable media    hotpluggable   |  errno
30794  *     ------------------------------------------------------
30795  *             false          false        |   EIO
30796  *             false          true         |   EIO
30797  *             true             x          |   ENXIO
30798  *     ------------------------------------------------------
30799  *
30800  *
30801  * 10. off-by-1 workaround (bug 1175930, and 4996920) (x86 only)
30802  *
30803  *     [ this is a bit of very ugly history, soon to be removed ]
30804  *
30805  *     The SCSI READ_CAPACITY command returns the last valid logical block
30806  *     number, which starts from 0, so the real capacity is larger than the
30807  *     returned value by 1. However, because scdk.c (which was EOL'ed)
30808  *     directly used the logical block number as the capacity of disk
30809  *     devices, an off-by-1 workaround was applied. This workaround causes a
30810  *     fixed SCSI disk to lose a sector on the x86 platform, and precludes
30811  *     exchanging fixed hard disks between SPARC and x86.
30812  *
30813  *     ------------------------------------------------------
30814  *       removable media    hotplug        |   Off-by-1 works
30815  *     -------------------------------------------------------
30816  *             false          false        |     Yes
30817  *             false          true         |     No
30818  *             true           false        |     No
30819  *             true           true         |     No
30820  *     ------------------------------------------------------
30821  *
30822  *
30823  * 11. ioctls: DKIOCEJECT, CDROMEJECT
30824  *
30825  *     These IOCTLs are applicable only to removable media devices.
30826  *
30827  *     -----------------------------------------------------------
30828  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
30829  *     -----------------------------------------------------------
30830  *             false          false        |     No
30831  *             false          true         |     No
30832  *             true            x           |     Yes
30833  *     -----------------------------------------------------------
30834  *
30835  *
30836  * 12. Kstats for partitions
30837  *
30838  *     sd creates partition kstats for non-removable media devices. USB and
30839  *     Firewire hard disks now have partition kstats.
30840  *
30841  *      ------------------------------------------------------
30842  *       removable media    hotpluggable   |   kstat
30843  *      ------------------------------------------------------
30844  *             false          false        |    Yes
30845  *             false          true         |    Yes
30846  *             true             x          |    No
30847  *       ------------------------------------------------------
30848  *
30849  *
30850  * 13. Removable media & hotpluggable properties
30851  *
30852  *     Sd driver creates a "removable-media" property for removable media
30853  *     devices. A parent nexus driver creates a "hotpluggable" property if
30854  *     it supports hotplugging.
30855  *
30856  *     ---------------------------------------------------------------------
30857  *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
30858  *     ---------------------------------------------------------------------
30859  *       false            false       |    No                   No
30860  *       false            true        |    No                   Yes
30861  *       true             false       |    Yes                  No
30862  *       true             true        |    Yes                  Yes
30863  *     ---------------------------------------------------------------------
30864  *
30865  *
30866  * 14. Power Management
30867  *
30868  *     sd only power manages removable media devices or devices that support
30869  *     LOG_SENSE or have a "pm-capable" property (PSARC/2002/250).
30870  *
30871  *     A parent nexus that supports hotplugging can also set "pm-capable"
30872  *     if the disk can be power managed.
30873  *
30874  *     ------------------------------------------------------------
30875  *       removable media hotpluggable pm-capable  |   power manage
30876  *     ------------------------------------------------------------
30877  *             false          false     false     |     No
30878  *             false          false     true      |     Yes
30879  *             false          true      false     |     No
30880  *             false          true      true      |     Yes
30881  *             true             x        x        |     Yes
30882  *     ------------------------------------------------------------
30883  *
30884  *      USB and firewire hard disks can now be power managed independently
30885  *      of the framebuffer.
30886  *
30887  *
30888  * 15. Support for USB disks with capacity larger than 1TB
30889  *
30890  *     Currently, sd doesn't permit a fixed disk device with a capacity
30891  *     larger than 1TB to be used in a 32-bit operating system environment.
30892  *     However, sd doesn't enforce that for removable media devices. Instead,
30893  *     it assumes that removable media devices cannot have a capacity larger
30894  *     than 1TB. Therefore, using those devices on a 32-bit system is only
30895  *     partially supported, which can cause some unexpected results.
30896  *
30897  *     ---------------------------------------------------------------------
30898  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
30899  *     ---------------------------------------------------------------------
30900  *             false          false  |   true         |     No
30901  *             false          true   |   true         |     No
30902  *             true           false  |   true         |     Yes
30903  *             true           true   |   true         |     Yes
30904  *     ---------------------------------------------------------------------
30905  *
30906  *
30907  * 16. Check write-protection at open time
30908  *
30909  *     When a removable media device is opened for writing without the NDELAY
30910  *     flag, sd will check whether the device is writable. If a write-protected
30911  *     device is opened for writing without the NDELAY flag, the open aborts.
30912  *
30913  *     ------------------------------------------------------------
30914  *       removable media    USB/1394   |   WP Check
30915  *     ------------------------------------------------------------
30916  *             false          false    |     No
30917  *             false          true     |     No
30918  *             true           false    |     Yes
30919  *             true           true     |     Yes
30920  *     ------------------------------------------------------------
30921  *
30922  *
30923  * 17. syslog when corrupted VTOC is encountered
30924  *
30925  *      Currently, if an invalid VTOC is encountered, sd only prints a syslog
30926  *      message for fixed SCSI disks.
30927  *     ------------------------------------------------------------
30928  *       removable media    USB/1394   |   print syslog
30929  *     ------------------------------------------------------------
30930  *             false          false    |     Yes
30931  *             false          true     |     No
30932  *             true           false    |     No
30933  *             true           true     |     No
30934  *     ------------------------------------------------------------
30935  */
30936 static void
30937 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
30938 {
30939 	int	pm_capable_prop;
30940 
30941 	ASSERT(un->un_sd);
30942 	ASSERT(un->un_sd->sd_inq);
30943 
30944 #if defined(_SUNOS_VTOC_16)
30945 	/*
30946 	 * For VTOC_16 devices, the default label will be created for all
30947 	 * devices. (see sd_build_default_label)
30948 	 */
30949 	un->un_f_default_vtoc_supported = TRUE;
30950 #endif
30951 
30952 	if (un->un_sd->sd_inq->inq_rmb) {
30953 		/*
30954 		 * The media of this device is removable, and for this kind
30955 		 * of device it is possible to change the medium after
30956 		 * opening the device. Thus we should support this operation.
30957 		 */
30958 		un->un_f_has_removable_media = TRUE;
30959 
30960 #if defined(_SUNOS_VTOC_8)
30961 		/*
30962 		 * Note: currently, for VTOC_8 devices, a default label is
30963 		 * created for removable and hotpluggable devices only.
30964 		 */
30965 		un->un_f_default_vtoc_supported = TRUE;
30966 #endif
30967 		/*
30968 		 * Support non-512-byte blocksizes for removable media devices.
30969 		 */
30970 		un->un_f_non_devbsize_supported = TRUE;
30971 
30972 		/*
30973 		 * Assume that all removable media devices support DOOR_LOCK
30974 		 */
30975 		un->un_f_doorlock_supported = TRUE;
30976 
30977 		/*
30978 		 * A removable media device may be opened with the NDELAY
30979 		 * flag when there is no media in the drive; in that case we
30980 		 * don't care whether the device is writable. Without the
30981 		 * NDELAY flag, we need to check if the media is write-protected.
30982 		 */
30983 		un->un_f_chk_wp_open = TRUE;
30984 
30985 		/*
30986 		 * Need to start a SCSI watch thread to monitor the media
30987 		 * state; when media is inserted or ejected, notify syseventd.
30988 		 */
30989 		un->un_f_monitor_media_state = TRUE;
30990 
30991 		/*
30992 		 * Some devices don't support START_STOP_UNIT command.
30993 		 * Therefore, we'd better check if a device supports it
30994 		 * before sending it.
30995 		 */
30996 		un->un_f_check_start_stop = TRUE;
30997 
30998 		/*
30999 		 * support eject media ioctl:
31000 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
31001 		 */
31002 		un->un_f_eject_media_supported = TRUE;
31003 
31004 		/*
31005 		 * Because many removable-media devices don't support
31006 		 * LOG_SENSE, we cannot use this command to check whether
31007 		 * a removable media device supports power management.
31008 		 * We assume that they support power management via the
31009 		 * START_STOP_UNIT command and can be spun up and down
31010 		 * without limitations.
31011 		 */
31012 		un->un_f_pm_supported = TRUE;
31013 
31014 		/*
31015 		 * Need to create a zero-length (Boolean) property
31016 		 * "removable-media" for the removable media devices.
31017 		 * Note that the return value of ddi_prop_create() is not
31018 		 * checked: if the property cannot be created, we do not
31019 		 * want the attach to fail altogether. This is consistent
31020 		 * with other property creation in attach.
31021 		 */
31022 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
31023 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
31024 
31025 	} else {
31026 		/*
31027 		 * create device ID for device
31028 		 */
31029 		un->un_f_devid_supported = TRUE;
31030 
31031 		/*
31032 		 * Spin up non-removable-media devices once they are attached.
31033 		 */
31034 		un->un_f_attach_spinup = TRUE;
31035 
31036 		/*
31037 		 * According to the SCSI specification, sense data has two
31038 		 * formats: fixed format and descriptor format. At present, we
31039 		 * don't support descriptor format sense data for removable
31040 		 * media.
31041 		 */
31042 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31043 			un->un_f_descr_format_supported = TRUE;
31044 		}
31045 
31046 		/*
31047 		 * kstats are created only for non-removable media devices.
31048 		 *
31049 		 * Set this in sd.conf to 0 in order to disable kstats.  The
31050 		 * default is 1, so they are enabled by default.
31051 		 */
31052 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
31053 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
31054 		    "enable-partition-kstats", 1));
31055 
31056 		/*
31057 		 * Check if HBA has set the "pm-capable" property.
31058 		 * If "pm-capable" exists and is non-zero then we can
31059 		 * power manage the device without checking the start/stop
31060 		 * cycle count log sense page.
31061 		 *
31062 		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
31063 		 * then we should not power manage the device.
31064 		 *
31065 		 * If "pm-capable" doesn't exist then pm_capable_prop will
31066 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
31067 		 * sd will check the start/stop cycle count log sense page
31068 		 * and power manage the device if the cycle count limit has
31069 		 * not been exceeded.
31070 		 */
31071 		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
31072 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
31073 		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
31074 			un->un_f_log_sense_supported = TRUE;
31075 		} else {
31076 			/*
31077 			 * pm-capable property exists.
31078 			 *
31079 			 * Convert "TRUE" values for pm_capable_prop to
31080 			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
31081 			 * later. "TRUE" values are any values except
31082 			 * SD_PM_CAPABLE_FALSE (0) and
31083 			 * SD_PM_CAPABLE_UNDEFINED (-1)
31084 			 */
31085 			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
31086 				un->un_f_log_sense_supported = FALSE;
31087 			} else {
31088 				un->un_f_pm_supported = TRUE;
31089 			}
31090 
31091 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
31092 			    "sd_unit_attach: un:0x%p pm-capable "
31093 			    "property set to %d.\n", un, un->un_f_pm_supported);
31094 		}
31095 	}
31096 
31097 	if (un->un_f_is_hotpluggable) {
31098 #if defined(_SUNOS_VTOC_8)
31099 		/*
31100 		 * Note: currently, for VTOC_8 devices, a default label is
31101 		 * created for removable and hotpluggable devices only.
31102 		 */
31103 		un->un_f_default_vtoc_supported = TRUE;
31104 #endif
31105 
31106 		/*
31107 		 * Temporarily, let hotpluggable devices pretend to be
31108 		 * removable-media devices for vold.
31109 		 */
31110 		un->un_f_monitor_media_state = TRUE;
31111 
31112 		un->un_f_check_start_stop = TRUE;
31113 
31114 	}
31115 
31116 	/*
31117 	 * By default, only DIRECT ACCESS devices and CDs will have Sun
31118 	 * labels.
31119 	 */
31120 	if ((SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) ||
31121 	    (un->un_sd->sd_inq->inq_rmb)) {
31122 		/*
31123 		 * Direct access devices have a disk label.
31124 		 */
31125 		un->un_f_vtoc_label_supported = TRUE;
31126 	}
31127 
31128 	/*
31129 	 * Fdisk partitions are supported for all direct access devices on
31130 	 * the x86 platform, but only for removable media and hotpluggable
31131 	 * devices on the SPARC platform. Below, we will set the following
31132 	 * flag to FALSE if the current device is not a removable media or
31133 	 * hotpluggable device and sd is running on the SPARC platform.
31134 	 */
31135 	if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31136 		un->un_f_mboot_supported = TRUE;
31137 	}
31138 
31139 	if (!un->un_f_is_hotpluggable &&
31140 	    !un->un_sd->sd_inq->inq_rmb) {
31141 
31142 #if defined(_SUNOS_VTOC_8)
31143 		/*
31144 		 * Don't support fdisk on fixed disks.
31145 		 */
31146 		un->un_f_mboot_supported = FALSE;
31147 #endif
31148 
31149 		/*
31150 		 * Fixed disks support SYNC CACHE.
31151 		 */
31152 		un->un_f_sync_cache_supported = TRUE;
31153 
31154 		/*
31155 		 * For a fixed disk, if its VTOC is not valid, we will write
31156 		 * an error log entry to the system log.
31157 		 */
31158 		if (un->un_f_vtoc_label_supported)
31159 			un->un_f_vtoc_errlog_supported = TRUE;
31160 	}
31161 }
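/*
 * A minimal userland sketch (referenced from item 6 in the block comment
 * above) of how a consumer such as vold might query the DKIOCREMOVABLE
 * ioctl.  The path handling and function name are hypothetical.
 */
#if 0	/* illustrative sketch only; not compiled */
#include <fcntl.h>
#include <unistd.h>
#include <stropts.h>	/* ioctl(2) */
#include <sys/dkio.h>

static int
example_is_removable(const char *path)
{
	int removable = 0;
	int fd = open(path, O_RDONLY | O_NDELAY);

	if (fd < 0)
		return (-1);
	/* Returns 1 for removable media (and USB/1394) devices, else 0. */
	if (ioctl(fd, DKIOCREMOVABLE, &removable) < 0)
		removable = -1;
	(void) close(fd);
	return (removable);
}
#endif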
31162