xref: /titanic_50/usr/src/uts/common/io/scsi/targets/sd.c (revision 382dbd461c555f1c7e304a961fd0d4458d958ca2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * SCSI disk target driver.
30  */
31 #include <sys/scsi/scsi.h>
32 #include <sys/dkbad.h>
33 #include <sys/dklabel.h>
34 #include <sys/dkio.h>
35 #include <sys/fdio.h>
36 #include <sys/cdio.h>
37 #include <sys/mhd.h>
38 #include <sys/vtoc.h>
39 #include <sys/dktp/fdisk.h>
40 #include <sys/file.h>
41 #include <sys/stat.h>
42 #include <sys/kstat.h>
43 #include <sys/vtrace.h>
44 #include <sys/note.h>
45 #include <sys/thread.h>
46 #include <sys/proc.h>
47 #include <sys/efi_partition.h>
48 #include <sys/var.h>
49 #include <sys/aio_req.h>
50 
51 #ifdef __lock_lint
52 #define	_LP64
53 #define	__amd64
54 #endif
55 
56 #if (defined(__fibre))
57 /* Note: is there a leadville version of the following? */
58 #include <sys/fc4/fcal_linkapp.h>
59 #endif
60 #include <sys/taskq.h>
61 #include <sys/uuid.h>
62 #include <sys/byteorder.h>
63 #include <sys/sdt.h>
64 
65 #include "sd_xbuf.h"
66 
67 #include <sys/scsi/targets/sddef.h>
68 
69 
70 /*
71  * Loadable module info.
72  */
73 #if (defined(__fibre))
74 #define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver %I%"
75 char _depends_on[]	= "misc/scsi drv/fcp";
76 #else
77 #define	SD_MODULE_NAME	"SCSI Disk Driver %I%"
78 char _depends_on[]	= "misc/scsi";
79 #endif
80 
81 /*
82  * Define the interconnect type, to allow the driver to distinguish
83  * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
84  *
85  * This is really for backward compatibility. In the future, the driver
86  * should actually check the "interconnect-type" property as reported by
87  * the HBA; however at present this property is not defined by all HBAs,
88  * so we will use this #define (1) to permit the driver to run in
89  * backward-compatibility mode; and (2) to print a notification message
90  * if an FC HBA does not support the "interconnect-type" property.  The
91  * behavior of the driver will be to assume parallel SCSI behaviors unless
92  * the "interconnect-type" property is defined by the HBA **AND** has a
93  * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
94  * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
95  * Channel behaviors (as per the old ssd).  (Note that the
96  * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
97  * will result in the driver assuming parallel SCSI behaviors.)
98  *
99  * (see common/sys/scsi/impl/services.h)
100  *
101  * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
102  * since some FC HBAs may already support that, and there is some code in
103  * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
104  * default would confuse that code, and besides things should work fine
105  * anyways if the FC HBA already reports INTERCONNECT_FABRIC for the
106  * "interconnect_type" property.
107  */
108 #if (defined(__fibre))
109 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
110 #else
111 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
112 #endif
113 
114 /*
115  * The name of the driver, established from the module name in _init.
116  */
117 static	char *sd_label			= NULL;
118 
119 /*
120  * Driver name is unfortunately prefixed on some driver.conf properties.
121  */
122 #if (defined(__fibre))
123 #define	sd_max_xfer_size		ssd_max_xfer_size
124 #define	sd_config_list			ssd_config_list
125 static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
126 static	char *sd_config_list		= "ssd-config-list";
127 #else
128 static	char *sd_max_xfer_size		= "sd_max_xfer_size";
129 static	char *sd_config_list		= "sd-config-list";
130 #endif
131 
132 /*
133  * Driver global variables
134  */
135 
136 #if (defined(__fibre))
137 /*
138  * These #defines are to avoid namespace collisions that occur because this
139  * code is currently used to compile two separate driver modules: sd and ssd.
140  * All global variables need to be treated this way (even if declared static)
141  * in order to allow the debugger to resolve the names properly.
142  * It is anticipated that in the near future the ssd module will be obsoleted,
143  * at which time this namespace issue should go away.
144  */
145 #define	sd_state			ssd_state
146 #define	sd_io_time			ssd_io_time
147 #define	sd_failfast_enable		ssd_failfast_enable
148 #define	sd_ua_retry_count		ssd_ua_retry_count
149 #define	sd_report_pfa			ssd_report_pfa
150 #define	sd_max_throttle			ssd_max_throttle
151 #define	sd_min_throttle			ssd_min_throttle
152 #define	sd_rot_delay			ssd_rot_delay
153 
154 #define	sd_retry_on_reservation_conflict	\
155 					ssd_retry_on_reservation_conflict
156 #define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
157 #define	sd_resv_conflict_name		ssd_resv_conflict_name
158 
159 #define	sd_component_mask		ssd_component_mask
160 #define	sd_level_mask			ssd_level_mask
161 #define	sd_debug_un			ssd_debug_un
162 #define	sd_error_level			ssd_error_level
163 
164 #define	sd_xbuf_active_limit		ssd_xbuf_active_limit
165 #define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit
166 
167 #define	sd_tr				ssd_tr
168 #define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
169 #define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
170 #define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
171 #define	sd_check_media_time		ssd_check_media_time
172 #define	sd_wait_cmds_complete		ssd_wait_cmds_complete
173 #define	sd_label_mutex			ssd_label_mutex
174 #define	sd_detach_mutex			ssd_detach_mutex
175 #define	sd_log_buf			ssd_log_buf
176 #define	sd_log_mutex			ssd_log_mutex
177 
178 #define	sd_disk_table			ssd_disk_table
179 #define	sd_disk_table_size		ssd_disk_table_size
180 #define	sd_sense_mutex			ssd_sense_mutex
181 #define	sd_cdbtab			ssd_cdbtab
182 
183 #define	sd_cb_ops			ssd_cb_ops
184 #define	sd_ops				ssd_ops
185 #define	sd_additional_codes		ssd_additional_codes
186 
187 #define	sd_minor_data			ssd_minor_data
188 #define	sd_minor_data_efi		ssd_minor_data_efi
189 
190 #define	sd_tq				ssd_tq
191 #define	sd_wmr_tq			ssd_wmr_tq
192 #define	sd_taskq_name			ssd_taskq_name
193 #define	sd_wmr_taskq_name		ssd_wmr_taskq_name
194 #define	sd_taskq_minalloc		ssd_taskq_minalloc
195 #define	sd_taskq_maxalloc		ssd_taskq_maxalloc
196 
197 #define	sd_dump_format_string		ssd_dump_format_string
198 
199 #define	sd_iostart_chain		ssd_iostart_chain
200 #define	sd_iodone_chain			ssd_iodone_chain
201 
202 #define	sd_pm_idletime			ssd_pm_idletime
203 
204 #define	sd_force_pm_supported		ssd_force_pm_supported
205 
206 #define	sd_dtype_optical_bind		ssd_dtype_optical_bind
207 
208 #endif
209 
210 
211 #ifdef	SDDEBUG
212 int	sd_force_pm_supported		= 0;
213 #endif	/* SDDEBUG */
214 
/*
 * Driver-wide tunables and state anchors.  Defaults (the SD_* constants
 * and TRUE) come from sddef.h / system headers; for the fibre (ssd)
 * build these names are remapped to ssd_* by the #defines earlier in
 * this file.  Individual semantics follow the names — see sddef.h.
 */
215 void *sd_state				= NULL;
216 int sd_io_time				= SD_IO_TIME;
217 int sd_failfast_enable			= 1;
218 int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
219 int sd_report_pfa			= 1;
220 int sd_max_throttle			= SD_MAX_THROTTLE;
221 int sd_min_throttle			= SD_MIN_THROTTLE;
222 int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
223 int sd_qfull_throttle_enable		= TRUE;
224 
225 int sd_retry_on_reservation_conflict	= 1;
226 int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
227 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))
228 
/* NOTE(review): -1 appears to mean "unset"/auto-bind — confirm at consumer */
229 static int sd_dtype_optical_bind	= -1;
230 
231 /* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
232 static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";
233 
234 /*
235  * Global data for debug logging. To enable debug printing, sd_component_mask
236  * and sd_level_mask should be set to the desired bit patterns as outlined in
237  * sddef.h.
238  */
239 uint_t	sd_component_mask		= 0x0;
240 uint_t	sd_level_mask			= 0x0;
241 struct	sd_lun *sd_debug_un		= NULL;
242 uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;
243 
244 /* Note: these may go away in the future... */
245 static uint32_t	sd_xbuf_active_limit	= 512;
246 static uint32_t sd_xbuf_reserve_limit	= 16;
247 
248 static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };
249 
250 /*
251  * Timer value used to reset the throttle after it has been reduced
252  * (typically in response to TRAN_BUSY or STATUS_QFULL)
253  */
254 static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
255 static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;
256 
257 /*
258  * Interval value associated with the media change scsi watch.
259  */
260 static int sd_check_media_time		= 3000000;
261 
262 /*
263  * Wait value used for in progress operations during a DDI_SUSPEND
264  */
265 static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;
266 
267 /*
268  * sd_label_mutex protects a static buffer used in the disk label
269  * component of the driver
270  */
271 static kmutex_t sd_label_mutex;
272 
273 /*
274  * sd_detach_mutex protects un_layer_count, un_detach_count, and
275  * un_opens_in_progress in the sd_lun structure.
276  */
277 static kmutex_t sd_detach_mutex;
278 
279 _NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
280 	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))
281 
282 /*
283  * Global buffer and mutex for debug logging
284  */
285 static char	sd_log_buf[1024];
286 static kmutex_t	sd_log_mutex;
287 
288 
289 /*
290  * "Smart" Probe Caching structs, globals, #defines, etc.
291  * For parallel scsi and non-self-identify device only.
292  */
293 
294 /*
295  * The following resources and routines are implemented to support
296  * "smart" probing, which caches the scsi_probe() results in an array,
297  * in order to help avoid long probe times.
298  */
/*
 * One probe cache per parent (HBA) node: caches scsi_probe() results
 * per target so repeated probes of the same bus are fast.
 */
299 struct sd_scsi_probe_cache {
300 	struct	sd_scsi_probe_cache	*next;	/* next cache; head is sd_scsi_probe_cache_head */
301 	dev_info_t	*pdip;	/* parent dip — presumably the HBA node; confirm */
302 	int		cache[NTARGETS_WIDE];	/* cached scsi_probe() result per target */
303 };
304 
305 static kmutex_t	sd_scsi_probe_cache_mutex;
306 static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;
307 
308 /*
309  * Really we only need protection on the head of the linked list, but
310  * better safe than sorry.
311  */
312 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
313     sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))
314 
315 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
316     sd_scsi_probe_cache_head))
317 
318 
319 /*
320  * Vendor specific data name property declarations
321  */
322 
323 #if defined(__fibre) || defined(__i386) ||defined(__amd64)
324 
/*
 * sd_tunables is positional; the field order (inferred from the
 * initializers in this file, in particular tst_properties) is:
 * throttle, ctype, not-ready retries, busy retries, reset retries,
 * reserve-release time, min throttle, disksort-disabled flag,
 * lun-reset-enabled flag.  TODO confirm against sddef.h.
 * Seagate FC disks override only the throttle.
 */
325 static sd_tunables seagate_properties = {
326 	SEAGATE_THROTTLE_VALUE,
327 	0,
328 	0,
329 	0,
330 	0,
331 	0,
332 	0,
333 	0,
334 	0
335 };
336 
337 
338 static sd_tunables fujitsu_properties = {
339 	FUJITSU_THROTTLE_VALUE,
340 	0,
341 	0,
342 	0,
343 	0,
344 	0,
345 	0,
346 	0,
347 	0
348 };
349 
350 static sd_tunables ibm_properties = {
351 	IBM_THROTTLE_VALUE,
352 	0,
353 	0,
354 	0,
355 	0,
356 	0,
357 	0,
358 	0,
359 	0
360 };
361 
362 static sd_tunables purple_properties = {
363 	PURPLE_THROTTLE_VALUE,
364 	0,
365 	0,
366 	PURPLE_BUSY_RETRIES,
367 	PURPLE_RESET_RETRY_COUNT,
368 	PURPLE_RESERVE_RELEASE_TIME,
369 	0,
370 	0,
371 	0
372 };
373 
374 static sd_tunables sve_properties = {
375 	SVE_THROTTLE_VALUE,
376 	0,
377 	0,
378 	SVE_BUSY_RETRIES,
379 	SVE_RESET_RETRY_COUNT,
380 	SVE_RESERVE_RELEASE_TIME,
381 	SVE_MIN_THROTTLE_VALUE,
382 	SVE_DISKSORT_DISABLED_FLAG,
383 	0
384 };
385 
386 static sd_tunables maserati_properties = {
387 	0,
388 	0,
389 	0,
390 	0,
391 	0,
392 	0,
393 	0,
394 	MASERATI_DISKSORT_DISABLED_FLAG,
395 	MASERATI_LUN_RESET_ENABLED_FLAG
396 };
397 
398 static sd_tunables pirus_properties = {
399 	PIRUS_THROTTLE_VALUE,
400 	0,
401 	PIRUS_NRR_COUNT,
402 	PIRUS_BUSY_RETRIES,
403 	PIRUS_RESET_RETRY_COUNT,
404 	0,
405 	PIRUS_MIN_THROTTLE_VALUE,
406 	PIRUS_DISKSORT_DISABLED_FLAG,
407 	PIRUS_LUN_RESET_ENABLED_FLAG
408 };
409 
410 #endif
411 
412 #if (defined(__sparc) && !defined(__fibre)) || \
413 	(defined(__i386) || defined(__amd64))
414 
415 
416 static sd_tunables elite_properties = {
417 	ELITE_THROTTLE_VALUE,
418 	0,
419 	0,
420 	0,
421 	0,
422 	0,
423 	0,
424 	0,
425 	0
426 };
427 
428 static sd_tunables st31200n_properties = {
429 	ST31200N_THROTTLE_VALUE,
430 	0,
431 	0,
432 	0,
433 	0,
434 	0,
435 	0,
436 	0,
437 	0
438 };
439 
440 #endif /* Fibre or not */
441 
442 static sd_tunables lsi_properties_scsi = {
443 	LSI_THROTTLE_VALUE,
444 	0,
445 	LSI_NOTREADY_RETRIES,
446 	0,
447 	0,
448 	0,
449 	0,
450 	0,
451 	0
452 };
453 
454 static sd_tunables symbios_properties = {
455 	SYMBIOS_THROTTLE_VALUE,
456 	0,
457 	SYMBIOS_NOTREADY_RETRIES,
458 	0,
459 	0,
460 	0,
461 	0,
462 	0,
463 	0
464 };
465 
466 static sd_tunables lsi_properties = {
467 	0,
468 	0,
469 	LSI_NOTREADY_RETRIES,
470 	0,
471 	0,
472 	0,
473 	0,
474 	0,
475 	0
476 };
477 
478 static sd_tunables lsi_oem_properties = {
479 	0,
480 	0,
481 	LSI_OEM_NOTREADY_RETRIES,
482 	0,
483 	0,
484 	0,
485 	0,
486 	0,
487 	0
488 };
489 
490 
491 
492 #if (defined(SD_PROP_TST))
493 
494 #define	SD_TST_CTYPE_VAL	CTYPE_CDROM
495 #define	SD_TST_THROTTLE_VAL	16
496 #define	SD_TST_NOTREADY_VAL	12
497 #define	SD_TST_BUSY_VAL		60
498 #define	SD_TST_RST_RETRY_VAL	36
499 #define	SD_TST_RSV_REL_TIME	60
500 
501 static sd_tunables tst_properties = {
502 	SD_TST_THROTTLE_VAL,
503 	SD_TST_CTYPE_VAL,
504 	SD_TST_NOTREADY_VAL,
505 	SD_TST_BUSY_VAL,
506 	SD_TST_RST_RETRY_VAL,
507 	SD_TST_RSV_REL_TIME,
508 	0,
509 	0,
510 	0
511 };
512 #endif
513 
514 /* This is similar to the ANSI toupper implementation */
/*
 * NOTE: C is evaluated more than once — do not pass an expression with
 * side effects.
 */
515 #define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
516 
517 /*
518  * Static Driver Configuration Table
519  *
520  * This is the table of disks which need throttle adjustment (or, perhaps
521  * something else as defined by the flags at a future time.)  device_id
522  * is a string consisting of concatenated vid (vendor), pid (product/model)
523  * and revision strings as defined in the scsi_inquiry structure.  Offsets of
524  * the parts of the string are as defined by the sizes in the scsi_inquiry
525  * structure.  Device type is searched as far as the device_id string is
526  * defined.  Flags defines which values are to be set in the driver from the
527  * properties list.
528  *
529  * Entries below which begin and end with a "*" are a special case.
530  * These do not have a specific vendor, and the string which follows
531  * can appear anywhere in the 16 byte PID portion of the inquiry data.
532  *
533  * Entries below which begin and end with a " " (blank) are a special
534  * case. The comparison function will treat multiple consecutive blanks
535  * as equivalent to a single blank. For example, this causes a
536  * sd_disk_table entry of " NEC CDROM " to match a device's id string
537  * of  "NEC       CDROM".
538  *
539  * Note: The MD21 controller type has been obsoleted.
540  *	 ST318202F is a Legacy device
541  *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
542  *	 made with an FC connection. The entries here are a legacy.
543  */
544 static sd_disk_config_t sd_disk_table[] = {
545 #if defined(__fibre) || defined(__i386) || defined(__amd64)
546 	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
547 	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
548 	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
549 	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
550 	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
551 	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
552 	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
553 	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
554 	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
555 	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
556 	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
557 	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
558 	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
559 	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
560 	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
561 	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
562 	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
563 	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
564 	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
565 	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
566 	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
567 	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
568 	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
569 	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
570 	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
571 	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
572 	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
573 	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
574 	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
575 	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
576 	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
577 	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
578 	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
579 	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
580 	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
581 	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
582 	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
583 	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
584 	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
585 	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
586 	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
587 	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
588 			SD_CONF_BSET_BSY_RETRY_COUNT|
589 			SD_CONF_BSET_RST_RETRIES|
590 			SD_CONF_BSET_RSV_REL_TIME,
591 		&purple_properties },
592 	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
593 		SD_CONF_BSET_BSY_RETRY_COUNT|
594 		SD_CONF_BSET_RST_RETRIES|
595 		SD_CONF_BSET_RSV_REL_TIME|
596 		SD_CONF_BSET_MIN_THROTTLE|
597 		SD_CONF_BSET_DISKSORT_DISABLED,
598 		&sve_properties },
599 	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
600 			SD_CONF_BSET_BSY_RETRY_COUNT|
601 			SD_CONF_BSET_RST_RETRIES|
602 			SD_CONF_BSET_RSV_REL_TIME,
603 		&purple_properties },
604 	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
605 		SD_CONF_BSET_LUN_RESET_ENABLED,
606 		&maserati_properties },
607 	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
608 		SD_CONF_BSET_NRR_COUNT|
609 		SD_CONF_BSET_BSY_RETRY_COUNT|
610 		SD_CONF_BSET_RST_RETRIES|
611 		SD_CONF_BSET_MIN_THROTTLE|
612 		SD_CONF_BSET_DISKSORT_DISABLED|
613 		SD_CONF_BSET_LUN_RESET_ENABLED,
614 		&pirus_properties },
615 	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
616 		SD_CONF_BSET_NRR_COUNT|
617 		SD_CONF_BSET_BSY_RETRY_COUNT|
618 		SD_CONF_BSET_RST_RETRIES|
619 		SD_CONF_BSET_MIN_THROTTLE|
620 		SD_CONF_BSET_DISKSORT_DISABLED|
621 		SD_CONF_BSET_LUN_RESET_ENABLED,
622 		&pirus_properties },
623 	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
624 		SD_CONF_BSET_NRR_COUNT|
625 		SD_CONF_BSET_BSY_RETRY_COUNT|
626 		SD_CONF_BSET_RST_RETRIES|
627 		SD_CONF_BSET_MIN_THROTTLE|
628 		SD_CONF_BSET_DISKSORT_DISABLED|
629 		SD_CONF_BSET_LUN_RESET_ENABLED,
630 		&pirus_properties },
631 	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
632 		SD_CONF_BSET_NRR_COUNT|
633 		SD_CONF_BSET_BSY_RETRY_COUNT|
634 		SD_CONF_BSET_RST_RETRIES|
635 		SD_CONF_BSET_MIN_THROTTLE|
636 		SD_CONF_BSET_DISKSORT_DISABLED|
637 		SD_CONF_BSET_LUN_RESET_ENABLED,
638 		&pirus_properties },
639 	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
640 		SD_CONF_BSET_NRR_COUNT|
641 		SD_CONF_BSET_BSY_RETRY_COUNT|
642 		SD_CONF_BSET_RST_RETRIES|
643 		SD_CONF_BSET_MIN_THROTTLE|
644 		SD_CONF_BSET_DISKSORT_DISABLED|
645 		SD_CONF_BSET_LUN_RESET_ENABLED,
646 		&pirus_properties },
647 	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
648 		SD_CONF_BSET_NRR_COUNT|
649 		SD_CONF_BSET_BSY_RETRY_COUNT|
650 		SD_CONF_BSET_RST_RETRIES|
651 		SD_CONF_BSET_MIN_THROTTLE|
652 		SD_CONF_BSET_DISKSORT_DISABLED|
653 		SD_CONF_BSET_LUN_RESET_ENABLED,
654 		&pirus_properties },
655 	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
656 	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
657 	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
658 	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
659 	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
660 #endif /* fibre or NON-sparc platforms */
661 #if ((defined(__sparc) && !defined(__fibre)) ||\
662 	(defined(__i386) || defined(__amd64)))
663 	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
664 	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
665 	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
666 	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
667 	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
668 	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
669 	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
670 	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
671 	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
672 	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
673 	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
674 	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
675 	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
676 	    &symbios_properties },
677 	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
678 	    &lsi_properties_scsi },
679 #if defined(__i386) || defined(__amd64)
680 	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
681 				    | SD_CONF_BSET_READSUB_BCD
682 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
683 				    | SD_CONF_BSET_NO_READ_HEADER
684 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
685 
686 	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
687 				    | SD_CONF_BSET_READSUB_BCD
688 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
689 				    | SD_CONF_BSET_NO_READ_HEADER
690 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
691 #endif /* __i386 || __amd64 */
692 #endif /* sparc NON-fibre or NON-sparc platforms */
693 
694 #if (defined(SD_PROP_TST))
695 	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
696 				| SD_CONF_BSET_CTYPE
697 				| SD_CONF_BSET_NRR_COUNT
698 				| SD_CONF_BSET_FAB_DEVID
699 				| SD_CONF_BSET_NOCACHE
700 				| SD_CONF_BSET_BSY_RETRY_COUNT
701 				| SD_CONF_BSET_PLAYMSF_BCD
702 				| SD_CONF_BSET_READSUB_BCD
703 				| SD_CONF_BSET_READ_TOC_TRK_BCD
704 				| SD_CONF_BSET_READ_TOC_ADDR_BCD
705 				| SD_CONF_BSET_NO_READ_HEADER
706 				| SD_CONF_BSET_READ_CD_XD4
707 				| SD_CONF_BSET_RST_RETRIES
708 				| SD_CONF_BSET_RSV_REL_TIME
709 				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
710 #endif
711 };
712 
/* Number of entries in sd_disk_table[] above */
713 static const int sd_disk_table_size =
714 	sizeof (sd_disk_table)/ sizeof (sd_disk_config_t);
715 
716 
717 /*
718  * Return codes of sd_uselabel().
719  */
720 #define	SD_LABEL_IS_VALID		0
721 #define	SD_LABEL_IS_INVALID		1
722 
723 #define	SD_INTERCONNECT_PARALLEL	0
724 #define	SD_INTERCONNECT_FABRIC		1
725 #define	SD_INTERCONNECT_FIBRE		2
726 #define	SD_INTERCONNECT_SSA		3
727 #define	SD_IS_PARALLEL_SCSI(un)		\
728 	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
729 
730 /*
731  * Definitions used by device id registration routines
732  */
733 #define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
734 #define	VPD_PAGE_LENGTH		3	/* offset for page length data */
735 #define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
736 #define	WD_NODE			7	/* the whole disk minor */
737 
/* NOTE(review): presumably serializes access to a shared sense buffer — confirm */
738 static kmutex_t sd_sense_mutex = {0};
739 
740 /*
741  * Macros for updates of the driver state
742  */
/*
 * New_state: save the current state in un_last_state, then move the
 * unit to state 's'.  Uses the comma operator so the whole thing
 * remains a single expression.
 */
743 #define	New_state(un, s)        \
744 	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
/*
 * Restore_state: return the unit to the previously saved state.  Since
 * New_state() also records the current state first, two consecutive
 * Restore_state() calls toggle between the last two states.
 */
745 #define	Restore_state(un)	\
746 	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
747 
/*
 * Per-CDB-group parameters used when building read/write commands.
 * Each row pairs a CDB size (CDB_GROUPn) with the opcode group code and
 * two maxima — presumably the largest addressable block and the largest
 * transfer count expressible in that CDB format (e.g. Group 0: 21-bit
 * LBA, 8-bit count) — TODO confirm field names against struct
 * sd_cdbinfo in sddef.h.
 */
748 static struct sd_cdbinfo sd_cdbtab[] = {
749 	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
750 	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
751 	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
752 	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
753 };
754 
755 /*
756  * Specifies the number of seconds that must have elapsed since the last
757  * cmd. has completed for a device to be declared idle to the PM framework.
758  */
759 static int sd_pm_idletime = 1;
760 
761 /*
762  * Internal function prototypes
763  */
764 
765 #if (defined(__fibre))
766 /*
767  * These #defines are to avoid namespace collisions that occur because this
768  * code is currently used to compile two seperate driver modules: sd and ssd.
769  * All function names need to be treated this way (even if declared static)
770  * in order to allow the debugger to resolve the names properly.
771  * It is anticipated that in the near future the ssd module will be obsoleted,
772  * at which time this ugliness should go away.
773  */
774 #define	sd_log_trace			ssd_log_trace
775 #define	sd_log_info			ssd_log_info
776 #define	sd_log_err			ssd_log_err
777 #define	sdprobe				ssdprobe
778 #define	sdinfo				ssdinfo
779 #define	sd_prop_op			ssd_prop_op
780 #define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
781 #define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
782 #define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
783 #define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
784 #define	sd_spin_up_unit			ssd_spin_up_unit
785 #define	sd_enable_descr_sense		ssd_enable_descr_sense
786 #define	sd_set_mmc_caps			ssd_set_mmc_caps
787 #define	sd_read_unit_properties		ssd_read_unit_properties
788 #define	sd_process_sdconf_file		ssd_process_sdconf_file
789 #define	sd_process_sdconf_table		ssd_process_sdconf_table
790 #define	sd_sdconf_id_match		ssd_sdconf_id_match
791 #define	sd_blank_cmp			ssd_blank_cmp
792 #define	sd_chk_vers1_data		ssd_chk_vers1_data
793 #define	sd_set_vers1_properties		ssd_set_vers1_properties
794 #define	sd_validate_geometry		ssd_validate_geometry
795 
796 #if defined(_SUNOS_VTOC_16)
797 #define	sd_convert_geometry		ssd_convert_geometry
798 #endif
799 
800 #define	sd_resync_geom_caches		ssd_resync_geom_caches
801 #define	sd_read_fdisk			ssd_read_fdisk
802 #define	sd_get_physical_geometry	ssd_get_physical_geometry
803 #define	sd_get_virtual_geometry		ssd_get_virtual_geometry
804 #define	sd_update_block_info		ssd_update_block_info
805 #define	sd_swap_efi_gpt			ssd_swap_efi_gpt
806 #define	sd_swap_efi_gpe			ssd_swap_efi_gpe
807 #define	sd_validate_efi			ssd_validate_efi
808 #define	sd_use_efi			ssd_use_efi
809 #define	sd_uselabel			ssd_uselabel
810 #define	sd_build_default_label		ssd_build_default_label
811 #define	sd_has_max_chs_vals		ssd_has_max_chs_vals
812 #define	sd_inq_fill			ssd_inq_fill
813 #define	sd_register_devid		ssd_register_devid
814 #define	sd_get_devid_block		ssd_get_devid_block
815 #define	sd_get_devid			ssd_get_devid
816 #define	sd_create_devid			ssd_create_devid
817 #define	sd_write_deviceid		ssd_write_deviceid
818 #define	sd_check_vpd_page_support	ssd_check_vpd_page_support
819 #define	sd_setup_pm			ssd_setup_pm
820 #define	sd_create_pm_components		ssd_create_pm_components
821 #define	sd_ddi_suspend			ssd_ddi_suspend
822 #define	sd_ddi_pm_suspend		ssd_ddi_pm_suspend
823 #define	sd_ddi_resume			ssd_ddi_resume
824 #define	sd_ddi_pm_resume		ssd_ddi_pm_resume
825 #define	sdpower				ssdpower
826 #define	sdattach			ssdattach
827 #define	sddetach			ssddetach
828 #define	sd_unit_attach			ssd_unit_attach
829 #define	sd_unit_detach			ssd_unit_detach
830 #define	sd_set_unit_attributes		ssd_set_unit_attributes
831 #define	sd_create_minor_nodes		ssd_create_minor_nodes
832 #define	sd_create_errstats		ssd_create_errstats
833 #define	sd_set_errstats			ssd_set_errstats
834 #define	sd_set_pstats			ssd_set_pstats
835 #define	sddump				ssddump
836 #define	sd_scsi_poll			ssd_scsi_poll
837 #define	sd_send_polled_RQS		ssd_send_polled_RQS
838 #define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
839 #define	sd_init_event_callbacks		ssd_init_event_callbacks
840 #define	sd_event_callback		ssd_event_callback
841 #define	sd_cache_control		ssd_cache_control
842 #define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
843 #define	sd_make_device			ssd_make_device
844 #define	sdopen				ssdopen
845 #define	sdclose				ssdclose
846 #define	sd_ready_and_valid		ssd_ready_and_valid
847 #define	sdmin				ssdmin
848 #define	sdread				ssdread
849 #define	sdwrite				ssdwrite
850 #define	sdaread				ssdaread
851 #define	sdawrite			ssdawrite
852 #define	sdstrategy			ssdstrategy
853 #define	sdioctl				ssdioctl
854 #define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
855 #define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
856 #define	sd_checksum_iostart		ssd_checksum_iostart
857 #define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
858 #define	sd_pm_iostart			ssd_pm_iostart
859 #define	sd_core_iostart			ssd_core_iostart
860 #define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
861 #define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
862 #define	sd_checksum_iodone		ssd_checksum_iodone
863 #define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
864 #define	sd_pm_iodone			ssd_pm_iodone
865 #define	sd_initpkt_for_buf		ssd_initpkt_for_buf
866 #define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
867 #define	sd_setup_rw_pkt			ssd_setup_rw_pkt
868 #define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
869 #define	sd_buf_iodone			ssd_buf_iodone
870 #define	sd_uscsi_strategy		ssd_uscsi_strategy
871 #define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
872 #define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
873 #define	sd_uscsi_iodone			ssd_uscsi_iodone
874 #define	sd_xbuf_strategy		ssd_xbuf_strategy
875 #define	sd_xbuf_init			ssd_xbuf_init
876 #define	sd_pm_entry			ssd_pm_entry
877 #define	sd_pm_exit			ssd_pm_exit
878 
879 #define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
880 #define	sd_pm_timeout_handler		ssd_pm_timeout_handler
881 
882 #define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
883 #define	sdintr				ssdintr
884 #define	sd_start_cmds			ssd_start_cmds
885 #define	sd_send_scsi_cmd		ssd_send_scsi_cmd
886 #define	sd_bioclone_alloc		ssd_bioclone_alloc
887 #define	sd_bioclone_free		ssd_bioclone_free
888 #define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
889 #define	sd_shadow_buf_free		ssd_shadow_buf_free
890 #define	sd_print_transport_rejected_message	\
891 					ssd_print_transport_rejected_message
892 #define	sd_retry_command		ssd_retry_command
893 #define	sd_set_retry_bp			ssd_set_retry_bp
894 #define	sd_send_request_sense_command	ssd_send_request_sense_command
895 #define	sd_start_retry_command		ssd_start_retry_command
896 #define	sd_start_direct_priority_command	\
897 					ssd_start_direct_priority_command
898 #define	sd_return_failed_command	ssd_return_failed_command
899 #define	sd_return_failed_command_no_restart	\
900 					ssd_return_failed_command_no_restart
901 #define	sd_return_command		ssd_return_command
902 #define	sd_sync_with_callback		ssd_sync_with_callback
903 #define	sdrunout			ssdrunout
904 #define	sd_mark_rqs_busy		ssd_mark_rqs_busy
905 #define	sd_mark_rqs_idle		ssd_mark_rqs_idle
906 #define	sd_reduce_throttle		ssd_reduce_throttle
907 #define	sd_restore_throttle		ssd_restore_throttle
908 #define	sd_print_incomplete_msg		ssd_print_incomplete_msg
909 #define	sd_init_cdb_limits		ssd_init_cdb_limits
910 #define	sd_pkt_status_good		ssd_pkt_status_good
911 #define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
912 #define	sd_pkt_status_busy		ssd_pkt_status_busy
913 #define	sd_pkt_status_reservation_conflict	\
914 					ssd_pkt_status_reservation_conflict
915 #define	sd_pkt_status_qfull		ssd_pkt_status_qfull
916 #define	sd_handle_request_sense		ssd_handle_request_sense
917 #define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
918 #define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
919 #define	sd_validate_sense_data		ssd_validate_sense_data
920 #define	sd_decode_sense			ssd_decode_sense
921 #define	sd_print_sense_msg		ssd_print_sense_msg
922 #define	sd_extract_sense_info_descr	ssd_extract_sense_info_descr
923 #define	sd_sense_key_no_sense		ssd_sense_key_no_sense
924 #define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
925 #define	sd_sense_key_not_ready		ssd_sense_key_not_ready
926 #define	sd_sense_key_medium_or_hardware_error	\
927 					ssd_sense_key_medium_or_hardware_error
928 #define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
929 #define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
930 #define	sd_sense_key_fail_command	ssd_sense_key_fail_command
931 #define	sd_sense_key_blank_check	ssd_sense_key_blank_check
932 #define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
933 #define	sd_sense_key_default		ssd_sense_key_default
934 #define	sd_print_retry_msg		ssd_print_retry_msg
935 #define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
936 #define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
937 #define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
938 #define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
939 #define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
940 #define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
941 #define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
942 #define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
943 #define	sd_pkt_reason_default		ssd_pkt_reason_default
944 #define	sd_reset_target			ssd_reset_target
945 #define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
946 #define	sd_start_stop_unit_task		ssd_start_stop_unit_task
947 #define	sd_taskq_create			ssd_taskq_create
948 #define	sd_taskq_delete			ssd_taskq_delete
949 #define	sd_media_change_task		ssd_media_change_task
950 #define	sd_handle_mchange		ssd_handle_mchange
951 #define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
952 #define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
953 #define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
954 #define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
/*
 * BUGFIX: this entry previously mapped the symbol to itself (a
 * self-referential macro is left unexpanded by the preprocessor),
 * so the fibre build failed to rename it to the ssd_ prefix like
 * every other entry in this table.
 */
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
957 #define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
958 #define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
959 #define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
960 #define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
961 					ssd_send_scsi_PERSISTENT_RESERVE_IN
962 #define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
963 					ssd_send_scsi_PERSISTENT_RESERVE_OUT
964 #define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
965 #define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
966 					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
967 #define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
968 #define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
969 #define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
970 #define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
971 #define	sd_alloc_rqs			ssd_alloc_rqs
972 #define	sd_free_rqs			ssd_free_rqs
973 #define	sd_dump_memory			ssd_dump_memory
974 #define	sd_uscsi_ioctl			ssd_uscsi_ioctl
975 #define	sd_get_media_info		ssd_get_media_info
976 #define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
977 #define	sd_dkio_get_geometry		ssd_dkio_get_geometry
978 #define	sd_dkio_set_geometry		ssd_dkio_set_geometry
979 #define	sd_dkio_get_partition		ssd_dkio_get_partition
980 #define	sd_dkio_set_partition		ssd_dkio_set_partition
981 #define	sd_dkio_partition		ssd_dkio_partition
982 #define	sd_dkio_get_vtoc		ssd_dkio_get_vtoc
983 #define	sd_dkio_get_efi			ssd_dkio_get_efi
984 #define	sd_build_user_vtoc		ssd_build_user_vtoc
985 #define	sd_dkio_set_vtoc		ssd_dkio_set_vtoc
986 #define	sd_dkio_set_efi			ssd_dkio_set_efi
987 #define	sd_build_label_vtoc		ssd_build_label_vtoc
988 #define	sd_write_label			ssd_write_label
989 #define	sd_clear_vtoc			ssd_clear_vtoc
990 #define	sd_clear_efi			ssd_clear_efi
991 #define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
992 #define	sd_setup_next_xfer		ssd_setup_next_xfer
993 #define	sd_dkio_get_temp		ssd_dkio_get_temp
994 #define	sd_dkio_get_mboot		ssd_dkio_get_mboot
995 #define	sd_dkio_set_mboot		ssd_dkio_set_mboot
996 #define	sd_setup_default_geometry	ssd_setup_default_geometry
997 #define	sd_update_fdisk_and_vtoc	ssd_update_fdisk_and_vtoc
998 #define	sd_check_mhd			ssd_check_mhd
999 #define	sd_mhd_watch_cb			ssd_mhd_watch_cb
1000 #define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
1001 #define	sd_sname			ssd_sname
1002 #define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
1003 #define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
1004 #define	sd_take_ownership		ssd_take_ownership
1005 #define	sd_reserve_release		ssd_reserve_release
1006 #define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
1007 #define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
1008 #define	sd_persistent_reservation_in_read_keys	\
1009 					ssd_persistent_reservation_in_read_keys
1010 #define	sd_persistent_reservation_in_read_resv	\
1011 					ssd_persistent_reservation_in_read_resv
1012 #define	sd_mhdioc_takeown		ssd_mhdioc_takeown
1013 #define	sd_mhdioc_failfast		ssd_mhdioc_failfast
1014 #define	sd_mhdioc_release		ssd_mhdioc_release
1015 #define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
1016 #define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
1017 #define	sd_mhdioc_inresv		ssd_mhdioc_inresv
1018 #define	sr_change_blkmode		ssr_change_blkmode
1019 #define	sr_change_speed			ssr_change_speed
1020 #define	sr_atapi_change_speed		ssr_atapi_change_speed
1021 #define	sr_pause_resume			ssr_pause_resume
1022 #define	sr_play_msf			ssr_play_msf
1023 #define	sr_play_trkind			ssr_play_trkind
1024 #define	sr_read_all_subcodes		ssr_read_all_subcodes
1025 #define	sr_read_subchannel		ssr_read_subchannel
1026 #define	sr_read_tocentry		ssr_read_tocentry
1027 #define	sr_read_tochdr			ssr_read_tochdr
1028 #define	sr_read_cdda			ssr_read_cdda
1029 #define	sr_read_cdxa			ssr_read_cdxa
1030 #define	sr_read_mode1			ssr_read_mode1
1031 #define	sr_read_mode2			ssr_read_mode2
1032 #define	sr_read_cd_mode2		ssr_read_cd_mode2
1033 #define	sr_sector_mode			ssr_sector_mode
1034 #define	sr_eject			ssr_eject
1035 #define	sr_ejected			ssr_ejected
1036 #define	sr_check_wp			ssr_check_wp
1037 #define	sd_check_media			ssd_check_media
1038 #define	sd_media_watch_cb		ssd_media_watch_cb
1039 #define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
1040 #define	sr_volume_ctrl			ssr_volume_ctrl
1041 #define	sr_read_sony_session_offset	ssr_read_sony_session_offset
1042 #define	sd_log_page_supported		ssd_log_page_supported
1043 #define	sd_check_for_writable_cd	ssd_check_for_writable_cd
1044 #define	sd_wm_cache_constructor		ssd_wm_cache_constructor
1045 #define	sd_wm_cache_destructor		ssd_wm_cache_destructor
1046 #define	sd_range_lock			ssd_range_lock
1047 #define	sd_get_range			ssd_get_range
1048 #define	sd_free_inlist_wmap		ssd_free_inlist_wmap
1049 #define	sd_range_unlock			ssd_range_unlock
1050 #define	sd_read_modify_write_task	ssd_read_modify_write_task
1051 #define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw
1052 
1053 #define	sd_iostart_chain		ssd_iostart_chain
1054 #define	sd_iodone_chain			ssd_iodone_chain
1055 #define	sd_initpkt_map			ssd_initpkt_map
1056 #define	sd_destroypkt_map		ssd_destroypkt_map
1057 #define	sd_chain_type_map		ssd_chain_type_map
1058 #define	sd_chain_index_map		ssd_chain_index_map
1059 
1060 #define	sd_failfast_flushctl		ssd_failfast_flushctl
1061 #define	sd_failfast_flushq		ssd_failfast_flushq
1062 #define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback
1063 
1064 #define	sd_is_lsi			ssd_is_lsi
1065 
1066 #endif	/* #if (defined(__fibre)) */
1067 
1068 
1069 int _init(void);
1070 int _fini(void);
1071 int _info(struct modinfo *modinfop);
1072 
1073 /*PRINTFLIKE3*/
1074 static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1075 /*PRINTFLIKE3*/
1076 static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1077 /*PRINTFLIKE3*/
1078 static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1079 
1080 static int sdprobe(dev_info_t *devi);
1081 static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
1082     void **result);
1083 static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
1084     int mod_flags, char *name, caddr_t valuep, int *lengthp);
1085 
1086 /*
1087  * Smart probe for parallel scsi
1088  */
1089 static void sd_scsi_probe_cache_init(void);
1090 static void sd_scsi_probe_cache_fini(void);
1091 static void sd_scsi_clear_probe_cache(void);
1092 static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());
1093 
1094 static int	sd_spin_up_unit(struct sd_lun *un);
1095 #ifdef _LP64
1096 static void	sd_enable_descr_sense(struct sd_lun *un);
1097 #endif /* _LP64 */
1098 static void	sd_set_mmc_caps(struct sd_lun *un);
1099 
1100 static void sd_read_unit_properties(struct sd_lun *un);
1101 static int  sd_process_sdconf_file(struct sd_lun *un);
1102 static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
1103     int *data_list, sd_tunables *values);
1104 static void sd_process_sdconf_table(struct sd_lun *un);
1105 static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
1106 static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
1107 static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
1108 	int list_len, char *dataname_ptr);
1109 static void sd_set_vers1_properties(struct sd_lun *un, int flags,
1110     sd_tunables *prop_list);
1111 static int  sd_validate_geometry(struct sd_lun *un, int path_flag);
1112 
1113 #if defined(_SUNOS_VTOC_16)
1114 static void sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g);
1115 #endif
1116 
1117 static void sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
1118 	int path_flag);
1119 static int  sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize,
1120 	int path_flag);
1121 static void sd_get_physical_geometry(struct sd_lun *un,
1122 	struct geom_cache *pgeom_p, int capacity, int lbasize, int path_flag);
1123 static void sd_get_virtual_geometry(struct sd_lun *un, int capacity,
1124 	int lbasize);
1125 static int  sd_uselabel(struct sd_lun *un, struct dk_label *l, int path_flag);
1126 static void sd_swap_efi_gpt(efi_gpt_t *);
1127 static void sd_swap_efi_gpe(int nparts, efi_gpe_t *);
1128 static int sd_validate_efi(efi_gpt_t *);
1129 static int sd_use_efi(struct sd_lun *, int);
1130 static void sd_build_default_label(struct sd_lun *un);
1131 
1132 #if defined(_FIRMWARE_NEEDS_FDISK)
1133 static int  sd_has_max_chs_vals(struct ipart *fdp);
1134 #endif
1135 static void sd_inq_fill(char *p, int l, char *s);
1136 
1137 
1138 static void sd_register_devid(struct sd_lun *un, dev_info_t *devi,
1139     int reservation_flag);
1140 static daddr_t  sd_get_devid_block(struct sd_lun *un);
1141 static int  sd_get_devid(struct sd_lun *un);
1142 static int  sd_get_serialnum(struct sd_lun *un, uchar_t *wwn, int *len);
1143 static ddi_devid_t sd_create_devid(struct sd_lun *un);
1144 static int  sd_write_deviceid(struct sd_lun *un);
1145 static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
1146 static int  sd_check_vpd_page_support(struct sd_lun *un);
1147 
1148 static void sd_setup_pm(struct sd_lun *un, dev_info_t *devi);
1149 static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);
1150 
1151 static int  sd_ddi_suspend(dev_info_t *devi);
1152 static int  sd_ddi_pm_suspend(struct sd_lun *un);
1153 static int  sd_ddi_resume(dev_info_t *devi);
1154 static int  sd_ddi_pm_resume(struct sd_lun *un);
1155 static int  sdpower(dev_info_t *devi, int component, int level);
1156 
1157 static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
1158 static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
1159 static int  sd_unit_attach(dev_info_t *devi);
1160 static int  sd_unit_detach(dev_info_t *devi);
1161 
1162 static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
1163 static int  sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi);
1164 static void sd_create_errstats(struct sd_lun *un, int instance);
1165 static void sd_set_errstats(struct sd_lun *un);
1166 static void sd_set_pstats(struct sd_lun *un);
1167 
1168 static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
1169 static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
1170 static int  sd_send_polled_RQS(struct sd_lun *un);
1171 static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);
1172 
1173 #if (defined(__fibre))
1174 /*
1175  * Event callbacks (photon)
1176  */
1177 static void sd_init_event_callbacks(struct sd_lun *un);
1178 static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
1179 #endif
1180 
1181 /*
1182  * Defines for sd_cache_control
1183  */
1184 
1185 #define	SD_CACHE_ENABLE		1
1186 #define	SD_CACHE_DISABLE	0
1187 #define	SD_CACHE_NOCHANGE	-1
1188 
1189 static int   sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag);
1190 static int   sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled);
1191 static dev_t sd_make_device(dev_info_t *devi);
1192 
1193 static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
1194 	uint64_t capacity);
1195 
1196 /*
1197  * Driver entry point functions.
1198  */
1199 static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
1200 static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
1201 static int  sd_ready_and_valid(struct sd_lun *un);
1202 
1203 static void sdmin(struct buf *bp);
1204 static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
1205 static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
1206 static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1207 static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1208 
1209 static int sdstrategy(struct buf *bp);
1210 static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
1211 
1212 /*
1213  * Function prototypes for layering functions in the iostart chain.
1214  */
1215 static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
1216 	struct buf *bp);
1217 static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
1218 	struct buf *bp);
1219 static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
1220 static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
1221 	struct buf *bp);
1222 static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
1223 static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);
1224 
1225 /*
1226  * Function prototypes for layering functions in the iodone chain.
1227  */
1228 static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
1229 static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
1230 static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
1231 	struct buf *bp);
1232 static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
1233 	struct buf *bp);
1234 static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
1235 static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
1236 	struct buf *bp);
1237 static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);
1238 
1239 /*
1240  * Prototypes for functions to support buf(9S) based IO.
1241  */
1242 static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
1243 static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
1244 static void sd_destroypkt_for_buf(struct buf *);
1245 static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
1246 	struct buf *bp, int flags,
1247 	int (*callback)(caddr_t), caddr_t callback_arg,
1248 	diskaddr_t lba, uint32_t blockcount);
1249 #if defined(__i386) || defined(__amd64)
1250 static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
1251 	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
1252 #endif /* defined(__i386) || defined(__amd64) */
1253 
1254 /*
1255  * Prototypes for functions to support USCSI IO.
1256  */
1257 static int sd_uscsi_strategy(struct buf *bp);
1258 static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
1259 static void sd_destroypkt_for_uscsi(struct buf *);
1260 
1261 static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
1262 	uchar_t chain_type, void *pktinfop);
1263 
1264 static int  sd_pm_entry(struct sd_lun *un);
1265 static void sd_pm_exit(struct sd_lun *un);
1266 
1267 static void sd_pm_idletimeout_handler(void *arg);
1268 
1269 /*
1270  * sd_core internal functions (used at the sd_core_io layer).
1271  */
1272 static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
1273 static void sdintr(struct scsi_pkt *pktp);
1274 static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);
1275 
1276 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
1277 	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
1278 	int path_flag);
1279 
1280 static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
1281 	daddr_t blkno, int (*func)(struct buf *));
1282 static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
1283 	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
1284 static void sd_bioclone_free(struct buf *bp);
1285 static void sd_shadow_buf_free(struct buf *bp);
1286 
1287 static void sd_print_transport_rejected_message(struct sd_lun *un,
1288 	struct sd_xbuf *xp, int code);
1289 static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
1290     void *arg, int code);
1291 static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
1292     void *arg, int code);
1293 static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
1294     void *arg, int code);
1295 
1296 static void sd_retry_command(struct sd_lun *un, struct buf *bp,
1297 	int retry_check_flag,
1298 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
1299 		int c),
1300 	void *user_arg, int failure_code,  clock_t retry_delay,
1301 	void (*statp)(kstat_io_t *));
1302 
1303 static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
1304 	clock_t retry_delay, void (*statp)(kstat_io_t *));
1305 
1306 static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
1307 	struct scsi_pkt *pktp);
1308 static void sd_start_retry_command(void *arg);
1309 static void sd_start_direct_priority_command(void *arg);
1310 static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
1311 	int errcode);
1312 static void sd_return_failed_command_no_restart(struct sd_lun *un,
1313 	struct buf *bp, int errcode);
1314 static void sd_return_command(struct sd_lun *un, struct buf *bp);
1315 static void sd_sync_with_callback(struct sd_lun *un);
1316 static int sdrunout(caddr_t arg);
1317 
1318 static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
1319 static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);
1320 
1321 static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
1322 static void sd_restore_throttle(void *arg);
1323 
1324 static void sd_init_cdb_limits(struct sd_lun *un);
1325 
1326 static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
1327 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1328 
1329 /*
1330  * Error handling functions
1331  */
1332 static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
1333 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1334 static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
1335 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1336 static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
1337 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1338 static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
1339 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1340 
1341 static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
1342 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1343 static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
1344 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1345 static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
1346 	struct sd_xbuf *xp);
1347 static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
1348 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1349 
1350 static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
1351 	void *arg, int code);
1352 static diskaddr_t sd_extract_sense_info_descr(
1353 	struct scsi_descr_sense_hdr *sdsp);
1354 
1355 static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
1356 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1357 static void sd_sense_key_recoverable_error(struct sd_lun *un,
1358 	uint8_t asc,
1359 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1360 static void sd_sense_key_not_ready(struct sd_lun *un,
1361 	uint8_t asc, uint8_t ascq,
1362 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1363 static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
1364 	int sense_key, uint8_t asc,
1365 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1366 static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
1367 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1368 static void sd_sense_key_unit_attention(struct sd_lun *un,
1369 	uint8_t asc,
1370 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1371 static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
1372 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1373 static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
1374 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1375 static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
1376 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1377 static void sd_sense_key_default(struct sd_lun *un,
1378 	int sense_key,
1379 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1380 
1381 static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
1382 	void *arg, int flag);
1383 
1384 static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
1385 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1386 static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
1387 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1388 static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
1389 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1390 static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
1391 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1392 static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
1393 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1394 static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
1395 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1396 static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
1397 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1398 static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
1399 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1400 
1401 static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);
1402 
1403 static void sd_start_stop_unit_callback(void *arg);
1404 static void sd_start_stop_unit_task(void *arg);
1405 
1406 static void sd_taskq_create(void);
1407 static void sd_taskq_delete(void);
1408 static void sd_media_change_task(void *arg);
1409 
1410 static int sd_handle_mchange(struct sd_lun *un);
1411 static int sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag);
1412 static int sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp,
1413 	uint32_t *lbap, int path_flag);
1414 static int sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
1415 	uint32_t *lbap, int path_flag);
1416 static int sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag,
1417 	int path_flag);
1418 static int sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr,
1419 	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
1420 static int sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag);
1421 static int sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un,
1422 	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
1423 static int sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un,
1424 	uchar_t usr_cmd, uchar_t *usr_bufp);
1425 static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
1426 	struct dk_callback *dkc);
1427 static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1428 static int sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un,
1429 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1430 	uchar_t *bufaddr, uint_t buflen);
1431 static int sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
1432 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1433 	uchar_t *bufaddr, uint_t buflen, char feature);
1434 static int sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize,
1435 	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1436 static int sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize,
1437 	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
static int sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
	size_t buflen, daddr_t start_block, int path_flag);

/*
 * Convenience wrappers around sd_send_scsi_RDWR() that supply the
 * SCSI opcode (SCMD_READ / SCMD_WRITE) as the cmd argument.
 */
#define	sd_send_scsi_READ(un, bufaddr, buflen, start_block, path_flag)	\
	sd_send_scsi_RDWR(un, SCMD_READ, bufaddr, buflen, start_block, \
	path_flag)
#define	sd_send_scsi_WRITE(un, bufaddr, buflen, start_block, path_flag)	\
	sd_send_scsi_RDWR(un, SCMD_WRITE, bufaddr, buflen, start_block,\
	path_flag)
1446 
1447 static int sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr,
1448 	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1449 	uint16_t param_ptr, int path_flag);
1450 
1451 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1452 static void sd_free_rqs(struct sd_lun *un);
1453 
1454 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1455 	uchar_t *data, int len, int fmt);
1456 static void sd_panic_for_res_conflict(struct sd_lun *un);
1457 
1458 /*
1459  * Disk Ioctl Function Prototypes
1460  */
1461 static int sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag);
1462 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1463 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1464 static int sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag,
1465 	int geom_validated);
1466 static int sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag);
1467 static int sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag,
1468 	int geom_validated);
1469 static int sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag);
1470 static int sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag,
1471 	int geom_validated);
1472 static int sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag);
1473 static int sd_dkio_partition(dev_t dev, caddr_t arg, int flag);
1474 static void sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1475 static int sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag);
1476 static int sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag);
1477 static int sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc);
1478 static int sd_write_label(dev_t dev);
1479 static int sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl);
1480 static void sd_clear_vtoc(struct sd_lun *un);
1481 static void sd_clear_efi(struct sd_lun *un);
1482 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1483 static int sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag);
1484 static int sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag);
1485 static void sd_setup_default_geometry(struct sd_lun *un);
1486 #if defined(__i386) || defined(__amd64)
1487 static int sd_update_fdisk_and_vtoc(struct sd_lun *un);
1488 #endif
1489 
1490 /*
1491  * Multi-host Ioctl Prototypes
1492  */
1493 static int sd_check_mhd(dev_t dev, int interval);
1494 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1495 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1496 static char *sd_sname(uchar_t status);
1497 static void sd_mhd_resvd_recover(void *arg);
1498 static void sd_resv_reclaim_thread();
1499 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1500 static int sd_reserve_release(dev_t dev, int cmd);
1501 static void sd_rmv_resv_reclaim_req(dev_t dev);
1502 static void sd_mhd_reset_notify_cb(caddr_t arg);
1503 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1504 	mhioc_inkeys_t *usrp, int flag);
1505 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1506 	mhioc_inresvs_t *usrp, int flag);
1507 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1508 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1509 static int sd_mhdioc_release(dev_t dev);
1510 static int sd_mhdioc_register_devid(dev_t dev);
1511 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1512 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1513 
1514 /*
1515  * SCSI removable prototypes
1516  */
1517 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1518 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1519 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1520 static int sr_pause_resume(dev_t dev, int mode);
1521 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1522 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1523 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1524 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1525 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1526 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1527 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1528 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1529 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1530 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1531 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1532 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1533 static int sr_eject(dev_t dev);
1534 static void sr_ejected(register struct sd_lun *un);
1535 static int sr_check_wp(dev_t dev);
1536 static int sd_check_media(dev_t dev, enum dkio_state state);
1537 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1538 static void sd_delayed_cv_broadcast(void *arg);
1539 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1540 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1541 
1542 static int sd_log_page_supported(struct sd_lun *un, int log_page);
1543 
1544 /*
1545  * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1546  */
1547 static void sd_check_for_writable_cd(struct sd_lun *un);
1548 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1549 static void sd_wm_cache_destructor(void *wm, void *un);
1550 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1551 	daddr_t endb, ushort_t typ);
1552 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1553 	daddr_t endb);
1554 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1555 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1556 static void sd_read_modify_write_task(void * arg);
1557 static int
1558 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1559 	struct buf **bpp);
1560 
1561 
1562 /*
1563  * Function prototypes for failfast support.
1564  */
1565 static void sd_failfast_flushq(struct sd_lun *un);
1566 static int sd_failfast_flushq_callback(struct buf *bp);
1567 
1568 /*
1569  * Function prototypes to check for lsi devices
1570  */
1571 static void sd_is_lsi(struct sd_lun *un);
1572 
1573 /*
1574  * Function prototypes for x86 support
1575  */
1576 #if defined(__i386) || defined(__amd64)
1577 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1578 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1579 #endif
1580 
1581 /*
1582  * Constants for failfast support:
1583  *
1584  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1585  * failfast processing being performed.
1586  *
1587  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1588  * failfast processing on all bufs with B_FAILFAST set.
1589  */
1590 
1591 #define	SD_FAILFAST_INACTIVE		0
1592 #define	SD_FAILFAST_ACTIVE		1
1593 
1594 /*
1595  * Bitmask to control behavior of buf(9S) flushes when a transition to
1596  * the failfast state occurs. Optional bits include:
1597  *
1598  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1599  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1600  * be flushed.
1601  *
1602  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1603  * driver, in addition to the regular wait queue. This includes the xbuf
1604  * queues. When clear, only the driver's wait queue will be flushed.
1605  */
1606 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1607 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1608 
1609 /*
1610  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1611  * to flush all queues within the driver.
1612  */
1613 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
1614 
1615 
1616 /*
1617  * SD Testing Fault Injection
1618  */
1619 #ifdef SD_FAULT_INJECTION
1620 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1621 static void sd_faultinjection(struct scsi_pkt *pktp);
1622 static void sd_injection_log(char *buf, struct sd_lun *un);
1623 #endif
1624 
1625 /*
1626  * Device driver ops vector
1627  */
static struct cb_ops sd_cb_ops = {
	sdopen,			/* open */
	sdclose,		/* close */
	sdstrategy,		/* strategy */
	nodev,			/* print: not supported */
	sddump,			/* dump (crash dump support) */
	sdread,			/* read */
	sdwrite,		/* write */
	sdioctl,		/* ioctl */
	nodev,			/* devmap: not supported */
	nodev,			/* mmap: not supported */
	nodev,			/* segmap: not supported */
	nochpoll,		/* poll: not supported */
	sd_prop_op,		/* cb_prop_op */
	0,			/* streamtab: not a STREAMS driver */
	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
	CB_REV,			/* cb_rev */
	sdaread, 		/* async I/O read entry point */
	sdawrite		/* async I/O write entry point */
};
1648 
static struct dev_ops sd_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	sdinfo,			/* info */
	nulldev,		/* identify: no-op */
	sdprobe,		/* probe */
	sdattach,		/* attach */
	sddetach,		/* detach */
	nodev,			/* reset: not supported */
	&sd_cb_ops,		/* character/block driver operations */
	NULL,			/* bus operations: not a nexus driver */
	sdpower			/* power management entry point */
};
1662 
1663 
1664 /*
1665  * This is the loadable module wrapper.
1666  */
1667 #include <sys/modctl.h>
1668 
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	SD_MODULE_NAME,		/* Module name (reported by modinfo) */
	&sd_ops			/* Driver dev_ops vector defined above */
};
1674 
1675 
static struct modlinkage modlinkage = {
	MODREV_1,		/* loadable-module framework revision */
	&modldrv,		/* this driver's module linkage */
	NULL			/* NULL-terminates the linkage list */
};
1681 
1682 
/*
 * Additional sense code (ASC/ASCQ) to message-string mappings beyond the
 * standard set.  Each entry is { ASC, ASCQ, message }; the table is
 * terminated by an ASC value of 0xffff.
 */
static struct scsi_asq_key_strings sd_additional_codes[] = {
	0x81, 0, "Logical Unit is Reserved",
	0x85, 0, "Audio Address Not Valid",
	0xb6, 0, "Media Load Mechanism Failed",
	0xB9, 0, "Audio Play Operation Aborted",
	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
	0x53, 2, "Medium removal prevented",
	0x6f, 0, "Authentication failed during key exchange",
	0x6f, 1, "Key not present",
	0x6f, 2, "Key not established",
	0x6f, 3, "Read without proper authentication",
	0x6f, 4, "Mismatched region to this logical unit",
	0x6f, 5, "Region reset count error",
	0xffff, 0x0, NULL
};
1698 
1699 
1700 /*
1701  * Struct for passing printing information for sense data messages
1702  */
struct sd_sense_info {
	int	ssi_severity;	/* message severity level */
	int	ssi_pfa_flag;	/* nonzero for predictive-failure messages */
};
1707 
1708 /*
1709  * Table of function pointers for iostart-side routines. Seperate "chains"
1710  * of layered function calls are formed by placing the function pointers
1711  * sequentially in the desired order. Functions are called according to an
1712  * incrementing table index ordering. The last function in each chain must
1713  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1714  * in the sd_iodone_chain[] array.
1715  *
1716  * Note: It may seem more natural to organize both the iostart and iodone
1717  * functions together, into an array of structures (or some similar
1718  * organization) with a common index, rather than two seperate arrays which
1719  * must be maintained in synchronization. The purpose of this division is
1720  * to achiece improved performance: individual arrays allows for more
1721  * effective cache line utilization on certain platforms.
1722  */
1723 
1724 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1725 
1726 
/*
 * NOTE: the index positions here must stay in sync with both the
 * SD_CHAIN_*_IOSTART macros below and the sd_iodone_chain[] array.
 */
static sd_chain_t sd_iostart_chain[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 0 */
	sd_pm_iostart,			/* Index: 1 */
	sd_core_iostart,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 3 */
	sd_core_iostart,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 5 */
	sd_mapblocksize_iostart,	/* Index: 6 */
	sd_pm_iostart,			/* Index: 7 */
	sd_core_iostart,		/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 9 */
	sd_mapblocksize_iostart,	/* Index: 10 */
	sd_core_iostart,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 12 */
	sd_checksum_iostart,		/* Index: 13 */
	sd_pm_iostart,			/* Index: 14 */
	sd_core_iostart,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 16 */
	sd_checksum_iostart,		/* Index: 17 */
	sd_core_iostart,		/* Index: 18 */

	/* Chain for USCSI commands (all targets) */
	sd_pm_iostart,			/* Index: 19 */
	sd_core_iostart,		/* Index: 20 */

	/* Chain for checksumming USCSI commands (all targets) */
	sd_checksum_uscsi_iostart,	/* Index: 21 */
	sd_pm_iostart,			/* Index: 22 */
	sd_core_iostart,		/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_core_iostart,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_core_iostart,		/* Index: 25 */
};
1775 
1776 /*
1777  * Macros to locate the first function of each iostart chain in the
1778  * sd_iostart_chain[] array. These are located by the index in the array.
1779  */
1780 #define	SD_CHAIN_DISK_IOSTART			0
1781 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1782 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1783 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1784 #define	SD_CHAIN_CHKSUM_IOSTART			12
1785 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1786 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1787 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1788 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1789 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1790 
1791 
1792 /*
1793  * Table of function pointers for the iodone-side routines for the driver-
1794  * internal layering mechanism.  The calling sequence for iodone routines
1795  * uses a decrementing table index, so the last routine called in a chain
1796  * must be at the lowest array index location for that chain.  The last
1797  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1798  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1799  * of the functions in an iodone side chain must correspond to the ordering
1800  * of the iostart routines for that chain.  Note that there is no iodone
1801  * side routine that corresponds to sd_core_iostart(), so there is no
1802  * entry in the table for this.
1803  */
1804 
/*
 * NOTE: the index positions here must stay in sync with both the
 * SD_CHAIN_*_IODONE macros below and the sd_iostart_chain[] array.
 * (The comment on the checksum-USCSI sd_pm_iodone entry previously
 * claimed "Index: 22"; that element is actually at index 23.)
 */
static sd_chain_t sd_iodone_chain[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_buf_iodone,			/* Index: 0 */
	sd_mapblockaddr_iodone,		/* Index: 1 */
	sd_pm_iodone,			/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_buf_iodone,			/* Index: 3 */
	sd_mapblockaddr_iodone,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_buf_iodone,			/* Index: 5 */
	sd_mapblockaddr_iodone,		/* Index: 6 */
	sd_mapblocksize_iodone,		/* Index: 7 */
	sd_pm_iodone,			/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_buf_iodone,			/* Index: 9 */
	sd_mapblockaddr_iodone,		/* Index: 10 */
	sd_mapblocksize_iodone,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_buf_iodone,			/* Index: 12 */
	sd_mapblockaddr_iodone,		/* Index: 13 */
	sd_checksum_iodone,		/* Index: 14 */
	sd_pm_iodone,			/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_buf_iodone,			/* Index: 16 */
	sd_mapblockaddr_iodone,		/* Index: 17 */
	sd_checksum_iodone,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_uscsi_iodone,		/* Index: 19 */
	sd_pm_iodone,			/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_uscsi_iodone,		/* Index: 21 */
	sd_checksum_uscsi_iodone,	/* Index: 22 */
	sd_pm_iodone,			/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_uscsi_iodone,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_uscsi_iodone,		/* Index: 25 */
};
1853 
1854 
1855 /*
1856  * Macros to locate the "first" function in the sd_iodone_chain[] array for
1857  * each iodone-side chain. These are located by the array index, but as the
1858  * iodone side functions are called in a decrementing-index order, the
1859  * highest index number in each chain must be specified (as these correspond
1860  * to the first function in the iodone chain that will be called by the core
1861  * at IO completion time).
1862  */
1863 
1864 #define	SD_CHAIN_DISK_IODONE			2
1865 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
1866 #define	SD_CHAIN_RMMEDIA_IODONE			8
1867 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
1868 #define	SD_CHAIN_CHKSUM_IODONE			15
1869 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
1870 #define	SD_CHAIN_USCSI_CMD_IODONE		20
1871 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		22
1872 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
1873 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
1874 
1875 
1876 
1877 
1878 /*
1879  * Array to map a layering chain index to the appropriate initpkt routine.
1880  * The redundant entries are present so that the index used for accessing
1881  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1882  * with this table as well.
1883  */
1884 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
1885 
static sd_initpkt_t	sd_initpkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 0 */
	sd_initpkt_for_buf,		/* Index: 1 */
	sd_initpkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 3 */
	sd_initpkt_for_buf,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 5 */
	sd_initpkt_for_buf,		/* Index: 6 */
	sd_initpkt_for_buf,		/* Index: 7 */
	sd_initpkt_for_buf,		/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 9 */
	sd_initpkt_for_buf,		/* Index: 10 */
	sd_initpkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 12 */
	sd_initpkt_for_buf,		/* Index: 13 */
	sd_initpkt_for_buf,		/* Index: 14 */
	sd_initpkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 16 */
	sd_initpkt_for_buf,		/* Index: 17 */
	sd_initpkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 19 */
	sd_initpkt_for_uscsi,		/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 21 */
	sd_initpkt_for_uscsi,		/* Index: 22 */
	sd_initpkt_for_uscsi,		/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 25 */

};
1935 
1936 
1937 /*
 * Array to map a layering chain index to the appropriate destroypkt routine.
1939  * The redundant entries are present so that the index used for accessing
1940  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
1941  * with this table as well.
1942  */
1943 typedef void (*sd_destroypkt_t)(struct buf *);
1944 
static sd_destroypkt_t	sd_destroypkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 0 */
	sd_destroypkt_for_buf,		/* Index: 1 */
	sd_destroypkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 3 */
	sd_destroypkt_for_buf,		/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 5 */
	sd_destroypkt_for_buf,		/* Index: 6 */
	sd_destroypkt_for_buf,		/* Index: 7 */
	sd_destroypkt_for_buf,		/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 9 */
	sd_destroypkt_for_buf,		/* Index: 10 */
	sd_destroypkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 12 */
	sd_destroypkt_for_buf,		/* Index: 13 */
	sd_destroypkt_for_buf,		/* Index: 14 */
	sd_destroypkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 16 */
	sd_destroypkt_for_buf,		/* Index: 17 */
	sd_destroypkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 19 */
	sd_destroypkt_for_uscsi,	/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 21 */
	sd_destroypkt_for_uscsi,	/* Index: 22 */
	sd_destroypkt_for_uscsi,	/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 25 */

};
1994 
1995 
1996 
1997 /*
1998  * Array to map a layering chain index to the appropriate chain "type".
1999  * The chain type indicates a specific property/usage of the chain.
2000  * The redundant entries are present so that the index used for accessing
2001  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2002  * with this table as well.
2003  */
2004 
2005 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
2006 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
2007 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
2008 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
2009 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
2010 						/* (for error recovery) */
2011 
static int sd_chain_type_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 0 */
	SD_CHAIN_BUFIO,			/* Index: 1 */
	SD_CHAIN_BUFIO,			/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 3 */
	SD_CHAIN_BUFIO,			/* Index: 4 */

	/* Chain for buf IO for removable-media targets (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 5 */
	SD_CHAIN_BUFIO,			/* Index: 6 */
	SD_CHAIN_BUFIO,			/* Index: 7 */
	SD_CHAIN_BUFIO,			/* Index: 8 */

	/* Chain for buf IO for removable-media targets (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 9 */
	SD_CHAIN_BUFIO,			/* Index: 10 */
	SD_CHAIN_BUFIO,			/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 12 */
	SD_CHAIN_BUFIO,			/* Index: 13 */
	SD_CHAIN_BUFIO,			/* Index: 14 */
	SD_CHAIN_BUFIO,			/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 16 */
	SD_CHAIN_BUFIO,			/* Index: 17 */
	SD_CHAIN_BUFIO,			/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 19 */
	SD_CHAIN_USCSI,			/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 21 */
	SD_CHAIN_USCSI,			/* Index: 22 */
	SD_CHAIN_USCSI,			/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	SD_CHAIN_DIRECT,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
};
2060 
2061 
2062 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2063 #define	SD_IS_BUFIO(xp)			\
2064 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2065 
2066 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2067 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2068 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2069 
2070 
2071 
2072 /*
2073  * Struct, array, and macros to map a specific chain to the appropriate
2074  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2075  *
2076  * The sd_chain_index_map[] array is used at attach time to set the various
2077  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2078  * chain to be used with the instance. This allows different instances to use
2079  * different chain for buf IO, uscsi IO, etc.. Also, since the xb_chain_iostart
2080  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2081  * values at sd_xbuf init time, this allows (1) layering chains may be changed
2082  * dynamically & without the use of locking; and (2) a layer may update the
2083  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2084  * to allow for deferred processing of an IO within the same chain from a
2085  * different execution context.
2086  */
2087 
struct sd_chain_index {
	int	sci_iostart_index;	/* first index into sd_iostart_chain[] */
	int	sci_iodone_index;	/* first index into sd_iodone_chain[] */
};
2092 
/*
 * NOTE: the row positions here must stay in sync with the
 * SD_CHAIN_INFO_* index macros defined below.
 */
static struct sd_chain_index	sd_chain_index_map[] = {
	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
};
2105 
2106 
2107 /*
2108  * The following are indexes into the sd_chain_index_map[] array.
2109  */
2110 
2111 /* un->un_buf_chain_type must be set to one of these */
2112 #define	SD_CHAIN_INFO_DISK		0
2113 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2114 #define	SD_CHAIN_INFO_RMMEDIA		2
2115 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2116 #define	SD_CHAIN_INFO_CHKSUM		4
2117 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2118 
2119 /* un->un_uscsi_chain_type must be set to one of these */
2120 #define	SD_CHAIN_INFO_USCSI_CMD		6
2121 /* USCSI with PM disabled is the same as DIRECT */
2122 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2123 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2124 
2125 /* un->un_direct_chain_type must be set to one of these */
2126 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2127 
2128 /* un->un_priority_chain_type must be set to one of these */
2129 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
2130 
2131 /* size for devid inquiries */
2132 #define	MAX_INQUIRY_SIZE		0xF0
2133 
2134 /*
2135  * Macros used by functions to pass a given buf(9S) struct along to the
2136  * next function in the layering chain for further processing.
2137  *
2138  * In the following macros, passing more than three arguments to the called
2139  * routines causes the optimizer for the SPARC compiler to stop doing tail
2140  * call elimination which results in significant performance degradation.
2141  */
2142 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2143 	((*(sd_iostart_chain[index]))(index, un, bp))
2144 
2145 #define	SD_BEGIN_IODONE(index, un, bp)	\
2146 	((*(sd_iodone_chain[index]))(index, un, bp))
2147 
2148 #define	SD_NEXT_IOSTART(index, un, bp)				\
2149 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2150 
2151 #define	SD_NEXT_IODONE(index, un, bp)				\
2152 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
2153 
2154 /*
2155  *    Function: _init
2156  *
2157  * Description: This is the driver _init(9E) entry point.
2158  *
2159  * Return Code: Returns the value from mod_install(9F) or
2160  *		ddi_soft_state_init(9F) as appropriate.
2161  *
2162  *     Context: Called when driver module loaded.
2163  */
2164 
2165 int
2166 _init(void)
2167 {
2168 	int	err;
2169 
2170 	/* establish driver name from module name */
2171 	sd_label = mod_modname(&modlinkage);
2172 
2173 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2174 		SD_MAXUNIT);
2175 
2176 	if (err != 0) {
2177 		return (err);
2178 	}
2179 
2180 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2181 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2182 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2183 
2184 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2185 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2186 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2187 
2188 	/*
2189 	 * it's ok to init here even for fibre device
2190 	 */
2191 	sd_scsi_probe_cache_init();
2192 
2193 	/*
2194 	 * Creating taskq before mod_install ensures that all callers (threads)
2195 	 * that enter the module after a successfull mod_install encounter
2196 	 * a valid taskq.
2197 	 */
2198 	sd_taskq_create();
2199 
2200 	err = mod_install(&modlinkage);
2201 	if (err != 0) {
2202 		/* delete taskq if install fails */
2203 		sd_taskq_delete();
2204 
2205 		mutex_destroy(&sd_detach_mutex);
2206 		mutex_destroy(&sd_log_mutex);
2207 		mutex_destroy(&sd_label_mutex);
2208 
2209 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2210 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2211 		cv_destroy(&sd_tr.srq_inprocess_cv);
2212 
2213 		sd_scsi_probe_cache_fini();
2214 
2215 		ddi_soft_state_fini(&sd_state);
2216 		return (err);
2217 	}
2218 
2219 	return (err);
2220 }
2221 
2222 
2223 /*
2224  *    Function: _fini
2225  *
2226  * Description: This is the driver _fini(9E) entry point.
2227  *
2228  * Return Code: Returns the value from mod_remove(9F)
2229  *
2230  *     Context: Called when driver module is unloaded.
2231  */
2232 
2233 int
2234 _fini(void)
2235 {
2236 	int err;
2237 
2238 	if ((err = mod_remove(&modlinkage)) != 0) {
2239 		return (err);
2240 	}
2241 
2242 	sd_taskq_delete();
2243 
2244 	mutex_destroy(&sd_detach_mutex);
2245 	mutex_destroy(&sd_log_mutex);
2246 	mutex_destroy(&sd_label_mutex);
2247 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2248 
2249 	sd_scsi_probe_cache_fini();
2250 
2251 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2252 	cv_destroy(&sd_tr.srq_inprocess_cv);
2253 
2254 	ddi_soft_state_fini(&sd_state);
2255 
2256 	return (err);
2257 }
2258 
2259 
2260 /*
2261  *    Function: _info
2262  *
2263  * Description: This is the driver _info(9E) entry point.
2264  *
2265  *   Arguments: modinfop - pointer to the driver modinfo structure
2266  *
2267  * Return Code: Returns the value from mod_info(9F).
2268  *
2269  *     Context: Kernel thread context
2270  */
2271 
int
_info(struct modinfo *modinfop)
{
	/* Delegate directly to the module framework. */
	return (mod_info(&modlinkage, modinfop));
}
2277 
2278 
2279 /*
2280  * The following routines implement the driver message logging facility.
2281  * They provide component- and level- based debug output filtering.
2282  * Output may also be restricted to messages for a single instance by
2283  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2284  * to NULL, then messages for all instances are printed.
2285  *
2286  * These routines have been cloned from each other due to the language
2287  * constraints of macros and variable argument list processing.
2288  */
2289 
2290 
2291 /*
2292  *    Function: sd_log_err
2293  *
2294  * Description: This routine is called by the SD_ERROR macro for debug
2295  *		logging of error conditions.
2296  *
2297  *   Arguments: comp - driver component being logged
2298  *		dev  - pointer to driver info structure
2299  *		fmt  - error string and format to be logged
2300  */
2301 
static void
sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 */
	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		/*
		 * NOTE(review): vsprintf() performs no bounds checking on
		 * sd_log_buf (declared elsewhere in this file); formatted
		 * messages must fit within that buffer.
		 */
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	/* Also route matching messages to the fault-injection log. */
	if (un->sd_injection_mask & comp) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2338 
2339 
2340 /*
2341  *    Function: sd_log_info
2342  *
2343  * Description: This routine is called by the SD_INFO macro for debug
2344  *		logging of general purpose informational conditions.
2345  *
2346  *   Arguments: comp - driver component being logged
2347  *		dev  - pointer to driver info structure
2348  *		fmt  - info string and format to be logged
2349  */
2350 
static void
sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 */
	if ((sd_component_mask & component) &&
	    (sd_level_mask & SD_LOGMASK_INFO) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		/*
		 * NOTE(review): vsprintf() performs no bounds checking on
		 * sd_log_buf (declared elsewhere in this file); formatted
		 * messages must fit within that buffer.
		 */
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	/* Also route matching messages to the fault-injection log. */
	if (un->sd_injection_mask & component) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2388 
2389 
2390 /*
2391  *    Function: sd_log_trace
2392  *
2393  * Description: This routine is called by the SD_TRACE macro for debug
2394  *		logging of trace conditions (i.e. function entry/exit).
2395  *
2396  *   Arguments: comp - driver component being logged
2397  *		dev  - pointer to driver info structure
2398  *		fmt  - trace string and format to be logged
2399  */
2400 
static void
sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 */
	if ((sd_component_mask & component) &&
	    (sd_level_mask & SD_LOGMASK_TRACE) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		/*
		 * NOTE(review): vsprintf() performs no bounds checking on
		 * sd_log_buf (declared elsewhere in this file); formatted
		 * messages must fit within that buffer.
		 */
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	/* Also route matching messages to the fault-injection log. */
	if (un->sd_injection_mask & component) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2438 
2439 
2440 /*
2441  *    Function: sdprobe
2442  *
2443  * Description: This is the driver probe(9e) entry point function.
2444  *
2445  *   Arguments: devi - opaque device info handle
2446  *
2447  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2448  *              DDI_PROBE_FAILURE: If the probe failed.
2449  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2450  *				   but may be present in the future.
2451  */
2452 
2453 static int
2454 sdprobe(dev_info_t *devi)
2455 {
2456 	struct scsi_device	*devp;
2457 	int			rval;
2458 	int			instance;
2459 
2460 	/*
2461 	 * if it wasn't for pln, sdprobe could actually be nulldev
2462 	 * in the "__fibre" case.
2463 	 */
2464 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2465 		return (DDI_PROBE_DONTCARE);
2466 	}
2467 
2468 	devp = ddi_get_driver_private(devi);
2469 
2470 	if (devp == NULL) {
2471 		/* Ooops... nexus driver is mis-configured... */
2472 		return (DDI_PROBE_FAILURE);
2473 	}
2474 
2475 	instance = ddi_get_instance(devi);
2476 
2477 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2478 		return (DDI_PROBE_PARTIAL);
2479 	}
2480 
2481 	/*
2482 	 * Call the SCSA utility probe routine to see if we actually
2483 	 * have a target at this SCSI nexus.
2484 	 */
2485 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2486 	case SCSIPROBE_EXISTS:
2487 		switch (devp->sd_inq->inq_dtype) {
2488 		case DTYPE_DIRECT:
2489 			rval = DDI_PROBE_SUCCESS;
2490 			break;
2491 		case DTYPE_RODIRECT:
2492 			/* CDs etc. Can be removable media */
2493 			rval = DDI_PROBE_SUCCESS;
2494 			break;
2495 		case DTYPE_OPTICAL:
2496 			/*
2497 			 * Rewritable optical driver HP115AA
2498 			 * Can also be removable media
2499 			 */
2500 
2501 			/*
2502 			 * Do not attempt to bind to  DTYPE_OPTICAL if
2503 			 * pre solaris 9 sparc sd behavior is required
2504 			 *
2505 			 * If first time through and sd_dtype_optical_bind
2506 			 * has not been set in /etc/system check properties
2507 			 */
2508 
2509 			if (sd_dtype_optical_bind  < 0) {
2510 			    sd_dtype_optical_bind = ddi_prop_get_int
2511 				(DDI_DEV_T_ANY,	devi,	0,
2512 				"optical-device-bind",	1);
2513 			}
2514 
2515 			if (sd_dtype_optical_bind == 0) {
2516 				rval = DDI_PROBE_FAILURE;
2517 			} else {
2518 				rval = DDI_PROBE_SUCCESS;
2519 			}
2520 			break;
2521 
2522 		case DTYPE_NOTPRESENT:
2523 		default:
2524 			rval = DDI_PROBE_FAILURE;
2525 			break;
2526 		}
2527 		break;
2528 	default:
2529 		rval = DDI_PROBE_PARTIAL;
2530 		break;
2531 	}
2532 
2533 	/*
2534 	 * This routine checks for resource allocation prior to freeing,
2535 	 * so it will take care of the "smart probing" case where a
2536 	 * scsi_probe() may or may not have been issued and will *not*
2537 	 * free previously-freed resources.
2538 	 */
2539 	scsi_unprobe(devp);
2540 	return (rval);
2541 }
2542 
2543 
2544 /*
2545  *    Function: sdinfo
2546  *
2547  * Description: This is the driver getinfo(9e) entry point function.
2548  * 		Given the device number, return the devinfo pointer from
2549  *		the scsi_device structure or the instance number
2550  *		associated with the dev_t.
2551  *
2552  *   Arguments: dip     - pointer to device info structure
2553  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2554  *			  DDI_INFO_DEVT2INSTANCE)
2555  *		arg     - driver dev_t
2556  *		resultp - user buffer for request response
2557  *
2558  * Return Code: DDI_SUCCESS
2559  *              DDI_FAILURE
2560  */
2561 /* ARGSUSED */
2562 static int
2563 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2564 {
2565 	struct sd_lun	*un;
2566 	dev_t		dev;
2567 	int		instance;
2568 	int		error;
2569 
2570 	switch (infocmd) {
2571 	case DDI_INFO_DEVT2DEVINFO:
2572 		dev = (dev_t)arg;
2573 		instance = SDUNIT(dev);
2574 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2575 			return (DDI_FAILURE);
2576 		}
2577 		*result = (void *) SD_DEVINFO(un);
2578 		error = DDI_SUCCESS;
2579 		break;
2580 	case DDI_INFO_DEVT2INSTANCE:
2581 		dev = (dev_t)arg;
2582 		instance = SDUNIT(dev);
2583 		*result = (void *)(uintptr_t)instance;
2584 		error = DDI_SUCCESS;
2585 		break;
2586 	default:
2587 		error = DDI_FAILURE;
2588 	}
2589 	return (error);
2590 }
2591 
2592 /*
2593  *    Function: sd_prop_op
2594  *
2595  * Description: This is the driver prop_op(9e) entry point function.
2596  *		Return the number of blocks for the partition in question
2597  *		or forward the request to the property facilities.
2598  *
2599  *   Arguments: dev       - device number
2600  *		dip       - pointer to device info structure
2601  *		prop_op   - property operator
2602  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2603  *		name      - pointer to property name
2604  *		valuep    - pointer or address of the user buffer
2605  *		lengthp   - property length
2606  *
2607  * Return Code: DDI_PROP_SUCCESS
2608  *              DDI_PROP_NOT_FOUND
2609  *              DDI_PROP_UNDEFINED
2610  *              DDI_PROP_NO_MEMORY
2611  *              DDI_PROP_BUF_TOO_SMALL
2612  */
2613 
2614 static int
2615 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2616 	char *name, caddr_t valuep, int *lengthp)
2617 {
2618 	int		instance = ddi_get_instance(dip);
2619 	struct sd_lun	*un;
2620 	uint64_t	nblocks64;
2621 
2622 	/*
2623 	 * Our dynamic properties are all device specific and size oriented.
2624 	 * Requests issued under conditions where size is valid are passed
2625 	 * to ddi_prop_op_nblocks with the size information, otherwise the
2626 	 * request is passed to ddi_prop_op. Size depends on valid geometry.
2627 	 */
2628 	un = ddi_get_soft_state(sd_state, instance);
2629 	if ((dev == DDI_DEV_T_ANY) || (un == NULL) ||
2630 	    (un->un_f_geometry_is_valid == FALSE)) {
2631 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2632 		    name, valuep, lengthp));
2633 	} else {
2634 		/* get nblocks value */
2635 		ASSERT(!mutex_owned(SD_MUTEX(un)));
2636 		mutex_enter(SD_MUTEX(un));
2637 		nblocks64 = (ulong_t)un->un_map[SDPART(dev)].dkl_nblk;
2638 		mutex_exit(SD_MUTEX(un));
2639 
2640 		return (ddi_prop_op_nblocks(dev, dip, prop_op, mod_flags,
2641 		    name, valuep, lengthp, nblocks64));
2642 	}
2643 }
2644 
2645 /*
2646  * The following functions are for smart probing:
2647  * sd_scsi_probe_cache_init()
2648  * sd_scsi_probe_cache_fini()
2649  * sd_scsi_clear_probe_cache()
2650  * sd_scsi_probe_with_cache()
2651  */
2652 
2653 /*
2654  *    Function: sd_scsi_probe_cache_init
2655  *
2656  * Description: Initializes the probe response cache mutex and head pointer.
2657  *
2658  *     Context: Kernel thread context
2659  */
2660 
2661 static void
2662 sd_scsi_probe_cache_init(void)
2663 {
2664 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2665 	sd_scsi_probe_cache_head = NULL;
2666 }
2667 
2668 
2669 /*
2670  *    Function: sd_scsi_probe_cache_fini
2671  *
2672  * Description: Frees all resources associated with the probe response cache.
2673  *
2674  *     Context: Kernel thread context
2675  */
2676 
2677 static void
2678 sd_scsi_probe_cache_fini(void)
2679 {
2680 	struct sd_scsi_probe_cache *cp;
2681 	struct sd_scsi_probe_cache *ncp;
2682 
2683 	/* Clean up our smart probing linked list */
2684 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2685 		ncp = cp->next;
2686 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2687 	}
2688 	sd_scsi_probe_cache_head = NULL;
2689 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2690 }
2691 
2692 
2693 /*
2694  *    Function: sd_scsi_clear_probe_cache
2695  *
2696  * Description: This routine clears the probe response cache. This is
2697  *		done when open() returns ENXIO so that when deferred
2698  *		attach is attempted (possibly after a device has been
2699  *		turned on) we will retry the probe. Since we don't know
2700  *		which target we failed to open, we just clear the
2701  *		entire cache.
2702  *
2703  *     Context: Kernel thread context
2704  */
2705 
2706 static void
2707 sd_scsi_clear_probe_cache(void)
2708 {
2709 	struct sd_scsi_probe_cache	*cp;
2710 	int				i;
2711 
2712 	mutex_enter(&sd_scsi_probe_cache_mutex);
2713 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2714 		/*
2715 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2716 		 * force probing to be performed the next time
2717 		 * sd_scsi_probe_with_cache is called.
2718 		 */
2719 		for (i = 0; i < NTARGETS_WIDE; i++) {
2720 			cp->cache[i] = SCSIPROBE_EXISTS;
2721 		}
2722 	}
2723 	mutex_exit(&sd_scsi_probe_cache_mutex);
2724 }
2725 
2726 
2727 /*
2728  *    Function: sd_scsi_probe_with_cache
2729  *
2730  * Description: This routine implements support for a scsi device probe
2731  *		with cache. The driver maintains a cache of the target
2732  *		responses to scsi probes. If we get no response from a
2733  *		target during a probe inquiry, we remember that, and we
2734  *		avoid additional calls to scsi_probe on non-zero LUNs
2735  *		on the same target until the cache is cleared. By doing
2736  *		so we avoid the 1/4 sec selection timeout for nonzero
2737  *		LUNs. lun0 of a target is always probed.
2738  *
2739  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
 *              waitfn   - indicates what the allocator routines should
2741  *			   do when resources are not available. This value
2742  *			   is passed on to scsi_probe() when that routine
2743  *			   is called.
2744  *
2745  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2746  *		otherwise the value returned by scsi_probe(9F).
2747  *
2748  *     Context: Kernel thread context
2749  */
2750 
static int
sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
{
	struct sd_scsi_probe_cache	*cp;
	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
	int		lun, tgt;

	/* Fetch this device's lun/target address from its properties. */
	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
	    SCSI_ADDR_PROP_LUN, 0);
	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
	    SCSI_ADDR_PROP_TARGET, -1);

	/* Make sure caching enabled and target in range */
	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
		/* do it the old way (no cache) */
		return (scsi_probe(devp, waitfn));
	}

	mutex_enter(&sd_scsi_probe_cache_mutex);

	/* Find the cache for this scsi bus instance */
	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
		if (cp->pdip == pdip) {
			break;
		}
	}

	/* If we can't find a cache for this pdip, create one */
	if (cp == NULL) {
		int i;

		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
		    KM_SLEEP);
		cp->pdip = pdip;
		cp->next = sd_scsi_probe_cache_head;
		sd_scsi_probe_cache_head = cp;
		for (i = 0; i < NTARGETS_WIDE; i++) {
			cp->cache[i] = SCSIPROBE_EXISTS;
		}
	}

	mutex_exit(&sd_scsi_probe_cache_mutex);

	/*
	 * NOTE(review): cp->cache[tgt] is read and written below without
	 * holding sd_scsi_probe_cache_mutex.  Presumably concurrent probes
	 * of the same target are serialized elsewhere, or a stale value is
	 * tolerated (at worst causing an extra scsi_probe) -- confirm.
	 */

	/* Recompute the cache for this target if LUN zero */
	if (lun == 0) {
		cp->cache[tgt] = SCSIPROBE_EXISTS;
	}

	/* Don't probe if cache remembers a NORESP from a previous LUN. */
	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
		return (SCSIPROBE_NORESP);
	}

	/* Do the actual probe; save & return the result */
	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
}
2807 
2808 
2809 /*
2810  *    Function: sd_spin_up_unit
2811  *
2812  * Description: Issues the following commands to spin-up the device:
2813  *		START STOP UNIT, and INQUIRY.
2814  *
2815  *   Arguments: un - driver soft state (unit) structure
2816  *
2817  * Return Code: 0 - success
2818  *		EIO - failure
2819  *		EACCES - reservation conflict
2820  *
2821  *     Context: Kernel thread context
2822  */
2823 
2824 static int
2825 sd_spin_up_unit(struct sd_lun *un)
2826 {
2827 	size_t	resid		= 0;
2828 	int	has_conflict	= FALSE;
2829 	uchar_t *bufaddr;
2830 
2831 	ASSERT(un != NULL);
2832 
2833 	/*
2834 	 * Send a throwaway START UNIT command.
2835 	 *
2836 	 * If we fail on this, we don't care presently what precisely
2837 	 * is wrong.  EMC's arrays will also fail this with a check
2838 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
2839 	 * we don't want to fail the attach because it may become
2840 	 * "active" later.
2841 	 */
2842 	if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START, SD_PATH_DIRECT)
2843 	    == EACCES)
2844 		has_conflict = TRUE;
2845 
2846 	/*
2847 	 * Send another INQUIRY command to the target. This is necessary for
2848 	 * non-removable media direct access devices because their INQUIRY data
2849 	 * may not be fully qualified until they are spun up (perhaps via the
2850 	 * START command above).  Note: This seems to be needed for some
2851 	 * legacy devices only.) The INQUIRY command should succeed even if a
2852 	 * Reservation Conflict is present.
2853 	 */
2854 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
2855 	if (sd_send_scsi_INQUIRY(un, bufaddr, SUN_INQSIZE, 0, 0, &resid) != 0) {
2856 		kmem_free(bufaddr, SUN_INQSIZE);
2857 		return (EIO);
2858 	}
2859 
2860 	/*
2861 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
2862 	 * Note that this routine does not return a failure here even if the
2863 	 * INQUIRY command did not return any data.  This is a legacy behavior.
2864 	 */
2865 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
2866 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
2867 	}
2868 
2869 	kmem_free(bufaddr, SUN_INQSIZE);
2870 
2871 	/* If we hit a reservation conflict above, tell the caller. */
2872 	if (has_conflict == TRUE) {
2873 		return (EACCES);
2874 	}
2875 
2876 	return (0);
2877 }
2878 
2879 #ifdef _LP64
2880 /*
2881  *    Function: sd_enable_descr_sense
2882  *
2883  * Description: This routine attempts to select descriptor sense format
2884  *		using the Control mode page.  Devices that support 64 bit
2885  *		LBAs (for >2TB luns) should also implement descriptor
2886  *		sense data so we will call this function whenever we see
2887  *		a lun larger than 2TB.  If for some reason the device
2888  *		supports 64 bit LBAs but doesn't support descriptor sense
2889  *		presumably the mode select will fail.  Everything will
2890  *		continue to work normally except that we will not get
2891  *		complete sense data for commands that fail with an LBA
2892  *		larger than 32 bits.
2893  *
2894  *   Arguments: un - driver soft state (unit) structure
2895  *
2896  *     Context: Kernel thread context only
2897  */
2898 
2899 static void
2900 sd_enable_descr_sense(struct sd_lun *un)
2901 {
2902 	uchar_t			*header;
2903 	struct mode_control_scsi3 *ctrl_bufp;
2904 	size_t			buflen;
2905 	size_t			bd_len;
2906 
2907 	/*
2908 	 * Read MODE SENSE page 0xA, Control Mode Page
2909 	 */
2910 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
2911 	    sizeof (struct mode_control_scsi3);
2912 	header = kmem_zalloc(buflen, KM_SLEEP);
2913 	if (sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
2914 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT) != 0) {
2915 		SD_ERROR(SD_LOG_COMMON, un,
2916 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
2917 		goto eds_exit;
2918 	}
2919 
2920 	/*
2921 	 * Determine size of Block Descriptors in order to locate
2922 	 * the mode page data. ATAPI devices return 0, SCSI devices
2923 	 * should return MODE_BLK_DESC_LENGTH.
2924 	 */
2925 	bd_len  = ((struct mode_header *)header)->bdesc_length;
2926 
2927 	ctrl_bufp = (struct mode_control_scsi3 *)
2928 	    (header + MODE_HEADER_LENGTH + bd_len);
2929 
2930 	/*
2931 	 * Clear PS bit for MODE SELECT
2932 	 */
2933 	ctrl_bufp->mode_page.ps = 0;
2934 
2935 	/*
2936 	 * Set D_SENSE to enable descriptor sense format.
2937 	 */
2938 	ctrl_bufp->d_sense = 1;
2939 
2940 	/*
2941 	 * Use MODE SELECT to commit the change to the D_SENSE bit
2942 	 */
2943 	if (sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
2944 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT) != 0) {
2945 		SD_INFO(SD_LOG_COMMON, un,
2946 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
2947 		goto eds_exit;
2948 	}
2949 
2950 eds_exit:
2951 	kmem_free(header, buflen);
2952 }
2953 #endif /* _LP64 */
2954 
2955 
2956 /*
2957  *    Function: sd_set_mmc_caps
2958  *
2959  * Description: This routine determines if the device is MMC compliant and if
2960  *		the device supports CDDA via a mode sense of the CDVD
2961  *		capabilities mode page. Also checks if the device is a
2962  *		dvdram writable device.
2963  *
2964  *   Arguments: un - driver soft state (unit) structure
2965  *
2966  *     Context: Kernel thread context only
2967  */
2968 
static void
sd_set_mmc_caps(struct sd_lun *un)
{
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	caddr_t				buf;
	int				bd_len;
	int				status;
	struct uscsi_cmd		com;
	int				rtn;
	uchar_t				*out_data_rw, *out_data_hd;
	uchar_t				*rqbuf_rw, *rqbuf_hd;

	ASSERT(un != NULL);

	/*
	 * The flags which will be set in this function are - mmc compliant,
	 * dvdram writable device, cdda support. Initialize them to FALSE
	 * and if a capability is detected - it will be set to TRUE.
	 */
	un->un_f_mmc_cap = FALSE;
	un->un_f_dvdram_writable_device = FALSE;
	un->un_f_cfg_cdda = FALSE;

	/* Issue MODE SENSE for the CDROM CAPABILITIES page (0x2A). */
	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);

	if (status != 0) {
		/* command failed; just return */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	/*
	 * If the mode sense request for the CDROM CAPABILITIES
	 * page (0x2A) succeeds the device is assumed to be MMC.
	 */
	un->un_f_mmc_cap = TRUE;

	/* Get to the page data */
	sense_mhp = (struct mode_header_grp2 *)buf;
	/* Block descriptor length is a 16-bit big-endian field pair. */
	bd_len = (sense_mhp->bdesc_length_hi << 8) |
	    sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/*
		 * We did not get back the expected block descriptor
		 * length so we cannot determine if the device supports
		 * CDDA. However, we still indicate the device is MMC
		 * according to the successful response to the page
		 * 0x2A mode sense request.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_set_mmc_caps: Mode Sense returned "
		    "invalid block descriptor length\n");
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/* See if read CDDA is supported */
	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
	    bd_len);
	/* assumes byte 5 bit 0 is the CD-DA read bit -- per MMC; confirm */
	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;

	/* See if writing DVD RAM is supported. */
	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
	if (un->un_f_dvdram_writable_device == TRUE) {
		/* DVD-RAM writable: no further checks required. */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since a RRD will not have
	 * these capabilities.
	 */
	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);

	/*
	 * If un->un_f_dvdram_writable_device is still FALSE,
	 * check for a Removable Rigid Disk (RRD).  A RRD
	 * device is identified by the features RANDOM_WRITABLE and
	 * HARDWARE_DEFECT_MANAGEMENT.
	 */
	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
	    RANDOM_WRITABLE);
	if (rtn != 0) {
		/* RANDOM_WRITABLE query failed; not an RRD. */
		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
		kmem_free(rqbuf_rw, SENSE_LENGTH);
		return;
	}

	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
	    HARDWARE_DEFECT_MANAGEMENT);
	if (rtn == 0) {
		/*
		 * We have good information, check for random writable
		 * and hardware defect features.
		 */
		/* presumably byte 9 holds the feature code low byte; verify */
		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
			un->un_f_dvdram_writable_device = TRUE;
		}
	}

	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_rw, SENSE_LENGTH);
	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_hd, SENSE_LENGTH);
}
3090 
3091 /*
3092  *    Function: sd_check_for_writable_cd
3093  *
3094  * Description: This routine determines if the media in the device is
3095  *		writable or not. It uses the get configuration command (0x46)
3096  *		to determine if the media is writable
3097  *
3098  *   Arguments: un - driver soft state (unit) structure
3099  *
3100  *     Context: Never called at interrupt context.
3101  */
3102 
static void
sd_check_for_writable_cd(struct sd_lun *un)
{
	struct uscsi_cmd		com;
	uchar_t				*out_data;
	uchar_t				*rqbuf;
	int				rtn;
	uchar_t				*out_data_rw, *out_data_hd;
	uchar_t				*rqbuf_rw, *rqbuf_hd;
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	caddr_t				buf;
	int				bd_len;
	int				status;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/*
	 * The caller holds SD_MUTEX; it is dropped around each blocking
	 * SCSI command below and reacquired before touching un state,
	 * and is held again on return.
	 */

	/*
	 * Initialize the writable media to false, if configuration info.
	 * tells us otherwise then only we will set it.
	 */
	un->un_f_mmc_writable_media = FALSE;
	mutex_exit(SD_MUTEX(un));

	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	/* Ask the device for its current profile via GET CONFIGURATION. */
	rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf, SENSE_LENGTH,
	    out_data, SD_PROFILE_HEADER_LEN);

	mutex_enter(SD_MUTEX(un));
	if (rtn == 0) {
		/*
		 * We have good information, check for writable DVD.
		 */
		/* bytes 6-7: current profile; 0x0012 is presumably the
		 * DVD-RAM profile -- confirm against the MMC spec */
		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
			un->un_f_mmc_writable_media = TRUE;
			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
			kmem_free(rqbuf, SENSE_LENGTH);
			return;
		}
	}

	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
	kmem_free(rqbuf, SENSE_LENGTH);

	/*
	 * Determine if this is a RRD type device.
	 */
	mutex_exit(SD_MUTEX(un));
	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, (uchar_t *)buf,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
	mutex_enter(SD_MUTEX(un));
	if (status != 0) {
		/* command failed; just return */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/* Get to the page data */
	sense_mhp = (struct mode_header_grp2 *)buf;
	/* Block descriptor length is a 16-bit big-endian field pair. */
	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/*
		 * We did not get back the expected block descriptor length so
		 * we cannot check the mode page.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_check_for_writable_cd: Mode Sense returned "
		    "invalid block descriptor length\n");
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since a RRD device will not have
	 * these capabilities.
	 */
	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);

	/*
	 * If un->un_f_mmc_writable_media is still FALSE,
	 * check for RRD type media.  A RRD device is identified
	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
	 */
	mutex_exit(SD_MUTEX(un));
	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_rw,
	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
	    RANDOM_WRITABLE);
	if (rtn != 0) {
		/* Feature query failed: free buffers, retake the mutex. */
		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
		kmem_free(rqbuf_rw, SENSE_LENGTH);
		mutex_enter(SD_MUTEX(un));
		return;
	}

	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(un, &com, rqbuf_hd,
	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
	    HARDWARE_DEFECT_MANAGEMENT);
	mutex_enter(SD_MUTEX(un));
	if (rtn == 0) {
		/*
		 * We have good information, check for random writable
		 * and hardware defect features as current.
		 */
		/* byte 10 bit 0 is presumably the feature "current" bit */
		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
		    (out_data_rw[10] & 0x1) &&
		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
		    (out_data_hd[10] & 0x1)) {
			un->un_f_mmc_writable_media = TRUE;
		}
	}

	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_rw, SENSE_LENGTH);
	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_hd, SENSE_LENGTH);
}
3235 
3236 /*
3237  *    Function: sd_read_unit_properties
3238  *
3239  * Description: The following implements a property lookup mechanism.
3240  *		Properties for particular disks (keyed on vendor, model
3241  *		and rev numbers) are sought in the sd.conf file via
3242  *		sd_process_sdconf_file(), and if not found there, are
3243  *		looked for in a list hardcoded in this driver via
3244  *		sd_process_sdconf_table() Once located the properties
3245  *		are used to update the driver unit structure.
3246  *
3247  *   Arguments: un - driver soft state (unit) structure
3248  */
3249 
3250 static void
3251 sd_read_unit_properties(struct sd_lun *un)
3252 {
3253 	/*
3254 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3255 	 * the "sd-config-list" property (from the sd.conf file) or if
3256 	 * there was not a match for the inquiry vid/pid. If this event
3257 	 * occurs the static driver configuration table is searched for
3258 	 * a match.
3259 	 */
3260 	ASSERT(un != NULL);
3261 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3262 		sd_process_sdconf_table(un);
3263 	}
3264 
3265 	/* check for LSI device */
3266 	sd_is_lsi(un);
3267 
3268 
3269 }
3270 
3271 
3272 /*
3273  *    Function: sd_process_sdconf_file
3274  *
3275  * Description: Use ddi_getlongprop to obtain the properties from the
3276  *		driver's config file (ie, sd.conf) and update the driver
3277  *		soft state structure accordingly.
3278  *
3279  *   Arguments: un - driver soft state (unit) structure
3280  *
3281  * Return Code: SD_SUCCESS - The properties were successfully set according
3282  *			     to the driver configuration file.
3283  *		SD_FAILURE - The driver config list was not obtained or
3284  *			     there was no vid/pid match. This indicates that
3285  *			     the static config table should be used.
3286  *
3287  * The config file has a property, "sd-config-list", which consists of
3288  * one or more duplets as follows:
3289  *
3290  *  sd-config-list=
3291  *	<duplet>,
3292  *	[<duplet>,]
3293  *	[<duplet>];
3294  *
3295  * The structure of each duplet is as follows:
3296  *
3297  *  <duplet>:= <vid+pid>,<data-property-name_list>
3298  *
3299  * The first entry of the duplet is the device ID string (the concatenated
3300  * vid & pid; not to be confused with a device_id).  This is defined in
3301  * the same way as in the sd_disk_table.
3302  *
3303  * The second part of the duplet is a string that identifies a
3304  * data-property-name-list. The data-property-name-list is defined as
3305  * follows:
3306  *
3307  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3308  *
3309  * The syntax of <data-property-name> depends on the <version> field.
3310  *
3311  * If version = SD_CONF_VERSION_1 we have the following syntax:
3312  *
3313  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3314  *
3315  * where the prop0 value will be used to set prop0 if bit0 set in the
3316  * flags, prop1 if bit1 set, etc. and N = SD_CONF_MAX_ITEMS -1
3317  *
3318  */
3319 
static int
sd_process_sdconf_file(struct sd_lun *un)
{
	char	*config_list = NULL;
	int	config_list_len;
	int	len;
	int	dupletlen = 0;
	char	*vidptr;
	int	vidlen;
	char	*dnlist_ptr;
	char	*dataname_ptr;
	int	dnlist_len;
	int	dataname_len;
	int	*data_list;
	int	data_list_len;
	int	rval = SD_FAILURE;
	int	i;

	ASSERT(un != NULL);

	/* Obtain the configuration list associated with the .conf file */
	if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), DDI_PROP_DONTPASS,
	    sd_config_list, (caddr_t)&config_list, &config_list_len)
	    != DDI_PROP_SUCCESS) {
		return (SD_FAILURE);
	}

	/*
	 * Compare vids in each duplet to the inquiry vid - if a match is
	 * made, get the data value and update the soft state structure
	 * accordingly.
	 *
	 * Note: This algorithm is complex and difficult to maintain. It should
	 * be replaced with a more robust implementation.
	 */
	for (len = config_list_len, vidptr = config_list; len > 0;
	    vidptr += dupletlen, len -= dupletlen) {
		/*
		 * Note: The assumption here is that each vid entry is on
		 * a unique line from its associated duplet.
		 */
		vidlen = dupletlen = (int)strlen(vidptr);
		if ((vidlen == 0) ||
		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
			/* No match: advance past this vid string and its NUL */
			dupletlen++;
			continue;
		}

		/*
		 * dnlist contains 1 or more blank separated
		 * data-property-name entries
		 */
		dnlist_ptr = vidptr + vidlen + 1;
		dnlist_len = (int)strlen(dnlist_ptr);
		/*
		 * +2 presumably covers the NULs terminating both the vid
		 * string and the name list -- verify against the property
		 * encoding produced by the .conf parser.
		 */
		dupletlen += dnlist_len + 2;

		/*
		 * Set a pointer for the first data-property-name
		 * entry in the list
		 */
		dataname_ptr = dnlist_ptr;
		dataname_len = 0;

		/*
		 * Loop through all data-property-name entries in the
		 * data-property-name-list setting the properties for each.
		 */
		while (dataname_len < dnlist_len) {
			int version;

			/*
			 * Determine the length of the current
			 * data-property-name entry by indexing until a
			 * blank or NULL is encountered. When the space is
			 * encountered reset it to a NULL for compliance
			 * with ddi_getlongprop().
			 */
			for (i = 0; ((dataname_ptr[i] != ' ') &&
			    (dataname_ptr[i] != '\0')); i++) {
				;
			}

			dataname_len += i;
			/* If not null terminated, Make it so */
			if (dataname_ptr[i] == ' ') {
				dataname_ptr[i] = '\0';
			}
			/* Count the separator (space or NUL) just consumed */
			dataname_len++;
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_process_sdconf_file: disk:%s, data:%s\n",
			    vidptr, dataname_ptr);

			/* Get the data list */
			if (ddi_getlongprop(DDI_DEV_T_ANY, SD_DEVINFO(un), 0,
			    dataname_ptr, (caddr_t)&data_list, &data_list_len)
			    != DDI_PROP_SUCCESS) {
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				    "sd_process_sdconf_file: data property (%s)"
				    " has no value\n", dataname_ptr);
				dataname_ptr = dnlist_ptr + dataname_len;
				continue;
			}

			/* data_list[0] is the format version of the entry */
			version = data_list[0];

			if (version == SD_CONF_VERSION_1) {
				sd_tunables values;

				/* Set the properties */
				if (sd_chk_vers1_data(un, data_list[1],
				    &data_list[2], data_list_len, dataname_ptr)
				    == SD_SUCCESS) {
					sd_get_tunables_from_conf(un,
					    data_list[1], &data_list[2],
					    &values);
					sd_set_vers1_properties(un,
					    data_list[1], &values);
					rval = SD_SUCCESS;
				} else {
					rval = SD_FAILURE;
				}
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "data property %s version 0x%x is invalid.",
				    dataname_ptr, version);
				rval = SD_FAILURE;
			}
			kmem_free(data_list, data_list_len);
			/* Advance to the next data-property-name entry */
			dataname_ptr = dnlist_ptr + dataname_len;
		}
	}

	/* free up the memory allocated by ddi_getlongprop */
	if (config_list) {
		kmem_free(config_list, config_list_len);
	}

	return (rval);
}
3459 
3460 /*
3461  *    Function: sd_get_tunables_from_conf()
3462  *
3463  *
3464  *    This function reads the data list from the sd.conf file and pulls
3465  *    the values that can have numeric values as arguments and places
3466  *    the values in the apropriate sd_tunables member.
3467  *    Since the order of the data list members varies across platforms
3468  *    This function reads them from the data list in a platform specific
3469  *    order and places them into the correct sd_tunable member that is
3470  *    a consistant across all platforms.
3471  */
3472 static void
3473 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
3474     sd_tunables *values)
3475 {
3476 	int i;
3477 	int mask;
3478 
3479 	bzero(values, sizeof (sd_tunables));
3480 
3481 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3482 
3483 		mask = 1 << i;
3484 		if (mask > flags) {
3485 			break;
3486 		}
3487 
3488 		switch (mask & flags) {
3489 		case 0:	/* This mask bit not set in flags */
3490 			continue;
3491 		case SD_CONF_BSET_THROTTLE:
3492 			values->sdt_throttle = data_list[i];
3493 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3494 			    "sd_get_tunables_from_conf: throttle = %d\n",
3495 			    values->sdt_throttle);
3496 			break;
3497 		case SD_CONF_BSET_CTYPE:
3498 			values->sdt_ctype = data_list[i];
3499 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3500 			    "sd_get_tunables_from_conf: ctype = %d\n",
3501 			    values->sdt_ctype);
3502 			break;
3503 		case SD_CONF_BSET_NRR_COUNT:
3504 			values->sdt_not_rdy_retries = data_list[i];
3505 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3506 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
3507 			    values->sdt_not_rdy_retries);
3508 			break;
3509 		case SD_CONF_BSET_BSY_RETRY_COUNT:
3510 			values->sdt_busy_retries = data_list[i];
3511 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3512 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
3513 			    values->sdt_busy_retries);
3514 			break;
3515 		case SD_CONF_BSET_RST_RETRIES:
3516 			values->sdt_reset_retries = data_list[i];
3517 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3518 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
3519 			    values->sdt_reset_retries);
3520 			break;
3521 		case SD_CONF_BSET_RSV_REL_TIME:
3522 			values->sdt_reserv_rel_time = data_list[i];
3523 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3524 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
3525 			    values->sdt_reserv_rel_time);
3526 			break;
3527 		case SD_CONF_BSET_MIN_THROTTLE:
3528 			values->sdt_min_throttle = data_list[i];
3529 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3530 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
3531 			    values->sdt_min_throttle);
3532 			break;
3533 		case SD_CONF_BSET_DISKSORT_DISABLED:
3534 			values->sdt_disk_sort_dis = data_list[i];
3535 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3536 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
3537 			    values->sdt_disk_sort_dis);
3538 			break;
3539 		case SD_CONF_BSET_LUN_RESET_ENABLED:
3540 			values->sdt_lun_reset_enable = data_list[i];
3541 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3542 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
3543 			    "\n", values->sdt_lun_reset_enable);
3544 			break;
3545 		}
3546 	}
3547 }
3548 
3549 /*
3550  *    Function: sd_process_sdconf_table
3551  *
3552  * Description: Search the static configuration table for a match on the
3553  *		inquiry vid/pid and update the driver soft state structure
3554  *		according to the table property values for the device.
3555  *
3556  *		The form of a configuration table entry is:
3557  *		  <vid+pid>,<flags>,<property-data>
3558  *		  "SEAGATE ST42400N",1,63,0,0			(Fibre)
3559  *		  "SEAGATE ST42400N",1,63,0,0,0,0		(Sparc)
3560  *		  "SEAGATE ST42400N",1,63,0,0,0,0,0,0,0,0,0,0	(Intel)
3561  *
3562  *   Arguments: un - driver soft state (unit) structure
3563  */
3564 
3565 static void
3566 sd_process_sdconf_table(struct sd_lun *un)
3567 {
3568 	char	*id = NULL;
3569 	int	table_index;
3570 	int	idlen;
3571 
3572 	ASSERT(un != NULL);
3573 	for (table_index = 0; table_index < sd_disk_table_size;
3574 	    table_index++) {
3575 		id = sd_disk_table[table_index].device_id;
3576 		idlen = strlen(id);
3577 		if (idlen == 0) {
3578 			continue;
3579 		}
3580 
3581 		/*
3582 		 * The static configuration table currently does not
3583 		 * implement version 10 properties. Additionally,
3584 		 * multiple data-property-name entries are not
3585 		 * implemented in the static configuration table.
3586 		 */
3587 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
3588 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3589 			    "sd_process_sdconf_table: disk %s\n", id);
3590 			sd_set_vers1_properties(un,
3591 			    sd_disk_table[table_index].flags,
3592 			    sd_disk_table[table_index].properties);
3593 			break;
3594 		}
3595 	}
3596 }
3597 
3598 
3599 /*
3600  *    Function: sd_sdconf_id_match
3601  *
3602  * Description: This local function implements a case sensitive vid/pid
3603  *		comparison as well as the boundary cases of wild card and
3604  *		multiple blanks.
3605  *
3606  *		Note: An implicit assumption made here is that the scsi
3607  *		inquiry structure will always keep the vid, pid and
3608  *		revision strings in consecutive sequence, so they can be
3609  *		read as a single string. If this assumption is not the
3610  *		case, a separate string, to be used for the check, needs
3611  *		to be built with these strings concatenated.
3612  *
3613  *   Arguments: un - driver soft state (unit) structure
3614  *		id - table or config file vid/pid
3615  *		idlen  - length of the vid/pid (bytes)
3616  *
3617  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3618  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3619  */
3620 
3621 static int
3622 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
3623 {
3624 	struct scsi_inquiry	*sd_inq;
3625 	int 			rval = SD_SUCCESS;
3626 
3627 	ASSERT(un != NULL);
3628 	sd_inq = un->un_sd->sd_inq;
3629 	ASSERT(id != NULL);
3630 
3631 	/*
3632 	 * We use the inq_vid as a pointer to a buffer containing the
3633 	 * vid and pid and use the entire vid/pid length of the table
3634 	 * entry for the comparison. This works because the inq_pid
3635 	 * data member follows inq_vid in the scsi_inquiry structure.
3636 	 */
3637 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
3638 		/*
3639 		 * The user id string is compared to the inquiry vid/pid
3640 		 * using a case insensitive comparison and ignoring
3641 		 * multiple spaces.
3642 		 */
3643 		rval = sd_blank_cmp(un, id, idlen);
3644 		if (rval != SD_SUCCESS) {
3645 			/*
3646 			 * User id strings that start and end with a "*"
3647 			 * are a special case. These do not have a
3648 			 * specific vendor, and the product string can
3649 			 * appear anywhere in the 16 byte PID portion of
3650 			 * the inquiry data. This is a simple strstr()
3651 			 * type search for the user id in the inquiry data.
3652 			 */
3653 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
3654 				char	*pidptr = &id[1];
3655 				int	i;
3656 				int	j;
3657 				int	pidstrlen = idlen - 2;
3658 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
3659 				    pidstrlen;
3660 
3661 				if (j < 0) {
3662 					return (SD_FAILURE);
3663 				}
3664 				for (i = 0; i < j; i++) {
3665 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
3666 					    pidptr, pidstrlen) == 0) {
3667 						rval = SD_SUCCESS;
3668 						break;
3669 					}
3670 				}
3671 			}
3672 		}
3673 	}
3674 	return (rval);
3675 }
3676 
3677 
3678 /*
3679  *    Function: sd_blank_cmp
3680  *
3681  * Description: If the id string starts and ends with a space, treat
3682  *		multiple consecutive spaces as equivalent to a single
3683  *		space. For example, this causes a sd_disk_table entry
3684  *		of " NEC CDROM " to match a device's id string of
3685  *		"NEC       CDROM".
3686  *
3687  *		Note: The success exit condition for this routine is if
3688  *		the pointer to the table entry is '\0' and the cnt of
3689  *		the inquiry length is zero. This will happen if the inquiry
3690  *		string returned by the device is padded with spaces to be
3691  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
3692  *		SCSI spec states that the inquiry string is to be padded with
3693  *		spaces.
3694  *
3695  *   Arguments: un - driver soft state (unit) structure
3696  *		id - table or config file vid/pid
3697  *		idlen  - length of the vid/pid (bytes)
3698  *
3699  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
3700  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
3701  */
3702 
static int
sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
{
	char		*p1;
	char		*p2;
	int		cnt;
	/*
	 * cnt is the total inquiry vid+pid length (8 byte vid + 16 byte
	 * pid).  Note: sizeof is evaluated at compile time, so computing
	 * cnt here, before the ASSERT(un != NULL) below, never actually
	 * dereferences un.
	 */
	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
	    sizeof (SD_INQUIRY(un)->inq_pid);

	ASSERT(un != NULL);
	/* p2: inquiry vid+pid data, NOT NUL-terminated (bounded by cnt) */
	p2 = un->un_sd->sd_inq->inq_vid;
	ASSERT(id != NULL);
	/* p1: table/config id string, NUL-terminated */
	p1 = id;

	/*
	 * The blank-collapsing comparison only applies to id strings that
	 * both start and end with a space; otherwise p1/p2/cnt are left
	 * untouched and the final test below returns SD_FAILURE.
	 */
	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
		/*
		 * Note: string p1 is terminated by a NUL but string p2
		 * isn't.  The end of p2 is determined by cnt.
		 */
		for (;;) {
			/* skip over any extra blanks in both strings */
			while ((*p1 != '\0') && (*p1 == ' ')) {
				p1++;
			}
			while ((cnt != 0) && (*p2 == ' ')) {
				p2++;
				cnt--;
			}

			/* compare the two strings */
			if ((cnt == 0) ||
			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
				break;
			}
			/*
			 * Advance past the run of (case-insensitively)
			 * matching characters.  A mismatch on a blank
			 * resumes the blank skipping at the top of the
			 * loop; any other mismatch exits via the test
			 * above on the next iteration.
			 */
			while ((cnt > 0) &&
			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
				p1++;
				p2++;
				cnt--;
			}
		}
	}

	/* return SD_SUCCESS if both strings match */
	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
}
3749 
3750 
3751 /*
3752  *    Function: sd_chk_vers1_data
3753  *
3754  * Description: Verify the version 1 device properties provided by the
3755  *		user via the configuration file
3756  *
3757  *   Arguments: un	     - driver soft state (unit) structure
3758  *		flags	     - integer mask indicating properties to be set
3759  *		prop_list    - integer list of property values
3760  *		list_len     - length of user provided data
3761  *
3762  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
3763  *		SD_FAILURE - Indicates the user provided data is invalid
3764  */
3765 
3766 static int
3767 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
3768     int list_len, char *dataname_ptr)
3769 {
3770 	int i;
3771 	int mask = 1;
3772 	int index = 0;
3773 
3774 	ASSERT(un != NULL);
3775 
3776 	/* Check for a NULL property name and list */
3777 	if (dataname_ptr == NULL) {
3778 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3779 		    "sd_chk_vers1_data: NULL data property name.");
3780 		return (SD_FAILURE);
3781 	}
3782 	if (prop_list == NULL) {
3783 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3784 		    "sd_chk_vers1_data: %s NULL data property list.",
3785 		    dataname_ptr);
3786 		return (SD_FAILURE);
3787 	}
3788 
3789 	/* Display a warning if undefined bits are set in the flags */
3790 	if (flags & ~SD_CONF_BIT_MASK) {
3791 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3792 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
3793 		    "Properties not set.",
3794 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
3795 		return (SD_FAILURE);
3796 	}
3797 
3798 	/*
3799 	 * Verify the length of the list by identifying the highest bit set
3800 	 * in the flags and validating that the property list has a length
3801 	 * up to the index of this bit.
3802 	 */
3803 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
3804 		if (flags & mask) {
3805 			index++;
3806 		}
3807 		mask = 1 << i;
3808 	}
3809 	if ((list_len / sizeof (int)) < (index + 2)) {
3810 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3811 		    "sd_chk_vers1_data: "
3812 		    "Data property list %s size is incorrect. "
3813 		    "Properties not set.", dataname_ptr);
3814 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
3815 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
3816 		return (SD_FAILURE);
3817 	}
3818 	return (SD_SUCCESS);
3819 }
3820 
3821 
3822 /*
3823  *    Function: sd_set_vers1_properties
3824  *
3825  * Description: Set version 1 device properties based on a property list
3826  *		retrieved from the driver configuration file or static
3827  *		configuration table. Version 1 properties have the format:
3828  *
3829  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3830  *
3831  *		where the prop0 value will be used to set prop0 if bit0
3832  *		is set in the flags
3833  *
3834  *   Arguments: un	     - driver soft state (unit) structure
3835  *		flags	     - integer mask indicating properties to be set
3836  *		prop_list    - integer list of property values
3837  */
3838 
3839 static void
3840 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
3841 {
3842 	ASSERT(un != NULL);
3843 
3844 	/*
3845 	 * Set the flag to indicate cache is to be disabled. An attempt
3846 	 * to disable the cache via sd_cache_control() will be made
3847 	 * later during attach once the basic initialization is complete.
3848 	 */
3849 	if (flags & SD_CONF_BSET_NOCACHE) {
3850 		un->un_f_opt_disable_cache = TRUE;
3851 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3852 		    "sd_set_vers1_properties: caching disabled flag set\n");
3853 	}
3854 
3855 	/* CD-specific configuration parameters */
3856 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
3857 		un->un_f_cfg_playmsf_bcd = TRUE;
3858 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3859 		    "sd_set_vers1_properties: playmsf_bcd set\n");
3860 	}
3861 	if (flags & SD_CONF_BSET_READSUB_BCD) {
3862 		un->un_f_cfg_readsub_bcd = TRUE;
3863 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3864 		    "sd_set_vers1_properties: readsub_bcd set\n");
3865 	}
3866 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
3867 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
3868 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3869 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
3870 	}
3871 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
3872 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
3873 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3874 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
3875 	}
3876 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
3877 		un->un_f_cfg_no_read_header = TRUE;
3878 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3879 			    "sd_set_vers1_properties: no_read_header set\n");
3880 	}
3881 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
3882 		un->un_f_cfg_read_cd_xd4 = TRUE;
3883 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3884 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
3885 	}
3886 
3887 	/* Support for devices which do not have valid/unique serial numbers */
3888 	if (flags & SD_CONF_BSET_FAB_DEVID) {
3889 		un->un_f_opt_fab_devid = TRUE;
3890 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3891 		    "sd_set_vers1_properties: fab_devid bit set\n");
3892 	}
3893 
3894 	/* Support for user throttle configuration */
3895 	if (flags & SD_CONF_BSET_THROTTLE) {
3896 		ASSERT(prop_list != NULL);
3897 		un->un_saved_throttle = un->un_throttle =
3898 		    prop_list->sdt_throttle;
3899 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3900 		    "sd_set_vers1_properties: throttle set to %d\n",
3901 		    prop_list->sdt_throttle);
3902 	}
3903 
3904 	/* Set the per disk retry count according to the conf file or table. */
3905 	if (flags & SD_CONF_BSET_NRR_COUNT) {
3906 		ASSERT(prop_list != NULL);
3907 		if (prop_list->sdt_not_rdy_retries) {
3908 			un->un_notready_retry_count =
3909 				prop_list->sdt_not_rdy_retries;
3910 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3911 			    "sd_set_vers1_properties: not ready retry count"
3912 			    " set to %d\n", un->un_notready_retry_count);
3913 		}
3914 	}
3915 
3916 	/* The controller type is reported for generic disk driver ioctls */
3917 	if (flags & SD_CONF_BSET_CTYPE) {
3918 		ASSERT(prop_list != NULL);
3919 		switch (prop_list->sdt_ctype) {
3920 		case CTYPE_CDROM:
3921 			un->un_ctype = prop_list->sdt_ctype;
3922 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3923 			    "sd_set_vers1_properties: ctype set to "
3924 			    "CTYPE_CDROM\n");
3925 			break;
3926 		case CTYPE_CCS:
3927 			un->un_ctype = prop_list->sdt_ctype;
3928 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3929 				"sd_set_vers1_properties: ctype set to "
3930 				"CTYPE_CCS\n");
3931 			break;
3932 		case CTYPE_ROD:		/* RW optical */
3933 			un->un_ctype = prop_list->sdt_ctype;
3934 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3935 			    "sd_set_vers1_properties: ctype set to "
3936 			    "CTYPE_ROD\n");
3937 			break;
3938 		default:
3939 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3940 			    "sd_set_vers1_properties: Could not set "
3941 			    "invalid ctype value (%d)",
3942 			    prop_list->sdt_ctype);
3943 		}
3944 	}
3945 
3946 	/* Purple failover timeout */
3947 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
3948 		ASSERT(prop_list != NULL);
3949 		un->un_busy_retry_count =
3950 			prop_list->sdt_busy_retries;
3951 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3952 		    "sd_set_vers1_properties: "
3953 		    "busy retry count set to %d\n",
3954 		    un->un_busy_retry_count);
3955 	}
3956 
3957 	/* Purple reset retry count */
3958 	if (flags & SD_CONF_BSET_RST_RETRIES) {
3959 		ASSERT(prop_list != NULL);
3960 		un->un_reset_retry_count =
3961 			prop_list->sdt_reset_retries;
3962 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3963 		    "sd_set_vers1_properties: "
3964 		    "reset retry count set to %d\n",
3965 		    un->un_reset_retry_count);
3966 	}
3967 
3968 	/* Purple reservation release timeout */
3969 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
3970 		ASSERT(prop_list != NULL);
3971 		un->un_reserve_release_time =
3972 			prop_list->sdt_reserv_rel_time;
3973 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3974 		    "sd_set_vers1_properties: "
3975 		    "reservation release timeout set to %d\n",
3976 		    un->un_reserve_release_time);
3977 	}
3978 
3979 	/*
3980 	 * Driver flag telling the driver to verify that no commands are pending
3981 	 * for a device before issuing a Test Unit Ready. This is a workaround
3982 	 * for a firmware bug in some Seagate eliteI drives.
3983 	 */
3984 	if (flags & SD_CONF_BSET_TUR_CHECK) {
3985 		un->un_f_cfg_tur_check = TRUE;
3986 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3987 		    "sd_set_vers1_properties: tur queue check set\n");
3988 	}
3989 
3990 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
3991 		un->un_min_throttle = prop_list->sdt_min_throttle;
3992 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
3993 		    "sd_set_vers1_properties: min throttle set to %d\n",
3994 		    un->un_min_throttle);
3995 	}
3996 
3997 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
3998 		un->un_f_disksort_disabled =
3999 		    (prop_list->sdt_disk_sort_dis != 0) ?
4000 		    TRUE : FALSE;
4001 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4002 		    "sd_set_vers1_properties: disksort disabled "
4003 		    "flag set to %d\n",
4004 		    prop_list->sdt_disk_sort_dis);
4005 	}
4006 
4007 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4008 		un->un_f_lun_reset_enabled =
4009 		    (prop_list->sdt_lun_reset_enable != 0) ?
4010 		    TRUE : FALSE;
4011 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4012 		    "sd_set_vers1_properties: lun reset enabled "
4013 		    "flag set to %d\n",
4014 		    prop_list->sdt_lun_reset_enable);
4015 	}
4016 
4017 	/*
4018 	 * Validate the throttle values.
4019 	 * If any of the numbers are invalid, set everything to defaults.
4020 	 */
4021 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4022 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4023 	    (un->un_min_throttle > un->un_throttle)) {
4024 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4025 		un->un_min_throttle = sd_min_throttle;
4026 	}
4027 }
4028 
4029 /*
4030  *   Function: sd_is_lsi()
4031  *
4032  *   Description: Check for lsi devices, step throught the static device
4033  *	table to match vid/pid.
4034  *
4035  *   Args: un - ptr to sd_lun
4036  *
4037  *   Notes:  When creating new LSI property, need to add the new LSI property
4038  *		to this function.
4039  */
4040 static void
4041 sd_is_lsi(struct sd_lun *un)
4042 {
4043 	char	*id = NULL;
4044 	int	table_index;
4045 	int	idlen;
4046 	void	*prop;
4047 
4048 	ASSERT(un != NULL);
4049 	for (table_index = 0; table_index < sd_disk_table_size;
4050 	    table_index++) {
4051 		id = sd_disk_table[table_index].device_id;
4052 		idlen = strlen(id);
4053 		if (idlen == 0) {
4054 			continue;
4055 		}
4056 
4057 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4058 			prop = sd_disk_table[table_index].properties;
4059 			if (prop == &lsi_properties ||
4060 			    prop == &lsi_oem_properties ||
4061 			    prop == &lsi_properties_scsi ||
4062 			    prop == &symbios_properties) {
4063 				un->un_f_cfg_is_lsi = TRUE;
4064 			}
4065 			break;
4066 		}
4067 	}
4068 }
4069 
4070 
4071 /*
4072  * The following routines support reading and interpretation of disk labels,
4073  * including Solaris BE (8-slice) vtoc's, Solaris LE (16-slice) vtoc's, and
4074  * fdisk tables.
4075  */
4076 
4077 /*
4078  *    Function: sd_validate_geometry
4079  *
4080  * Description: Read the label from the disk (if present). Update the unit's
4081  *		geometry and vtoc information from the data in the label.
4082  *		Verify that the label is valid.
4083  *
4084  *   Arguments: un - driver soft state (unit) structure
4085  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4086  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4087  *			to use the USCSI "direct" chain and bypass the normal
4088  *			command waitq.
4089  *
4090  * Return Code: 0 - Successful completion
4091  *		EINVAL  - Invalid value in un->un_tgt_blocksize or
4092  *			  un->un_blockcount; or label on disk is corrupted
4093  *			  or unreadable.
4094  *		EACCES  - Reservation conflict at the device.
4095  *		ENOMEM  - Resource allocation error
4096  *		ENOTSUP - geometry not applicable
4097  *
4098  *     Context: Kernel thread only (can sleep).
4099  */
4100 
static int
sd_validate_geometry(struct sd_lun *un, int path_flag)
{
	/*
	 * labelstring/buf are static scratch buffers; they are only
	 * written under sd_label_mutex (see the logging branch below).
	 */
	static	char		labelstring[128];
	static	char		buf[256];
	char	*label		= NULL;
	int	label_error	= 0;
	/*
	 * Snapshot the validity flag on entry; used below to log the
	 * label only when this call actually (re)validates the geometry.
	 */
	int	gvalid		= un->un_f_geometry_is_valid;
	int	lbasize;
	uint_t	capacity;
	int	count;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/*
	 * If the required values are not valid, then try getting them
	 * once via read capacity. If that fails, then fail this call.
	 * This is necessary with the new mpxio failover behavior in
	 * the T300 where we can get an attach for the inactive path
	 * before the active path. The inactive path fails commands with
	 * sense data of 02,04,88 which happens to the read capacity
	 * before mpxio has had sufficient knowledge to know if it should
	 * force a fail over or not. (Which it won't do at attach anyhow).
	 * If the read capacity at attach time fails, un_tgt_blocksize and
	 * un_blockcount won't be valid.
	 */
	if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
	    (un->un_f_blockcount_is_valid != TRUE)) {
		uint64_t	cap;
		uint32_t	lbasz;
		int		rval;

		/* The mutex is dropped across the blocking SCSI command. */
		mutex_exit(SD_MUTEX(un));
		rval = sd_send_scsi_READ_CAPACITY(un, &cap,
		    &lbasz, SD_PATH_DIRECT);
		mutex_enter(SD_MUTEX(un));
		if (rval == 0) {
			/*
			 * The following relies on
			 * sd_send_scsi_READ_CAPACITY never
			 * returning 0 for capacity and/or lbasize.
			 */
			sd_update_block_info(un, lbasz, cap);
		}

		if ((un->un_f_tgt_blocksize_is_valid != TRUE) ||
		    (un->un_f_blockcount_is_valid != TRUE)) {
			return (EINVAL);
		}
	}

	/*
	 * Copy the lbasize and capacity so that if they're reset while we're
	 * not holding the SD_MUTEX, we will continue to use valid values
	 * after the SD_MUTEX is reacquired. (4119659)
	 */
	lbasize  = un->un_tgt_blocksize;
	capacity = un->un_blockcount;

#if defined(_SUNOS_VTOC_16)
	/*
	 * Set up the "whole disk" fdisk partition; this should always
	 * exist, regardless of whether the disk contains an fdisk table
	 * or vtoc.
	 */
	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
	un->un_map[P0_RAW_DISK].dkl_nblk  = capacity;
#endif

	/*
	 * Refresh the logical and physical geometry caches.
	 * (data from MODE SENSE format/rigid disk geometry pages,
	 * and scsi_ifgetcap("geometry").
	 */
	sd_resync_geom_caches(un, capacity, lbasize, path_flag);

	/*
	 * Try an EFI label first; a return of 0 means a valid EFI label
	 * was found, in which case VTOC handling is skipped entirely.
	 */
	label_error = sd_use_efi(un, path_flag);
	if (label_error == 0) {
		/* found a valid EFI label */
		SD_TRACE(SD_LOG_IO_PARTITION, un,
			"sd_validate_geometry: found EFI label\n");
		un->un_solaris_offset = 0;
		un->un_solaris_size = capacity;
		return (ENOTSUP);
	}
	if (un->un_blockcount > DK_MAX_BLOCKS) {
		if (label_error == ESRCH) {
			/*
			 * they've configured a LUN over 1TB, but used
			 * format.dat to restrict format's view of the
			 * capacity to be under 1TB
			 */
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
"is >1TB and has a VTOC label: use format(1M) to either decrease the");
			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
"size to be < 1TB or relabel the disk with an EFI label");
		} else {
			/* unlabeled disk over 1TB */
			return (ENOTSUP);
		}
	}
	/* Reset; the VTOC path below determines the final label_error. */
	label_error = 0;

	/*
	 * at this point it is either labeled with a VTOC or it is
	 * under 1TB
	 */
	if (un->un_f_vtoc_label_supported) {
		struct	dk_label *dkl;
		offset_t dkl1;
		offset_t label_addr, real_addr;
		int	rval;
		size_t	buffer_size;

		/*
		 * Note: This will set up un->un_solaris_size and
		 * un->un_solaris_offset.
		 */
		switch (sd_read_fdisk(un, capacity, lbasize, path_flag)) {
		case SD_CMD_RESERVATION_CONFLICT:
			ASSERT(mutex_owned(SD_MUTEX(un)));
			return (EACCES);
		case SD_CMD_FAILURE:
			ASSERT(mutex_owned(SD_MUTEX(un)));
			return (ENOMEM);
		}

		if (un->un_solaris_size <= DK_LABEL_LOC) {
			/*
			 * Found fdisk table but no Solaris partition entry,
			 * so don't call sd_uselabel() and don't create
			 * a default label.
			 */
			label_error = 0;
			un->un_f_geometry_is_valid = TRUE;
			goto no_solaris_partition;
		}
		label_addr = (daddr_t)(un->un_solaris_offset + DK_LABEL_LOC);

		/*
		 * sys_blocksize != tgt_blocksize, need to re-adjust
		 * blkno and save the index to beginning of dk_label
		 */
		real_addr = SD_SYS2TGTBLOCK(un, label_addr);
		buffer_size = SD_REQBYTES2TGTBYTES(un,
		    sizeof (struct dk_label));

		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_validate_geometry: "
		    "label_addr: 0x%x allocation size: 0x%x\n",
		    label_addr, buffer_size);
		dkl = kmem_zalloc(buffer_size, KM_NOSLEEP);
		if (dkl == NULL) {
			return (ENOMEM);
		}

		/* Drop the mutex for the blocking label read. */
		mutex_exit(SD_MUTEX(un));
		rval = sd_send_scsi_READ(un, dkl, buffer_size, real_addr,
		    path_flag);
		mutex_enter(SD_MUTEX(un));

		switch (rval) {
		case 0:
			/*
			 * sd_uselabel will establish that the geometry
			 * is valid.
			 * For sys_blocksize != tgt_blocksize, need
			 * to index into the beginning of dk_label
			 */
			dkl1 = (daddr_t)dkl
				+ SD_TGTBYTEOFFSET(un, label_addr, real_addr);
			if (sd_uselabel(un, (struct dk_label *)(uintptr_t)dkl1,
			    path_flag) != SD_LABEL_IS_VALID) {
				label_error = EINVAL;
			}
			break;
		case EACCES:
			/* reservation conflict reading the label */
			label_error = EACCES;
			break;
		default:
			label_error = EINVAL;
			break;
		}

		kmem_free(dkl, buffer_size);

#if defined(_SUNOS_VTOC_8)
		label = (char *)un->un_asciilabel;
#elif defined(_SUNOS_VTOC_16)
		label = (char *)un->un_vtoc.v_asciilabel;
#else
#error "No VTOC format defined."
#endif
	}

	/*
	 * If a valid label was not found, AND if no reservation conflict
	 * was detected, then go ahead and create a default label (4069506).
	 */

	if (un->un_f_default_vtoc_supported && (label_error != EACCES)) {
		if (un->un_f_geometry_is_valid == FALSE) {
			sd_build_default_label(un);
		}
		label_error = 0;
	}

	/* Reached directly when an fdisk table has no Solaris partition. */
no_solaris_partition:
	if ((!un->un_f_has_removable_media ||
	    (un->un_f_has_removable_media &&
		un->un_mediastate == DKIO_EJECTED)) &&
		(un->un_state == SD_STATE_NORMAL && !gvalid)) {
		/*
		 * Print out a message indicating who and what we are.
		 * We do this only when we happen to really validate the
		 * geometry. We may call sd_validate_geometry() at other
		 * times, e.g., ioctl()'s like Get VTOC in which case we
		 * don't want to print the label.
		 * If the geometry is valid, print the label string,
		 * else print vendor and product info, if available
		 */
		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
			SD_INFO(SD_LOG_ATTACH_DETACH, un, "?<%s>\n", label);
		} else {
			/* sd_label_mutex protects the static buffers above */
			mutex_enter(&sd_label_mutex);
			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
			    labelstring);
			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
			    &labelstring[64]);
			(void) sprintf(buf, "?Vendor '%s', product '%s'",
			    labelstring, &labelstring[64]);
			if (un->un_f_blockcount_is_valid == TRUE) {
				(void) sprintf(&buf[strlen(buf)],
				    ", %llu %u byte blocks\n",
				    (longlong_t)un->un_blockcount,
				    un->un_tgt_blocksize);
			} else {
				(void) sprintf(&buf[strlen(buf)],
				    ", (unknown capacity)\n");
			}
			SD_INFO(SD_LOG_ATTACH_DETACH, un, buf);
			mutex_exit(&sd_label_mutex);
		}
	}

#if defined(_SUNOS_VTOC_16)
	/*
	 * If we have valid geometry, set up the remaining fdisk partitions.
	 * Note that dkl_cylno is not used for the fdisk map entries, so
	 * we set it to an entirely bogus value.
	 */
	for (count = 0; count < FD_NUMPART; count++) {
		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
		un->un_map[FDISK_P1 + count].dkl_nblk =
		    un->un_fmap[count].fmap_nblk;

		un->un_offset[FDISK_P1 + count] =
		    un->un_fmap[count].fmap_start;
	}
#endif

	/* Compute the absolute block offset of every slice in the map. */
	for (count = 0; count < NDKMAP; count++) {
#if defined(_SUNOS_VTOC_8)
		struct dk_map *lp  = &un->un_map[count];
		un->un_offset[count] =
		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
#elif defined(_SUNOS_VTOC_16)
		struct dkl_partition *vp = &un->un_vtoc.v_part[count];

		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
#else
#error "No VTOC format defined."
#endif
	}

	return (label_error);
}
4378 
4379 
4380 #if defined(_SUNOS_VTOC_16)
4381 /*
4382  * Macro: MAX_BLKS
4383  *
4384  *	This macro is used for table entries where we need to have the largest
4385  *	possible sector value for that head & SPT (sectors per track)
4386  *	combination.  Other entries for some smaller disk sizes are set by
4387  *	convention to match those used by X86 BIOS usage.
4388  */
4389 #define	MAX_BLKS(heads, spt)	UINT16_MAX * heads * spt, heads, spt
4390 
4391 /*
4392  *    Function: sd_convert_geometry
4393  *
4394  * Description: Convert physical geometry into a dk_geom structure. In
4395  *		other words, make sure we don't wrap 16-bit values.
4396  *		e.g. converting from geom_cache to dk_geom
4397  *
4398  *     Context: Kernel thread only
4399  */
4400 static void
4401 sd_convert_geometry(uint64_t capacity, struct dk_geom *un_g)
4402 {
4403 	int i;
4404 	static const struct chs_values {
4405 		uint_t max_cap;		/* Max Capacity for this HS. */
4406 		uint_t nhead;		/* Heads to use. */
4407 		uint_t nsect;		/* SPT to use. */
4408 	} CHS_values[] = {
4409 		{0x00200000,  64, 32},		/* 1GB or smaller disk. */
4410 		{0x01000000, 128, 32},		/* 8GB or smaller disk. */
4411 		{MAX_BLKS(255,  63)},		/* 502.02GB or smaller disk. */
4412 		{MAX_BLKS(255, 126)},		/* .98TB or smaller disk. */
4413 		{DK_MAX_BLOCKS, 255, 189}	/* Max size is just under 1TB */
4414 	};
4415 
4416 	/* Unlabeled SCSI floppy device */
4417 	if (capacity <= 0x1000) {
4418 		un_g->dkg_nhead = 2;
4419 		un_g->dkg_ncyl = 80;
4420 		un_g->dkg_nsect = capacity / (un_g->dkg_nhead * un_g->dkg_ncyl);
4421 		return;
4422 	}
4423 
4424 	/*
4425 	 * For all devices we calculate cylinders using the
4426 	 * heads and sectors we assign based on capacity of the
4427 	 * device.  The table is designed to be compatible with the
4428 	 * way other operating systems lay out fdisk tables for X86
4429 	 * and to insure that the cylinders never exceed 65535 to
4430 	 * prevent problems with X86 ioctls that report geometry.
4431 	 * We use SPT that are multiples of 63, since other OSes that
4432 	 * are not limited to 16-bits for cylinders stop at 63 SPT
4433 	 * we make do by using multiples of 63 SPT.
4434 	 *
4435 	 * Note than capacities greater than or equal to 1TB will simply
4436 	 * get the largest geometry from the table. This should be okay
4437 	 * since disks this large shouldn't be using CHS values anyway.
4438 	 */
4439 	for (i = 0; CHS_values[i].max_cap < capacity &&
4440 	    CHS_values[i].max_cap != DK_MAX_BLOCKS; i++)
4441 		;
4442 
4443 	un_g->dkg_nhead = CHS_values[i].nhead;
4444 	un_g->dkg_nsect = CHS_values[i].nsect;
4445 }
4446 #endif
4447 
4448 
4449 /*
4450  *    Function: sd_resync_geom_caches
4451  *
4452  * Description: (Re)initialize both geometry caches: the virtual geometry
4453  *		information is extracted from the HBA (the "geometry"
4454  *		capability), and the physical geometry cache data is
4455  *		generated by issuing MODE SENSE commands.
4456  *
4457  *   Arguments: un - driver soft state (unit) structure
4458  *		capacity - disk capacity in #blocks
4459  *		lbasize - disk block size in bytes
4460  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4461  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4462  *			to use the USCSI "direct" chain and bypass the normal
4463  *			command waitq.
4464  *
4465  *     Context: Kernel thread only (can sleep).
4466  */
4467 
4468 static void
4469 sd_resync_geom_caches(struct sd_lun *un, int capacity, int lbasize,
4470 	int path_flag)
4471 {
4472 	struct 	geom_cache 	pgeom;
4473 	struct 	geom_cache	*pgeom_p = &pgeom;
4474 	int 	spc;
4475 	unsigned short nhead;
4476 	unsigned short nsect;
4477 
4478 	ASSERT(un != NULL);
4479 	ASSERT(mutex_owned(SD_MUTEX(un)));
4480 
4481 	/*
4482 	 * Ask the controller for its logical geometry.
4483 	 * Note: if the HBA does not support scsi_ifgetcap("geometry"),
4484 	 * then the lgeom cache will be invalid.
4485 	 */
4486 	sd_get_virtual_geometry(un, capacity, lbasize);
4487 
4488 	/*
4489 	 * Initialize the pgeom cache from lgeom, so that if MODE SENSE
4490 	 * doesn't work, DKIOCG_PHYSGEOM can return reasonable values.
4491 	 */
4492 	if (un->un_lgeom.g_nsect == 0 || un->un_lgeom.g_nhead == 0) {
4493 		/*
4494 		 * Note: Perhaps this needs to be more adaptive? The rationale
4495 		 * is that, if there's no HBA geometry from the HBA driver, any
4496 		 * guess is good, since this is the physical geometry. If MODE
4497 		 * SENSE fails this gives a max cylinder size for non-LBA access
4498 		 */
4499 		nhead = 255;
4500 		nsect = 63;
4501 	} else {
4502 		nhead = un->un_lgeom.g_nhead;
4503 		nsect = un->un_lgeom.g_nsect;
4504 	}
4505 
4506 	if (ISCD(un)) {
4507 		pgeom_p->g_nhead = 1;
4508 		pgeom_p->g_nsect = nsect * nhead;
4509 	} else {
4510 		pgeom_p->g_nhead = nhead;
4511 		pgeom_p->g_nsect = nsect;
4512 	}
4513 
4514 	spc = pgeom_p->g_nhead * pgeom_p->g_nsect;
4515 	pgeom_p->g_capacity = capacity;
4516 	pgeom_p->g_ncyl = pgeom_p->g_capacity / spc;
4517 	pgeom_p->g_acyl = 0;
4518 
4519 	/*
4520 	 * Retrieve fresh geometry data from the hardware, stash it
4521 	 * here temporarily before we rebuild the incore label.
4522 	 *
4523 	 * We want to use the MODE SENSE commands to derive the
4524 	 * physical geometry of the device, but if either command
4525 	 * fails, the logical geometry is used as the fallback for
4526 	 * disk label geometry.
4527 	 */
4528 	mutex_exit(SD_MUTEX(un));
4529 	sd_get_physical_geometry(un, pgeom_p, capacity, lbasize, path_flag);
4530 	mutex_enter(SD_MUTEX(un));
4531 
4532 	/*
4533 	 * Now update the real copy while holding the mutex. This
4534 	 * way the global copy is never in an inconsistent state.
4535 	 */
4536 	bcopy(pgeom_p, &un->un_pgeom,  sizeof (un->un_pgeom));
4537 
4538 	SD_INFO(SD_LOG_COMMON, un, "sd_resync_geom_caches: "
4539 	    "(cached from lgeom)\n");
4540 	SD_INFO(SD_LOG_COMMON, un,
4541 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
4542 	    un->un_pgeom.g_ncyl, un->un_pgeom.g_acyl,
4543 	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
4544 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
4545 	    "intrlv: %d; rpm: %d\n", un->un_pgeom.g_secsize,
4546 	    un->un_pgeom.g_capacity, un->un_pgeom.g_intrlv,
4547 	    un->un_pgeom.g_rpm);
4548 }
4549 
4550 
4551 /*
4552  *    Function: sd_read_fdisk
4553  *
4554  * Description: utility routine to read the fdisk table.
4555  *
4556  *   Arguments: un - driver soft state (unit) structure
4557  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4558  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4559  *			to use the USCSI "direct" chain and bypass the normal
4560  *			command waitq.
4561  *
4562  * Return Code: SD_CMD_SUCCESS
4563  *		SD_CMD_FAILURE
4564  *
4565  *     Context: Kernel thread only (can sleep).
4566  */
/* ARGSUSED */
static int
sd_read_fdisk(struct sd_lun *un, uint_t capacity, int lbasize, int path_flag)
{
#if defined(_NO_FDISK_PRESENT)

	/*
	 * No fdisk layer on this platform: the whole device is the
	 * Solaris partition and the fdisk partition map is cleared.
	 */
	un->un_solaris_offset = 0;
	un->un_solaris_size = capacity;
	bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
	return (SD_CMD_SUCCESS);

#elif defined(_FIRMWARE_NEEDS_FDISK)

	struct ipart	*fdp;
	struct mboot	*mbp;
	struct ipart	fdisk[FD_NUMPART];
	int		i;
	char		sigbuf[2];
	caddr_t		bufp;
	int		uidx;
	int		rval;
	int		lba = 0;
	uint_t		solaris_offset;	/* offset to solaris part. */
	daddr_t		solaris_size;	/* size of solaris partition */
	uint32_t	blocksize;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_f_tgt_blocksize_is_valid == TRUE);

	blocksize = un->un_tgt_blocksize;

	/*
	 * Start off assuming no fdisk table
	 */
	solaris_offset = 0;
	solaris_size   = capacity;

	/*
	 * Read block 0 (the master boot record).  The mutex is dropped
	 * because the read can sleep.
	 */
	mutex_exit(SD_MUTEX(un));
	bufp = kmem_zalloc(blocksize, KM_SLEEP);
	rval = sd_send_scsi_READ(un, bufp, blocksize, 0, path_flag);
	mutex_enter(SD_MUTEX(un));

	if (rval != 0) {
		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
		    "sd_read_fdisk: fdisk read err\n");
		kmem_free(bufp, blocksize);
		return (SD_CMD_FAILURE);
	}

	mbp = (struct mboot *)bufp;

	/*
	 * The fdisk table does not begin on a 4-byte boundary within the
	 * master boot record, so we copy it to an aligned structure to avoid
	 * alignment exceptions on some processors.
	 */
	bcopy(&mbp->parts[0], fdisk, sizeof (fdisk));

	/*
	 * Check for lba support before verifying sig; sig might not be
	 * there, say on a blank disk, but the max_chs mark may still
	 * be present.
	 *
	 * Note: LBA support and BEFs are an x86-only concept but this
	 * code should work OK on SPARC as well.
	 */

	/*
	 * First, check for lba-access-ok on root node (or prom root node)
	 * if present there, don't need to search fdisk table.
	 */
	if (ddi_getprop(DDI_DEV_T_ANY, ddi_root_node(), 0,
	    "lba-access-ok", 0) != 0) {
		/* All drives do LBA; don't search fdisk table */
		lba = 1;
	} else {
		/* Okay, look for mark in fdisk table */
		for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
			/* accumulate "lba" value from all partitions */
			lba = (lba || sd_has_max_chs_vals(fdp));
		}
	}

	/*
	 * If any partition (or the root-node property) indicated LBA,
	 * publish "lba-access-ok" on this device node so the rest of
	 * the system can see it.  A failure to create the property is
	 * logged but is not fatal.
	 */
	if (lba != 0) {
		dev_t dev = sd_make_device(SD_DEVINFO(un));

		if (ddi_getprop(dev, SD_DEVINFO(un), DDI_PROP_DONTPASS,
		    "lba-access-ok", 0) == 0) {
			/* not found; create it */
			if (ddi_prop_create(dev, SD_DEVINFO(un), 0,
			    "lba-access-ok", (caddr_t)NULL, 0) !=
			    DDI_PROP_SUCCESS) {
				SD_ERROR(SD_LOG_ATTACH_DETACH, un,
				    "sd_read_fdisk: Can't create lba property "
				    "for instance %d\n",
				    ddi_get_instance(SD_DEVINFO(un)));
			}
		}
	}

	bcopy(&mbp->signature, sigbuf, sizeof (sigbuf));

	/*
	 * Endian-independent signature check
	 */
	if (((sigbuf[1] & 0xFF) != ((MBB_MAGIC >> 8) & 0xFF)) ||
	    (sigbuf[0] != (MBB_MAGIC & 0xFF))) {
		/* No MBR signature: treat the disk as having no fdisk table */
		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
		    "sd_read_fdisk: no fdisk\n");
		bzero(un->un_fmap, sizeof (struct fmap) * FD_NUMPART);
		rval = SD_CMD_SUCCESS;
		goto done;
	}

#ifdef SDDEBUG
	if (sd_level_mask & SD_LOGMASK_INFO) {
		fdp = fdisk;
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_read_fdisk:\n");
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "         relsect    "
		    "numsect         sysid       bootid\n");
		for (i = 0; i < FD_NUMPART; i++, fdp++) {
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "    %d:  %8d   %8d     0x%08x     0x%08x\n",
			    i, fdp->relsect, fdp->numsect,
			    fdp->systid, fdp->bootid);
		}
	}
#endif

	/*
	 * Try to find the unix partition
	 */
	uidx = -1;
	solaris_offset = 0;
	solaris_size   = 0;

	/*
	 * Walk all four primary fdisk entries, recording each into
	 * un_fmap.  Only SUNIXOS, SUNIXOS2 and EFI_PMBR entries are
	 * candidates for the Solaris partition.
	 */
	for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
		int	relsect;
		int	numsect;

		if (fdp->numsect == 0) {
			/* Empty slot: zero the map entry */
			un->un_fmap[i].fmap_start = 0;
			un->un_fmap[i].fmap_nblk  = 0;
			continue;
		}

		/*
		 * Data in the fdisk table is little-endian.
		 */
		relsect = LE_32(fdp->relsect);
		numsect = LE_32(fdp->numsect);

		un->un_fmap[i].fmap_start = relsect;
		un->un_fmap[i].fmap_nblk  = numsect;

		if (fdp->systid != SUNIXOS &&
		    fdp->systid != SUNIXOS2 &&
		    fdp->systid != EFI_PMBR) {
			continue;
		}

		/*
		 * use the last active solaris partition id found
		 * (there should only be 1 active partition id)
		 *
		 * if there are no active solaris partition id
		 * then use the first inactive solaris partition id
		 */
		if ((uidx == -1) || (fdp->bootid == ACTIVE)) {
			uidx = i;
			solaris_offset = relsect;
			solaris_size   = numsect;
		}
	}

	/*
	 * NOTE(review): this logs the PREVIOUS un_solaris_offset/size;
	 * the freshly computed values are stored below after the "done"
	 * label — confirm the stale values are intentional here.
	 */
	SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk 0x%x 0x%lx",
	    un->un_solaris_offset, un->un_solaris_size);

	rval = SD_CMD_SUCCESS;

done:

	/*
	 * Clear the VTOC info, only if the Solaris partition entry
	 * has moved, changed size, been deleted, or if the size of
	 * the partition is too small to even fit the label sector.
	 */
	if ((un->un_solaris_offset != solaris_offset) ||
	    (un->un_solaris_size != solaris_size) ||
	    solaris_size <= DK_LABEL_LOC) {
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "fdisk moved 0x%x 0x%lx",
			solaris_offset, solaris_size);
		bzero(&un->un_g, sizeof (struct dk_geom));
		bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
		bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));
		un->un_f_geometry_is_valid = FALSE;
	}
	un->un_solaris_offset = solaris_offset;
	un->un_solaris_size = solaris_size;
	kmem_free(bufp, blocksize);
	return (rval);

#else	/* #elif defined(_FIRMWARE_NEEDS_FDISK) */
#error "fdisk table presence undetermined for this platform."
#endif	/* #if defined(_NO_FDISK_PRESENT) */
}
4774 
4775 
4776 /*
4777  *    Function: sd_get_physical_geometry
4778  *
4779  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4780  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4781  *		target, and use this information to initialize the physical
4782  *		geometry cache specified by pgeom_p.
4783  *
4784  *		MODE SENSE is an optional command, so failure in this case
4785  *		does not necessarily denote an error. We want to use the
4786  *		MODE SENSE commands to derive the physical geometry of the
4787  *		device, but if either command fails, the logical geometry is
4788  *		used as the fallback for disk label geometry.
4789  *
4790  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4791  *		have already been initialized for the current target and
4792  *		that the current values be passed as args so that we don't
4793  *		end up ever trying to use -1 as a valid value. This could
4794  *		happen if either value is reset while we're not holding
4795  *		the mutex.
4796  *
4797  *   Arguments: un - driver soft state (unit) structure
4798  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4799  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4800  *			to use the USCSI "direct" chain and bypass the normal
4801  *			command waitq.
4802  *
4803  *     Context: Kernel thread only (can sleep).
4804  */
4805 
static void
sd_get_physical_geometry(struct sd_lun *un, struct geom_cache *pgeom_p,
	int capacity, int lbasize, int path_flag)
{
	struct	mode_format	*page3p;
	struct	mode_geometry	*page4p;
	struct	mode_header	*headerp;
	int	sector_size;
	int	nsect;
	int	nhead;
	int	ncyl;
	int	intrlv;
	int	spc;			/* sectors per cylinder */
	int	modesense_capacity;	/* C * H * S per MODE SENSE */
	int	rpm;
	int	bd_len;			/* block descriptor length */
	int	mode_header_length;
	uchar_t	*p3bufp;
	uchar_t	*p4bufp;
	int	cdbsize;

	ASSERT(un != NULL);
	ASSERT(!(mutex_owned(SD_MUTEX(un))));

	/* Nothing useful can be derived without a valid block count */
	if (un->un_f_blockcount_is_valid != TRUE) {
		return;
	}

	/* ... or without a valid target block size */
	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
		return;
	}

	/* Default the sector size: 2K for CDs, system block size otherwise */
	if (lbasize == 0) {
		if (ISCD(un)) {
			lbasize = 2048;
		} else {
			lbasize = un->un_sys_blocksize;
		}
	}
	pgeom_p->g_secsize = (unsigned short)lbasize;

	/* ATAPI devices require the group-2 (10-byte) MODE SENSE form */
	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;

	/*
	 * Retrieve MODE SENSE page 3 - Format Device Page
	 */
	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p3bufp,
	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag)
	    != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: mode sense page 3 failed\n");
		goto page3_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate the mode
	 * page data.  ATAPI devices return 0, SCSI devices should return
	 * MODE_BLK_DESC_LENGTH.
	 */
	headerp = (struct mode_header *)p3bufp;
	if (un->un_f_cfg_is_atapi == TRUE) {
		struct mode_header_grp2 *mhp =
		    (struct mode_header_grp2 *)headerp;
		mode_header_length = MODE_HEADER_LENGTH_GRP2;
		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		mode_header_length = MODE_HEADER_LENGTH;
		bd_len = ((struct mode_header *)headerp)->bdesc_length;
	}

	if (bd_len > MODE_BLK_DESC_LENGTH) {
		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
		    "received unexpected bd_len of %d, page3\n", bd_len);
		goto page3_exit;
	}

	/* The mode page follows the header and block descriptor(s) */
	page3p = (struct mode_format *)
	    ((caddr_t)headerp + mode_header_length + bd_len);

	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
		    "mode sense pg3 code mismatch %d\n",
		    page3p->mode_page.code);
		goto page3_exit;
	}

	/*
	 * Use this physical geometry data only if BOTH MODE SENSE commands
	 * complete successfully; otherwise, revert to the logical geometry.
	 * So, we need to save everything in temporary variables.
	 */
	sector_size = BE_16(page3p->data_bytes_sect);

	/*
	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
	 */
	if (sector_size == 0) {
		sector_size = (ISCD(un)) ? 2048 : un->un_sys_blocksize;
	} else {
		/*
		 * Round down to a multiple of the system block size
		 * (the mask trick assumes un_sys_blocksize is a power
		 * of two).
		 */
		sector_size &= ~(un->un_sys_blocksize - 1);
	}

	nsect  = BE_16(page3p->sect_track);
	intrlv = BE_16(page3p->interleave);

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   mode page: %d; nsect: %d; sector size: %d;\n",
	    page3p->mode_page.code, nsect, sector_size);
	SD_INFO(SD_LOG_COMMON, un,
	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
	    BE_16(page3p->track_skew),
	    BE_16(page3p->cylinder_skew));


	/*
	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
	 */
	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
	if (sd_send_scsi_MODE_SENSE(un, cdbsize, p4bufp,
	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag)
	    != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: mode sense page 4 failed\n");
		goto page4_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate the mode
	 * page data.  ATAPI devices return 0, SCSI devices should return
	 * MODE_BLK_DESC_LENGTH.
	 *
	 * Note: mode_header_length computed during the page 3 parse above
	 * is reused here; this point is only reached after the page 3
	 * MODE SENSE succeeded on the same device.
	 */
	headerp = (struct mode_header *)p4bufp;
	if (un->un_f_cfg_is_atapi == TRUE) {
		struct mode_header_grp2 *mhp =
		    (struct mode_header_grp2 *)headerp;
		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		bd_len = ((struct mode_header *)headerp)->bdesc_length;
	}

	if (bd_len > MODE_BLK_DESC_LENGTH) {
		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
		    "received unexpected bd_len of %d, page4\n", bd_len);
		goto page4_exit;
	}

	page4p = (struct mode_geometry *)
	    ((caddr_t)headerp + mode_header_length + bd_len);

	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
		SD_ERROR(SD_LOG_COMMON, un, "sd_get_physical_geometry: "
		    "mode sense pg4 code mismatch %d\n",
		    page4p->mode_page.code);
		goto page4_exit;
	}

	/*
	 * Stash the data now, after we know that both commands completed.
	 */

	mutex_enter(SD_MUTEX(un));

	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
	spc   = nhead * nsect;
	/* Cylinder count is a 24-bit big-endian field split over 3 bytes */
	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
	rpm   = BE_16(page4p->rpm);

	modesense_capacity = spc * ncyl;

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
	SD_INFO(SD_LOG_COMMON, un,
	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
	    (void *)pgeom_p, capacity);

	/*
	 * Compensate if the drive's geometry is not rectangular, i.e.,
	 * the product of C * H * S returned by MODE SENSE >= that returned
	 * by read capacity. This is an idiosyncrasy of the original x86
	 * disk subsystem.
	 */
	if (modesense_capacity >= capacity) {
		SD_INFO(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: adjusting acyl; "
		    "old: %d; new: %d\n", pgeom_p->g_acyl,
		    (modesense_capacity - capacity + spc - 1) / spc);
		if (sector_size != 0) {
			/* 1243403: NEC D38x7 drives don't support sec size */
			pgeom_p->g_secsize = (unsigned short)sector_size;
		}
		pgeom_p->g_nsect    = (unsigned short)nsect;
		pgeom_p->g_nhead    = (unsigned short)nhead;
		pgeom_p->g_capacity = capacity;
		/* Excess cylinders become "alternate" cylinders */
		pgeom_p->g_acyl	    =
		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
	}

	pgeom_p->g_rpm    = (unsigned short)rpm;
	pgeom_p->g_intrlv = (unsigned short)intrlv;

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: mode sense geometry:\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   nsect: %d; sector size: %d; interlv: %d\n",
	    nsect, sector_size, intrlv);
	SD_INFO(SD_LOG_COMMON, un,
	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
	    nhead, ncyl, rpm, modesense_capacity);
	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: (cached)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
	    un->un_pgeom.g_ncyl,  un->un_pgeom.g_acyl,
	    un->un_pgeom.g_nhead, un->un_pgeom.g_nsect);
	SD_INFO(SD_LOG_COMMON, un,
	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
	    un->un_pgeom.g_secsize, un->un_pgeom.g_capacity,
	    un->un_pgeom.g_intrlv, un->un_pgeom.g_rpm);

	mutex_exit(SD_MUTEX(un));

page4_exit:
	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
page3_exit:
	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
}
5039 
5040 
5041 /*
5042  *    Function: sd_get_virtual_geometry
5043  *
5044  * Description: Ask the controller to tell us about the target device.
5045  *
5046  *   Arguments: un - pointer to softstate
5047  *		capacity - disk capacity in #blocks
5048  *		lbasize - disk block size in bytes
5049  *
5050  *     Context: Kernel thread only
5051  */
5052 
5053 static void
5054 sd_get_virtual_geometry(struct sd_lun *un, int capacity, int lbasize)
5055 {
5056 	struct	geom_cache 	*lgeom_p = &un->un_lgeom;
5057 	uint_t	geombuf;
5058 	int	spc;
5059 
5060 	ASSERT(un != NULL);
5061 	ASSERT(mutex_owned(SD_MUTEX(un)));
5062 
5063 	mutex_exit(SD_MUTEX(un));
5064 
5065 	/* Set sector size, and total number of sectors */
5066 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
5067 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
5068 
5069 	/* Let the HBA tell us its geometry */
5070 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
5071 
5072 	mutex_enter(SD_MUTEX(un));
5073 
5074 	/* A value of -1 indicates an undefined "geometry" property */
5075 	if (geombuf == (-1)) {
5076 		return;
5077 	}
5078 
5079 	/* Initialize the logical geometry cache. */
5080 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
5081 	lgeom_p->g_nsect   = geombuf & 0xffff;
5082 	lgeom_p->g_secsize = un->un_sys_blocksize;
5083 
5084 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
5085 
5086 	/*
5087 	 * Note: The driver originally converted the capacity value from
5088 	 * target blocks to system blocks. However, the capacity value passed
5089 	 * to this routine is already in terms of system blocks (this scaling
5090 	 * is done when the READ CAPACITY command is issued and processed).
5091 	 * This 'error' may have gone undetected because the usage of g_ncyl
5092 	 * (which is based upon g_capacity) is very limited within the driver
5093 	 */
5094 	lgeom_p->g_capacity = capacity;
5095 
5096 	/*
5097 	 * Set ncyl to zero if the hba returned a zero nhead or nsect value. The
5098 	 * hba may return zero values if the device has been removed.
5099 	 */
5100 	if (spc == 0) {
5101 		lgeom_p->g_ncyl = 0;
5102 	} else {
5103 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
5104 	}
5105 	lgeom_p->g_acyl = 0;
5106 
5107 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
5108 	SD_INFO(SD_LOG_COMMON, un,
5109 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5110 	    un->un_lgeom.g_ncyl,  un->un_lgeom.g_acyl,
5111 	    un->un_lgeom.g_nhead, un->un_lgeom.g_nsect);
5112 	SD_INFO(SD_LOG_COMMON, un, "   lbasize: %d; capacity: %ld; "
5113 	    "intrlv: %d; rpm: %d\n", un->un_lgeom.g_secsize,
5114 	    un->un_lgeom.g_capacity, un->un_lgeom.g_intrlv, un->un_lgeom.g_rpm);
5115 }
5116 
5117 
5118 /*
5119  *    Function: sd_update_block_info
5120  *
5121  * Description: Calculate a byte count to sector count bitshift value
5122  *		from sector size.
5123  *
5124  *   Arguments: un: unit struct.
5125  *		lbasize: new target sector size
5126  *		capacity: new target capacity, ie. block count
5127  *
5128  *     Context: Kernel thread context
5129  */
5130 
5131 static void
5132 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
5133 {
5134 	if (lbasize != 0) {
5135 		un->un_tgt_blocksize = lbasize;
5136 		un->un_f_tgt_blocksize_is_valid	= TRUE;
5137 	}
5138 
5139 	if (capacity != 0) {
5140 		un->un_blockcount		= capacity;
5141 		un->un_f_blockcount_is_valid	= TRUE;
5142 	}
5143 }
5144 
5145 
5146 static void
5147 sd_swap_efi_gpt(efi_gpt_t *e)
5148 {
5149 	_NOTE(ASSUMING_PROTECTED(*e))
5150 	e->efi_gpt_Signature = LE_64(e->efi_gpt_Signature);
5151 	e->efi_gpt_Revision = LE_32(e->efi_gpt_Revision);
5152 	e->efi_gpt_HeaderSize = LE_32(e->efi_gpt_HeaderSize);
5153 	e->efi_gpt_HeaderCRC32 = LE_32(e->efi_gpt_HeaderCRC32);
5154 	e->efi_gpt_MyLBA = LE_64(e->efi_gpt_MyLBA);
5155 	e->efi_gpt_AlternateLBA = LE_64(e->efi_gpt_AlternateLBA);
5156 	e->efi_gpt_FirstUsableLBA = LE_64(e->efi_gpt_FirstUsableLBA);
5157 	e->efi_gpt_LastUsableLBA = LE_64(e->efi_gpt_LastUsableLBA);
5158 	UUID_LE_CONVERT(e->efi_gpt_DiskGUID, e->efi_gpt_DiskGUID);
5159 	e->efi_gpt_PartitionEntryLBA = LE_64(e->efi_gpt_PartitionEntryLBA);
5160 	e->efi_gpt_NumberOfPartitionEntries =
5161 	    LE_32(e->efi_gpt_NumberOfPartitionEntries);
5162 	e->efi_gpt_SizeOfPartitionEntry =
5163 	    LE_32(e->efi_gpt_SizeOfPartitionEntry);
5164 	e->efi_gpt_PartitionEntryArrayCRC32 =
5165 	    LE_32(e->efi_gpt_PartitionEntryArrayCRC32);
5166 }
5167 
5168 static void
5169 sd_swap_efi_gpe(int nparts, efi_gpe_t *p)
5170 {
5171 	int i;
5172 
5173 	_NOTE(ASSUMING_PROTECTED(*p))
5174 	for (i = 0; i < nparts; i++) {
5175 		UUID_LE_CONVERT(p[i].efi_gpe_PartitionTypeGUID,
5176 		    p[i].efi_gpe_PartitionTypeGUID);
5177 		p[i].efi_gpe_StartingLBA = LE_64(p[i].efi_gpe_StartingLBA);
5178 		p[i].efi_gpe_EndingLBA = LE_64(p[i].efi_gpe_EndingLBA);
5179 		/* PartitionAttrs */
5180 	}
5181 }
5182 
5183 static int
5184 sd_validate_efi(efi_gpt_t *labp)
5185 {
5186 	if (labp->efi_gpt_Signature != EFI_SIGNATURE)
5187 		return (EINVAL);
5188 	/* at least 96 bytes in this version of the spec. */
5189 	if (sizeof (efi_gpt_t) - sizeof (labp->efi_gpt_Reserved2) >
5190 	    labp->efi_gpt_HeaderSize)
5191 		return (EINVAL);
5192 	/* this should be 128 bytes */
5193 	if (labp->efi_gpt_SizeOfPartitionEntry != sizeof (efi_gpe_t))
5194 		return (EINVAL);
5195 	return (0);
5196 }
5197 
5198 static int
5199 sd_use_efi(struct sd_lun *un, int path_flag)
5200 {
5201 	int		i;
5202 	int		rval = 0;
5203 	efi_gpe_t	*partitions;
5204 	uchar_t		*buf;
5205 	uint_t		lbasize;
5206 	uint64_t	cap;
5207 	uint_t		nparts;
5208 	diskaddr_t	gpe_lba;
5209 
5210 	ASSERT(mutex_owned(SD_MUTEX(un)));
5211 	lbasize = un->un_tgt_blocksize;
5212 
5213 	mutex_exit(SD_MUTEX(un));
5214 
5215 	buf = kmem_zalloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
5216 
5217 	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
5218 		rval = EINVAL;
5219 		goto done_err;
5220 	}
5221 
5222 	rval = sd_send_scsi_READ(un, buf, lbasize, 0, path_flag);
5223 	if (rval) {
5224 		goto done_err;
5225 	}
5226 	if (((struct dk_label *)buf)->dkl_magic == DKL_MAGIC) {
5227 		/* not ours */
5228 		rval = ESRCH;
5229 		goto done_err;
5230 	}
5231 
5232 	rval = sd_send_scsi_READ(un, buf, lbasize, 1, path_flag);
5233 	if (rval) {
5234 		goto done_err;
5235 	}
5236 	sd_swap_efi_gpt((efi_gpt_t *)buf);
5237 
5238 	if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0) {
5239 		/*
5240 		 * Couldn't read the primary, try the backup.  Our
5241 		 * capacity at this point could be based on CHS, so
5242 		 * check what the device reports.
5243 		 */
5244 		rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
5245 		    path_flag);
5246 		if (rval) {
5247 			goto done_err;
5248 		}
5249 
5250 		/*
5251 		 * The MMC standard allows READ CAPACITY to be
5252 		 * inaccurate by a bounded amount (in the interest of
5253 		 * response latency).  As a result, failed READs are
5254 		 * commonplace (due to the reading of metadata and not
5255 		 * data). Depending on the per-Vendor/drive Sense data,
5256 		 * the failed READ can cause many (unnecessary) retries.
5257 		 */
5258 		if ((rval = sd_send_scsi_READ(un, buf, lbasize,
5259 		    cap - 1, (ISCD(un)) ? SD_PATH_DIRECT_PRIORITY :
5260 			path_flag)) != 0) {
5261 				goto done_err;
5262 		}
5263 
5264 		sd_swap_efi_gpt((efi_gpt_t *)buf);
5265 		if ((rval = sd_validate_efi((efi_gpt_t *)buf)) != 0)
5266 			goto done_err;
5267 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
5268 		    "primary label corrupt; using backup\n");
5269 	}
5270 
5271 	nparts = ((efi_gpt_t *)buf)->efi_gpt_NumberOfPartitionEntries;
5272 	gpe_lba = ((efi_gpt_t *)buf)->efi_gpt_PartitionEntryLBA;
5273 
5274 	rval = sd_send_scsi_READ(un, buf, EFI_MIN_ARRAY_SIZE, gpe_lba,
5275 	    path_flag);
5276 	if (rval) {
5277 		goto done_err;
5278 	}
5279 	partitions = (efi_gpe_t *)buf;
5280 
5281 	if (nparts > MAXPART) {
5282 		nparts = MAXPART;
5283 	}
5284 	sd_swap_efi_gpe(nparts, partitions);
5285 
5286 	mutex_enter(SD_MUTEX(un));
5287 
5288 	/* Fill in partition table. */
5289 	for (i = 0; i < nparts; i++) {
5290 		if (partitions->efi_gpe_StartingLBA != 0 ||
5291 		    partitions->efi_gpe_EndingLBA != 0) {
5292 			un->un_map[i].dkl_cylno =
5293 			    partitions->efi_gpe_StartingLBA;
5294 			un->un_map[i].dkl_nblk =
5295 			    partitions->efi_gpe_EndingLBA -
5296 			    partitions->efi_gpe_StartingLBA + 1;
5297 			un->un_offset[i] =
5298 			    partitions->efi_gpe_StartingLBA;
5299 		}
5300 		if (i == WD_NODE) {
5301 			/*
5302 			 * minor number 7 corresponds to the whole disk
5303 			 */
5304 			un->un_map[i].dkl_cylno = 0;
5305 			un->un_map[i].dkl_nblk = un->un_blockcount;
5306 			un->un_offset[i] = 0;
5307 		}
5308 		partitions++;
5309 	}
5310 	un->un_solaris_offset = 0;
5311 	un->un_solaris_size = cap;
5312 	un->un_f_geometry_is_valid = TRUE;
5313 
5314 	/* clear the vtoc label */
5315 	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
5316 
5317 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5318 	return (0);
5319 
5320 done_err:
5321 	kmem_free(buf, EFI_MIN_ARRAY_SIZE);
5322 	mutex_enter(SD_MUTEX(un));
5323 	/*
5324 	 * if we didn't find something that could look like a VTOC
5325 	 * and the disk is over 1TB, we know there isn't a valid label.
5326 	 * Otherwise let sd_uselabel decide what to do.  We only
5327 	 * want to invalidate this if we're certain the label isn't
5328 	 * valid because sd_prop_op will now fail, which in turn
5329 	 * causes things like opens and stats on the partition to fail.
5330 	 */
5331 	if ((un->un_blockcount > DK_MAX_BLOCKS) && (rval != ESRCH)) {
5332 		un->un_f_geometry_is_valid = FALSE;
5333 	}
5334 	return (rval);
5335 }
5336 
5337 
5338 /*
5339  *    Function: sd_uselabel
5340  *
5341  * Description: Validate the disk label and update the relevant data (geometry,
5342  *		partition, vtoc, and capacity data) in the sd_lun struct.
5343  *		Marks the geometry of the unit as being valid.
5344  *
5345  *   Arguments: un: unit struct.
5346  *		dk_label: disk label
5347  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
5348  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
5349  *			to use the USCSI "direct" chain and bypass the normal
5350  *			command waitq.
5351  *
5352  * Return Code: SD_LABEL_IS_VALID: Label read from disk is OK; geometry,
5353  *		partition, vtoc, and capacity data are good.
5354  *
5355  *		SD_LABEL_IS_INVALID: Magic number or checksum error in the
5356  *		label; or computed capacity does not jibe with capacity
5357  *		reported from the READ CAPACITY command.
5358  *
5359  *     Context: Kernel thread only (can sleep).
5360  */
5361 
static int
sd_uselabel(struct sd_lun *un, struct dk_label *labp, int path_flag)
{
	short	*sp;
	short	sum;
	short	count;
	int	label_error = SD_LABEL_IS_VALID;
	int	i;
	int	capacity;
	int	part_end;
	int	track_capacity;
	int	err;
#if defined(_SUNOS_VTOC_16)
	struct	dkl_partition	*vpartp;
#endif
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* Validate the magic number of the label. */
	if (labp->dkl_magic != DKL_MAGIC) {
#if defined(__sparc)
		/* Only log once the unit is fully attached and logging is on */
		if ((un->un_state == SD_STATE_NORMAL) &&
			un->un_f_vtoc_errlog_supported) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Corrupt label; wrong magic number\n");
		}
#endif
		return (SD_LABEL_IS_INVALID);
	}

	/*
	 * Validate the checksum of the label: the XOR of all shorts in
	 * the sector (including the stored checksum) must come out zero.
	 */
	sp  = (short *)labp;
	sum = 0;
	count = sizeof (struct dk_label) / sizeof (short);
	while (count--)	 {
		sum ^= *sp++;
	}

	if (sum != 0) {
#if	defined(_SUNOS_VTOC_16)
		if ((un->un_state == SD_STATE_NORMAL) && !ISCD(un)) {
#elif defined(_SUNOS_VTOC_8)
		if ((un->un_state == SD_STATE_NORMAL) &&
		    un->un_f_vtoc_errlog_supported) {
#endif
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Corrupt label - label checksum failed\n");
		}
		return (SD_LABEL_IS_INVALID);
	}


	/*
	 * Fill in geometry structure with data from label.
	 */
	bzero(&un->un_g, sizeof (struct dk_geom));
	un->un_g.dkg_ncyl   = labp->dkl_ncyl;
	un->un_g.dkg_acyl   = labp->dkl_acyl;
	un->un_g.dkg_bcyl   = 0;
	un->un_g.dkg_nhead  = labp->dkl_nhead;
	un->un_g.dkg_nsect  = labp->dkl_nsect;
	un->un_g.dkg_intrlv = labp->dkl_intrlv;

#if defined(_SUNOS_VTOC_8)
	un->un_g.dkg_gap1   = labp->dkl_gap1;
	un->un_g.dkg_gap2   = labp->dkl_gap2;
	un->un_g.dkg_bhead  = labp->dkl_bhead;
#endif
#if defined(_SUNOS_VTOC_16)
	un->un_dkg_skew = labp->dkl_skew;
#endif

#if defined(__i386) || defined(__amd64)
	un->un_g.dkg_apc = labp->dkl_apc;
#endif

	/*
	 * Currently we rely on the values in the label being accurate. If
	 * dlk_rpm or dlk_pcly are zero in the label, use a default value.
	 *
	 * Note: In the future a MODE SENSE may be used to retrieve this data,
	 * although this command is optional in SCSI-2.
	 */
	un->un_g.dkg_rpm  = (labp->dkl_rpm  != 0) ? labp->dkl_rpm  : 3600;
	un->un_g.dkg_pcyl = (labp->dkl_pcyl != 0) ? labp->dkl_pcyl :
	    (un->un_g.dkg_ncyl + un->un_g.dkg_acyl);

	/*
	 * The Read and Write reinstruct values may not be valid
	 * for older disks.
	 */
	un->un_g.dkg_read_reinstruct  = labp->dkl_read_reinstruct;
	un->un_g.dkg_write_reinstruct = labp->dkl_write_reinstruct;

	/* Fill in partition table. */
#if defined(_SUNOS_VTOC_8)
	/* VTOC_8: slices already expressed as cylno/nblk pairs in the label */
	for (i = 0; i < NDKMAP; i++) {
		un->un_map[i].dkl_cylno = labp->dkl_map[i].dkl_cylno;
		un->un_map[i].dkl_nblk  = labp->dkl_map[i].dkl_nblk;
	}
#endif
#if  defined(_SUNOS_VTOC_16)
	/* VTOC_16: slices are start/size in blocks; convert start to cylno */
	vpartp		= labp->dkl_vtoc.v_part;
	track_capacity	= labp->dkl_nhead * labp->dkl_nsect;

	/* Prevent divide by zero */
	if (track_capacity == 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "Corrupt label - zero nhead or nsect value\n");

		return (SD_LABEL_IS_INVALID);
	}

	for (i = 0; i < NDKMAP; i++, vpartp++) {
		un->un_map[i].dkl_cylno = vpartp->p_start / track_capacity;
		un->un_map[i].dkl_nblk  = vpartp->p_size;
	}
#endif

	/* Fill in VTOC Structure. */
	bcopy(&labp->dkl_vtoc, &un->un_vtoc, sizeof (struct dk_vtoc));
#if defined(_SUNOS_VTOC_8)
	/*
	 * The 8-slice vtoc does not include the ascii label; save it into
	 * the device's soft state structure here.
	 */
	bcopy(labp->dkl_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
#endif

	/*
	 * Now look for a valid capacity: compute what the label's CHS
	 * geometry implies and later compare it against what the device
	 * itself reported via READ CAPACITY.
	 */
	track_capacity	= (un->un_g.dkg_nhead * un->un_g.dkg_nsect);
	capacity	= (un->un_g.dkg_ncyl  * track_capacity);

	if (un->un_g.dkg_acyl) {
#if defined(__i386) || defined(__amd64)
		/* we may have > 1 alts cylinder */
		capacity += (track_capacity * un->un_g.dkg_acyl);
#else
		capacity += track_capacity;
#endif
	}

	/*
	 * Force check here to ensure the computed capacity is valid.
	 * If capacity is zero, it indicates an invalid label and
	 * we should abort updating the relevant data then.
	 */
	if (capacity == 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "Corrupt label - no valid capacity could be retrieved\n");

		return (SD_LABEL_IS_INVALID);
	}

	/* Mark the geometry as valid. */
	un->un_f_geometry_is_valid = TRUE;

	/*
	 * At this point, un->un_blockcount should contain valid data from
	 * the READ CAPACITY command.
	 */
	if (un->un_f_blockcount_is_valid != TRUE) {
		/*
		 * We have a situation where the target didn't give us a good
		 * READ CAPACITY value, yet there appears to be a valid label.
		 * In this case, we'll fake the capacity.
		 */
		un->un_blockcount = capacity;
		un->un_f_blockcount_is_valid = TRUE;
		goto done;
	}


	/* Label capacity within what the device reported: accept it. */
	if ((capacity <= un->un_blockcount) ||
	    (un->un_state != SD_STATE_NORMAL)) {
#if defined(_SUNOS_VTOC_8)
		/*
		 * We can't let this happen on drives that are subdivided
		 * into logical disks (i.e., that have an fdisk table).
		 * The un_blockcount field should always hold the full media
		 * size in sectors, period.  This code would overwrite
		 * un_blockcount with the size of the Solaris fdisk partition.
		 */
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_uselabel: Label %d blocks; Drive %d blocks\n",
		    capacity, un->un_blockcount);
		un->un_blockcount = capacity;
		un->un_f_blockcount_is_valid = TRUE;
#endif	/* defined(_SUNOS_VTOC_8) */
		goto done;
	}

	/* Label claims more blocks than the device reported. */
	if (ISCD(un)) {
		/* For CDROMs, we trust that the data in the label is OK. */
#if defined(_SUNOS_VTOC_8)
		/* Invalidate geometry if any non-empty slice ends past EOM */
		for (i = 0; i < NDKMAP; i++) {
			part_end = labp->dkl_nhead * labp->dkl_nsect *
			    labp->dkl_map[i].dkl_cylno +
			    labp->dkl_map[i].dkl_nblk  - 1;

			if ((labp->dkl_map[i].dkl_nblk) &&
			    (part_end > un->un_blockcount)) {
				un->un_f_geometry_is_valid = FALSE;
				break;
			}
		}
#endif
#if defined(_SUNOS_VTOC_16)
		vpartp = &(labp->dkl_vtoc.v_part[0]);
		for (i = 0; i < NDKMAP; i++, vpartp++) {
			part_end = vpartp->p_start + vpartp->p_size;
			if ((vpartp->p_size > 0) &&
			    (part_end > un->un_blockcount)) {
				un->un_f_geometry_is_valid = FALSE;
				break;
			}
		}
#endif
	} else {
		uint64_t t_capacity;
		uint32_t t_lbasize;

		/*
		 * Re-issue READ CAPACITY in case the cached block count is
		 * stale.  The mutex must be dropped around the SCSI command
		 * since it can block.
		 */
		mutex_exit(SD_MUTEX(un));
		err = sd_send_scsi_READ_CAPACITY(un, &t_capacity, &t_lbasize,
		    path_flag);
		ASSERT(t_capacity <= DK_MAX_BLOCKS);
		mutex_enter(SD_MUTEX(un));

		if (err == 0) {
			sd_update_block_info(un, t_lbasize, t_capacity);
		}

		/* Still over-sized after refresh: the label is bad. */
		if (capacity > un->un_blockcount) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Corrupt label - bad geometry\n");
			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
			    "Label says %u blocks; Drive says %llu blocks\n",
			    capacity, (unsigned long long)un->un_blockcount);
			un->un_f_geometry_is_valid = FALSE;
			label_error = SD_LABEL_IS_INVALID;
		}
	}

done:

	SD_INFO(SD_LOG_COMMON, un, "sd_uselabel: (label geometry)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   ncyl: %d; acyl: %d; nhead: %d; nsect: %d\n",
	    un->un_g.dkg_ncyl,  un->un_g.dkg_acyl,
	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);
	SD_INFO(SD_LOG_COMMON, un,
	    "   lbasize: %d; capacity: %d; intrlv: %d; rpm: %d\n",
	    un->un_tgt_blocksize, un->un_blockcount,
	    un->un_g.dkg_intrlv, un->un_g.dkg_rpm);
	SD_INFO(SD_LOG_COMMON, un, "   wrt_reinstr: %d; rd_reinstr: %d\n",
	    un->un_g.dkg_write_reinstruct, un->un_g.dkg_read_reinstruct);

	ASSERT(mutex_owned(SD_MUTEX(un)));

	return (label_error);
}
5623 
5624 
5625 /*
5626  *    Function: sd_build_default_label
5627  *
5628  * Description: Generate a default label for those devices that do not have
5629  *		one, e.g., new media, removable cartridges, etc..
5630  *
5631  *     Context: Kernel thread only
5632  */
5633 
static void
sd_build_default_label(struct sd_lun *un)
{
#if defined(_SUNOS_VTOC_16)
	uint_t	phys_spc;	/* sectors per cylinder (physical geometry) */
	uint_t	disksize;	/* usable size in blocks for the backup slice */
	struct	dk_geom un_g;	/* scratch geometry for the conversion */
#endif

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

#if defined(_SUNOS_VTOC_8)
	/*
	 * Note: This is a legacy check for non-removable devices on VTOC_8
	 * only. This may be a valid check for VTOC_16 as well.
	 * Once we understand why there is this difference between SPARC and
	 * x86 platform, we could remove this legacy check.
	 */
	ASSERT(un->un_f_default_vtoc_supported);
#endif

	/* Start from a clean slate: geometry, vtoc and partition map. */
	bzero(&un->un_g, sizeof (struct dk_geom));
	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
	bzero(&un->un_map, NDKMAP * (sizeof (struct dk_map)));

#if defined(_SUNOS_VTOC_8)

	/*
	 * It's a REMOVABLE media, therefore no label (on sparc, anyway).
	 * But it is still necessary to set up various geometry information,
	 * and we are doing this here.
	 */

	/*
	 * For the rpm, we use the minimum for the disk.  For the head, cyl,
	 * and number of sector per track, if the capacity <= 1GB, head = 64,
	 * sect = 32.  else head = 255, sect 63 Note: the capacity should be
	 * equal to C*H*S values.  This will cause some truncation of size due
	 * to round off errors. For CD-ROMs, this truncation can have adverse
	 * side effects, so returning ncyl and nhead as 1. The nsect will
	 * overflow for most of CD-ROMs as nsect is of type ushort. (4190569)
	 */
	if (ISCD(un)) {
		/*
		 * Preserve the old behavior for non-writable
		 * medias. Since dkg_nsect is a ushort, it
		 * will lose bits as cdroms have more than
		 * 65536 sectors. So if we recalculate
		 * capacity, it will become much shorter.
		 * But the dkg_* information is not
		 * used for CDROMs so it is OK. But for
		 * Writable CDs we need this information
		 * to be valid (for newfs say). So we
		 * make nsect and nhead > 1 that way
		 * nsect can still stay within ushort limit
		 * without losing any bits.
		 */
		if (un->un_f_mmc_writable_media == TRUE) {
			un->un_g.dkg_nhead = 64;
			un->un_g.dkg_nsect = 32;
			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
			un->un_blockcount = un->un_g.dkg_ncyl *
			    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
		} else {
			un->un_g.dkg_ncyl  = 1;
			un->un_g.dkg_nhead = 1;
			un->un_g.dkg_nsect = un->un_blockcount;
		}
	} else {
		if (un->un_blockcount <= 0x1000) {
			/* unlabeled SCSI floppy device */
			un->un_g.dkg_nhead = 2;
			un->un_g.dkg_ncyl = 80;
			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
		} else if (un->un_blockcount <= 0x200000) {
			/* capacity <= 1GB: 64 heads, 32 sectors per track */
			un->un_g.dkg_nhead = 64;
			un->un_g.dkg_nsect = 32;
			un->un_g.dkg_ncyl  = un->un_blockcount / (64 * 32);
		} else {
			/* capacity > 1GB: 255 heads, 63 sectors per track */
			un->un_g.dkg_nhead = 255;
			un->un_g.dkg_nsect = 63;
			un->un_g.dkg_ncyl  = un->un_blockcount / (255 * 63);
		}
		/* Round block count down to what the CHS geometry covers */
		un->un_blockcount =
		    un->un_g.dkg_ncyl * un->un_g.dkg_nhead * un->un_g.dkg_nsect;
	}

	un->un_g.dkg_acyl	= 0;
	un->un_g.dkg_bcyl	= 0;
	un->un_g.dkg_rpm	= 200;
	un->un_asciilabel[0]	= '\0';
	un->un_g.dkg_pcyl	= un->un_g.dkg_ncyl;

	/* Slice 0 and slice 2 (backup) both span the whole media. */
	un->un_map[0].dkl_cylno = 0;
	un->un_map[0].dkl_nblk  = un->un_blockcount;
	un->un_map[2].dkl_cylno = 0;
	un->un_map[2].dkl_nblk  = un->un_blockcount;

#elif defined(_SUNOS_VTOC_16)

	if (un->un_solaris_size == 0) {
		/*
		 * Got fdisk table but no solaris entry therefore
		 * don't create a default label
		 */
		un->un_f_geometry_is_valid = TRUE;
		return;
	}

	/*
	 * For CDs we continue to use the physical geometry to calculate
	 * number of cylinders. All other devices must convert the
	 * physical geometry (geom_cache) to values that will fit
	 * in a dk_geom structure.
	 */
	if (ISCD(un)) {
		phys_spc = un->un_pgeom.g_nhead * un->un_pgeom.g_nsect;
	} else {
		/* Convert physical geometry to disk geometry */
		bzero(&un_g, sizeof (struct dk_geom));
		sd_convert_geometry(un->un_blockcount, &un_g);
		bcopy(&un_g, &un->un_g, sizeof (un->un_g));
		phys_spc = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
	}

	ASSERT(phys_spc != 0);
	un->un_g.dkg_pcyl = un->un_solaris_size / phys_spc;
	un->un_g.dkg_acyl = DK_ACYL;
	un->un_g.dkg_ncyl = un->un_g.dkg_pcyl - DK_ACYL;
	disksize = un->un_g.dkg_ncyl * phys_spc;

	if (ISCD(un)) {
		/*
		 * CD's don't use the "heads * sectors * cyls"-type of
		 * geometry, but instead use the entire capacity of the media.
		 */
		disksize = un->un_solaris_size;
		un->un_g.dkg_nhead = 1;
		un->un_g.dkg_nsect = 1;
		un->un_g.dkg_rpm =
		    (un->un_pgeom.g_rpm == 0) ? 200 : un->un_pgeom.g_rpm;

		/* Whole media appears as slice 0 on a CD. */
		un->un_vtoc.v_part[0].p_start = 0;
		un->un_vtoc.v_part[0].p_size  = disksize;
		un->un_vtoc.v_part[0].p_tag   = V_BACKUP;
		un->un_vtoc.v_part[0].p_flag  = V_UNMNT;

		un->un_map[0].dkl_cylno = 0;
		un->un_map[0].dkl_nblk  = disksize;
		un->un_offset[0] = 0;

	} else {
		/*
		 * Hard disks and removable media cartridges
		 */
		un->un_g.dkg_rpm =
		    (un->un_pgeom.g_rpm == 0) ? 3600: un->un_pgeom.g_rpm;
		un->un_vtoc.v_sectorsz = un->un_sys_blocksize;

		/* Add boot slice (slice 8, the first cylinder) */
		un->un_vtoc.v_part[8].p_start = 0;
		un->un_vtoc.v_part[8].p_size  = phys_spc;
		un->un_vtoc.v_part[8].p_tag   = V_BOOT;
		un->un_vtoc.v_part[8].p_flag  = V_UNMNT;

		un->un_map[8].dkl_cylno = 0;
		un->un_map[8].dkl_nblk  = phys_spc;
		un->un_offset[8] = 0;
	}

	un->un_g.dkg_apc = 0;
	un->un_vtoc.v_nparts = V_NUMPAR;

	/* Add backup slice (slice 2 spans the usable disk) */
	un->un_vtoc.v_part[2].p_start = 0;
	un->un_vtoc.v_part[2].p_size  = disksize;
	un->un_vtoc.v_part[2].p_tag   = V_BACKUP;
	un->un_vtoc.v_part[2].p_flag  = V_UNMNT;

	un->un_map[2].dkl_cylno = 0;
	un->un_map[2].dkl_nblk  = disksize;
	un->un_offset[2] = 0;

	(void) sprintf(un->un_vtoc.v_asciilabel, "DEFAULT cyl %d alt %d"
	    " hd %d sec %d", un->un_g.dkg_ncyl, un->un_g.dkg_acyl,
	    un->un_g.dkg_nhead, un->un_g.dkg_nsect);

#else
#error "No VTOC format defined."
#endif

	un->un_g.dkg_read_reinstruct  = 0;
	un->un_g.dkg_write_reinstruct = 0;

	un->un_g.dkg_intrlv = 1;

	un->un_vtoc.v_version = V_VERSION;
	un->un_vtoc.v_sanity  = VTOC_SANE;

	un->un_f_geometry_is_valid = TRUE;

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_build_default_label: Default label created: "
	    "cyl: %d\tacyl: %d\tnhead: %d\tnsect: %d\tcap: %d\n",
	    un->un_g.dkg_ncyl, un->un_g.dkg_acyl, un->un_g.dkg_nhead,
	    un->un_g.dkg_nsect, un->un_blockcount);
}
5842 
5843 
5844 #if defined(_FIRMWARE_NEEDS_FDISK)
5845 /*
5846  * Max CHS values, as they are encoded into bytes, for 1022/254/63
5847  */
5848 #define	LBA_MAX_SECT	(63 | ((1022 & 0x300) >> 2))
5849 #define	LBA_MAX_CYL	(1022 & 0xFF)
5850 #define	LBA_MAX_HEAD	(254)
5851 
5852 
5853 /*
5854  *    Function: sd_has_max_chs_vals
5855  *
5856  * Description: Return TRUE if Cylinder-Head-Sector values are all at maximum.
5857  *
5858  *   Arguments: fdp - ptr to CHS info
5859  *
5860  * Return Code: True or false
5861  *
5862  *     Context: Any.
5863  */
5864 
5865 static int
5866 sd_has_max_chs_vals(struct ipart *fdp)
5867 {
5868 	return ((fdp->begcyl  == LBA_MAX_CYL)	&&
5869 	    (fdp->beghead == LBA_MAX_HEAD)	&&
5870 	    (fdp->begsect == LBA_MAX_SECT)	&&
5871 	    (fdp->endcyl  == LBA_MAX_CYL)	&&
5872 	    (fdp->endhead == LBA_MAX_HEAD)	&&
5873 	    (fdp->endsect == LBA_MAX_SECT));
5874 }
5875 #endif
5876 
5877 
5878 /*
5879  *    Function: sd_inq_fill
5880  *
5881  * Description: Print a piece of inquiry data, cleaned up for non-printable
5882  *		characters and stopping at the first space character after
5883  *		the beginning of the passed string;
5884  *
5885  *   Arguments: p - source string
5886  *		l - maximum length to copy
5887  *		s - destination string
5888  *
5889  *     Context: Any.
5890  */
5891 
/*
 * Copy up to l bytes of inquiry data from p into s, replacing
 * non-printable characters with '*' and stopping at the first space
 * that appears after the first character.  The destination is always
 * NUL-terminated; s must have room for l + 1 bytes.
 *
 * Fix: the counter was previously declared unsigned while l is a
 * signed int, so a negative l was converted to a huge unsigned value
 * and the loop would overrun both buffers.  Using a signed counter
 * makes a negative (or zero) length produce an empty string instead.
 */
static void
sd_inq_fill(char *p, int l, char *s)
{
	int i = 0;
	char c;

	while (i++ < l) {
		/* Mask control chars and DEL/8-bit junk as '*' */
		if ((c = *p++) < ' ' || c >= 0x7F) {
			c = '*';
		} else if (i != 1 && c == ' ') {
			/* Space after the first char ends the field */
			break;
		}
		*s++ = c;
	}
	*s++ = 0;
}
5908 
5909 
5910 /*
5911  *    Function: sd_register_devid
5912  *
5913  * Description: This routine will obtain the device id information from the
5914  *		target, obtain the serial number, and register the device
5915  *		id with the ddi framework.
5916  *
5917  *   Arguments: devi - the system's dev_info_t for the device.
5918  *		un - driver soft state (unit) structure
5919  *		reservation_flag - indicates if a reservation conflict
5920  *		occurred during attach
5921  *
5922  *     Context: Kernel Thread
5923  */
static void
sd_register_devid(struct sd_lun *un, dev_info_t *devi, int reservation_flag)
{
	int		rval		= 0;
	uchar_t		*inq80		= NULL;		/* VPD page 0x80 data */
	size_t		inq80_len	= MAX_INQUIRY_SIZE;
	size_t		inq80_resid	= 0;
	uchar_t		*inq83		= NULL;		/* VPD page 0x83 data */
	size_t		inq83_len	= MAX_INQUIRY_SIZE;
	size_t		inq83_resid	= 0;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT((SD_DEVINFO(un)) == devi);

	/*
	 * This is the case of antiquated Sun disk drives that have the
	 * FAB_DEVID property set in the disk_table.  These drives
	 * manage the devid's by storing them in last 2 available sectors
	 * on the drive and have them fabricated by the ddi layer by calling
	 * ddi_devid_init and passing the DEVID_FAB flag.
	 */
	if (un->un_f_opt_fab_devid == TRUE) {
		/*
		 * Depending on EINVAL isn't reliable, since a reserved disk
		 * may result in invalid geometry, so check to make sure a
		 * reservation conflict did not occur during attach.
		 */
		if ((sd_get_devid(un) == EINVAL) &&
		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
			/*
			 * The devid is invalid AND there is no reservation
			 * conflict.  Fabricate a new devid.
			 */
			(void) sd_create_devid(un);
		}

		/* Register the devid if it exists */
		if (un->un_devid != NULL) {
			(void) ddi_devid_register(SD_DEVINFO(un),
			    un->un_devid);
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_register_devid: Devid Fabricated\n");
		}
		return;
	}

	/*
	 * We check the availibility of the World Wide Name (0x83) and Unit
	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
	 * 0x83 is availible, that is the best choice.  Our next choice is
	 * 0x80.  If neither are availible, we munge the devid from the device
	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
	 * to fabricate a devid for non-Sun qualified disks.
	 */
	if (sd_check_vpd_page_support(un) == 0) {
		/* collect page 80 data if available */
		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {

			/* Drop the mutex: INQUIRY can block. */
			mutex_exit(SD_MUTEX(un));
			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
			rval = sd_send_scsi_INQUIRY(un, inq80, inq80_len,
			    0x01, 0x80, &inq80_resid);

			/* On failure fall back to "page not available" */
			if (rval != 0) {
				kmem_free(inq80, inq80_len);
				inq80 = NULL;
				inq80_len = 0;
			}
			mutex_enter(SD_MUTEX(un));
		}

		/* collect page 83 data if available */
		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
			mutex_exit(SD_MUTEX(un));
			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
			rval = sd_send_scsi_INQUIRY(un, inq83, inq83_len,
			    0x01, 0x83, &inq83_resid);

			if (rval != 0) {
				kmem_free(inq83, inq83_len);
				inq83 = NULL;
				inq83_len = 0;
			}
			mutex_enter(SD_MUTEX(un));
		}
	}

	/* encode best devid possible based on data available */
	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
	    (char *)ddi_driver_name(SD_DEVINFO(un)),
	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {

		/* devid successfully encoded, register devid */
		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);

	} else {
		/*
		 * Unable to encode a devid based on data available.
		 * This is not a Sun qualified disk.  Older Sun disk
		 * drives that have the SD_FAB_DEVID property
		 * set in the disk_table and non Sun qualified
		 * disks are treated in the same manner.  These
		 * drives manage the devid's by storing them in
		 * last 2 available sectors on the drive and
		 * have them fabricated by the ddi layer by
		 * calling ddi_devid_init and passing the
		 * DEVID_FAB flag.
		 * Create a fabricate devid only if there's no
		 * fabricate devid existed.
		 */
		if (sd_get_devid(un) == EINVAL) {
			(void) sd_create_devid(un);
			un->un_f_opt_fab_devid = TRUE;
		}

		/* Register the devid if it exists */
		if (un->un_devid != NULL) {
			(void) ddi_devid_register(SD_DEVINFO(un),
			    un->un_devid);
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_register_devid: devid fabricated using "
			    "ddi framework\n");
		}
	}

	/* clean up resources */
	if (inq80 != NULL) {
		kmem_free(inq80, inq80_len);
	}
	if (inq83 != NULL) {
		kmem_free(inq83, inq83_len);
	}
}
6061 
6062 static daddr_t
6063 sd_get_devid_block(struct sd_lun *un)
6064 {
6065 	daddr_t			spc, blk, head, cyl;
6066 
6067 	if (un->un_blockcount <= DK_MAX_BLOCKS) {
6068 		/* this geometry doesn't allow us to write a devid */
6069 		if (un->un_g.dkg_acyl < 2) {
6070 			return (-1);
6071 		}
6072 
6073 		/*
6074 		 * Subtract 2 guarantees that the next to last cylinder
6075 		 * is used
6076 		 */
6077 		cyl  = un->un_g.dkg_ncyl  + un->un_g.dkg_acyl - 2;
6078 		spc  = un->un_g.dkg_nhead * un->un_g.dkg_nsect;
6079 		head = un->un_g.dkg_nhead - 1;
6080 		blk  = (cyl * (spc - un->un_g.dkg_apc)) +
6081 		    (head * un->un_g.dkg_nsect) + 1;
6082 	} else {
6083 		if (un->un_reserved != -1) {
6084 			blk = un->un_map[un->un_reserved].dkl_cylno + 1;
6085 		} else {
6086 			return (-1);
6087 		}
6088 	}
6089 	return (blk);
6090 }
6091 
6092 /*
6093  *    Function: sd_get_devid
6094  *
6095  * Description: This routine will return 0 if a valid device id has been
6096  *		obtained from the target and stored in the soft state. If a
6097  *		valid device id has not been previously read and stored, a
6098  *		read attempt will be made.
6099  *
6100  *   Arguments: un - driver soft state (unit) structure
6101  *
6102  * Return Code: 0 if we successfully get the device id
6103  *
6104  *     Context: Kernel Thread
6105  */
6106 
static int
sd_get_devid(struct sd_lun *un)
{
	struct dk_devid		*dkdevid;
	ddi_devid_t		tmpid;
	uint_t			*ip;
	size_t			sz;
	daddr_t			blk;
	int			status;
	int			chksum;
	int			i;
	size_t			buffer_size;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
	    un);

	/* Already cached in the soft state: nothing to do. */
	if (un->un_devid != NULL) {
		return (0);
	}

	blk = sd_get_devid_block(un);
	if (blk < 0)
		return (EINVAL);

	/*
	 * Read and verify device id, stored in the reserved cylinders at the
	 * end of the disk. Backup label is on the odd sectors of the last
	 * track of the last cylinder. Device id will be on track of the next
	 * to last cylinder.
	 */
	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
	/* Drop the mutex: allocation and the READ can block. */
	mutex_exit(SD_MUTEX(un));
	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
	status = sd_send_scsi_READ(un, dkdevid, buffer_size, blk,
	    SD_PATH_DIRECT);
	if (status != 0) {
		goto error;
	}

	/* Validate the revision */
	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
		status = EINVAL;
		goto error;
	}

	/*
	 * Calculate the checksum: XOR of all words in the sector except
	 * the last, which holds the stored checksum.
	 * NOTE(review): the loop is bounded by un_sys_blocksize while the
	 * buffer was sized via SD_REQBYTES2TGTBYTES(); this assumes the
	 * devid sector fits in the buffer read above — confirm for
	 * targets whose block size differs from the system block size.
	 */
	chksum = 0;
	ip = (uint_t *)dkdevid;
	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
	    i++) {
		chksum ^= ip[i];
	}

	/* Compare the checksums */
	if (DKD_GETCHKSUM(dkdevid) != chksum) {
		status = EINVAL;
		goto error;
	}

	/* Validate the device id */
	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
		status = EINVAL;
		goto error;
	}

	/*
	 * Store the device id in the driver soft state
	 */
	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
	tmpid = kmem_alloc(sz, KM_SLEEP);

	/* Reacquire the mutex before publishing into the soft state. */
	mutex_enter(SD_MUTEX(un));

	un->un_devid = tmpid;
	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);

	kmem_free(dkdevid, buffer_size);

	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);

	return (status);
error:
	/* Caller expects the mutex held on return; free the scratch buffer. */
	mutex_enter(SD_MUTEX(un));
	kmem_free(dkdevid, buffer_size);
	return (status);
}
6197 
6198 
6199 /*
6200  *    Function: sd_create_devid
6201  *
6202  * Description: This routine will fabricate the device id and write it
6203  *		to the disk.
6204  *
6205  *   Arguments: un - driver soft state (unit) structure
6206  *
6207  * Return Code: value of the fabricated device id
6208  *
6209  *     Context: Kernel Thread
6210  */
6211 
6212 static ddi_devid_t
6213 sd_create_devid(struct sd_lun *un)
6214 {
6215 	ASSERT(un != NULL);
6216 
6217 	/* Fabricate the devid */
6218 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
6219 	    == DDI_FAILURE) {
6220 		return (NULL);
6221 	}
6222 
6223 	/* Write the devid to disk */
6224 	if (sd_write_deviceid(un) != 0) {
6225 		ddi_devid_free(un->un_devid);
6226 		un->un_devid = NULL;
6227 	}
6228 
6229 	return (un->un_devid);
6230 }
6231 
6232 
6233 /*
6234  *    Function: sd_write_deviceid
6235  *
6236  * Description: This routine will write the device id to the disk
6237  *		reserved sector.
6238  *
6239  *   Arguments: un - driver soft state (unit) structure
6240  *
6241  * Return Code: EINVAL
6242  *		value returned by sd_send_scsi_cmd
6243  *
6244  *     Context: Kernel Thread
6245  */
6246 
static int
sd_write_deviceid(struct sd_lun *un)
{
	struct dk_devid		*dkdevid;
	daddr_t			blk;
	uint_t			*ip, chksum;
	int			status;
	int			i;

	ASSERT(mutex_owned(SD_MUTEX(un)));

	/*
	 * NOTE(review): returns -1 here rather than an errno value,
	 * although the block comment above documents EINVAL; callers
	 * only test for non-zero so this is benign.
	 */
	blk = sd_get_devid_block(un);
	if (blk < 0)
		return (-1);
	/* Drop the mutex: allocation and the WRITE below can block. */
	mutex_exit(SD_MUTEX(un));

	/* Allocate the buffer */
	dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);

	/* Fill in the revision */
	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;

	/* Copy in the device id (reacquire the mutex to read un_devid) */
	mutex_enter(SD_MUTEX(un));
	bcopy(un->un_devid, &dkdevid->dkd_devid,
	    ddi_devid_sizeof(un->un_devid));
	mutex_exit(SD_MUTEX(un));

	/*
	 * Calculate the checksum: XOR of all words in the sector except
	 * the last, which receives the checksum itself below.
	 */
	chksum = 0;
	ip = (uint_t *)dkdevid;
	for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
	    i++) {
		chksum ^= ip[i];
	}

	/* Fill-in checksum */
	DKD_FORMCHKSUM(chksum, dkdevid);

	/* Write the reserved sector */
	status = sd_send_scsi_WRITE(un, dkdevid, un->un_sys_blocksize, blk,
	    SD_PATH_DIRECT);

	kmem_free(dkdevid, un->un_sys_blocksize);

	/* Caller expects the mutex held on return. */
	mutex_enter(SD_MUTEX(un));
	return (status);
}
6296 
6297 
6298 /*
6299  *    Function: sd_check_vpd_page_support
6300  *
6301  * Description: This routine sends an inquiry command with the EVPD bit set and
6302  *		a page code of 0x00 to the device. It is used to determine which
 *		vital product pages are available to find the devid. We are
6304  *		looking for pages 0x83 or 0x80.  If we return a negative 1, the
6305  *		device does not support that command.
6306  *
6307  *   Arguments: un  - driver soft state (unit) structure
6308  *
 * Return Code: 0 - success (supported pages recorded in un_vpd_page_mask)
 *		-1 - the device does not support VPD pages, or the
 *		     inquiry for the supported-pages list failed
6311  *
6312  *     Context: This routine can sleep.
6313  */
6314 
6315 static int
6316 sd_check_vpd_page_support(struct sd_lun *un)
6317 {
6318 	uchar_t	*page_list	= NULL;
6319 	uchar_t	page_length	= 0xff;	/* Use max possible length */
6320 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
6321 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
6322 	int    	rval		= 0;
6323 	int	counter;
6324 
6325 	ASSERT(un != NULL);
6326 	ASSERT(mutex_owned(SD_MUTEX(un)));
6327 
6328 	mutex_exit(SD_MUTEX(un));
6329 
6330 	/*
6331 	 * We'll set the page length to the maximum to save figuring it out
6332 	 * with an additional call.
6333 	 */
6334 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
6335 
6336 	rval = sd_send_scsi_INQUIRY(un, page_list, page_length, evpd,
6337 	    page_code, NULL);
6338 
6339 	mutex_enter(SD_MUTEX(un));
6340 
6341 	/*
6342 	 * Now we must validate that the device accepted the command, as some
6343 	 * drives do not support it.  If the drive does support it, we will
6344 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
6345 	 * not, we return -1.
6346 	 */
6347 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
6348 		/* Loop to find one of the 2 pages we need */
6349 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
6350 
6351 		/*
6352 		 * Pages are returned in ascending order, and 0x83 is what we
6353 		 * are hoping for.
6354 		 */
6355 		while ((page_list[counter] <= 0x83) &&
6356 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
6357 		    VPD_HEAD_OFFSET))) {
6358 			/*
6359 			 * Add 3 because page_list[3] is the number of
6360 			 * pages minus 3
6361 			 */
6362 
6363 			switch (page_list[counter]) {
6364 			case 0x00:
6365 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
6366 				break;
6367 			case 0x80:
6368 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
6369 				break;
6370 			case 0x81:
6371 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
6372 				break;
6373 			case 0x82:
6374 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
6375 				break;
6376 			case 0x83:
6377 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
6378 				break;
6379 			}
6380 			counter++;
6381 		}
6382 
6383 	} else {
6384 		rval = -1;
6385 
6386 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
6387 		    "sd_check_vpd_page_support: This drive does not implement "
6388 		    "VPD pages.\n");
6389 	}
6390 
6391 	kmem_free(page_list, page_length);
6392 
6393 	return (rval);
6394 }
6395 
6396 
6397 /*
6398  *    Function: sd_setup_pm
6399  *
6400  * Description: Initialize Power Management on the device
6401  *
6402  *     Context: Kernel Thread
6403  */
6404 
static void
sd_setup_pm(struct sd_lun *un, dev_info_t *devi)
{
	uint_t	log_page_size;
	uchar_t	*log_page_data;
	int	rval;

	/*
	 * Since we are called from attach, holding a mutex for
	 * un is unnecessary. Because some of the routines called
	 * from here require SD_MUTEX to not be held, assert this
	 * right up front.
	 */
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	/*
	 * Since the sd device does not have the 'reg' property,
	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
	 * The following code is to tell cpr that this device
	 * DOES need to be suspended and resumed.
	 */
	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
	    "pm-hardware-state", "needs-suspend-resume");

	/*
	 * This complies with the new power management framework
	 * for certain desktop machines. Create the pm_components
	 * property as a string array property.
	 *
	 * If the HBA has already told us PM is supported
	 * (un_f_pm_supported), skip the log-sense probing below.
	 */
	if (un->un_f_pm_supported) {
		/*
		 * not all devices have a motor, try it first.
		 * some devices may return ILLEGAL REQUEST, some
		 * will hang
		 * The following START_STOP_UNIT is used to check if target
		 * device has a motor.
		 */
		un->un_f_start_stop_supported = TRUE;
		if (sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
		    SD_PATH_DIRECT) != 0) {
			un->un_f_start_stop_supported = FALSE;
		}

		/*
		 * create pm properties anyways otherwise the parent can't
		 * go to sleep
		 */
		(void) sd_create_pm_components(devi, un);
		un->un_f_pm_is_enabled = TRUE;
		return;
	}

	/* No LOG SENSE support: leave the spindle on and disable auto-pm. */
	if (!un->un_f_log_sense_supported) {
		un->un_power_level = SD_SPINDLE_ON;
		un->un_f_pm_is_enabled = FALSE;
		return;
	}

	/* Probe for the start-stop cycle counter log page. */
	rval = sd_log_page_supported(un, START_STOP_CYCLE_PAGE);

#ifdef	SDDEBUG
	if (sd_force_pm_supported) {
		/* Force a successful result */
		rval = 1;
	}
#endif

	/*
	 * If the start-stop cycle counter log page is not supported
	 * or if the pm-capable property is SD_PM_CAPABLE_FALSE (0)
	 * then we should not create the pm_components property.
	 */
	if (rval == -1) {
		/*
		 * Error.
		 * Reading log sense failed, most likely this is
		 * an older drive that does not support log sense.
		 * If this fails auto-pm is not supported.
		 */
		un->un_power_level = SD_SPINDLE_ON;
		un->un_f_pm_is_enabled = FALSE;

	} else if (rval == 0) {
		/*
		 * Page not found.
		 * The start stop cycle counter is implemented as page
		 * START_STOP_CYCLE_PAGE_VU_PAGE (0x31) in older disks. For
		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
		 * Fall back to probing for the vendor-unique page.
		 */
		if (sd_log_page_supported(un, START_STOP_CYCLE_VU_PAGE) == 1) {
			/*
			 * Page found, use this one.
			 */
			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
			un->un_f_pm_is_enabled = TRUE;
		} else {
			/*
			 * Error or page not found.
			 * auto-pm is not supported for this device.
			 */
			un->un_power_level = SD_SPINDLE_ON;
			un->un_f_pm_is_enabled = FALSE;
		}
	} else {
		/*
		 * Page found, use it.
		 */
		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
		un->un_f_pm_is_enabled = TRUE;
	}


	if (un->un_f_pm_is_enabled == TRUE) {
		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);

		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
		    log_page_size, un->un_start_stop_cycle_page,
		    0x01, 0, SD_PATH_DIRECT);
#ifdef	SDDEBUG
		if (sd_force_pm_supported) {
			/* Force a successful result */
			rval = 0;
		}
#endif

		/*
		 * If the Log sense for Page (Start/stop cycle counter page)
		 * succeeds, then power management is supported and we can
		 * enable auto-pm.
		 */
		if (rval == 0)  {
			(void) sd_create_pm_components(devi, un);
		} else {
			un->un_power_level = SD_SPINDLE_ON;
			un->un_f_pm_is_enabled = FALSE;
		}

		kmem_free(log_page_data, log_page_size);
	}
}
6545 
6546 
6547 /*
6548  *    Function: sd_create_pm_components
6549  *
6550  * Description: Initialize PM property.
6551  *
6552  *     Context: Kernel thread context
6553  */
6554 
6555 static void
6556 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
6557 {
6558 	char *pm_comp[] = { "NAME=spindle-motor", "0=off", "1=on", NULL };
6559 
6560 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6561 
6562 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6563 	    "pm-components", pm_comp, 3) == DDI_PROP_SUCCESS) {
6564 		/*
6565 		 * When components are initially created they are idle,
6566 		 * power up any non-removables.
6567 		 * Note: the return value of pm_raise_power can't be used
6568 		 * for determining if PM should be enabled for this device.
6569 		 * Even if you check the return values and remove this
6570 		 * property created above, the PM framework will not honor the
6571 		 * change after the first call to pm_raise_power. Hence,
6572 		 * removal of that property does not help if pm_raise_power
6573 		 * fails. In the case of removable media, the start/stop
6574 		 * will fail if the media is not present.
6575 		 */
6576 		if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
6577 		    SD_SPINDLE_ON) == DDI_SUCCESS)) {
6578 			mutex_enter(SD_MUTEX(un));
6579 			un->un_power_level = SD_SPINDLE_ON;
6580 			mutex_enter(&un->un_pm_mutex);
6581 			/* Set to on and not busy. */
6582 			un->un_pm_count = 0;
6583 		} else {
6584 			mutex_enter(SD_MUTEX(un));
6585 			un->un_power_level = SD_SPINDLE_OFF;
6586 			mutex_enter(&un->un_pm_mutex);
6587 			/* Set to off. */
6588 			un->un_pm_count = -1;
6589 		}
6590 		mutex_exit(&un->un_pm_mutex);
6591 		mutex_exit(SD_MUTEX(un));
6592 	} else {
6593 		un->un_power_level = SD_SPINDLE_ON;
6594 		un->un_f_pm_is_enabled = FALSE;
6595 	}
6596 }
6597 
6598 
6599 /*
6600  *    Function: sd_ddi_suspend
6601  *
6602  * Description: Performs system power-down operations. This includes
6603  *		setting the drive state to indicate its suspended so
6604  *		that no new commands will be accepted. Also, wait for
6605  *		all commands that are in transport or queued to a timer
6606  *		for retry to complete. All timeout threads are cancelled.
6607  *
6608  * Return Code: DDI_FAILURE or DDI_SUCCESS
6609  *
6610  *     Context: Kernel thread context
6611  */
6612 
static int
sd_ddi_suspend(dev_info_t *devi)
{
	struct	sd_lun	*un;
	clock_t		wait_cmds_complete;

	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
	if (un == NULL) {
		return (DDI_FAILURE);
	}

	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");

	mutex_enter(SD_MUTEX(un));

	/* Return success if the device is already suspended. */
	if (un->un_state == SD_STATE_SUSPENDED) {
		mutex_exit(SD_MUTEX(un));
		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
		    "device already suspended, exiting\n");
		return (DDI_SUCCESS);
	}

	/* Return failure if the device is being used by HA */
	if (un->un_resvd_status &
	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
		mutex_exit(SD_MUTEX(un));
		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
		    "device in use by HA, exiting\n");
		return (DDI_FAILURE);
	}

	/*
	 * Return failure if the device is in a resource wait
	 * or power changing state.
	 */
	if ((un->un_state == SD_STATE_RWAIT) ||
	    (un->un_state == SD_STATE_PM_CHANGING)) {
		mutex_exit(SD_MUTEX(un));
		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
		    "device in resource wait state, exiting\n");
		return (DDI_FAILURE);
	}

	/* Save prior state so a failed suspend (or resume) can restore it. */
	un->un_save_state = un->un_last_state;
	New_state(un, SD_STATE_SUSPENDED);

	/*
	 * Wait for all commands that are in transport or queued to a timer
	 * for retry to complete.
	 *
	 * While waiting, no new commands will be accepted or sent because of
	 * the new state we set above.
	 *
	 * Wait till current operation has completed. If we are in the resource
	 * wait state (with an intr outstanding) then we need to wait till the
	 * intr completes and starts the next cmd. We want to wait for
	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
	 */
	wait_cmds_complete = ddi_get_lbolt() +
	    (sd_wait_cmds_complete * drv_usectohz(1000000));

	while (un->un_ncmds_in_transport != 0) {
		/*
		 * Fail if commands do not finish in the specified time.
		 */
		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
		    wait_cmds_complete) == -1) {
			/*
			 * Undo the state changes made above. Everything
			 * must go back to it's original value.
			 */
			Restore_state(un);
			un->un_last_state = un->un_save_state;
			/* Wake up any threads that might be waiting. */
			cv_broadcast(&un->un_suspend_cv);
			mutex_exit(SD_MUTEX(un));
			SD_ERROR(SD_LOG_IO_PM, un,
			    "sd_ddi_suspend: failed due to outstanding cmds\n");
			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
			return (DDI_FAILURE);
		}
	}

	/*
	 * Cancel SCSI watch thread and timeouts, if any are active.
	 *
	 * NOTE(review): SD_MUTEX is dropped around each cancellation call
	 * below — this looks like the standard untimeout(9F)/
	 * scsi_watch_suspend(9F) caveat of not holding a lock the callback
	 * may also acquire; confirm against the callback implementations.
	 * The timeout id is captured and cleared under the lock first so
	 * the timeout is cancelled at most once.
	 */

	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
		opaque_t temp_token = un->un_swr_token;
		mutex_exit(SD_MUTEX(un));
		scsi_watch_suspend(temp_token);
		mutex_enter(SD_MUTEX(un));
	}

	/* Cancel any pending reset-throttle timeout. */
	if (un->un_reset_throttle_timeid != NULL) {
		timeout_id_t temp_id = un->un_reset_throttle_timeid;
		un->un_reset_throttle_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	/* Cancel any pending delayed-cv-broadcast timeout. */
	if (un->un_dcvb_timeid != NULL) {
		timeout_id_t temp_id = un->un_dcvb_timeid;
		un->un_dcvb_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	/* un_pm_timeid is protected by un_pm_mutex, not SD_MUTEX. */
	mutex_enter(&un->un_pm_mutex);
	if (un->un_pm_timeid != NULL) {
		timeout_id_t temp_id = un->un_pm_timeid;
		un->un_pm_timeid = NULL;
		mutex_exit(&un->un_pm_mutex);
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	} else {
		mutex_exit(&un->un_pm_mutex);
	}

	/* Cancel any pending retry timeout. */
	if (un->un_retry_timeid != NULL) {
		timeout_id_t temp_id = un->un_retry_timeid;
		un->un_retry_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	/* Cancel any pending direct-priority timeout. */
	if (un->un_direct_priority_timeid != NULL) {
		timeout_id_t temp_id = un->un_direct_priority_timeid;
		un->un_direct_priority_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_f_is_fibre == TRUE) {
		/*
		 * Remove callbacks for insert and remove events
		 */
		if (un->un_insert_event != NULL) {
			mutex_exit(SD_MUTEX(un));
			(void) ddi_remove_event_handler(un->un_insert_cb_id);
			mutex_enter(SD_MUTEX(un));
			un->un_insert_event = NULL;
		}

		if (un->un_remove_event != NULL) {
			mutex_exit(SD_MUTEX(un));
			(void) ddi_remove_event_handler(un->un_remove_cb_id);
			mutex_enter(SD_MUTEX(un));
			un->un_remove_event = NULL;
		}
	}

	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");

	return (DDI_SUCCESS);
}
6778 
6779 
6780 /*
6781  *    Function: sd_ddi_pm_suspend
6782  *
6783  * Description: Set the drive state to low power.
6784  *		Someone else is required to actually change the drive
6785  *		power level.
6786  *
6787  *   Arguments: un - driver soft state (unit) structure
6788  *
6789  * Return Code: DDI_FAILURE or DDI_SUCCESS
6790  *
6791  *     Context: Kernel thread context
6792  */
6793 
6794 static int
6795 sd_ddi_pm_suspend(struct sd_lun *un)
6796 {
6797 	ASSERT(un != NULL);
6798 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: entry\n");
6799 
6800 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6801 	mutex_enter(SD_MUTEX(un));
6802 
6803 	/*
6804 	 * Exit if power management is not enabled for this device, or if
6805 	 * the device is being used by HA.
6806 	 */
6807 	if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
6808 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
6809 		mutex_exit(SD_MUTEX(un));
6810 		SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exiting\n");
6811 		return (DDI_SUCCESS);
6812 	}
6813 
6814 	SD_INFO(SD_LOG_POWER, un, "sd_ddi_pm_suspend: un_ncmds_in_driver=%ld\n",
6815 	    un->un_ncmds_in_driver);
6816 
6817 	/*
6818 	 * See if the device is not busy, ie.:
6819 	 *    - we have no commands in the driver for this device
6820 	 *    - not waiting for resources
6821 	 */
6822 	if ((un->un_ncmds_in_driver == 0) &&
6823 	    (un->un_state != SD_STATE_RWAIT)) {
6824 		/*
6825 		 * The device is not busy, so it is OK to go to low power state.
6826 		 * Indicate low power, but rely on someone else to actually
6827 		 * change it.
6828 		 */
6829 		mutex_enter(&un->un_pm_mutex);
6830 		un->un_pm_count = -1;
6831 		mutex_exit(&un->un_pm_mutex);
6832 		un->un_power_level = SD_SPINDLE_OFF;
6833 	}
6834 
6835 	mutex_exit(SD_MUTEX(un));
6836 
6837 	SD_TRACE(SD_LOG_POWER, un, "sd_ddi_pm_suspend: exit\n");
6838 
6839 	return (DDI_SUCCESS);
6840 }
6841 
6842 
6843 /*
6844  *    Function: sd_ddi_resume
6845  *
6846  * Description: Performs system power-up operations..
6847  *
6848  * Return Code: DDI_SUCCESS
6849  *		DDI_FAILURE
6850  *
6851  *     Context: Kernel thread context
6852  */
6853 
static int
sd_ddi_resume(dev_info_t *devi)
{
	struct	sd_lun	*un;

	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
	if (un == NULL) {
		return (DDI_FAILURE);
	}

	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");

	mutex_enter(SD_MUTEX(un));
	Restore_state(un);

	/*
	 * Restore the state which was saved to give the
	 * the right state in un_last_state
	 */
	un->un_last_state = un->un_save_state;
	/*
	 * Note: throttle comes back at full.
	 * Also note: this MUST be done before calling pm_raise_power
	 * otherwise the system can get hung in biowait. The scenario where
	 * this'll happen is under cpr suspend. Writing of the system
	 * state goes through sddump, which writes 0 to un_throttle. If
	 * writing the system state then fails, example if the partition is
	 * too small, then cpr attempts a resume. If throttle isn't restored
	 * from the saved value until after calling pm_raise_power then
	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
	 * in biowait.
	 */
	un->un_throttle = un->un_saved_throttle;

	/*
	 * The chance of failure is very rare as the only command done in power
	 * entry point is START command when you transition from 0->1 or
	 * unknown->1. Put it to SPINDLE ON state irrespective of the state at
	 * which suspend was done. Ignore the return value as the resume should
	 * not be failed. In the case of removable media the media need not be
	 * inserted and hence there is a chance that raise power will fail with
	 * media not present.
	 */
	if (un->un_f_attach_spinup) {
		/* SD_MUTEX is dropped across the framework power call. */
		mutex_exit(SD_MUTEX(un));
		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);
		mutex_enter(SD_MUTEX(un));
	}

	/*
	 * Don't broadcast to the suspend cv and therefore possibly
	 * start I/O until after power has been restored.
	 */
	cv_broadcast(&un->un_suspend_cv);
	cv_broadcast(&un->un_state_cv);

	/* restart thread */
	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
		scsi_watch_resume(un->un_swr_token);
	}

#if (defined(__fibre))
	if (un->un_f_is_fibre == TRUE) {
		/*
		 * Add callbacks for insert and remove events
		 */
		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
			sd_init_event_callbacks(un);
		}
	}
#endif

	/*
	 * Transport any pending commands to the target.
	 *
	 * If this is a low-activity device commands in queue will have to wait
	 * until new commands come in, which may take awhile. Also, we
	 * specifically don't check un_ncmds_in_transport because we know that
	 * there really are no commands in progress after the unit was
	 * suspended and we could have reached the throttle level, been
	 * suspended, and have no new commands coming in for awhile. Highly
	 * unlikely, but so is the low-activity disk scenario.
	 */
	ddi_xbuf_dispatch(un->un_xbuf_attr);

	sd_start_cmds(un, NULL);
	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");

	return (DDI_SUCCESS);
}
6946 
6947 
6948 /*
6949  *    Function: sd_ddi_pm_resume
6950  *
6951  * Description: Set the drive state to powered on.
6952  *		Someone else is required to actually change the drive
6953  *		power level.
6954  *
6955  *   Arguments: un - driver soft state (unit) structure
6956  *
6957  * Return Code: DDI_SUCCESS
6958  *
6959  *     Context: Kernel thread context
6960  */
6961 
6962 static int
6963 sd_ddi_pm_resume(struct sd_lun *un)
6964 {
6965 	ASSERT(un != NULL);
6966 
6967 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6968 	mutex_enter(SD_MUTEX(un));
6969 	un->un_power_level = SD_SPINDLE_ON;
6970 
6971 	ASSERT(!mutex_owned(&un->un_pm_mutex));
6972 	mutex_enter(&un->un_pm_mutex);
6973 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
6974 		un->un_pm_count++;
6975 		ASSERT(un->un_pm_count == 0);
6976 		/*
6977 		 * Note: no longer do the cv_broadcast on un_suspend_cv. The
6978 		 * un_suspend_cv is for a system resume, not a power management
6979 		 * device resume. (4297749)
6980 		 *	 cv_broadcast(&un->un_suspend_cv);
6981 		 */
6982 	}
6983 	mutex_exit(&un->un_pm_mutex);
6984 	mutex_exit(SD_MUTEX(un));
6985 
6986 	return (DDI_SUCCESS);
6987 }
6988 
6989 
6990 /*
6991  *    Function: sd_pm_idletimeout_handler
6992  *
6993  * Description: A timer routine that's active only while a device is busy.
6994  *		The purpose is to extend slightly the pm framework's busy
6995  *		view of the device to prevent busy/idle thrashing for
6996  *		back-to-back commands. Do this by comparing the current time
6997  *		to the time at which the last command completed and when the
6998  *		difference is greater than sd_pm_idletime, call
6999  *		pm_idle_component. In addition to indicating idle to the pm
7000  *		framework, update the chain type to again use the internal pm
7001  *		layers of the driver.
7002  *
7003  *   Arguments: arg - driver soft state (unit) structure
7004  *
7005  *     Context: Executes in a timeout(9F) thread context
7006  */
7007 
static void
sd_pm_idletimeout_handler(void *arg)
{
	struct sd_lun *un = arg;

	time_t	now;

	/* Abort (without re-arming) if the instance is detaching. */
	mutex_enter(&sd_detach_mutex);
	if (un->un_detach_count != 0) {
		/* Abort if the instance is detaching */
		mutex_exit(&sd_detach_mutex);
		return;
	}
	mutex_exit(&sd_detach_mutex);

	now = ddi_get_time();
	/*
	 * Grab both mutexes, in the proper order, since we're accessing
	 * both PM and softstate variables.
	 */
	mutex_enter(SD_MUTEX(un));
	mutex_enter(&un->un_pm_mutex);
	/*
	 * Declare the device idle only when more than sd_pm_idletime
	 * seconds have passed since the last command completed AND there
	 * is no activity: no commands in the driver and no outstanding
	 * PM busy counts.
	 */
	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
		/*
		 * Update the chain types.
		 * This takes affect on the next new command received.
		 */
		if (un->un_f_non_devbsize_supported) {
			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
		} else {
			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
		}
		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;

		/* Tell the framework we are idle and stop the timer. */
		SD_TRACE(SD_LOG_IO_PM, un,
		    "sd_pm_idletimeout_handler: idling device\n");
		(void) pm_idle_component(SD_DEVINFO(un), 0);
		un->un_pm_idle_timeid = NULL;
	} else {
		/* Still busy: re-arm ourselves and check again shortly. */
		un->un_pm_idle_timeid =
			timeout(sd_pm_idletimeout_handler, un,
			(drv_usectohz((clock_t)300000))); /* 300 ms. */
	}
	mutex_exit(&un->un_pm_mutex);
	mutex_exit(SD_MUTEX(un));
}
7055 
7056 
7057 /*
7058  *    Function: sd_pm_timeout_handler
7059  *
7060  * Description: Callback to tell framework we are idle.
7061  *
7062  *     Context: timeout(9f) thread context.
7063  */
7064 
7065 static void
7066 sd_pm_timeout_handler(void *arg)
7067 {
7068 	struct sd_lun *un = arg;
7069 
7070 	(void) pm_idle_component(SD_DEVINFO(un), 0);
7071 	mutex_enter(&un->un_pm_mutex);
7072 	un->un_pm_timeid = NULL;
7073 	mutex_exit(&un->un_pm_mutex);
7074 }
7075 
7076 
7077 /*
7078  *    Function: sdpower
7079  *
7080  * Description: PM entry point.
7081  *
7082  * Return Code: DDI_SUCCESS
7083  *		DDI_FAILURE
7084  *
7085  *     Context: Kernel thread context
7086  */
7087 
7088 static int
7089 sdpower(dev_info_t *devi, int component, int level)
7090 {
7091 	struct sd_lun	*un;
7092 	int		instance;
7093 	int		rval = DDI_SUCCESS;
7094 	uint_t		i, log_page_size, maxcycles, ncycles;
7095 	uchar_t		*log_page_data;
7096 	int		log_sense_page;
7097 	int		medium_present;
7098 	time_t		intvlp;
7099 	dev_t		dev;
7100 	struct pm_trans_data	sd_pm_tran_data;
7101 	uchar_t		save_state;
7102 	int		sval;
7103 	uchar_t		state_before_pm;
7104 	int		got_semaphore_here;
7105 
7106 	instance = ddi_get_instance(devi);
7107 
7108 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
7109 	    (SD_SPINDLE_OFF > level) || (level > SD_SPINDLE_ON) ||
7110 	    component != 0) {
7111 		return (DDI_FAILURE);
7112 	}
7113 
7114 	dev = sd_make_device(SD_DEVINFO(un));
7115 
7116 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
7117 
7118 	/*
7119 	 * Must synchronize power down with close.
7120 	 * Attempt to decrement/acquire the open/close semaphore,
7121 	 * but do NOT wait on it. If it's not greater than zero,
7122 	 * ie. it can't be decremented without waiting, then
7123 	 * someone else, either open or close, already has it
7124 	 * and the try returns 0. Use that knowledge here to determine
7125 	 * if it's OK to change the device power level.
7126 	 * Also, only increment it on exit if it was decremented, ie. gotten,
7127 	 * here.
7128 	 */
7129 	got_semaphore_here = sema_tryp(&un->un_semoclose);
7130 
7131 	mutex_enter(SD_MUTEX(un));
7132 
7133 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
7134 	    un->un_ncmds_in_driver);
7135 
7136 	/*
7137 	 * If un_ncmds_in_driver is non-zero it indicates commands are
7138 	 * already being processed in the driver, or if the semaphore was
7139 	 * not gotten here it indicates an open or close is being processed.
7140 	 * At the same time somebody is requesting to go low power which
7141 	 * can't happen, therefore we need to return failure.
7142 	 */
7143 	if ((level == SD_SPINDLE_OFF) &&
7144 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
7145 		mutex_exit(SD_MUTEX(un));
7146 
7147 		if (got_semaphore_here != 0) {
7148 			sema_v(&un->un_semoclose);
7149 		}
7150 		SD_TRACE(SD_LOG_IO_PM, un,
7151 		    "sdpower: exit, device has queued cmds.\n");
7152 		return (DDI_FAILURE);
7153 	}
7154 
7155 	/*
7156 	 * if it is OFFLINE that means the disk is completely dead
7157 	 * in our case we have to put the disk in on or off by sending commands
7158 	 * Of course that will fail anyway so return back here.
7159 	 *
7160 	 * Power changes to a device that's OFFLINE or SUSPENDED
7161 	 * are not allowed.
7162 	 */
7163 	if ((un->un_state == SD_STATE_OFFLINE) ||
7164 	    (un->un_state == SD_STATE_SUSPENDED)) {
7165 		mutex_exit(SD_MUTEX(un));
7166 
7167 		if (got_semaphore_here != 0) {
7168 			sema_v(&un->un_semoclose);
7169 		}
7170 		SD_TRACE(SD_LOG_IO_PM, un,
7171 		    "sdpower: exit, device is off-line.\n");
7172 		return (DDI_FAILURE);
7173 	}
7174 
7175 	/*
7176 	 * Change the device's state to indicate it's power level
7177 	 * is being changed. Do this to prevent a power off in the
7178 	 * middle of commands, which is especially bad on devices
7179 	 * that are really powered off instead of just spun down.
7180 	 */
7181 	state_before_pm = un->un_state;
7182 	un->un_state = SD_STATE_PM_CHANGING;
7183 
7184 	mutex_exit(SD_MUTEX(un));
7185 
7186 	/*
7187 	 * If "pm-capable" property is set to TRUE by HBA drivers,
7188 	 * bypass the following checking, otherwise, check the log
7189 	 * sense information for this device
7190 	 */
7191 	if ((level == SD_SPINDLE_OFF) && un->un_f_log_sense_supported) {
7192 		/*
7193 		 * Get the log sense information to understand whether the
7194 		 * the powercycle counts have gone beyond the threshhold.
7195 		 */
7196 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
7197 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
7198 
7199 		mutex_enter(SD_MUTEX(un));
7200 		log_sense_page = un->un_start_stop_cycle_page;
7201 		mutex_exit(SD_MUTEX(un));
7202 
7203 		rval = sd_send_scsi_LOG_SENSE(un, log_page_data,
7204 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
7205 #ifdef	SDDEBUG
7206 		if (sd_force_pm_supported) {
7207 			/* Force a successful result */
7208 			rval = 0;
7209 		}
7210 #endif
7211 		if (rval != 0) {
7212 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
7213 			    "Log Sense Failed\n");
7214 			kmem_free(log_page_data, log_page_size);
7215 			/* Cannot support power management on those drives */
7216 
7217 			if (got_semaphore_here != 0) {
7218 				sema_v(&un->un_semoclose);
7219 			}
7220 			/*
7221 			 * On exit put the state back to it's original value
7222 			 * and broadcast to anyone waiting for the power
7223 			 * change completion.
7224 			 */
7225 			mutex_enter(SD_MUTEX(un));
7226 			un->un_state = state_before_pm;
7227 			cv_broadcast(&un->un_suspend_cv);
7228 			mutex_exit(SD_MUTEX(un));
7229 			SD_TRACE(SD_LOG_IO_PM, un,
7230 			    "sdpower: exit, Log Sense Failed.\n");
7231 			return (DDI_FAILURE);
7232 		}
7233 
7234 		/*
7235 		 * From the page data - Convert the essential information to
7236 		 * pm_trans_data
7237 		 */
7238 		maxcycles =
7239 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
7240 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
7241 
7242 		sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
7243 
7244 		ncycles =
7245 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
7246 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
7247 
7248 		sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
7249 
7250 		for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
7251 			sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
7252 			    log_page_data[8+i];
7253 		}
7254 
7255 		kmem_free(log_page_data, log_page_size);
7256 
7257 		/*
7258 		 * Call pm_trans_check routine to get the Ok from
7259 		 * the global policy
7260 		 */
7261 
7262 		sd_pm_tran_data.format = DC_SCSI_FORMAT;
7263 		sd_pm_tran_data.un.scsi_cycles.flag = 0;
7264 
7265 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
7266 #ifdef	SDDEBUG
7267 		if (sd_force_pm_supported) {
7268 			/* Force a successful result */
7269 			rval = 1;
7270 		}
7271 #endif
7272 		switch (rval) {
7273 		case 0:
7274 			/*
7275 			 * Not Ok to Power cycle or error in parameters passed
7276 			 * Would have given the advised time to consider power
7277 			 * cycle. Based on the new intvlp parameter we are
7278 			 * supposed to pretend we are busy so that pm framework
7279 			 * will never call our power entry point. Because of
7280 			 * that install a timeout handler and wait for the
7281 			 * recommended time to elapse so that power management
7282 			 * can be effective again.
7283 			 *
7284 			 * To effect this behavior, call pm_busy_component to
7285 			 * indicate to the framework this device is busy.
7286 			 * By not adjusting un_pm_count the rest of PM in
7287 			 * the driver will function normally, and independant
7288 			 * of this but because the framework is told the device
7289 			 * is busy it won't attempt powering down until it gets
7290 			 * a matching idle. The timeout handler sends this.
7291 			 * Note: sd_pm_entry can't be called here to do this
7292 			 * because sdpower may have been called as a result
7293 			 * of a call to pm_raise_power from within sd_pm_entry.
7294 			 *
7295 			 * If a timeout handler is already active then
7296 			 * don't install another.
7297 			 */
7298 			mutex_enter(&un->un_pm_mutex);
7299 			if (un->un_pm_timeid == NULL) {
7300 				un->un_pm_timeid =
7301 				    timeout(sd_pm_timeout_handler,
7302 				    un, intvlp * drv_usectohz(1000000));
7303 				mutex_exit(&un->un_pm_mutex);
7304 				(void) pm_busy_component(SD_DEVINFO(un), 0);
7305 			} else {
7306 				mutex_exit(&un->un_pm_mutex);
7307 			}
7308 			if (got_semaphore_here != 0) {
7309 				sema_v(&un->un_semoclose);
7310 			}
7311 			/*
7312 			 * On exit put the state back to it's original value
7313 			 * and broadcast to anyone waiting for the power
7314 			 * change completion.
7315 			 */
7316 			mutex_enter(SD_MUTEX(un));
7317 			un->un_state = state_before_pm;
7318 			cv_broadcast(&un->un_suspend_cv);
7319 			mutex_exit(SD_MUTEX(un));
7320 
7321 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
7322 			    "trans check Failed, not ok to power cycle.\n");
7323 			return (DDI_FAILURE);
7324 
7325 		case -1:
7326 			if (got_semaphore_here != 0) {
7327 				sema_v(&un->un_semoclose);
7328 			}
7329 			/*
7330 			 * On exit put the state back to it's original value
7331 			 * and broadcast to anyone waiting for the power
7332 			 * change completion.
7333 			 */
7334 			mutex_enter(SD_MUTEX(un));
7335 			un->un_state = state_before_pm;
7336 			cv_broadcast(&un->un_suspend_cv);
7337 			mutex_exit(SD_MUTEX(un));
7338 			SD_TRACE(SD_LOG_IO_PM, un,
7339 			    "sdpower: exit, trans check command Failed.\n");
7340 			return (DDI_FAILURE);
7341 		}
7342 	}
7343 
7344 	if (level == SD_SPINDLE_OFF) {
7345 		/*
7346 		 * Save the last state... if the STOP FAILS we need it
7347 		 * for restoring
7348 		 */
7349 		mutex_enter(SD_MUTEX(un));
7350 		save_state = un->un_last_state;
7351 		/*
7352 		 * There must not be any cmds. getting processed
7353 		 * in the driver when we get here. Power to the
7354 		 * device is potentially going off.
7355 		 */
7356 		ASSERT(un->un_ncmds_in_driver == 0);
7357 		mutex_exit(SD_MUTEX(un));
7358 
7359 		/*
7360 		 * For now suspend the device completely before spindle is
7361 		 * turned off
7362 		 */
7363 		if ((rval = sd_ddi_pm_suspend(un)) == DDI_FAILURE) {
7364 			if (got_semaphore_here != 0) {
7365 				sema_v(&un->un_semoclose);
7366 			}
7367 			/*
7368 			 * On exit put the state back to it's original value
7369 			 * and broadcast to anyone waiting for the power
7370 			 * change completion.
7371 			 */
7372 			mutex_enter(SD_MUTEX(un));
7373 			un->un_state = state_before_pm;
7374 			cv_broadcast(&un->un_suspend_cv);
7375 			mutex_exit(SD_MUTEX(un));
7376 			SD_TRACE(SD_LOG_IO_PM, un,
7377 			    "sdpower: exit, PM suspend Failed.\n");
7378 			return (DDI_FAILURE);
7379 		}
7380 	}
7381 
7382 	/*
7383 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
7384 	 * close, or strategy. Dump no long uses this routine, it uses it's
7385 	 * own code so it can be done in polled mode.
7386 	 */
7387 
7388 	medium_present = TRUE;
7389 
7390 	/*
7391 	 * When powering up, issue a TUR in case the device is at unit
7392 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
7393 	 * a deadlock on un_pm_busy_cv will occur.
7394 	 */
7395 	if (level == SD_SPINDLE_ON) {
7396 		(void) sd_send_scsi_TEST_UNIT_READY(un,
7397 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
7398 	}
7399 
7400 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
7401 	    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
7402 
7403 	sval = sd_send_scsi_START_STOP_UNIT(un,
7404 	    ((level == SD_SPINDLE_ON) ? SD_TARGET_START : SD_TARGET_STOP),
7405 	    SD_PATH_DIRECT);
7406 	/* Command failed, check for media present. */
7407 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
7408 		medium_present = FALSE;
7409 	}
7410 
7411 	/*
7412 	 * The conditions of interest here are:
7413 	 *   if a spindle off with media present fails,
7414 	 *	then restore the state and return an error.
7415 	 *   else if a spindle on fails,
7416 	 *	then return an error (there's no state to restore).
7417 	 * In all other cases we setup for the new state
7418 	 * and return success.
7419 	 */
7420 	switch (level) {
7421 	case SD_SPINDLE_OFF:
7422 		if ((medium_present == TRUE) && (sval != 0)) {
7423 			/* The stop command from above failed */
7424 			rval = DDI_FAILURE;
7425 			/*
7426 			 * The stop command failed, and we have media
7427 			 * present. Put the level back by calling the
7428 			 * sd_pm_resume() and set the state back to
7429 			 * it's previous value.
7430 			 */
7431 			(void) sd_ddi_pm_resume(un);
7432 			mutex_enter(SD_MUTEX(un));
7433 			un->un_last_state = save_state;
7434 			mutex_exit(SD_MUTEX(un));
7435 			break;
7436 		}
7437 		/*
7438 		 * The stop command from above succeeded.
7439 		 */
7440 		if (un->un_f_monitor_media_state) {
7441 			/*
7442 			 * Terminate watch thread in case of removable media
7443 			 * devices going into low power state. This is as per
7444 			 * the requirements of pm framework, otherwise commands
7445 			 * will be generated for the device (through watch
7446 			 * thread), even when the device is in low power state.
7447 			 */
7448 			mutex_enter(SD_MUTEX(un));
7449 			un->un_f_watcht_stopped = FALSE;
7450 			if (un->un_swr_token != NULL) {
7451 				opaque_t temp_token = un->un_swr_token;
7452 				un->un_f_watcht_stopped = TRUE;
7453 				un->un_swr_token = NULL;
7454 				mutex_exit(SD_MUTEX(un));
7455 				(void) scsi_watch_request_terminate(temp_token,
7456 				    SCSI_WATCH_TERMINATE_WAIT);
7457 			} else {
7458 				mutex_exit(SD_MUTEX(un));
7459 			}
7460 		}
7461 		break;
7462 
7463 	default:	/* The level requested is spindle on... */
7464 		/*
7465 		 * Legacy behavior: return success on a failed spinup
7466 		 * if there is no media in the drive.
7467 		 * Do this by looking at medium_present here.
7468 		 */
7469 		if ((sval != 0) && medium_present) {
7470 			/* The start command from above failed */
7471 			rval = DDI_FAILURE;
7472 			break;
7473 		}
7474 		/*
7475 		 * The start command from above succeeded
7476 		 * Resume the devices now that we have
7477 		 * started the disks
7478 		 */
7479 		(void) sd_ddi_pm_resume(un);
7480 
7481 		/*
7482 		 * Resume the watch thread since it was suspended
7483 		 * when the device went into low power mode.
7484 		 */
7485 		if (un->un_f_monitor_media_state) {
7486 			mutex_enter(SD_MUTEX(un));
7487 			if (un->un_f_watcht_stopped == TRUE) {
7488 				opaque_t temp_token;
7489 
7490 				un->un_f_watcht_stopped = FALSE;
7491 				mutex_exit(SD_MUTEX(un));
7492 				temp_token = scsi_watch_request_submit(
7493 				    SD_SCSI_DEVP(un),
7494 				    sd_check_media_time,
7495 				    SENSE_LENGTH, sd_media_watch_cb,
7496 				    (caddr_t)dev);
7497 				mutex_enter(SD_MUTEX(un));
7498 				un->un_swr_token = temp_token;
7499 			}
7500 			mutex_exit(SD_MUTEX(un));
7501 		}
7502 	}
7503 	if (got_semaphore_here != 0) {
7504 		sema_v(&un->un_semoclose);
7505 	}
7506 	/*
7507 	 * On exit put the state back to it's original value
7508 	 * and broadcast to anyone waiting for the power
7509 	 * change completion.
7510 	 */
7511 	mutex_enter(SD_MUTEX(un));
7512 	un->un_state = state_before_pm;
7513 	cv_broadcast(&un->un_suspend_cv);
7514 	mutex_exit(SD_MUTEX(un));
7515 
7516 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
7517 
7518 	return (rval);
7519 }
7520 
7521 
7522 
7523 /*
7524  *    Function: sdattach
7525  *
7526  * Description: Driver's attach(9e) entry point function.
7527  *
7528  *   Arguments: devi - opaque device info handle
7529  *		cmd  - attach  type
7530  *
7531  * Return Code: DDI_SUCCESS
7532  *		DDI_FAILURE
7533  *
7534  *     Context: Kernel thread context
7535  */
7536 
7537 static int
7538 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
7539 {
7540 	switch (cmd) {
7541 	case DDI_ATTACH:
7542 		return (sd_unit_attach(devi));
7543 	case DDI_RESUME:
7544 		return (sd_ddi_resume(devi));
7545 	default:
7546 		break;
7547 	}
7548 	return (DDI_FAILURE);
7549 }
7550 
7551 
7552 /*
7553  *    Function: sddetach
7554  *
7555  * Description: Driver's detach(9E) entry point function.
7556  *
7557  *   Arguments: devi - opaque device info handle
7558  *		cmd  - detach  type
7559  *
7560  * Return Code: DDI_SUCCESS
7561  *		DDI_FAILURE
7562  *
7563  *     Context: Kernel thread context
7564  */
7565 
7566 static int
7567 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
7568 {
7569 	switch (cmd) {
7570 	case DDI_DETACH:
7571 		return (sd_unit_detach(devi));
7572 	case DDI_SUSPEND:
7573 		return (sd_ddi_suspend(devi));
7574 	default:
7575 		break;
7576 	}
7577 	return (DDI_FAILURE);
7578 }
7579 
7580 
7581 /*
7582  *     Function: sd_sync_with_callback
7583  *
7584  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
7585  *		 state while the callback routine is active.
7586  *
7587  *    Arguments: un: softstate structure for the instance
7588  *
7589  *	Context: Kernel thread context
7590  */
7591 
7592 static void
7593 sd_sync_with_callback(struct sd_lun *un)
7594 {
7595 	ASSERT(un != NULL);
7596 
7597 	mutex_enter(SD_MUTEX(un));
7598 
7599 	ASSERT(un->un_in_callback >= 0);
7600 
7601 	while (un->un_in_callback > 0) {
7602 		mutex_exit(SD_MUTEX(un));
7603 		delay(2);
7604 		mutex_enter(SD_MUTEX(un));
7605 	}
7606 
7607 	mutex_exit(SD_MUTEX(un));
7608 }
7609 
7610 /*
7611  *    Function: sd_unit_attach
7612  *
7613  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
7614  *		the soft state structure for the device and performs
7615  *		all necessary structure and device initializations.
7616  *
7617  *   Arguments: devi: the system's dev_info_t for the device.
7618  *
7619  * Return Code: DDI_SUCCESS if attach is successful.
7620  *		DDI_FAILURE if any part of the attach fails.
7621  *
7622  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
7623  *		Kernel thread context only.  Can sleep.
7624  */
7625 
7626 static int
7627 sd_unit_attach(dev_info_t *devi)
7628 {
7629 	struct	scsi_device	*devp;
7630 	struct	sd_lun		*un;
7631 	char			*variantp;
7632 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
7633 	int	instance;
7634 	int	rval;
7635 	int	wc_enabled;
7636 	uint64_t	capacity;
7637 	uint_t		lbasize;
7638 
7639 	/*
7640 	 * Retrieve the target driver's private data area. This was set
7641 	 * up by the HBA.
7642 	 */
7643 	devp = ddi_get_driver_private(devi);
7644 
7645 	/*
7646 	 * Since we have no idea what state things were left in by the last
7647 	 * user of the device, set up some 'default' settings, ie. turn 'em
7648 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
7649 	 * Do this before the scsi_probe, which sends an inquiry.
7650 	 * This is a fix for bug (4430280).
7651 	 * Of special importance is wide-xfer. The drive could have been left
7652 	 * in wide transfer mode by the last driver to communicate with it,
7653 	 * this includes us. If that's the case, and if the following is not
7654 	 * setup properly or we don't re-negotiate with the drive prior to
7655 	 * transferring data to/from the drive, it causes bus parity errors,
7656 	 * data overruns, and unexpected interrupts. This first occurred when
7657 	 * the fix for bug (4378686) was made.
7658 	 */
7659 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
7660 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
7661 	(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
7662 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
7663 
7664 	/*
7665 	 * Use scsi_probe() to issue an INQUIRY command to the device.
7666 	 * This call will allocate and fill in the scsi_inquiry structure
7667 	 * and point the sd_inq member of the scsi_device structure to it.
7668 	 * If the attach succeeds, then this memory will not be de-allocated
7669 	 * (via scsi_unprobe()) until the instance is detached.
7670 	 */
7671 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
7672 		goto probe_failed;
7673 	}
7674 
7675 	/*
7676 	 * Check the device type as specified in the inquiry data and
7677 	 * claim it if it is of a type that we support.
7678 	 */
7679 	switch (devp->sd_inq->inq_dtype) {
7680 	case DTYPE_DIRECT:
7681 		break;
7682 	case DTYPE_RODIRECT:
7683 		break;
7684 	case DTYPE_OPTICAL:
7685 		break;
7686 	case DTYPE_NOTPRESENT:
7687 	default:
7688 		/* Unsupported device type; fail the attach. */
7689 		goto probe_failed;
7690 	}
7691 
7692 	/*
7693 	 * Allocate the soft state structure for this unit.
7694 	 *
7695 	 * We rely upon this memory being set to all zeroes by
7696 	 * ddi_soft_state_zalloc().  We assume that any member of the
7697 	 * soft state structure that is not explicitly initialized by
7698 	 * this routine will have a value of zero.
7699 	 */
7700 	instance = ddi_get_instance(devp->sd_dev);
7701 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
7702 		goto probe_failed;
7703 	}
7704 
7705 	/*
7706 	 * Retrieve a pointer to the newly-allocated soft state.
7707 	 *
7708 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
7709 	 * was successful, unless something has gone horribly wrong and the
7710 	 * ddi's soft state internals are corrupt (in which case it is
7711 	 * probably better to halt here than just fail the attach....)
7712 	 */
7713 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
7714 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
7715 		    instance);
7716 		/*NOTREACHED*/
7717 	}
7718 
7719 	/*
7720 	 * Link the back ptr of the driver soft state to the scsi_device
7721 	 * struct for this lun.
7722 	 * Save a pointer to the softstate in the driver-private area of
7723 	 * the scsi_device struct.
7724 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
7725 	 * we first set un->un_sd below.
7726 	 */
7727 	un->un_sd = devp;
7728 	devp->sd_private = (opaque_t)un;
7729 
7730 	/*
7731 	 * The following must be after devp is stored in the soft state struct.
7732 	 */
7733 #ifdef SDDEBUG
7734 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7735 	    "%s_unit_attach: un:0x%p instance:%d\n",
7736 	    ddi_driver_name(devi), un, instance);
7737 #endif
7738 
7739 	/*
7740 	 * Set up the device type and node type (for the minor nodes).
7741 	 * By default we assume that the device can at least support the
7742 	 * Common Command Set. Call it a CD-ROM if it reports itself
7743 	 * as a RODIRECT device.
7744 	 */
7745 	switch (devp->sd_inq->inq_dtype) {
7746 	case DTYPE_RODIRECT:
7747 		un->un_node_type = DDI_NT_CD_CHAN;
7748 		un->un_ctype	 = CTYPE_CDROM;
7749 		break;
7750 	case DTYPE_OPTICAL:
7751 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7752 		un->un_ctype	 = CTYPE_ROD;
7753 		break;
7754 	default:
7755 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7756 		un->un_ctype	 = CTYPE_CCS;
7757 		break;
7758 	}
7759 
7760 	/*
7761 	 * Try to read the interconnect type from the HBA.
7762 	 *
7763 	 * Note: This driver is currently compiled as two binaries, a parallel
7764 	 * scsi version (sd) and a fibre channel version (ssd). All functional
7765 	 * differences are determined at compile time. In the future a single
7766 	 * binary will be provided and the inteconnect type will be used to
7767 	 * differentiate between fibre and parallel scsi behaviors. At that time
7768 	 * it will be necessary for all fibre channel HBAs to support this
7769 	 * property.
7770 	 *
7771 	 * set un_f_is_fiber to TRUE ( default fiber )
7772 	 */
7773 	un->un_f_is_fibre = TRUE;
7774 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7775 	case INTERCONNECT_SSA:
7776 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7777 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7778 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7779 		break;
7780 	case INTERCONNECT_PARALLEL:
7781 		un->un_f_is_fibre = FALSE;
7782 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7783 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7784 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7785 		break;
7786 	case INTERCONNECT_FIBRE:
7787 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7788 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7789 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7790 		break;
7791 	case INTERCONNECT_FABRIC:
7792 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7793 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7794 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7795 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7796 		break;
7797 	default:
7798 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
7799 		/*
7800 		 * The HBA does not support the "interconnect-type" property
7801 		 * (or did not provide a recognized type).
7802 		 *
7803 		 * Note: This will be obsoleted when a single fibre channel
7804 		 * and parallel scsi driver is delivered. In the meantime the
7805 		 * interconnect type will be set to the platform default.If that
7806 		 * type is not parallel SCSI, it means that we should be
7807 		 * assuming "ssd" semantics. However, here this also means that
7808 		 * the FC HBA is not supporting the "interconnect-type" property
7809 		 * like we expect it to, so log this occurrence.
7810 		 */
7811 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7812 		if (!SD_IS_PARALLEL_SCSI(un)) {
7813 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7814 			    "sd_unit_attach: un:0x%p Assuming "
7815 			    "INTERCONNECT_FIBRE\n", un);
7816 		} else {
7817 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7818 			    "sd_unit_attach: un:0x%p Assuming "
7819 			    "INTERCONNECT_PARALLEL\n", un);
7820 			un->un_f_is_fibre = FALSE;
7821 		}
7822 #else
7823 		/*
7824 		 * Note: This source will be implemented when a single fibre
7825 		 * channel and parallel scsi driver is delivered. The default
7826 		 * will be to assume that if a device does not support the
7827 		 * "interconnect-type" property it is a parallel SCSI HBA and
7828 		 * we will set the interconnect type for parallel scsi.
7829 		 */
7830 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7831 		un->un_f_is_fibre = FALSE;
7832 #endif
7833 		break;
7834 	}
7835 
7836 	if (un->un_f_is_fibre == TRUE) {
7837 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7838 			SCSI_VERSION_3) {
7839 			switch (un->un_interconnect_type) {
7840 			case SD_INTERCONNECT_FIBRE:
7841 			case SD_INTERCONNECT_SSA:
7842 				un->un_node_type = DDI_NT_BLOCK_WWN;
7843 				break;
7844 			default:
7845 				break;
7846 			}
7847 		}
7848 	}
7849 
7850 	/*
7851 	 * Initialize the Request Sense command for the target
7852 	 */
7853 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
7854 		goto alloc_rqs_failed;
7855 	}
7856 
7857 	/*
7858 	 * Set un_retry_count with SD_RETRY_COUNT, this is ok for Sparc
7859 	 * with seperate binary for sd and ssd.
7860 	 *
7861 	 * x86 has 1 binary, un_retry_count is set base on connection type.
7862 	 * The hardcoded values will go away when Sparc uses 1 binary
7863 	 * for sd and ssd.  This hardcoded values need to match
7864 	 * SD_RETRY_COUNT in sddef.h
7865 	 * The value used is base on interconnect type.
7866 	 * fibre = 3, parallel = 5
7867 	 */
7868 #if defined(__i386) || defined(__amd64)
7869 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
7870 #else
7871 	un->un_retry_count = SD_RETRY_COUNT;
7872 #endif
7873 
7874 	/*
7875 	 * Set the per disk retry count to the default number of retries
7876 	 * for disks and CDROMs. This value can be overridden by the
7877 	 * disk property list or an entry in sd.conf.
7878 	 */
7879 	un->un_notready_retry_count =
7880 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
7881 			: DISK_NOT_READY_RETRY_COUNT(un);
7882 
7883 	/*
7884 	 * Set the busy retry count to the default value of un_retry_count.
7885 	 * This can be overridden by entries in sd.conf or the device
7886 	 * config table.
7887 	 */
7888 	un->un_busy_retry_count = un->un_retry_count;
7889 
7890 	/*
7891 	 * Init the reset threshold for retries.  This number determines
7892 	 * how many retries must be performed before a reset can be issued
7893 	 * (for certain error conditions). This can be overridden by entries
7894 	 * in sd.conf or the device config table.
7895 	 */
7896 	un->un_reset_retry_count = (un->un_retry_count / 2);
7897 
7898 	/*
7899 	 * Set the victim_retry_count to the default un_retry_count
7900 	 */
7901 	un->un_victim_retry_count = (2 * un->un_retry_count);
7902 
7903 	/*
7904 	 * Set the reservation release timeout to the default value of
7905 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
7906 	 * device config table.
7907 	 */
7908 	un->un_reserve_release_time = 5;
7909 
7910 	/*
7911 	 * Set up the default maximum transfer size. Note that this may
7912 	 * get updated later in the attach, when setting up default wide
7913 	 * operations for disks.
7914 	 */
7915 #if defined(__i386) || defined(__amd64)
7916 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
7917 #else
7918 	un->un_max_xfer_size = (uint_t)maxphys;
7919 #endif
7920 
7921 	/*
7922 	 * Get "allow bus device reset" property (defaults to "enabled" if
7923 	 * the property was not defined). This is to disable bus resets for
7924 	 * certain kinds of error recovery. Note: In the future when a run-time
7925 	 * fibre check is available the soft state flag should default to
7926 	 * enabled.
7927 	 */
7928 	if (un->un_f_is_fibre == TRUE) {
7929 		un->un_f_allow_bus_device_reset = TRUE;
7930 	} else {
7931 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7932 			"allow-bus-device-reset", 1) != 0) {
7933 			un->un_f_allow_bus_device_reset = TRUE;
7934 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7935 			"sd_unit_attach: un:0x%p Bus device reset enabled\n",
7936 				un);
7937 		} else {
7938 			un->un_f_allow_bus_device_reset = FALSE;
7939 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7940 			"sd_unit_attach: un:0x%p Bus device reset disabled\n",
7941 				un);
7942 		}
7943 	}
7944 
7945 	/*
7946 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
7947 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
7948 	 *
7949 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
7950 	 * property. The new "variant" property with a value of "atapi" has been
7951 	 * introduced so that future 'variants' of standard SCSI behavior (like
7952 	 * atapi) could be specified by the underlying HBA drivers by supplying
7953 	 * a new value for the "variant" property, instead of having to define a
7954 	 * new property.
7955 	 */
7956 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
7957 		un->un_f_cfg_is_atapi = TRUE;
7958 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7959 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
7960 	}
7961 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
7962 	    &variantp) == DDI_PROP_SUCCESS) {
7963 		if (strcmp(variantp, "atapi") == 0) {
7964 			un->un_f_cfg_is_atapi = TRUE;
7965 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7966 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
7967 		}
7968 		ddi_prop_free(variantp);
7969 	}
7970 
7971 	un->un_cmd_timeout	= SD_IO_TIME;
7972 
7973 	/* Info on current states, statuses, etc. (Updated frequently) */
7974 	un->un_state		= SD_STATE_NORMAL;
7975 	un->un_last_state	= SD_STATE_NORMAL;
7976 
7977 	/* Control & status info for command throttling */
7978 	un->un_throttle		= sd_max_throttle;
7979 	un->un_saved_throttle	= sd_max_throttle;
7980 	un->un_min_throttle	= sd_min_throttle;
7981 
7982 	if (un->un_f_is_fibre == TRUE) {
7983 		un->un_f_use_adaptive_throttle = TRUE;
7984 	} else {
7985 		un->un_f_use_adaptive_throttle = FALSE;
7986 	}
7987 
7988 	/* Removable media support. */
7989 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
7990 	un->un_mediastate		= DKIO_NONE;
7991 	un->un_specified_mediastate	= DKIO_NONE;
7992 
7993 	/* CVs for suspend/resume (PM or DR) */
7994 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
7995 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
7996 
7997 	/* Power management support. */
7998 	un->un_power_level = SD_SPINDLE_UNINIT;
7999 
8000 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
8001 	un->un_f_wcc_inprog = 0;
8002 
8003 	/*
8004 	 * The open/close semaphore is used to serialize threads executing
8005 	 * in the driver's open & close entry point routines for a given
8006 	 * instance.
8007 	 */
8008 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
8009 
8010 	/*
8011 	 * The conf file entry and softstate variable is a forceful override,
8012 	 * meaning a non-zero value must be entered to change the default.
8013 	 */
8014 	un->un_f_disksort_disabled = FALSE;
8015 
8016 	/*
8017 	 * Retrieve the properties from the static driver table or the driver
8018 	 * configuration file (.conf) for this unit and update the soft state
8019 	 * for the device as needed for the indicated properties.
8020 	 * Note: the property configuration needs to occur here as some of the
8021 	 * following routines may have dependancies on soft state flags set
8022 	 * as part of the driver property configuration.
8023 	 */
8024 	sd_read_unit_properties(un);
8025 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8026 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
8027 
8028 	/*
8029 	 * Only if a device has "hotpluggable" property, it is
8030 	 * treated as hotpluggable device. Otherwise, it is
8031 	 * regarded as non-hotpluggable one.
8032 	 */
8033 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
8034 	    -1) != -1) {
8035 		un->un_f_is_hotpluggable = TRUE;
8036 	}
8037 
8038 	/*
8039 	 * set unit's attributes(flags) according to "hotpluggable" and
8040 	 * RMB bit in INQUIRY data.
8041 	 */
8042 	sd_set_unit_attributes(un, devi);
8043 
8044 	/*
8045 	 * By default, we mark the capacity, lbasize, and geometry
8046 	 * as invalid. Only if we successfully read a valid capacity
8047 	 * will we update the un_blockcount and un_tgt_blocksize with the
8048 	 * valid values (the geometry will be validated later).
8049 	 */
8050 	un->un_f_blockcount_is_valid	= FALSE;
8051 	un->un_f_tgt_blocksize_is_valid	= FALSE;
8052 	un->un_f_geometry_is_valid	= FALSE;
8053 
8054 	/*
8055 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
8056 	 * otherwise.
8057 	 */
8058 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
8059 	un->un_blockcount = 0;
8060 
8061 	/*
8062 	 * Set up the per-instance info needed to determine the correct
8063 	 * CDBs and other info for issuing commands to the target.
8064 	 */
8065 	sd_init_cdb_limits(un);
8066 
8067 	/*
8068 	 * Set up the IO chains to use, based upon the target type.
8069 	 */
8070 	if (un->un_f_non_devbsize_supported) {
8071 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
8072 	} else {
8073 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
8074 	}
8075 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
8076 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
8077 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
8078 
8079 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
8080 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
8081 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
8082 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
8083 
8084 
8085 	if (ISCD(un)) {
8086 		un->un_additional_codes = sd_additional_codes;
8087 	} else {
8088 		un->un_additional_codes = NULL;
8089 	}
8090 
8091 	/*
8092 	 * Create the kstats here so they can be available for attach-time
8093 	 * routines that send commands to the unit (either polled or via
8094 	 * sd_send_scsi_cmd).
8095 	 *
8096 	 * Note: This is a critical sequence that needs to be maintained:
8097 	 *	1) Instantiate the kstats here, before any routines using the
8098 	 *	   iopath (i.e. sd_send_scsi_cmd).
8099 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8100 	 *	   stats (sd_set_pstats), following sd_validate_geometry(),
8101 	 *	   sd_register_devid(), and sd_cache_control().
8102 	 */
8103 
8104 	un->un_stats = kstat_create(sd_label, instance,
8105 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
8106 	if (un->un_stats != NULL) {
8107 		un->un_stats->ks_lock = SD_MUTEX(un);
8108 		kstat_install(un->un_stats);
8109 	}
8110 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8111 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
8112 
8113 	sd_create_errstats(un, instance);
8114 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8115 	    "sd_unit_attach: un:0x%p errstats created\n", un);
8116 
8117 	/*
8118 	 * The following if/else code was relocated here from below as part
8119 	 * of the fix for bug (4430280). However with the default setup added
8120 	 * on entry to this routine, it's no longer absolutely necessary for
8121 	 * this to be before the call to sd_spin_up_unit.
8122 	 */
8123 	if (SD_IS_PARALLEL_SCSI(un)) {
8124 		/*
8125 		 * If SCSI-2 tagged queueing is supported by the target
8126 		 * and by the host adapter then we will enable it.
8127 		 */
8128 		un->un_tagflags = 0;
8129 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8130 		    (devp->sd_inq->inq_cmdque) &&
8131 		    (un->un_f_arq_enabled == TRUE)) {
8132 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
8133 			    1, 1) == 1) {
8134 				un->un_tagflags = FLAG_STAG;
8135 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8136 				    "sd_unit_attach: un:0x%p tag queueing "
8137 				    "enabled\n", un);
8138 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
8139 			    "untagged-qing", 0) == 1) {
8140 				un->un_f_opt_queueing = TRUE;
8141 				un->un_saved_throttle = un->un_throttle =
8142 				    min(un->un_throttle, 3);
8143 			} else {
8144 				un->un_f_opt_queueing = FALSE;
8145 				un->un_saved_throttle = un->un_throttle = 1;
8146 			}
8147 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
8148 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
8149 			/* The Host Adapter supports internal queueing. */
8150 			un->un_f_opt_queueing = TRUE;
8151 			un->un_saved_throttle = un->un_throttle =
8152 			    min(un->un_throttle, 3);
8153 		} else {
8154 			un->un_f_opt_queueing = FALSE;
8155 			un->un_saved_throttle = un->un_throttle = 1;
8156 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8157 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
8158 		}
8159 
8160 
8161 		/* Setup or tear down default wide operations for disks */
8162 
8163 		/*
8164 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
8165 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
8166 		 * system and be set to different values. In the future this
8167 		 * code may need to be updated when the ssd module is
8168 		 * obsoleted and removed from the system. (4299588)
8169 		 */
8170 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) &&
8171 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
8172 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8173 			    1, 1) == 1) {
8174 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8175 				    "sd_unit_attach: un:0x%p Wide Transfer "
8176 				    "enabled\n", un);
8177 			}
8178 
8179 			/*
8180 			 * If tagged queuing has also been enabled, then
8181 			 * enable large xfers
8182 			 */
8183 			if (un->un_saved_throttle == sd_max_throttle) {
8184 				un->un_max_xfer_size =
8185 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8186 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
8187 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8188 				    "sd_unit_attach: un:0x%p max transfer "
8189 				    "size=0x%x\n", un, un->un_max_xfer_size);
8190 			}
8191 		} else {
8192 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
8193 			    0, 1) == 1) {
8194 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8195 				    "sd_unit_attach: un:0x%p "
8196 				    "Wide Transfer disabled\n", un);
8197 			}
8198 		}
8199 	} else {
8200 		un->un_tagflags = FLAG_STAG;
8201 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
8202 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
8203 	}
8204 
8205 	/*
8206 	 * If this target supports LUN reset, try to enable it.
8207 	 */
8208 	if (un->un_f_lun_reset_enabled) {
8209 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
8210 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8211 			    "un:0x%p lun_reset capability set\n", un);
8212 		} else {
8213 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
8214 			    "un:0x%p lun-reset capability not set\n", un);
8215 		}
8216 	}
8217 
8218 	/*
8219 	 * At this point in the attach, we have enough info in the
8220 	 * soft state to be able to issue commands to the target.
8221 	 *
8222 	 * All command paths used below MUST issue their commands as
8223 	 * SD_PATH_DIRECT. This is important as intermediate layers
8224 	 * are not all initialized yet (such as PM).
8225 	 */
8226 
8227 	/*
8228 	 * Send a TEST UNIT READY command to the device. This should clear
8229 	 * any outstanding UNIT ATTENTION that may be present.
8230 	 *
8231 	 * Note: Don't check for success, just track if there is a reservation,
8232 	 * this is a throw away command to clear any unit attentions.
8233 	 *
8234 	 * Note: This MUST be the first command issued to the target during
8235 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
8236 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
8237 	 * with attempts at spinning up a device with no media.
8238 	 */
8239 	if (sd_send_scsi_TEST_UNIT_READY(un, SD_DONT_RETRY_TUR) == EACCES) {
8240 		reservation_flag = SD_TARGET_IS_RESERVED;
8241 	}
8242 
8243 	/*
8244 	 * If the device is NOT a removable media device, attempt to spin
8245 	 * it up (using the START_STOP_UNIT command) and read its capacity
8246 	 * (using the READ CAPACITY command).  Note, however, that either
8247 	 * of these could fail and in some cases we would continue with
8248 	 * the attach despite the failure (see below).
8249 	 */
8250 	if (un->un_f_descr_format_supported) {
8251 		switch (sd_spin_up_unit(un)) {
8252 		case 0:
8253 			/*
8254 			 * Spin-up was successful; now try to read the
8255 			 * capacity.  If successful then save the results
8256 			 * and mark the capacity & lbasize as valid.
8257 			 */
8258 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8259 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
8260 
8261 			switch (sd_send_scsi_READ_CAPACITY(un, &capacity,
8262 			    &lbasize, SD_PATH_DIRECT)) {
8263 			case 0: {
8264 				if (capacity > DK_MAX_BLOCKS) {
8265 #ifdef _LP64
8266 					/*
8267 					 * Enable descriptor format sense data
8268 					 * so that we can get 64 bit sense
8269 					 * data fields.
8270 					 */
8271 					sd_enable_descr_sense(un);
8272 #else
8273 					/* 32-bit kernels can't handle this */
8274 					scsi_log(SD_DEVINFO(un),
8275 					    sd_label, CE_WARN,
8276 					    "disk has %llu blocks, which "
8277 					    "is too large for a 32-bit "
8278 					    "kernel", capacity);
8279 					goto spinup_failed;
8280 #endif
8281 				}
8282 				/*
8283 				 * The following relies on
8284 				 * sd_send_scsi_READ_CAPACITY never
8285 				 * returning 0 for capacity and/or lbasize.
8286 				 */
8287 				sd_update_block_info(un, lbasize, capacity);
8288 
8289 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8290 				    "sd_unit_attach: un:0x%p capacity = %ld "
8291 				    "blocks; lbasize= %ld.\n", un,
8292 				    un->un_blockcount, un->un_tgt_blocksize);
8293 
8294 				break;
8295 			}
8296 			case EACCES:
8297 				/*
8298 				 * Should never get here if the spin-up
8299 				 * succeeded, but code it in anyway.
8300 				 * From here, just continue with the attach...
8301 				 */
8302 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8303 				    "sd_unit_attach: un:0x%p "
8304 				    "sd_send_scsi_READ_CAPACITY "
8305 				    "returned reservation conflict\n", un);
8306 				reservation_flag = SD_TARGET_IS_RESERVED;
8307 				break;
8308 			default:
8309 				/*
8310 				 * Likewise, should never get here if the
8311 				 * spin-up succeeded. Just continue with
8312 				 * the attach...
8313 				 */
8314 				break;
8315 			}
8316 			break;
8317 		case EACCES:
8318 			/*
8319 			 * Device is reserved by another host.  In this case
8320 			 * we could not spin it up or read the capacity, but
8321 			 * we continue with the attach anyway.
8322 			 */
8323 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8324 			    "sd_unit_attach: un:0x%p spin-up reservation "
8325 			    "conflict.\n", un);
8326 			reservation_flag = SD_TARGET_IS_RESERVED;
8327 			break;
8328 		default:
8329 			/* Fail the attach if the spin-up failed. */
8330 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8331 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
8332 			goto spinup_failed;
8333 		}
8334 	}
8335 
8336 	/*
8337 	 * Check to see if this is a MMC drive
8338 	 */
8339 	if (ISCD(un)) {
8340 		sd_set_mmc_caps(un);
8341 	}
8342 
8343 	/*
8344 	 * Create the minor nodes for the device.
8345 	 * Note: If we want to support fdisk on both sparc and intel, this will
8346 	 * have to separate out the notion that VTOC8 is always sparc, and
8347 	 * VTOC16 is always intel (tho these can be the defaults).  The vtoc
8348 	 * type will have to be determined at run-time, and the fdisk
8349 	 * partitioning will have to have been read & set up before we
8350 	 * create the minor nodes. (any other inits (such as kstats) that
8351 	 * also ought to be done before creating the minor nodes?) (Doesn't
8352 	 * setting up the minor nodes kind of imply that we're ready to
8353 	 * handle an open from userland?)
8354 	 */
8355 	if (sd_create_minor_nodes(un, devi) != DDI_SUCCESS) {
8356 		goto create_minor_nodes_failed;
8357 	}
8358 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8359 	    "sd_unit_attach: un:0x%p minor nodes created\n", un);
8360 
8361 	/*
8362 	 * Add a zero-length attribute to tell the world we support
8363 	 * kernel ioctls (for layered drivers)
8364 	 */
8365 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8366 	    DDI_KERNEL_IOCTL, NULL, 0);
8367 
8368 	/*
8369 	 * Add a boolean property to tell the world we support
8370 	 * the B_FAILFAST flag (for layered drivers)
8371 	 */
8372 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8373 	    "ddi-failfast-supported", NULL, 0);
8374 
8375 	/*
8376 	 * Initialize power management
8377 	 */
8378 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
8379 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
8380 	sd_setup_pm(un, devi);
8381 	if (un->un_f_pm_is_enabled == FALSE) {
8382 		/*
8383 		 * For performance, point to a jump table that does
8384 		 * not include pm.
8385 		 * The direct and priority chains don't change with PM.
8386 		 *
8387 		 * Note: this is currently done based on individual device
8388 		 * capabilities. When an interface for determining system
8389 		 * power enabled state becomes available, or when additional
8390 		 * layers are added to the command chain, these values will
8391 		 * have to be re-evaluated for correctness.
8392 		 */
8393 		if (un->un_f_non_devbsize_supported) {
8394 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
8395 		} else {
8396 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
8397 		}
8398 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8399 	}
8400 
8401 	/*
8402 	 * This property is set to 0 by HA software to avoid retries
8403 	 * on a reserved disk. (The preferred property name is
8404 	 * "retry-on-reservation-conflict") (1189689)
8405 	 *
8406 	 * Note: The use of a global here can have unintended consequences. A
	 * per instance variable is preferable to match the capabilities of
8408 	 * different underlying hba's (4402600)
8409 	 */
8410 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
8411 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
8412 	    sd_retry_on_reservation_conflict);
8413 	if (sd_retry_on_reservation_conflict != 0) {
8414 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
8415 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
8416 		    sd_retry_on_reservation_conflict);
8417 	}
8418 
8419 	/* Set up options for QFULL handling. */
8420 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8421 	    "qfull-retries", -1)) != -1) {
8422 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
8423 		    rval, 1);
8424 	}
8425 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8426 	    "qfull-retry-interval", -1)) != -1) {
8427 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
8428 		    rval, 1);
8429 	}
8430 
8431 	/*
8432 	 * This just prints a message that announces the existence of the
8433 	 * device. The message is always printed in the system logfile, but
8434 	 * only appears on the console if the system is booted with the
8435 	 * -v (verbose) argument.
8436 	 */
8437 	ddi_report_dev(devi);
8438 
8439 	/*
8440 	 * The framework calls driver attach routines single-threaded
8441 	 * for a given instance.  However we still acquire SD_MUTEX here
	 * because this is required for calling the sd_validate_geometry()
8443 	 * and sd_register_devid() functions.
8444 	 */
8445 	mutex_enter(SD_MUTEX(un));
8446 	un->un_f_geometry_is_valid = FALSE;
8447 	un->un_mediastate = DKIO_NONE;
8448 	un->un_reserved = -1;
8449 
8450 	/*
8451 	 * Read and validate the device's geometry (ie, disk label)
8452 	 * A new unformatted drive will not have a valid geometry, but
8453 	 * the driver needs to successfully attach to this device so
8454 	 * the drive can be formatted via ioctls.
8455 	 */
8456 	if (((sd_validate_geometry(un, SD_PATH_DIRECT) ==
8457 	    ENOTSUP)) &&
8458 	    (un->un_blockcount < DK_MAX_BLOCKS)) {
8459 		/*
8460 		 * We found a small disk with an EFI label on it;
8461 		 * we need to fix up the minor nodes accordingly.
8462 		 */
8463 		ddi_remove_minor_node(devi, "h");
8464 		ddi_remove_minor_node(devi, "h,raw");
8465 		(void) ddi_create_minor_node(devi, "wd",
8466 		    S_IFBLK,
8467 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8468 		    un->un_node_type, NULL);
8469 		(void) ddi_create_minor_node(devi, "wd,raw",
8470 		    S_IFCHR,
8471 		    (instance << SDUNIT_SHIFT) | WD_NODE,
8472 		    un->un_node_type, NULL);
8473 	}
8474 
8475 	/*
8476 	 * Read and initialize the devid for the unit.
8477 	 */
8478 	ASSERT(un->un_errstats != NULL);
8479 	if (un->un_f_devid_supported) {
8480 		sd_register_devid(un, devi, reservation_flag);
8481 	}
8482 	mutex_exit(SD_MUTEX(un));
8483 
8484 #if (defined(__fibre))
8485 	/*
	 * Register callbacks for fibre only.  You can't do this solely
8487 	 * on the basis of the devid_type because this is hba specific.
8488 	 * We need to query our hba capabilities to find out whether to
8489 	 * register or not.
8490 	 */
8491 	if (un->un_f_is_fibre) {
8492 	    if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
8493 		sd_init_event_callbacks(un);
8494 		SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8495 		    "sd_unit_attach: un:0x%p event callbacks inserted", un);
8496 	    }
8497 	}
8498 #endif
8499 
8500 	if (un->un_f_opt_disable_cache == TRUE) {
8501 		/*
8502 		 * Disable both read cache and write cache.  This is
8503 		 * the historic behavior of the keywords in the config file.
8504 		 */
8505 		if (sd_cache_control(un, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
8506 		    0) {
8507 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8508 			    "sd_unit_attach: un:0x%p Could not disable "
8509 			    "caching", un);
8510 			goto devid_failed;
8511 		}
8512 	}
8513 
8514 	/*
8515 	 * Check the value of the WCE bit now and
8516 	 * set un_f_write_cache_enabled accordingly.
8517 	 */
8518 	(void) sd_get_write_cache_enabled(un, &wc_enabled);
8519 	mutex_enter(SD_MUTEX(un));
8520 	un->un_f_write_cache_enabled = (wc_enabled != 0);
8521 	mutex_exit(SD_MUTEX(un));
8522 
8523 	/*
8524 	 * Set the pstat and error stat values here, so data obtained during the
8525 	 * previous attach-time routines is available.
8526 	 *
8527 	 * Note: This is a critical sequence that needs to be maintained:
8528 	 *	1) Instantiate the kstats before any routines using the iopath
8529 	 *	   (i.e. sd_send_scsi_cmd).
8530 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8531 	 *	   stats (sd_set_pstats)here, following sd_validate_geometry(),
8532 	 *	   sd_register_devid(), and sd_cache_control().
8533 	 */
8534 	if (un->un_f_pkstats_enabled) {
8535 		sd_set_pstats(un);
8536 		SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8537 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
8538 	}
8539 
8540 	sd_set_errstats(un);
8541 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8542 	    "sd_unit_attach: un:0x%p errstats set\n", un);
8543 
8544 	/*
8545 	 * Find out what type of reservation this disk supports.
8546 	 */
8547 	switch (sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS, 0, NULL)) {
8548 	case 0:
8549 		/*
8550 		 * SCSI-3 reservations are supported.
8551 		 */
8552 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8553 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8554 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
8555 		break;
8556 	case ENOTSUP:
8557 		/*
8558 		 * The PERSISTENT RESERVE IN command would not be recognized by
8559 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
8560 		 */
8561 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8562 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
8563 		un->un_reservation_type = SD_SCSI2_RESERVATION;
8564 		break;
8565 	default:
8566 		/*
8567 		 * default to SCSI-3 reservations
8568 		 */
8569 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8570 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
8571 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8572 		break;
8573 	}
8574 
8575 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8576 	    "sd_unit_attach: un:0x%p exit success\n", un);
8577 
8578 	return (DDI_SUCCESS);
8579 
8580 	/*
8581 	 * An error occurred during the attach; clean up & return failure.
8582 	 */
8583 
8584 devid_failed:
8585 
8586 setup_pm_failed:
8587 	ddi_remove_minor_node(devi, NULL);
8588 
8589 create_minor_nodes_failed:
8590 	/*
8591 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8592 	 */
8593 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8594 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8595 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8596 
8597 	if (un->un_f_is_fibre == FALSE) {
8598 	    (void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8599 	}
8600 
8601 spinup_failed:
8602 
8603 	mutex_enter(SD_MUTEX(un));
8604 
8605 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
8606 	if (un->un_direct_priority_timeid != NULL) {
8607 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8608 		un->un_direct_priority_timeid = NULL;
8609 		mutex_exit(SD_MUTEX(un));
8610 		(void) untimeout(temp_id);
8611 		mutex_enter(SD_MUTEX(un));
8612 	}
8613 
8614 	/* Cancel any pending start/stop timeouts */
8615 	if (un->un_startstop_timeid != NULL) {
8616 		timeout_id_t temp_id = un->un_startstop_timeid;
8617 		un->un_startstop_timeid = NULL;
8618 		mutex_exit(SD_MUTEX(un));
8619 		(void) untimeout(temp_id);
8620 		mutex_enter(SD_MUTEX(un));
8621 	}
8622 
8623 	/* Cancel any pending reset-throttle timeouts */
8624 	if (un->un_reset_throttle_timeid != NULL) {
8625 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8626 		un->un_reset_throttle_timeid = NULL;
8627 		mutex_exit(SD_MUTEX(un));
8628 		(void) untimeout(temp_id);
8629 		mutex_enter(SD_MUTEX(un));
8630 	}
8631 
8632 	/* Cancel any pending retry timeouts */
8633 	if (un->un_retry_timeid != NULL) {
8634 		timeout_id_t temp_id = un->un_retry_timeid;
8635 		un->un_retry_timeid = NULL;
8636 		mutex_exit(SD_MUTEX(un));
8637 		(void) untimeout(temp_id);
8638 		mutex_enter(SD_MUTEX(un));
8639 	}
8640 
8641 	/* Cancel any pending delayed cv broadcast timeouts */
8642 	if (un->un_dcvb_timeid != NULL) {
8643 		timeout_id_t temp_id = un->un_dcvb_timeid;
8644 		un->un_dcvb_timeid = NULL;
8645 		mutex_exit(SD_MUTEX(un));
8646 		(void) untimeout(temp_id);
8647 		mutex_enter(SD_MUTEX(un));
8648 	}
8649 
8650 	mutex_exit(SD_MUTEX(un));
8651 
8652 	/* There should not be any in-progress I/O so ASSERT this check */
8653 	ASSERT(un->un_ncmds_in_transport == 0);
8654 	ASSERT(un->un_ncmds_in_driver == 0);
8655 
8656 	/* Do not free the softstate if the callback routine is active */
8657 	sd_sync_with_callback(un);
8658 
8659 	/*
8660 	 * Partition stats apparently are not used with removables. These would
8661 	 * not have been created during attach, so no need to clean them up...
8662 	 */
8663 	if (un->un_stats != NULL) {
8664 		kstat_delete(un->un_stats);
8665 		un->un_stats = NULL;
8666 	}
8667 	if (un->un_errstats != NULL) {
8668 		kstat_delete(un->un_errstats);
8669 		un->un_errstats = NULL;
8670 	}
8671 
8672 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8673 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8674 
8675 	ddi_prop_remove_all(devi);
8676 	sema_destroy(&un->un_semoclose);
8677 	cv_destroy(&un->un_state_cv);
8678 
8679 getrbuf_failed:
8680 
8681 	sd_free_rqs(un);
8682 
8683 alloc_rqs_failed:
8684 
8685 	devp->sd_private = NULL;
8686 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8687 
8688 get_softstate_failed:
8689 	/*
8690 	 * Note: the man pages are unclear as to whether or not doing a
8691 	 * ddi_soft_state_free(sd_state, instance) is the right way to
8692 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8693 	 * ddi_get_soft_state() fails.  The implication seems to be
8694 	 * that the get_soft_state cannot fail if the zalloc succeeds.
8695 	 */
8696 	ddi_soft_state_free(sd_state, instance);
8697 
8698 probe_failed:
8699 	scsi_unprobe(devp);
8700 #ifdef SDDEBUG
8701 	if ((sd_component_mask & SD_LOG_ATTACH_DETACH) &&
8702 	    (sd_level_mask & SD_LOGMASK_TRACE)) {
8703 		cmn_err(CE_CONT, "sd_unit_attach: un:0x%p exit failure\n",
8704 		    (void *)un);
8705 	}
8706 #endif
8707 	return (DDI_FAILURE);
8708 }
8709 
8710 
8711 /*
8712  *    Function: sd_unit_detach
8713  *
8714  * Description: Performs DDI_DETACH processing for sddetach().
8715  *
8716  * Return Code: DDI_SUCCESS
8717  *		DDI_FAILURE
8718  *
8719  *     Context: Kernel thread context
8720  */
8721 
8722 static int
8723 sd_unit_detach(dev_info_t *devi)
8724 {
8725 	struct scsi_device	*devp;
8726 	struct sd_lun		*un;
8727 	int			i;
8728 	dev_t			dev;
8729 	int			instance = ddi_get_instance(devi);
8730 
8731 	mutex_enter(&sd_detach_mutex);
8732 
8733 	/*
8734 	 * Fail the detach for any of the following:
8735 	 *  - Unable to get the sd_lun struct for the instance
8736 	 *  - A layered driver has an outstanding open on the instance
8737 	 *  - Another thread is already detaching this instance
8738 	 *  - Another thread is currently performing an open
8739 	 */
8740 	devp = ddi_get_driver_private(devi);
8741 	if ((devp == NULL) ||
8742 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
8743 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
8744 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
8745 		mutex_exit(&sd_detach_mutex);
8746 		return (DDI_FAILURE);
8747 	}
8748 
8749 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
8750 
8751 	/*
8752 	 * Mark this instance as currently in a detach, to inhibit any
8753 	 * opens from a layered driver.
8754 	 */
8755 	un->un_detach_count++;
8756 	mutex_exit(&sd_detach_mutex);
8757 
8758 	dev = sd_make_device(SD_DEVINFO(un));
8759 
8760 	_NOTE(COMPETING_THREADS_NOW);
8761 
8762 	mutex_enter(SD_MUTEX(un));
8763 
8764 	/*
8765 	 * Fail the detach if there are any outstanding layered
8766 	 * opens on this device.
8767 	 */
8768 	for (i = 0; i < NDKMAP; i++) {
8769 		if (un->un_ocmap.lyropen[i] != 0) {
8770 			goto err_notclosed;
8771 		}
8772 	}
8773 
8774 	/*
8775 	 * Verify there are NO outstanding commands issued to this device.
8776 	 * ie, un_ncmds_in_transport == 0.
8777 	 * It's possible to have outstanding commands through the physio
8778 	 * code path, even though everything's closed.
8779 	 */
8780 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
8781 	    (un->un_direct_priority_timeid != NULL) ||
8782 	    (un->un_state == SD_STATE_RWAIT)) {
8783 		mutex_exit(SD_MUTEX(un));
8784 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8785 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
8786 		goto err_stillbusy;
8787 	}
8788 
8789 	/*
8790 	 * If we have the device reserved, release the reservation.
8791 	 */
8792 	if ((un->un_resvd_status & SD_RESERVE) &&
8793 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
8794 		mutex_exit(SD_MUTEX(un));
8795 		/*
8796 		 * Note: sd_reserve_release sends a command to the device
8797 		 * via the sd_ioctlcmd() path, and can sleep.
8798 		 */
8799 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
8800 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8801 			    "sd_dr_detach: Cannot release reservation \n");
8802 		}
8803 	} else {
8804 		mutex_exit(SD_MUTEX(un));
8805 	}
8806 
8807 	/*
8808 	 * Untimeout any reserve recover, throttle reset, restart unit
8809 	 * and delayed broadcast timeout threads. Protect the timeout pointer
8810 	 * from getting nulled by their callback functions.
8811 	 */
8812 	mutex_enter(SD_MUTEX(un));
8813 	if (un->un_resvd_timeid != NULL) {
8814 		timeout_id_t temp_id = un->un_resvd_timeid;
8815 		un->un_resvd_timeid = NULL;
8816 		mutex_exit(SD_MUTEX(un));
8817 		(void) untimeout(temp_id);
8818 		mutex_enter(SD_MUTEX(un));
8819 	}
8820 
8821 	if (un->un_reset_throttle_timeid != NULL) {
8822 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8823 		un->un_reset_throttle_timeid = NULL;
8824 		mutex_exit(SD_MUTEX(un));
8825 		(void) untimeout(temp_id);
8826 		mutex_enter(SD_MUTEX(un));
8827 	}
8828 
8829 	if (un->un_startstop_timeid != NULL) {
8830 		timeout_id_t temp_id = un->un_startstop_timeid;
8831 		un->un_startstop_timeid = NULL;
8832 		mutex_exit(SD_MUTEX(un));
8833 		(void) untimeout(temp_id);
8834 		mutex_enter(SD_MUTEX(un));
8835 	}
8836 
8837 	if (un->un_dcvb_timeid != NULL) {
8838 		timeout_id_t temp_id = un->un_dcvb_timeid;
8839 		un->un_dcvb_timeid = NULL;
8840 		mutex_exit(SD_MUTEX(un));
8841 		(void) untimeout(temp_id);
8842 	} else {
8843 		mutex_exit(SD_MUTEX(un));
8844 	}
8845 
8846 	/* Remove any pending reservation reclaim requests for this device */
8847 	sd_rmv_resv_reclaim_req(dev);
8848 
8849 	mutex_enter(SD_MUTEX(un));
8850 
8851 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
8852 	if (un->un_direct_priority_timeid != NULL) {
8853 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8854 		un->un_direct_priority_timeid = NULL;
8855 		mutex_exit(SD_MUTEX(un));
8856 		(void) untimeout(temp_id);
8857 		mutex_enter(SD_MUTEX(un));
8858 	}
8859 
8860 	/* Cancel any active multi-host disk watch thread requests */
8861 	if (un->un_mhd_token != NULL) {
8862 		mutex_exit(SD_MUTEX(un));
8863 		 _NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
8864 		if (scsi_watch_request_terminate(un->un_mhd_token,
8865 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8866 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8867 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
8868 			/*
8869 			 * Note: We are returning here after having removed
8870 			 * some driver timeouts above. This is consistent with
8871 			 * the legacy implementation but perhaps the watch
8872 			 * terminate call should be made with the wait flag set.
8873 			 */
8874 			goto err_stillbusy;
8875 		}
8876 		mutex_enter(SD_MUTEX(un));
8877 		un->un_mhd_token = NULL;
8878 	}
8879 
8880 	if (un->un_swr_token != NULL) {
8881 		mutex_exit(SD_MUTEX(un));
8882 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
8883 		if (scsi_watch_request_terminate(un->un_swr_token,
8884 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8885 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8886 			    "sd_dr_detach: Cannot cancel swr watch request\n");
8887 			/*
8888 			 * Note: We are returning here after having removed
8889 			 * some driver timeouts above. This is consistent with
8890 			 * the legacy implementation but perhaps the watch
8891 			 * terminate call should be made with the wait flag set.
8892 			 */
8893 			goto err_stillbusy;
8894 		}
8895 		mutex_enter(SD_MUTEX(un));
8896 		un->un_swr_token = NULL;
8897 	}
8898 
8899 	mutex_exit(SD_MUTEX(un));
8900 
8901 	/*
8902 	 * Clear any scsi_reset_notifies. We clear the reset notifies
8903 	 * if we have not registered one.
8904 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
8905 	 */
8906 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
8907 	    sd_mhd_reset_notify_cb, (caddr_t)un);
8908 
8909 	/*
8910 	 * protect the timeout pointers from getting nulled by
8911 	 * their callback functions during the cancellation process.
8912 	 * In such a scenario untimeout can be invoked with a null value.
8913 	 */
8914 	_NOTE(NO_COMPETING_THREADS_NOW);
8915 
8916 	mutex_enter(&un->un_pm_mutex);
8917 	if (un->un_pm_idle_timeid != NULL) {
8918 		timeout_id_t temp_id = un->un_pm_idle_timeid;
8919 		un->un_pm_idle_timeid = NULL;
8920 		mutex_exit(&un->un_pm_mutex);
8921 
8922 		/*
8923 		 * Timeout is active; cancel it.
8924 		 * Note that it'll never be active on a device
8925 		 * that does not support PM therefore we don't
8926 		 * have to check before calling pm_idle_component.
8927 		 */
8928 		(void) untimeout(temp_id);
8929 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8930 		mutex_enter(&un->un_pm_mutex);
8931 	}
8932 
8933 	/*
8934 	 * Check whether there is already a timeout scheduled for power
8935 	 * management. If yes then don't lower the power here, that's.
8936 	 * the timeout handler's job.
8937 	 */
8938 	if (un->un_pm_timeid != NULL) {
8939 		timeout_id_t temp_id = un->un_pm_timeid;
8940 		un->un_pm_timeid = NULL;
8941 		mutex_exit(&un->un_pm_mutex);
8942 		/*
8943 		 * Timeout is active; cancel it.
8944 		 * Note that it'll never be active on a device
8945 		 * that does not support PM therefore we don't
8946 		 * have to check before calling pm_idle_component.
8947 		 */
8948 		(void) untimeout(temp_id);
8949 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8950 
8951 	} else {
8952 		mutex_exit(&un->un_pm_mutex);
8953 		if ((un->un_f_pm_is_enabled == TRUE) &&
8954 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_SPINDLE_OFF) !=
8955 		    DDI_SUCCESS)) {
8956 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8957 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
8958 			/*
8959 			 * Fix for bug: 4297749, item # 13
8960 			 * The above test now includes a check to see if PM is
8961 			 * supported by this device before call
8962 			 * pm_lower_power().
8963 			 * Note, the following is not dead code. The call to
8964 			 * pm_lower_power above will generate a call back into
8965 			 * our sdpower routine which might result in a timeout
8966 			 * handler getting activated. Therefore the following
8967 			 * code is valid and necessary.
8968 			 */
8969 			mutex_enter(&un->un_pm_mutex);
8970 			if (un->un_pm_timeid != NULL) {
8971 				timeout_id_t temp_id = un->un_pm_timeid;
8972 				un->un_pm_timeid = NULL;
8973 				mutex_exit(&un->un_pm_mutex);
8974 				(void) untimeout(temp_id);
8975 				(void) pm_idle_component(SD_DEVINFO(un), 0);
8976 			} else {
8977 				mutex_exit(&un->un_pm_mutex);
8978 			}
8979 		}
8980 	}
8981 
8982 	/*
8983 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8984 	 * Relocated here from above to be after the call to
8985 	 * pm_lower_power, which was getting errors.
8986 	 */
8987 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8988 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8989 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8990 
8991 	if (un->un_f_is_fibre == FALSE) {
8992 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8993 	}
8994 
8995 	/*
8996 	 * Remove any event callbacks, fibre only
8997 	 */
8998 	if (un->un_f_is_fibre == TRUE) {
8999 		if ((un->un_insert_event != NULL) &&
9000 			(ddi_remove_event_handler(un->un_insert_cb_id) !=
9001 				DDI_SUCCESS)) {
9002 			/*
9003 			 * Note: We are returning here after having done
9004 			 * substantial cleanup above. This is consistent
9005 			 * with the legacy implementation but this may not
9006 			 * be the right thing to do.
9007 			 */
9008 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9009 				"sd_dr_detach: Cannot cancel insert event\n");
9010 			goto err_remove_event;
9011 		}
9012 		un->un_insert_event = NULL;
9013 
9014 		if ((un->un_remove_event != NULL) &&
9015 			(ddi_remove_event_handler(un->un_remove_cb_id) !=
9016 				DDI_SUCCESS)) {
9017 			/*
9018 			 * Note: We are returning here after having done
9019 			 * substantial cleanup above. This is consistent
9020 			 * with the legacy implementation but this may not
9021 			 * be the right thing to do.
9022 			 */
9023 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9024 				"sd_dr_detach: Cannot cancel remove event\n");
9025 			goto err_remove_event;
9026 		}
9027 		un->un_remove_event = NULL;
9028 	}
9029 
9030 	/* Do not free the softstate if the callback routine is active */
9031 	sd_sync_with_callback(un);
9032 
9033 	/*
9034 	 * Hold the detach mutex here, to make sure that no other threads ever
9035 	 * can access a (partially) freed soft state structure.
9036 	 */
9037 	mutex_enter(&sd_detach_mutex);
9038 
9039 	/*
9040 	 * Clean up the soft state struct.
9041 	 * Cleanup is done in reverse order of allocs/inits.
9042 	 * At this point there should be no competing threads anymore.
9043 	 */
9044 
9045 	/* Unregister and free device id. */
9046 	ddi_devid_unregister(devi);
9047 	if (un->un_devid) {
9048 		ddi_devid_free(un->un_devid);
9049 		un->un_devid = NULL;
9050 	}
9051 
9052 	/*
9053 	 * Destroy wmap cache if it exists.
9054 	 */
9055 	if (un->un_wm_cache != NULL) {
9056 		kmem_cache_destroy(un->un_wm_cache);
9057 		un->un_wm_cache = NULL;
9058 	}
9059 
9060 	/* Remove minor nodes */
9061 	ddi_remove_minor_node(devi, NULL);
9062 
9063 	/*
9064 	 * kstat cleanup is done in detach for all device types (4363169).
9065 	 * We do not want to fail detach if the device kstats are not deleted
9066 	 * since there is a confusion about the devo_refcnt for the device.
9067 	 * We just delete the kstats and let detach complete successfully.
9068 	 */
9069 	if (un->un_stats != NULL) {
9070 		kstat_delete(un->un_stats);
9071 		un->un_stats = NULL;
9072 	}
9073 	if (un->un_errstats != NULL) {
9074 		kstat_delete(un->un_errstats);
9075 		un->un_errstats = NULL;
9076 	}
9077 
9078 	/* Remove partition stats */
9079 	if (un->un_f_pkstats_enabled) {
9080 		for (i = 0; i < NSDMAP; i++) {
9081 			if (un->un_pstats[i] != NULL) {
9082 				kstat_delete(un->un_pstats[i]);
9083 				un->un_pstats[i] = NULL;
9084 			}
9085 		}
9086 	}
9087 
9088 	/* Remove xbuf registration */
9089 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
9090 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
9091 
9092 	/* Remove driver properties */
9093 	ddi_prop_remove_all(devi);
9094 
9095 	mutex_destroy(&un->un_pm_mutex);
9096 	cv_destroy(&un->un_pm_busy_cv);
9097 
9098 	cv_destroy(&un->un_wcc_cv);
9099 
9100 	/* Open/close semaphore */
9101 	sema_destroy(&un->un_semoclose);
9102 
9103 	/* Removable media condvar. */
9104 	cv_destroy(&un->un_state_cv);
9105 
9106 	/* Suspend/resume condvar. */
9107 	cv_destroy(&un->un_suspend_cv);
9108 	cv_destroy(&un->un_disk_busy_cv);
9109 
9110 	sd_free_rqs(un);
9111 
9112 	/* Free up soft state */
9113 	devp->sd_private = NULL;
9114 	bzero(un, sizeof (struct sd_lun));
9115 	ddi_soft_state_free(sd_state, instance);
9116 
9117 	mutex_exit(&sd_detach_mutex);
9118 
9119 	/* This frees up the INQUIRY data associated with the device. */
9120 	scsi_unprobe(devp);
9121 
9122 	return (DDI_SUCCESS);
9123 
9124 err_notclosed:
9125 	mutex_exit(SD_MUTEX(un));
9126 
9127 err_stillbusy:
9128 	_NOTE(NO_COMPETING_THREADS_NOW);
9129 
9130 err_remove_event:
9131 	mutex_enter(&sd_detach_mutex);
9132 	un->un_detach_count--;
9133 	mutex_exit(&sd_detach_mutex);
9134 
9135 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
9136 	return (DDI_FAILURE);
9137 }
9138 
9139 
9140 /*
9141  * Driver minor node structure and data table
9142  */
struct driver_minor_data {
	char	*name;		/* minor node name, e.g. "a" or "a,raw" */
	minor_t	minor;		/* slice/partition number encoded in the minor */
	int	type;		/* node type: S_IFBLK (block) or S_IFCHR (raw) */
};
9148 
/*
 * Minor nodes for non-EFI (VTOC-labeled) devices: one block (S_IFBLK) and
 * one raw (S_IFCHR) node per slice.  Slices i-p are created only on
 * VTOC16 platforms; q-u only where the firmware requires fdisk support.
 */
static struct driver_minor_data sd_minor_data[] = {
	{"a", 0, S_IFBLK},
	{"b", 1, S_IFBLK},
	{"c", 2, S_IFBLK},
	{"d", 3, S_IFBLK},
	{"e", 4, S_IFBLK},
	{"f", 5, S_IFBLK},
	{"g", 6, S_IFBLK},
	{"h", 7, S_IFBLK},
#if defined(_SUNOS_VTOC_16)
	{"i", 8, S_IFBLK},
	{"j", 9, S_IFBLK},
	{"k", 10, S_IFBLK},
	{"l", 11, S_IFBLK},
	{"m", 12, S_IFBLK},
	{"n", 13, S_IFBLK},
	{"o", 14, S_IFBLK},
	{"p", 15, S_IFBLK},
#endif			/* defined(_SUNOS_VTOC_16) */
#if defined(_FIRMWARE_NEEDS_FDISK)
	{"q", 16, S_IFBLK},
	{"r", 17, S_IFBLK},
	{"s", 18, S_IFBLK},
	{"t", 19, S_IFBLK},
	{"u", 20, S_IFBLK},
#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
	{"a,raw", 0, S_IFCHR},
	{"b,raw", 1, S_IFCHR},
	{"c,raw", 2, S_IFCHR},
	{"d,raw", 3, S_IFCHR},
	{"e,raw", 4, S_IFCHR},
	{"f,raw", 5, S_IFCHR},
	{"g,raw", 6, S_IFCHR},
	{"h,raw", 7, S_IFCHR},
#if defined(_SUNOS_VTOC_16)
	{"i,raw", 8, S_IFCHR},
	{"j,raw", 9, S_IFCHR},
	{"k,raw", 10, S_IFCHR},
	{"l,raw", 11, S_IFCHR},
	{"m,raw", 12, S_IFCHR},
	{"n,raw", 13, S_IFCHR},
	{"o,raw", 14, S_IFCHR},
	{"p,raw", 15, S_IFCHR},
#endif			/* defined(_SUNOS_VTOC_16) */
#if defined(_FIRMWARE_NEEDS_FDISK)
	{"q,raw", 16, S_IFCHR},
	{"r,raw", 17, S_IFCHR},
	{"s,raw", 18, S_IFCHR},
	{"t,raw", 19, S_IFCHR},
	{"u,raw", 20, S_IFCHR},
#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
	{0}
};
9202 
/*
 * Minor nodes for EFI-labeled (large) devices: slices a-g plus "wd"
 * (minor 7, presumably the EFI reserved partition -- confirm against the
 * EFI label code), each with block and raw variants.  The VTOC16 i-p
 * slices do not exist under EFI labels.
 */
static struct driver_minor_data sd_minor_data_efi[] = {
	{"a", 0, S_IFBLK},
	{"b", 1, S_IFBLK},
	{"c", 2, S_IFBLK},
	{"d", 3, S_IFBLK},
	{"e", 4, S_IFBLK},
	{"f", 5, S_IFBLK},
	{"g", 6, S_IFBLK},
	{"wd", 7, S_IFBLK},
#if defined(_FIRMWARE_NEEDS_FDISK)
	{"q", 16, S_IFBLK},
	{"r", 17, S_IFBLK},
	{"s", 18, S_IFBLK},
	{"t", 19, S_IFBLK},
	{"u", 20, S_IFBLK},
#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
	{"a,raw", 0, S_IFCHR},
	{"b,raw", 1, S_IFCHR},
	{"c,raw", 2, S_IFCHR},
	{"d,raw", 3, S_IFCHR},
	{"e,raw", 4, S_IFCHR},
	{"f,raw", 5, S_IFCHR},
	{"g,raw", 6, S_IFCHR},
	{"wd,raw", 7, S_IFCHR},
#if defined(_FIRMWARE_NEEDS_FDISK)
	{"q,raw", 16, S_IFCHR},
	{"r,raw", 17, S_IFCHR},
	{"s,raw", 18, S_IFCHR},
	{"t,raw", 19, S_IFCHR},
	{"u,raw", 20, S_IFCHR},
#endif			/* defined(_FIRMWARE_NEEDS_FDISK) */
	{0}
};
9236 
9237 
9238 /*
9239  *    Function: sd_create_minor_nodes
9240  *
9241  * Description: Create the minor device nodes for the instance.
9242  *
9243  *   Arguments: un - driver soft state (unit) structure
9244  *		devi - pointer to device info structure
9245  *
9246  * Return Code: DDI_SUCCESS
9247  *		DDI_FAILURE
9248  *
9249  *     Context: Kernel thread context
9250  */
9251 
9252 static int
9253 sd_create_minor_nodes(struct sd_lun *un, dev_info_t *devi)
9254 {
9255 	struct driver_minor_data	*dmdp;
9256 	struct scsi_device		*devp;
9257 	int				instance;
9258 	char				name[48];
9259 
9260 	ASSERT(un != NULL);
9261 	devp = ddi_get_driver_private(devi);
9262 	instance = ddi_get_instance(devp->sd_dev);
9263 
9264 	/*
9265 	 * Create all the minor nodes for this target.
9266 	 */
9267 	if (un->un_blockcount > DK_MAX_BLOCKS)
9268 		dmdp = sd_minor_data_efi;
9269 	else
9270 		dmdp = sd_minor_data;
9271 	while (dmdp->name != NULL) {
9272 
9273 		(void) sprintf(name, "%s", dmdp->name);
9274 
9275 		if (ddi_create_minor_node(devi, name, dmdp->type,
9276 		    (instance << SDUNIT_SHIFT) | dmdp->minor,
9277 		    un->un_node_type, NULL) == DDI_FAILURE) {
9278 			/*
9279 			 * Clean up any nodes that may have been created, in
9280 			 * case this fails in the middle of the loop.
9281 			 */
9282 			ddi_remove_minor_node(devi, NULL);
9283 			return (DDI_FAILURE);
9284 		}
9285 		dmdp++;
9286 	}
9287 
9288 	return (DDI_SUCCESS);
9289 }
9290 
9291 
9292 /*
9293  *    Function: sd_create_errstats
9294  *
9295  * Description: This routine instantiates the device error stats.
9296  *
9297  *		Note: During attach the stats are instantiated first so they are
9298  *		available for attach-time routines that utilize the driver
9299  *		iopath to send commands to the device. The stats are initialized
9300  *		separately so data obtained during some attach-time routines is
9301  *		available. (4362483)
9302  *
9303  *   Arguments: un - driver soft state (unit) structure
9304  *		instance - driver instance
9305  *
9306  *     Context: Kernel thread context
9307  */
9308 
9309 static void
9310 sd_create_errstats(struct sd_lun *un, int instance)
9311 {
9312 	struct	sd_errstats	*stp;
9313 	char	kstatmodule_err[KSTAT_STRLEN];
9314 	char	kstatname[KSTAT_STRLEN];
9315 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
9316 
9317 	ASSERT(un != NULL);
9318 
9319 	if (un->un_errstats != NULL) {
9320 		return;
9321 	}
9322 
9323 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
9324 	    "%serr", sd_label);
9325 	(void) snprintf(kstatname, sizeof (kstatname),
9326 	    "%s%d,err", sd_label, instance);
9327 
9328 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
9329 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
9330 
9331 	if (un->un_errstats == NULL) {
9332 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9333 		    "sd_create_errstats: Failed kstat_create\n");
9334 		return;
9335 	}
9336 
9337 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9338 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
9339 	    KSTAT_DATA_UINT32);
9340 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
9341 	    KSTAT_DATA_UINT32);
9342 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
9343 	    KSTAT_DATA_UINT32);
9344 	kstat_named_init(&stp->sd_vid,		"Vendor",
9345 	    KSTAT_DATA_CHAR);
9346 	kstat_named_init(&stp->sd_pid,		"Product",
9347 	    KSTAT_DATA_CHAR);
9348 	kstat_named_init(&stp->sd_revision,	"Revision",
9349 	    KSTAT_DATA_CHAR);
9350 	kstat_named_init(&stp->sd_serial,	"Serial No",
9351 	    KSTAT_DATA_CHAR);
9352 	kstat_named_init(&stp->sd_capacity,	"Size",
9353 	    KSTAT_DATA_ULONGLONG);
9354 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
9355 	    KSTAT_DATA_UINT32);
9356 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
9357 	    KSTAT_DATA_UINT32);
9358 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
9359 	    KSTAT_DATA_UINT32);
9360 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
9361 	    KSTAT_DATA_UINT32);
9362 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
9363 	    KSTAT_DATA_UINT32);
9364 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
9365 	    KSTAT_DATA_UINT32);
9366 
9367 	un->un_errstats->ks_private = un;
9368 	un->un_errstats->ks_update  = nulldev;
9369 
9370 	kstat_install(un->un_errstats);
9371 }
9372 
9373 
9374 /*
9375  *    Function: sd_set_errstats
9376  *
9377  * Description: This routine sets the value of the vendor id, product id,
9378  *		revision, serial number, and capacity device error stats.
9379  *
9380  *		Note: During attach the stats are instantiated first so they are
9381  *		available for attach-time routines that utilize the driver
9382  *		iopath to send commands to the device. The stats are initialized
9383  *		separately so data obtained during some attach-time routines is
9384  *		available. (4362483)
9385  *
9386  *   Arguments: un - driver soft state (unit) structure
9387  *
9388  *     Context: Kernel thread context
9389  */
9390 
9391 static void
9392 sd_set_errstats(struct sd_lun *un)
9393 {
9394 	struct	sd_errstats	*stp;
9395 
9396 	ASSERT(un != NULL);
9397 	ASSERT(un->un_errstats != NULL);
9398 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9399 	ASSERT(stp != NULL);
9400 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
9401 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
9402 	(void) strncpy(stp->sd_revision.value.c,
9403 	    un->un_sd->sd_inq->inq_revision, 4);
9404 
9405 	/*
9406 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
9407 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
9408 	 * (4376302))
9409 	 */
9410 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
9411 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9412 		    sizeof (SD_INQUIRY(un)->inq_serial));
9413 	}
9414 
9415 	if (un->un_f_blockcount_is_valid != TRUE) {
9416 		/*
9417 		 * Set capacity error stat to 0 for no media. This ensures
9418 		 * a valid capacity is displayed in response to 'iostat -E'
9419 		 * when no media is present in the device.
9420 		 */
9421 		stp->sd_capacity.value.ui64 = 0;
9422 	} else {
9423 		/*
9424 		 * Multiply un_blockcount by un->un_sys_blocksize to get
9425 		 * capacity.
9426 		 *
9427 		 * Note: for non-512 blocksize devices "un_blockcount" has been
9428 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
9429 		 * (un_tgt_blocksize / un->un_sys_blocksize).
9430 		 */
9431 		stp->sd_capacity.value.ui64 = (uint64_t)
9432 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
9433 	}
9434 }
9435 
9436 
9437 /*
9438  *    Function: sd_set_pstats
9439  *
9440  * Description: This routine instantiates and initializes the partition
9441  *              stats for each partition with more than zero blocks.
9442  *		(4363169)
9443  *
9444  *   Arguments: un - driver soft state (unit) structure
9445  *
9446  *     Context: Kernel thread context
9447  */
9448 
9449 static void
9450 sd_set_pstats(struct sd_lun *un)
9451 {
9452 	char	kstatname[KSTAT_STRLEN];
9453 	int	instance;
9454 	int	i;
9455 
9456 	ASSERT(un != NULL);
9457 
9458 	instance = ddi_get_instance(SD_DEVINFO(un));
9459 
9460 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
9461 	for (i = 0; i < NSDMAP; i++) {
9462 		if ((un->un_pstats[i] == NULL) &&
9463 		    (un->un_map[i].dkl_nblk != 0)) {
9464 			(void) snprintf(kstatname, sizeof (kstatname),
9465 			    "%s%d,%s", sd_label, instance,
9466 			    sd_minor_data[i].name);
9467 			un->un_pstats[i] = kstat_create(sd_label,
9468 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
9469 			    1, KSTAT_FLAG_PERSISTENT);
9470 			if (un->un_pstats[i] != NULL) {
9471 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
9472 				kstat_install(un->un_pstats[i]);
9473 			}
9474 		}
9475 	}
9476 }
9477 
9478 
9479 #if (defined(__fibre))
9480 /*
9481  *    Function: sd_init_event_callbacks
9482  *
9483  * Description: This routine initializes the insertion and removal event
9484  *		callbacks. (fibre only)
9485  *
9486  *   Arguments: un - driver soft state (unit) structure
9487  *
9488  *     Context: Kernel thread context
9489  */
9490 
9491 static void
9492 sd_init_event_callbacks(struct sd_lun *un)
9493 {
9494 	ASSERT(un != NULL);
9495 
9496 	if ((un->un_insert_event == NULL) &&
9497 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
9498 	    &un->un_insert_event) == DDI_SUCCESS)) {
9499 		/*
9500 		 * Add the callback for an insertion event
9501 		 */
9502 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9503 		    un->un_insert_event, sd_event_callback, (void *)un,
9504 		    &(un->un_insert_cb_id));
9505 	}
9506 
9507 	if ((un->un_remove_event == NULL) &&
9508 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
9509 	    &un->un_remove_event) == DDI_SUCCESS)) {
9510 		/*
9511 		 * Add the callback for a removal event
9512 		 */
9513 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9514 		    un->un_remove_event, sd_event_callback, (void *)un,
9515 		    &(un->un_remove_cb_id));
9516 	}
9517 }
9518 
9519 
/*
 *    Function: sd_event_callback
 *
 * Description: This routine handles insert/remove events (photon). The
 *		state is changed to OFFLINE which can be used to suppress
 *		error msgs. (fibre only)
 *
 *   Arguments: un - driver soft state (unit) structure
 *
 *     Context: Callout thread context
 */
/* ARGSUSED */
static void
sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
    void *bus_impldata)
{
	struct sd_lun *un = (struct sd_lun *)arg;

	/* Insertion: restore the unit from OFFLINE to its prior state. */
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
	if (event == un->un_insert_event) {
		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
		mutex_enter(SD_MUTEX(un));
		if (un->un_state == SD_STATE_OFFLINE) {
			if (un->un_last_state != SD_STATE_SUSPENDED) {
				un->un_state = un->un_last_state;
			} else {
				/*
				 * We have gone through SUSPEND/RESUME while
				 * we were offline. Restore the last state
				 */
				un->un_state = un->un_save_state;
			}
		}
		mutex_exit(SD_MUTEX(un));

	/* Removal: transition the unit to OFFLINE. */
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
	} else if (event == un->un_remove_event) {
		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
		mutex_enter(SD_MUTEX(un));
		/*
		 * We need to handle an event callback that occurs during
		 * the suspend operation, since we don't prevent it.
		 */
		if (un->un_state != SD_STATE_OFFLINE) {
			if (un->un_state != SD_STATE_SUSPENDED) {
				New_state(un, SD_STATE_OFFLINE);
			} else {
				/*
				 * Suspended: record OFFLINE as the state to
				 * restore on resume instead of changing now.
				 */
				un->un_last_state = SD_STATE_OFFLINE;
			}
		}
		mutex_exit(SD_MUTEX(un));
	} else {
		/* Cookie matches neither registered event; just log it. */
		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
		    "!Unknown event\n");
	}

}
9577 #endif
9578 
9579 /*
9580  *    Function: sd_cache_control()
9581  *
9582  * Description: This routine is the driver entry point for setting
9583  *		read and write caching by modifying the WCE (write cache
9584  *		enable) and RCD (read cache disable) bits of mode
9585  *		page 8 (MODEPAGE_CACHING).
9586  *
9587  *   Arguments: un - driver soft state (unit) structure
9588  *		rcd_flag - flag for controlling the read cache
9589  *		wce_flag - flag for controlling the write cache
9590  *
9591  * Return Code: EIO
9592  *		code returned by sd_send_scsi_MODE_SENSE and
9593  *		sd_send_scsi_MODE_SELECT
9594  *
9595  *     Context: Kernel Thread
9596  */
9597 
9598 static int
9599 sd_cache_control(struct sd_lun *un, int rcd_flag, int wce_flag)
9600 {
9601 	struct mode_caching	*mode_caching_page;
9602 	uchar_t			*header;
9603 	size_t			buflen;
9604 	int			hdrlen;
9605 	int			bd_len;
9606 	int			rval = 0;
9607 	struct mode_header_grp2	*mhp;
9608 
9609 	ASSERT(un != NULL);
9610 
9611 	/*
9612 	 * Do a test unit ready, otherwise a mode sense may not work if this
9613 	 * is the first command sent to the device after boot.
9614 	 */
9615 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9616 
9617 	if (un->un_f_cfg_is_atapi == TRUE) {
9618 		hdrlen = MODE_HEADER_LENGTH_GRP2;
9619 	} else {
9620 		hdrlen = MODE_HEADER_LENGTH;
9621 	}
9622 
9623 	/*
9624 	 * Allocate memory for the retrieved mode page and its headers.  Set
9625 	 * a pointer to the page itself.  Use mode_cache_scsi3 to insure
9626 	 * we get all of the mode sense data otherwise, the mode select
9627 	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
9628 	 */
9629 	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
9630 		sizeof (struct mode_cache_scsi3);
9631 
9632 	header = kmem_zalloc(buflen, KM_SLEEP);
9633 
9634 	/* Get the information from the device. */
9635 	if (un->un_f_cfg_is_atapi == TRUE) {
9636 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
9637 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9638 	} else {
9639 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
9640 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9641 	}
9642 	if (rval != 0) {
9643 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9644 		    "sd_cache_control: Mode Sense Failed\n");
9645 		kmem_free(header, buflen);
9646 		return (rval);
9647 	}
9648 
9649 	/*
9650 	 * Determine size of Block Descriptors in order to locate
9651 	 * the mode page data. ATAPI devices return 0, SCSI devices
9652 	 * should return MODE_BLK_DESC_LENGTH.
9653 	 */
9654 	if (un->un_f_cfg_is_atapi == TRUE) {
9655 		mhp	= (struct mode_header_grp2 *)header;
9656 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9657 	} else {
9658 		bd_len  = ((struct mode_header *)header)->bdesc_length;
9659 	}
9660 
9661 	if (bd_len > MODE_BLK_DESC_LENGTH) {
9662 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9663 		    "sd_cache_control: Mode Sense returned invalid "
9664 		    "block descriptor length\n");
9665 		kmem_free(header, buflen);
9666 		return (EIO);
9667 	}
9668 
9669 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9670 
9671 	/* Check the relevant bits on successful mode sense. */
9672 	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
9673 	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
9674 	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
9675 	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
9676 
9677 		size_t sbuflen;
9678 		uchar_t save_pg;
9679 
9680 		/*
9681 		 * Construct select buffer length based on the
9682 		 * length of the sense data returned.
9683 		 */
9684 		sbuflen =  hdrlen + MODE_BLK_DESC_LENGTH +
9685 				sizeof (struct mode_page) +
9686 				(int)mode_caching_page->mode_page.length;
9687 
9688 		/*
9689 		 * Set the caching bits as requested.
9690 		 */
9691 		if (rcd_flag == SD_CACHE_ENABLE)
9692 			mode_caching_page->rcd = 0;
9693 		else if (rcd_flag == SD_CACHE_DISABLE)
9694 			mode_caching_page->rcd = 1;
9695 
9696 		if (wce_flag == SD_CACHE_ENABLE)
9697 			mode_caching_page->wce = 1;
9698 		else if (wce_flag == SD_CACHE_DISABLE)
9699 			mode_caching_page->wce = 0;
9700 
9701 		/*
9702 		 * Save the page if the mode sense says the
9703 		 * drive supports it.
9704 		 */
9705 		save_pg = mode_caching_page->mode_page.ps ?
9706 				SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
9707 
9708 		/* Clear reserved bits before mode select. */
9709 		mode_caching_page->mode_page.ps = 0;
9710 
9711 		/*
9712 		 * Clear out mode header for mode select.
9713 		 * The rest of the retrieved page will be reused.
9714 		 */
9715 		bzero(header, hdrlen);
9716 
9717 		if (un->un_f_cfg_is_atapi == TRUE) {
9718 			mhp = (struct mode_header_grp2 *)header;
9719 			mhp->bdesc_length_hi = bd_len >> 8;
9720 			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
9721 		} else {
9722 			((struct mode_header *)header)->bdesc_length = bd_len;
9723 		}
9724 
9725 		/* Issue mode select to change the cache settings */
9726 		if (un->un_f_cfg_is_atapi == TRUE) {
9727 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, header,
9728 			    sbuflen, save_pg, SD_PATH_DIRECT);
9729 		} else {
9730 			rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, header,
9731 			    sbuflen, save_pg, SD_PATH_DIRECT);
9732 		}
9733 	}
9734 
9735 	kmem_free(header, buflen);
9736 	return (rval);
9737 }
9738 
9739 
9740 /*
9741  *    Function: sd_get_write_cache_enabled()
9742  *
9743  * Description: This routine is the driver entry point for determining if
9744  *		write caching is enabled.  It examines the WCE (write cache
9745  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
9746  *
9747  *   Arguments: un - driver soft state (unit) structure
9748  *   		is_enabled - pointer to int where write cache enabled state
9749  *   			is returned (non-zero -> write cache enabled)
9750  *
9751  *
9752  * Return Code: EIO
9753  *		code returned by sd_send_scsi_MODE_SENSE
9754  *
9755  *     Context: Kernel Thread
9756  *
9757  * NOTE: If ioctl is added to disable write cache, this sequence should
9758  * be followed so that no locking is required for accesses to
9759  * un->un_f_write_cache_enabled:
9760  * 	do mode select to clear wce
9761  * 	do synchronize cache to flush cache
9762  * 	set un->un_f_write_cache_enabled = FALSE
9763  *
9764  * Conversely, an ioctl to enable the write cache should be done
9765  * in this order:
9766  * 	set un->un_f_write_cache_enabled = TRUE
9767  * 	do mode select to set wce
9768  */
9769 
9770 static int
9771 sd_get_write_cache_enabled(struct sd_lun *un, int *is_enabled)
9772 {
9773 	struct mode_caching	*mode_caching_page;
9774 	uchar_t			*header;
9775 	size_t			buflen;
9776 	int			hdrlen;
9777 	int			bd_len;
9778 	int			rval = 0;
9779 
9780 	ASSERT(un != NULL);
9781 	ASSERT(is_enabled != NULL);
9782 
9783 	/* in case of error, flag as enabled */
9784 	*is_enabled = TRUE;
9785 
9786 	/*
9787 	 * Do a test unit ready, otherwise a mode sense may not work if this
9788 	 * is the first command sent to the device after boot.
9789 	 */
9790 	(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
9791 
9792 	if (un->un_f_cfg_is_atapi == TRUE) {
9793 		hdrlen = MODE_HEADER_LENGTH_GRP2;
9794 	} else {
9795 		hdrlen = MODE_HEADER_LENGTH;
9796 	}
9797 
9798 	/*
9799 	 * Allocate memory for the retrieved mode page and its headers.  Set
9800 	 * a pointer to the page itself.
9801 	 */
9802 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
9803 	header = kmem_zalloc(buflen, KM_SLEEP);
9804 
9805 	/* Get the information from the device. */
9806 	if (un->un_f_cfg_is_atapi == TRUE) {
9807 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, header, buflen,
9808 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9809 	} else {
9810 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, header, buflen,
9811 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9812 	}
9813 	if (rval != 0) {
9814 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9815 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
9816 		kmem_free(header, buflen);
9817 		return (rval);
9818 	}
9819 
9820 	/*
9821 	 * Determine size of Block Descriptors in order to locate
9822 	 * the mode page data. ATAPI devices return 0, SCSI devices
9823 	 * should return MODE_BLK_DESC_LENGTH.
9824 	 */
9825 	if (un->un_f_cfg_is_atapi == TRUE) {
9826 		struct mode_header_grp2	*mhp;
9827 		mhp	= (struct mode_header_grp2 *)header;
9828 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9829 	} else {
9830 		bd_len  = ((struct mode_header *)header)->bdesc_length;
9831 	}
9832 
9833 	if (bd_len > MODE_BLK_DESC_LENGTH) {
9834 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
9835 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
9836 		    "block descriptor length\n");
9837 		kmem_free(header, buflen);
9838 		return (EIO);
9839 	}
9840 
9841 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9842 	*is_enabled = mode_caching_page->wce;
9843 
9844 	kmem_free(header, buflen);
9845 	return (0);
9846 }
9847 
9848 
9849 /*
9850  *    Function: sd_make_device
9851  *
9852  * Description: Utility routine to return the Solaris device number from
9853  *		the data in the device's dev_info structure.
9854  *
9855  * Return Code: The Solaris device number
9856  *
9857  *     Context: Any
9858  */
9859 
9860 static dev_t
9861 sd_make_device(dev_info_t *devi)
9862 {
9863 	return (makedevice(ddi_name_to_major(ddi_get_name(devi)),
9864 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
9865 }
9866 
9867 
9868 /*
9869  *    Function: sd_pm_entry
9870  *
9871  * Description: Called at the start of a new command to manage power
9872  *		and busy status of a device. This includes determining whether
9873  *		the current power state of the device is sufficient for
9874  *		performing the command or whether it must be changed.
9875  *		The PM framework is notified appropriately.
9876  *		Only with a return status of DDI_SUCCESS will the
9877  *		component be busy to the framework.
 *
 *		All callers of sd_pm_entry must check the return status
 *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
 *		of DDI_FAILURE indicates the device failed to power up.
9882  *		In this case un_pm_count has been adjusted so the result
9883  *		on exit is still powered down, ie. count is less than 0.
9884  *		Calling sd_pm_exit with this count value hits an ASSERT.
9885  *
9886  * Return Code: DDI_SUCCESS or DDI_FAILURE
9887  *
9888  *     Context: Kernel thread context.
9889  */
9890 
static int
sd_pm_entry(struct sd_lun *un)
{
	int return_status = DDI_SUCCESS;

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(!mutex_owned(&un->un_pm_mutex));

	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");

	/* PM disabled: nothing to account; report success immediately. */
	if (un->un_f_pm_is_enabled == FALSE) {
		SD_TRACE(SD_LOG_IO_PM, un,
		    "sd_pm_entry: exiting, PM not enabled\n");
		return (return_status);
	}

	/*
	 * Just increment a counter if PM is enabled. On the transition from
	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
	 * the count with each IO and mark the device as idle when the count
	 * hits 0.
	 *
	 * If the count is less than 0 the device is powered down. If a powered
	 * down device is successfully powered up then the count must be
	 * incremented to reflect the power up. Note that it'll get incremented
	 * a second time to become busy.
	 *
	 * Because the following has the potential to change the device state
	 * and must release the un_pm_mutex to do so, only one thread can be
	 * allowed through at a time.
	 */

	mutex_enter(&un->un_pm_mutex);
	while (un->un_pm_busy == TRUE) {
		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
	}
	/* We now hold the un_pm_busy gate; only this thread proceeds. */
	un->un_pm_busy = TRUE;

	/* count < 1: device is idle (0) or powered down (< 0). */
	if (un->un_pm_count < 1) {

		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");

		/*
		 * Indicate we are now busy so the framework won't attempt to
		 * power down the device. This call will only fail if either
		 * we passed a bad component number or the device has no
		 * components. Neither of these should ever happen.
		 */
		mutex_exit(&un->un_pm_mutex);
		return_status = pm_busy_component(SD_DEVINFO(un), 0);
		ASSERT(return_status == DDI_SUCCESS);

		mutex_enter(&un->un_pm_mutex);

		if (un->un_pm_count < 0) {
			mutex_exit(&un->un_pm_mutex);

			SD_TRACE(SD_LOG_IO_PM, un,
			    "sd_pm_entry: power up component\n");

			/*
			 * pm_raise_power will cause sdpower to be called
			 * which brings the device power level to the
			 * desired state, ON in this case. If successful,
			 * un_pm_count and un_power_level will be updated
			 * appropriately.
			 */
			return_status = pm_raise_power(SD_DEVINFO(un), 0,
			    SD_SPINDLE_ON);

			mutex_enter(&un->un_pm_mutex);

			if (return_status != DDI_SUCCESS) {
				/*
				 * Power up failed.
				 * Idle the device and adjust the count
				 * so the result on exit is that we're
				 * still powered down, ie. count is less than 0.
				 */
				SD_TRACE(SD_LOG_IO_PM, un,
				    "sd_pm_entry: power up failed,"
				    " idle the component\n");

				(void) pm_idle_component(SD_DEVINFO(un), 0);
				un->un_pm_count--;
			} else {
				/*
				 * Device is powered up, verify the
				 * count is non-negative.
				 * This is debug only.
				 */
				ASSERT(un->un_pm_count == 0);
			}
		}

		if (return_status == DDI_SUCCESS) {
			/*
			 * For performance, now that the device has been tagged
			 * as busy, and it's known to be powered up, update the
			 * chain types to use jump tables that do not include
			 * pm. This significantly lowers the overhead and
			 * therefore improves performance.
			 */

			mutex_exit(&un->un_pm_mutex);
			mutex_enter(SD_MUTEX(un));
			SD_TRACE(SD_LOG_IO_PM, un,
			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
			    un->un_uscsi_chain_type);

			if (un->un_f_non_devbsize_supported) {
				un->un_buf_chain_type =
				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
			} else {
				un->un_buf_chain_type =
				    SD_CHAIN_INFO_DISK_NO_PM;
			}
			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;

			SD_TRACE(SD_LOG_IO_PM, un,
			    "             changed  uscsi_chain_type to   %d\n",
			    un->un_uscsi_chain_type);
			mutex_exit(SD_MUTEX(un));
			mutex_enter(&un->un_pm_mutex);

			if (un->un_pm_idle_timeid == NULL) {
				/* 300 ms. */
				un->un_pm_idle_timeid =
				    timeout(sd_pm_idletimeout_handler, un,
				    (drv_usectohz((clock_t)300000)));
				/*
				 * Include an extra call to busy which keeps the
				 * device busy with-respect-to the PM layer
				 * until the timer fires, at which time it'll
				 * get the extra idle call.
				 */
				(void) pm_busy_component(SD_DEVINFO(un), 0);
			}
		}
	}
	/* Release the gate and wake the next waiter, if any. */
	un->un_pm_busy = FALSE;
	/* Next... */
	cv_signal(&un->un_pm_busy_cv);

	/* Take this command's busy reference; sd_pm_exit will drop it. */
	un->un_pm_count++;

	SD_TRACE(SD_LOG_IO_PM, un,
	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);

	mutex_exit(&un->un_pm_mutex);

	return (return_status);
}
10044 
10045 
10046 /*
10047  *    Function: sd_pm_exit
10048  *
10049  * Description: Called at the completion of a command to manage busy
10050  *		status for the device. If the device becomes idle the
10051  *		PM framework is notified.
10052  *
10053  *     Context: Kernel thread context
10054  */
10055 
10056 static void
10057 sd_pm_exit(struct sd_lun *un)
10058 {
10059 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10060 	ASSERT(!mutex_owned(&un->un_pm_mutex));
10061 
10062 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
10063 
10064 	/*
10065 	 * After attach the following flag is only read, so don't
10066 	 * take the penalty of acquiring a mutex for it.
10067 	 */
10068 	if (un->un_f_pm_is_enabled == TRUE) {
10069 
10070 		mutex_enter(&un->un_pm_mutex);
10071 		un->un_pm_count--;
10072 
10073 		SD_TRACE(SD_LOG_IO_PM, un,
10074 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
10075 
10076 		ASSERT(un->un_pm_count >= 0);
10077 		if (un->un_pm_count == 0) {
10078 			mutex_exit(&un->un_pm_mutex);
10079 
10080 			SD_TRACE(SD_LOG_IO_PM, un,
10081 			    "sd_pm_exit: idle component\n");
10082 
10083 			(void) pm_idle_component(SD_DEVINFO(un), 0);
10084 
10085 		} else {
10086 			mutex_exit(&un->un_pm_mutex);
10087 		}
10088 	}
10089 
10090 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
10091 }
10092 
10093 
10094 /*
10095  *    Function: sdopen
10096  *
10097  * Description: Driver's open(9e) entry point function.
10098  *
10099  *   Arguments: dev_i   - pointer to device number
10100  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
10101  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10102  *		cred_p  - user credential pointer
10103  *
10104  * Return Code: EINVAL
10105  *		ENXIO
10106  *		EIO
10107  *		EROFS
10108  *		EBUSY
10109  *
10110  *     Context: Kernel thread context
10111  */
/* ARGSUSED */
static int
sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
{
	struct sd_lun	*un;
	int		nodelay;
	int		part;
	uint64_t	partmask;
	int		instance;
	dev_t		dev;
	int		rval = EIO;

	/* Validate the open type */
	if (otyp >= OTYPCNT) {
		return (EINVAL);
	}

	dev = *dev_p;
	instance = SDUNIT(dev);
	mutex_enter(&sd_detach_mutex);

	/*
	 * Fail the open if there is no softstate for the instance, or
	 * if another thread somewhere is trying to detach the instance.
	 */
	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
	    (un->un_detach_count != 0)) {
		mutex_exit(&sd_detach_mutex);
		/*
		 * The probe cache only needs to be cleared when open (9e) fails
		 * with ENXIO (4238046).
		 */
		/*
		 * un-conditionally clearing probe cache is ok with
		 * separate sd/ssd binaries
		 * x86 platform can be an issue with both parallel
		 * and fibre in 1 binary
		 */
		sd_scsi_clear_probe_cache();
		return (ENXIO);
	}

	/*
	 * The un_layer_count is to prevent another thread in specfs from
	 * trying to detach the instance, which can happen when we are
	 * called from a higher-layer driver instead of thru specfs.
	 * This will not be needed when DDI provides a layered driver
	 * interface that allows specfs to know that an instance is in
	 * use by a layered driver & should not be detached.
	 *
	 * Note: the semantics for layered driver opens are exactly one
	 * close for every open.
	 */
	if (otyp == OTYP_LYR) {
		un->un_layer_count++;
	}

	/*
	 * Keep a count of the current # of opens in progress. This is because
	 * some layered drivers try to call us as a regular open. This can
	 * cause problems that we cannot prevent, however by keeping this count
	 * we can at least keep our open and detach routines from racing against
	 * each other under such conditions.
	 */
	un->un_opens_in_progress++;
	mutex_exit(&sd_detach_mutex);

	nodelay  = (flag & (FNDELAY | FNONBLOCK));
	part	 = SDPART(dev);
	partmask = 1 << part;

	/*
	 * We use a semaphore here in order to serialize
	 * open and close requests on the device.
	 */
	sema_p(&un->un_semoclose);

	mutex_enter(SD_MUTEX(un));

	/*
	 * All device accesses go thru sdstrategy() where we check
	 * on suspend status but there could be a scsi_poll command,
	 * which bypasses sdstrategy(), so we need to check pm
	 * status.
	 */

	if (!nodelay) {
		/*
		 * Block until any in-progress suspend or power-level change
		 * completes, then mark the device busy to the PM framework
		 * for the duration of the open processing.
		 */
		while ((un->un_state == SD_STATE_SUSPENDED) ||
		    (un->un_state == SD_STATE_PM_CHANGING)) {
			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
		}

		mutex_exit(SD_MUTEX(un));
		if (sd_pm_entry(un) != DDI_SUCCESS) {
			rval = EIO;
			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
			    "sdopen: sd_pm_entry failed\n");
			goto open_failed_with_pm;
		}
		mutex_enter(SD_MUTEX(un));
	}

	/* check for previous exclusive open */
	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);

	if (un->un_exclopen & (partmask)) {
		goto excl_open_fail;
	}

	if (flag & FEXCL) {
		int i;
		/*
		 * An exclusive open fails if the partition is already open
		 * via any open type (layered or regular).
		 */
		if (un->un_ocmap.lyropen[part]) {
			goto excl_open_fail;
		}
		for (i = 0; i < (OTYPCNT - 1); i++) {
			if (un->un_ocmap.regopen[i] & (partmask)) {
				goto excl_open_fail;
			}
		}
	}

	/*
	 * Check the write permission if this is a removable media device,
	 * NDELAY has not been set, and writable permission is requested.
	 *
	 * Note: If NDELAY was set and this is write-protected media the WRITE
	 * attempt will fail with EIO as part of the I/O processing. This is a
	 * more permissive implementation that allows the open to succeed and
	 * WRITE attempts to fail when appropriate.
	 */
	if (un->un_f_chk_wp_open) {
		if ((flag & FWRITE) && (!nodelay)) {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Defer the check for write permission on writable
			 * DVD drive till sdstrategy and will not fail open even
			 * if FWRITE is set as the device can be writable
			 * depending upon the media and the media can change
			 * after the call to open().
			 */
			if (un->un_f_dvdram_writable_device == FALSE) {
				if (ISCD(un) || sr_check_wp(dev)) {
				rval = EROFS;
				mutex_enter(SD_MUTEX(un));
				SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
				    "write to cd or write protected media\n");
				goto open_fail;
				}
			}
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * If opening in NDELAY/NONBLOCK mode, just return.
	 * Check if disk is ready and has a valid geometry later.
	 */
	if (!nodelay) {
		mutex_exit(SD_MUTEX(un));
		rval = sd_ready_and_valid(un);
		mutex_enter(SD_MUTEX(un));
		/*
		 * Fail if device is not ready or if the number of disk
		 * blocks is zero or negative for non CD devices.
		 */
		if ((rval != SD_READY_VALID) ||
		    (!ISCD(un) && un->un_map[part].dkl_nblk <= 0)) {
			rval = un->un_f_has_removable_media ? ENXIO : EIO;
			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
			    "device not ready or invalid disk block value\n");
			goto open_fail;
		}
#if defined(__i386) || defined(__amd64)
	} else {
		uchar_t *cp;
		/*
		 * x86 requires special nodelay handling, so that p0 is
		 * always defined and accessible.
		 * Invalidate geometry only if device is not already open.
		 */
		cp = &un->un_ocmap.chkd[0];
		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
			if (*cp != (uchar_t)0) {
			    break;
			}
			cp++;
		}
		/* cp reached the end => no open counts set => not open */
		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
			un->un_f_geometry_is_valid = FALSE;
		}

#endif
	}

	/* Record this open in the partition's open map. */
	if (otyp == OTYP_LYR) {
		un->un_ocmap.lyropen[part]++;
	} else {
		un->un_ocmap.regopen[otyp] |= partmask;
	}

	/* Set up open and exclusive open flags */
	if (flag & FEXCL) {
		un->un_exclopen |= (partmask);
	}

	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
	    "open of part %d type %d\n", part, otyp);

	mutex_exit(SD_MUTEX(un));
	if (!nodelay) {
		/* Balance the sd_pm_entry() done above for !nodelay opens. */
		sd_pm_exit(un);
	}

	sema_v(&un->un_semoclose);

	mutex_enter(&sd_detach_mutex);
	un->un_opens_in_progress--;
	mutex_exit(&sd_detach_mutex);

	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
	/*
	 * NOTE(review): open(9E) success is 0; this relies on DDI_SUCCESS
	 * being 0 — returning 0 directly would be clearer.
	 */
	return (DDI_SUCCESS);

excl_open_fail:
	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
	rval = EBUSY;

open_fail:
	mutex_exit(SD_MUTEX(un));

	/*
	 * On a failed open we must exit the pm management.
	 */
	if (!nodelay) {
		sd_pm_exit(un);
	}
open_failed_with_pm:
	/*
	 * Reached directly only when sd_pm_entry() itself failed, in which
	 * case no sd_pm_exit() is owed.
	 */
	sema_v(&un->un_semoclose);

	mutex_enter(&sd_detach_mutex);
	un->un_opens_in_progress--;
	if (otyp == OTYP_LYR) {
		un->un_layer_count--;
	}
	mutex_exit(&sd_detach_mutex);

	return (rval);
}
10362 
10363 
10364 /*
10365  *    Function: sdclose
10366  *
10367  * Description: Driver's close(9e) entry point function.
10368  *
10369  *   Arguments: dev    - device number
10370  *		flag   - file status flag, informational only
10371  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10372  *		cred_p - user credential pointer
10373  *
10374  * Return Code: ENXIO
10375  *
10376  *     Context: Kernel thread context
10377  */
/* ARGSUSED */
static int
sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
	struct sd_lun	*un;
	uchar_t		*cp;
	int		part;
	int		nodelay;
	int		rval = 0;

	/* Validate the open type */
	if (otyp >= OTYPCNT) {
		return (ENXIO);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	part = SDPART(dev);
	nodelay = flag & (FNDELAY | FNONBLOCK);

	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
	    "sdclose: close of part %d type %d\n", part, otyp);

	/*
	 * We use a semaphore here in order to serialize
	 * open and close requests on the device.
	 */
	sema_p(&un->un_semoclose);

	mutex_enter(SD_MUTEX(un));

	/* Don't proceed if power is being changed. */
	while (un->un_state == SD_STATE_PM_CHANGING) {
		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
	}

	/* Drop the exclusive-open claim for this partition, if held. */
	if (un->un_exclopen & (1 << part)) {
		un->un_exclopen &= ~(1 << part);
	}

	/* Update the open partition map */
	if (otyp == OTYP_LYR) {
		un->un_ocmap.lyropen[part] -= 1;
	} else {
		un->un_ocmap.regopen[otyp] &= ~(1 << part);
	}

	/*
	 * Scan the open map; if every byte is zero this was the last
	 * close of the device.
	 * NOTE(review): *cp is a uchar_t, so the comparison against NULL
	 * relies on NULL being 0 — comparing against 0 would be clearer.
	 */
	cp = &un->un_ocmap.chkd[0];
	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
		if (*cp != NULL) {
			break;
		}
		cp++;
	}

	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");

		/*
		 * We avoid persistance upon the last close, and set
		 * the throttle back to the maximum.
		 */
		un->un_throttle = un->un_saved_throttle;

		if (un->un_state == SD_STATE_OFFLINE) {
			if (un->un_f_is_fibre == FALSE) {
				scsi_log(SD_DEVINFO(un), sd_label,
					CE_WARN, "offline\n");
			}
			un->un_f_geometry_is_valid = FALSE;

		} else {
			/*
			 * Flush any outstanding writes in NVRAM cache.
			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
			 * cmd, it may not work for non-Pluto devices.
			 * SYNCHRONIZE CACHE is not required for removables,
			 * except DVD-RAM drives.
			 *
			 * Also note: because SYNCHRONIZE CACHE is currently
			 * the only command issued here that requires the
			 * drive be powered up, only do the power up before
			 * sending the Sync Cache command. If additional
			 * commands are added which require a powered up
			 * drive, the following sequence may have to change.
			 *
			 * And finally, note that parallel SCSI on SPARC
			 * only issues a Sync Cache to DVD-RAM, a newly
			 * supported device.
			 */
#if defined(__i386) || defined(__amd64)
			if (un->un_f_sync_cache_supported ||
			    un->un_f_dvdram_writable_device == TRUE) {
#else
			if (un->un_f_dvdram_writable_device == TRUE) {
#endif
				mutex_exit(SD_MUTEX(un));
				if (sd_pm_entry(un) == DDI_SUCCESS) {
					rval =
					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
					    NULL);
					/* ignore error if not supported */
					if (rval == ENOTSUP) {
						rval = 0;
					} else if (rval != 0) {
						rval = EIO;
					}
					sd_pm_exit(un);
				} else {
					rval = EIO;
				}
				mutex_enter(SD_MUTEX(un));
			}

			/*
			 * For devices which supports DOOR_LOCK, send an ALLOW
			 * MEDIA REMOVAL command, but don't get upset if it
			 * fails. We need to raise the power of the drive before
			 * we can call sd_send_scsi_DOORLOCK()
			 */
			if (un->un_f_doorlock_supported) {
				mutex_exit(SD_MUTEX(un));
				if (sd_pm_entry(un) == DDI_SUCCESS) {
					rval = sd_send_scsi_DOORLOCK(un,
					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);

					sd_pm_exit(un);
					/*
					 * For a CD in nodelay mode, a DOORLOCK
					 * failure is reported as ENXIO.
					 */
					if (ISCD(un) && (rval != 0) &&
					    (nodelay != 0)) {
						rval = ENXIO;
					}
				} else {
					rval = EIO;
				}
				mutex_enter(SD_MUTEX(un));
			}

			/*
			 * If a device has removable media, invalidate all
			 * parameters related to media, such as geometry,
			 * blocksize, and blockcount.
			 */
			if (un->un_f_has_removable_media) {
				sr_ejected(un);
			}

			/*
			 * Destroy the cache (if it exists) which was
			 * allocated for the write maps since this is
			 * the last close for this media.
			 */
			if (un->un_wm_cache) {
				/*
				 * Check if there are pending commands.
				 * and if there are give a warning and
				 * do not destroy the cache.
				 */
				if (un->un_ncmds_in_driver > 0) {
					scsi_log(SD_DEVINFO(un),
					    sd_label, CE_WARN,
					    "Unable to clean up memory "
					    "because of pending I/O\n");
				} else {
					kmem_cache_destroy(
					    un->un_wm_cache);
					un->un_wm_cache = NULL;
				}
			}
		}
	}

	mutex_exit(SD_MUTEX(un));
	sema_v(&un->un_semoclose);

	if (otyp == OTYP_LYR) {
		mutex_enter(&sd_detach_mutex);
		/*
		 * The detach routine may run when the layer count
		 * drops to zero.
		 */
		un->un_layer_count--;
		mutex_exit(&sd_detach_mutex);
	}

	return (rval);
}
10566 
10567 
10568 /*
10569  *    Function: sd_ready_and_valid
10570  *
10571  * Description: Test if device is ready and has a valid geometry.
10572  *
 *   Arguments: un  - driver soft state (unit) structure
 *
 * Return Code: SD_READY_VALID		ready and valid label
 *		SD_READY_NOT_VALID	ready, geom ops never applicable
 *		SD_NOT_READY_VALID	not ready, no label
 *		ENOMEM			write map cache could not be allocated
10579  *
10580  *     Context: Never called at interrupt context.
10581  */
10582 
static int
sd_ready_and_valid(struct sd_lun *un)
{
	struct sd_errstats	*stp;
	uint64_t		capacity;
	uint_t			lbasize;
	int			rval = SD_READY_VALID;
	char			name_str[48];

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));
	/*
	 * If a device has removable media, we must check if media is
	 * ready when checking if this device is ready and valid.
	 */
	if (un->un_f_has_removable_media) {
		mutex_exit(SD_MUTEX(un));
		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
			rval = SD_NOT_READY_VALID;
			mutex_enter(SD_MUTEX(un));
			goto done;
		}

		mutex_enter(SD_MUTEX(un));
		if ((un->un_f_geometry_is_valid == FALSE) ||
		    (un->un_f_blockcount_is_valid == FALSE) ||
		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {

			/* capacity has to be read every open. */
			mutex_exit(SD_MUTEX(un));
			if (sd_send_scsi_READ_CAPACITY(un, &capacity,
			    &lbasize, SD_PATH_DIRECT) != 0) {
				mutex_enter(SD_MUTEX(un));
				un->un_f_geometry_is_valid = FALSE;
				rval = SD_NOT_READY_VALID;
				goto done;
			} else {
				mutex_enter(SD_MUTEX(un));
				sd_update_block_info(un, lbasize, capacity);
			}
		}

		/*
		 * Check if the media in the device is writable or not.
		 */
		if ((un->un_f_geometry_is_valid == FALSE) && ISCD(un)) {
			sd_check_for_writable_cd(un);
		}

	} else {
		/*
		 * Do a test unit ready to clear any unit attention from non-cd
		 * devices.
		 */
		mutex_exit(SD_MUTEX(un));
		(void) sd_send_scsi_TEST_UNIT_READY(un, 0);
		mutex_enter(SD_MUTEX(un));
	}


	/*
	 * If this is a non 512 block device, allocate space for
	 * the wmap cache. This is being done here since every time
	 * a media is changed this routine will be called and the
	 * block size is a function of media rather than device.
	 */
	if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
		if (!(un->un_wm_cache)) {
			(void) snprintf(name_str, sizeof (name_str),
			    "%s%d_cache",
			    ddi_driver_name(SD_DEVINFO(un)),
			    ddi_get_instance(SD_DEVINFO(un)));
			un->un_wm_cache = kmem_cache_create(
			    name_str, sizeof (struct sd_w_map),
			    8, sd_wm_cache_constructor,
			    sd_wm_cache_destructor, NULL,
			    (void *)un, NULL, 0);
			if (!(un->un_wm_cache)) {
					/*
					 * NOTE(review): ENOMEM is an errno,
					 * not an SD_* status code; callers
					 * compare against SD_READY_VALID so
					 * this still reads as a failure.
					 */
					rval = ENOMEM;
					goto done;
			}
		}
	}

	if (un->un_state == SD_STATE_NORMAL) {
		/*
		 * If the target is not yet ready here (defined by a TUR
		 * failure), invalidate the geometry and print an 'offline'
		 * message. This is a legacy message, as the state of the
		 * target is not actually changed to SD_STATE_OFFLINE.
		 *
		 * If the TUR fails for EACCES (Reservation Conflict), it
		 * means there actually is nothing wrong with the target that
		 * would require invalidating the geometry, so continue in
		 * that case as if the TUR was successful.
		 */
		int err;

		mutex_exit(SD_MUTEX(un));
		err = sd_send_scsi_TEST_UNIT_READY(un, 0);
		mutex_enter(SD_MUTEX(un));

		if ((err != 0) && (err != EACCES)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "offline\n");
			un->un_f_geometry_is_valid = FALSE;
			rval = SD_NOT_READY_VALID;
			goto done;
		}
	}

	if (un->un_f_format_in_progress == FALSE) {
		/*
		 * Note: sd_validate_geometry may return TRUE, but that does
		 * not necessarily mean un_f_geometry_is_valid == TRUE!
		 */
		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
		if (rval == ENOTSUP) {
			if (un->un_f_geometry_is_valid == TRUE)
				rval = 0;
			else {
				rval = SD_READY_NOT_VALID;
				goto done;
			}
		}
		if (rval != 0) {
			/*
			 * We don't check the validity of geometry for
			 * CDROMs. Also we assume we have a good label
			 * even if sd_validate_geometry returned ENOMEM.
			 */
			if (!ISCD(un) && rval != ENOMEM) {
				rval = SD_NOT_READY_VALID;
				goto done;
			}
		}
	}

#ifdef DOESNTWORK /* on eliteII, see 1118607 */
	/*
	 * check to see if this disk is write protected, if it is and we have
	 * not set read-only, then fail
	 */
	if ((flag & FWRITE) && (sr_check_wp(dev))) {
		New_state(un, SD_STATE_CLOSED);
		goto done;
	}
#endif

	/*
	 * If this device supports DOOR_LOCK command, try and send
	 * this command to PREVENT MEDIA REMOVAL, but don't get upset
	 * if it fails. For a CD, however, it is an error
	 */
	if (un->un_f_doorlock_supported) {
		mutex_exit(SD_MUTEX(un));
		if ((sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
		    SD_PATH_DIRECT) != 0) && ISCD(un)) {
			rval = SD_NOT_READY_VALID;
			mutex_enter(SD_MUTEX(un));
			goto done;
		}
		mutex_enter(SD_MUTEX(un));
	}

	/* The state has changed, inform the media watch routines */
	un->un_mediastate = DKIO_INSERTED;
	cv_broadcast(&un->un_state_cv);
	rval = SD_READY_VALID;

done:

	/*
	 * Initialize the capacity kstat value, if no media previously
	 * (capacity kstat is 0) and a media has been inserted
	 * (un_blockcount > 0).
	 */
	if (un->un_errstats != NULL) {
		stp = (struct sd_errstats *)un->un_errstats->ks_data;
		if ((stp->sd_capacity.value.ui64 == 0) &&
		    (un->un_f_blockcount_is_valid == TRUE)) {
			stp->sd_capacity.value.ui64 =
			    (uint64_t)((uint64_t)un->un_blockcount *
			    un->un_sys_blocksize);
		}
	}

	/* SD_MUTEX is held on every path that reaches here. */
	mutex_exit(SD_MUTEX(un));
	return (rval);
}
10775 
10776 
10777 /*
10778  *    Function: sdmin
10779  *
10780  * Description: Routine to limit the size of a data transfer. Used in
10781  *		conjunction with physio(9F).
10782  *
10783  *   Arguments: bp - pointer to the indicated buf(9S) struct.
10784  *
10785  *     Context: Kernel thread context.
10786  */
10787 
10788 static void
10789 sdmin(struct buf *bp)
10790 {
10791 	struct sd_lun	*un;
10792 	int		instance;
10793 
10794 	instance = SDUNIT(bp->b_edev);
10795 
10796 	un = ddi_get_soft_state(sd_state, instance);
10797 	ASSERT(un != NULL);
10798 
10799 	if (bp->b_bcount > un->un_max_xfer_size) {
10800 		bp->b_bcount = un->un_max_xfer_size;
10801 	}
10802 }
10803 
10804 
10805 /*
10806  *    Function: sdread
10807  *
10808  * Description: Driver's read(9e) entry point function.
10809  *
10810  *   Arguments: dev   - device number
10811  *		uio   - structure pointer describing where data is to be stored
10812  *			in user's space
10813  *		cred_p  - user credential pointer
10814  *
10815  * Return Code: ENXIO
10816  *		EIO
10817  *		EINVAL
10818  *		value returned by physio
10819  *
10820  *     Context: Kernel thread context.
10821  */
10822 /* ARGSUSED */
10823 static int
10824 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
10825 {
10826 	struct sd_lun	*un = NULL;
10827 	int		secmask;
10828 	int		err;
10829 
10830 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10831 		return (ENXIO);
10832 	}
10833 
10834 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10835 
10836 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
10837 		mutex_enter(SD_MUTEX(un));
10838 		/*
10839 		 * Because the call to sd_ready_and_valid will issue I/O we
10840 		 * must wait here if either the device is suspended or
10841 		 * if it's power level is changing.
10842 		 */
10843 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10844 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10845 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10846 		}
10847 		un->un_ncmds_in_driver++;
10848 		mutex_exit(SD_MUTEX(un));
10849 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
10850 			mutex_enter(SD_MUTEX(un));
10851 			un->un_ncmds_in_driver--;
10852 			ASSERT(un->un_ncmds_in_driver >= 0);
10853 			mutex_exit(SD_MUTEX(un));
10854 			return (EIO);
10855 		}
10856 		mutex_enter(SD_MUTEX(un));
10857 		un->un_ncmds_in_driver--;
10858 		ASSERT(un->un_ncmds_in_driver >= 0);
10859 		mutex_exit(SD_MUTEX(un));
10860 	}
10861 
10862 	/*
10863 	 * Read requests are restricted to multiples of the system block size.
10864 	 */
10865 	secmask = un->un_sys_blocksize - 1;
10866 
10867 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10868 		SD_ERROR(SD_LOG_READ_WRITE, un,
10869 		    "sdread: file offset not modulo %d\n",
10870 		    un->un_sys_blocksize);
10871 		err = EINVAL;
10872 	} else if (uio->uio_iov->iov_len & (secmask)) {
10873 		SD_ERROR(SD_LOG_READ_WRITE, un,
10874 		    "sdread: transfer length not modulo %d\n",
10875 		    un->un_sys_blocksize);
10876 		err = EINVAL;
10877 	} else {
10878 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
10879 	}
10880 	return (err);
10881 }
10882 
10883 
10884 /*
10885  *    Function: sdwrite
10886  *
10887  * Description: Driver's write(9e) entry point function.
10888  *
10889  *   Arguments: dev   - device number
10890  *		uio   - structure pointer describing where data is stored in
10891  *			user's space
10892  *		cred_p  - user credential pointer
10893  *
10894  * Return Code: ENXIO
10895  *		EIO
10896  *		EINVAL
10897  *		value returned by physio
10898  *
10899  *     Context: Kernel thread context.
10900  */
10901 /* ARGSUSED */
10902 static int
10903 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
10904 {
10905 	struct sd_lun	*un = NULL;
10906 	int		secmask;
10907 	int		err;
10908 
10909 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10910 		return (ENXIO);
10911 	}
10912 
10913 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10914 
10915 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
10916 		mutex_enter(SD_MUTEX(un));
10917 		/*
10918 		 * Because the call to sd_ready_and_valid will issue I/O we
10919 		 * must wait here if either the device is suspended or
10920 		 * if it's power level is changing.
10921 		 */
10922 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10923 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10924 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10925 		}
10926 		un->un_ncmds_in_driver++;
10927 		mutex_exit(SD_MUTEX(un));
10928 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
10929 			mutex_enter(SD_MUTEX(un));
10930 			un->un_ncmds_in_driver--;
10931 			ASSERT(un->un_ncmds_in_driver >= 0);
10932 			mutex_exit(SD_MUTEX(un));
10933 			return (EIO);
10934 		}
10935 		mutex_enter(SD_MUTEX(un));
10936 		un->un_ncmds_in_driver--;
10937 		ASSERT(un->un_ncmds_in_driver >= 0);
10938 		mutex_exit(SD_MUTEX(un));
10939 	}
10940 
10941 	/*
10942 	 * Write requests are restricted to multiples of the system block size.
10943 	 */
10944 	secmask = un->un_sys_blocksize - 1;
10945 
10946 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10947 		SD_ERROR(SD_LOG_READ_WRITE, un,
10948 		    "sdwrite: file offset not modulo %d\n",
10949 		    un->un_sys_blocksize);
10950 		err = EINVAL;
10951 	} else if (uio->uio_iov->iov_len & (secmask)) {
10952 		SD_ERROR(SD_LOG_READ_WRITE, un,
10953 		    "sdwrite: transfer length not modulo %d\n",
10954 		    un->un_sys_blocksize);
10955 		err = EINVAL;
10956 	} else {
10957 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
10958 	}
10959 	return (err);
10960 }
10961 
10962 
10963 /*
10964  *    Function: sdaread
10965  *
10966  * Description: Driver's aread(9e) entry point function.
10967  *
10968  *   Arguments: dev   - device number
10969  *		aio   - structure pointer describing where data is to be stored
10970  *		cred_p  - user credential pointer
10971  *
10972  * Return Code: ENXIO
10973  *		EIO
10974  *		EINVAL
10975  *		value returned by aphysio
10976  *
10977  *     Context: Kernel thread context.
10978  */
10979 /* ARGSUSED */
10980 static int
10981 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
10982 {
10983 	struct sd_lun	*un = NULL;
10984 	struct uio	*uio = aio->aio_uio;
10985 	int		secmask;
10986 	int		err;
10987 
10988 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10989 		return (ENXIO);
10990 	}
10991 
10992 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10993 
10994 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
10995 		mutex_enter(SD_MUTEX(un));
10996 		/*
10997 		 * Because the call to sd_ready_and_valid will issue I/O we
10998 		 * must wait here if either the device is suspended or
10999 		 * if it's power level is changing.
11000 		 */
11001 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11002 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11003 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11004 		}
11005 		un->un_ncmds_in_driver++;
11006 		mutex_exit(SD_MUTEX(un));
11007 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11008 			mutex_enter(SD_MUTEX(un));
11009 			un->un_ncmds_in_driver--;
11010 			ASSERT(un->un_ncmds_in_driver >= 0);
11011 			mutex_exit(SD_MUTEX(un));
11012 			return (EIO);
11013 		}
11014 		mutex_enter(SD_MUTEX(un));
11015 		un->un_ncmds_in_driver--;
11016 		ASSERT(un->un_ncmds_in_driver >= 0);
11017 		mutex_exit(SD_MUTEX(un));
11018 	}
11019 
11020 	/*
11021 	 * Read requests are restricted to multiples of the system block size.
11022 	 */
11023 	secmask = un->un_sys_blocksize - 1;
11024 
11025 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11026 		SD_ERROR(SD_LOG_READ_WRITE, un,
11027 		    "sdaread: file offset not modulo %d\n",
11028 		    un->un_sys_blocksize);
11029 		err = EINVAL;
11030 	} else if (uio->uio_iov->iov_len & (secmask)) {
11031 		SD_ERROR(SD_LOG_READ_WRITE, un,
11032 		    "sdaread: transfer length not modulo %d\n",
11033 		    un->un_sys_blocksize);
11034 		err = EINVAL;
11035 	} else {
11036 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
11037 	}
11038 	return (err);
11039 }
11040 
11041 
11042 /*
11043  *    Function: sdawrite
11044  *
11045  * Description: Driver's awrite(9e) entry point function.
11046  *
11047  *   Arguments: dev   - device number
11048  *		aio   - structure pointer describing where data is stored
11049  *		cred_p  - user credential pointer
11050  *
11051  * Return Code: ENXIO
11052  *		EIO
11053  *		EINVAL
11054  *		value returned by aphysio
11055  *
11056  *     Context: Kernel thread context.
11057  */
11058 /* ARGSUSED */
11059 static int
11060 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11061 {
11062 	struct sd_lun	*un = NULL;
11063 	struct uio	*uio = aio->aio_uio;
11064 	int		secmask;
11065 	int		err;
11066 
11067 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11068 		return (ENXIO);
11069 	}
11070 
11071 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11072 
11073 	if ((un->un_f_geometry_is_valid == FALSE) && !ISCD(un)) {
11074 		mutex_enter(SD_MUTEX(un));
11075 		/*
11076 		 * Because the call to sd_ready_and_valid will issue I/O we
11077 		 * must wait here if either the device is suspended or
11078 		 * if it's power level is changing.
11079 		 */
11080 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11081 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11082 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11083 		}
11084 		un->un_ncmds_in_driver++;
11085 		mutex_exit(SD_MUTEX(un));
11086 		if ((sd_ready_and_valid(un)) != SD_READY_VALID) {
11087 			mutex_enter(SD_MUTEX(un));
11088 			un->un_ncmds_in_driver--;
11089 			ASSERT(un->un_ncmds_in_driver >= 0);
11090 			mutex_exit(SD_MUTEX(un));
11091 			return (EIO);
11092 		}
11093 		mutex_enter(SD_MUTEX(un));
11094 		un->un_ncmds_in_driver--;
11095 		ASSERT(un->un_ncmds_in_driver >= 0);
11096 		mutex_exit(SD_MUTEX(un));
11097 	}
11098 
11099 	/*
11100 	 * Write requests are restricted to multiples of the system block size.
11101 	 */
11102 	secmask = un->un_sys_blocksize - 1;
11103 
11104 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11105 		SD_ERROR(SD_LOG_READ_WRITE, un,
11106 		    "sdawrite: file offset not modulo %d\n",
11107 		    un->un_sys_blocksize);
11108 		err = EINVAL;
11109 	} else if (uio->uio_iov->iov_len & (secmask)) {
11110 		SD_ERROR(SD_LOG_READ_WRITE, un,
11111 		    "sdawrite: transfer length not modulo %d\n",
11112 		    un->un_sys_blocksize);
11113 		err = EINVAL;
11114 	} else {
11115 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
11116 	}
11117 	return (err);
11118 }
11119 
11120 
11121 
11122 
11123 
11124 /*
11125  * Driver IO processing follows the following sequence:
11126  *
11127  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
11128  *         |                |                     ^
11129  *         v                v                     |
11130  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
11131  *         |                |                     |                   |
11132  *         v                |                     |                   |
11133  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
11134  *         |                |                     ^                   ^
11135  *         v                v                     |                   |
11136  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
11137  *         |                |                     |                   |
11138  *     +---+                |                     +------------+      +-------+
11139  *     |                    |                                  |              |
11140  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11141  *     |                    v                                  |              |
11142  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
11143  *     |                    |                                  ^              |
11144  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11145  *     |                    v                                  |              |
11146  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
11147  *     |                    |                                  ^              |
11148  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11149  *     |                    v                                  |              |
11150  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
11151  *     |                    |                                  ^              |
11152  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
11153  *     |                    v                                  |              |
11154  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
11155  *     |                    |                                  ^              |
11156  *     |                    |                                  |              |
11157  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
11158  *                          |                           ^
11159  *                          v                           |
11160  *                   sd_core_iostart()                  |
11161  *                          |                           |
11162  *                          |                           +------>(*destroypkt)()
11163  *                          +-> sd_start_cmds() <-+     |           |
11164  *                          |                     |     |           v
11165  *                          |                     |     |  scsi_destroy_pkt(9F)
11166  *                          |                     |     |
11167  *                          +->(*initpkt)()       +- sdintr()
11168  *                          |  |                        |  |
11169  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
11170  *                          |  +-> scsi_setup_cdb(9F)   |
11171  *                          |                           |
11172  *                          +--> scsi_transport(9F)     |
11173  *                                     |                |
11174  *                                     +----> SCSA ---->+
11175  *
11176  *
 * This code is based upon the following presumptions:
11178  *
11179  *   - iostart and iodone functions operate on buf(9S) structures. These
11180  *     functions perform the necessary operations on the buf(9S) and pass
11181  *     them along to the next function in the chain by using the macros
11182  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
11183  *     (for iodone side functions).
11184  *
11185  *   - The iostart side functions may sleep. The iodone side functions
11186  *     are called under interrupt context and may NOT sleep. Therefore
11187  *     iodone side functions also may not call iostart side functions.
11188  *     (NOTE: iostart side functions should NOT sleep for memory, as
11189  *     this could result in deadlock.)
11190  *
11191  *   - An iostart side function may call its corresponding iodone side
11192  *     function directly (if necessary).
11193  *
11194  *   - In the event of an error, an iostart side function can return a buf(9S)
11195  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
11196  *     b_error in the usual way of course).
11197  *
11198  *   - The taskq mechanism may be used by the iodone side functions to dispatch
11199  *     requests to the iostart side functions.  The iostart side functions in
11200  *     this case would be called under the context of a taskq thread, so it's
11201  *     OK for them to block/sleep/spin in this case.
11202  *
11203  *   - iostart side functions may allocate "shadow" buf(9S) structs and
11204  *     pass them along to the next function in the chain.  The corresponding
11205  *     iodone side functions must coalesce the "shadow" bufs and return
11206  *     the "original" buf to the next higher layer.
11207  *
11208  *   - The b_private field of the buf(9S) struct holds a pointer to
11209  *     an sd_xbuf struct, which contains information needed to
11210  *     construct the scsi_pkt for the command.
11211  *
11212  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
11213  *     layer must acquire & release the SD_MUTEX(un) as needed.
11214  */
11215 
11216 
11217 /*
11218  * Create taskq for all targets in the system. This is created at
11219  * _init(9E) and destroyed at _fini(9E).
11220  *
11221  * Note: here we set the minalloc to a reasonably high number to ensure that
11222  * we will have an adequate supply of task entries available at interrupt time.
11223  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
11224  * sd_create_taskq().  Since we do not want to sleep for allocations at
11225  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
11226  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
11227  * requests any one instant in time.
11228  */
11229 #define	SD_TASKQ_NUMTHREADS	8
11230 #define	SD_TASKQ_MINALLOC	256
11231 #define	SD_TASKQ_MAXALLOC	256
11232 
11233 static taskq_t	*sd_tq = NULL;
11234 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
11235 
11236 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
11237 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
11238 
11239 /*
11240  * The following task queue is being created for the write part of
11241  * read-modify-write of non-512 block size devices.
 * Limit the number of threads to 1 for now. This number has been chosen
 * considering the fact that it applies only to dvd ram drives/MO drives
 * currently. Performance for these is not the main criterion at this stage.
11245  * Note: It needs to be explored if we can use a single taskq in future
11246  */
11247 #define	SD_WMR_TASKQ_NUMTHREADS	1
11248 static taskq_t	*sd_wmr_tq = NULL;
11249 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
11250 
11251 /*
11252  *    Function: sd_taskq_create
11253  *
11254  * Description: Create taskq thread(s) and preallocate task entries
11255  *
11256  * Return Code: Returns a pointer to the allocated taskq_t.
11257  *
11258  *     Context: Can sleep. Requires blockable context.
11259  *
11260  *       Notes: - The taskq() facility currently is NOT part of the DDI.
 *		  (definitely NOT recommended for 3rd-party drivers!) :-)
11262  *		- taskq_create() will block for memory, also it will panic
11263  *		  if it cannot create the requested number of threads.
11264  *		- Currently taskq_create() creates threads that cannot be
11265  *		  swapped.
11266  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
11267  *		  supply of taskq entries at interrupt time (ie, so that we
11268  *		  do not have to sleep for memory)
11269  */
11270 
11271 static void
11272 sd_taskq_create(void)
11273 {
11274 	char	taskq_name[TASKQ_NAMELEN];
11275 
11276 	ASSERT(sd_tq == NULL);
11277 	ASSERT(sd_wmr_tq == NULL);
11278 
11279 	(void) snprintf(taskq_name, sizeof (taskq_name),
11280 	    "%s_drv_taskq", sd_label);
11281 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
11282 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11283 	    TASKQ_PREPOPULATE));
11284 
11285 	(void) snprintf(taskq_name, sizeof (taskq_name),
11286 	    "%s_rmw_taskq", sd_label);
11287 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
11288 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11289 	    TASKQ_PREPOPULATE));
11290 }
11291 
11292 
11293 /*
11294  *    Function: sd_taskq_delete
11295  *
11296  * Description: Complementary cleanup routine for sd_taskq_create().
11297  *
11298  *     Context: Kernel thread context.
11299  */
11300 
11301 static void
11302 sd_taskq_delete(void)
11303 {
11304 	ASSERT(sd_tq != NULL);
11305 	ASSERT(sd_wmr_tq != NULL);
11306 	taskq_destroy(sd_tq);
11307 	taskq_destroy(sd_wmr_tq);
11308 	sd_tq = NULL;
11309 	sd_wmr_tq = NULL;
11310 }
11311 
11312 
11313 /*
11314  *    Function: sdstrategy
11315  *
11316  * Description: Driver's strategy (9E) entry point function.
11317  *
11318  *   Arguments: bp - pointer to buf(9S)
11319  *
11320  * Return Code: Always returns zero
11321  *
11322  *     Context: Kernel thread context.
11323  */
11324 
static int
sdstrategy(struct buf *bp)
{
	struct sd_lun *un;

	/* Fail the request with EIO if no soft state exists for the unit. */
	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	if (un == NULL) {
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}
	/* As was done in the past, fail new cmds. if state is dumping. */
	if (un->un_state == SD_STATE_DUMPING) {
		bioerror(bp, ENXIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * Commands may sneak in while we released the mutex in
	 * DDI_SUSPEND, we should block new commands. However, old
	 * commands that are still in the driver at this point should
	 * still be allowed to drain.
	 */
	mutex_enter(SD_MUTEX(un));
	/*
	 * Must wait here if either the device is suspended or
	 * if its power level is changing.
	 */
	while ((un->un_state == SD_STATE_SUSPENDED) ||
	    (un->un_state == SD_STATE_PM_CHANGING)) {
		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
	}

	/* Count this command in-driver; the iodone side decrements it. */
	un->un_ncmds_in_driver++;

	/*
	 * atapi: Since we are running the CD for now in PIO mode we need to
	 * call bp_mapin here to avoid bp_mapin called interrupt context under
	 * the HBA's init_pkt routine.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		/* Drop the mutex across bp_mapin(), which may block. */
		mutex_exit(SD_MUTEX(un));
		bp_mapin(bp);
		mutex_enter(SD_MUTEX(un));
	}
	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);

	mutex_exit(SD_MUTEX(un));

	/*
	 * This will (eventually) allocate the sd_xbuf area and
	 * call sd_xbuf_strategy().  We just want to return the
	 * result of ddi_xbuf_qstrategy so that we have an opt-
	 * imized tail call which saves us a stack frame.
	 */
	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
}
11388 
11389 
11390 /*
11391  *    Function: sd_xbuf_strategy
11392  *
11393  * Description: Function for initiating IO operations via the
11394  *		ddi_xbuf_qstrategy() mechanism.
11395  *
11396  *     Context: Kernel thread context.
11397  */
11398 
11399 static void
11400 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11401 {
11402 	struct sd_lun *un = arg;
11403 
11404 	ASSERT(bp != NULL);
11405 	ASSERT(xp != NULL);
11406 	ASSERT(un != NULL);
11407 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11408 
11409 	/*
11410 	 * Initialize the fields in the xbuf and save a pointer to the
11411 	 * xbuf in bp->b_private.
11412 	 */
11413 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11414 
11415 	/* Send the buf down the iostart chain */
11416 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11417 }
11418 
11419 
11420 /*
11421  *    Function: sd_xbuf_init
11422  *
11423  * Description: Prepare the given sd_xbuf struct for use.
11424  *
11425  *   Arguments: un - ptr to softstate
11426  *		bp - ptr to associated buf(9S)
11427  *		xp - ptr to associated sd_xbuf
11428  *		chain_type - IO chain type to use:
11429  *			SD_CHAIN_NULL
11430  *			SD_CHAIN_BUFIO
11431  *			SD_CHAIN_USCSI
11432  *			SD_CHAIN_DIRECT
11433  *			SD_CHAIN_DIRECT_PRIORITY
11434  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11435  *			initialization; may be NULL if none.
11436  *
11437  *     Context: Kernel thread context
11438  */
11439 
11440 static void
11441 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
11442 	uchar_t chain_type, void *pktinfop)
11443 {
11444 	int index;
11445 
11446 	ASSERT(un != NULL);
11447 	ASSERT(bp != NULL);
11448 	ASSERT(xp != NULL);
11449 
11450 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
11451 	    bp, chain_type);
11452 
11453 	xp->xb_un	= un;
11454 	xp->xb_pktp	= NULL;
11455 	xp->xb_pktinfo	= pktinfop;
11456 	xp->xb_private	= bp->b_private;
11457 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
11458 
11459 	/*
11460 	 * Set up the iostart and iodone chain indexes in the xbuf, based
11461 	 * upon the specified chain type to use.
11462 	 */
11463 	switch (chain_type) {
11464 	case SD_CHAIN_NULL:
11465 		/*
11466 		 * Fall thru to just use the values for the buf type, even
11467 		 * tho for the NULL chain these values will never be used.
11468 		 */
11469 		/* FALLTHRU */
11470 	case SD_CHAIN_BUFIO:
11471 		index = un->un_buf_chain_type;
11472 		break;
11473 	case SD_CHAIN_USCSI:
11474 		index = un->un_uscsi_chain_type;
11475 		break;
11476 	case SD_CHAIN_DIRECT:
11477 		index = un->un_direct_chain_type;
11478 		break;
11479 	case SD_CHAIN_DIRECT_PRIORITY:
11480 		index = un->un_priority_chain_type;
11481 		break;
11482 	default:
11483 		/* We're really broken if we ever get here... */
11484 		panic("sd_xbuf_init: illegal chain type!");
11485 		/*NOTREACHED*/
11486 	}
11487 
11488 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
11489 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
11490 
11491 	/*
11492 	 * It might be a bit easier to simply bzero the entire xbuf above,
11493 	 * but it turns out that since we init a fair number of members anyway,
11494 	 * we save a fair number cycles by doing explicit assignment of zero.
11495 	 */
11496 	xp->xb_pkt_flags	= 0;
11497 	xp->xb_dma_resid	= 0;
11498 	xp->xb_retry_count	= 0;
11499 	xp->xb_victim_retry_count = 0;
11500 	xp->xb_ua_retry_count	= 0;
11501 	xp->xb_sense_bp		= NULL;
11502 	xp->xb_sense_status	= 0;
11503 	xp->xb_sense_state	= 0;
11504 	xp->xb_sense_resid	= 0;
11505 
11506 	bp->b_private	= xp;
11507 	bp->b_flags	&= ~(B_DONE | B_ERROR);
11508 	bp->b_resid	= 0;
11509 	bp->av_forw	= NULL;
11510 	bp->av_back	= NULL;
11511 	bioerror(bp, 0);
11512 
11513 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
11514 }
11515 
11516 
11517 /*
11518  *    Function: sd_uscsi_strategy
11519  *
11520  * Description: Wrapper for calling into the USCSI chain via physio(9F)
11521  *
11522  *   Arguments: bp - buf struct ptr
11523  *
11524  * Return Code: Always returns 0
11525  *
11526  *     Context: Kernel thread context
11527  */
11528 
static int
sd_uscsi_strategy(struct buf *bp)
{
	struct sd_lun		*un;
	struct sd_uscsi_info	*uip;
	struct sd_xbuf		*xp;
	uchar_t			chain_type;

	ASSERT(bp != NULL);

	/* Fail the request with EIO if no soft state exists for the unit. */
	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	if (un == NULL) {
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);

	mutex_enter(SD_MUTEX(un));
	/*
	 * atapi: Since we are running the CD for now in PIO mode we need to
	 * call bp_mapin here to avoid bp_mapin called interrupt context under
	 * the HBA's init_pkt routine.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		/* Drop the mutex across bp_mapin(), which may block. */
		mutex_exit(SD_MUTEX(un));
		bp_mapin(bp);
		mutex_enter(SD_MUTEX(un));
	}
	/* Count this command in-driver; sd_uscsi_iodone() decrements it. */
	un->un_ncmds_in_driver++;
	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);
	mutex_exit(SD_MUTEX(un));

	/*
	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
	 */
	ASSERT(bp->b_private != NULL);
	uip = (struct sd_uscsi_info *)bp->b_private;

	/* Select the IO chain based on the path flag the caller requested. */
	switch (uip->ui_flags) {
	case SD_PATH_DIRECT:
		chain_type = SD_CHAIN_DIRECT;
		break;
	case SD_PATH_DIRECT_PRIORITY:
		chain_type = SD_CHAIN_DIRECT_PRIORITY;
		break;
	default:
		chain_type = SD_CHAIN_USCSI;
		break;
	}

	/* The xbuf is freed in sd_uscsi_iodone() when the command completes. */
	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);

	/* Use the index obtained within xbuf_init */
	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);

	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);

	return (0);
}
11595 
11596 
11597 /*
11598  * These routines perform raw i/o operations.
11599  */
/*ARGSUSED*/
static void
sduscsimin(struct buf *bp)
{
	/*
	 * Deliberately empty: do not break up the transfer, because the
	 * CDB count would then be incorrect and data underruns would
	 * result (incomplete read/writes which would be retried and
	 * then failed, see sdintr()).
	 */
}
11611 
11612 
11613 
11614 /*
11615  *    Function: sd_send_scsi_cmd
11616  *
11617  * Description: Runs a USCSI command for user (when called thru sdioctl),
11618  *		or for the driver
11619  *
11620  *   Arguments: dev - the dev_t for the device
11621  *		incmd - ptr to a valid uscsi_cmd struct
11622  *		cdbspace - UIO_USERSPACE or UIO_SYSSPACE
11623  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11624  *		rqbufspace - UIO_USERSPACE or UIO_SYSSPACE
11625  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11626  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11627  *			to use the USCSI "direct" chain and bypass the normal
11628  *			command waitq.
11629  *
11630  * Return Code: 0 -  successful completion of the given command
11631  *		EIO - scsi_reset() failed, or see biowait()/physio() codes.
11632  *		ENXIO  - soft state not found for specified dev
11633  *		EINVAL
11634  *		EFAULT - copyin/copyout error
11635  *		return code of biowait(9F) or physio(9F):
11636  *			EIO - IO error, caller may check incmd->uscsi_status
11637  *			ENXIO
11638  *			EACCES - reservation conflict
11639  *
11640  *     Context: Waits for command to complete. Can sleep.
11641  */
11642 
static int
sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd,
	enum uio_seg cdbspace, enum uio_seg dataspace, enum uio_seg rqbufspace,
	int path_flag)
{
	struct sd_uscsi_info	*uip;
	struct uscsi_cmd	*uscmd;
	struct sd_lun	*un;
	struct buf	*bp;
	int	rval;
	int	flags;

	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
	if (un == NULL) {
		return (ENXIO);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

#ifdef SDDEBUG
	switch (dataspace) {
	case UIO_USERSPACE:
		SD_TRACE(SD_LOG_IO, un,
		    "sd_send_scsi_cmd: entry: un:0x%p UIO_USERSPACE\n", un);
		break;
	case UIO_SYSSPACE:
		SD_TRACE(SD_LOG_IO, un,
		    "sd_send_scsi_cmd: entry: un:0x%p UIO_SYSSPACE\n", un);
		break;
	default:
		SD_TRACE(SD_LOG_IO, un,
		    "sd_send_scsi_cmd: entry: un:0x%p UNEXPECTED SPACE\n", un);
		break;
	}
#endif

	/*
	 * Perform resets directly; no need to generate a command to do it.
	 */
	if (incmd->uscsi_flags & (USCSI_RESET | USCSI_RESET_ALL)) {
		flags = ((incmd->uscsi_flags & USCSI_RESET_ALL) != 0) ?
		    RESET_ALL : RESET_TARGET;
		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: Issuing reset\n");
		if (scsi_reset(SD_ADDRESS(un), flags) == 0) {
			/* Reset attempt was unsuccessful */
			SD_TRACE(SD_LOG_IO, un,
			    "sd_send_scsi_cmd: reset: failure\n");
			return (EIO);
		}
		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: reset: success\n");
		return (0);
	}

	/* Perfunctory sanity check... */
	if (incmd->uscsi_cdblen <= 0) {
		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
		    "invalid uscsi_cdblen, returning EINVAL\n");
		return (EINVAL);
	}

	/*
	 * In order to not worry about where the uscsi structure came from
	 * (or where the cdb it points to came from) we're going to make
	 * kmem_alloc'd copies of them here. This will also allow reference
	 * to the data they contain long after this process has gone to
	 * sleep and its kernel stack has been unmapped, etc.
	 *
	 * First get some memory for the uscsi_cmd struct and copy the
	 * contents of the given uscsi_cmd struct into it.
	 */
	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
	bcopy(incmd, uscmd, sizeof (struct uscsi_cmd));

	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: uscsi_cmd",
	    (uchar_t *)uscmd, sizeof (struct uscsi_cmd), SD_LOG_HEX);

	/*
	 * Now get some space for the CDB, and copy the given CDB into
	 * it. Use ddi_copyin() in case the data is in user space.
	 */
	uscmd->uscsi_cdb = kmem_zalloc((size_t)incmd->uscsi_cdblen, KM_SLEEP);
	flags = (cdbspace == UIO_SYSSPACE) ? FKIOCTL : 0;
	if (ddi_copyin(incmd->uscsi_cdb, uscmd->uscsi_cdb,
	    (uint_t)incmd->uscsi_cdblen, flags) != 0) {
		/* Copyin failed; free both allocations made above. */
		kmem_free(uscmd->uscsi_cdb, (size_t)incmd->uscsi_cdblen);
		kmem_free(uscmd, sizeof (struct uscsi_cmd));
		return (EFAULT);
	}

	SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_cmd: CDB",
	    (uchar_t *)uscmd->uscsi_cdb, incmd->uscsi_cdblen, SD_LOG_HEX);

	bp = getrbuf(KM_SLEEP);

	/*
	 * Allocate an sd_uscsi_info struct and fill it with the info
	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
	 * since we allocate the buf here in this function, we do not
	 * need to preserve the prior contents of b_private.
	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
	 */
	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
	uip->ui_flags = path_flag;
	uip->ui_cmdp  = uscmd;
	bp->b_private = uip;

	/*
	 * Initialize Request Sense buffering, if requested.
	 */
	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
		/*
		 * Here uscmd->uscsi_rqbuf currently points to the caller's
		 * buffer, but we replace this with a kernel buffer that
		 * we allocate to use with the sense data. The sense data
		 * (if present) gets copied into this new buffer before the
		 * command is completed.  Then we copy the sense data from
		 * our allocated buf into the caller's buffer below. Note
		 * that incmd->uscsi_rqbuf and incmd->uscsi_rqlen are used
		 * below to perform the copy back to the caller's buf.
		 */
		uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
		if (rqbufspace == UIO_USERSPACE) {
			/* User callers always get the full kernel buffer. */
			uscmd->uscsi_rqlen   = SENSE_LENGTH;
			uscmd->uscsi_rqresid = SENSE_LENGTH;
		} else {
			/* Kernel callers: clamp to the requested length. */
			uchar_t rlen = min(SENSE_LENGTH, uscmd->uscsi_rqlen);
			uscmd->uscsi_rqlen   = rlen;
			uscmd->uscsi_rqresid = rlen;
		}
	} else {
		/* No sense buffering requested (or usable buffer given). */
		uscmd->uscsi_rqbuf = NULL;
		uscmd->uscsi_rqlen   = 0;
		uscmd->uscsi_rqresid = 0;
	}

	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: rqbuf:0x%p  rqlen:%d\n",
	    uscmd->uscsi_rqbuf, uscmd->uscsi_rqlen);

	if (un->un_f_is_fibre == FALSE) {
		/*
		 * Force asynchronous mode, if necessary.  Doing this here
		 * has the unfortunate effect of running other queued
		 * commands async also, but since the main purpose of this
		 * capability is downloading new drive firmware, we can
		 * probably live with it.
		 */
		if ((uscmd->uscsi_flags & USCSI_ASYNC) != 0) {
			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
				== 1) {
				if (scsi_ifsetcap(SD_ADDRESS(un),
					    "synchronous", 0, 1) == 1) {
					SD_TRACE(SD_LOG_IO, un,
					"sd_send_scsi_cmd: forced async ok\n");
				} else {
					SD_TRACE(SD_LOG_IO, un,
					"sd_send_scsi_cmd:\
					forced async failed\n");
					rval = EINVAL;
					goto done;
				}
			}
		}

		/*
		 * Re-enable synchronous mode, if requested
		 */
		if (uscmd->uscsi_flags & USCSI_SYNC) {
			if (scsi_ifgetcap(SD_ADDRESS(un), "synchronous", 1)
				== 0) {
				int i = scsi_ifsetcap(SD_ADDRESS(un),
						"synchronous", 1, 1);
				SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
					"re-enabled sync %s\n",
					(i == 1) ? "ok" : "failed");
			}
		}
	}

	/*
	 * Commands sent with priority are intended for error recovery
	 * situations, and do not have retries performed.
	 */
	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
	}

	/*
	 * If we're going to do actual I/O, let physio do all the right things
	 */
	if (uscmd->uscsi_buflen != 0) {
		struct iovec	aiov;
		struct uio	auio;
		struct uio	*uio = &auio;

		/* Build a single-segment uio describing the data buffer. */
		bzero(&auio, sizeof (struct uio));
		bzero(&aiov, sizeof (struct iovec));
		aiov.iov_base = uscmd->uscsi_bufaddr;
		aiov.iov_len  = uscmd->uscsi_buflen;
		uio->uio_iov  = &aiov;

		uio->uio_iovcnt  = 1;
		uio->uio_resid   = uscmd->uscsi_buflen;
		uio->uio_segflg  = dataspace;

		/*
		 * physio() will block here until the command completes....
		 */
		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling physio.\n");

		rval = physio(sd_uscsi_strategy, bp, dev,
		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE),
		    sduscsimin, uio);

		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
		    "returned from physio with 0x%x\n", rval);

	} else {
		/*
		 * We have to mimic what physio would do here! Argh!
		 * (No data phase, so set up the buf by hand and wait
		 * for completion with biowait.)
		 */
		bp->b_flags  = B_BUSY |
		    ((uscmd->uscsi_flags & USCSI_READ) ? B_READ : B_WRITE);
		bp->b_edev   = dev;
		bp->b_dev    = cmpdev(dev);	/* maybe unnecessary? */
		bp->b_bcount = 0;
		bp->b_blkno  = 0;

		SD_TRACE(SD_LOG_IO, un,
		    "sd_send_scsi_cmd: calling sd_uscsi_strategy...\n");

		(void) sd_uscsi_strategy(bp);

		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: calling biowait\n");

		rval = biowait(bp);

		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: "
		    "returned from  biowait with 0x%x\n", rval);
	}

done:

#ifdef SDDEBUG
	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
	    uscmd->uscsi_status, uscmd->uscsi_resid);
	if (uscmd->uscsi_bufaddr != NULL) {
		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
		if (dataspace == UIO_SYSSPACE) {
			SD_DUMP_MEMORY(un, SD_LOG_IO,
			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
			    uscmd->uscsi_buflen, SD_LOG_HEX);
		}
	}
#endif

	/*
	 * Get the status and residual to return to the caller.
	 */
	incmd->uscsi_status = uscmd->uscsi_status;
	incmd->uscsi_resid  = uscmd->uscsi_resid;

	/*
	 * If the caller wants sense data, copy back whatever sense data
	 * we may have gotten, and update the relevant rqsense info.
	 */
	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {

		/* Actual sense length = requested minus what remained. */
		int rqlen = uscmd->uscsi_rqlen - uscmd->uscsi_rqresid;
		rqlen = min(((int)incmd->uscsi_rqlen), rqlen);

		/* Update the Request Sense status and resid */
		incmd->uscsi_rqresid  = incmd->uscsi_rqlen - rqlen;
		incmd->uscsi_rqstatus = uscmd->uscsi_rqstatus;

		SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
		    "uscsi_rqstatus: 0x%02x  uscsi_rqresid:0x%x\n",
		    incmd->uscsi_rqstatus, incmd->uscsi_rqresid);

		/* Copy out the sense data for user processes */
		if ((incmd->uscsi_rqbuf != NULL) && (rqlen != 0)) {
			int flags =
			    (rqbufspace == UIO_USERSPACE) ? 0 : FKIOCTL;
			if (ddi_copyout(uscmd->uscsi_rqbuf, incmd->uscsi_rqbuf,
			    rqlen, flags) != 0) {
				rval = EFAULT;
			}
			/*
			 * Note: Can't touch incmd->uscsi_rqbuf so use
			 * uscmd->uscsi_rqbuf instead. They're the same.
			 */
			SD_INFO(SD_LOG_IO, un, "sd_send_scsi_cmd: "
			    "incmd->uscsi_rqbuf: 0x%p  rqlen:%d\n",
			    incmd->uscsi_rqbuf, rqlen);
			SD_DUMP_MEMORY(un, SD_LOG_IO, "rq",
			    (uchar_t *)uscmd->uscsi_rqbuf, rqlen, SD_LOG_HEX);
		}
	}

	/*
	 * Free allocated resources and return; mapout the buf in case it was
	 * mapped in by a lower layer.
	 */
	bp_mapout(bp);
	freerbuf(bp);
	kmem_free(uip, sizeof (struct sd_uscsi_info));
	if (uscmd->uscsi_rqbuf != NULL) {
		kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
	}
	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
	kmem_free(uscmd, sizeof (struct uscsi_cmd));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_cmd: exit\n");

	return (rval);
}
11964 
11965 
11966 /*
11967  *    Function: sd_buf_iodone
11968  *
11969  * Description: Frees the sd_xbuf & returns the buf to its originator.
11970  *
11971  *     Context: May be called from interrupt context.
11972  */
/* ARGSUSED */
static void
sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf *xp;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	mutex_enter(SD_MUTEX(un));

	/*
	 * Grab time when the cmd completed.
	 * This is used for determining if the system has been
	 * idle long enough to make it idle to the PM framework.
	 * This is for lowering the overhead, and therefore improving
	 * performance per I/O operation.
	 */
	un->un_pm_idle_time = ddi_get_time();

	/* Balance the increment done when the command entered the driver. */
	un->un_ncmds_in_driver--;
	ASSERT(un->un_ncmds_in_driver >= 0);
	SD_INFO(SD_LOG_IO, un, "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);

	mutex_exit(SD_MUTEX(un));

	ddi_xbuf_done(bp, un->un_xbuf_attr);	/* xbuf is gone after this */
	biodone(bp);				/* bp is gone after this */

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
}
12011 
12012 
12013 /*
12014  *    Function: sd_uscsi_iodone
12015  *
12016  * Description: Frees the sd_xbuf & returns the buf to its originator.
12017  *
12018  *     Context: May be called from interrupt context.
12019  */
12020 /* ARGSUSED */
12021 static void
12022 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12023 {
12024 	struct sd_xbuf *xp;
12025 
12026 	ASSERT(un != NULL);
12027 	ASSERT(bp != NULL);
12028 
12029 	xp = SD_GET_XBUF(bp);
12030 	ASSERT(xp != NULL);
12031 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12032 
12033 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
12034 
12035 	bp->b_private = xp->xb_private;
12036 
12037 	mutex_enter(SD_MUTEX(un));
12038 
12039 	/*
12040 	 * Grab time when the cmd completed.
12041 	 * This is used for determining if the system has been
12042 	 * idle long enough to make it idle to the PM framework.
12043 	 * This is for lowering the overhead, and therefore improving
12044 	 * performance per I/O operation.
12045 	 */
12046 	un->un_pm_idle_time = ddi_get_time();
12047 
12048 	un->un_ncmds_in_driver--;
12049 	ASSERT(un->un_ncmds_in_driver >= 0);
12050 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
12051 	    un->un_ncmds_in_driver);
12052 
12053 	mutex_exit(SD_MUTEX(un));
12054 
12055 	kmem_free(xp, sizeof (struct sd_xbuf));
12056 	biodone(bp);
12057 
12058 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
12059 }
12060 
12061 
12062 /*
12063  *    Function: sd_mapblockaddr_iostart
12064  *
 * Description: Verify request lies within the partition limits for
12066  *		the indicated minor device.  Issue "overrun" buf if
12067  *		request would exceed partition range.  Converts
12068  *		partition-relative block address to absolute.
12069  *
12070  *     Context: Can sleep
12071  *
12072  *      Issues: This follows what the old code did, in terms of accessing
12073  *		some of the partition info in the unit struct without holding
 *		the mutex.  This is a general issue, if the partition info
12075  *		can be altered while IO is in progress... as soon as we send
12076  *		a buf, its partitioning can be invalid before it gets to the
12077  *		device.  Probably the right fix is to move partitioning out
12078  *		of the driver entirely.
12079  */
12080 
static void
sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	daddr_t	nblocks;	/* #blocks in the given partition */
	daddr_t	blocknum;	/* Block number specified by the buf */
	size_t	requested_nblocks;
	size_t	available_nblocks;
	int	partition;
	diskaddr_t	partition_offset;
	struct sd_xbuf *xp;


	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/*
	 * If the geometry is not indicated as valid, attempt to access
	 * the unit & verify the geometry/label. This can be the case for
	 * removable-media devices, or if the device was opened in
	 * NDELAY/NONBLOCK mode.
	 */
	if ((un->un_f_geometry_is_valid != TRUE) &&
	    (sd_ready_and_valid(un) != SD_READY_VALID)) {
		/*
		 * For removable devices it is possible to start an I/O
		 * without a media by opening the device in nodelay mode.
		 * Also for writable CDs there can be many scenarios where
		 * there is no geometry yet but volume manager is trying to
		 * issue a read() just because it can see TOC on the CD. So
		 * do not print a message for removables.
		 */
		if (!un->un_f_has_removable_media) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "i/o to invalid geometry\n");
		}
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		SD_BEGIN_IODONE(index, un, bp);
		return;
	}

	partition = SDPART(bp->b_edev);

	/* #blocks in partition */
	nblocks = un->un_map[partition].dkl_nblk;

	/* Use of a local variable potentially improves performance slightly */
	partition_offset = un->un_offset[partition];

	/*
	 * blocknum is the starting block number of the request. At this
	 * point it is still relative to the start of the minor device.
	 */
	blocknum = xp->xb_blkno;

	/*
	 * Legacy: If the starting block number is one past the last block
	 * in the partition, do not set B_ERROR in the buf.
	 */
	if (blocknum == nblocks)  {
		goto error_exit;
	}

	/*
	 * Confirm that the first block of the request lies within the
	 * partition limits. Also the requested number of bytes must be
	 * a multiple of the system block size.
	 */
	if ((blocknum < 0) || (blocknum >= nblocks) ||
	    ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
		bp->b_flags |= B_ERROR;
		goto error_exit;
	}

	/*
	 * If the requested # blocks exceeds the available # blocks, that
	 * is an overrun of the partition.
	 */
	requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
	available_nblocks = (size_t)(nblocks - blocknum);
	ASSERT(nblocks >= blocknum);

	if (requested_nblocks > available_nblocks) {
		/*
		 * Allocate an "overrun" buf to allow the request to proceed
		 * for the amount of space available in the partition. The
		 * amount not transferred will be added into the b_resid
		 * when the operation is complete. The overrun buf
		 * replaces the original buf here, and the original buf
		 * is saved inside the overrun buf, for later use.
		 */
		size_t resid = SD_SYSBLOCKS2BYTES(un,
		    (offset_t)(requested_nblocks - available_nblocks));
		size_t count = bp->b_bcount - resid;
		/*
		 * Note: count is an unsigned entity thus it'll NEVER
		 * be less than 0 so ASSERT the original values are
		 * correct.
		 */
		ASSERT(bp->b_bcount >= resid);

		bp = sd_bioclone_alloc(bp, count, blocknum,
			(int (*)(struct buf *)) sd_mapblockaddr_iodone);
		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
		ASSERT(xp != NULL);
	}

	/* At this point there should be no residual for this buf. */
	ASSERT(bp->b_resid == 0);

	/* Convert the block number to an absolute address. */
	xp->xb_blkno += partition_offset;

	SD_NEXT_IOSTART(index, un, bp);

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);

	return;

error_exit:
	/* Fail the buf: the full request becomes the residual. */
	bp->b_resid = bp->b_bcount;
	SD_BEGIN_IODONE(index, un, bp);
	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
}
12214 
12215 
12216 /*
12217  *    Function: sd_mapblockaddr_iodone
12218  *
12219  * Description: Completion-side processing for partition management.
12220  *
12221  *     Context: May be called under interrupt context
12222  */
12223 
static void
sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	/* int	partition; */	/* Not used, see below. */
	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);

	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
		/*
		 * We have an "overrun" buf to deal with...
		 */
		struct sd_xbuf	*xp;
		struct buf	*obp;	/* ptr to the original buf */

		xp = SD_GET_XBUF(bp);
		ASSERT(xp != NULL);

		/* Retrieve the pointer to the original buf */
		obp = (struct buf *)xp->xb_private;
		ASSERT(obp != NULL);

		/*
		 * The original residual is the requested byte count minus
		 * what the (smaller) overrun buf actually transferred.
		 * Note bioerror() with b_error == 0 also clears B_ERROR.
		 */
		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
		bioerror(obp, bp->b_error);

		sd_bioclone_free(bp);

		/*
		 * Get back the original buf.
		 * Note that since the restoration of xb_blkno below
		 * was removed, the sd_xbuf is not needed.
		 */
		bp = obp;
		/*
		 * xp = SD_GET_XBUF(bp);
		 * ASSERT(xp != NULL);
		 */
	}

	/*
	 * Convert xp->xb_blkno back to a minor-device relative value.
	 * Note: this has been commented out, as it is not needed in the
	 * current implementation of the driver (ie, since this function
	 * is at the top of the layering chains, so the info will be
	 * discarded) and it is in the "hot" IO path.
	 *
	 * partition = getminor(bp->b_edev) & SDPART_MASK;
	 * xp->xb_blkno -= un->un_offset[partition];
	 */

	SD_NEXT_IODONE(index, un, bp);

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
}
12282 
12283 
12284 /*
12285  *    Function: sd_mapblocksize_iostart
12286  *
12287  * Description: Convert between system block size (un->un_sys_blocksize)
12288  *		and target block size (un->un_tgt_blocksize).
12289  *
12290  *     Context: Can sleep to allocate resources.
12291  *
12292  * Assumptions: A higher layer has already performed any partition validation,
12293  *		and converted the xp->xb_blkno to an absolute value relative
12294  *		to the start of the device.
12295  *
12296  *		It is also assumed that the higher layer has implemented
12297  *		an "overrun" mechanism for the case where the request would
12298  *		read/write beyond the end of a partition.  In this case we
12299  *		assume (and ASSERT) that bp->b_resid == 0.
12300  *
12301  *		Note: The implementation for this routine assumes the target
12302  *		block size remains constant between allocation and transport.
12303  */
12304 
static void
sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_mapblocksize_info	*bsp;
	struct sd_xbuf			*xp;
	offset_t first_byte;
	daddr_t	start_block, end_block;
	daddr_t	request_bytes;
	ushort_t is_aligned = FALSE;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(bp->b_resid == 0);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);

	/*
	 * For a non-writable CD, a write request is an error
	 */
	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
	    (un->un_f_mmc_writable_media == FALSE)) {
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		SD_BEGIN_IODONE(index, un, bp);
		return;
	}

	/*
	 * We do not need a shadow buf if the device is using
	 * un->un_sys_blocksize as its block size or if bcount == 0.
	 * In this case there is no layer-private data block allocated.
	 */
	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
	    (bp->b_bcount == 0)) {
		goto done;
	}

#if defined(__i386) || defined(__amd64)
	/* We do not support non-block-aligned transfers for ROD devices */
	ASSERT(!ISROD(un));
#endif

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
	    un->un_tgt_blocksize, un->un_sys_blocksize);
	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
	    "request start block:0x%x\n", xp->xb_blkno);
	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
	    "request len:0x%x\n", bp->b_bcount);

	/*
	 * Allocate the layer-private data area for the mapblocksize layer.
	 * Layers are allowed to use the xp_private member of the sd_xbuf
	 * struct to store the pointer to their layer-private data block, but
	 * each layer also has the responsibility of restoring the prior
	 * contents of xb_private before returning the buf/xbuf to the
	 * higher layer that sent it.
	 *
	 * Here we save the prior contents of xp->xb_private into the
	 * bsp->mbs_oprivate field of our layer-private data area. This value
	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
	 * the layer-private area and returning the buf/xbuf to the layer
	 * that sent it.
	 *
	 * Note that here we use kmem_zalloc for the allocation as there are
	 * parts of the mapblocksize code that expect certain fields to be
	 * zero unless explicitly set to a required value.
	 */
	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
	bsp->mbs_oprivate = xp->xb_private;
	xp->xb_private = bsp;

	/*
	 * This treats the data on the disk (target) as an array of bytes.
	 * first_byte is the byte offset, from the beginning of the device,
	 * to the location of the request. This is converted from a
	 * un->un_sys_blocksize block address to a byte offset, and then back
	 * to a block address based upon a un->un_tgt_blocksize block size.
	 *
	 * xp->xb_blkno should be absolute upon entry into this function,
	 * but, but it is based upon partitions that use the "system"
	 * block size. It must be adjusted to reflect the block size of
	 * the target.
	 *
	 * Note that end_block is actually the block that follows the last
	 * block of the request, but that's what is needed for the computation.
	 */
	first_byte  = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
	    un->un_tgt_blocksize;

	/* request_bytes is rounded up to a multiple of the target block size */
	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;

	/*
	 * See if the starting address of the request and the request
	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
	 * then we do not need to allocate a shadow buf to handle the request.
	 */
	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
		is_aligned = TRUE;
	}

	if ((bp->b_flags & B_READ) == 0) {
		/*
		 * Lock the range for a write operation. An aligned request is
		 * considered a simple write; otherwise the request must be a
		 * read-modify-write.
		 */
		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
	}

	/*
	 * Alloc a shadow buf if the request is not aligned. Also, this is
	 * where the READ command is generated for a read-modify-write. (The
	 * write phase is deferred until after the read completes.)
	 */
	if (is_aligned == FALSE) {

		struct sd_mapblocksize_info	*shadow_bsp;
		struct sd_xbuf	*shadow_xp;
		struct buf	*shadow_bp;

		/*
		 * Allocate the shadow buf and its associated xbuf. Note that
		 * after this call the xb_blkno value in both the original
		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
		 * same: absolute relative to the start of the device, and
		 * adjusted for the target block size. The b_blkno in the
		 * shadow buf will also be set to this value. We should never
		 * change b_blkno in the original bp however.
		 *
		 * Note also that the shadow buf will always need to be a
		 * READ command, regardless of whether the incoming command
		 * is a READ or a WRITE.
		 */
		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
		    xp->xb_blkno,
		    (int (*)(struct buf *)) sd_mapblocksize_iodone);

		shadow_xp = SD_GET_XBUF(shadow_bp);

		/*
		 * Allocate the layer-private data for the shadow buf.
		 * (No need to preserve xb_private in the shadow xbuf.)
		 */
		shadow_xp->xb_private = shadow_bsp =
		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);

		/*
		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
		 * to figure out where the start of the user data is (based upon
		 * the system block size) in the data returned by the READ
		 * command (which will be based upon the target blocksize). Note
		 * that this is only really used if the request is unaligned.
		 */
		bsp->mbs_copy_offset = (ssize_t)(first_byte -
		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
		ASSERT((bsp->mbs_copy_offset >= 0) &&
		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));

		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;

		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;

		/* Transfer the wmap (if any) to the shadow buf */
		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
		bsp->mbs_wmp = NULL;

		/*
		 * The shadow buf goes on from here in place of the
		 * original buf.
		 */
		shadow_bsp->mbs_orig_bp = bp;
		bp = shadow_bp;
	}

	SD_INFO(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
	SD_INFO(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
	    request_bytes);
	SD_INFO(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);

done:
	SD_NEXT_IOSTART(index, un, bp);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
}
12504 
12505 
12506 /*
12507  *    Function: sd_mapblocksize_iodone
12508  *
12509  * Description: Completion side processing for block-size mapping.
12510  *
12511  *     Context: May be called under interrupt context
12512  */
12513 
static void
sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_mapblocksize_info	*bsp;
	struct sd_xbuf	*xp;
	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
	struct buf	*orig_bp;	/* ptr to the original buf */
	offset_t	shadow_end;
	offset_t	request_end;
	offset_t	shadow_start;
	ssize_t		copy_offset;
	size_t		copy_length;
	size_t		shortfall;
	uint_t		is_write;	/* TRUE if this bp is a WRITE */
	uint_t		has_wmap;	/* TRUE if this bp has a wmap */

	ASSERT(un != NULL);
	ASSERT(bp != NULL);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);

	/*
	 * There is no shadow buf or layer-private data if the target is
	 * using un->un_sys_blocksize as its block size or if bcount == 0.
	 */
	if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
	    (bp->b_bcount == 0)) {
		goto exit;
	}

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/* Retrieve the pointer to the layer-private data area from the xbuf. */
	bsp = xp->xb_private;

	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;

	if (is_write) {
		/*
		 * For a WRITE request we must free up the block range that
		 * we have locked up.  This holds regardless of whether this is
		 * an aligned write request or a read-modify-write request.
		 */
		sd_range_unlock(un, bsp->mbs_wmp);
		bsp->mbs_wmp = NULL;
	}

	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
		/*
		 * An aligned read or write command will have no shadow buf;
		 * there is not much else to do with it.
		 */
		goto done;
	}

	orig_bp = bsp->mbs_orig_bp;
	ASSERT(orig_bp != NULL);
	orig_xp = SD_GET_XBUF(orig_bp);
	ASSERT(orig_xp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	if (!is_write && has_wmap) {
		/*
		 * A READ with a wmap means this is the READ phase of a
		 * read-modify-write. If an error occurred on the READ then
		 * we do not proceed with the WRITE phase or copy any data.
		 * Just release the write maps and return with an error.
		 */
		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
			orig_bp->b_resid = orig_bp->b_bcount;
			bioerror(orig_bp, bp->b_error);
			sd_range_unlock(un, bsp->mbs_wmp);
			goto freebuf_done;
		}
	}

	/*
	 * Here is where we set up to copy the data from the shadow buf
	 * into the space associated with the original buf.
	 *
	 * To deal with the conversion between block sizes, these
	 * computations treat the data as an array of bytes, with the
	 * first byte (byte 0) corresponding to the first byte in the
	 * first block on the disk.
	 */

	/*
	 * shadow_start and shadow_end indicate the location and extent of
	 * the data returned with the shadow IO request.
	 */
	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;

	/*
	 * copy_offset gives the offset (in bytes) from the start of the first
	 * block of the READ request to the beginning of the data.  We retrieve
	 * this value from xb_pktp in the ORIGINAL xbuf, as it has been saved
	 * there by sd_mapblocksize_iostart(). copy_length gives the amount of
	 * data to be copied (in bytes).
	 */
	copy_offset  = bsp->mbs_copy_offset;
	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
	copy_length  = orig_bp->b_bcount;
	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;

	/*
	 * Set up the resid and error fields of orig_bp as appropriate.
	 */
	if (shadow_end >= request_end) {
		/* We got all the requested data; set resid to zero */
		orig_bp->b_resid = 0;
	} else {
		/*
		 * We failed to get enough data to fully satisfy the original
		 * request. Just copy back whatever data we got and set
		 * up the residual and error code as required.
		 *
		 * 'shortfall' is the amount by which the data received with the
		 * shadow buf has "fallen short" of the requested amount.
		 */
		shortfall = (size_t)(request_end - shadow_end);

		if (shortfall > orig_bp->b_bcount) {
			/*
			 * We did not get enough data to even partially
			 * fulfill the original request.  The residual is
			 * equal to the amount requested.
			 */
			orig_bp->b_resid = orig_bp->b_bcount;
		} else {
			/*
			 * We did not get all the data that we requested
			 * from the device, but we will try to return what
			 * portion we did get.
			 */
			orig_bp->b_resid = shortfall;
		}
		ASSERT(copy_length >= orig_bp->b_resid);
		copy_length  -= orig_bp->b_resid;
	}

	/* Propagate the error code from the shadow buf to the original buf */
	bioerror(orig_bp, bp->b_error);

	if (is_write) {
		goto freebuf_done;	/* No data copying for a WRITE */
	}

	if (has_wmap) {
		/*
		 * This is a READ command from the READ phase of a
		 * read-modify-write request. We have to copy the data given
		 * by the user OVER the data returned by the READ command,
		 * then convert the command from a READ to a WRITE and send
		 * it back to the target.
		 */
		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
		    copy_length);

		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */

		/*
		 * Dispatch the WRITE command to the taskq thread, which
		 * will in turn send the command to the target. When the
		 * WRITE command completes, we (sd_mapblocksize_iodone())
		 * will get called again as part of the iodone chain
		 * processing for it. Note that we will still be dealing
		 * with the shadow buf at that point.
		 */
		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
		    KM_NOSLEEP) != 0) {
			/*
			 * Dispatch was successful so we are done. Return
			 * without going any higher up the iodone chain. Do
			 * not free up any layer-private data until after the
			 * WRITE completes.
			 */
			return;
		}

		/*
		 * Dispatch of the WRITE command failed; set up the error
		 * condition and send this IO back up the iodone chain.
		 */
		bioerror(orig_bp, EIO);
		orig_bp->b_resid = orig_bp->b_bcount;

	} else {
		/*
		 * This is a regular READ request (ie, not a RMW). Copy the
		 * data from the shadow buf into the original buf. The
		 * copy_offset compensates for any "misalignment" between the
		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
		 * original buf (with its un->un_sys_blocksize blocks).
		 */
		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
		    copy_length);
	}

freebuf_done:

	/*
	 * At this point we still have both the shadow buf AND the original
	 * buf to deal with, as well as the layer-private data area in each.
	 * Local variables are as follows:
	 *
	 * bp -- points to shadow buf
	 * xp -- points to xbuf of shadow buf
	 * bsp -- points to layer-private data area of shadow buf
	 * orig_bp -- points to original buf
	 *
	 * First free the shadow buf and its associated xbuf, then free the
	 * layer-private data area from the shadow buf. There is no need to
	 * restore xb_private in the shadow xbuf.
	 */
	sd_shadow_buf_free(bp);
	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));

	/*
	 * Now update the local variables to point to the original buf, xbuf,
	 * and layer-private area.
	 */
	bp = orig_bp;
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(xp == orig_xp);
	bsp = xp->xb_private;
	ASSERT(bsp != NULL);

done:
	/*
	 * Restore xb_private to whatever it was set to by the next higher
	 * layer in the chain, then free the layer-private data area.
	 */
	xp->xb_private = bsp->mbs_oprivate;
	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));

exit:
	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);

	SD_NEXT_IODONE(index, un, bp);
}
12760 
12761 
12762 /*
12763  *    Function: sd_checksum_iostart
12764  *
12765  * Description: A stub function for a layer that's currently not used.
12766  *		For now just a placeholder.
12767  *
12768  *     Context: Kernel thread context
12769  */
12770 
12771 static void
12772 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
12773 {
12774 	ASSERT(un != NULL);
12775 	ASSERT(bp != NULL);
12776 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12777 	SD_NEXT_IOSTART(index, un, bp);
12778 }
12779 
12780 
12781 /*
12782  *    Function: sd_checksum_iodone
12783  *
12784  * Description: A stub function for a layer that's currently not used.
12785  *		For now just a placeholder.
12786  *
12787  *     Context: May be called under interrupt context
12788  */
12789 
12790 static void
12791 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
12792 {
12793 	ASSERT(un != NULL);
12794 	ASSERT(bp != NULL);
12795 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12796 	SD_NEXT_IODONE(index, un, bp);
12797 }
12798 
12799 
12800 /*
12801  *    Function: sd_checksum_uscsi_iostart
12802  *
12803  * Description: A stub function for a layer that's currently not used.
12804  *		For now just a placeholder.
12805  *
12806  *     Context: Kernel thread context
12807  */
12808 
12809 static void
12810 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
12811 {
12812 	ASSERT(un != NULL);
12813 	ASSERT(bp != NULL);
12814 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12815 	SD_NEXT_IOSTART(index, un, bp);
12816 }
12817 
12818 
12819 /*
12820  *    Function: sd_checksum_uscsi_iodone
12821  *
12822  * Description: A stub function for a layer that's currently not used.
12823  *		For now just a placeholder.
12824  *
12825  *     Context: May be called under interrupt context
12826  */
12827 
12828 static void
12829 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12830 {
12831 	ASSERT(un != NULL);
12832 	ASSERT(bp != NULL);
12833 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12834 	SD_NEXT_IODONE(index, un, bp);
12835 }
12836 
12837 
12838 /*
12839  *    Function: sd_pm_iostart
12840  *
 * Description: iostart-side routine for Power management.
12842  *
12843  *     Context: Kernel thread context
12844  */
12845 
12846 static void
12847 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
12848 {
12849 	ASSERT(un != NULL);
12850 	ASSERT(bp != NULL);
12851 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12852 	ASSERT(!mutex_owned(&un->un_pm_mutex));
12853 
12854 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
12855 
12856 	if (sd_pm_entry(un) != DDI_SUCCESS) {
12857 		/*
12858 		 * Set up to return the failed buf back up the 'iodone'
12859 		 * side of the calling chain.
12860 		 */
12861 		bioerror(bp, EIO);
12862 		bp->b_resid = bp->b_bcount;
12863 
12864 		SD_BEGIN_IODONE(index, un, bp);
12865 
12866 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12867 		return;
12868 	}
12869 
12870 	SD_NEXT_IOSTART(index, un, bp);
12871 
12872 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
12873 }
12874 
12875 
12876 /*
12877  *    Function: sd_pm_iodone
12878  *
 * Description: iodone-side routine for power management.
12880  *
12881  *     Context: may be called from interrupt context
12882  */
12883 
12884 static void
12885 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
12886 {
12887 	ASSERT(un != NULL);
12888 	ASSERT(bp != NULL);
12889 	ASSERT(!mutex_owned(&un->un_pm_mutex));
12890 
12891 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
12892 
12893 	/*
12894 	 * After attach the following flag is only read, so don't
12895 	 * take the penalty of acquiring a mutex for it.
12896 	 */
12897 	if (un->un_f_pm_is_enabled == TRUE) {
12898 		sd_pm_exit(un);
12899 	}
12900 
12901 	SD_NEXT_IODONE(index, un, bp);
12902 
12903 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
12904 }
12905 
12906 
12907 /*
12908  *    Function: sd_core_iostart
12909  *
12910  * Description: Primary driver function for enqueuing buf(9S) structs from
12911  *		the system and initiating IO to the target device
12912  *
12913  *     Context: Kernel thread context. Can sleep.
12914  *
12915  * Assumptions:  - The given xp->xb_blkno is absolute
12916  *		   (ie, relative to the start of the device).
12917  *		 - The IO is to be done using the native blocksize of
12918  *		   the device, as specified in un->un_tgt_blocksize.
12919  */
12920 /* ARGSUSED */
12921 static void
12922 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
12923 {
12924 	struct sd_xbuf *xp;
12925 
12926 	ASSERT(un != NULL);
12927 	ASSERT(bp != NULL);
12928 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12929 	ASSERT(bp->b_resid == 0);
12930 
12931 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
12932 
12933 	xp = SD_GET_XBUF(bp);
12934 	ASSERT(xp != NULL);
12935 
12936 	mutex_enter(SD_MUTEX(un));
12937 
12938 	/*
12939 	 * If we are currently in the failfast state, fail any new IO
12940 	 * that has B_FAILFAST set, then return.
12941 	 */
12942 	if ((bp->b_flags & B_FAILFAST) &&
12943 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
12944 		mutex_exit(SD_MUTEX(un));
12945 		bioerror(bp, EIO);
12946 		bp->b_resid = bp->b_bcount;
12947 		SD_BEGIN_IODONE(index, un, bp);
12948 		return;
12949 	}
12950 
12951 	if (SD_IS_DIRECT_PRIORITY(xp)) {
12952 		/*
12953 		 * Priority command -- transport it immediately.
12954 		 *
12955 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
12956 		 * because all direct priority commands should be associated
12957 		 * with error recovery actions which we don't want to retry.
12958 		 */
12959 		sd_start_cmds(un, bp);
12960 	} else {
12961 		/*
12962 		 * Normal command -- add it to the wait queue, then start
12963 		 * transporting commands from the wait queue.
12964 		 */
12965 		sd_add_buf_to_waitq(un, bp);
12966 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
12967 		sd_start_cmds(un, NULL);
12968 	}
12969 
12970 	mutex_exit(SD_MUTEX(un));
12971 
12972 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
12973 }
12974 
12975 
12976 /*
12977  *    Function: sd_init_cdb_limits
12978  *
12979  * Description: This is to handle scsi_pkt initialization differences
12980  *		between the driver platforms.
12981  *
12982  *		Legacy behaviors:
12983  *
12984  *		If the block number or the sector count exceeds the
12985  *		capabilities of a Group 0 command, shift over to a
12986  *		Group 1 command. We don't blindly use Group 1
12987  *		commands because a) some drives (CDC Wren IVs) get a
12988  *		bit confused, and b) there is probably a fair amount
12989  *		of speed difference for a target to receive and decode
12990  *		a 10 byte command instead of a 6 byte command.
12991  *
12992  *		The xfer time difference of 6 vs 10 byte CDBs is
12993  *		still significant so this code is still worthwhile.
12994  *		10 byte CDBs are very inefficient with the fas HBA driver
12995  *		and older disks. Each CDB byte took 1 usec with some
12996  *		popular disks.
12997  *
12998  *     Context: Must be called at attach time
12999  */
13000 
13001 static void
13002 sd_init_cdb_limits(struct sd_lun *un)
13003 {
13004 	/*
13005 	 * Use CDB_GROUP1 commands for most devices except for
13006 	 * parallel SCSI fixed drives in which case we get better
13007 	 * performance using CDB_GROUP0 commands (where applicable).
13008 	 */
13009 	un->un_mincdb = SD_CDB_GROUP1;
13010 #if !defined(__fibre)
13011 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
13012 	    !un->un_f_has_removable_media) {
13013 		un->un_mincdb = SD_CDB_GROUP0;
13014 	}
13015 #endif
13016 
13017 	/*
13018 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
13019 	 * commands for fixed disks unless we are building for a 32 bit
13020 	 * kernel.
13021 	 */
13022 #ifdef _LP64
13023 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13024 	    SD_CDB_GROUP4;
13025 #else
13026 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13027 	    SD_CDB_GROUP1;
13028 #endif
13029 
13030 	/*
13031 	 * x86 systems require the PKT_DMA_PARTIAL flag
13032 	 */
13033 #if defined(__x86)
13034 	un->un_pkt_flags = PKT_DMA_PARTIAL;
13035 #else
13036 	un->un_pkt_flags = 0;
13037 #endif
13038 
13039 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13040 	    ? sizeof (struct scsi_arq_status) : 1);
13041 	un->un_cmd_timeout = (ushort_t)sd_io_time;
13042 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13043 }
13044 
13045 
13046 /*
13047  *    Function: sd_initpkt_for_buf
13048  *
13049  * Description: Allocate and initialize for transport a scsi_pkt struct,
13050  *		based upon the info specified in the given buf struct.
13051  *
13052  *		Assumes the xb_blkno in the request is absolute (ie,
13053  *		relative to the start of the device (NOT partition!).
13054  *		Also assumes that the request is using the native block
13055  *		size of the device (as returned by the READ CAPACITY
13056  *		command).
13057  *
13058  * Return Code: SD_PKT_ALLOC_SUCCESS
13059  *		SD_PKT_ALLOC_FAILURE
13060  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13061  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13062  *
13063  *     Context: Kernel thread and may be called from software interrupt context
13064  *		as part of a sdrunout callback. This function may not block or
13065  *		call routines that block
13066  */
13067 
13068 static int
13069 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
13070 {
13071 	struct sd_xbuf	*xp;
13072 	struct scsi_pkt *pktp = NULL;
13073 	struct sd_lun	*un;
13074 	size_t		blockcount;
13075 	daddr_t		startblock;
13076 	int		rval;
13077 	int		cmd_flags;
13078 
13079 	ASSERT(bp != NULL);
13080 	ASSERT(pktpp != NULL);
13081 	xp = SD_GET_XBUF(bp);
13082 	ASSERT(xp != NULL);
13083 	un = SD_GET_UN(bp);
13084 	ASSERT(un != NULL);
13085 	ASSERT(mutex_owned(SD_MUTEX(un)));
13086 	ASSERT(bp->b_resid == 0);
13087 
13088 	SD_TRACE(SD_LOG_IO_CORE, un,
13089 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
13090 
13091 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13092 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
13093 		/*
13094 		 * Already have a scsi_pkt -- just need DMA resources.
13095 		 * We must recompute the CDB in case the mapping returns
13096 		 * a nonzero pkt_resid.
13097 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
13098 		 * that is being retried, the unmap/remap of the DMA resouces
13099 		 * will result in the entire transfer starting over again
13100 		 * from the very first block.
13101 		 */
13102 		ASSERT(xp->xb_pktp != NULL);
13103 		pktp = xp->xb_pktp;
13104 	} else {
13105 		pktp = NULL;
13106 	}
13107 #endif /* __i386 || __amd64 */
13108 
13109 	startblock = xp->xb_blkno;	/* Absolute block num. */
13110 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
13111 
13112 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13113 
13114 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
13115 
13116 #else
13117 
13118 	cmd_flags = un->un_pkt_flags | xp->xb_pkt_flags;
13119 
13120 #endif
13121 
13122 	/*
13123 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
13124 	 * call scsi_init_pkt, and build the CDB.
13125 	 */
13126 	rval = sd_setup_rw_pkt(un, &pktp, bp,
13127 	    cmd_flags, sdrunout, (caddr_t)un,
13128 	    startblock, blockcount);
13129 
13130 	if (rval == 0) {
13131 		/*
13132 		 * Success.
13133 		 *
13134 		 * If partial DMA is being used and required for this transfer.
13135 		 * set it up here.
13136 		 */
13137 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
13138 		    (pktp->pkt_resid != 0)) {
13139 
13140 			/*
13141 			 * Save the CDB length and pkt_resid for the
13142 			 * next xfer
13143 			 */
13144 			xp->xb_dma_resid = pktp->pkt_resid;
13145 
13146 			/* rezero resid */
13147 			pktp->pkt_resid = 0;
13148 
13149 		} else {
13150 			xp->xb_dma_resid = 0;
13151 		}
13152 
13153 		pktp->pkt_flags = un->un_tagflags;
13154 		pktp->pkt_time  = un->un_cmd_timeout;
13155 		pktp->pkt_comp  = sdintr;
13156 
13157 		pktp->pkt_private = bp;
13158 		*pktpp = pktp;
13159 
13160 		SD_TRACE(SD_LOG_IO_CORE, un,
13161 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
13162 
13163 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
13164 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
13165 #endif
13166 
13167 		return (SD_PKT_ALLOC_SUCCESS);
13168 
13169 	}
13170 
13171 	/*
13172 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
13173 	 * from sd_setup_rw_pkt.
13174 	 */
13175 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
13176 
13177 	if (rval == SD_PKT_ALLOC_FAILURE) {
13178 		*pktpp = NULL;
13179 		/*
13180 		 * Set the driver state to RWAIT to indicate the driver
13181 		 * is waiting on resource allocations. The driver will not
13182 		 * suspend, pm_suspend, or detatch while the state is RWAIT.
13183 		 */
13184 		New_state(un, SD_STATE_RWAIT);
13185 
13186 		SD_ERROR(SD_LOG_IO_CORE, un,
13187 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
13188 
13189 		if ((bp->b_flags & B_ERROR) != 0) {
13190 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13191 		}
13192 		return (SD_PKT_ALLOC_FAILURE);
13193 	} else {
13194 		/*
13195 		 * PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13196 		 *
13197 		 * This should never happen.  Maybe someone messed with the
13198 		 * kernel's minphys?
13199 		 */
13200 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13201 		    "Request rejected: too large for CDB: "
13202 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
13203 		SD_ERROR(SD_LOG_IO_CORE, un,
13204 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
13205 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13206 
13207 	}
13208 }
13209 
13210 
13211 /*
13212  *    Function: sd_destroypkt_for_buf
13213  *
13214  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13215  *
13216  *     Context: Kernel thread or interrupt context
13217  */
13218 
13219 static void
13220 sd_destroypkt_for_buf(struct buf *bp)
13221 {
13222 	ASSERT(bp != NULL);
13223 	ASSERT(SD_GET_UN(bp) != NULL);
13224 
13225 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13226 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13227 
13228 	ASSERT(SD_GET_PKTP(bp) != NULL);
13229 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13230 
13231 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13232 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13233 }
13234 
13235 /*
13236  *    Function: sd_setup_rw_pkt
13237  *
13238  * Description: Determines appropriate CDB group for the requested LBA
13239  *		and transfer length, calls scsi_init_pkt, and builds
13240  *		the CDB.  Do not use for partial DMA transfers except
13241  *		for the initial transfer since the CDB size must
13242  *		remain constant.
13243  *
13244  *     Context: Kernel thread and may be called from software interrupt
13245  *		context as part of a sdrunout callback. This function may not
13246  *		block or call routines that block
13247  */
13248 
13249 
13250 int
13251 sd_setup_rw_pkt(struct sd_lun *un,
13252     struct scsi_pkt **pktpp, struct buf *bp, int flags,
13253     int (*callback)(caddr_t), caddr_t callback_arg,
13254     diskaddr_t lba, uint32_t blockcount)
13255 {
13256 	struct scsi_pkt *return_pktp;
13257 	union scsi_cdb *cdbp;
13258 	struct sd_cdbinfo *cp = NULL;
13259 	int i;
13260 
13261 	/*
13262 	 * See which size CDB to use, based upon the request.
13263 	 */
13264 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
13265 
13266 		/*
13267 		 * Check lba and block count against sd_cdbtab limits.
13268 		 * In the partial DMA case, we have to use the same size
13269 		 * CDB for all the transfers.  Check lba + blockcount
13270 		 * against the max LBA so we know that segment of the
13271 		 * transfer can use the CDB we select.
13272 		 */
13273 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
13274 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
13275 
13276 			/*
13277 			 * The command will fit into the CDB type
13278 			 * specified by sd_cdbtab[i].
13279 			 */
13280 			cp = sd_cdbtab + i;
13281 
13282 			/*
13283 			 * Call scsi_init_pkt so we can fill in the
13284 			 * CDB.
13285 			 */
13286 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
13287 			    bp, cp->sc_grpcode, un->un_status_len, 0,
13288 			    flags, callback, callback_arg);
13289 
13290 			if (return_pktp != NULL) {
13291 
13292 				/*
13293 				 * Return new value of pkt
13294 				 */
13295 				*pktpp = return_pktp;
13296 
13297 				/*
13298 				 * To be safe, zero the CDB insuring there is
13299 				 * no leftover data from a previous command.
13300 				 */
13301 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
13302 
13303 				/*
13304 				 * Handle partial DMA mapping
13305 				 */
13306 				if (return_pktp->pkt_resid != 0) {
13307 
13308 					/*
13309 					 * Not going to xfer as many blocks as
13310 					 * originally expected
13311 					 */
13312 					blockcount -=
13313 					    SD_BYTES2TGTBLOCKS(un,
13314 						return_pktp->pkt_resid);
13315 				}
13316 
13317 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
13318 
13319 				/*
13320 				 * Set command byte based on the CDB
13321 				 * type we matched.
13322 				 */
13323 				cdbp->scc_cmd = cp->sc_grpmask |
13324 				    ((bp->b_flags & B_READ) ?
13325 					SCMD_READ : SCMD_WRITE);
13326 
13327 				SD_FILL_SCSI1_LUN(un, return_pktp);
13328 
13329 				/*
13330 				 * Fill in LBA and length
13331 				 */
13332 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
13333 				    (cp->sc_grpcode == CDB_GROUP4) ||
13334 				    (cp->sc_grpcode == CDB_GROUP0) ||
13335 				    (cp->sc_grpcode == CDB_GROUP5));
13336 
13337 				if (cp->sc_grpcode == CDB_GROUP1) {
13338 					FORMG1ADDR(cdbp, lba);
13339 					FORMG1COUNT(cdbp, blockcount);
13340 					return (0);
13341 				} else if (cp->sc_grpcode == CDB_GROUP4) {
13342 					FORMG4LONGADDR(cdbp, lba);
13343 					FORMG4COUNT(cdbp, blockcount);
13344 					return (0);
13345 				} else if (cp->sc_grpcode == CDB_GROUP0) {
13346 					FORMG0ADDR(cdbp, lba);
13347 					FORMG0COUNT(cdbp, blockcount);
13348 					return (0);
13349 				} else if (cp->sc_grpcode == CDB_GROUP5) {
13350 					FORMG5ADDR(cdbp, lba);
13351 					FORMG5COUNT(cdbp, blockcount);
13352 					return (0);
13353 				}
13354 
13355 				/*
13356 				 * It should be impossible to not match one
13357 				 * of the CDB types above, so we should never
13358 				 * reach this point.  Set the CDB command byte
13359 				 * to test-unit-ready to avoid writing
13360 				 * to somewhere we don't intend.
13361 				 */
13362 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
13363 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13364 			} else {
13365 				/*
13366 				 * Couldn't get scsi_pkt
13367 				 */
13368 				return (SD_PKT_ALLOC_FAILURE);
13369 			}
13370 		}
13371 	}
13372 
13373 	/*
13374 	 * None of the available CDB types were suitable.  This really
13375 	 * should never happen:  on a 64 bit system we support
13376 	 * READ16/WRITE16 which will hold an entire 64 bit disk address
13377 	 * and on a 32 bit system we will refuse to bind to a device
13378 	 * larger than 2TB so addresses will never be larger than 32 bits.
13379 	 */
13380 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13381 }
13382 
13383 #if defined(__i386) || defined(__amd64)
13384 /*
13385  *    Function: sd_setup_next_rw_pkt
13386  *
13387  * Description: Setup packet for partial DMA transfers, except for the
13388  * 		initial transfer.  sd_setup_rw_pkt should be used for
13389  *		the initial transfer.
13390  *
13391  *     Context: Kernel thread and may be called from interrupt context.
13392  */
13393 
13394 int
13395 sd_setup_next_rw_pkt(struct sd_lun *un,
13396     struct scsi_pkt *pktp, struct buf *bp,
13397     diskaddr_t lba, uint32_t blockcount)
13398 {
13399 	uchar_t com;
13400 	union scsi_cdb *cdbp;
13401 	uchar_t cdb_group_id;
13402 
13403 	ASSERT(pktp != NULL);
13404 	ASSERT(pktp->pkt_cdbp != NULL);
13405 
13406 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
13407 	com = cdbp->scc_cmd;
13408 	cdb_group_id = CDB_GROUPID(com);
13409 
13410 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
13411 	    (cdb_group_id == CDB_GROUPID_1) ||
13412 	    (cdb_group_id == CDB_GROUPID_4) ||
13413 	    (cdb_group_id == CDB_GROUPID_5));
13414 
13415 	/*
13416 	 * Move pkt to the next portion of the xfer.
13417 	 * func is NULL_FUNC so we do not have to release
13418 	 * the disk mutex here.
13419 	 */
13420 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
13421 	    NULL_FUNC, NULL) == pktp) {
13422 		/* Success.  Handle partial DMA */
13423 		if (pktp->pkt_resid != 0) {
13424 			blockcount -=
13425 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
13426 		}
13427 
13428 		cdbp->scc_cmd = com;
13429 		SD_FILL_SCSI1_LUN(un, pktp);
13430 		if (cdb_group_id == CDB_GROUPID_1) {
13431 			FORMG1ADDR(cdbp, lba);
13432 			FORMG1COUNT(cdbp, blockcount);
13433 			return (0);
13434 		} else if (cdb_group_id == CDB_GROUPID_4) {
13435 			FORMG4LONGADDR(cdbp, lba);
13436 			FORMG4COUNT(cdbp, blockcount);
13437 			return (0);
13438 		} else if (cdb_group_id == CDB_GROUPID_0) {
13439 			FORMG0ADDR(cdbp, lba);
13440 			FORMG0COUNT(cdbp, blockcount);
13441 			return (0);
13442 		} else if (cdb_group_id == CDB_GROUPID_5) {
13443 			FORMG5ADDR(cdbp, lba);
13444 			FORMG5COUNT(cdbp, blockcount);
13445 			return (0);
13446 		}
13447 
13448 		/* Unreachable */
13449 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13450 	}
13451 
13452 	/*
13453 	 * Error setting up next portion of cmd transfer.
13454 	 * Something is definitely very wrong and this
13455 	 * should not happen.
13456 	 */
13457 	return (SD_PKT_ALLOC_FAILURE);
13458 }
13459 #endif /* defined(__i386) || defined(__amd64) */
13460 
13461 /*
13462  *    Function: sd_initpkt_for_uscsi
13463  *
13464  * Description: Allocate and initialize for transport a scsi_pkt struct,
13465  *		based upon the info specified in the given uscsi_cmd struct.
13466  *
13467  * Return Code: SD_PKT_ALLOC_SUCCESS
13468  *		SD_PKT_ALLOC_FAILURE
13469  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13470  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13471  *
13472  *     Context: Kernel thread and may be called from software interrupt context
13473  *		as part of a sdrunout callback. This function may not block or
13474  *		call routines that block
13475  */
13476 
13477 static int
13478 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
13479 {
13480 	struct uscsi_cmd *uscmd;
13481 	struct sd_xbuf	*xp;
13482 	struct scsi_pkt	*pktp;
13483 	struct sd_lun	*un;
13484 	uint32_t	flags = 0;
13485 
13486 	ASSERT(bp != NULL);
13487 	ASSERT(pktpp != NULL);
13488 	xp = SD_GET_XBUF(bp);
13489 	ASSERT(xp != NULL);
13490 	un = SD_GET_UN(bp);
13491 	ASSERT(un != NULL);
13492 	ASSERT(mutex_owned(SD_MUTEX(un)));
13493 
13494 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13495 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13496 	ASSERT(uscmd != NULL);
13497 
13498 	SD_TRACE(SD_LOG_IO_CORE, un,
13499 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
13500 
13501 	/*
13502 	 * Allocate the scsi_pkt for the command.
13503 	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
13504 	 *	 during scsi_init_pkt time and will continue to use the
13505 	 *	 same path as long as the same scsi_pkt is used without
13506 	 *	 intervening scsi_dma_free(). Since uscsi command does
13507 	 *	 not call scsi_dmafree() before retry failed command, it
13508 	 *	 is necessary to make sure PKT_DMA_PARTIAL flag is NOT
13509 	 *	 set such that scsi_vhci can use other available path for
13510 	 *	 retry. Besides, ucsci command does not allow DMA breakup,
13511 	 *	 so there is no need to set PKT_DMA_PARTIAL flag.
13512 	 */
13513 	pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
13514 	    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
13515 	    sizeof (struct scsi_arq_status), 0,
13516 	    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
13517 	    sdrunout, (caddr_t)un);
13518 
13519 	if (pktp == NULL) {
13520 		*pktpp = NULL;
13521 		/*
13522 		 * Set the driver state to RWAIT to indicate the driver
13523 		 * is waiting on resource allocations. The driver will not
13524 		 * suspend, pm_suspend, or detatch while the state is RWAIT.
13525 		 */
13526 		New_state(un, SD_STATE_RWAIT);
13527 
13528 		SD_ERROR(SD_LOG_IO_CORE, un,
13529 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
13530 
13531 		if ((bp->b_flags & B_ERROR) != 0) {
13532 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13533 		}
13534 		return (SD_PKT_ALLOC_FAILURE);
13535 	}
13536 
13537 	/*
13538 	 * We do not do DMA breakup for USCSI commands, so return failure
13539 	 * here if all the needed DMA resources were not allocated.
13540 	 */
13541 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
13542 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
13543 		scsi_destroy_pkt(pktp);
13544 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
13545 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
13546 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
13547 	}
13548 
13549 	/* Init the cdb from the given uscsi struct */
13550 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
13551 	    uscmd->uscsi_cdb[0], 0, 0, 0);
13552 
13553 	SD_FILL_SCSI1_LUN(un, pktp);
13554 
13555 	/*
13556 	 * Set up the optional USCSI flags. See the uscsi (7I) man page
13557 	 * for listing of the supported flags.
13558 	 */
13559 
13560 	if (uscmd->uscsi_flags & USCSI_SILENT) {
13561 		flags |= FLAG_SILENT;
13562 	}
13563 
13564 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
13565 		flags |= FLAG_DIAGNOSE;
13566 	}
13567 
13568 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
13569 		flags |= FLAG_ISOLATE;
13570 	}
13571 
13572 	if (un->un_f_is_fibre == FALSE) {
13573 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
13574 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
13575 		}
13576 	}
13577 
13578 	/*
13579 	 * Set the pkt flags here so we save time later.
13580 	 * Note: These flags are NOT in the uscsi man page!!!
13581 	 */
13582 	if (uscmd->uscsi_flags & USCSI_HEAD) {
13583 		flags |= FLAG_HEAD;
13584 	}
13585 
13586 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
13587 		flags |= FLAG_NOINTR;
13588 	}
13589 
13590 	/*
13591 	 * For tagged queueing, things get a bit complicated.
13592 	 * Check first for head of queue and last for ordered queue.
13593 	 * If neither head nor order, use the default driver tag flags.
13594 	 */
13595 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
13596 		if (uscmd->uscsi_flags & USCSI_HTAG) {
13597 			flags |= FLAG_HTAG;
13598 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
13599 			flags |= FLAG_OTAG;
13600 		} else {
13601 			flags |= un->un_tagflags & FLAG_TAGMASK;
13602 		}
13603 	}
13604 
13605 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
13606 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
13607 	}
13608 
13609 	pktp->pkt_flags = flags;
13610 
13611 	/* Copy the caller's CDB into the pkt... */
13612 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
13613 
13614 	if (uscmd->uscsi_timeout == 0) {
13615 		pktp->pkt_time = un->un_uscsi_timeout;
13616 	} else {
13617 		pktp->pkt_time = uscmd->uscsi_timeout;
13618 	}
13619 
13620 	/* need it later to identify USCSI request in sdintr */
13621 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
13622 
13623 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
13624 
13625 	pktp->pkt_private = bp;
13626 	pktp->pkt_comp = sdintr;
13627 	*pktpp = pktp;
13628 
13629 	SD_TRACE(SD_LOG_IO_CORE, un,
13630 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
13631 
13632 	return (SD_PKT_ALLOC_SUCCESS);
13633 }
13634 
13635 
13636 /*
13637  *    Function: sd_destroypkt_for_uscsi
13638  *
13639  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
13640  *		IOs.. Also saves relevant info into the associated uscsi_cmd
13641  *		struct.
13642  *
13643  *     Context: May be called under interrupt context
13644  */
13645 
13646 static void
13647 sd_destroypkt_for_uscsi(struct buf *bp)
13648 {
13649 	struct uscsi_cmd *uscmd;
13650 	struct sd_xbuf	*xp;
13651 	struct scsi_pkt	*pktp;
13652 	struct sd_lun	*un;
13653 
13654 	ASSERT(bp != NULL);
13655 	xp = SD_GET_XBUF(bp);
13656 	ASSERT(xp != NULL);
13657 	un = SD_GET_UN(bp);
13658 	ASSERT(un != NULL);
13659 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13660 	pktp = SD_GET_PKTP(bp);
13661 	ASSERT(pktp != NULL);
13662 
13663 	SD_TRACE(SD_LOG_IO_CORE, un,
13664 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
13665 
13666 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
13667 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
13668 	ASSERT(uscmd != NULL);
13669 
13670 	/* Save the status and the residual into the uscsi_cmd struct */
13671 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
13672 	uscmd->uscsi_resid  = bp->b_resid;
13673 
13674 	/*
13675 	 * If enabled, copy any saved sense data into the area specified
13676 	 * by the uscsi command.
13677 	 */
13678 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
13679 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
13680 		/*
13681 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
13682 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
13683 		 */
13684 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
13685 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
13686 		bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf, SENSE_LENGTH);
13687 	}
13688 
13689 	/* We are done with the scsi_pkt; free it now */
13690 	ASSERT(SD_GET_PKTP(bp) != NULL);
13691 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13692 
13693 	SD_TRACE(SD_LOG_IO_CORE, un,
13694 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
13695 }
13696 
13697 
13698 /*
13699  *    Function: sd_bioclone_alloc
13700  *
13701  * Description: Allocate a buf(9S) and init it as per the given buf
13702  *		and the various arguments.  The associated sd_xbuf
13703  *		struct is (nearly) duplicated.  The struct buf *bp
13704  *		argument is saved in new_xp->xb_private.
13705  *
13706  *   Arguments: bp - ptr the the buf(9S) to be "shadowed"
13707  *		datalen - size of data area for the shadow bp
13708  *		blkno - starting LBA
13709  *		func - function pointer for b_iodone in the shadow buf. (May
13710  *			be NULL if none.)
13711  *
13712  * Return Code: Pointer to allocates buf(9S) struct
13713  *
13714  *     Context: Can sleep.
13715  */
13716 
13717 static struct buf *
13718 sd_bioclone_alloc(struct buf *bp, size_t datalen,
13719 	daddr_t blkno, int (*func)(struct buf *))
13720 {
13721 	struct	sd_lun	*un;
13722 	struct	sd_xbuf	*xp;
13723 	struct	sd_xbuf	*new_xp;
13724 	struct	buf	*new_bp;
13725 
13726 	ASSERT(bp != NULL);
13727 	xp = SD_GET_XBUF(bp);
13728 	ASSERT(xp != NULL);
13729 	un = SD_GET_UN(bp);
13730 	ASSERT(un != NULL);
13731 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13732 
13733 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
13734 	    NULL, KM_SLEEP);
13735 
13736 	new_bp->b_lblkno	= blkno;
13737 
13738 	/*
13739 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13740 	 * original xbuf into it.
13741 	 */
13742 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13743 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13744 
13745 	/*
13746 	 * The given bp is automatically saved in the xb_private member
13747 	 * of the new xbuf.  Callers are allowed to depend on this.
13748 	 */
13749 	new_xp->xb_private = bp;
13750 
13751 	new_bp->b_private  = new_xp;
13752 
13753 	return (new_bp);
13754 }
13755 
13756 /*
13757  *    Function: sd_shadow_buf_alloc
13758  *
13759  * Description: Allocate a buf(9S) and init it as per the given buf
13760  *		and the various arguments.  The associated sd_xbuf
13761  *		struct is (nearly) duplicated.  The struct buf *bp
13762  *		argument is saved in new_xp->xb_private.
13763  *
13764  *   Arguments: bp - ptr the the buf(9S) to be "shadowed"
13765  *		datalen - size of data area for the shadow bp
13766  *		bflags - B_READ or B_WRITE (pseudo flag)
13767  *		blkno - starting LBA
13768  *		func - function pointer for b_iodone in the shadow buf. (May
13769  *			be NULL if none.)
13770  *
13771  * Return Code: Pointer to allocates buf(9S) struct
13772  *
13773  *     Context: Can sleep.
13774  */
13775 
13776 static struct buf *
13777 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
13778 	daddr_t blkno, int (*func)(struct buf *))
13779 {
13780 	struct	sd_lun	*un;
13781 	struct	sd_xbuf	*xp;
13782 	struct	sd_xbuf	*new_xp;
13783 	struct	buf	*new_bp;
13784 
13785 	ASSERT(bp != NULL);
13786 	xp = SD_GET_XBUF(bp);
13787 	ASSERT(xp != NULL);
13788 	un = SD_GET_UN(bp);
13789 	ASSERT(un != NULL);
13790 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13791 
13792 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
13793 		bp_mapin(bp);
13794 	}
13795 
13796 	bflags &= (B_READ | B_WRITE);
13797 #if defined(__i386) || defined(__amd64)
13798 	new_bp = getrbuf(KM_SLEEP);
13799 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
13800 	new_bp->b_bcount = datalen;
13801 	new_bp->b_flags = bflags |
13802 	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
13803 #else
13804 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
13805 	    datalen, bflags, SLEEP_FUNC, NULL);
13806 #endif
13807 	new_bp->av_forw	= NULL;
13808 	new_bp->av_back	= NULL;
13809 	new_bp->b_dev	= bp->b_dev;
13810 	new_bp->b_blkno	= blkno;
13811 	new_bp->b_iodone = func;
13812 	new_bp->b_edev	= bp->b_edev;
13813 	new_bp->b_resid	= 0;
13814 
13815 	/* We need to preserve the B_FAILFAST flag */
13816 	if (bp->b_flags & B_FAILFAST) {
13817 		new_bp->b_flags |= B_FAILFAST;
13818 	}
13819 
13820 	/*
13821 	 * Allocate an xbuf for the shadow bp and copy the contents of the
13822 	 * original xbuf into it.
13823 	 */
13824 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
13825 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
13826 
13827 	/* Need later to copy data between the shadow buf & original buf! */
13828 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
13829 
13830 	/*
13831 	 * The given bp is automatically saved in the xb_private member
13832 	 * of the new xbuf.  Callers are allowed to depend on this.
13833 	 */
13834 	new_xp->xb_private = bp;
13835 
13836 	new_bp->b_private  = new_xp;
13837 
13838 	return (new_bp);
13839 }
13840 
13841 /*
13842  *    Function: sd_bioclone_free
13843  *
13844  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
13845  *		in the larger than partition operation.
13846  *
13847  *     Context: May be called under interrupt context
13848  */
13849 
13850 static void
13851 sd_bioclone_free(struct buf *bp)
13852 {
13853 	struct sd_xbuf	*xp;
13854 
13855 	ASSERT(bp != NULL);
13856 	xp = SD_GET_XBUF(bp);
13857 	ASSERT(xp != NULL);
13858 
13859 	/*
13860 	 * Call bp_mapout() before freeing the buf,  in case a lower
13861 	 * layer or HBA  had done a bp_mapin().  we must do this here
13862 	 * as we are the "originator" of the shadow buf.
13863 	 */
13864 	bp_mapout(bp);
13865 
13866 	/*
13867 	 * Null out b_iodone before freeing the bp, to ensure that the driver
13868 	 * never gets confused by a stale value in this field. (Just a little
13869 	 * extra defensiveness here.)
13870 	 */
13871 	bp->b_iodone = NULL;
13872 
13873 	freerbuf(bp);
13874 
13875 	kmem_free(xp, sizeof (struct sd_xbuf));
13876 }
13877 
13878 /*
13879  *    Function: sd_shadow_buf_free
13880  *
13881  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
13882  *
13883  *     Context: May be called under interrupt context
13884  */
13885 
13886 static void
13887 sd_shadow_buf_free(struct buf *bp)
13888 {
13889 	struct sd_xbuf	*xp;
13890 
13891 	ASSERT(bp != NULL);
13892 	xp = SD_GET_XBUF(bp);
13893 	ASSERT(xp != NULL);
13894 
13895 #if defined(__sparc)
13896 	/*
13897 	 * Call bp_mapout() before freeing the buf,  in case a lower
13898 	 * layer or HBA  had done a bp_mapin().  we must do this here
13899 	 * as we are the "originator" of the shadow buf.
13900 	 */
13901 	bp_mapout(bp);
13902 #endif
13903 
13904 	/*
13905 	 * Null out b_iodone before freeing the bp, to ensure that the driver
13906 	 * never gets confused by a stale value in this field. (Just a little
13907 	 * extra defensiveness here.)
13908 	 */
13909 	bp->b_iodone = NULL;
13910 
13911 #if defined(__i386) || defined(__amd64)
13912 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
13913 	freerbuf(bp);
13914 #else
13915 	scsi_free_consistent_buf(bp);
13916 #endif
13917 
13918 	kmem_free(xp, sizeof (struct sd_xbuf));
13919 }
13920 
13921 
13922 /*
13923  *    Function: sd_print_transport_rejected_message
13924  *
13925  * Description: This implements the ludicrously complex rules for printing
13926  *		a "transport rejected" message.  This is to address the
13927  *		specific problem of having a flood of this error message
13928  *		produced when a failover occurs.
13929  *
13930  *     Context: Any.
13931  */
13932 
13933 static void
13934 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
13935 	int code)
13936 {
13937 	ASSERT(un != NULL);
13938 	ASSERT(mutex_owned(SD_MUTEX(un)));
13939 	ASSERT(xp != NULL);
13940 
13941 	/*
13942 	 * Print the "transport rejected" message under the following
13943 	 * conditions:
13944 	 *
13945 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
13946 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
13947 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
13948 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
13949 	 *   scsi_transport(9F) (which indicates that the target might have
13950 	 *   gone off-line).  This uses the un->un_tran_fatal_count
13951 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
13952 	 *   received, and reset to zero whenver a TRAN_ACCEPT is returned
13953 	 *   from scsi_transport().
13954 	 *
13955 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
13956 	 * the preceeding cases in order for the message to be printed.
13957 	 */
13958 	if ((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) {
13959 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
13960 		    (code != TRAN_FATAL_ERROR) ||
13961 		    (un->un_tran_fatal_count == 1)) {
13962 			switch (code) {
13963 			case TRAN_BADPKT:
13964 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13965 				    "transport rejected bad packet\n");
13966 				break;
13967 			case TRAN_FATAL_ERROR:
13968 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13969 				    "transport rejected fatal error\n");
13970 				break;
13971 			default:
13972 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13973 				    "transport rejected (%d)\n", code);
13974 				break;
13975 			}
13976 		}
13977 	}
13978 }
13979 
13980 
13981 /*
13982  *    Function: sd_add_buf_to_waitq
13983  *
13984  * Description: Add the given buf(9S) struct to the wait queue for the
13985  *		instance.  If sorting is enabled, then the buf is added
13986  *		to the queue via an elevator sort algorithm (a la
13987  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
13988  *		If sorting is not enabled, then the buf is just added
13989  *		to the end of the wait queue.
13990  *
13991  * Return Code: void
13992  *
13993  *     Context: Does not sleep/block, therefore technically can be called
13994  *		from any context.  However if sorting is enabled then the
13995  *		execution time is indeterminate, and may take long if
13996  *		the wait queue grows large.
13997  */
13998 
static void
sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
{
	struct buf *ap;		/* scan pointer for the sorted insert */

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* If the queue is empty, add the buf as the only entry & return. */
	if (un->un_waitq_headp == NULL) {
		ASSERT(un->un_waitq_tailp == NULL);
		un->un_waitq_headp = un->un_waitq_tailp = bp;
		bp->av_forw = NULL;
		return;
	}

	ASSERT(un->un_waitq_tailp != NULL);

	/*
	 * If sorting is disabled, just add the buf to the tail end of
	 * the wait queue and return.
	 */
	if (un->un_f_disksort_disabled) {
		un->un_waitq_tailp->av_forw = bp;
		un->un_waitq_tailp = bp;
		bp->av_forw = NULL;
		return;
	}

	/*
	 * Sort thru the list of requests currently on the wait queue
	 * and add the new buf request at the appropriate position.
	 *
	 * The un->un_waitq_headp is an activity chain pointer on which
	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
	 * first queue holds those requests which are positioned after
	 * the current SD_GET_BLKNO() (in the first request); the second holds
	 * requests which came in after their SD_GET_BLKNO() number was passed.
	 * Thus we implement a one way scan, retracting after reaching
	 * the end of the drive to the first request on the second
	 * queue, at which time it becomes the first queue.
	 * A one-way scan is natural because of the way UNIX read-ahead
	 * blocks are allocated.
	 *
	 * If we lie after the first request, then we must locate the
	 * second request list and add ourselves to it.
	 */
	ap = un->un_waitq_headp;
	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
		/* New request sorts before the head: it belongs in list 2. */
		while (ap->av_forw != NULL) {
			/*
			 * Look for an "inversion" in the (normally
			 * ascending) block numbers. This indicates
			 * the start of the second request list.
			 */
			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
				/*
				 * Search the second request list for the
				 * first request at a larger block number.
				 * We go before that; however if there is
				 * no such request, we go at the end.
				 */
				do {
					if (SD_GET_BLKNO(bp) <
					    SD_GET_BLKNO(ap->av_forw)) {
						goto insert;
					}
					ap = ap->av_forw;
				} while (ap->av_forw != NULL);
				goto insert;		/* after last */
			}
			ap = ap->av_forw;
		}

		/*
		 * No inversions... we will go after the last, and
		 * be the first request in the second request list.
		 */
		goto insert;
	}

	/*
	 * Request is at/after the current request...
	 * sort in the first request list.
	 */
	while (ap->av_forw != NULL) {
		/*
		 * We want to go after the current request (1) if
		 * there is an inversion after it (i.e. it is the end
		 * of the first request list), or (2) if the next
		 * request is a larger block no. than our request.
		 */
		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
			goto insert;
		}
		ap = ap->av_forw;
	}

	/*
	 * Neither a second list nor a larger request, therefore
	 * we go at the end of the first list (which is the same
	 * as the end of the whole schebang).
	 */
insert:
	/* Link bp into the chain immediately after ap. */
	bp->av_forw = ap->av_forw;
	ap->av_forw = bp;

	/*
	 * If we inserted onto the tail end of the waitq, make sure the
	 * tail pointer is updated.
	 */
	if (ap == un->un_waitq_tailp) {
		un->un_waitq_tailp = bp;
	}
}
14116 
14117 
14118 /*
14119  *    Function: sd_start_cmds
14120  *
14121  * Description: Remove and transport cmds from the driver queues.
14122  *
14123  *   Arguments: un - pointer to the unit (soft state) struct for the target.
14124  *
14125  *		immed_bp - ptr to a buf to be transported immediately. Only
14126  *		the immed_bp is transported; bufs on the waitq are not
14127  *		processed and the un_retry_bp is not checked.  If immed_bp is
14128  *		NULL, then normal queue processing is performed.
14129  *
14130  *     Context: May be called from kernel thread context, interrupt context,
14131  *		or runout callback context. This function may not block or
14132  *		call routines that block.
14133  */
14134 
static void
sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
{
	struct	sd_xbuf	*xp;
	struct	buf	*bp;
	void	(*statp)(kstat_io_t *);	/* kstat update applied on dispatch */
#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
	void	(*saved_statp)(kstat_io_t *);
#endif
	int	rval;			/* return value from scsi_transport() */

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_ncmds_in_transport >= 0);
	ASSERT(un->un_throttle >= 0);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");

	do {
#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
		saved_statp = NULL;
#endif

		/*
		 * If we are syncing or dumping, fail the command to
		 * avoid recursively calling back into scsi_transport().
		 * The dump I/O itself uses a separate code path so this
		 * only prevents non-dump I/O from being sent while dumping.
		 * File system sync takes place before dumping begins.
		 * During panic, filesystem I/O is allowed provided
		 * un_in_callback is <= 1.  This is to prevent recursion
		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
		 * sd_start_cmds and so on.  See panic.c for more information
		 * about the states the system can be in during panic.
		 */
		if ((un->un_state == SD_STATE_DUMPING) ||
		    (ddi_in_panic() && (un->un_in_callback > 1))) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_start_cmds: panicking\n");
			goto exit;
		}

		if ((bp = immed_bp) != NULL) {
			/*
			 * We have a bp that must be transported immediately.
			 * It's OK to transport the immed_bp here without doing
			 * the throttle limit check because the immed_bp is
			 * always used in a retry/recovery case. This means
			 * that we know we are not at the throttle limit by
			 * virtue of the fact that to get here we must have
			 * already gotten a command back via sdintr(). This also
			 * relies on (1) the command on un_retry_bp preventing
			 * further commands from the waitq from being issued;
			 * and (2) the code in sd_retry_command checking the
			 * throttle limit before issuing a delayed or immediate
			 * retry. This holds even if the throttle limit is
			 * currently ratcheted down from its maximum value.
			 */
			statp = kstat_runq_enter;
			if (bp == un->un_retry_bp) {
				ASSERT((un->un_retry_statp == NULL) ||
				    (un->un_retry_statp == kstat_waitq_enter) ||
				    (un->un_retry_statp ==
				    kstat_runq_back_to_waitq));
				/*
				 * If the waitq kstat was incremented when
				 * sd_set_retry_bp() queued this bp for a retry,
				 * then we must set up statp so that the waitq
				 * count will get decremented correctly below.
				 * Also we must clear un->un_retry_statp to
				 * ensure that we do not act on a stale value
				 * in this field.
				 */
				if ((un->un_retry_statp == kstat_waitq_enter) ||
				    (un->un_retry_statp ==
				    kstat_runq_back_to_waitq)) {
					statp = kstat_waitq_to_runq;
				}
#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
				saved_statp = un->un_retry_statp;
#endif
				un->un_retry_statp = NULL;

				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
				    "un_throttle:%d un_ncmds_in_transport:%d\n",
				    un, un->un_retry_bp, un->un_throttle,
				    un->un_ncmds_in_transport);
			} else {
				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
				    "processing priority bp:0x%p\n", bp);
			}

		} else if ((bp = un->un_waitq_headp) != NULL) {
			/*
			 * A command on the waitq is ready to go, but do not
			 * send it if:
			 *
			 * (1) the throttle limit has been reached, or
			 * (2) a retry is pending, or
			 * (3) a START_STOP_UNIT callback pending, or
			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
			 *	command is pending.
			 *
			 * For all of these conditions, IO processing will
			 * restart after the condition is cleared.
			 */
			if (un->un_ncmds_in_transport >= un->un_throttle) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, "
				    "throttle limit reached!\n");
				goto exit;
			}
			if (un->un_retry_bp != NULL) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, retry pending!\n");
				goto exit;
			}
			if (un->un_startstop_timeid != NULL) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, "
				    "START_STOP pending!\n");
				goto exit;
			}
			if (un->un_direct_priority_timeid != NULL) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, "
				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
				goto exit;
			}

			/* Dequeue the command */
			un->un_waitq_headp = bp->av_forw;
			if (un->un_waitq_headp == NULL) {
				un->un_waitq_tailp = NULL;
			}
			bp->av_forw = NULL;
			statp = kstat_waitq_to_runq;
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);

		} else {
			/* No work to do so bail out now */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_start_cmds: no more work, exiting!\n");
			goto exit;
		}

		/*
		 * Reset the state to normal. This is the mechanism by which
		 * the state transitions from either SD_STATE_RWAIT or
		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
		 * If state is SD_STATE_PM_CHANGING then this command is
		 * part of the device power control and the state must
		 * not be put back to normal. Doing so would
		 * allow new commands to proceed when they shouldn't,
		 * the device may be going off.
		 */
		if ((un->un_state != SD_STATE_SUSPENDED) &&
		    (un->un_state != SD_STATE_PM_CHANGING)) {
			New_state(un, SD_STATE_NORMAL);
		    }

		xp = SD_GET_XBUF(bp);
		ASSERT(xp != NULL);

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
		/*
		 * Allocate the scsi_pkt if we need one, or attach DMA
		 * resources if we have a scsi_pkt that needs them. The
		 * latter should only occur for commands that are being
		 * retried.
		 */
		if ((xp->xb_pktp == NULL) ||
		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
#else
		if (xp->xb_pktp == NULL) {
#endif
			/*
			 * There is no scsi_pkt allocated for this buf. Call
			 * the initpkt function to allocate & init one.
			 *
			 * The scsi_init_pkt runout callback functionality is
			 * implemented as follows:
			 *
			 * 1) The initpkt function always calls
			 *    scsi_init_pkt(9F) with sdrunout specified as the
			 *    callback routine.
			 * 2) A successful packet allocation is initialized and
			 *    the I/O is transported.
			 * 3) The I/O associated with an allocation resource
			 *    failure is left on its queue to be retried via
			 *    runout or the next I/O.
			 * 4) The I/O associated with a DMA error is removed
			 *    from the queue and failed with EIO. Processing of
			 *    the transport queues is also halted to be
			 *    restarted via runout or the next I/O.
			 * 5) The I/O associated with a CDB size or packet
			 *    size error is removed from the queue and failed
			 *    with EIO. Processing of the transport queues is
			 *    continued.
			 *
			 * Note: there is no interface for canceling a runout
			 * callback. To prevent the driver from detaching or
			 * suspending while a runout is pending the driver
			 * state is set to SD_STATE_RWAIT
			 *
			 * Note: using the scsi_init_pkt callback facility can
			 * result in an I/O request persisting at the head of
			 * the list which cannot be satisfied even after
			 * multiple retries. In the future the driver may
			 * implement some kind of maximum runout count before
			 * failing an I/O.
			 *
			 * Note: the use of funcp below may seem superfluous,
			 * but it helps warlock figure out the correct
			 * initpkt function calls (see [s]sd.wlcmd).
			 */
			struct scsi_pkt	*pktp;
			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);

			ASSERT(bp != un->un_rqs_bp);

			funcp = sd_initpkt_map[xp->xb_chain_iostart];
			switch ((*funcp)(bp, &pktp)) {
			case  SD_PKT_ALLOC_SUCCESS:
				xp->xb_pktp = pktp;
				SD_TRACE(SD_LOG_IO_CORE, un,
				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
				    pktp);
				goto got_pkt;

			case SD_PKT_ALLOC_FAILURE:
				/*
				 * Temporary (hopefully) resource depletion.
				 * Since retries and RQS commands always have a
				 * scsi_pkt allocated, these cases should never
				 * get here. So the only cases this needs to
				 * handle is a bp from the waitq (which we put
				 * back onto the waitq for sdrunout), or a bp
				 * sent as an immed_bp (which we just fail).
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */

				if (bp == immed_bp) {
					/*
					 * If SD_XB_DMA_FREED is clear, then
					 * this is a failure to allocate a
					 * scsi_pkt, and we must fail the
					 * command.
					 */
					if ((xp->xb_pkt_flags &
					    SD_XB_DMA_FREED) == 0) {
						break;
					}

					/*
					 * If this immediate command is NOT our
					 * un_retry_bp, then we must fail it.
					 */
					if (bp != un->un_retry_bp) {
						break;
					}

					/*
					 * We get here if this cmd is our
					 * un_retry_bp that was DMAFREED, but
					 * scsi_init_pkt() failed to reallocate
					 * DMA resources when we attempted to
					 * retry it. This can happen when an
					 * mpxio failover is in progress, but
					 * we don't want to just fail the
					 * command in this case.
					 *
					 * Use timeout(9F) to restart it after
					 * a 100ms delay.  We don't want to
					 * let sdrunout() restart it, because
					 * sdrunout() is just supposed to start
					 * commands that are sitting on the
					 * wait queue.  The un_retry_bp stays
					 * set until the command completes, but
					 * sdrunout can be called many times
					 * before that happens.  Since sdrunout
					 * cannot tell if the un_retry_bp is
					 * already in the transport, it could
					 * end up calling scsi_transport() for
					 * the un_retry_bp multiple times.
					 *
					 * Also: don't schedule the callback
					 * if some other callback is already
					 * pending.
					 */
					if (un->un_retry_statp == NULL) {
						/*
						 * restore the kstat pointer to
						 * keep kstat counts coherent
						 * when we do retry the command.
						 */
						un->un_retry_statp =
						    saved_statp;
					}

					if ((un->un_startstop_timeid == NULL) &&
					    (un->un_retry_timeid == NULL) &&
					    (un->un_direct_priority_timeid ==
					    NULL)) {

						un->un_retry_timeid =
						    timeout(
						    sd_start_retry_command,
						    un, SD_RESTART_TIMEOUT);
					}
					goto exit;
				}

#else
				if (bp == immed_bp) {
					break;	/* Just fail the command */
				}
#endif

				/* Add the buf back to the head of the waitq */
				bp->av_forw = un->un_waitq_headp;
				un->un_waitq_headp = bp;
				if (un->un_waitq_tailp == NULL) {
					un->un_waitq_tailp = bp;
				}
				goto exit;

			case SD_PKT_ALLOC_FAILURE_NO_DMA:
				/*
				 * HBA DMA resource failure. Fail the command
				 * and continue processing of the queues.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: "
				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
				break;

			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
				/*
				 * Note:x86: Partial DMA mapping not supported
				 * for USCSI commands, and all the needed DMA
				 * resources were not allocated.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: "
				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
				break;

			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
				/*
				 * Note:x86: Request cannot fit into CDB based
				 * on lba and len.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: "
				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
				break;

			default:
				/* Should NEVER get here! */
				panic("scsi_initpkt error");
				/*NOTREACHED*/
			}

			/*
			 * Fatal error in allocating a scsi_pkt for this buf.
			 * Update kstats & return the buf with an error code.
			 * We must use sd_return_failed_command_no_restart() to
			 * avoid a recursive call back into sd_start_cmds().
			 * However this also means that we must keep processing
			 * the waitq here in order to avoid stalling.
			 */
			if (statp == kstat_waitq_to_runq) {
				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
			}
			sd_return_failed_command_no_restart(un, bp, EIO);
			if (bp == immed_bp) {
				/* immed_bp is gone by now, so clear this */
				immed_bp = NULL;
			}
			continue;
		}
got_pkt:
		if (bp == immed_bp) {
			/* goto the head of the class.... */
			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
		}

		un->un_ncmds_in_transport++;
		SD_UPDATE_KSTATS(un, statp, bp);

		/*
		 * Call scsi_transport() to send the command to the target.
		 * According to SCSA architecture, we must drop the mutex here
		 * before calling scsi_transport() in order to avoid deadlock.
		 * Note that the scsi_pkt's completion routine can be executed
		 * (from interrupt context) even before the call to
		 * scsi_transport() returns.
		 */
		SD_TRACE(SD_LOG_IO_CORE, un,
		    "sd_start_cmds: calling scsi_transport()\n");
		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);

		mutex_exit(SD_MUTEX(un));
		rval = scsi_transport(xp->xb_pktp);
		mutex_enter(SD_MUTEX(un));

		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_start_cmds: scsi_transport() returned %d\n", rval);

		switch (rval) {
		case TRAN_ACCEPT:
			/* Clear this with every pkt accepted by the HBA */
			un->un_tran_fatal_count = 0;
			break;	/* Success; try the next cmd (if any) */

		case TRAN_BUSY:
			un->un_ncmds_in_transport--;
			ASSERT(un->un_ncmds_in_transport >= 0);

			/*
			 * Don't retry request sense, the sense data
			 * is lost when another request is sent.
			 * Free up the rqs buf and retry
			 * the original failed cmd.  Update kstat.
			 */
			if (bp == un->un_rqs_bp) {
				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
				bp = sd_mark_rqs_idle(un, xp);
				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
					NULL, NULL, EIO, SD_BSY_TIMEOUT / 500,
					kstat_waitq_enter);
				goto exit;
			}

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
			/*
			 * Free the DMA resources for the  scsi_pkt. This will
			 * allow mpxio to select another path the next time
			 * we call scsi_transport() with this scsi_pkt.
			 * See sdintr() for the rationalization behind this.
			 */
			if ((un->un_f_is_fibre == TRUE) &&
			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
				scsi_dmafree(xp->xb_pktp);
				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
			}
#endif

			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
				/*
				 * Commands that are SD_PATH_DIRECT_PRIORITY
				 * are for error recovery situations. These do
				 * not use the normal command waitq, so if they
				 * get a TRAN_BUSY we cannot put them back onto
				 * the waitq for later retry. One possible
				 * problem is that there could already be some
				 * other command on un_retry_bp that is waiting
				 * for this one to complete, so we would be
				 * deadlocked if we put this command back onto
				 * the waitq for later retry (since un_retry_bp
				 * must complete before the driver gets back to
				 * commands on the waitq).
				 *
				 * To avoid deadlock we must schedule a callback
				 * that will restart this command after a set
				 * interval.  This should keep retrying for as
				 * long as the underlying transport keeps
				 * returning TRAN_BUSY (just like for other
				 * commands).  Use the same timeout interval as
				 * for the ordinary TRAN_BUSY retry.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: scsi_transport() returned "
				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");

				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
				un->un_direct_priority_timeid =
				    timeout(sd_start_direct_priority_command,
				    bp, SD_BSY_TIMEOUT / 500);

				goto exit;
			}

			/*
			 * For TRAN_BUSY, we want to reduce the throttle value,
			 * unless we are retrying a command.
			 */
			if (bp != un->un_retry_bp) {
				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
			}

			/*
			 * Set up the bp to be tried again 10 ms later.
			 * Note:x86: Is there a timeout value in the sd_lun
			 * for this condition?
			 */
			sd_set_retry_bp(un, bp, SD_BSY_TIMEOUT / 500,
				kstat_runq_back_to_waitq);
			goto exit;

		case TRAN_FATAL_ERROR:
			un->un_tran_fatal_count++;
			/* FALLTHRU */

		case TRAN_BADPKT:
		default:
			un->un_ncmds_in_transport--;
			ASSERT(un->un_ncmds_in_transport >= 0);

			/*
			 * If this is our REQUEST SENSE command with a
			 * transport error, we must get back the pointers
			 * to the original buf, and mark the REQUEST
			 * SENSE command as "available".
			 */
			if (bp == un->un_rqs_bp) {
				bp = sd_mark_rqs_idle(un, xp);
				xp = SD_GET_XBUF(bp);
			} else {
				/*
				 * Legacy behavior: do not update transport
				 * error count for request sense commands.
				 */
				SD_UPDATE_ERRSTATS(un, sd_transerrs);
			}

			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
			sd_print_transport_rejected_message(un, xp, rval);

			/*
			 * We must use sd_return_failed_command_no_restart() to
			 * avoid a recursive call back into sd_start_cmds().
			 * However this also means that we must keep processing
			 * the waitq here in order to avoid stalling.
			 */
			sd_return_failed_command_no_restart(un, bp, EIO);

			/*
			 * Notify any threads waiting in sd_ddi_suspend() that
			 * a command completion has occurred.
			 */
			if (un->un_state == SD_STATE_SUSPENDED) {
				cv_broadcast(&un->un_disk_busy_cv);
			}

			if (bp == immed_bp) {
				/* immed_bp is gone by now, so clear this */
				immed_bp = NULL;
			}
			break;
		}

	} while (immed_bp == NULL);

exit:
	ASSERT(mutex_owned(SD_MUTEX(un)));
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
}
14700 
14701 
14702 /*
14703  *    Function: sd_return_command
14704  *
14705  * Description: Returns a command to its originator (with or without an
14706  *		error).  Also starts commands waiting to be transported
14707  *		to the target.
14708  *
14709  *     Context: May be called from interrupt, kernel, or timeout context
14710  */
14711 
static void
sd_return_command(struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf *xp;
#if defined(__i386) || defined(__amd64)
	struct scsi_pkt *pktp;
#endif

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	/* The RQS buf is returned via sd_mark_rqs_idle(), never here. */
	ASSERT(bp != un->un_rqs_bp);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

#if defined(__i386) || defined(__amd64)
	pktp = SD_GET_PKTP(bp);
#endif

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");

#if defined(__i386) || defined(__amd64)
	/*
	 * Note:x86: check for the "sdrestart failed" case: a non-USCSI cmd
	 * completed this leg without error but still has untransferred
	 * residual (xb_dma_resid), so set up and send the next portion.
	 */
	if (((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
		(geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
		(xp->xb_pktp->pkt_resid == 0)) {

		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
			/*
			 * Successfully set up next portion of cmd
			 * transfer, try sending it
			 */
			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
			    NULL, NULL, 0, (clock_t)0, NULL);
			sd_start_cmds(un, NULL);
			return;	/* Note:x86: need a return here? */
		}
	}
#endif

	/*
	 * If this is the failfast bp, clear it from un_failfast_bp. This
	 * can happen if upon being re-tried the failfast bp either
	 * succeeded or encountered another error (possibly even a different
	 * error than the one that precipitated the failfast state, but in
	 * that case it would have had to exhaust retries as well). Regardless,
	 * this should not occur whenever the instance is in the active
	 * failfast state.
	 */
	if (bp == un->un_failfast_bp) {
		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
		un->un_failfast_bp = NULL;
	}

	/*
	 * Clear the failfast state upon successful completion of ANY cmd.
	 */
	if (bp->b_error == 0) {
		un->un_failfast_state = SD_FAILFAST_INACTIVE;
	}

	/*
	 * This is used if the command was retried one or more times. Show that
	 * we are done with it, and allow processing of the waitq to resume.
	 */
	if (bp == un->un_retry_bp) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_return_command: un:0x%p: "
		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
		un->un_retry_bp = NULL;
		un->un_retry_statp = NULL;
	}

	SD_UPDATE_RDWR_STATS(un, bp);
	SD_UPDATE_PARTITION_STATS(un, bp);

	switch (un->un_state) {
	case SD_STATE_SUSPENDED:
		/*
		 * Notify any threads waiting in sd_ddi_suspend() that
		 * a command completion has occurred.
		 */
		cv_broadcast(&un->un_disk_busy_cv);
		break;
	default:
		/* Kick off any queued commands now that this one is done. */
		sd_start_cmds(un, NULL);
		break;
	}

	/* Return this command up the iodone chain to its originator. */
	mutex_exit(SD_MUTEX(un));

	/* Destroy the scsi_pkt via the chain-specific destroypkt routine. */
	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
	xp->xb_pktp = NULL;

	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
}
14816 
14817 
14818 /*
14819  *    Function: sd_return_failed_command
14820  *
14821  * Description: Command completion when an error occurred.
14822  *
14823  *     Context: May be called from interrupt context
14824  */
14825 
14826 static void
14827 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
14828 {
14829 	ASSERT(bp != NULL);
14830 	ASSERT(un != NULL);
14831 	ASSERT(mutex_owned(SD_MUTEX(un)));
14832 
14833 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14834 	    "sd_return_failed_command: entry\n");
14835 
14836 	/*
14837 	 * b_resid could already be nonzero due to a partial data
14838 	 * transfer, so do not change it here.
14839 	 */
14840 	SD_BIOERROR(bp, errcode);
14841 
14842 	sd_return_command(un, bp);
14843 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14844 	    "sd_return_failed_command: exit\n");
14845 }
14846 
14847 
14848 /*
14849  *    Function: sd_return_failed_command_no_restart
14850  *
14851  * Description: Same as sd_return_failed_command, but ensures that no
14852  *		call back into sd_start_cmds will be issued.
14853  *
14854  *     Context: May be called from interrupt context
14855  */
14856 
14857 static void
14858 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
14859 	int errcode)
14860 {
14861 	struct sd_xbuf *xp;
14862 
14863 	ASSERT(bp != NULL);
14864 	ASSERT(un != NULL);
14865 	ASSERT(mutex_owned(SD_MUTEX(un)));
14866 	xp = SD_GET_XBUF(bp);
14867 	ASSERT(xp != NULL);
14868 	ASSERT(errcode != 0);
14869 
14870 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14871 	    "sd_return_failed_command_no_restart: entry\n");
14872 
14873 	/*
14874 	 * b_resid could already be nonzero due to a partial data
14875 	 * transfer, so do not change it here.
14876 	 */
14877 	SD_BIOERROR(bp, errcode);
14878 
14879 	/*
14880 	 * If this is the failfast bp, clear it. This can happen if the
14881 	 * failfast bp encounterd a fatal error when we attempted to
14882 	 * re-try it (such as a scsi_transport(9F) failure).  However
14883 	 * we should NOT be in an active failfast state if the failfast
14884 	 * bp is not NULL.
14885 	 */
14886 	if (bp == un->un_failfast_bp) {
14887 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
14888 		un->un_failfast_bp = NULL;
14889 	}
14890 
14891 	if (bp == un->un_retry_bp) {
14892 		/*
14893 		 * This command was retried one or more times. Show that we are
14894 		 * done with it, and allow processing of the waitq to resume.
14895 		 */
14896 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14897 		    "sd_return_failed_command_no_restart: "
14898 		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
14899 		un->un_retry_bp = NULL;
14900 		un->un_retry_statp = NULL;
14901 	}
14902 
14903 	SD_UPDATE_RDWR_STATS(un, bp);
14904 	SD_UPDATE_PARTITION_STATS(un, bp);
14905 
14906 	mutex_exit(SD_MUTEX(un));
14907 
14908 	if (xp->xb_pktp != NULL) {
14909 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
14910 		xp->xb_pktp = NULL;
14911 	}
14912 
14913 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
14914 
14915 	mutex_enter(SD_MUTEX(un));
14916 
14917 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14918 	    "sd_return_failed_command_no_restart: exit\n");
14919 }
14920 
14921 
14922 /*
14923  *    Function: sd_retry_command
14924  *
14925  * Description: queue up a command for retry, or (optionally) fail it
14926  *		if retry counts are exhausted.
14927  *
14928  *   Arguments: un - Pointer to the sd_lun struct for the target.
14929  *
14930  *		bp - Pointer to the buf for the command to be retried.
14931  *
14932  *		retry_check_flag - Flag to see which (if any) of the retry
14933  *		   counts should be decremented/checked. If the indicated
14934  *		   retry count is exhausted, then the command will not be
14935  *		   retried; it will be failed instead. This should use a
14936  *		   value equal to one of the following:
14937  *
14938  *			SD_RETRIES_NOCHECK
14939  *			SD_RESD_RETRIES_STANDARD
14940  *			SD_RETRIES_VICTIM
14941  *
14942  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
14943  *		   if the check should be made to see of FLAG_ISOLATE is set
14944  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
14945  *		   not retried, it is simply failed.
14946  *
14947  *		user_funcp - Ptr to function to call before dispatching the
14948  *		   command. May be NULL if no action needs to be performed.
14949  *		   (Primarily intended for printing messages.)
14950  *
14951  *		user_arg - Optional argument to be passed along to
14952  *		   the user_funcp call.
14953  *
14954  *		failure_code - errno return code to set in the bp if the
14955  *		   command is going to be failed.
14956  *
14957  *		retry_delay - Retry delay interval in (clock_t) units. May
14958  *		   be zero which indicates that the retry should be retried
14959  *		   immediately (ie, without an intervening delay).
14960  *
14961  *		statp - Ptr to kstat function to be updated if the command
14962  *		   is queued for a delayed retry. May be NULL if no kstat
14963  *		   update is desired.
14964  *
14965  *     Context: May be called from interupt context.
14966  */
14967 
14968 static void
14969 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
14970 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
14971 	code), void *user_arg, int failure_code,  clock_t retry_delay,
14972 	void (*statp)(kstat_io_t *))
14973 {
14974 	struct sd_xbuf	*xp;
14975 	struct scsi_pkt	*pktp;
14976 
14977 	ASSERT(un != NULL);
14978 	ASSERT(mutex_owned(SD_MUTEX(un)));
14979 	ASSERT(bp != NULL);
14980 	xp = SD_GET_XBUF(bp);
14981 	ASSERT(xp != NULL);
14982 	pktp = SD_GET_PKTP(bp);
14983 	ASSERT(pktp != NULL);
14984 
14985 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14986 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
14987 
14988 	/*
14989 	 * If we are syncing or dumping, fail the command to avoid
14990 	 * recursively calling back into scsi_transport().
14991 	 */
14992 	if (ddi_in_panic()) {
14993 		goto fail_command_no_log;
14994 	}
14995 
14996 	/*
14997 	 * We should never be be retrying a command with FLAG_DIAGNOSE set, so
14998 	 * log an error and fail the command.
14999 	 */
15000 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
15001 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15002 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
15003 		sd_dump_memory(un, SD_LOG_IO, "CDB",
15004 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15005 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15006 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15007 		goto fail_command;
15008 	}
15009 
15010 	/*
15011 	 * If we are suspended, then put the command onto head of the
15012 	 * wait queue since we don't want to start more commands.
15013 	 */
15014 	switch (un->un_state) {
15015 	case SD_STATE_SUSPENDED:
15016 	case SD_STATE_DUMPING:
15017 		bp->av_forw = un->un_waitq_headp;
15018 		un->un_waitq_headp = bp;
15019 		if (un->un_waitq_tailp == NULL) {
15020 			un->un_waitq_tailp = bp;
15021 		}
15022 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
15023 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
15024 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
15025 		return;
15026 	default:
15027 		break;
15028 	}
15029 
15030 	/*
15031 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
15032 	 * is set; if it is then we do not want to retry the command.
15033 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
15034 	 */
15035 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
15036 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
15037 			goto fail_command;
15038 		}
15039 	}
15040 
15041 
15042 	/*
15043 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
15044 	 * command timeout or a selection timeout has occurred. This means
15045 	 * that we were unable to establish an kind of communication with
15046 	 * the target, and subsequent retries and/or commands are likely
15047 	 * to encounter similar results and take a long time to complete.
15048 	 *
15049 	 * If this is a failfast error condition, we need to update the
15050 	 * failfast state, even if this bp does not have B_FAILFAST set.
15051 	 */
15052 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
15053 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
15054 			ASSERT(un->un_failfast_bp == NULL);
15055 			/*
15056 			 * If we are already in the active failfast state, and
15057 			 * another failfast error condition has been detected,
15058 			 * then fail this command if it has B_FAILFAST set.
15059 			 * If B_FAILFAST is clear, then maintain the legacy
15060 			 * behavior of retrying heroically, even tho this will
15061 			 * take a lot more time to fail the command.
15062 			 */
15063 			if (bp->b_flags & B_FAILFAST) {
15064 				goto fail_command;
15065 			}
15066 		} else {
15067 			/*
15068 			 * We're not in the active failfast state, but we
15069 			 * have a failfast error condition, so we must begin
15070 			 * transition to the next state. We do this regardless
15071 			 * of whether or not this bp has B_FAILFAST set.
15072 			 */
15073 			if (un->un_failfast_bp == NULL) {
15074 				/*
15075 				 * This is the first bp to meet a failfast
15076 				 * condition so save it on un_failfast_bp &
15077 				 * do normal retry processing. Do not enter
15078 				 * active failfast state yet. This marks
15079 				 * entry into the "failfast pending" state.
15080 				 */
15081 				un->un_failfast_bp = bp;
15082 
15083 			} else if (un->un_failfast_bp == bp) {
15084 				/*
15085 				 * This is the second time *this* bp has
15086 				 * encountered a failfast error condition,
15087 				 * so enter active failfast state & flush
15088 				 * queues as appropriate.
15089 				 */
15090 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
15091 				un->un_failfast_bp = NULL;
15092 				sd_failfast_flushq(un);
15093 
15094 				/*
15095 				 * Fail this bp now if B_FAILFAST set;
15096 				 * otherwise continue with retries. (It would
15097 				 * be pretty ironic if this bp succeeded on a
15098 				 * subsequent retry after we just flushed all
15099 				 * the queues).
15100 				 */
15101 				if (bp->b_flags & B_FAILFAST) {
15102 					goto fail_command;
15103 				}
15104 
15105 #if !defined(lint) && !defined(__lint)
15106 			} else {
15107 				/*
15108 				 * If neither of the preceeding conditionals
15109 				 * was true, it means that there is some
15110 				 * *other* bp that has met an inital failfast
15111 				 * condition and is currently either being
15112 				 * retried or is waiting to be retried. In
15113 				 * that case we should perform normal retry
15114 				 * processing on *this* bp, since there is a
15115 				 * chance that the current failfast condition
15116 				 * is transient and recoverable. If that does
15117 				 * not turn out to be the case, then retries
15118 				 * will be cleared when the wait queue is
15119 				 * flushed anyway.
15120 				 */
15121 #endif
15122 			}
15123 		}
15124 	} else {
15125 		/*
15126 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
15127 		 * likely were able to at least establish some level of
15128 		 * communication with the target and subsequent commands
15129 		 * and/or retries are likely to get through to the target,
15130 		 * In this case we want to be aggressive about clearing
15131 		 * the failfast state. Note that this does not affect
15132 		 * the "failfast pending" condition.
15133 		 */
15134 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15135 	}
15136 
15137 
15138 	/*
15139 	 * Check the specified retry count to see if we can still do
15140 	 * any retries with this pkt before we should fail it.
15141 	 */
15142 	switch (retry_check_flag & SD_RETRIES_MASK) {
15143 	case SD_RETRIES_VICTIM:
15144 		/*
15145 		 * Check the victim retry count. If exhausted, then fall
15146 		 * thru & check against the standard retry count.
15147 		 */
15148 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
15149 			/* Increment count & proceed with the retry */
15150 			xp->xb_victim_retry_count++;
15151 			break;
15152 		}
15153 		/* Victim retries exhausted, fall back to std. retries... */
15154 		/* FALLTHRU */
15155 
15156 	case SD_RETRIES_STANDARD:
15157 		if (xp->xb_retry_count >= un->un_retry_count) {
15158 			/* Retries exhausted, fail the command */
15159 			SD_TRACE(SD_LOG_IO_CORE, un,
15160 			    "sd_retry_command: retries exhausted!\n");
15161 			/*
15162 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
15163 			 * commands with nonzero pkt_resid.
15164 			 */
15165 			if ((pktp->pkt_reason == CMD_CMPLT) &&
15166 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
15167 			    (pktp->pkt_resid != 0)) {
15168 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
15169 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
15170 					SD_UPDATE_B_RESID(bp, pktp);
15171 				}
15172 			}
15173 			goto fail_command;
15174 		}
15175 		xp->xb_retry_count++;
15176 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15177 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15178 		break;
15179 
15180 	case SD_RETRIES_UA:
15181 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
15182 			/* Retries exhausted, fail the command */
15183 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15184 			    "Unit Attention retries exhausted. "
15185 			    "Check the target.\n");
15186 			goto fail_command;
15187 		}
15188 		xp->xb_ua_retry_count++;
15189 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15190 		    "sd_retry_command: retry count:%d\n",
15191 			xp->xb_ua_retry_count);
15192 		break;
15193 
15194 	case SD_RETRIES_BUSY:
15195 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
15196 			/* Retries exhausted, fail the command */
15197 			SD_TRACE(SD_LOG_IO_CORE, un,
15198 			    "sd_retry_command: retries exhausted!\n");
15199 			goto fail_command;
15200 		}
15201 		xp->xb_retry_count++;
15202 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15203 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15204 		break;
15205 
15206 	case SD_RETRIES_NOCHECK:
15207 	default:
15208 		/* No retry count to check. Just proceed with the retry */
15209 		break;
15210 	}
15211 
15212 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15213 
15214 	/*
15215 	 * If we were given a zero timeout, we must attempt to retry the
15216 	 * command immediately (ie, without a delay).
15217 	 */
15218 	if (retry_delay == 0) {
15219 		/*
15220 		 * Check some limiting conditions to see if we can actually
15221 		 * do the immediate retry.  If we cannot, then we must
15222 		 * fall back to queueing up a delayed retry.
15223 		 */
15224 		if (un->un_ncmds_in_transport >= un->un_throttle) {
15225 			/*
15226 			 * We are at the throttle limit for the target,
15227 			 * fall back to delayed retry.
15228 			 */
15229 			retry_delay = SD_BSY_TIMEOUT;
15230 			statp = kstat_waitq_enter;
15231 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15232 			    "sd_retry_command: immed. retry hit "
15233 			    "throttle!\n");
15234 		} else {
15235 			/*
15236 			 * We're clear to proceed with the immediate retry.
15237 			 * First call the user-provided function (if any)
15238 			 */
15239 			if (user_funcp != NULL) {
15240 				(*user_funcp)(un, bp, user_arg,
15241 				    SD_IMMEDIATE_RETRY_ISSUED);
15242 #ifdef __lock_lint
15243 				sd_print_incomplete_msg(un, bp, user_arg,
15244 				    SD_IMMEDIATE_RETRY_ISSUED);
15245 				sd_print_cmd_incomplete_msg(un, bp, user_arg,
15246 				    SD_IMMEDIATE_RETRY_ISSUED);
15247 				sd_print_sense_failed_msg(un, bp, user_arg,
15248 				    SD_IMMEDIATE_RETRY_ISSUED);
15249 #endif
15250 			}
15251 
15252 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15253 			    "sd_retry_command: issuing immediate retry\n");
15254 
15255 			/*
15256 			 * Call sd_start_cmds() to transport the command to
15257 			 * the target.
15258 			 */
15259 			sd_start_cmds(un, bp);
15260 
15261 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15262 			    "sd_retry_command exit\n");
15263 			return;
15264 		}
15265 	}
15266 
15267 	/*
15268 	 * Set up to retry the command after a delay.
15269 	 * First call the user-provided function (if any)
15270 	 */
15271 	if (user_funcp != NULL) {
15272 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
15273 	}
15274 
15275 	sd_set_retry_bp(un, bp, retry_delay, statp);
15276 
15277 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15278 	return;
15279 
15280 fail_command:
15281 
15282 	if (user_funcp != NULL) {
15283 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
15284 	}
15285 
15286 fail_command_no_log:
15287 
15288 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15289 	    "sd_retry_command: returning failed command\n");
15290 
15291 	sd_return_failed_command(un, bp, failure_code);
15292 
15293 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15294 }
15295 
15296 
15297 /*
15298  *    Function: sd_set_retry_bp
15299  *
15300  * Description: Set up the given bp for retry.
15301  *
15302  *   Arguments: un - ptr to associated softstate
15303  *		bp - ptr to buf(9S) for the command
15304  *		retry_delay - time interval before issuing retry (may be 0)
15305  *		statp - optional pointer to kstat function
15306  *
15307  *     Context: May be called under interrupt context
15308  */
15309 
15310 static void
15311 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
15312 	void (*statp)(kstat_io_t *))
15313 {
15314 	ASSERT(un != NULL);
15315 	ASSERT(mutex_owned(SD_MUTEX(un)));
15316 	ASSERT(bp != NULL);
15317 
15318 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15319 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
15320 
15321 	/*
15322 	 * Indicate that the command is being retried. This will not allow any
15323 	 * other commands on the wait queue to be transported to the target
15324 	 * until this command has been completed (success or failure). The
15325 	 * "retry command" is not transported to the target until the given
15326 	 * time delay expires, unless the user specified a 0 retry_delay.
15327 	 *
15328 	 * Note: the timeout(9F) callback routine is what actually calls
15329 	 * sd_start_cmds() to transport the command, with the exception of a
15330 	 * zero retry_delay. The only current implementor of a zero retry delay
15331 	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
15332 	 */
15333 	if (un->un_retry_bp == NULL) {
15334 		ASSERT(un->un_retry_statp == NULL);
15335 		un->un_retry_bp = bp;
15336 
15337 		/*
15338 		 * If the user has not specified a delay the command should
15339 		 * be queued and no timeout should be scheduled.
15340 		 */
15341 		if (retry_delay == 0) {
15342 			/*
15343 			 * Save the kstat pointer that will be used in the
15344 			 * call to SD_UPDATE_KSTATS() below, so that
15345 			 * sd_start_cmds() can correctly decrement the waitq
15346 			 * count when it is time to transport this command.
15347 			 */
15348 			un->un_retry_statp = statp;
15349 			goto done;
15350 		}
15351 	}
15352 
15353 	if (un->un_retry_bp == bp) {
15354 		/*
15355 		 * Save the kstat pointer that will be used in the call to
15356 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
15357 		 * correctly decrement the waitq count when it is time to
15358 		 * transport this command.
15359 		 */
15360 		un->un_retry_statp = statp;
15361 
15362 		/*
15363 		 * Schedule a timeout if:
15364 		 *   1) The user has specified a delay.
15365 		 *   2) There is not a START_STOP_UNIT callback pending.
15366 		 *
15367 		 * If no delay has been specified, then it is up to the caller
15368 		 * to ensure that IO processing continues without stalling.
15369 		 * Effectively, this means that the caller will issue the
15370 		 * required call to sd_start_cmds(). The START_STOP_UNIT
15371 		 * callback does this after the START STOP UNIT command has
15372 		 * completed. In either of these cases we should not schedule
15373 		 * a timeout callback here.  Also don't schedule the timeout if
15374 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
15375 		 */
15376 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
15377 		    (un->un_direct_priority_timeid == NULL)) {
15378 			un->un_retry_timeid =
15379 			    timeout(sd_start_retry_command, un, retry_delay);
15380 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15381 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
15382 			    " bp:0x%p un_retry_timeid:0x%p\n",
15383 			    un, bp, un->un_retry_timeid);
15384 		}
15385 	} else {
15386 		/*
15387 		 * We only get in here if there is already another command
15388 		 * waiting to be retried.  In this case, we just put the
15389 		 * given command onto the wait queue, so it can be transported
15390 		 * after the current retry command has completed.
15391 		 *
15392 		 * Also we have to make sure that if the command at the head
15393 		 * of the wait queue is the un_failfast_bp, that we do not
15394 		 * put ahead of it any other commands that are to be retried.
15395 		 */
15396 		if ((un->un_failfast_bp != NULL) &&
15397 		    (un->un_failfast_bp == un->un_waitq_headp)) {
15398 			/*
15399 			 * Enqueue this command AFTER the first command on
15400 			 * the wait queue (which is also un_failfast_bp).
15401 			 */
15402 			bp->av_forw = un->un_waitq_headp->av_forw;
15403 			un->un_waitq_headp->av_forw = bp;
15404 			if (un->un_waitq_headp == un->un_waitq_tailp) {
15405 				un->un_waitq_tailp = bp;
15406 			}
15407 		} else {
15408 			/* Enqueue this command at the head of the waitq. */
15409 			bp->av_forw = un->un_waitq_headp;
15410 			un->un_waitq_headp = bp;
15411 			if (un->un_waitq_tailp == NULL) {
15412 				un->un_waitq_tailp = bp;
15413 			}
15414 		}
15415 
15416 		if (statp == NULL) {
15417 			statp = kstat_waitq_enter;
15418 		}
15419 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15420 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
15421 	}
15422 
15423 done:
15424 	if (statp != NULL) {
15425 		SD_UPDATE_KSTATS(un, statp, bp);
15426 	}
15427 
15428 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15429 	    "sd_set_retry_bp: exit un:0x%p\n", un);
15430 }
15431 
15432 
15433 /*
15434  *    Function: sd_start_retry_command
15435  *
15436  * Description: Start the command that has been waiting on the target's
15437  *		retry queue.  Called from timeout(9F) context after the
15438  *		retry delay interval has expired.
15439  *
15440  *   Arguments: arg - pointer to associated softstate for the device.
15441  *
15442  *     Context: timeout(9F) thread context.  May not sleep.
15443  */
15444 
15445 static void
15446 sd_start_retry_command(void *arg)
15447 {
15448 	struct sd_lun *un = arg;
15449 
15450 	ASSERT(un != NULL);
15451 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15452 
15453 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15454 	    "sd_start_retry_command: entry\n");
15455 
15456 	mutex_enter(SD_MUTEX(un));
15457 
15458 	un->un_retry_timeid = NULL;
15459 
15460 	if (un->un_retry_bp != NULL) {
15461 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15462 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
15463 		    un, un->un_retry_bp);
15464 		sd_start_cmds(un, un->un_retry_bp);
15465 	}
15466 
15467 	mutex_exit(SD_MUTEX(un));
15468 
15469 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15470 	    "sd_start_retry_command: exit\n");
15471 }
15472 
15473 
15474 /*
15475  *    Function: sd_start_direct_priority_command
15476  *
15477  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
15478  *		received TRAN_BUSY when we called scsi_transport() to send it
15479  *		to the underlying HBA. This function is called from timeout(9F)
15480  *		context after the delay interval has expired.
15481  *
15482  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
15483  *
15484  *     Context: timeout(9F) thread context.  May not sleep.
15485  */
15486 
15487 static void
15488 sd_start_direct_priority_command(void *arg)
15489 {
15490 	struct buf	*priority_bp = arg;
15491 	struct sd_lun	*un;
15492 
15493 	ASSERT(priority_bp != NULL);
15494 	un = SD_GET_UN(priority_bp);
15495 	ASSERT(un != NULL);
15496 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15497 
15498 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15499 	    "sd_start_direct_priority_command: entry\n");
15500 
15501 	mutex_enter(SD_MUTEX(un));
15502 	un->un_direct_priority_timeid = NULL;
15503 	sd_start_cmds(un, priority_bp);
15504 	mutex_exit(SD_MUTEX(un));
15505 
15506 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15507 	    "sd_start_direct_priority_command: exit\n");
15508 }
15509 
15510 
15511 /*
15512  *    Function: sd_send_request_sense_command
15513  *
15514  * Description: Sends a REQUEST SENSE command to the target
15515  *
15516  *     Context: May be called from interrupt context.
15517  */
15518 
15519 static void
15520 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
15521 	struct scsi_pkt *pktp)
15522 {
15523 	ASSERT(bp != NULL);
15524 	ASSERT(un != NULL);
15525 	ASSERT(mutex_owned(SD_MUTEX(un)));
15526 
15527 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
15528 	    "entry: buf:0x%p\n", bp);
15529 
15530 	/*
15531 	 * If we are syncing or dumping, then fail the command to avoid a
15532 	 * recursive callback into scsi_transport(). Also fail the command
15533 	 * if we are suspended (legacy behavior).
15534 	 */
15535 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
15536 	    (un->un_state == SD_STATE_DUMPING)) {
15537 		sd_return_failed_command(un, bp, EIO);
15538 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15539 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
15540 		return;
15541 	}
15542 
15543 	/*
15544 	 * Retry the failed command and don't issue the request sense if:
15545 	 *    1) the sense buf is busy
15546 	 *    2) we have 1 or more outstanding commands on the target
15547 	 *    (the sense data will be cleared or invalidated any way)
15548 	 *
15549 	 * Note: There could be an issue with not checking a retry limit here,
15550 	 * the problem is determining which retry limit to check.
15551 	 */
15552 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
15553 		/* Don't retry if the command is flagged as non-retryable */
15554 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
15555 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15556 			    NULL, NULL, 0, SD_BSY_TIMEOUT, kstat_waitq_enter);
15557 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15558 			    "sd_send_request_sense_command: "
15559 			    "at full throttle, retrying exit\n");
15560 		} else {
15561 			sd_return_failed_command(un, bp, EIO);
15562 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15563 			    "sd_send_request_sense_command: "
15564 			    "at full throttle, non-retryable exit\n");
15565 		}
15566 		return;
15567 	}
15568 
15569 	sd_mark_rqs_busy(un, bp);
15570 	sd_start_cmds(un, un->un_rqs_bp);
15571 
15572 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15573 	    "sd_send_request_sense_command: exit\n");
15574 }
15575 
15576 
15577 /*
15578  *    Function: sd_mark_rqs_busy
15579  *
15580  * Description: Indicate that the request sense bp for this instance is
15581  *		in use.
15582  *
15583  *     Context: May be called under interrupt context
15584  */
15585 
15586 static void
15587 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
15588 {
15589 	struct sd_xbuf	*sense_xp;
15590 
15591 	ASSERT(un != NULL);
15592 	ASSERT(bp != NULL);
15593 	ASSERT(mutex_owned(SD_MUTEX(un)));
15594 	ASSERT(un->un_sense_isbusy == 0);
15595 
15596 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
15597 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
15598 
15599 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
15600 	ASSERT(sense_xp != NULL);
15601 
15602 	SD_INFO(SD_LOG_IO, un,
15603 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
15604 
15605 	ASSERT(sense_xp->xb_pktp != NULL);
15606 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
15607 	    == (FLAG_SENSING | FLAG_HEAD));
15608 
15609 	un->un_sense_isbusy = 1;
15610 	un->un_rqs_bp->b_resid = 0;
15611 	sense_xp->xb_pktp->pkt_resid  = 0;
15612 	sense_xp->xb_pktp->pkt_reason = 0;
15613 
15614 	/* So we can get back the bp at interrupt time! */
15615 	sense_xp->xb_sense_bp = bp;
15616 
15617 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
15618 
15619 	/*
15620 	 * Mark this buf as awaiting sense data. (This is already set in
15621 	 * the pkt_flags for the RQS packet.)
15622 	 */
15623 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
15624 
15625 	sense_xp->xb_retry_count	= 0;
15626 	sense_xp->xb_victim_retry_count = 0;
15627 	sense_xp->xb_ua_retry_count	= 0;
15628 	sense_xp->xb_dma_resid  = 0;
15629 
15630 	/* Clean up the fields for auto-request sense */
15631 	sense_xp->xb_sense_status = 0;
15632 	sense_xp->xb_sense_state  = 0;
15633 	sense_xp->xb_sense_resid  = 0;
15634 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
15635 
15636 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
15637 }
15638 
15639 
15640 /*
15641  *    Function: sd_mark_rqs_idle
15642  *
15643  * Description: SD_MUTEX must be held continuously through this routine
15644  *		to prevent reuse of the rqs struct before the caller can
15645  *		complete it's processing.
15646  *
15647  * Return Code: Pointer to the RQS buf
15648  *
15649  *     Context: May be called under interrupt context
15650  */
15651 
15652 static struct buf *
15653 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
15654 {
15655 	struct buf *bp;
15656 	ASSERT(un != NULL);
15657 	ASSERT(sense_xp != NULL);
15658 	ASSERT(mutex_owned(SD_MUTEX(un)));
15659 	ASSERT(un->un_sense_isbusy != 0);
15660 
15661 	un->un_sense_isbusy = 0;
15662 	bp = sense_xp->xb_sense_bp;
15663 	sense_xp->xb_sense_bp = NULL;
15664 
15665 	/* This pkt is no longer interested in getting sense data */
15666 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
15667 
15668 	return (bp);
15669 }
15670 
15671 
15672 
15673 /*
15674  *    Function: sd_alloc_rqs
15675  *
15676  * Description: Set up the unit to receive auto request sense data
15677  *
15678  * Return Code: DDI_SUCCESS or DDI_FAILURE
15679  *
15680  *     Context: Called under attach(9E) context
15681  */
15682 
static int
sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
{
	struct sd_xbuf *xp;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_rqs_bp == NULL);
	ASSERT(un->un_rqs_pktp == NULL);

	/*
	 * First allocate the required buf and scsi_pkt structs, then set up
	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
	 * On allocation failure, DDI_FAILURE is returned and any partial
	 * allocation is released via sd_free_rqs().
	 */
	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
	    SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
	if (un->un_rqs_bp == NULL) {
		return (DDI_FAILURE);
	}

	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);

	if (un->un_rqs_pktp == NULL) {
		sd_free_rqs(un);
		return (DDI_FAILURE);
	}

	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
	    SCMD_REQUEST_SENSE, 0, SENSE_LENGTH, 0);

	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);

	/* Set up the other needed members in the ARQ scsi_pkt. */
	un->un_rqs_pktp->pkt_comp   = sdintr;
	un->un_rqs_pktp->pkt_time   = sd_io_time;
	/* FLAG_HEAD queues the RQS pkt ahead of other commands (1222170) */
	un->un_rqs_pktp->pkt_flags |=
	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */

	/*
	 * Allocate  & init the sd_xbuf struct for the RQS command. Do not
	 * provide any initpkt, destroypkt routines as we take care of
	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
	 */
	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
	xp->xb_pktp = un->un_rqs_pktp;
	SD_INFO(SD_LOG_ATTACH_DETACH, un,
	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);

	/*
	 * Save the pointer to the request sense private bp so it can
	 * be retrieved in sdintr.
	 */
	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
	ASSERT(un->un_rqs_bp->b_private == xp);

	/*
	 * See if the HBA supports auto-request sense for the specified
	 * target/lun. If it does, then try to enable it (if not already
	 * enabled).
	 *
	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
	 * return success.  However, in both of these cases ARQ is always
	 * enabled and scsi_ifgetcap will always return true. The best approach
	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
	 *
	 * The 3rd case is the HBA (adp) always return enabled on
	 * scsi_ifgetcap even when it's not enabled, the best approach
	 * is issue a scsi_ifsetcap then a scsi_ifgetcap
	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
	 */

	if (un->un_f_is_fibre == TRUE) {
		/* Fibre channel HBAs are assumed to always have ARQ enabled */
		un->un_f_arq_enabled = TRUE;
	} else {
#if defined(__i386) || defined(__amd64)
		/*
		 * Circumvent the Adaptec bug, remove this code when
		 * the bug is fixed
		 */
		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
#endif
		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
		case 0:
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
				"sd_alloc_rqs: HBA supports ARQ\n");
			/*
			 * ARQ is supported by this HBA but currently is not
			 * enabled. Attempt to enable it and if successful then
			 * mark this instance as ARQ enabled.
			 */
			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
				== 1) {
				/* Successfully enabled ARQ in the HBA */
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
					"sd_alloc_rqs: ARQ enabled\n");
				un->un_f_arq_enabled = TRUE;
			} else {
				/* Could not enable ARQ in the HBA */
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				"sd_alloc_rqs: failed ARQ enable\n");
				un->un_f_arq_enabled = FALSE;
			}
			break;
		case 1:
			/*
			 * ARQ is supported by this HBA and is already enabled.
			 * Just mark ARQ as enabled for this instance.
			 */
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
				"sd_alloc_rqs: ARQ already enabled\n");
			un->un_f_arq_enabled = TRUE;
			break;
		default:
			/*
			 * ARQ is not supported by this HBA; disable it for this
			 * instance.
			 */
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
				"sd_alloc_rqs: HBA does not support ARQ\n");
			un->un_f_arq_enabled = FALSE;
			break;
		}
	}

	return (DDI_SUCCESS);
}
15814 
15815 
15816 /*
15817  *    Function: sd_free_rqs
15818  *
15819  * Description: Cleanup for the pre-instance RQS command.
15820  *
15821  *     Context: Kernel thread context
15822  */
15823 
15824 static void
15825 sd_free_rqs(struct sd_lun *un)
15826 {
15827 	ASSERT(un != NULL);
15828 
15829 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
15830 
15831 	/*
15832 	 * If consistent memory is bound to a scsi_pkt, the pkt
15833 	 * has to be destroyed *before* freeing the consistent memory.
15834 	 * Don't change the sequence of this operations.
15835 	 * scsi_destroy_pkt() might access memory, which isn't allowed,
15836 	 * after it was freed in scsi_free_consistent_buf().
15837 	 */
15838 	if (un->un_rqs_pktp != NULL) {
15839 		scsi_destroy_pkt(un->un_rqs_pktp);
15840 		un->un_rqs_pktp = NULL;
15841 	}
15842 
15843 	if (un->un_rqs_bp != NULL) {
15844 		kmem_free(SD_GET_XBUF(un->un_rqs_bp), sizeof (struct sd_xbuf));
15845 		scsi_free_consistent_buf(un->un_rqs_bp);
15846 		un->un_rqs_bp = NULL;
15847 	}
15848 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
15849 }
15850 
15851 
15852 
15853 /*
15854  *    Function: sd_reduce_throttle
15855  *
 * Description: Reduces the maximum # of outstanding commands on a
 *		target to the current number of outstanding commands.
 *		Queues a timeout(9F) callback to restore the limit
15859  *		after a specified interval has elapsed.
15860  *		Typically used when we get a TRAN_BUSY return code
15861  *		back from scsi_transport().
15862  *
15863  *   Arguments: un - ptr to the sd_lun softstate struct
15864  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
15865  *
15866  *     Context: May be called from interrupt context
15867  */
15868 
15869 static void
15870 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
15871 {
15872 	ASSERT(un != NULL);
15873 	ASSERT(mutex_owned(SD_MUTEX(un)));
15874 	ASSERT(un->un_ncmds_in_transport >= 0);
15875 
15876 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15877 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
15878 	    un, un->un_throttle, un->un_ncmds_in_transport);
15879 
15880 	if (un->un_throttle > 1) {
15881 		if (un->un_f_use_adaptive_throttle == TRUE) {
15882 			switch (throttle_type) {
15883 			case SD_THROTTLE_TRAN_BUSY:
15884 				if (un->un_busy_throttle == 0) {
15885 					un->un_busy_throttle = un->un_throttle;
15886 				}
15887 				break;
15888 			case SD_THROTTLE_QFULL:
15889 				un->un_busy_throttle = 0;
15890 				break;
15891 			default:
15892 				ASSERT(FALSE);
15893 			}
15894 
15895 			if (un->un_ncmds_in_transport > 0) {
15896 			    un->un_throttle = un->un_ncmds_in_transport;
15897 			}
15898 
15899 		} else {
15900 			if (un->un_ncmds_in_transport == 0) {
15901 				un->un_throttle = 1;
15902 			} else {
15903 				un->un_throttle = un->un_ncmds_in_transport;
15904 			}
15905 		}
15906 	}
15907 
15908 	/* Reschedule the timeout if none is currently active */
15909 	if (un->un_reset_throttle_timeid == NULL) {
15910 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
15911 		    un, SD_THROTTLE_RESET_INTERVAL);
15912 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15913 		    "sd_reduce_throttle: timeout scheduled!\n");
15914 	}
15915 
15916 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
15917 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
15918 }
15919 
15920 
15921 
15922 /*
15923  *    Function: sd_restore_throttle
15924  *
15925  * Description: Callback function for timeout(9F).  Resets the current
15926  *		value of un->un_throttle to its default.
15927  *
15928  *   Arguments: arg - pointer to associated softstate for the device.
15929  *
15930  *     Context: May be called from interrupt context
15931  */
15932 
15933 static void
15934 sd_restore_throttle(void *arg)
15935 {
15936 	struct sd_lun	*un = arg;
15937 
15938 	ASSERT(un != NULL);
15939 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15940 
15941 	mutex_enter(SD_MUTEX(un));
15942 
15943 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
15944 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
15945 
15946 	un->un_reset_throttle_timeid = NULL;
15947 
15948 	if (un->un_f_use_adaptive_throttle == TRUE) {
15949 		/*
15950 		 * If un_busy_throttle is nonzero, then it contains the
15951 		 * value that un_throttle was when we got a TRAN_BUSY back
15952 		 * from scsi_transport(). We want to revert back to this
15953 		 * value.
15954 		 *
15955 		 * In the QFULL case, the throttle limit will incrementally
15956 		 * increase until it reaches max throttle.
15957 		 */
15958 		if (un->un_busy_throttle > 0) {
15959 			un->un_throttle = un->un_busy_throttle;
15960 			un->un_busy_throttle = 0;
15961 		} else {
15962 			/*
15963 			 * increase throttle by 10% open gate slowly, schedule
15964 			 * another restore if saved throttle has not been
15965 			 * reached
15966 			 */
15967 			short throttle;
15968 			if (sd_qfull_throttle_enable) {
15969 				throttle = un->un_throttle +
15970 				    max((un->un_throttle / 10), 1);
15971 				un->un_throttle =
15972 				    (throttle < un->un_saved_throttle) ?
15973 				    throttle : un->un_saved_throttle;
15974 				if (un->un_throttle < un->un_saved_throttle) {
15975 				    un->un_reset_throttle_timeid =
15976 					timeout(sd_restore_throttle,
15977 					un, SD_QFULL_THROTTLE_RESET_INTERVAL);
15978 				}
15979 			}
15980 		}
15981 
15982 		/*
15983 		 * If un_throttle has fallen below the low-water mark, we
15984 		 * restore the maximum value here (and allow it to ratchet
15985 		 * down again if necessary).
15986 		 */
15987 		if (un->un_throttle < un->un_min_throttle) {
15988 			un->un_throttle = un->un_saved_throttle;
15989 		}
15990 	} else {
15991 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
15992 		    "restoring limit from 0x%x to 0x%x\n",
15993 		    un->un_throttle, un->un_saved_throttle);
15994 		un->un_throttle = un->un_saved_throttle;
15995 	}
15996 
15997 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15998 	    "sd_restore_throttle: calling sd_start_cmds!\n");
15999 
16000 	sd_start_cmds(un, NULL);
16001 
16002 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16003 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
16004 	    un, un->un_throttle);
16005 
16006 	mutex_exit(SD_MUTEX(un));
16007 
16008 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
16009 }
16010 
16011 /*
16012  *    Function: sdrunout
16013  *
16014  * Description: Callback routine for scsi_init_pkt when a resource allocation
16015  *		fails.
16016  *
16017  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
16018  *		soft state instance.
16019  *
16020  * Return Code: The scsi_init_pkt routine allows for the callback function to
16021  *		return a 0 indicating the callback should be rescheduled or a 1
16022  *		indicating not to reschedule. This routine always returns 1
16023  *		because the driver always provides a callback function to
16024  *		scsi_init_pkt. This results in a callback always being scheduled
16025  *		(via the scsi_init_pkt callback implementation) if a resource
16026  *		failure occurs.
16027  *
16028  *     Context: This callback function may not block or call routines that block
16029  *
16030  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16031  *		request persisting at the head of the list which cannot be
16032  *		satisfied even after multiple retries. In the future the driver
16033  *		may implement some time of maximum runout count before failing
16034  *		an I/O.
16035  */
16036 
16037 static int
16038 sdrunout(caddr_t arg)
16039 {
16040 	struct sd_lun	*un = (struct sd_lun *)arg;
16041 
16042 	ASSERT(un != NULL);
16043 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16044 
16045 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16046 
16047 	mutex_enter(SD_MUTEX(un));
16048 	sd_start_cmds(un, NULL);
16049 	mutex_exit(SD_MUTEX(un));
16050 	/*
16051 	 * This callback routine always returns 1 (i.e. do not reschedule)
16052 	 * because we always specify sdrunout as the callback handler for
16053 	 * scsi_init_pkt inside the call to sd_start_cmds.
16054 	 */
16055 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16056 	return (1);
16057 }
16058 
16059 
16060 /*
16061  *    Function: sdintr
16062  *
16063  * Description: Completion callback routine for scsi_pkt(9S) structs
16064  *		sent to the HBA driver via scsi_transport(9F).
16065  *
16066  *     Context: Interrupt context
16067  */
16068 
static void
sdintr(struct scsi_pkt *pktp)
{
	struct buf	*bp;
	struct sd_xbuf	*xp;
	struct sd_lun	*un;

	ASSERT(pktp != NULL);
	/* pkt_private carries the buf(9S) for this command (see sd_alloc_rqs) */
	bp = (struct buf *)pktp->pkt_private;
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(xp->xb_pktp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

#ifdef SD_FAULT_INJECTION

	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
	/* SD FaultInjection */
	sd_faultinjection(pktp);

#endif /* SD_FAULT_INJECTION */

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
	    " xp:0x%p, un:0x%p\n", bp, xp, un);

	mutex_enter(SD_MUTEX(un));

	/* Reduce the count of the #commands currently in transport */
	un->un_ncmds_in_transport--;
	ASSERT(un->un_ncmds_in_transport >= 0);

	/* Increment counter to indicate that the callback routine is active */
	un->un_in_callback++;

	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);

#ifdef	SDDEBUG
	if (bp == un->un_retry_bp) {
		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
		    un, un->un_retry_bp, un->un_ncmds_in_transport);
	}
#endif

	/*
	 * If pkt_reason is CMD_DEV_GONE, just fail the command
	 */
	if (pktp->pkt_reason == CMD_DEV_GONE) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
			    "Device is gone\n");
		sd_return_failed_command(un, bp, EIO);
		goto exit;
	}

	/*
	 * First see if the pkt has auto-request sense data with it....
	 * Look at the packet state first so we don't take a performance
	 * hit looking at the arq enabled flag unless absolutely necessary.
	 */
	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
	    (un->un_f_arq_enabled == TRUE)) {
		/*
		 * The HBA did an auto request sense for this command so check
		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
		 * driver command that should not be retried.
		 */
		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
			/*
			 * Save the relevant sense info into the xp for the
			 * original cmd.
			 */
			struct scsi_arq_status *asp;
			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
			xp->xb_sense_status =
			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
			xp->xb_sense_state  = asp->sts_rqpkt_state;
			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
			/* copy at most SENSE_LENGTH bytes of sense data */
			bcopy(&asp->sts_sensedata, xp->xb_sense_data,
			    min(sizeof (struct scsi_extended_sense),
			    SENSE_LENGTH));

			/* fail the command */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
			sd_return_failed_command(un, bp, EIO);
			goto exit;
		}

#if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
		/*
		 * We want to either retry or fail this command, so free
		 * the DMA resources here.  If we retry the command then
		 * the DMA resources will be reallocated in sd_start_cmds().
		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
		 * causes the *entire* transfer to start over again from the
		 * beginning of the request, even for PARTIAL chunks that
		 * have already transferred successfully.
		 */
		if ((un->un_f_is_fibre == TRUE) &&
		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
			scsi_dmafree(pktp);
			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
		}
#endif

		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: arq done, sd_handle_auto_request_sense\n");

		sd_handle_auto_request_sense(un, bp, xp, pktp);
		goto exit;
	}

	/* Next see if this is the REQUEST SENSE pkt for the instance */
	if (pktp->pkt_flags & FLAG_SENSING)  {
		/* This pktp is from the unit's REQUEST_SENSE command */
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: sd_handle_request_sense\n");
		sd_handle_request_sense(un, bp, xp, pktp);
		goto exit;
	}

	/*
	 * Check to see if the command successfully completed as requested;
	 * this is the most common case (and also the hot performance path).
	 *
	 * Requirements for successful completion are:
	 * pkt_reason is CMD_CMPLT and packet status is status good.
	 * In addition:
	 * - A residual of zero indicates successful completion no matter what
	 *   the command is.
	 * - If the residual is not zero and the command is not a read or
	 *   write, then it's still defined as successful completion. In other
	 *   words, if the command is a read or write the residual must be
	 *   zero for successful completion.
	 * - If the residual is not zero and the command is a read or
	 *   write, and it's a USCSICMD, then it's still defined as
	 *   successful completion.
	 */
	if ((pktp->pkt_reason == CMD_CMPLT) &&
	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {

		/*
		 * Since this command is returned with a good status, we
		 * can reset the count for Sonoma failover.
		 */
		un->un_sonoma_failure_count = 0;

		/*
		 * Return all USCSI commands on good status
		 */
		if (pktp->pkt_resid == 0) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning command for resid == 0\n");
		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
			SD_UPDATE_B_RESID(bp, pktp);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning command for resid != 0\n");
		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
			SD_UPDATE_B_RESID(bp, pktp);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				"sdintr: returning uscsi command\n");
		} else {
			/* read/write with nonzero resid: treat as failure */
			goto not_successful;
		}
		sd_return_command(un, bp);

		/*
		 * Decrement counter to indicate that the callback routine
		 * is done.
		 */
		un->un_in_callback--;
		ASSERT(un->un_in_callback >= 0);
		mutex_exit(SD_MUTEX(un));

		return;
	}

not_successful:

#if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
	/*
	 * The following is based upon knowledge of the underlying transport
	 * and its use of DMA resources.  This code should be removed when
	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
	 * and sd_start_cmds().
	 *
	 * Free any DMA resources associated with this command if there
	 * is a chance it could be retried or enqueued for later retry.
	 * If we keep the DMA binding then mpxio cannot reissue the
	 * command on another path whenever a path failure occurs.
	 *
	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
	 * causes the *entire* transfer to start over again from the
	 * beginning of the request, even for PARTIAL chunks that
	 * have already transferred successfully.
	 *
	 * This is only done for non-uscsi commands (and also skipped for the
	 * driver's internal RQS command). Also just do this for Fibre Channel
	 * devices as these are the only ones that support mpxio.
	 */
	if ((un->un_f_is_fibre == TRUE) &&
	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
		scsi_dmafree(pktp);
		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
	}
#endif

	/*
	 * The command did not successfully complete as requested so check
	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
	 * driver command that should not be retried so just return. If
	 * FLAG_DIAGNOSE is not set the error will be processed below.
	 */
	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
		/*
		 * Issue a request sense if a check condition caused the error
		 * (we handle the auto request sense case above), otherwise
		 * just fail the command.
		 */
		if ((pktp->pkt_reason == CMD_CMPLT) &&
		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
			sd_send_request_sense_command(un, bp, pktp);
		} else {
			sd_return_failed_command(un, bp, EIO);
		}
		goto exit;
	}

	/*
	 * The command did not successfully complete as requested so process
	 * the error, retry, and/or attempt recovery.
	 */
	switch (pktp->pkt_reason) {
	case CMD_CMPLT:
		switch (SD_GET_PKT_STATUS(pktp)) {
		case STATUS_GOOD:
			/*
			 * The command completed successfully with a non-zero
			 * residual
			 */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_GOOD \n");
			sd_pkt_status_good(un, bp, xp, pktp);
			break;

		case STATUS_CHECK:
		case STATUS_TERMINATED:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
			sd_pkt_status_check_condition(un, bp, xp, pktp);
			break;

		case STATUS_BUSY:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_BUSY\n");
			sd_pkt_status_busy(un, bp, xp, pktp);
			break;

		case STATUS_RESERVATION_CONFLICT:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
			break;

		case STATUS_QFULL:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_QFULL\n");
			sd_pkt_status_qfull(un, bp, xp, pktp);
			break;

		case STATUS_MET:
		case STATUS_INTERMEDIATE:
		case STATUS_SCSI2:
		case STATUS_INTERMEDIATE_MET:
		case STATUS_ACA_ACTIVE:
			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
			    "Unexpected SCSI status received: 0x%x\n",
			    SD_GET_PKT_STATUS(pktp));
			sd_return_failed_command(un, bp, EIO);
			break;

		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
			    "Invalid SCSI status received: 0x%x\n",
			    SD_GET_PKT_STATUS(pktp));
			sd_return_failed_command(un, bp, EIO);
			break;

		}
		break;

	case CMD_INCOMPLETE:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr:  CMD_INCOMPLETE\n");
		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
		break;
	case CMD_TRAN_ERR:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TRAN_ERR\n");
		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
		break;
	case CMD_RESET:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_RESET \n");
		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
		break;
	case CMD_ABORTED:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_ABORTED \n");
		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
		break;
	case CMD_TIMEOUT:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TIMEOUT\n");
		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
		break;
	case CMD_UNX_BUS_FREE:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_UNX_BUS_FREE \n");
		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
		break;
	case CMD_TAG_REJECT:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TAG_REJECT\n");
		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
		break;
	default:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: default\n");
		sd_pkt_reason_default(un, bp, xp, pktp);
		break;
	}

exit:
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");

	/* Decrement counter to indicate that the callback routine is done. */
	un->un_in_callback--;
	ASSERT(un->un_in_callback >= 0);

	/*
	 * At this point, the pkt has been dispatched, ie, it is either
	 * being re-tried or has been returned to its caller and should
	 * not be referenced.
	 */

	mutex_exit(SD_MUTEX(un));
}
16426 
16427 
16428 /*
16429  *    Function: sd_print_incomplete_msg
16430  *
16431  * Description: Prints the error message for a CMD_INCOMPLETE error.
16432  *
16433  *   Arguments: un - ptr to associated softstate for the device.
16434  *		bp - ptr to the buf(9S) for the command.
16435  *		arg - message string ptr
16436  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
16437  *			or SD_NO_RETRY_ISSUED.
16438  *
16439  *     Context: May be called under interrupt context
16440  */
16441 
16442 static void
16443 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
16444 {
16445 	struct scsi_pkt	*pktp;
16446 	char	*msgp;
16447 	char	*cmdp = arg;
16448 
16449 	ASSERT(un != NULL);
16450 	ASSERT(mutex_owned(SD_MUTEX(un)));
16451 	ASSERT(bp != NULL);
16452 	ASSERT(arg != NULL);
16453 	pktp = SD_GET_PKTP(bp);
16454 	ASSERT(pktp != NULL);
16455 
16456 	switch (code) {
16457 	case SD_DELAYED_RETRY_ISSUED:
16458 	case SD_IMMEDIATE_RETRY_ISSUED:
16459 		msgp = "retrying";
16460 		break;
16461 	case SD_NO_RETRY_ISSUED:
16462 	default:
16463 		msgp = "giving up";
16464 		break;
16465 	}
16466 
16467 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
16468 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16469 		    "incomplete %s- %s\n", cmdp, msgp);
16470 	}
16471 }
16472 
16473 
16474 
16475 /*
16476  *    Function: sd_pkt_status_good
16477  *
16478  * Description: Processing for a STATUS_GOOD code in pkt_status.
16479  *
16480  *     Context: May be called under interrupt context
16481  */
16482 
16483 static void
16484 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
16485 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16486 {
16487 	char	*cmdp;
16488 
16489 	ASSERT(un != NULL);
16490 	ASSERT(mutex_owned(SD_MUTEX(un)));
16491 	ASSERT(bp != NULL);
16492 	ASSERT(xp != NULL);
16493 	ASSERT(pktp != NULL);
16494 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
16495 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
16496 	ASSERT(pktp->pkt_resid != 0);
16497 
16498 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
16499 
16500 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
16501 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
16502 	case SCMD_READ:
16503 		cmdp = "read";
16504 		break;
16505 	case SCMD_WRITE:
16506 		cmdp = "write";
16507 		break;
16508 	default:
16509 		SD_UPDATE_B_RESID(bp, pktp);
16510 		sd_return_command(un, bp);
16511 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16512 		return;
16513 	}
16514 
16515 	/*
16516 	 * See if we can retry the read/write, preferrably immediately.
16517 	 * If retries are exhaused, then sd_retry_command() will update
16518 	 * the b_resid count.
16519 	 */
16520 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
16521 	    cmdp, EIO, (clock_t)0, NULL);
16522 
16523 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
16524 }
16525 
16526 
16527 
16528 
16529 
16530 /*
16531  *    Function: sd_handle_request_sense
16532  *
16533  * Description: Processing for non-auto Request Sense command.
16534  *
16535  *   Arguments: un - ptr to associated softstate
16536  *		sense_bp - ptr to buf(9S) for the RQS command
16537  *		sense_xp - ptr to the sd_xbuf for the RQS command
16538  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
16539  *
16540  *     Context: May be called under interrupt context
16541  */
16542 
static void
sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
{
	struct buf	*cmd_bp;	/* buf for the original command */
	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(sense_bp != NULL);
	ASSERT(sense_xp != NULL);
	ASSERT(sense_pktp != NULL);

	/*
	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
	 * RQS command and not the original command.  They must match the
	 * per-instance RQS resources set up in sd_alloc_rqs().
	 */
	ASSERT(sense_pktp == un->un_rqs_pktp);
	ASSERT(sense_bp   == un->un_rqs_bp);
	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
	    (FLAG_SENSING | FLAG_HEAD));
	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
	    FLAG_SENSING) == FLAG_SENSING);

	/* These are the bp, xp, and pktp for the original command */
	cmd_bp = sense_xp->xb_sense_bp;
	cmd_xp = SD_GET_XBUF(cmd_bp);
	cmd_pktp = SD_GET_PKTP(cmd_bp);

	if (sense_pktp->pkt_reason != CMD_CMPLT) {
		/*
		 * The REQUEST SENSE command failed.  Release the REQUEST
		 * SENSE command for re-use, get back the bp for the original
		 * command, and attempt to re-try the original command if
		 * FLAG_DIAGNOSE is not set in the original packet.
		 */
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
			    NULL, NULL, EIO, (clock_t)0, NULL);
			return;
		}
		/* FLAG_DIAGNOSE set: fall through and fail the cmd below */
	}

	/*
	 * Save the relevant sense info into the xp for the original cmd.
	 *
	 * Note: if the request sense failed the state info will be zero
	 * as set in sd_mark_rqs_busy()
	 */
	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
	cmd_xp->xb_sense_resid  = sense_pktp->pkt_resid;
	bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data, SENSE_LENGTH);

	/*
	 *  Free up the RQS command....
	 *  NOTE:
	 *	Must do this BEFORE calling sd_validate_sense_data!
	 *	sd_validate_sense_data may return the original command in
	 *	which case the pkt will be freed and the flags can no
	 *	longer be touched.
	 *	SD_MUTEX is held through this process until the command
	 *	is dispatched based upon the sense data, so there are
	 *	no race conditions.
	 */
	(void) sd_mark_rqs_idle(un, sense_xp);

	/*
	 * For a retryable command see if we have valid sense data, if so then
	 * turn it over to sd_decode_sense() to figure out the right course of
	 * action. Just fail a non-retryable command.
	 */
	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
		if (sd_validate_sense_data(un, cmd_bp, cmd_xp) ==
		    SD_SENSE_DATA_IS_VALID) {
			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
		}
	} else {
		/* Non-retryable: dump the CDB and sense data, fail with EIO */
		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
		sd_return_failed_command(un, cmd_bp, EIO);
	}
}
16631 
16632 
16633 
16634 
16635 /*
16636  *    Function: sd_handle_auto_request_sense
16637  *
16638  * Description: Processing for auto-request sense information.
16639  *
16640  *   Arguments: un - ptr to associated softstate
16641  *		bp - ptr to buf(9S) for the command
16642  *		xp - ptr to the sd_xbuf for the command
16643  *		pktp - ptr to the scsi_pkt(9S) for the command
16644  *
16645  *     Context: May be called under interrupt context
16646  */
16647 
16648 static void
16649 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
16650 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
16651 {
16652 	struct scsi_arq_status *asp;
16653 
16654 	ASSERT(un != NULL);
16655 	ASSERT(mutex_owned(SD_MUTEX(un)));
16656 	ASSERT(bp != NULL);
16657 	ASSERT(xp != NULL);
16658 	ASSERT(pktp != NULL);
16659 	ASSERT(pktp != un->un_rqs_pktp);
16660 	ASSERT(bp   != un->un_rqs_bp);
16661 
16662 	/*
16663 	 * For auto-request sense, we get a scsi_arq_status back from
16664 	 * the HBA, with the sense data in the sts_sensedata member.
16665 	 * The pkt_scbp of the packet points to this scsi_arq_status.
16666 	 */
16667 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
16668 
16669 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
16670 		/*
16671 		 * The auto REQUEST SENSE failed; see if we can re-try
16672 		 * the original command.
16673 		 */
16674 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16675 		    "auto request sense failed (reason=%s)\n",
16676 		    scsi_rname(asp->sts_rqpkt_reason));
16677 
16678 		sd_reset_target(un, pktp);
16679 
16680 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
16681 		    NULL, NULL, EIO, (clock_t)0, NULL);
16682 		return;
16683 	}
16684 
16685 	/* Save the relevant sense info into the xp for the original cmd. */
16686 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
16687 	xp->xb_sense_state  = asp->sts_rqpkt_state;
16688 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
16689 	bcopy(&asp->sts_sensedata, xp->xb_sense_data,
16690 	    min(sizeof (struct scsi_extended_sense), SENSE_LENGTH));
16691 
16692 	/*
16693 	 * See if we have valid sense data, if so then turn it over to
16694 	 * sd_decode_sense() to figure out the right course of action.
16695 	 */
16696 	if (sd_validate_sense_data(un, bp, xp) == SD_SENSE_DATA_IS_VALID) {
16697 		sd_decode_sense(un, bp, xp, pktp);
16698 	}
16699 }
16700 
16701 
16702 /*
16703  *    Function: sd_print_sense_failed_msg
16704  *
16705  * Description: Print log message when RQS has failed.
16706  *
16707  *   Arguments: un - ptr to associated softstate
16708  *		bp - ptr to buf(9S) for the command
16709  *		arg - generic message string ptr
16710  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
16711  *			or SD_NO_RETRY_ISSUED
16712  *
16713  *     Context: May be called from interrupt context
16714  */
16715 
16716 static void
16717 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
16718 	int code)
16719 {
16720 	char	*msgp = arg;
16721 
16722 	ASSERT(un != NULL);
16723 	ASSERT(mutex_owned(SD_MUTEX(un)));
16724 	ASSERT(bp != NULL);
16725 
16726 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
16727 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
16728 	}
16729 }
16730 
16731 
16732 /*
16733  *    Function: sd_validate_sense_data
16734  *
16735  * Description: Check the given sense data for validity.
16736  *		If the sense data is not valid, the command will
16737  *		be either failed or retried!
16738  *
16739  * Return Code: SD_SENSE_DATA_IS_INVALID
16740  *		SD_SENSE_DATA_IS_VALID
16741  *
16742  *     Context: May be called from interrupt context
16743  */
16744 
static int
sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp)
{
	struct scsi_extended_sense *esp;
	struct	scsi_pkt *pktp;
	size_t	actual_len;
	char	*msgp = NULL;	/* failure message for sd_print_sense_failed_msg */

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(bp != un->un_rqs_bp);
	ASSERT(xp != NULL);

	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	/*
	 * Check the status of the RQS command (auto or manual).
	 */
	switch (xp->xb_sense_status & STATUS_MASK) {
	case STATUS_GOOD:
		break;

	case STATUS_RESERVATION_CONFLICT:
		/* Reservation conflict on the RQS itself; hand it off. */
		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_BUSY:
		/* Target busy: retry the original command after a delay. */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "Busy Status on REQUEST SENSE\n");
		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_QFULL:
		/* Queue full: retry the original command after a delay. */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "QFULL Status on REQUEST SENSE\n");
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
		    NULL, EIO, SD_BSY_TIMEOUT / 500, kstat_waitq_enter);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_CHECK:
	case STATUS_TERMINATED:
		msgp = "Check Condition on REQUEST SENSE\n";
		goto sense_failed;

	default:
		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
		goto sense_failed;
	}

	/*
	 * See if we got the minimum required amount of sense data.
	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
	 * or less.
	 */
	actual_len = (int)(SENSE_LENGTH - xp->xb_sense_resid);
	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
	    (actual_len == 0)) {
		msgp = "Request Sense couldn't get sense data\n";
		goto sense_failed;
	}

	if (actual_len < SUN_MIN_SENSE_LENGTH) {
		msgp = "Not enough sense information\n";
		goto sense_failed;
	}

	/*
	 * We require the extended sense data
	 */
	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
	if (esp->es_class != CLASS_EXTENDED_SENSE) {
		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
			/*
			 * Dump the raw sense bytes in hex.  tmp/buf are
			 * static, so sd_sense_mutex serializes their use.
			 */
			static char tmp[8];
			static char buf[148];
			char *p = (char *)(xp->xb_sense_data);
			int i;

			mutex_enter(&sd_sense_mutex);
			(void) strcpy(buf, "undecodable sense information:");
			for (i = 0; i < actual_len; i++) {
				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
				(void) strcpy(&buf[strlen(buf)], tmp);
			}
			i = strlen(buf);
			(void) strcpy(&buf[i], "-(assumed fatal)\n");
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, buf);
			mutex_exit(&sd_sense_mutex);
		}
		/* Note: Legacy behavior, fail the command with no retry */
		sd_return_failed_command(un, bp, EIO);
		return (SD_SENSE_DATA_IS_INVALID);
	}

	/*
	 * Check that es_code is valid (es_class concatenated with es_code
	 * make up the "response code" field.  es_class will always be 7, so
	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code will indicate the
	 * format.
	 */
	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
		goto sense_failed;
	}

	return (SD_SENSE_DATA_IS_VALID);

sense_failed:
	/*
	 * If the request sense failed (for whatever reason), attempt
	 * to retry the original command.
	 */
#if defined(__i386) || defined(__amd64)
	/*
	 * SD_RETRY_DELAY is conditionally compile (#if fibre) in
	 * sddef.h for Sparc platform, and x86 uses 1 binary
	 * for both SCSI/FC.
	 * The SD_RETRY_DELAY value need to be adjusted here
	 * when SD_RETRY_DELAY change in sddef.h
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
	    sd_print_sense_failed_msg, msgp, EIO,
		un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0, NULL);
#else
	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
#endif

	return (SD_SENSE_DATA_IS_INVALID);
}
16880 
16881 
16882 
16883 /*
16884  *    Function: sd_decode_sense
16885  *
16886  * Description: Take recovery action(s) when SCSI Sense Data is received.
16887  *
16888  *     Context: Interrupt context.
16889  */
16890 
16891 static void
16892 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
16893 	struct scsi_pkt *pktp)
16894 {
16895 	struct scsi_extended_sense *esp;
16896 	struct scsi_descr_sense_hdr *sdsp;
16897 	uint8_t asc, ascq, sense_key;
16898 
16899 	ASSERT(un != NULL);
16900 	ASSERT(mutex_owned(SD_MUTEX(un)));
16901 	ASSERT(bp != NULL);
16902 	ASSERT(bp != un->un_rqs_bp);
16903 	ASSERT(xp != NULL);
16904 	ASSERT(pktp != NULL);
16905 
16906 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
16907 
16908 	switch (esp->es_code) {
16909 	case CODE_FMT_DESCR_CURRENT:
16910 	case CODE_FMT_DESCR_DEFERRED:
16911 		sdsp = (struct scsi_descr_sense_hdr *)xp->xb_sense_data;
16912 		sense_key = sdsp->ds_key;
16913 		asc = sdsp->ds_add_code;
16914 		ascq = sdsp->ds_qual_code;
16915 		break;
16916 	case CODE_FMT_VENDOR_SPECIFIC:
16917 	case CODE_FMT_FIXED_CURRENT:
16918 	case CODE_FMT_FIXED_DEFERRED:
16919 	default:
16920 		sense_key = esp->es_key;
16921 		asc = esp->es_add_code;
16922 		ascq = esp->es_qual_code;
16923 		break;
16924 	}
16925 
16926 	switch (sense_key) {
16927 	case KEY_NO_SENSE:
16928 		sd_sense_key_no_sense(un, bp, xp, pktp);
16929 		break;
16930 	case KEY_RECOVERABLE_ERROR:
16931 		sd_sense_key_recoverable_error(un, asc, bp, xp, pktp);
16932 		break;
16933 	case KEY_NOT_READY:
16934 		sd_sense_key_not_ready(un, asc, ascq, bp, xp, pktp);
16935 		break;
16936 	case KEY_MEDIUM_ERROR:
16937 	case KEY_HARDWARE_ERROR:
16938 		sd_sense_key_medium_or_hardware_error(un,
16939 		    sense_key, asc, bp, xp, pktp);
16940 		break;
16941 	case KEY_ILLEGAL_REQUEST:
16942 		sd_sense_key_illegal_request(un, bp, xp, pktp);
16943 		break;
16944 	case KEY_UNIT_ATTENTION:
16945 		sd_sense_key_unit_attention(un, asc, bp, xp, pktp);
16946 		break;
16947 	case KEY_WRITE_PROTECT:
16948 	case KEY_VOLUME_OVERFLOW:
16949 	case KEY_MISCOMPARE:
16950 		sd_sense_key_fail_command(un, bp, xp, pktp);
16951 		break;
16952 	case KEY_BLANK_CHECK:
16953 		sd_sense_key_blank_check(un, bp, xp, pktp);
16954 		break;
16955 	case KEY_ABORTED_COMMAND:
16956 		sd_sense_key_aborted_command(un, bp, xp, pktp);
16957 		break;
16958 	case KEY_VENDOR_UNIQUE:
16959 	case KEY_COPY_ABORTED:
16960 	case KEY_EQUAL:
16961 	case KEY_RESERVED:
16962 	default:
16963 		sd_sense_key_default(un, sense_key, bp, xp, pktp);
16964 		break;
16965 	}
16966 }
16967 
16968 
16969 /*
16970  *    Function: sd_dump_memory
16971  *
16972  * Description: Debug logging routine to print the contents of a user provided
16973  *		buffer. The output of the buffer is broken up into 256 byte
16974  *		segments due to a size constraint of the scsi_log.
16975  *		implementation.
16976  *
16977  *   Arguments: un - ptr to softstate
16978  *		comp - component mask
16979  *		title - "title" string to preceed data when printed
16980  *		data - ptr to data block to be printed
16981  *		len - size of data block to be printed
16982  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
16983  *
16984  *     Context: May be called from interrupt context
16985  */
16986 
/* Size of the scratch line buffer used by sd_dump_memory() (scsi_log limit). */
#define	SD_DUMP_MEMORY_BUF_SIZE	256

/* Per-element format strings: index 0 = SD_LOG_HEX, index 1 = SD_LOG_CHAR. */
static char *sd_dump_format_string[] = {
		" 0x%02x",
		" %c"
};
16993 
16994 static void
16995 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
16996     int len, int fmt)
16997 {
16998 	int	i, j;
16999 	int	avail_count;
17000 	int	start_offset;
17001 	int	end_offset;
17002 	size_t	entry_len;
17003 	char	*bufp;
17004 	char	*local_buf;
17005 	char	*format_string;
17006 
17007 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
17008 
17009 	/*
17010 	 * In the debug version of the driver, this function is called from a
17011 	 * number of places which are NOPs in the release driver.
17012 	 * The debug driver therefore has additional methods of filtering
17013 	 * debug output.
17014 	 */
17015 #ifdef SDDEBUG
17016 	/*
17017 	 * In the debug version of the driver we can reduce the amount of debug
17018 	 * messages by setting sd_error_level to something other than
17019 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
17020 	 * sd_component_mask.
17021 	 */
17022 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
17023 	    (sd_error_level != SCSI_ERR_ALL)) {
17024 		return;
17025 	}
17026 	if (((sd_component_mask & comp) == 0) ||
17027 	    (sd_error_level != SCSI_ERR_ALL)) {
17028 		return;
17029 	}
17030 #else
17031 	if (sd_error_level != SCSI_ERR_ALL) {
17032 		return;
17033 	}
17034 #endif
17035 
17036 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
17037 	bufp = local_buf;
17038 	/*
17039 	 * Available length is the length of local_buf[], minus the
17040 	 * length of the title string, minus one for the ":", minus
17041 	 * one for the newline, minus one for the NULL terminator.
17042 	 * This gives the #bytes available for holding the printed
17043 	 * values from the given data buffer.
17044 	 */
17045 	if (fmt == SD_LOG_HEX) {
17046 		format_string = sd_dump_format_string[0];
17047 	} else /* SD_LOG_CHAR */ {
17048 		format_string = sd_dump_format_string[1];
17049 	}
17050 	/*
17051 	 * Available count is the number of elements from the given
17052 	 * data buffer that we can fit into the available length.
17053 	 * This is based upon the size of the format string used.
17054 	 * Make one entry and find it's size.
17055 	 */
17056 	(void) sprintf(bufp, format_string, data[0]);
17057 	entry_len = strlen(bufp);
17058 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
17059 
17060 	j = 0;
17061 	while (j < len) {
17062 		bufp = local_buf;
17063 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
17064 		start_offset = j;
17065 
17066 		end_offset = start_offset + avail_count;
17067 
17068 		(void) sprintf(bufp, "%s:", title);
17069 		bufp += strlen(bufp);
17070 		for (i = start_offset; ((i < end_offset) && (j < len));
17071 		    i++, j++) {
17072 			(void) sprintf(bufp, format_string, data[i]);
17073 			bufp += entry_len;
17074 		}
17075 		(void) sprintf(bufp, "\n");
17076 
17077 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
17078 	}
17079 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
17080 }
17081 
17082 /*
17083  *    Function: sd_print_sense_msg
17084  *
17085  * Description: Log a message based upon the given sense data.
17086  *
17087  *   Arguments: un - ptr to associated softstate
17088  *		bp - ptr to buf(9S) for the command
17089  *		arg - ptr to associate sd_sense_info struct
17090  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17091  *			or SD_NO_RETRY_ISSUED
17092  *
17093  *     Context: May be called from interrupt context
17094  */
17095 
static void
sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	struct scsi_extended_sense *sensep;
	daddr_t request_blkno;
	diskaddr_t err_blkno;
	int severity;
	int pfa_flag;
	int fixed_format = TRUE;	/* FALSE for descriptor-format sense */
	extern struct scsi_key_strings scsi_cmds[];

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);
	ASSERT(arg != NULL);

	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;

	/* A pending retry downgrades the reported severity to retryable. */
	if ((code == SD_DELAYED_RETRY_ISSUED) ||
	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
		severity = SCSI_ERR_RETRYABLE;
	}

	/* Use absolute block number for the request block number */
	request_blkno = xp->xb_blkno;

	/*
	 * Now try to get the error block number from the sense data
	 */
	sensep = (struct scsi_extended_sense *)xp->xb_sense_data;
	switch (sensep->es_code) {
	case CODE_FMT_DESCR_CURRENT:
	case CODE_FMT_DESCR_DEFERRED:
		/* Descriptor format: walk descriptors for the info field. */
		err_blkno =
		    sd_extract_sense_info_descr(
			(struct scsi_descr_sense_hdr *)sensep);
		fixed_format = FALSE;
		break;
	case CODE_FMT_FIXED_CURRENT:
	case CODE_FMT_FIXED_DEFERRED:
	case CODE_FMT_VENDOR_SPECIFIC:
	default:
		/*
		 * With the es_valid bit set, we assume that the error
		 * blkno is in the sense data.  Also, if xp->xb_blkno is
		 * greater than 0xffffffff then the target *should* have used
		 * a descriptor sense format (or it shouldn't have set
		 * the es_valid bit), and we may as well ignore the
		 * 32-bit value.
		 */
		if ((sensep->es_valid != 0) && (xp->xb_blkno <= 0xffffffff)) {
			err_blkno = (diskaddr_t)
			    ((sensep->es_info_1 << 24) |
			    (sensep->es_info_2 << 16) |
			    (sensep->es_info_3 << 8)  |
			    (sensep->es_info_4));
		} else {
			err_blkno = (diskaddr_t)-1;
		}
		break;
	}

	if (err_blkno == (diskaddr_t)-1) {
		/*
		 * Without the es_valid bit set (for fixed format) or an
		 * information descriptor (for descriptor format) we cannot
		 * be certain of the error blkno, so just use the
		 * request_blkno.
		 */
		err_blkno = (diskaddr_t)request_blkno;
	} else {
		/*
		 * We retrieved the error block number from the information
		 * portion of the sense data.
		 *
		 * For USCSI commands we are better off using the error
		 * block no. as the requested block no. (This is the best
		 * we can estimate.)
		 */
		if ((SD_IS_BUFIO(xp) == FALSE) &&
		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
			request_blkno = err_blkno;
		}
	}

	/*
	 * The following will log the buffer contents for the release driver
	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
	 * level is set to verbose.
	 */
	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);

	if (pfa_flag == FALSE) {
		/* This is normally only set for USCSI */
		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
			return;
		}

		/* Suppress buf-I/O messages below the configured level. */
		if ((SD_IS_BUFIO(xp) == TRUE) &&
		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
		    (severity < sd_error_level))) {
			return;
		}
	}

	/*
	 * If the data is fixed format then check for Sonoma Failover,
	 * and keep a count of how many failed I/O's.  We should not have
	 * to worry about Sonoma returning descriptor format sense data,
	 * and asc/ascq are in a different location in descriptor format.
	 */
	if (fixed_format &&
	    (SD_IS_LSI(un)) && (sensep->es_key == KEY_ILLEGAL_REQUEST) &&
	    (sensep->es_add_code == 0x94) && (sensep->es_qual_code == 0x01)) {
		un->un_sonoma_failure_count++;
		if (un->un_sonoma_failure_count > 1) {
			/* Only log the first Sonoma failover failure. */
			return;
		}
	}

	scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
	    request_blkno, err_blkno, scsi_cmds, sensep,
	    un->un_additional_codes, NULL);
}
17230 
17231 /*
17232  *    Function: sd_extract_sense_info_descr
17233  *
17234  * Description: Retrieve "information" field from descriptor format
17235  *              sense data.  Iterates through each sense descriptor
17236  *              looking for the information descriptor and returns
17237  *              the information field from that descriptor.
17238  *
17239  *     Context: May be called from interrupt context
17240  */
17241 
17242 static diskaddr_t
17243 sd_extract_sense_info_descr(struct scsi_descr_sense_hdr *sdsp)
17244 {
17245 	diskaddr_t result;
17246 	uint8_t *descr_offset;
17247 	int valid_sense_length;
17248 	struct scsi_information_sense_descr *isd;
17249 
17250 	/*
17251 	 * Initialize result to -1 indicating there is no information
17252 	 * descriptor
17253 	 */
17254 	result = (diskaddr_t)-1;
17255 
17256 	/*
17257 	 * The first descriptor will immediately follow the header
17258 	 */
17259 	descr_offset = (uint8_t *)(sdsp+1); /* Pointer arithmetic */
17260 
17261 	/*
17262 	 * Calculate the amount of valid sense data
17263 	 */
17264 	valid_sense_length =
17265 	    min((sizeof (struct scsi_descr_sense_hdr) +
17266 	    sdsp->ds_addl_sense_length),
17267 	    SENSE_LENGTH);
17268 
17269 	/*
17270 	 * Iterate through the list of descriptors, stopping when we
17271 	 * run out of sense data
17272 	 */
17273 	while ((descr_offset + sizeof (struct scsi_information_sense_descr)) <=
17274 	    (uint8_t *)sdsp + valid_sense_length) {
17275 		/*
17276 		 * Check if this is an information descriptor.  We can
17277 		 * use the scsi_information_sense_descr structure as a
17278 		 * template sense the first two fields are always the
17279 		 * same
17280 		 */
17281 		isd = (struct scsi_information_sense_descr *)descr_offset;
17282 		if (isd->isd_descr_type == DESCR_INFORMATION) {
17283 			/*
17284 			 * Found an information descriptor.  Copy the
17285 			 * information field.  There will only be one
17286 			 * information descriptor so we can stop looking.
17287 			 */
17288 			result =
17289 			    (((diskaddr_t)isd->isd_information[0] << 56) |
17290 				((diskaddr_t)isd->isd_information[1] << 48) |
17291 				((diskaddr_t)isd->isd_information[2] << 40) |
17292 				((diskaddr_t)isd->isd_information[3] << 32) |
17293 				((diskaddr_t)isd->isd_information[4] << 24) |
17294 				((diskaddr_t)isd->isd_information[5] << 16) |
17295 				((diskaddr_t)isd->isd_information[6] << 8)  |
17296 				((diskaddr_t)isd->isd_information[7]));
17297 			break;
17298 		}
17299 
17300 		/*
17301 		 * Get pointer to the next descriptor.  The "additional
17302 		 * length" field holds the length of the descriptor except
17303 		 * for the "type" and "additional length" fields, so
17304 		 * we need to add 2 to get the total length.
17305 		 */
17306 		descr_offset += (isd->isd_addl_length + 2);
17307 	}
17308 
17309 	return (result);
17310 }
17311 
17312 /*
17313  *    Function: sd_sense_key_no_sense
17314  *
17315  * Description: Recovery action when sense data was not received.
17316  *
17317  *     Context: May be called from interrupt context
17318  */
17319 
17320 static void
17321 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
17322 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17323 {
17324 	struct sd_sense_info	si;
17325 
17326 	ASSERT(un != NULL);
17327 	ASSERT(mutex_owned(SD_MUTEX(un)));
17328 	ASSERT(bp != NULL);
17329 	ASSERT(xp != NULL);
17330 	ASSERT(pktp != NULL);
17331 
17332 	si.ssi_severity = SCSI_ERR_FATAL;
17333 	si.ssi_pfa_flag = FALSE;
17334 
17335 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17336 
17337 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17338 		&si, EIO, (clock_t)0, NULL);
17339 }
17340 
17341 
17342 /*
17343  *    Function: sd_sense_key_recoverable_error
17344  *
17345  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17346  *
17347  *     Context: May be called from interrupt context
17348  */
17349 
17350 static void
17351 sd_sense_key_recoverable_error(struct sd_lun *un,
17352 	uint8_t asc,
17353 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17354 {
17355 	struct sd_sense_info	si;
17356 
17357 	ASSERT(un != NULL);
17358 	ASSERT(mutex_owned(SD_MUTEX(un)));
17359 	ASSERT(bp != NULL);
17360 	ASSERT(xp != NULL);
17361 	ASSERT(pktp != NULL);
17362 
17363 	/*
17364 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17365 	 */
17366 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17367 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17368 		si.ssi_severity = SCSI_ERR_INFO;
17369 		si.ssi_pfa_flag = TRUE;
17370 	} else {
17371 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
17372 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
17373 		si.ssi_severity = SCSI_ERR_RECOVERED;
17374 		si.ssi_pfa_flag = FALSE;
17375 	}
17376 
17377 	if (pktp->pkt_resid == 0) {
17378 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17379 		sd_return_command(un, bp);
17380 		return;
17381 	}
17382 
17383 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17384 	    &si, EIO, (clock_t)0, NULL);
17385 }
17386 
17387 
17388 
17389 
17390 /*
17391  *    Function: sd_sense_key_not_ready
17392  *
17393  * Description: Recovery actions for a SCSI "Not Ready" sense key.
17394  *
17395  *     Context: May be called from interrupt context
17396  */
17397 
17398 static void
17399 sd_sense_key_not_ready(struct sd_lun *un,
17400 	uint8_t asc, uint8_t ascq,
17401 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17402 {
17403 	struct sd_sense_info	si;
17404 
17405 	ASSERT(un != NULL);
17406 	ASSERT(mutex_owned(SD_MUTEX(un)));
17407 	ASSERT(bp != NULL);
17408 	ASSERT(xp != NULL);
17409 	ASSERT(pktp != NULL);
17410 
17411 	si.ssi_severity = SCSI_ERR_FATAL;
17412 	si.ssi_pfa_flag = FALSE;
17413 
17414 	/*
17415 	 * Update error stats after first NOT READY error. Disks may have
17416 	 * been powered down and may need to be restarted.  For CDROMs,
17417 	 * report NOT READY errors only if media is present.
17418 	 */
17419 	if ((ISCD(un) && (un->un_f_geometry_is_valid == TRUE)) ||
17420 	    (xp->xb_retry_count > 0)) {
17421 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17422 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
17423 	}
17424 
17425 	/*
17426 	 * Just fail if the "not ready" retry limit has been reached.
17427 	 */
17428 	if (xp->xb_retry_count >= un->un_notready_retry_count) {
17429 		/* Special check for error message printing for removables. */
17430 		if (un->un_f_has_removable_media && (asc == 0x04) &&
17431 		    (ascq >= 0x04)) {
17432 			si.ssi_severity = SCSI_ERR_ALL;
17433 		}
17434 		goto fail_command;
17435 	}
17436 
17437 	/*
17438 	 * Check the ASC and ASCQ in the sense data as needed, to determine
17439 	 * what to do.
17440 	 */
17441 	switch (asc) {
17442 	case 0x04:	/* LOGICAL UNIT NOT READY */
17443 		/*
17444 		 * disk drives that don't spin up result in a very long delay
17445 		 * in format without warning messages. We will log a message
17446 		 * if the error level is set to verbose.
17447 		 */
17448 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17449 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17450 			    "logical unit not ready, resetting disk\n");
17451 		}
17452 
17453 		/*
17454 		 * There are different requirements for CDROMs and disks for
17455 		 * the number of retries.  If a CD-ROM is giving this, it is
17456 		 * probably reading TOC and is in the process of getting
17457 		 * ready, so we should keep on trying for a long time to make
17458 		 * sure that all types of media are taken in account (for
17459 		 * some media the drive takes a long time to read TOC).  For
17460 		 * disks we do not want to retry this too many times as this
17461 		 * can cause a long hang in format when the drive refuses to
17462 		 * spin up (a very common failure).
17463 		 */
17464 		switch (ascq) {
17465 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
17466 			/*
17467 			 * Disk drives frequently refuse to spin up which
17468 			 * results in a very long hang in format without
17469 			 * warning messages.
17470 			 *
17471 			 * Note: This code preserves the legacy behavior of
17472 			 * comparing xb_retry_count against zero for fibre
17473 			 * channel targets instead of comparing against the
17474 			 * un_reset_retry_count value.  The reason for this
17475 			 * discrepancy has been so utterly lost beneath the
17476 			 * Sands of Time that even Indiana Jones could not
17477 			 * find it.
17478 			 */
17479 			if (un->un_f_is_fibre == TRUE) {
17480 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17481 					(xp->xb_retry_count > 0)) &&
17482 					(un->un_startstop_timeid == NULL)) {
17483 					scsi_log(SD_DEVINFO(un), sd_label,
17484 					CE_WARN, "logical unit not ready, "
17485 					"resetting disk\n");
17486 					sd_reset_target(un, pktp);
17487 				}
17488 			} else {
17489 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
17490 					(xp->xb_retry_count >
17491 					un->un_reset_retry_count)) &&
17492 					(un->un_startstop_timeid == NULL)) {
17493 					scsi_log(SD_DEVINFO(un), sd_label,
17494 					CE_WARN, "logical unit not ready, "
17495 					"resetting disk\n");
17496 					sd_reset_target(un, pktp);
17497 				}
17498 			}
17499 			break;
17500 
17501 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
17502 			/*
17503 			 * If the target is in the process of becoming
17504 			 * ready, just proceed with the retry. This can
17505 			 * happen with CD-ROMs that take a long time to
17506 			 * read TOC after a power cycle or reset.
17507 			 */
17508 			goto do_retry;
17509 
17510 		case 0x02:  /* LUN NOT READY, INITITIALIZING CMD REQUIRED */
17511 			break;
17512 
17513 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
17514 			/*
17515 			 * Retries cannot help here so just fail right away.
17516 			 */
17517 			goto fail_command;
17518 
17519 		case 0x88:
17520 			/*
17521 			 * Vendor-unique code for T3/T4: it indicates a
17522 			 * path problem in a mutipathed config, but as far as
17523 			 * the target driver is concerned it equates to a fatal
17524 			 * error, so we should just fail the command right away
17525 			 * (without printing anything to the console). If this
17526 			 * is not a T3/T4, fall thru to the default recovery
17527 			 * action.
17528 			 * T3/T4 is FC only, don't need to check is_fibre
17529 			 */
17530 			if (SD_IS_T3(un) || SD_IS_T4(un)) {
17531 				sd_return_failed_command(un, bp, EIO);
17532 				return;
17533 			}
17534 			/* FALLTHRU */
17535 
17536 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
17537 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
17538 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
17539 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
17540 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
17541 		default:    /* Possible future codes in SCSI spec? */
17542 			/*
17543 			 * For removable-media devices, do not retry if
17544 			 * ASCQ > 2 as these result mostly from USCSI commands
17545 			 * on MMC devices issued to check status of an
17546 			 * operation initiated in immediate mode.  Also for
17547 			 * ASCQ >= 4 do not print console messages as these
17548 			 * mainly represent a user-initiated operation
17549 			 * instead of a system failure.
17550 			 */
17551 			if (un->un_f_has_removable_media) {
17552 				si.ssi_severity = SCSI_ERR_ALL;
17553 				goto fail_command;
17554 			}
17555 			break;
17556 		}
17557 
17558 		/*
17559 		 * As part of our recovery attempt for the NOT READY
17560 		 * condition, we issue a START STOP UNIT command. However
17561 		 * we want to wait for a short delay before attempting this
17562 		 * as there may still be more commands coming back from the
17563 		 * target with the check condition. To do this we use
17564 		 * timeout(9F) to call sd_start_stop_unit_callback() after
17565 		 * the delay interval expires. (sd_start_stop_unit_callback()
17566 		 * dispatches sd_start_stop_unit_task(), which will issue
17567 		 * the actual START STOP UNIT command. The delay interval
17568 		 * is one-half of the delay that we will use to retry the
17569 		 * command that generated the NOT READY condition.
17570 		 *
17571 		 * Note that we could just dispatch sd_start_stop_unit_task()
17572 		 * from here and allow it to sleep for the delay interval,
17573 		 * but then we would be tying up the taskq thread
17574 		 * uncesessarily for the duration of the delay.
17575 		 *
17576 		 * Do not issue the START STOP UNIT if the current command
17577 		 * is already a START STOP UNIT.
17578 		 */
17579 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
17580 			break;
17581 		}
17582 
17583 		/*
17584 		 * Do not schedule the timeout if one is already pending.
17585 		 */
17586 		if (un->un_startstop_timeid != NULL) {
17587 			SD_INFO(SD_LOG_ERROR, un,
17588 			    "sd_sense_key_not_ready: restart already issued to"
17589 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
17590 			    ddi_get_instance(SD_DEVINFO(un)));
17591 			break;
17592 		}
17593 
17594 		/*
17595 		 * Schedule the START STOP UNIT command, then queue the command
17596 		 * for a retry.
17597 		 *
17598 		 * Note: A timeout is not scheduled for this retry because we
17599 		 * want the retry to be serial with the START_STOP_UNIT. The
17600 		 * retry will be started when the START_STOP_UNIT is completed
17601 		 * in sd_start_stop_unit_task.
17602 		 */
17603 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
17604 		    un, SD_BSY_TIMEOUT / 2);
17605 		xp->xb_retry_count++;
17606 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
17607 		return;
17608 
17609 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
17610 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17611 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17612 			    "unit does not respond to selection\n");
17613 		}
17614 		break;
17615 
17616 	case 0x3A:	/* MEDIUM NOT PRESENT */
17617 		if (sd_error_level >= SCSI_ERR_FATAL) {
17618 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17619 			    "Caddy not inserted in drive\n");
17620 		}
17621 
17622 		sr_ejected(un);
17623 		un->un_mediastate = DKIO_EJECTED;
17624 		/* The state has changed, inform the media watch routines */
17625 		cv_broadcast(&un->un_state_cv);
17626 		/* Just fail if no media is present in the drive. */
17627 		goto fail_command;
17628 
17629 	default:
17630 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
17631 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
17632 			    "Unit not Ready. Additional sense code 0x%x\n",
17633 			    asc);
17634 		}
17635 		break;
17636 	}
17637 
17638 do_retry:
17639 
17640 	/*
17641 	 * Retry the command, as some targets may report NOT READY for
17642 	 * several seconds after being reset.
17643 	 */
17644 	xp->xb_retry_count++;
17645 	si.ssi_severity = SCSI_ERR_RETRYABLE;
17646 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
17647 	    &si, EIO, SD_BSY_TIMEOUT, NULL);
17648 
17649 	return;
17650 
17651 fail_command:
17652 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17653 	sd_return_failed_command(un, bp, EIO);
17654 }
17655 
17656 
17657 
17658 /*
17659  *    Function: sd_sense_key_medium_or_hardware_error
17660  *
17661  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
17662  *		sense key.
17663  *
17664  *     Context: May be called from interrupt context
17665  */
17666 
static void
sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
	int sense_key, uint8_t asc,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct sd_sense_info	si;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/* Both sense keys are reported with fatal severity in the message. */
	si.ssi_severity = SCSI_ERR_FATAL;
	si.ssi_pfa_flag = FALSE;

	/* Medium errors get a dedicated kstat in addition to sd_harderrs. */
	if (sense_key == KEY_MEDIUM_ERROR) {
		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
	}

	SD_UPDATE_ERRSTATS(un, sd_harderrs);

	/*
	 * Once this command's retry count reaches the reset threshold,
	 * attempt a LUN reset (preferred) or a target reset as a recovery
	 * action before issuing further retries.
	 */
	if ((un->un_reset_retry_count != 0) &&
	    (xp->xb_retry_count == un->un_reset_retry_count)) {
		mutex_exit(SD_MUTEX(un));
		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
		/*
		 * NOTE(review): un_f_allow_bus_device_reset and
		 * un_f_lun_reset_enabled are read here after SD_MUTEX has
		 * been dropped; presumably these flags are set once at
		 * attach time and stable thereafter -- confirm.
		 */
		if (un->un_f_allow_bus_device_reset == TRUE) {

			boolean_t try_resetting_target = B_TRUE;

			/*
			 * We need to be able to handle specific ASC when we are
			 * handling a KEY_HARDWARE_ERROR. In particular
			 * taking the default action of resetting the target may
			 * not be the appropriate way to attempt recovery.
			 * Resetting a target because of a single LUN failure
			 * victimizes all LUNs on that target.
			 *
			 * This is true for the LSI arrays, if an LSI
			 * array controller returns an ASC of 0x84 (LUN Dead) we
			 * should trust it.
			 */

			if (sense_key == KEY_HARDWARE_ERROR) {
				switch (asc) {
				case 0x84:
					if (SD_IS_LSI(un)) {
						try_resetting_target = B_FALSE;
					}
					break;
				default:
					break;
				}
			}

			if (try_resetting_target == B_TRUE) {
				int reset_retval = 0;
				if (un->un_f_lun_reset_enabled == TRUE) {
					SD_TRACE(SD_LOG_IO_CORE, un,
					    "sd_sense_key_medium_or_hardware_"
					    "error: issuing RESET_LUN\n");
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_LUN);
				}
				/*
				 * Fall back to a full target reset when the
				 * LUN reset failed or is disabled.
				 */
				if (reset_retval == 0) {
					SD_TRACE(SD_LOG_IO_CORE, un,
					    "sd_sense_key_medium_or_hardware_"
					    "error: issuing RESET_TARGET\n");
					(void) scsi_reset(SD_ADDRESS(un),
					    RESET_TARGET);
				}
			}
		}
		mutex_enter(SD_MUTEX(un));
	}

	/*
	 * This really ought to be a fatal error, but we will retry anyway
	 * as some drives report this as a spurious error.
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
	    &si, EIO, (clock_t)0, NULL);
}
17751 
17752 
17753 
17754 /*
17755  *    Function: sd_sense_key_illegal_request
17756  *
17757  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
17758  *
17759  *     Context: May be called from interrupt context
17760  */
17761 
17762 static void
17763 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
17764 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17765 {
17766 	struct sd_sense_info	si;
17767 
17768 	ASSERT(un != NULL);
17769 	ASSERT(mutex_owned(SD_MUTEX(un)));
17770 	ASSERT(bp != NULL);
17771 	ASSERT(xp != NULL);
17772 	ASSERT(pktp != NULL);
17773 
17774 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17775 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
17776 
17777 	si.ssi_severity = SCSI_ERR_INFO;
17778 	si.ssi_pfa_flag = FALSE;
17779 
17780 	/* Pointless to retry if the target thinks it's an illegal request */
17781 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17782 	sd_return_failed_command(un, bp, EIO);
17783 }
17784 
17785 
17786 
17787 
17788 /*
17789  *    Function: sd_sense_key_unit_attention
17790  *
17791  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
17792  *
17793  *     Context: May be called from interrupt context
17794  */
17795 
static void
sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t asc,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	/*
	 * For UNIT ATTENTION we allow retries for one minute. Devices
	 * like Sonoma can return UNIT ATTENTION close to a minute
	 * under certain conditions.
	 */
	int	retry_check_flag = SD_RETRIES_UA;
	boolean_t	kstat_updated = B_FALSE;
	struct	sd_sense_info		si;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_INFO;
	si.ssi_pfa_flag = FALSE;


	/* Dispatch on the additional sense code (ASC). */
	switch (asc) {
	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
		/*
		 * Predictive failure (SMART-style) warning: record it and
		 * retry with the standard policy rather than the UA policy.
		 */
		if (sd_report_pfa != 0) {
			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
			si.ssi_pfa_flag = TRUE;
			retry_check_flag = SD_RETRIES_STANDARD;
			goto do_retry;
		}
		break;

	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
		/*
		 * A reset may have dropped our reservation; remember that we
		 * lost it and want it back, then handle like a media change.
		 */
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			un->un_resvd_status |=
			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
		}
		/* FALLTHRU */

	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
		if (!un->un_f_has_removable_media) {
			break;
		}

		/*
		 * When we get a unit attention from a removable-media device,
		 * it may be in a state that will take a long time to recover
		 * (e.g., from a reset).  Since we are executing in interrupt
		 * context here, we cannot wait around for the device to come
		 * back. So hand this command off to sd_media_change_task()
		 * for deferred processing under taskq thread context. (Note
		 * that the command still may be failed if a problem is
		 * encountered at a later time.)
		 */
		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
		    KM_NOSLEEP) == 0) {
			/*
			 * Cannot dispatch the request so fail the command.
			 */
			SD_UPDATE_ERRSTATS(un, sd_harderrs);
			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
			si.ssi_severity = SCSI_ERR_FATAL;
			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
			sd_return_failed_command(un, bp, EIO);
		}

		/*
		 * If failed to dispatch sd_media_change_task(), we already
		 * updated kstat. If succeed to dispatch sd_media_change_task(),
		 * we should update kstat later if it encounters an error. So,
		 * we update kstat_updated flag here.
		 */
		kstat_updated = B_TRUE;

		/*
		 * Either the command has been successfully dispatched to a
		 * task Q for retrying, or the dispatch failed. In either case
		 * do NOT retry again by calling sd_retry_command. This sets up
		 * two retries of the same command and when one completes and
		 * frees the resources the other will access freed memory,
		 * a bad thing.
		 */
		return;

	default:
		break;
	}

	/*
	 * Update kstat if we haven't done that.
	 */
	if (!kstat_updated) {
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
	}

do_retry:
	/* Retry using the flag chosen above (UA policy unless overridden). */
	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
	    EIO, SD_UA_RETRY_DELAY, NULL);
}
17898 
17899 
17900 
17901 /*
17902  *    Function: sd_sense_key_fail_command
17903  *
17904  * Description: Use to fail a command when we don't like the sense key that
17905  *		was returned.
17906  *
17907  *     Context: May be called from interrupt context
17908  */
17909 
17910 static void
17911 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
17912 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17913 {
17914 	struct sd_sense_info	si;
17915 
17916 	ASSERT(un != NULL);
17917 	ASSERT(mutex_owned(SD_MUTEX(un)));
17918 	ASSERT(bp != NULL);
17919 	ASSERT(xp != NULL);
17920 	ASSERT(pktp != NULL);
17921 
17922 	si.ssi_severity = SCSI_ERR_FATAL;
17923 	si.ssi_pfa_flag = FALSE;
17924 
17925 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17926 	sd_return_failed_command(un, bp, EIO);
17927 }
17928 
17929 
17930 
17931 /*
17932  *    Function: sd_sense_key_blank_check
17933  *
17934  * Description: Recovery actions for a SCSI "Blank Check" sense key.
17935  *		Has no monetary connotation.
17936  *
17937  *     Context: May be called from interrupt context
17938  */
17939 
17940 static void
17941 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
17942 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17943 {
17944 	struct sd_sense_info	si;
17945 
17946 	ASSERT(un != NULL);
17947 	ASSERT(mutex_owned(SD_MUTEX(un)));
17948 	ASSERT(bp != NULL);
17949 	ASSERT(xp != NULL);
17950 	ASSERT(pktp != NULL);
17951 
17952 	/*
17953 	 * Blank check is not fatal for removable devices, therefore
17954 	 * it does not require a console message.
17955 	 */
17956 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
17957 	    SCSI_ERR_FATAL;
17958 	si.ssi_pfa_flag = FALSE;
17959 
17960 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
17961 	sd_return_failed_command(un, bp, EIO);
17962 }
17963 
17964 
17965 
17966 
17967 /*
17968  *    Function: sd_sense_key_aborted_command
17969  *
17970  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
17971  *
17972  *     Context: May be called from interrupt context
17973  */
17974 
17975 static void
17976 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
17977 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17978 {
17979 	struct sd_sense_info	si;
17980 
17981 	ASSERT(un != NULL);
17982 	ASSERT(mutex_owned(SD_MUTEX(un)));
17983 	ASSERT(bp != NULL);
17984 	ASSERT(xp != NULL);
17985 	ASSERT(pktp != NULL);
17986 
17987 	si.ssi_severity = SCSI_ERR_FATAL;
17988 	si.ssi_pfa_flag = FALSE;
17989 
17990 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17991 
17992 	/*
17993 	 * This really ought to be a fatal error, but we will retry anyway
17994 	 * as some drives report this as a spurious error.
17995 	 */
17996 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17997 	    &si, EIO, (clock_t)0, NULL);
17998 }
17999 
18000 
18001 
18002 /*
18003  *    Function: sd_sense_key_default
18004  *
18005  * Description: Default recovery action for several SCSI sense keys (basically
18006  *		attempts a retry).
18007  *
18008  *     Context: May be called from interrupt context
18009  */
18010 
18011 static void
18012 sd_sense_key_default(struct sd_lun *un,
18013 	int sense_key,
18014 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18015 {
18016 	struct sd_sense_info	si;
18017 
18018 	ASSERT(un != NULL);
18019 	ASSERT(mutex_owned(SD_MUTEX(un)));
18020 	ASSERT(bp != NULL);
18021 	ASSERT(xp != NULL);
18022 	ASSERT(pktp != NULL);
18023 
18024 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18025 
18026 	/*
18027 	 * Undecoded sense key.	Attempt retries and hope that will fix
18028 	 * the problem.  Otherwise, we're dead.
18029 	 */
18030 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
18031 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18032 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
18033 	}
18034 
18035 	si.ssi_severity = SCSI_ERR_FATAL;
18036 	si.ssi_pfa_flag = FALSE;
18037 
18038 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18039 	    &si, EIO, (clock_t)0, NULL);
18040 }
18041 
18042 
18043 
18044 /*
18045  *    Function: sd_print_retry_msg
18046  *
18047  * Description: Print a message indicating the retry action being taken.
18048  *
18049  *   Arguments: un - ptr to associated softstate
18050  *		bp - ptr to buf(9S) for the command
18051  *		arg - not used.
18052  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18053  *			or SD_NO_RETRY_ISSUED
18054  *
18055  *     Context: May be called from interrupt context
18056  */
18057 /* ARGSUSED */
static void
sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt *pktp;
	char *reasonp;
	char *msgp;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/*
	 * Suppress the console message entirely while the device is
	 * suspended or in low power, or when the packet asked for silence;
	 * still fall through to record the pkt_reason below.
	 */
	ASSERT(!mutex_owned(&un->un_pm_mutex));
	mutex_enter(&un->un_pm_mutex);
	if ((un->un_state == SD_STATE_SUSPENDED) ||
	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
	    (pktp->pkt_flags & FLAG_SILENT)) {
		mutex_exit(&un->un_pm_mutex);
		goto update_pkt_reason;
	}
	mutex_exit(&un->un_pm_mutex);

	/*
	 * Suppress messages if they are all the same pkt_reason; with
	 * TQ, many (up to 256) are returned with the same pkt_reason.
	 * If we are in panic, then suppress the retry messages.
	 */
	switch (flag) {
	case SD_NO_RETRY_ISSUED:
		msgp = "giving up";
		break;
	case SD_IMMEDIATE_RETRY_ISSUED:
	case SD_DELAYED_RETRY_ISSUED:
		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
		    (sd_error_level != SCSI_ERR_ALL))) {
			return;
		}
		msgp = "retrying command";
		break;
	default:
		/* Unknown flag value: log nothing, just record pkt_reason. */
		goto update_pkt_reason;
	}

	/* Report a parity error specially; otherwise decode pkt_reason. */
	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
	    scsi_rname(pktp->pkt_reason));

	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
	    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);

update_pkt_reason:
	/*
	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
	 * This is to prevent multiple console messages for the same failure
	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
	 * when the command is retried successfully because there still may be
	 * more commands coming back with the same value of pktp->pkt_reason.
	 */
	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
		un->un_last_pkt_reason = pktp->pkt_reason;
	}
}
18124 
18125 
18126 /*
18127  *    Function: sd_print_cmd_incomplete_msg
18128  *
18129  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18130  *
18131  *   Arguments: un - ptr to associated softstate
18132  *		bp - ptr to buf(9S) for the command
18133  *		arg - passed to sd_print_retry_msg()
18134  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18135  *			or SD_NO_RETRY_ISSUED
18136  *
18137  *     Context: May be called from interrupt context
18138  */
18139 
18140 static void
18141 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
18142 	int code)
18143 {
18144 	dev_info_t	*dip;
18145 
18146 	ASSERT(un != NULL);
18147 	ASSERT(mutex_owned(SD_MUTEX(un)));
18148 	ASSERT(bp != NULL);
18149 
18150 	switch (code) {
18151 	case SD_NO_RETRY_ISSUED:
18152 		/* Command was failed. Someone turned off this target? */
18153 		if (un->un_state != SD_STATE_OFFLINE) {
18154 			/*
18155 			 * Suppress message if we are detaching and
18156 			 * device has been disconnected
18157 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
18158 			 * private interface and not part of the DDI
18159 			 */
18160 			dip = un->un_sd->sd_dev;
18161 			if (!(DEVI_IS_DETACHING(dip) &&
18162 			    DEVI_IS_DEVICE_REMOVED(dip))) {
18163 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18164 				"disk not responding to selection\n");
18165 			}
18166 			New_state(un, SD_STATE_OFFLINE);
18167 		}
18168 		break;
18169 
18170 	case SD_DELAYED_RETRY_ISSUED:
18171 	case SD_IMMEDIATE_RETRY_ISSUED:
18172 	default:
18173 		/* Command was successfully queued for retry */
18174 		sd_print_retry_msg(un, bp, arg, code);
18175 		break;
18176 	}
18177 }
18178 
18179 
18180 /*
18181  *    Function: sd_pkt_reason_cmd_incomplete
18182  *
18183  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18184  *
18185  *     Context: May be called from interrupt context
18186  */
18187 
18188 static void
18189 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
18190 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18191 {
18192 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
18193 
18194 	ASSERT(un != NULL);
18195 	ASSERT(mutex_owned(SD_MUTEX(un)));
18196 	ASSERT(bp != NULL);
18197 	ASSERT(xp != NULL);
18198 	ASSERT(pktp != NULL);
18199 
18200 	/* Do not do a reset if selection did not complete */
18201 	/* Note: Should this not just check the bit? */
18202 	if (pktp->pkt_state != STATE_GOT_BUS) {
18203 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18204 		sd_reset_target(un, pktp);
18205 	}
18206 
18207 	/*
18208 	 * If the target was not successfully selected, then set
18209 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
18210 	 * with the target, and further retries and/or commands are
18211 	 * likely to take a long time.
18212 	 */
18213 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18214 		flag |= SD_RETRIES_FAILFAST;
18215 	}
18216 
18217 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18218 
18219 	sd_retry_command(un, bp, flag,
18220 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18221 }
18222 
18223 
18224 
18225 /*
18226  *    Function: sd_pkt_reason_cmd_tran_err
18227  *
18228  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18229  *
18230  *     Context: May be called from interrupt context
18231  */
18232 
18233 static void
18234 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18235 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18236 {
18237 	ASSERT(un != NULL);
18238 	ASSERT(mutex_owned(SD_MUTEX(un)));
18239 	ASSERT(bp != NULL);
18240 	ASSERT(xp != NULL);
18241 	ASSERT(pktp != NULL);
18242 
18243 	/*
18244 	 * Do not reset if we got a parity error, or if
18245 	 * selection did not complete.
18246 	 */
18247 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18248 	/* Note: Should this not just check the bit for pkt_state? */
18249 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
18250 	    (pktp->pkt_state != STATE_GOT_BUS)) {
18251 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18252 		sd_reset_target(un, pktp);
18253 	}
18254 
18255 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18256 
18257 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18258 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18259 }
18260 
18261 
18262 
18263 /*
18264  *    Function: sd_pkt_reason_cmd_reset
18265  *
18266  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18267  *
18268  *     Context: May be called from interrupt context
18269  */
18270 
18271 static void
18272 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
18273 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18274 {
18275 	ASSERT(un != NULL);
18276 	ASSERT(mutex_owned(SD_MUTEX(un)));
18277 	ASSERT(bp != NULL);
18278 	ASSERT(xp != NULL);
18279 	ASSERT(pktp != NULL);
18280 
18281 	/* The target may still be running the command, so try to reset. */
18282 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18283 	sd_reset_target(un, pktp);
18284 
18285 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18286 
18287 	/*
18288 	 * If pkt_reason is CMD_RESET chances are that this pkt got
18289 	 * reset because another target on this bus caused it. The target
18290 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18291 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18292 	 */
18293 
18294 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18295 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18296 }
18297 
18298 
18299 
18300 
18301 /*
18302  *    Function: sd_pkt_reason_cmd_aborted
18303  *
18304  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18305  *
18306  *     Context: May be called from interrupt context
18307  */
18308 
18309 static void
18310 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
18311 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18312 {
18313 	ASSERT(un != NULL);
18314 	ASSERT(mutex_owned(SD_MUTEX(un)));
18315 	ASSERT(bp != NULL);
18316 	ASSERT(xp != NULL);
18317 	ASSERT(pktp != NULL);
18318 
18319 	/* The target may still be running the command, so try to reset. */
18320 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18321 	sd_reset_target(un, pktp);
18322 
18323 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18324 
18325 	/*
18326 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
18327 	 * aborted because another target on this bus caused it. The target
18328 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18329 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18330 	 */
18331 
18332 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18333 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18334 }
18335 
18336 
18337 
18338 /*
18339  *    Function: sd_pkt_reason_cmd_timeout
18340  *
18341  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
18342  *
18343  *     Context: May be called from interrupt context
18344  */
18345 
18346 static void
18347 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
18348 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18349 {
18350 	ASSERT(un != NULL);
18351 	ASSERT(mutex_owned(SD_MUTEX(un)));
18352 	ASSERT(bp != NULL);
18353 	ASSERT(xp != NULL);
18354 	ASSERT(pktp != NULL);
18355 
18356 
18357 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18358 	sd_reset_target(un, pktp);
18359 
18360 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18361 
18362 	/*
18363 	 * A command timeout indicates that we could not establish
18364 	 * communication with the target, so set SD_RETRIES_FAILFAST
18365 	 * as further retries/commands are likely to take a long time.
18366 	 */
18367 	sd_retry_command(un, bp,
18368 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
18369 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18370 }
18371 
18372 
18373 
18374 /*
18375  *    Function: sd_pkt_reason_cmd_unx_bus_free
18376  *
18377  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
18378  *
18379  *     Context: May be called from interrupt context
18380  */
18381 
18382 static void
18383 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
18384 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18385 {
18386 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
18387 
18388 	ASSERT(un != NULL);
18389 	ASSERT(mutex_owned(SD_MUTEX(un)));
18390 	ASSERT(bp != NULL);
18391 	ASSERT(xp != NULL);
18392 	ASSERT(pktp != NULL);
18393 
18394 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18395 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18396 
18397 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
18398 	    sd_print_retry_msg : NULL;
18399 
18400 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18401 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18402 }
18403 
18404 
18405 /*
18406  *    Function: sd_pkt_reason_cmd_tag_reject
18407  *
18408  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
18409  *
18410  *     Context: May be called from interrupt context
18411  */
18412 
18413 static void
18414 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
18415 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18416 {
18417 	ASSERT(un != NULL);
18418 	ASSERT(mutex_owned(SD_MUTEX(un)));
18419 	ASSERT(bp != NULL);
18420 	ASSERT(xp != NULL);
18421 	ASSERT(pktp != NULL);
18422 
18423 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18424 	pktp->pkt_flags = 0;
18425 	un->un_tagflags = 0;
18426 	if (un->un_f_opt_queueing == TRUE) {
18427 		un->un_throttle = min(un->un_throttle, 3);
18428 	} else {
18429 		un->un_throttle = 1;
18430 	}
18431 	mutex_exit(SD_MUTEX(un));
18432 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
18433 	mutex_enter(SD_MUTEX(un));
18434 
18435 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18436 
18437 	/* Legacy behavior not to check retry counts here. */
18438 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
18439 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18440 }
18441 
18442 
18443 /*
18444  *    Function: sd_pkt_reason_default
18445  *
18446  * Description: Default recovery actions for SCSA pkt_reason values that
18447  *		do not have more explicit recovery actions.
18448  *
18449  *     Context: May be called from interrupt context
18450  */
18451 
18452 static void
18453 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
18454 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18455 {
18456 	ASSERT(un != NULL);
18457 	ASSERT(mutex_owned(SD_MUTEX(un)));
18458 	ASSERT(bp != NULL);
18459 	ASSERT(xp != NULL);
18460 	ASSERT(pktp != NULL);
18461 
18462 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18463 	sd_reset_target(un, pktp);
18464 
18465 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18466 
18467 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18468 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18469 }
18470 
18471 
18472 
18473 /*
18474  *    Function: sd_pkt_status_check_condition
18475  *
18476  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
18477  *
18478  *     Context: May be called from interrupt context
18479  */
18480 
static void
sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
	    "entry: buf:0x%p xp:0x%p\n", bp, xp);

	/*
	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
	 * command will be retried after the request sense). Otherwise, retry
	 * the command. Note: we are issuing the request sense even though the
	 * retry limit may have been reached for the failed command.
	 */
	if (un->un_f_arq_enabled == FALSE) {
		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
		    "no ARQ, sending request sense command\n");
		sd_send_request_sense_command(un, bp, pktp);
	} else {
		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
		    "ARQ,retrying request sense command\n");
#if defined(__i386) || defined(__amd64)
		/*
		 * The SD_RETRY_DELAY value need to be adjusted here
		 * when SD_RETRY_DELAY change in sddef.h
		 */
		/*
		 * On x86, fibre devices get a 100ms retry delay; others
		 * retry immediately.
		 */
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
			un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0,
			NULL);
#else
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
		    EIO, SD_RETRY_DELAY, NULL);
#endif
	}

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
}
18523 
18524 
18525 /*
18526  *    Function: sd_pkt_status_busy
18527  *
18528  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
18529  *
18530  *     Context: May be called from interrupt context
18531  */
18532 
static void
sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_busy: entry\n");

	/* If retries are exhausted, just fail the command. */
	if (xp->xb_retry_count >= un->un_busy_retry_count) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "device busy too long\n");
		sd_return_failed_command(un, bp, EIO);
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_pkt_status_busy: exit\n");
		return;
	}
	xp->xb_retry_count++;

	/*
	 * Try to reset the target. However, we do not want to perform
	 * more than one reset if the device continues to fail. The reset
	 * will be performed when the retry count reaches the reset
	 * threshold.  This threshold should be set such that at least
	 * one retry is issued before the reset is performed.
	 */
	if (xp->xb_retry_count ==
	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
		int rval = 0;
		/*
		 * scsi_reset() may block, so the per-unit mutex is dropped
		 * here and re-acquired after the reset attempts.
		 * NOTE(review): un_f_allow_bus_device_reset and
		 * un_f_lun_reset_enabled are read below without the mutex
		 * held; presumably these flags are stable after attach --
		 * confirm.
		 */
		mutex_exit(SD_MUTEX(un));
		if (un->un_f_allow_bus_device_reset == TRUE) {
			/*
			 * First try to reset the LUN; if we cannot then
			 * try to reset the target.
			 */
			if (un->un_f_lun_reset_enabled == TRUE) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_pkt_status_busy: RESET_LUN\n");
				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
			}
			if (rval == 0) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_pkt_status_busy: RESET_TARGET\n");
				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
		}
		if (rval == 0) {
			/*
			 * If the RESET_LUN and/or RESET_TARGET failed,
			 * try RESET_ALL
			 */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_pkt_status_busy: RESET_ALL\n");
			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
		}
		mutex_enter(SD_MUTEX(un));
		if (rval == 0) {
			/*
			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
			 * At this point we give up & fail the command.
			 */
			sd_return_failed_command(un, bp, EIO);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_pkt_status_busy: exit (failed cmd)\n");
			return;
		}
	}

	/*
	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
	 * we have already checked the retry counts above.
	 */
	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
	    EIO, SD_BSY_TIMEOUT, NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_busy: exit\n");
}
18616 
18617 
18618 /*
18619  *    Function: sd_pkt_status_reservation_conflict
18620  *
18621  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
18622  *		command status.
18623  *
18624  *     Context: May be called from interrupt context
18625  */
18626 
static void
sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/*
	 * If the command was PERSISTENT_RESERVATION_[IN|OUT] then reservation
	 * conflict could be due to various reasons like incorrect keys, not
	 * registered or not reserved etc. So, we return EACCES to the caller.
	 */
	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
		int cmd = SD_GET_PKT_OPCODE(pktp);
		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
			sd_return_failed_command(un, bp, EACCES);
			return;
		}
	}

	/* Record that this unit has encountered a reservation conflict. */
	un->un_resvd_status |= SD_RESERVATION_CONFLICT;

	/*
	 * In failfast mode, a reservation conflict is fatal: panic if the
	 * global failfast enable is set, otherwise fail the command with
	 * EACCES without retrying.
	 */
	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
		if (sd_failfast_enable != 0) {
			/* By definition, we must panic here.... */
			sd_panic_for_res_conflict(un);
			/*NOTREACHED*/
		}
		SD_ERROR(SD_LOG_IO, un,
		    "sd_handle_resv_conflict: Disk Reserved\n");
		sd_return_failed_command(un, bp, EACCES);
		return;
	}

	/*
	 * 1147670: retry only if sd_retry_on_reservation_conflict
	 * property is set (default is 1). Retries will not succeed
	 * on a disk reserved by another initiator. HA systems
	 * may reset this via sd.conf to avoid these retries.
	 *
	 * Note: The legacy return code for this failure is EIO, however EACCES
	 * seems more appropriate for a reservation conflict.
	 */
	if (sd_retry_on_reservation_conflict == 0) {
		SD_ERROR(SD_LOG_IO, un,
		    "sd_handle_resv_conflict: Device Reserved\n");
		sd_return_failed_command(un, bp, EIO);
		return;
	}

	/*
	 * Retry the command if we can.
	 *
	 * Note: The legacy return code for this failure is EIO, however EACCES
	 * seems more appropriate for a reservation conflict.
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
	    (clock_t)2, NULL);
}
18690 
18691 
18692 
18693 /*
18694  *    Function: sd_pkt_status_qfull
18695  *
18696  * Description: Handle a QUEUE FULL condition from the target.  This can
18697  *		occur if the HBA does not handle the queue full condition.
18698  *		(Basically this means third-party HBAs as Sun HBAs will
18699  *		handle the queue full condition.)  Note that if there are
18700  *		some commands already in the transport, then the queue full
18701  *		has occurred because the queue for this nexus is actually
18702  *		full. If there are no commands in the transport, then the
18703  *		queue full is resulting from some other initiator or lun
18704  *		consuming all the resources at the target.
18705  *
18706  *     Context: May be called from interrupt context
18707  */
18708 
18709 static void
18710 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
18711 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18712 {
18713 	ASSERT(un != NULL);
18714 	ASSERT(mutex_owned(SD_MUTEX(un)));
18715 	ASSERT(bp != NULL);
18716 	ASSERT(xp != NULL);
18717 	ASSERT(pktp != NULL);
18718 
18719 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18720 	    "sd_pkt_status_qfull: entry\n");
18721 
18722 	/*
18723 	 * Just lower the QFULL throttle and retry the command.  Note that
18724 	 * we do not limit the number of retries here.
18725 	 */
18726 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
18727 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
18728 	    SD_RESTART_TIMEOUT, NULL);
18729 
18730 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18731 	    "sd_pkt_status_qfull: exit\n");
18732 }
18733 
18734 
18735 /*
18736  *    Function: sd_reset_target
18737  *
18738  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
18739  *		RESET_TARGET, or RESET_ALL.
18740  *
18741  *     Context: May be called under interrupt context.
18742  */
18743 
18744 static void
18745 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
18746 {
18747 	int rval = 0;
18748 
18749 	ASSERT(un != NULL);
18750 	ASSERT(mutex_owned(SD_MUTEX(un)));
18751 	ASSERT(pktp != NULL);
18752 
18753 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
18754 
18755 	/*
18756 	 * No need to reset if the transport layer has already done so.
18757 	 */
18758 	if ((pktp->pkt_statistics &
18759 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
18760 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18761 		    "sd_reset_target: no reset\n");
18762 		return;
18763 	}
18764 
18765 	mutex_exit(SD_MUTEX(un));
18766 
18767 	if (un->un_f_allow_bus_device_reset == TRUE) {
18768 		if (un->un_f_lun_reset_enabled == TRUE) {
18769 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18770 			    "sd_reset_target: RESET_LUN\n");
18771 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
18772 		}
18773 		if (rval == 0) {
18774 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18775 			    "sd_reset_target: RESET_TARGET\n");
18776 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
18777 		}
18778 	}
18779 
18780 	if (rval == 0) {
18781 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
18782 		    "sd_reset_target: RESET_ALL\n");
18783 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
18784 	}
18785 
18786 	mutex_enter(SD_MUTEX(un));
18787 
18788 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
18789 }
18790 
18791 
18792 /*
18793  *    Function: sd_media_change_task
18794  *
18795  * Description: Recovery action for CDROM to become available.
18796  *
18797  *     Context: Executes in a taskq() thread context
18798  */
18799 
static void
sd_media_change_task(void *arg)
{
	struct	scsi_pkt	*pktp = arg;
	struct	sd_lun		*un;
	struct	buf		*bp;
	struct	sd_xbuf		*xp;
	int	err		= 0;
	int	retry_count	= 0;
	/* Start with a short retry budget; extended below on EAGAIN. */
	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
	struct	sd_sense_info	si;

	ASSERT(pktp != NULL);
	bp = (struct buf *)pktp->pkt_private;
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_f_monitor_media_state);

	si.ssi_severity = SCSI_ERR_INFO;
	si.ssi_pfa_flag = FALSE;

	/*
	 * When a reset is issued on a CDROM, it takes a long time to
	 * recover. First few attempts to read capacity and other things
	 * related to handling unit attention fail (with a ASC 0x4 and
	 * ASCQ 0x1). In that case we want to do enough retries and we want
	 * to limit the retries in other cases of genuine failures like
	 * no media in drive.
	 */
	while (retry_count++ < retry_limit) {
		if ((err = sd_handle_mchange(un)) == 0) {
			break;
		}
		if (err == EAGAIN) {
			/* Device is becoming ready: allow the full budget. */
			retry_limit = SD_UNIT_ATTENTION_RETRY;
		}
		/* Sleep for 0.5 sec. & try again */
		delay(drv_usectohz(500000));
	}

	/*
	 * Dispatch (retry or fail) the original command here,
	 * along with appropriate console messages....
	 *
	 * Must grab the mutex before calling sd_retry_command,
	 * sd_print_sense_msg and sd_return_failed_command.
	 */
	mutex_enter(SD_MUTEX(un));
	/*
	 * NOTE(review): err holds an errno-style value from
	 * sd_handle_mchange(); the comparison below assumes
	 * SD_CMD_SUCCESS == 0 -- confirm against sddef.h.
	 */
	if (err != SD_CMD_SUCCESS) {
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
		si.ssi_severity = SCSI_ERR_FATAL;
		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
		sd_return_failed_command(un, bp, EIO);
	} else {
		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
		    &si, EIO, (clock_t)0, NULL);
	}
	mutex_exit(SD_MUTEX(un));
}
18864 
18865 
18866 
18867 /*
18868  *    Function: sd_handle_mchange
18869  *
18870  * Description: Perform geometry validation & other recovery when CDROM
18871  *		has been removed from drive.
18872  *
18873  * Return Code: 0 for success
18874  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
18875  *		sd_send_scsi_READ_CAPACITY()
18876  *
18877  *     Context: Executes in a taskq() thread context
18878  */
18879 
static int
sd_handle_mchange(struct sd_lun *un)
{
	uint64_t	capacity;
	uint32_t	lbasize;
	int		rval;

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_f_monitor_media_state);

	/* Re-read the capacity: the new medium may differ from the old. */
	if ((rval = sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
	    SD_PATH_DIRECT_PRIORITY)) != 0) {
		return (rval);
	}

	mutex_enter(SD_MUTEX(un));
	sd_update_block_info(un, lbasize, capacity);

	/* Refresh the error-kstat capacity to match the new medium. */
	if (un->un_errstats != NULL) {
		struct	sd_errstats *stp =
		    (struct sd_errstats *)un->un_errstats->ks_data;
		stp->sd_capacity.value.ui64 = (uint64_t)
		    ((uint64_t)un->un_blockcount *
		    (uint64_t)un->un_tgt_blocksize);
	}

	/*
	 * Note: Maybe let the strategy/partitioning chain worry about getting
	 * valid geometry.
	 */
	un->un_f_geometry_is_valid = FALSE;
	(void) sd_validate_geometry(un, SD_PATH_DIRECT_PRIORITY);
	if (un->un_f_geometry_is_valid == FALSE) {
		/* Could not establish a valid label/geometry for the medium */
		mutex_exit(SD_MUTEX(un));
		return (EIO);
	}

	mutex_exit(SD_MUTEX(un));

	/*
	 * Try to lock the door
	 */
	return (sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
	    SD_PATH_DIRECT_PRIORITY));
}
18925 
18926 
18927 /*
18928  *    Function: sd_send_scsi_DOORLOCK
18929  *
18930  * Description: Issue the scsi DOOR LOCK command
18931  *
18932  *   Arguments: un    - pointer to driver soft state (unit) structure for
18933  *			this target.
18934  *		flag  - SD_REMOVAL_ALLOW
18935  *			SD_REMOVAL_PREVENT
18936  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
18937  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
18938  *			to use the USCSI "direct" chain and bypass the normal
18939  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
18940  *			command is issued as part of an error recovery action.
18941  *
18942  * Return Code: 0   - Success
18943  *		errno return code from sd_send_scsi_cmd()
18944  *
18945  *     Context: Can sleep.
18946  */
18947 
18948 static int
18949 sd_send_scsi_DOORLOCK(struct sd_lun *un, int flag, int path_flag)
18950 {
18951 	union scsi_cdb		cdb;
18952 	struct uscsi_cmd	ucmd_buf;
18953 	struct scsi_extended_sense	sense_buf;
18954 	int			status;
18955 
18956 	ASSERT(un != NULL);
18957 	ASSERT(!mutex_owned(SD_MUTEX(un)));
18958 
18959 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
18960 
18961 	/* already determined doorlock is not supported, fake success */
18962 	if (un->un_f_doorlock_supported == FALSE) {
18963 		return (0);
18964 	}
18965 
18966 	bzero(&cdb, sizeof (cdb));
18967 	bzero(&ucmd_buf, sizeof (ucmd_buf));
18968 
18969 	cdb.scc_cmd = SCMD_DOORLOCK;
18970 	cdb.cdb_opaque[4] = (uchar_t)flag;
18971 
18972 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
18973 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
18974 	ucmd_buf.uscsi_bufaddr	= NULL;
18975 	ucmd_buf.uscsi_buflen	= 0;
18976 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
18977 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
18978 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
18979 	ucmd_buf.uscsi_timeout	= 15;
18980 
18981 	SD_TRACE(SD_LOG_IO, un,
18982 	    "sd_send_scsi_DOORLOCK: returning sd_send_scsi_cmd()\n");
18983 
18984 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
18985 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
18986 
18987 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
18988 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
18989 	    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
18990 		/* fake success and skip subsequent doorlock commands */
18991 		un->un_f_doorlock_supported = FALSE;
18992 		return (0);
18993 	}
18994 
18995 	return (status);
18996 }
18997 
18998 /*
18999  *    Function: sd_send_scsi_READ_CAPACITY
19000  *
19001  * Description: This routine uses the scsi READ CAPACITY command to determine
19002  *		the device capacity in number of blocks and the device native
19003  *		block size. If this function returns a failure, then the
19004  *		values in *capp and *lbap are undefined.  If the capacity
19005  *		returned is 0xffffffff then the lun is too large for a
19006  *		normal READ CAPACITY command and the results of a
19007  *		READ CAPACITY 16 will be used instead.
19008  *
19009  *   Arguments: un   - ptr to soft state struct for the target
19010  *		capp - ptr to unsigned 64-bit variable to receive the
19011  *			capacity value from the command.
 *		lbap - ptr to unsigned 32-bit variable to receive the
19013  *			block size value from the command
19014  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19015  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19016  *			to use the USCSI "direct" chain and bypass the normal
19017  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19018  *			command is issued as part of an error recovery action.
19019  *
19020  * Return Code: 0   - Success
19021  *		EIO - IO error
19022  *		EACCES - Reservation conflict detected
19023  *		EAGAIN - Device is becoming ready
19024  *		errno return code from sd_send_scsi_cmd()
19025  *
19026  *     Context: Can sleep.  Blocks until command completes.
19027  */
19028 
#define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)

static int
sd_send_scsi_READ_CAPACITY(struct sd_lun *un, uint64_t *capp, uint32_t *lbap,
	int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	struct	uscsi_cmd	ucmd_buf;
	union	scsi_cdb	cdb;
	uint32_t		*capacity_buf;
	uint64_t		capacity;
	uint32_t		lbasize;
	int			status;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(capp != NULL);
	ASSERT(lbap != NULL);

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);

	/*
	 * First send a READ_CAPACITY command to the target.
	 * (This command is mandatory under SCSI-2.)
	 *
	 * Set up the CDB for the READ_CAPACITY command.  The Partial
	 * Medium Indicator bit is cleared.  The address field must be
	 * zero if the PMI bit is zero.
	 */
	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));

	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);

	cdb.scc_cmd = SCMD_READ_CAPACITY;

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);

	/*
	 * capacity_buf is freed on every path out of this switch, so the
	 * returns below do not leak.
	 */
	switch (status) {
	case 0:
		/* Return failure if we did not get valid capacity data. */
		if (ucmd_buf.uscsi_resid != 0) {
			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
			return (EIO);
		}

		/*
		 * Read capacity and block size from the READ CAPACITY 10 data.
		 * This data may be adjusted later due to device specific
		 * issues.
		 *
		 * According to the SCSI spec, the READ CAPACITY 10
		 * command returns the following:
		 *
		 *  bytes 0-3: Maximum logical block address available.
		 *		(MSB in byte:0 & LSB in byte:3)
		 *
		 *  bytes 4-7: Block length in bytes
		 *		(MSB in byte:4 & LSB in byte:7)
		 *
		 */
		capacity = BE_32(capacity_buf[0]);
		lbasize = BE_32(capacity_buf[1]);

		/*
		 * Done with capacity_buf
		 */
		kmem_free(capacity_buf, SD_CAPACITY_SIZE);

		/*
		 * if the reported capacity is set to all 0xf's, then
		 * this disk is too large and requires SBC-2 commands.
		 * Reissue the request using READ CAPACITY 16.
		 */
		if (capacity == 0xffffffff) {
			status = sd_send_scsi_READ_CAPACITY_16(un, &capacity,
			    &lbasize, path_flag);
			if (status != 0) {
				return (status);
			}
		}
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/*
			 * Check condition; look for ASC/ASCQ of 0x04/0x01
			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
			 */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (sense_buf.es_add_code  == 0x04) &&
			    (sense_buf.es_qual_code == 0x01)) {
				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
				return (EAGAIN);
			}
			break;
		default:
			break;
		}
		/* FALLTHRU */
	default:
		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
		return (status);
	}

	/*
	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
	 * (2352 and 0 are common) so for these devices always force the value
	 * to 2048 as required by the ATAPI specs.
	 */
	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
		lbasize = 2048;
	}

	/*
	 * Get the maximum LBA value from the READ CAPACITY data.
	 * Here we assume that the Partial Medium Indicator (PMI) bit
	 * was cleared when issuing the command. This means that the LBA
	 * returned from the device is the LBA of the last logical block
	 * on the logical unit.  The actual logical block count will be
	 * this value plus one.
	 *
	 * Currently the capacity is saved in terms of un->un_sys_blocksize,
	 * so scale the capacity value to reflect this.
	 *
	 * NOTE(review): integer division below assumes lbasize is a
	 * multiple of un_sys_blocksize (and un_sys_blocksize != 0) --
	 * confirm for odd-blocksize devices.
	 */
	capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);

#if defined(__i386) || defined(__amd64)
	/*
	 * On x86, compensate for off-by-1 error (number of sectors on
	 * media)  (1175930)
	 */
	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
	    (lbasize == un->un_sys_blocksize)) {
		capacity -= 1;
	}
#endif

	/*
	 * Copy the values from the READ CAPACITY command into the space
	 * provided by the caller.
	 */
	*capp = capacity;
	*lbap = lbasize;

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);

	/*
	 * Both the lbasize and capacity from the device must be nonzero,
	 * otherwise we assume that the values are not valid and return
	 * failure to the caller. (4203735)
	 */
	if ((capacity == 0) || (lbasize == 0)) {
		return (EIO);
	}

	return (0);
}
19202 
19203 /*
19204  *    Function: sd_send_scsi_READ_CAPACITY_16
19205  *
19206  * Description: This routine uses the scsi READ CAPACITY 16 command to
19207  *		determine the device capacity in number of blocks and the
19208  *		device native block size.  If this function returns a failure,
19209  *		then the values in *capp and *lbap are undefined.
19210  *		This routine should always be called by
 *		sd_send_scsi_READ_CAPACITY which will apply any device
19212  *		specific adjustments to capacity and lbasize.
19213  *
19214  *   Arguments: un   - ptr to soft state struct for the target
19215  *		capp - ptr to unsigned 64-bit variable to receive the
19216  *			capacity value from the command.
 *		lbap - ptr to unsigned 32-bit varaible to receive the
19218  *			block size value from the command
19219  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19220  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19221  *			to use the USCSI "direct" chain and bypass the normal
19222  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
19223  *			this command is issued as part of an error recovery
19224  *			action.
19225  *
19226  * Return Code: 0   - Success
19227  *		EIO - IO error
19228  *		EACCES - Reservation conflict detected
19229  *		EAGAIN - Device is becoming ready
19230  *		errno return code from sd_send_scsi_cmd()
19231  *
19232  *     Context: Can sleep.  Blocks until command completes.
19233  */
19234 
19235 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
19236 
19237 static int
19238 sd_send_scsi_READ_CAPACITY_16(struct sd_lun *un, uint64_t *capp,
19239 	uint32_t *lbap, int path_flag)
19240 {
19241 	struct	scsi_extended_sense	sense_buf;
19242 	struct	uscsi_cmd	ucmd_buf;
19243 	union	scsi_cdb	cdb;
19244 	uint64_t		*capacity16_buf;
19245 	uint64_t		capacity;
19246 	uint32_t		lbasize;
19247 	int			status;
19248 
19249 	ASSERT(un != NULL);
19250 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19251 	ASSERT(capp != NULL);
19252 	ASSERT(lbap != NULL);
19253 
19254 	SD_TRACE(SD_LOG_IO, un,
19255 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
19256 
19257 	/*
19258 	 * First send a READ_CAPACITY_16 command to the target.
19259 	 *
19260 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
19261 	 * Medium Indicator bit is cleared.  The address field must be
19262 	 * zero if the PMI bit is zero.
19263 	 */
19264 	bzero(&cdb, sizeof (cdb));
19265 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19266 
19267 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
19268 
19269 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19270 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
19271 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
19272 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
19273 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19274 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
19275 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19276 	ucmd_buf.uscsi_timeout	= 60;
19277 
19278 	/*
19279 	 * Read Capacity (16) is a Service Action In command.  One
19280 	 * command byte (0x9E) is overloaded for multiple operations,
19281 	 * with the second CDB byte specifying the desired operation
19282 	 */
19283 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
19284 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
19285 
19286 	/*
19287 	 * Fill in allocation length field
19288 	 */
19289 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
19290 
19291 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19292 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19293 
19294 	switch (status) {
19295 	case 0:
19296 		/* Return failure if we did not get valid capacity data. */
19297 		if (ucmd_buf.uscsi_resid > 20) {
19298 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19299 			return (EIO);
19300 		}
19301 
19302 		/*
19303 		 * Read capacity and block size from the READ CAPACITY 10 data.
19304 		 * This data may be adjusted later due to device specific
19305 		 * issues.
19306 		 *
19307 		 * According to the SCSI spec, the READ CAPACITY 10
19308 		 * command returns the following:
19309 		 *
19310 		 *  bytes 0-7: Maximum logical block address available.
19311 		 *		(MSB in byte:0 & LSB in byte:7)
19312 		 *
19313 		 *  bytes 8-11: Block length in bytes
19314 		 *		(MSB in byte:8 & LSB in byte:11)
19315 		 *
19316 		 */
19317 		capacity = BE_64(capacity16_buf[0]);
19318 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
19319 
19320 		/*
19321 		 * Done with capacity16_buf
19322 		 */
19323 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19324 
19325 		/*
19326 		 * if the reported capacity is set to all 0xf's, then
19327 		 * this disk is too large.  This could only happen with
19328 		 * a device that supports LBAs larger than 64 bits which
19329 		 * are not defined by any current T10 standards.
19330 		 */
19331 		if (capacity == 0xffffffffffffffff) {
19332 			return (EIO);
19333 		}
19334 		break;	/* Success! */
19335 	case EIO:
19336 		switch (ucmd_buf.uscsi_status) {
19337 		case STATUS_RESERVATION_CONFLICT:
19338 			status = EACCES;
19339 			break;
19340 		case STATUS_CHECK:
19341 			/*
19342 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
19343 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
19344 			 */
19345 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19346 			    (sense_buf.es_add_code  == 0x04) &&
19347 			    (sense_buf.es_qual_code == 0x01)) {
19348 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19349 				return (EAGAIN);
19350 			}
19351 			break;
19352 		default:
19353 			break;
19354 		}
19355 		/* FALLTHRU */
19356 	default:
19357 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
19358 		return (status);
19359 	}
19360 
19361 	*capp = capacity;
19362 	*lbap = lbasize;
19363 
19364 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
19365 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
19366 
19367 	return (0);
19368 }
19369 
19370 
19371 /*
19372  *    Function: sd_send_scsi_START_STOP_UNIT
19373  *
19374  * Description: Issue a scsi START STOP UNIT command to the target.
19375  *
19376  *   Arguments: un    - pointer to driver soft state (unit) structure for
19377  *			this target.
19378  *		flag  - SD_TARGET_START
19379  *			SD_TARGET_STOP
19380  *			SD_TARGET_EJECT
19381  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19382  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19383  *			to use the USCSI "direct" chain and bypass the normal
19384  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19385  *			command is issued as part of an error recovery action.
19386  *
19387  * Return Code: 0   - Success
19388  *		EIO - IO error
19389  *		EACCES - Reservation conflict detected
19390  *		ENXIO  - Not Ready, medium not present
19391  *		errno return code from sd_send_scsi_cmd()
19392  *
19393  *     Context: Can sleep.
19394  */
19395 
19396 static int
19397 sd_send_scsi_START_STOP_UNIT(struct sd_lun *un, int flag, int path_flag)
19398 {
19399 	struct	scsi_extended_sense	sense_buf;
19400 	union scsi_cdb		cdb;
19401 	struct uscsi_cmd	ucmd_buf;
19402 	int			status;
19403 
19404 	ASSERT(un != NULL);
19405 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19406 
19407 	SD_TRACE(SD_LOG_IO, un,
19408 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
19409 
19410 	if (un->un_f_check_start_stop &&
19411 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
19412 	    (un->un_f_start_stop_supported != TRUE)) {
19413 		return (0);
19414 	}
19415 
19416 	bzero(&cdb, sizeof (cdb));
19417 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19418 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19419 
19420 	cdb.scc_cmd = SCMD_START_STOP;
19421 	cdb.cdb_opaque[4] = (uchar_t)flag;
19422 
19423 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19424 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19425 	ucmd_buf.uscsi_bufaddr	= NULL;
19426 	ucmd_buf.uscsi_buflen	= 0;
19427 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19428 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19429 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19430 	ucmd_buf.uscsi_timeout	= 200;
19431 
19432 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19433 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
19434 
19435 	switch (status) {
19436 	case 0:
19437 		break;	/* Success! */
19438 	case EIO:
19439 		switch (ucmd_buf.uscsi_status) {
19440 		case STATUS_RESERVATION_CONFLICT:
19441 			status = EACCES;
19442 			break;
19443 		case STATUS_CHECK:
19444 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
19445 				switch (sense_buf.es_key) {
19446 				case KEY_ILLEGAL_REQUEST:
19447 					status = ENOTSUP;
19448 					break;
19449 				case KEY_NOT_READY:
19450 					if (sense_buf.es_add_code == 0x3A) {
19451 						status = ENXIO;
19452 					}
19453 					break;
19454 				default:
19455 					break;
19456 				}
19457 			}
19458 			break;
19459 		default:
19460 			break;
19461 		}
19462 		break;
19463 	default:
19464 		break;
19465 	}
19466 
19467 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
19468 
19469 	return (status);
19470 }
19471 
19472 
19473 /*
19474  *    Function: sd_start_stop_unit_callback
19475  *
19476  * Description: timeout(9F) callback to begin recovery process for a
19477  *		device that has spun down.
19478  *
19479  *   Arguments: arg - pointer to associated softstate struct.
19480  *
19481  *     Context: Executes in a timeout(9F) thread context
19482  */
19483 
19484 static void
19485 sd_start_stop_unit_callback(void *arg)
19486 {
19487 	struct sd_lun	*un = arg;
19488 	ASSERT(un != NULL);
19489 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19490 
19491 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
19492 
19493 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
19494 }
19495 
19496 
19497 /*
19498  *    Function: sd_start_stop_unit_task
19499  *
19500  * Description: Recovery procedure when a drive is spun down.
19501  *
19502  *   Arguments: arg - pointer to associated softstate struct.
19503  *
19504  *     Context: Executes in a taskq() thread context
19505  */
19506 
19507 static void
19508 sd_start_stop_unit_task(void *arg)
19509 {
19510 	struct sd_lun	*un = arg;
19511 
19512 	ASSERT(un != NULL);
19513 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19514 
19515 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
19516 
19517 	/*
19518 	 * Some unformatted drives report not ready error, no need to
19519 	 * restart if format has been initiated.
19520 	 */
19521 	mutex_enter(SD_MUTEX(un));
19522 	if (un->un_f_format_in_progress == TRUE) {
19523 		mutex_exit(SD_MUTEX(un));
19524 		return;
19525 	}
19526 	mutex_exit(SD_MUTEX(un));
19527 
19528 	/*
19529 	 * When a START STOP command is issued from here, it is part of a
19530 	 * failure recovery operation and must be issued before any other
19531 	 * commands, including any pending retries. Thus it must be sent
19532 	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
19533 	 * succeeds or not, we will start I/O after the attempt.
19534 	 */
19535 	(void) sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
19536 	    SD_PATH_DIRECT_PRIORITY);
19537 
19538 	/*
19539 	 * The above call blocks until the START_STOP_UNIT command completes.
19540 	 * Now that it has completed, we must re-try the original IO that
19541 	 * received the NOT READY condition in the first place. There are
19542 	 * three possible conditions here:
19543 	 *
19544 	 *  (1) The original IO is on un_retry_bp.
19545 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
19546 	 *	is NULL.
19547 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
19548 	 *	points to some other, unrelated bp.
19549 	 *
19550 	 * For each case, we must call sd_start_cmds() with un_retry_bp
19551 	 * as the argument. If un_retry_bp is NULL, this will initiate
19552 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
19553 	 * then this will process the bp on un_retry_bp. That may or may not
19554 	 * be the original IO, but that does not matter: the important thing
19555 	 * is to keep the IO processing going at this point.
19556 	 *
19557 	 * Note: This is a very specific error recovery sequence associated
19558 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
19559 	 * serialize the I/O with completion of the spin-up.
19560 	 */
19561 	mutex_enter(SD_MUTEX(un));
19562 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19563 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
19564 	    un, un->un_retry_bp);
19565 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
19566 	sd_start_cmds(un, un->un_retry_bp);
19567 	mutex_exit(SD_MUTEX(un));
19568 
19569 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
19570 }
19571 
19572 
19573 /*
19574  *    Function: sd_send_scsi_INQUIRY
19575  *
19576  * Description: Issue the scsi INQUIRY command.
19577  *
19578  *   Arguments: un
19579  *		bufaddr
19580  *		buflen
19581  *		evpd
19582  *		page_code
19583  *		page_length
19584  *
19585  * Return Code: 0   - Success
19586  *		errno return code from sd_send_scsi_cmd()
19587  *
19588  *     Context: Can sleep. Does not return until command is completed.
19589  */
19590 
19591 static int
19592 sd_send_scsi_INQUIRY(struct sd_lun *un, uchar_t *bufaddr, size_t buflen,
19593 	uchar_t evpd, uchar_t page_code, size_t *residp)
19594 {
19595 	union scsi_cdb		cdb;
19596 	struct uscsi_cmd	ucmd_buf;
19597 	int			status;
19598 
19599 	ASSERT(un != NULL);
19600 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19601 	ASSERT(bufaddr != NULL);
19602 
19603 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
19604 
19605 	bzero(&cdb, sizeof (cdb));
19606 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19607 	bzero(bufaddr, buflen);
19608 
19609 	cdb.scc_cmd = SCMD_INQUIRY;
19610 	cdb.cdb_opaque[1] = evpd;
19611 	cdb.cdb_opaque[2] = page_code;
19612 	FORMG0COUNT(&cdb, buflen);
19613 
19614 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19615 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19616 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
19617 	ucmd_buf.uscsi_buflen	= buflen;
19618 	ucmd_buf.uscsi_rqbuf	= NULL;
19619 	ucmd_buf.uscsi_rqlen	= 0;
19620 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
19621 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
19622 
19623 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19624 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_DIRECT);
19625 
19626 	if ((status == 0) && (residp != NULL)) {
19627 		*residp = ucmd_buf.uscsi_resid;
19628 	}
19629 
19630 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
19631 
19632 	return (status);
19633 }
19634 
19635 
19636 /*
19637  *    Function: sd_send_scsi_TEST_UNIT_READY
19638  *
19639  * Description: Issue the scsi TEST UNIT READY command.
19640  *		This routine can be told to set the flag USCSI_DIAGNOSE to
19641  *		prevent retrying failed commands. Use this when the intent
19642  *		is either to check for device readiness, to clear a Unit
19643  *		Attention, or to clear any outstanding sense data.
19644  *		However under specific conditions the expected behavior
19645  *		is for retries to bring a device ready, so use the flag
19646  *		with caution.
19647  *
19648  *   Arguments: un
19649  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
19650  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
19651  *			0: dont check for media present, do retries on cmd.
19652  *
19653  * Return Code: 0   - Success
19654  *		EIO - IO error
19655  *		EACCES - Reservation conflict detected
19656  *		ENXIO  - Not Ready, medium not present
19657  *		errno return code from sd_send_scsi_cmd()
19658  *
19659  *     Context: Can sleep. Does not return until command is completed.
19660  */
19661 
19662 static int
19663 sd_send_scsi_TEST_UNIT_READY(struct sd_lun *un, int flag)
19664 {
19665 	struct	scsi_extended_sense	sense_buf;
19666 	union scsi_cdb		cdb;
19667 	struct uscsi_cmd	ucmd_buf;
19668 	int			status;
19669 
19670 	ASSERT(un != NULL);
19671 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19672 
19673 	SD_TRACE(SD_LOG_IO, un,
19674 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
19675 
19676 	/*
19677 	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
19678 	 * timeouts when they receive a TUR and the queue is not empty. Check
19679 	 * the configuration flag set during attach (indicating the drive has
19680 	 * this firmware bug) and un_ncmds_in_transport before issuing the
19681 	 * TUR. If there are
19682 	 * pending commands return success, this is a bit arbitrary but is ok
19683 	 * for non-removables (i.e. the eliteI disks) and non-clustering
19684 	 * configurations.
19685 	 */
19686 	if (un->un_f_cfg_tur_check == TRUE) {
19687 		mutex_enter(SD_MUTEX(un));
19688 		if (un->un_ncmds_in_transport != 0) {
19689 			mutex_exit(SD_MUTEX(un));
19690 			return (0);
19691 		}
19692 		mutex_exit(SD_MUTEX(un));
19693 	}
19694 
19695 	bzero(&cdb, sizeof (cdb));
19696 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19697 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19698 
19699 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
19700 
19701 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19702 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
19703 	ucmd_buf.uscsi_bufaddr	= NULL;
19704 	ucmd_buf.uscsi_buflen	= 0;
19705 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19706 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19707 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
19708 
19709 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
19710 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
19711 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
19712 	}
19713 	ucmd_buf.uscsi_timeout	= 60;
19714 
19715 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19716 	    UIO_SYSSPACE, UIO_SYSSPACE,
19717 	    ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT : SD_PATH_STANDARD));
19718 
19719 	switch (status) {
19720 	case 0:
19721 		break;	/* Success! */
19722 	case EIO:
19723 		switch (ucmd_buf.uscsi_status) {
19724 		case STATUS_RESERVATION_CONFLICT:
19725 			status = EACCES;
19726 			break;
19727 		case STATUS_CHECK:
19728 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
19729 				break;
19730 			}
19731 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19732 			    (sense_buf.es_key == KEY_NOT_READY) &&
19733 			    (sense_buf.es_add_code == 0x3A)) {
19734 				status = ENXIO;
19735 			}
19736 			break;
19737 		default:
19738 			break;
19739 		}
19740 		break;
19741 	default:
19742 		break;
19743 	}
19744 
19745 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
19746 
19747 	return (status);
19748 }
19749 
19750 
19751 /*
19752  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
19753  *
19754  * Description: Issue the scsi PERSISTENT RESERVE IN command.
19755  *
19756  *   Arguments: un
19757  *
19758  * Return Code: 0   - Success
19759  *		EACCES
19760  *		ENOTSUP
19761  *		errno return code from sd_send_scsi_cmd()
19762  *
19763  *     Context: Can sleep. Does not return until command is completed.
19764  */
19765 
19766 static int
19767 sd_send_scsi_PERSISTENT_RESERVE_IN(struct sd_lun *un, uchar_t  usr_cmd,
19768 	uint16_t data_len, uchar_t *data_bufp)
19769 {
19770 	struct scsi_extended_sense	sense_buf;
19771 	union scsi_cdb		cdb;
19772 	struct uscsi_cmd	ucmd_buf;
19773 	int			status;
19774 	int			no_caller_buf = FALSE;
19775 
19776 	ASSERT(un != NULL);
19777 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19778 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
19779 
19780 	SD_TRACE(SD_LOG_IO, un,
19781 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
19782 
19783 	bzero(&cdb, sizeof (cdb));
19784 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19785 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19786 	if (data_bufp == NULL) {
19787 		/* Allocate a default buf if the caller did not give one */
19788 		ASSERT(data_len == 0);
19789 		data_len  = MHIOC_RESV_KEY_SIZE;
19790 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
19791 		no_caller_buf = TRUE;
19792 	}
19793 
19794 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
19795 	cdb.cdb_opaque[1] = usr_cmd;
19796 	FORMG1COUNT(&cdb, data_len);
19797 
19798 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19799 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19800 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
19801 	ucmd_buf.uscsi_buflen	= data_len;
19802 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19803 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19804 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
19805 	ucmd_buf.uscsi_timeout	= 60;
19806 
19807 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19808 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
19809 
19810 	switch (status) {
19811 	case 0:
19812 		break;	/* Success! */
19813 	case EIO:
19814 		switch (ucmd_buf.uscsi_status) {
19815 		case STATUS_RESERVATION_CONFLICT:
19816 			status = EACCES;
19817 			break;
19818 		case STATUS_CHECK:
19819 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19820 			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
19821 				status = ENOTSUP;
19822 			}
19823 			break;
19824 		default:
19825 			break;
19826 		}
19827 		break;
19828 	default:
19829 		break;
19830 	}
19831 
19832 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
19833 
19834 	if (no_caller_buf == TRUE) {
19835 		kmem_free(data_bufp, data_len);
19836 	}
19837 
19838 	return (status);
19839 }
19840 
19841 
19842 /*
19843  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
19844  *
19845  * Description: This routine is the driver entry point for handling CD-ROM
19846  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS,
19847  *		MHIOCGRP_INRESV) by sending the SCSI-3 PROUT commands to the
19848  *		device.
19849  *
19850  *   Arguments: un  -   Pointer to soft state struct for the target.
19851  *		usr_cmd SCSI-3 reservation facility command (one of
19852  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
19853  *			SD_SCSI3_PREEMPTANDABORT)
19854  *		usr_bufp - user provided pointer register, reserve descriptor or
19855  *			preempt and abort structure (mhioc_register_t,
19856  *                      mhioc_resv_desc_t, mhioc_preemptandabort_t)
19857  *
19858  * Return Code: 0   - Success
19859  *		EACCES
19860  *		ENOTSUP
19861  *		errno return code from sd_send_scsi_cmd()
19862  *
19863  *     Context: Can sleep. Does not return until command is completed.
19864  */
19865 
19866 static int
19867 sd_send_scsi_PERSISTENT_RESERVE_OUT(struct sd_lun *un, uchar_t usr_cmd,
19868 	uchar_t	*usr_bufp)
19869 {
19870 	struct scsi_extended_sense	sense_buf;
19871 	union scsi_cdb		cdb;
19872 	struct uscsi_cmd	ucmd_buf;
19873 	int			status;
19874 	uchar_t			data_len = sizeof (sd_prout_t);
19875 	sd_prout_t		*prp;
19876 
19877 	ASSERT(un != NULL);
19878 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19879 	ASSERT(data_len == 24);	/* required by scsi spec */
19880 
19881 	SD_TRACE(SD_LOG_IO, un,
19882 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
19883 
19884 	if (usr_bufp == NULL) {
19885 		return (EINVAL);
19886 	}
19887 
19888 	bzero(&cdb, sizeof (cdb));
19889 	bzero(&ucmd_buf, sizeof (ucmd_buf));
19890 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
19891 	prp = kmem_zalloc(data_len, KM_SLEEP);
19892 
19893 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
19894 	cdb.cdb_opaque[1] = usr_cmd;
19895 	FORMG1COUNT(&cdb, data_len);
19896 
19897 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
19898 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
19899 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
19900 	ucmd_buf.uscsi_buflen	= data_len;
19901 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
19902 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
19903 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
19904 	ucmd_buf.uscsi_timeout	= 60;
19905 
19906 	switch (usr_cmd) {
19907 	case SD_SCSI3_REGISTER: {
19908 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
19909 
19910 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
19911 		bcopy(ptr->newkey.key, prp->service_key,
19912 		    MHIOC_RESV_KEY_SIZE);
19913 		prp->aptpl = ptr->aptpl;
19914 		break;
19915 	}
19916 	case SD_SCSI3_RESERVE:
19917 	case SD_SCSI3_RELEASE: {
19918 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
19919 
19920 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
19921 		prp->scope_address = BE_32(ptr->scope_specific_addr);
19922 		cdb.cdb_opaque[2] = ptr->type;
19923 		break;
19924 	}
19925 	case SD_SCSI3_PREEMPTANDABORT: {
19926 		mhioc_preemptandabort_t *ptr =
19927 		    (mhioc_preemptandabort_t *)usr_bufp;
19928 
19929 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
19930 		bcopy(ptr->victim_key.key, prp->service_key,
19931 		    MHIOC_RESV_KEY_SIZE);
19932 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
19933 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
19934 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
19935 		break;
19936 	}
19937 	case SD_SCSI3_REGISTERANDIGNOREKEY:
19938 	{
19939 		mhioc_registerandignorekey_t *ptr;
19940 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
19941 		bcopy(ptr->newkey.key,
19942 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
19943 		prp->aptpl = ptr->aptpl;
19944 		break;
19945 	}
19946 	default:
19947 		ASSERT(FALSE);
19948 		break;
19949 	}
19950 
19951 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
19952 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
19953 
19954 	switch (status) {
19955 	case 0:
19956 		break;	/* Success! */
19957 	case EIO:
19958 		switch (ucmd_buf.uscsi_status) {
19959 		case STATUS_RESERVATION_CONFLICT:
19960 			status = EACCES;
19961 			break;
19962 		case STATUS_CHECK:
19963 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
19964 			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST)) {
19965 				status = ENOTSUP;
19966 			}
19967 			break;
19968 		default:
19969 			break;
19970 		}
19971 		break;
19972 	default:
19973 		break;
19974 	}
19975 
19976 	kmem_free(prp, data_len);
19977 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
19978 	return (status);
19979 }
19980 
19981 
19982 /*
19983  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
19984  *
19985  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
19986  *
19987  *   Arguments: un - pointer to the target's soft state struct
19988  *
19989  * Return Code: 0 - success
19990  *		errno-type error code
19991  *
19992  *     Context: kernel thread context only.
19993  */
19994 
19995 static int
19996 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
19997 {
19998 	struct sd_uscsi_info	*uip;
19999 	struct uscsi_cmd	*uscmd;
20000 	union scsi_cdb		*cdb;
20001 	struct buf		*bp;
20002 	int			rval = 0;
20003 
20004 	SD_TRACE(SD_LOG_IO, un,
20005 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
20006 
20007 	ASSERT(un != NULL);
20008 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20009 
20010 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
20011 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
20012 
20013 	/*
20014 	 * First get some memory for the uscsi_cmd struct and cdb
20015 	 * and initialize for SYNCHRONIZE_CACHE cmd.
20016 	 */
20017 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
20018 	uscmd->uscsi_cdblen = CDB_GROUP1;
20019 	uscmd->uscsi_cdb = (caddr_t)cdb;
20020 	uscmd->uscsi_bufaddr = NULL;
20021 	uscmd->uscsi_buflen = 0;
20022 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
20023 	uscmd->uscsi_rqlen = SENSE_LENGTH;
20024 	uscmd->uscsi_rqresid = SENSE_LENGTH;
20025 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
20026 	uscmd->uscsi_timeout = sd_io_time;
20027 
20028 	/*
20029 	 * Allocate an sd_uscsi_info struct and fill it with the info
20030 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
20031 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
20032 	 * since we allocate the buf here in this function, we do not
20033 	 * need to preserve the prior contents of b_private.
20034 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
20035 	 */
20036 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
20037 	uip->ui_flags = SD_PATH_DIRECT;
20038 	uip->ui_cmdp  = uscmd;
20039 
20040 	bp = getrbuf(KM_SLEEP);
20041 	bp->b_private = uip;
20042 
20043 	/*
20044 	 * Setup buffer to carry uscsi request.
20045 	 */
20046 	bp->b_flags  = B_BUSY;
20047 	bp->b_bcount = 0;
20048 	bp->b_blkno  = 0;
20049 
20050 	if (dkc != NULL) {
20051 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
20052 		uip->ui_dkc = *dkc;
20053 	}
20054 
20055 	bp->b_edev = SD_GET_DEV(un);
20056 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
20057 
20058 	(void) sd_uscsi_strategy(bp);
20059 
20060 	/*
20061 	 * If synchronous request, wait for completion
20062 	 * If async just return and let b_iodone callback
20063 	 * cleanup.
20064 	 * NOTE: On return, u_ncmds_in_driver will be decremented,
20065 	 * but it was also incremented in sd_uscsi_strategy(), so
20066 	 * we should be ok.
20067 	 */
20068 	if (dkc == NULL) {
20069 		(void) biowait(bp);
20070 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
20071 	}
20072 
20073 	return (rval);
20074 }
20075 
20076 
/*
 *    Function: sd_send_scsi_SYNCHRONIZE_CACHE_biodone
 *
 * Description: Completion handler for the SYNCHRONIZE CACHE command issued
 *		by sd_send_scsi_SYNCHRONIZE_CACHE(): interprets the command
 *		status, invokes the caller's dk_callback if one was supplied,
 *		and frees everything allocated for the request (the buf, the
 *		sd_uscsi_info, the uscsi_cmd, its CDB and sense buffer).
 *
 *   Arguments: bp - buf carrying the completed uscsi request; b_private
 *		points to the request's sd_uscsi_info.
 *
 * Return Code: 0 - success (including deliberately ignored errors: a
 *		    reservation conflict, or any failure when the media
 *		    is not present)
 *		ENOTSUP - target rejected the command as an illegal request
 *		errno from geterror(bp)
 */
static int
sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
{
	struct sd_uscsi_info *uip;
	struct uscsi_cmd *uscmd;
	struct scsi_extended_sense *sense_buf;
	struct sd_lun *un;
	int status;

	uip = (struct sd_uscsi_info *)(bp->b_private);
	ASSERT(uip != NULL);

	uscmd = uip->ui_cmdp;
	ASSERT(uscmd != NULL);

	sense_buf = (struct scsi_extended_sense *)uscmd->uscsi_rqbuf;
	ASSERT(sense_buf != NULL);

	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	ASSERT(un != NULL);

	status = geterror(bp);
	switch (status) {
	case 0:
		break;	/* Success! */
	case EIO:
		switch (uscmd->uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			/* Ignore reservation conflict */
			status = 0;
			goto done;

		case STATUS_CHECK:
			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
			    (sense_buf->es_key == KEY_ILLEGAL_REQUEST)) {
				/* Ignore Illegal Request error */
				/* Remember: this target can't sync its cache */
				mutex_enter(SD_MUTEX(un));
				un->un_f_sync_cache_supported = FALSE;
				mutex_exit(SD_MUTEX(un));
				status = ENOTSUP;
				goto done;
			}
			break;
		default:
			break;
		}
		/* FALLTHRU */
	default:
		/* Ignore error if the media is not present */
		if (sd_send_scsi_TEST_UNIT_READY(un, 0) != 0) {
			status = 0;
			goto done;
		}
		/* If we reach this, we had an error */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "SYNCHRONIZE CACHE command failed (%d)\n", status);
		break;
	}

done:
	/* Notify the caller (async case), then release all request memory. */
	if (uip->ui_dkc.dkc_callback != NULL) {
		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
	}

	ASSERT((bp->b_flags & B_REMAPPED) == 0);
	freerbuf(bp);
	kmem_free(uip, sizeof (struct sd_uscsi_info));
	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
	kmem_free(uscmd, sizeof (struct uscsi_cmd));

	return (status);
}
20150 
20151 
20152 /*
20153  *    Function: sd_send_scsi_GET_CONFIGURATION
20154  *
20155  * Description: Issues the get configuration command to the device.
20156  *		Called from sd_check_for_writable_cd & sd_get_media_info
20157  *		caller needs to ensure that buflen = SD_PROFILE_HEADER_LEN
20158  *   Arguments: un
20159  *		ucmdbuf
20160  *		rqbuf
20161  *		rqbuflen
20162  *		bufaddr
20163  *		buflen
20164  *
20165  * Return Code: 0   - Success
20166  *		errno return code from sd_send_scsi_cmd()
20167  *
20168  *     Context: Can sleep. Does not return until command is completed.
20169  *
20170  */
20171 
20172 static int
20173 sd_send_scsi_GET_CONFIGURATION(struct sd_lun *un, struct uscsi_cmd *ucmdbuf,
20174 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen)
20175 {
20176 	char	cdb[CDB_GROUP1];
20177 	int	status;
20178 
20179 	ASSERT(un != NULL);
20180 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20181 	ASSERT(bufaddr != NULL);
20182 	ASSERT(ucmdbuf != NULL);
20183 	ASSERT(rqbuf != NULL);
20184 
20185 	SD_TRACE(SD_LOG_IO, un,
20186 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
20187 
20188 	bzero(cdb, sizeof (cdb));
20189 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20190 	bzero(rqbuf, rqbuflen);
20191 	bzero(bufaddr, buflen);
20192 
20193 	/*
20194 	 * Set up cdb field for the get configuration command.
20195 	 */
20196 	cdb[0] = SCMD_GET_CONFIGURATION;
20197 	cdb[1] = 0x02;  /* Requested Type */
20198 	cdb[8] = SD_PROFILE_HEADER_LEN;
20199 	ucmdbuf->uscsi_cdb = cdb;
20200 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20201 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20202 	ucmdbuf->uscsi_buflen = buflen;
20203 	ucmdbuf->uscsi_timeout = sd_io_time;
20204 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20205 	ucmdbuf->uscsi_rqlen = rqbuflen;
20206 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20207 
20208 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20209 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20210 
20211 	switch (status) {
20212 	case 0:
20213 		break;  /* Success! */
20214 	case EIO:
20215 		switch (ucmdbuf->uscsi_status) {
20216 		case STATUS_RESERVATION_CONFLICT:
20217 			status = EACCES;
20218 			break;
20219 		default:
20220 			break;
20221 		}
20222 		break;
20223 	default:
20224 		break;
20225 	}
20226 
20227 	if (status == 0) {
20228 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20229 		    "sd_send_scsi_GET_CONFIGURATION: data",
20230 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20231 	}
20232 
20233 	SD_TRACE(SD_LOG_IO, un,
20234 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
20235 
20236 	return (status);
20237 }
20238 
20239 /*
20240  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
20241  *
20242  * Description: Issues the get configuration command to the device to
20243  *              retrieve a specfic feature. Called from
20244  *		sd_check_for_writable_cd & sd_set_mmc_caps.
20245  *   Arguments: un
20246  *              ucmdbuf
20247  *              rqbuf
20248  *              rqbuflen
20249  *              bufaddr
20250  *              buflen
20251  *		feature
20252  *
20253  * Return Code: 0   - Success
20254  *              errno return code from sd_send_scsi_cmd()
20255  *
20256  *     Context: Can sleep. Does not return until command is completed.
20257  *
20258  */
20259 static int
20260 sd_send_scsi_feature_GET_CONFIGURATION(struct sd_lun *un,
20261 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
20262 	uchar_t *bufaddr, uint_t buflen, char feature)
20263 {
20264 	char    cdb[CDB_GROUP1];
20265 	int	status;
20266 
20267 	ASSERT(un != NULL);
20268 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20269 	ASSERT(bufaddr != NULL);
20270 	ASSERT(ucmdbuf != NULL);
20271 	ASSERT(rqbuf != NULL);
20272 
20273 	SD_TRACE(SD_LOG_IO, un,
20274 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
20275 
20276 	bzero(cdb, sizeof (cdb));
20277 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
20278 	bzero(rqbuf, rqbuflen);
20279 	bzero(bufaddr, buflen);
20280 
20281 	/*
20282 	 * Set up cdb field for the get configuration command.
20283 	 */
20284 	cdb[0] = SCMD_GET_CONFIGURATION;
20285 	cdb[1] = 0x02;  /* Requested Type */
20286 	cdb[3] = feature;
20287 	cdb[8] = buflen;
20288 	ucmdbuf->uscsi_cdb = cdb;
20289 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
20290 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
20291 	ucmdbuf->uscsi_buflen = buflen;
20292 	ucmdbuf->uscsi_timeout = sd_io_time;
20293 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
20294 	ucmdbuf->uscsi_rqlen = rqbuflen;
20295 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
20296 
20297 	status = sd_send_scsi_cmd(SD_GET_DEV(un), ucmdbuf, UIO_SYSSPACE,
20298 	    UIO_SYSSPACE, UIO_SYSSPACE, SD_PATH_STANDARD);
20299 
20300 	switch (status) {
20301 	case 0:
20302 		break;  /* Success! */
20303 	case EIO:
20304 		switch (ucmdbuf->uscsi_status) {
20305 		case STATUS_RESERVATION_CONFLICT:
20306 			status = EACCES;
20307 			break;
20308 		default:
20309 			break;
20310 		}
20311 		break;
20312 	default:
20313 		break;
20314 	}
20315 
20316 	if (status == 0) {
20317 		SD_DUMP_MEMORY(un, SD_LOG_IO,
20318 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
20319 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
20320 	}
20321 
20322 	SD_TRACE(SD_LOG_IO, un,
20323 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
20324 
20325 	return (status);
20326 }
20327 
20328 
20329 /*
20330  *    Function: sd_send_scsi_MODE_SENSE
20331  *
20332  * Description: Utility function for issuing a scsi MODE SENSE command.
20333  *		Note: This routine uses a consistent implementation for Group0,
20334  *		Group1, and Group2 commands across all platforms. ATAPI devices
20335  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20336  *
20337  *   Arguments: un - pointer to the softstate struct for the target.
20338  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
20339  *			  CDB_GROUP[1|2] (10 byte).
20340  *		bufaddr - buffer for page data retrieved from the target.
20341  *		buflen - size of page to be retrieved.
20342  *		page_code - page code of data to be retrieved from the target.
20343  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20344  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20345  *			to use the USCSI "direct" chain and bypass the normal
20346  *			command waitq.
20347  *
20348  * Return Code: 0   - Success
20349  *		errno return code from sd_send_scsi_cmd()
20350  *
20351  *     Context: Can sleep. Does not return until command is completed.
20352  */
20353 
20354 static int
20355 sd_send_scsi_MODE_SENSE(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20356 	size_t buflen,  uchar_t page_code, int path_flag)
20357 {
20358 	struct	scsi_extended_sense	sense_buf;
20359 	union scsi_cdb		cdb;
20360 	struct uscsi_cmd	ucmd_buf;
20361 	int			status;
20362 
20363 	ASSERT(un != NULL);
20364 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20365 	ASSERT(bufaddr != NULL);
20366 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20367 	    (cdbsize == CDB_GROUP2));
20368 
20369 	SD_TRACE(SD_LOG_IO, un,
20370 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
20371 
20372 	bzero(&cdb, sizeof (cdb));
20373 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20374 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20375 	bzero(bufaddr, buflen);
20376 
20377 	if (cdbsize == CDB_GROUP0) {
20378 		cdb.scc_cmd = SCMD_MODE_SENSE;
20379 		cdb.cdb_opaque[2] = page_code;
20380 		FORMG0COUNT(&cdb, buflen);
20381 	} else {
20382 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
20383 		cdb.cdb_opaque[2] = page_code;
20384 		FORMG1COUNT(&cdb, buflen);
20385 	}
20386 
20387 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20388 
20389 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20390 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20391 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20392 	ucmd_buf.uscsi_buflen	= buflen;
20393 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20394 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20395 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20396 	ucmd_buf.uscsi_timeout	= 60;
20397 
20398 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20399 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20400 
20401 	switch (status) {
20402 	case 0:
20403 		break;	/* Success! */
20404 	case EIO:
20405 		switch (ucmd_buf.uscsi_status) {
20406 		case STATUS_RESERVATION_CONFLICT:
20407 			status = EACCES;
20408 			break;
20409 		default:
20410 			break;
20411 		}
20412 		break;
20413 	default:
20414 		break;
20415 	}
20416 
20417 	if (status == 0) {
20418 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
20419 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20420 	}
20421 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
20422 
20423 	return (status);
20424 }
20425 
20426 
20427 /*
20428  *    Function: sd_send_scsi_MODE_SELECT
20429  *
20430  * Description: Utility function for issuing a scsi MODE SELECT command.
20431  *		Note: This routine uses a consistent implementation for Group0,
20432  *		Group1, and Group2 commands across all platforms. ATAPI devices
20433  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
20434  *
20435  *   Arguments: un - pointer to the softstate struct for the target.
20436  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
20437  *			  CDB_GROUP[1|2] (10 byte).
20438  *		bufaddr - buffer for page data retrieved from the target.
20439  *		buflen - size of page to be retrieved.
 *		save_page - boolean to determine if the SP bit should be set.
20441  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20442  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20443  *			to use the USCSI "direct" chain and bypass the normal
20444  *			command waitq.
20445  *
20446  * Return Code: 0   - Success
20447  *		errno return code from sd_send_scsi_cmd()
20448  *
20449  *     Context: Can sleep. Does not return until command is completed.
20450  */
20451 
20452 static int
20453 sd_send_scsi_MODE_SELECT(struct sd_lun *un, int cdbsize, uchar_t *bufaddr,
20454 	size_t buflen,  uchar_t save_page, int path_flag)
20455 {
20456 	struct	scsi_extended_sense	sense_buf;
20457 	union scsi_cdb		cdb;
20458 	struct uscsi_cmd	ucmd_buf;
20459 	int			status;
20460 
20461 	ASSERT(un != NULL);
20462 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20463 	ASSERT(bufaddr != NULL);
20464 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
20465 	    (cdbsize == CDB_GROUP2));
20466 
20467 	SD_TRACE(SD_LOG_IO, un,
20468 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
20469 
20470 	bzero(&cdb, sizeof (cdb));
20471 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20472 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20473 
20474 	/* Set the PF bit for many third party drives */
20475 	cdb.cdb_opaque[1] = 0x10;
20476 
20477 	/* Set the savepage(SP) bit if given */
20478 	if (save_page == SD_SAVE_PAGE) {
20479 		cdb.cdb_opaque[1] |= 0x01;
20480 	}
20481 
20482 	if (cdbsize == CDB_GROUP0) {
20483 		cdb.scc_cmd = SCMD_MODE_SELECT;
20484 		FORMG0COUNT(&cdb, buflen);
20485 	} else {
20486 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
20487 		FORMG1COUNT(&cdb, buflen);
20488 	}
20489 
20490 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
20491 
20492 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20493 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
20494 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20495 	ucmd_buf.uscsi_buflen	= buflen;
20496 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20497 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20498 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20499 	ucmd_buf.uscsi_timeout	= 60;
20500 
20501 	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
20502 	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
20503 
20504 	switch (status) {
20505 	case 0:
20506 		break;	/* Success! */
20507 	case EIO:
20508 		switch (ucmd_buf.uscsi_status) {
20509 		case STATUS_RESERVATION_CONFLICT:
20510 			status = EACCES;
20511 			break;
20512 		default:
20513 			break;
20514 		}
20515 		break;
20516 	default:
20517 		break;
20518 	}
20519 
20520 	if (status == 0) {
20521 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
20522 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
20523 	}
20524 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
20525 
20526 	return (status);
20527 }
20528 
20529 
20530 /*
20531  *    Function: sd_send_scsi_RDWR
20532  *
20533  * Description: Issue a scsi READ or WRITE command with the given parameters.
20534  *
20535  *   Arguments: un:      Pointer to the sd_lun struct for the target.
20536  *		cmd:	 SCMD_READ or SCMD_WRITE
 *		bufaddr: Address of caller's buffer to receive the RDWR data
 *		buflen:  Length of caller's buffer to receive the RDWR data.
 *		start_block: Block number for the start of the RDWR operation.
 *			 (Assumes target-native block size.)
 *		residp:  Pointer to variable to receive the residual of the
 *			 RDWR operation (may be NULL if no residual requested).
 *			 NOTE(review): the implementation below does not take
 *			 a residp argument; confirm against the prototype.
20543  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20544  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20545  *			to use the USCSI "direct" chain and bypass the normal
20546  *			command waitq.
20547  *
20548  * Return Code: 0   - Success
20549  *		errno return code from sd_send_scsi_cmd()
20550  *
20551  *     Context: Can sleep. Does not return until command is completed.
20552  */
20553 
static int
sd_send_scsi_RDWR(struct sd_lun *un, uchar_t cmd, void *bufaddr,
	size_t buflen, daddr_t start_block, int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	uint32_t		block_count;
	int			status;
	int			cdbsize;
	uchar_t			flag;	/* USCSI_READ or USCSI_WRITE */

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(bufaddr != NULL);
	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);

	/*
	 * buflen is converted to a block count below using the target's
	 * block size; bail out if that block size is not yet known.
	 */
	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
		return (EINVAL);
	}

	/* Hold the soft state mutex while reading the target block size. */
	mutex_enter(SD_MUTEX(un));
	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
	mutex_exit(SD_MUTEX(un));

	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;

	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
	    bufaddr, buflen, start_block, block_count);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/*
	 * Compute CDB size to use:
	 * - Group 4 (16-byte) when the LBA does not fit in 32 bits;
	 * - Group 1 (10-byte) when the LBA exceeds the 21-bit address
	 *   field of a Group 0 CDB (the 0xFFE00000 mask), or for ATAPI
	 *   devices, which use Group 1 Read/Write commands;
	 * - Group 0 (6-byte) otherwise.
	 */
	if (start_block > 0xffffffff)
		cdbsize = CDB_GROUP4;
	else if ((start_block & 0xFFE00000) ||
	    (un->un_f_cfg_is_atapi == TRUE))
		cdbsize = CDB_GROUP1;
	else
		cdbsize = CDB_GROUP0;

	/* Fill in the opcode, starting LBA and transfer length. */
	switch (cdbsize) {
	case CDB_GROUP0:	/* 6-byte CDBs */
		cdb.scc_cmd = cmd;
		FORMG0ADDR(&cdb, start_block);
		FORMG0COUNT(&cdb, block_count);
		break;
	case CDB_GROUP1:	/* 10-byte CDBs */
		cdb.scc_cmd = cmd | SCMD_GROUP1;
		FORMG1ADDR(&cdb, start_block);
		FORMG1COUNT(&cdb, block_count);
		break;
	case CDB_GROUP4:	/* 16-byte CDBs */
		cdb.scc_cmd = cmd | SCMD_GROUP4;
		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
		FORMG4COUNT(&cdb, block_count);
		break;
	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
	default:
		/* All others reserved */
		return (EINVAL);
	}

	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
	SD_FILL_SCSI1_LUN_CDB(un, &cdb);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
	ucmd_buf.uscsi_bufaddr	= bufaddr;
	ucmd_buf.uscsi_buflen	= buflen;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;
	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
				UIO_SYSSPACE, UIO_SYSSPACE, path_flag);
	/* Map an EIO caused by a reservation conflict to EACCES. */
	switch (status) {
	case 0:
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	if (status == 0) {
		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");

	return (status);
}
20660 
20661 
20662 /*
20663  *    Function: sd_send_scsi_LOG_SENSE
20664  *
20665  * Description: Issue a scsi LOG_SENSE command with the given parameters.
20666  *
 *   Arguments: un:      Pointer to the sd_lun struct for the target.
 *		bufaddr: Buffer to receive the log page data.
 *		buflen:  Number of bytes of log page data to retrieve.
 *		page_code:    Log page to request.
 *		page_control: Page control field (placed in bits 6-7 of
 *			      CDB byte 2).
 *		param_ptr:    Parameter pointer field of the CDB.
 *		path_flag:    SD_PATH_DIRECT or SD_PATH_DIRECT_PRIORITY,
 *			      as for the other sd_send_scsi_* routines.
 *
20669  * Return Code: 0   - Success
20670  *		errno return code from sd_send_scsi_cmd()
20671  *
20672  *     Context: Can sleep. Does not return until command is completed.
20673  */
20674 
static int
sd_send_scsi_LOG_SENSE(struct sd_lun *un, uchar_t *bufaddr, uint16_t buflen,
	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
	int path_flag)

{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/*
	 * Build the 10-byte LOG SENSE CDB: page control goes in the top
	 * two bits of byte 2 alongside the page code; the 16-bit parameter
	 * pointer occupies bytes 5-6 (big-endian).
	 */
	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
	FORMG1COUNT(&cdb, buflen);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
	ucmd_buf.uscsi_buflen	= buflen;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_send_scsi_cmd(SD_GET_DEV(un), &ucmd_buf, UIO_SYSSPACE,
	    UIO_SYSSPACE, UIO_SYSSPACE, path_flag);

	/*
	 * Post-process the result: map a reservation conflict to EACCES,
	 * and on an ILLEGAL REQUEST/INVALID FIELD IN CDB check sense,
	 * either retry with the vendor-unique start/stop cycle page or
	 * report an unsupported temperature page as ENOTTY.
	 */
	switch (status) {
	case 0:
		break;
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (sense_buf.es_key == KEY_ILLEGAL_REQUEST) &&
			    (sense_buf.es_add_code == 0x24)) {
				/*
				 * ASC 0x24: INVALID FIELD IN CDB
				 */
				switch (page_code) {
				case START_STOP_CYCLE_PAGE:
					/*
					 * The start stop cycle counter is
					 * implemented as page 0x31 in earlier
					 * generation disks. In new generation
					 * disks the start stop cycle counter is
					 * implemented as page 0xE. To properly
					 * handle this case if an attempt for
					 * log page 0xE is made and fails we
					 * will try again using page 0x31.
					 *
					 * Network storage BU committed to
					 * maintain the page 0x31 for this
					 * purpose and will not have any other
					 * page implemented with page code 0x31
					 * until all disks transition to the
					 * standard page.
					 */
					mutex_enter(SD_MUTEX(un));
					un->un_start_stop_cycle_page =
					    START_STOP_CYCLE_VU_PAGE;
					/*
					 * Patch the page code in the CDB and
					 * reissue the same uscsi command; the
					 * retry's result replaces status.
					 */
					cdb.cdb_opaque[2] =
					    (char)(page_control << 6) |
					    un->un_start_stop_cycle_page;
					mutex_exit(SD_MUTEX(un));
					status = sd_send_scsi_cmd(
					    SD_GET_DEV(un), &ucmd_buf,
					    UIO_SYSSPACE, UIO_SYSSPACE,
					    UIO_SYSSPACE, path_flag);

					break;
				case TEMPERATURE_PAGE:
					/* Device has no temperature page */
					status = ENOTTY;
					break;
				default:
					break;
				}
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	if (status == 0) {
		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");

	return (status);
}
20785 
20786 
20787 /*
20788  *    Function: sdioctl
20789  *
20790  * Description: Driver's ioctl(9e) entry point function.
20791  *
20792  *   Arguments: dev     - device number
20793  *		cmd     - ioctl operation to be performed
20794  *		arg     - user argument, contains data to be set or reference
20795  *			  parameter for get
20796  *		flag    - bit flag, indicating open settings, 32/64 bit type
20797  *		cred_p  - user credential pointer
20798  *		rval_p  - calling process return value (OPT)
20799  *
20800  * Return Code: EINVAL
20801  *		ENOTTY
20802  *		ENXIO
20803  *		EIO
20804  *		EFAULT
20805  *		ENOTSUP
20806  *		EPERM
20807  *
20808  *     Context: Called from the device switch at normal priority.
20809  */
20810 
20811 static int
20812 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
20813 {
20814 	struct sd_lun	*un = NULL;
20815 	int		geom_validated = FALSE;
20816 	int		err = 0;
20817 	int		i = 0;
20818 	cred_t		*cr;
20819 
20820 	/*
20821 	 * All device accesses go thru sdstrategy where we check on suspend
20822 	 * status
20823 	 */
20824 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
20825 		return (ENXIO);
20826 	}
20827 
20828 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20829 
20830 	/*
20831 	 * Moved this wait from sd_uscsi_strategy to here for
20832 	 * reasons of deadlock prevention. Internal driver commands,
20833 	 * specifically those to change a devices power level, result
20834 	 * in a call to sd_uscsi_strategy.
20835 	 */
20836 	mutex_enter(SD_MUTEX(un));
20837 	while ((un->un_state == SD_STATE_SUSPENDED) ||
20838 	    (un->un_state == SD_STATE_PM_CHANGING)) {
20839 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
20840 	}
20841 	/*
20842 	 * Twiddling the counter here protects commands from now
20843 	 * through to the top of sd_uscsi_strategy. Without the
20844 	 * counter inc. a power down, for example, could get in
20845 	 * after the above check for state is made and before
20846 	 * execution gets to the top of sd_uscsi_strategy.
20847 	 * That would cause problems.
20848 	 */
20849 	un->un_ncmds_in_driver++;
20850 
20851 	if ((un->un_f_geometry_is_valid == FALSE) &&
20852 	    (flag & (FNDELAY | FNONBLOCK))) {
20853 		switch (cmd) {
20854 		case CDROMPAUSE:
20855 		case CDROMRESUME:
20856 		case CDROMPLAYMSF:
20857 		case CDROMPLAYTRKIND:
20858 		case CDROMREADTOCHDR:
20859 		case CDROMREADTOCENTRY:
20860 		case CDROMSTOP:
20861 		case CDROMSTART:
20862 		case CDROMVOLCTRL:
20863 		case CDROMSUBCHNL:
20864 		case CDROMREADMODE2:
20865 		case CDROMREADMODE1:
20866 		case CDROMREADOFFSET:
20867 		case CDROMSBLKMODE:
20868 		case CDROMGBLKMODE:
20869 		case CDROMGDRVSPEED:
20870 		case CDROMSDRVSPEED:
20871 		case CDROMCDDA:
20872 		case CDROMCDXA:
20873 		case CDROMSUBCODE:
20874 			if (!ISCD(un)) {
20875 				un->un_ncmds_in_driver--;
20876 				ASSERT(un->un_ncmds_in_driver >= 0);
20877 				mutex_exit(SD_MUTEX(un));
20878 				return (ENOTTY);
20879 			}
20880 			break;
20881 		case FDEJECT:
20882 		case DKIOCEJECT:
20883 		case CDROMEJECT:
20884 			if (!un->un_f_eject_media_supported) {
20885 				un->un_ncmds_in_driver--;
20886 				ASSERT(un->un_ncmds_in_driver >= 0);
20887 				mutex_exit(SD_MUTEX(un));
20888 				return (ENOTTY);
20889 			}
20890 			break;
20891 		case DKIOCSVTOC:
20892 		case DKIOCSETEFI:
20893 		case DKIOCSMBOOT:
20894 		case DKIOCFLUSHWRITECACHE:
20895 			mutex_exit(SD_MUTEX(un));
20896 			err = sd_send_scsi_TEST_UNIT_READY(un, 0);
20897 			if (err != 0) {
20898 				mutex_enter(SD_MUTEX(un));
20899 				un->un_ncmds_in_driver--;
20900 				ASSERT(un->un_ncmds_in_driver >= 0);
20901 				mutex_exit(SD_MUTEX(un));
20902 				return (EIO);
20903 			}
20904 			mutex_enter(SD_MUTEX(un));
20905 			/* FALLTHROUGH */
20906 		case DKIOCREMOVABLE:
20907 		case DKIOCHOTPLUGGABLE:
20908 		case DKIOCINFO:
20909 		case DKIOCGMEDIAINFO:
20910 		case MHIOCENFAILFAST:
20911 		case MHIOCSTATUS:
20912 		case MHIOCTKOWN:
20913 		case MHIOCRELEASE:
20914 		case MHIOCGRP_INKEYS:
20915 		case MHIOCGRP_INRESV:
20916 		case MHIOCGRP_REGISTER:
20917 		case MHIOCGRP_RESERVE:
20918 		case MHIOCGRP_PREEMPTANDABORT:
20919 		case MHIOCGRP_REGISTERANDIGNOREKEY:
20920 		case CDROMCLOSETRAY:
20921 		case USCSICMD:
20922 			goto skip_ready_valid;
20923 		default:
20924 			break;
20925 		}
20926 
20927 		mutex_exit(SD_MUTEX(un));
20928 		err = sd_ready_and_valid(un);
20929 		mutex_enter(SD_MUTEX(un));
20930 		if (err == SD_READY_NOT_VALID) {
20931 			switch (cmd) {
20932 			case DKIOCGAPART:
20933 			case DKIOCGGEOM:
20934 			case DKIOCSGEOM:
20935 			case DKIOCGVTOC:
20936 			case DKIOCSVTOC:
20937 			case DKIOCSAPART:
20938 			case DKIOCG_PHYGEOM:
20939 			case DKIOCG_VIRTGEOM:
20940 				err = ENOTSUP;
20941 				un->un_ncmds_in_driver--;
20942 				ASSERT(un->un_ncmds_in_driver >= 0);
20943 				mutex_exit(SD_MUTEX(un));
20944 				return (err);
20945 			}
20946 		}
20947 		if (err != SD_READY_VALID) {
20948 			switch (cmd) {
20949 			case DKIOCSTATE:
20950 			case CDROMGDRVSPEED:
20951 			case CDROMSDRVSPEED:
20952 			case FDEJECT:	/* for eject command */
20953 			case DKIOCEJECT:
20954 			case CDROMEJECT:
20955 			case DKIOCGETEFI:
20956 			case DKIOCSGEOM:
20957 			case DKIOCREMOVABLE:
20958 			case DKIOCHOTPLUGGABLE:
20959 			case DKIOCSAPART:
20960 			case DKIOCSETEFI:
20961 				break;
20962 			default:
20963 				if (un->un_f_has_removable_media) {
20964 					err = ENXIO;
20965 				} else {
20966 					/* Do not map EACCES to EIO */
20967 					if (err != EACCES)
20968 						err = EIO;
20969 				}
20970 				un->un_ncmds_in_driver--;
20971 				ASSERT(un->un_ncmds_in_driver >= 0);
20972 				mutex_exit(SD_MUTEX(un));
20973 				return (err);
20974 			}
20975 		}
20976 		geom_validated = TRUE;
20977 	}
20978 	if ((un->un_f_geometry_is_valid == TRUE) &&
20979 	    (un->un_solaris_size > 0)) {
20980 		/*
20981 		 * the "geometry_is_valid" flag could be true if we
20982 		 * have an fdisk table but no Solaris partition
20983 		 */
20984 		if (un->un_vtoc.v_sanity != VTOC_SANE) {
20985 			/* it is EFI, so return ENOTSUP for these */
20986 			switch (cmd) {
20987 			case DKIOCGAPART:
20988 			case DKIOCGGEOM:
20989 			case DKIOCGVTOC:
20990 			case DKIOCSVTOC:
20991 			case DKIOCSAPART:
20992 				err = ENOTSUP;
20993 				un->un_ncmds_in_driver--;
20994 				ASSERT(un->un_ncmds_in_driver >= 0);
20995 				mutex_exit(SD_MUTEX(un));
20996 				return (err);
20997 			}
20998 		}
20999 	}
21000 
21001 skip_ready_valid:
21002 	mutex_exit(SD_MUTEX(un));
21003 
21004 	switch (cmd) {
21005 	case DKIOCINFO:
21006 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
21007 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
21008 		break;
21009 
21010 	case DKIOCGMEDIAINFO:
21011 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
21012 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
21013 		break;
21014 
21015 	case DKIOCGGEOM:
21016 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGGEOM\n");
21017 		err = sd_dkio_get_geometry(dev, (caddr_t)arg, flag,
21018 		    geom_validated);
21019 		break;
21020 
21021 	case DKIOCSGEOM:
21022 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSGEOM\n");
21023 		err = sd_dkio_set_geometry(dev, (caddr_t)arg, flag);
21024 		break;
21025 
21026 	case DKIOCGAPART:
21027 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGAPART\n");
21028 		err = sd_dkio_get_partition(dev, (caddr_t)arg, flag,
21029 		    geom_validated);
21030 		break;
21031 
21032 	case DKIOCSAPART:
21033 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSAPART\n");
21034 		err = sd_dkio_set_partition(dev, (caddr_t)arg, flag);
21035 		break;
21036 
21037 	case DKIOCGVTOC:
21038 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGVTOC\n");
21039 		err = sd_dkio_get_vtoc(dev, (caddr_t)arg, flag,
21040 		    geom_validated);
21041 		break;
21042 
21043 	case DKIOCGETEFI:
21044 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGETEFI\n");
21045 		err = sd_dkio_get_efi(dev, (caddr_t)arg, flag);
21046 		break;
21047 
21048 	case DKIOCPARTITION:
21049 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTITION\n");
21050 		err = sd_dkio_partition(dev, (caddr_t)arg, flag);
21051 		break;
21052 
21053 	case DKIOCSVTOC:
21054 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSVTOC\n");
21055 		err = sd_dkio_set_vtoc(dev, (caddr_t)arg, flag);
21056 		break;
21057 
21058 	case DKIOCSETEFI:
21059 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSETEFI\n");
21060 		err = sd_dkio_set_efi(dev, (caddr_t)arg, flag);
21061 		break;
21062 
21063 	case DKIOCGMBOOT:
21064 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMBOOT\n");
21065 		err = sd_dkio_get_mboot(dev, (caddr_t)arg, flag);
21066 		break;
21067 
21068 	case DKIOCSMBOOT:
21069 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSMBOOT\n");
21070 		err = sd_dkio_set_mboot(dev, (caddr_t)arg, flag);
21071 		break;
21072 
21073 	case DKIOCLOCK:
21074 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
21075 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
21076 		    SD_PATH_STANDARD);
21077 		break;
21078 
21079 	case DKIOCUNLOCK:
21080 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
21081 		err = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
21082 		    SD_PATH_STANDARD);
21083 		break;
21084 
21085 	case DKIOCSTATE: {
21086 		enum dkio_state		state;
21087 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
21088 
21089 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
21090 			err = EFAULT;
21091 		} else {
21092 			err = sd_check_media(dev, state);
21093 			if (err == 0) {
21094 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
21095 				    sizeof (int), flag) != 0)
21096 					err = EFAULT;
21097 			}
21098 		}
21099 		break;
21100 	}
21101 
21102 	case DKIOCREMOVABLE:
21103 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
21104 		/*
21105 		 * At present, vold only does automount for removable-media
21106 		 * devices, in order not to break current applications, we
		 * still let hotpluggable devices pretend to be removable media
21108 		 * devices for vold. In the near future, once vold is EOL'ed,
21109 		 * we should remove this workaround.
21110 		 */
21111 		if (un->un_f_has_removable_media || un->un_f_is_hotpluggable) {
21112 			i = 1;
21113 		} else {
21114 			i = 0;
21115 		}
21116 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21117 			err = EFAULT;
21118 		} else {
21119 			err = 0;
21120 		}
21121 		break;
21122 
21123 	case DKIOCHOTPLUGGABLE:
21124 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
21125 		if (un->un_f_is_hotpluggable) {
21126 			i = 1;
21127 		} else {
21128 			i = 0;
21129 		}
21130 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
21131 			err = EFAULT;
21132 		} else {
21133 			err = 0;
21134 		}
21135 		break;
21136 
21137 	case DKIOCGTEMPERATURE:
21138 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
21139 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
21140 		break;
21141 
21142 	case MHIOCENFAILFAST:
21143 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
21144 		if ((err = drv_priv(cred_p)) == 0) {
21145 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
21146 		}
21147 		break;
21148 
21149 	case MHIOCTKOWN:
21150 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
21151 		if ((err = drv_priv(cred_p)) == 0) {
21152 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
21153 		}
21154 		break;
21155 
21156 	case MHIOCRELEASE:
21157 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
21158 		if ((err = drv_priv(cred_p)) == 0) {
21159 			err = sd_mhdioc_release(dev);
21160 		}
21161 		break;
21162 
21163 	case MHIOCSTATUS:
21164 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
21165 		if ((err = drv_priv(cred_p)) == 0) {
21166 			switch (sd_send_scsi_TEST_UNIT_READY(un, 0)) {
21167 			case 0:
21168 				err = 0;
21169 				break;
21170 			case EACCES:
21171 				*rval_p = 1;
21172 				err = 0;
21173 				break;
21174 			default:
21175 				err = EIO;
21176 				break;
21177 			}
21178 		}
21179 		break;
21180 
21181 	case MHIOCQRESERVE:
21182 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
21183 		if ((err = drv_priv(cred_p)) == 0) {
21184 			err = sd_reserve_release(dev, SD_RESERVE);
21185 		}
21186 		break;
21187 
21188 	case MHIOCREREGISTERDEVID:
21189 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
21190 		if (drv_priv(cred_p) == EPERM) {
21191 			err = EPERM;
21192 		} else if (!un->un_f_devid_supported) {
21193 			err = ENOTTY;
21194 		} else {
21195 			err = sd_mhdioc_register_devid(dev);
21196 		}
21197 		break;
21198 
21199 	case MHIOCGRP_INKEYS:
21200 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
21201 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21202 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21203 				err = ENOTSUP;
21204 			} else {
21205 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
21206 				    flag);
21207 			}
21208 		}
21209 		break;
21210 
21211 	case MHIOCGRP_INRESV:
21212 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
21213 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
21214 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21215 				err = ENOTSUP;
21216 			} else {
21217 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
21218 			}
21219 		}
21220 		break;
21221 
21222 	case MHIOCGRP_REGISTER:
21223 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
21224 		if ((err = drv_priv(cred_p)) != EPERM) {
21225 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21226 				err = ENOTSUP;
21227 			} else if (arg != NULL) {
21228 				mhioc_register_t reg;
21229 				if (ddi_copyin((void *)arg, &reg,
21230 				    sizeof (mhioc_register_t), flag) != 0) {
21231 					err = EFAULT;
21232 				} else {
21233 					err =
21234 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21235 					    un, SD_SCSI3_REGISTER,
21236 					    (uchar_t *)&reg);
21237 				}
21238 			}
21239 		}
21240 		break;
21241 
21242 	case MHIOCGRP_RESERVE:
21243 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
21244 		if ((err = drv_priv(cred_p)) != EPERM) {
21245 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21246 				err = ENOTSUP;
21247 			} else if (arg != NULL) {
21248 				mhioc_resv_desc_t resv_desc;
21249 				if (ddi_copyin((void *)arg, &resv_desc,
21250 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
21251 					err = EFAULT;
21252 				} else {
21253 					err =
21254 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21255 					    un, SD_SCSI3_RESERVE,
21256 					    (uchar_t *)&resv_desc);
21257 				}
21258 			}
21259 		}
21260 		break;
21261 
21262 	case MHIOCGRP_PREEMPTANDABORT:
21263 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
21264 		if ((err = drv_priv(cred_p)) != EPERM) {
21265 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21266 				err = ENOTSUP;
21267 			} else if (arg != NULL) {
21268 				mhioc_preemptandabort_t preempt_abort;
21269 				if (ddi_copyin((void *)arg, &preempt_abort,
21270 				    sizeof (mhioc_preemptandabort_t),
21271 				    flag) != 0) {
21272 					err = EFAULT;
21273 				} else {
21274 					err =
21275 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21276 					    un, SD_SCSI3_PREEMPTANDABORT,
21277 					    (uchar_t *)&preempt_abort);
21278 				}
21279 			}
21280 		}
21281 		break;
21282 
21283 	case MHIOCGRP_REGISTERANDIGNOREKEY:
21284 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
21285 		if ((err = drv_priv(cred_p)) != EPERM) {
21286 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
21287 				err = ENOTSUP;
21288 			} else if (arg != NULL) {
21289 				mhioc_registerandignorekey_t r_and_i;
21290 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
21291 				    sizeof (mhioc_registerandignorekey_t),
21292 				    flag) != 0) {
21293 					err = EFAULT;
21294 				} else {
21295 					err =
21296 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
21297 					    un, SD_SCSI3_REGISTERANDIGNOREKEY,
21298 					    (uchar_t *)&r_and_i);
21299 				}
21300 			}
21301 		}
21302 		break;
21303 
21304 	case USCSICMD:
21305 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
21306 		cr = ddi_get_cred();
21307 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
21308 			err = EPERM;
21309 		} else {
21310 			err = sd_uscsi_ioctl(dev, (caddr_t)arg, flag);
21311 		}
21312 		break;
21313 
21314 	case CDROMPAUSE:
21315 	case CDROMRESUME:
21316 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
21317 		if (!ISCD(un)) {
21318 			err = ENOTTY;
21319 		} else {
21320 			err = sr_pause_resume(dev, cmd);
21321 		}
21322 		break;
21323 
21324 	case CDROMPLAYMSF:
21325 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
21326 		if (!ISCD(un)) {
21327 			err = ENOTTY;
21328 		} else {
21329 			err = sr_play_msf(dev, (caddr_t)arg, flag);
21330 		}
21331 		break;
21332 
21333 	case CDROMPLAYTRKIND:
21334 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
21335 #if defined(__i386) || defined(__amd64)
21336 		/*
21337 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
21338 		 */
21339 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21340 #else
21341 		if (!ISCD(un)) {
21342 #endif
21343 			err = ENOTTY;
21344 		} else {
21345 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
21346 		}
21347 		break;
21348 
21349 	case CDROMREADTOCHDR:
21350 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
21351 		if (!ISCD(un)) {
21352 			err = ENOTTY;
21353 		} else {
21354 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
21355 		}
21356 		break;
21357 
21358 	case CDROMREADTOCENTRY:
21359 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
21360 		if (!ISCD(un)) {
21361 			err = ENOTTY;
21362 		} else {
21363 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
21364 		}
21365 		break;
21366 
21367 	case CDROMSTOP:
21368 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
21369 		if (!ISCD(un)) {
21370 			err = ENOTTY;
21371 		} else {
21372 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_STOP,
21373 			    SD_PATH_STANDARD);
21374 		}
21375 		break;
21376 
21377 	case CDROMSTART:
21378 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
21379 		if (!ISCD(un)) {
21380 			err = ENOTTY;
21381 		} else {
21382 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_START,
21383 			    SD_PATH_STANDARD);
21384 		}
21385 		break;
21386 
21387 	case CDROMCLOSETRAY:
21388 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
21389 		if (!ISCD(un)) {
21390 			err = ENOTTY;
21391 		} else {
21392 			err = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_CLOSE,
21393 			    SD_PATH_STANDARD);
21394 		}
21395 		break;
21396 
21397 	case FDEJECT:	/* for eject command */
21398 	case DKIOCEJECT:
21399 	case CDROMEJECT:
21400 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
21401 		if (!un->un_f_eject_media_supported) {
21402 			err = ENOTTY;
21403 		} else {
21404 			err = sr_eject(dev);
21405 		}
21406 		break;
21407 
21408 	case CDROMVOLCTRL:
21409 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
21410 		if (!ISCD(un)) {
21411 			err = ENOTTY;
21412 		} else {
21413 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
21414 		}
21415 		break;
21416 
21417 	case CDROMSUBCHNL:
21418 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
21419 		if (!ISCD(un)) {
21420 			err = ENOTTY;
21421 		} else {
21422 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
21423 		}
21424 		break;
21425 
21426 	case CDROMREADMODE2:
21427 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
21428 		if (!ISCD(un)) {
21429 			err = ENOTTY;
21430 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21431 			/*
21432 			 * If the drive supports READ CD, use that instead of
21433 			 * switching the LBA size via a MODE SELECT
21434 			 * Block Descriptor
21435 			 */
21436 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
21437 		} else {
21438 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
21439 		}
21440 		break;
21441 
21442 	case CDROMREADMODE1:
21443 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
21444 		if (!ISCD(un)) {
21445 			err = ENOTTY;
21446 		} else {
21447 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
21448 		}
21449 		break;
21450 
21451 	case CDROMREADOFFSET:
21452 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
21453 		if (!ISCD(un)) {
21454 			err = ENOTTY;
21455 		} else {
21456 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
21457 			    flag);
21458 		}
21459 		break;
21460 
21461 	case CDROMSBLKMODE:
21462 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
21463 		/*
21464 		 * There is no means of changing block size in case of atapi
21465 		 * drives, thus return ENOTTY if drive type is atapi
21466 		 */
21467 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
21468 			err = ENOTTY;
21469 		} else if (un->un_f_mmc_cap == TRUE) {
21470 
21471 			/*
21472 			 * MMC Devices do not support changing the
21473 			 * logical block size
21474 			 *
21475 			 * Note: EINVAL is being returned instead of ENOTTY to
21476 			 * maintain consistancy with the original mmc
21477 			 * driver update.
21478 			 */
21479 			err = EINVAL;
21480 		} else {
21481 			mutex_enter(SD_MUTEX(un));
21482 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
21483 			    (un->un_ncmds_in_transport > 0)) {
21484 				mutex_exit(SD_MUTEX(un));
21485 				err = EINVAL;
21486 			} else {
21487 				mutex_exit(SD_MUTEX(un));
21488 				err = sr_change_blkmode(dev, cmd, arg, flag);
21489 			}
21490 		}
21491 		break;
21492 
21493 	case CDROMGBLKMODE:
21494 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
21495 		if (!ISCD(un)) {
21496 			err = ENOTTY;
21497 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
21498 		    (un->un_f_blockcount_is_valid != FALSE)) {
21499 			/*
21500 			 * Drive is an ATAPI drive so return target block
21501 			 * size for ATAPI drives since we cannot change the
21502 			 * blocksize on ATAPI drives. Used primarily to detect
21503 			 * if an ATAPI cdrom is present.
21504 			 */
21505 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
21506 			    sizeof (int), flag) != 0) {
21507 				err = EFAULT;
21508 			} else {
21509 				err = 0;
21510 			}
21511 
21512 		} else {
21513 			/*
21514 			 * Drive supports changing block sizes via a Mode
21515 			 * Select.
21516 			 */
21517 			err = sr_change_blkmode(dev, cmd, arg, flag);
21518 		}
21519 		break;
21520 
21521 	case CDROMGDRVSPEED:
21522 	case CDROMSDRVSPEED:
21523 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
21524 		if (!ISCD(un)) {
21525 			err = ENOTTY;
21526 		} else if (un->un_f_mmc_cap == TRUE) {
21527 			/*
21528 			 * Note: In the future the driver implementation
21529 			 * for getting and
21530 			 * setting cd speed should entail:
21531 			 * 1) If non-mmc try the Toshiba mode page
21532 			 *    (sr_change_speed)
21533 			 * 2) If mmc but no support for Real Time Streaming try
21534 			 *    the SET CD SPEED (0xBB) command
21535 			 *   (sr_atapi_change_speed)
21536 			 * 3) If mmc and support for Real Time Streaming
21537 			 *    try the GET PERFORMANCE and SET STREAMING
21538 			 *    commands (not yet implemented, 4380808)
21539 			 */
21540 			/*
21541 			 * As per recent MMC spec, CD-ROM speed is variable
21542 			 * and changes with LBA. Since there is no such
21543 			 * things as drive speed now, fail this ioctl.
21544 			 *
21545 			 * Note: EINVAL is returned for consistancy of original
21546 			 * implementation which included support for getting
21547 			 * the drive speed of mmc devices but not setting
21548 			 * the drive speed. Thus EINVAL would be returned
21549 			 * if a set request was made for an mmc device.
21550 			 * We no longer support get or set speed for
21551 			 * mmc but need to remain consistant with regard
21552 			 * to the error code returned.
21553 			 */
21554 			err = EINVAL;
21555 		} else if (un->un_f_cfg_is_atapi == TRUE) {
21556 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
21557 		} else {
21558 			err = sr_change_speed(dev, cmd, arg, flag);
21559 		}
21560 		break;
21561 
21562 	case CDROMCDDA:
21563 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
21564 		if (!ISCD(un)) {
21565 			err = ENOTTY;
21566 		} else {
21567 			err = sr_read_cdda(dev, (void *)arg, flag);
21568 		}
21569 		break;
21570 
21571 	case CDROMCDXA:
21572 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
21573 		if (!ISCD(un)) {
21574 			err = ENOTTY;
21575 		} else {
21576 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
21577 		}
21578 		break;
21579 
21580 	case CDROMSUBCODE:
21581 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
21582 		if (!ISCD(un)) {
21583 			err = ENOTTY;
21584 		} else {
21585 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
21586 		}
21587 		break;
21588 
21589 	case DKIOCPARTINFO: {
21590 		/*
21591 		 * Return parameters describing the selected disk slice.
21592 		 * Note: this ioctl is for the intel platform only
21593 		 */
21594 #if defined(__i386) || defined(__amd64)
21595 		int part;
21596 
21597 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21598 		part = SDPART(dev);
21599 
21600 		/* don't check un_solaris_size for pN */
21601 		if (part < P0_RAW_DISK && un->un_solaris_size == 0) {
21602 			err = EIO;
21603 		} else {
21604 			struct part_info p;
21605 
21606 			p.p_start = (daddr_t)un->un_offset[part];
21607 			p.p_length = (int)un->un_map[part].dkl_nblk;
21608 #ifdef _MULTI_DATAMODEL
21609 			switch (ddi_model_convert_from(flag & FMODELS)) {
21610 			case DDI_MODEL_ILP32:
21611 			{
21612 				struct part_info32 p32;
21613 
21614 				p32.p_start = (daddr32_t)p.p_start;
21615 				p32.p_length = p.p_length;
21616 				if (ddi_copyout(&p32, (void *)arg,
21617 				    sizeof (p32), flag))
21618 					err = EFAULT;
21619 				break;
21620 			}
21621 
21622 			case DDI_MODEL_NONE:
21623 			{
21624 				if (ddi_copyout(&p, (void *)arg, sizeof (p),
21625 				    flag))
21626 					err = EFAULT;
21627 				break;
21628 			}
21629 			}
21630 #else /* ! _MULTI_DATAMODEL */
21631 			if (ddi_copyout(&p, (void *)arg, sizeof (p), flag))
21632 				err = EFAULT;
21633 #endif /* _MULTI_DATAMODEL */
21634 		}
21635 #else
21636 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCPARTINFO\n");
21637 		err = ENOTTY;
21638 #endif
21639 		break;
21640 	}
21641 
21642 	case DKIOCG_PHYGEOM: {
21643 		/* Return the driver's notion of the media physical geometry */
21644 #if defined(__i386) || defined(__amd64)
21645 		struct dk_geom	disk_geom;
21646 		struct dk_geom	*dkgp = &disk_geom;
21647 
21648 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21649 		mutex_enter(SD_MUTEX(un));
21650 
21651 		if (un->un_g.dkg_nhead != 0 &&
21652 		    un->un_g.dkg_nsect != 0) {
21653 			/*
21654 			 * We succeeded in getting a geometry, but
21655 			 * right now it is being reported as just the
21656 			 * Solaris fdisk partition, just like for
21657 			 * DKIOCGGEOM. We need to change that to be
21658 			 * correct for the entire disk now.
21659 			 */
21660 			bcopy(&un->un_g, dkgp, sizeof (*dkgp));
21661 			dkgp->dkg_acyl = 0;
21662 			dkgp->dkg_ncyl = un->un_blockcount /
21663 			    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21664 		} else {
21665 			bzero(dkgp, sizeof (struct dk_geom));
21666 			/*
21667 			 * This disk does not have a Solaris VTOC
21668 			 * so we must present a physical geometry
21669 			 * that will remain consistent regardless
21670 			 * of how the disk is used. This will ensure
21671 			 * that the geometry does not change regardless
21672 			 * of the fdisk partition type (ie. EFI, FAT32,
21673 			 * Solaris, etc).
21674 			 */
21675 			if (ISCD(un)) {
21676 				dkgp->dkg_nhead = un->un_pgeom.g_nhead;
21677 				dkgp->dkg_nsect = un->un_pgeom.g_nsect;
21678 				dkgp->dkg_ncyl = un->un_pgeom.g_ncyl;
21679 				dkgp->dkg_acyl = un->un_pgeom.g_acyl;
21680 			} else {
21681 				/*
21682 				 * Invalid un_blockcount can generate invalid
21683 				 * dk_geom and may result in division by zero
21684 				 * system failure. Should make sure blockcount
21685 				 * is valid before using it here.
21686 				 */
21687 				if (un->un_f_blockcount_is_valid == FALSE) {
21688 					mutex_exit(SD_MUTEX(un));
21689 					err = EIO;
21690 
21691 					break;
21692 				}
21693 				sd_convert_geometry(un->un_blockcount, dkgp);
21694 				dkgp->dkg_acyl = 0;
21695 				dkgp->dkg_ncyl = un->un_blockcount /
21696 				    (dkgp->dkg_nhead * dkgp->dkg_nsect);
21697 			}
21698 		}
21699 		dkgp->dkg_pcyl = dkgp->dkg_ncyl + dkgp->dkg_acyl;
21700 
21701 		if (ddi_copyout(dkgp, (void *)arg,
21702 		    sizeof (struct dk_geom), flag)) {
21703 			mutex_exit(SD_MUTEX(un));
21704 			err = EFAULT;
21705 		} else {
21706 			mutex_exit(SD_MUTEX(un));
21707 			err = 0;
21708 		}
21709 #else
21710 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_PHYGEOM\n");
21711 		err = ENOTTY;
21712 #endif
21713 		break;
21714 	}
21715 
21716 	case DKIOCG_VIRTGEOM: {
21717 		/* Return the driver's notion of the media's logical geometry */
21718 #if defined(__i386) || defined(__amd64)
21719 		struct dk_geom	disk_geom;
21720 		struct dk_geom	*dkgp = &disk_geom;
21721 
21722 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21723 		mutex_enter(SD_MUTEX(un));
21724 		/*
21725 		 * If there is no HBA geometry available, or
21726 		 * if the HBA returned us something that doesn't
21727 		 * really fit into an Int 13/function 8 geometry
21728 		 * result, just fail the ioctl.  See PSARC 1998/313.
21729 		 */
21730 		if (un->un_lgeom.g_nhead == 0 ||
21731 		    un->un_lgeom.g_nsect == 0 ||
21732 		    un->un_lgeom.g_ncyl > 1024) {
21733 			mutex_exit(SD_MUTEX(un));
21734 			err = EINVAL;
21735 		} else {
21736 			dkgp->dkg_ncyl	= un->un_lgeom.g_ncyl;
21737 			dkgp->dkg_acyl	= un->un_lgeom.g_acyl;
21738 			dkgp->dkg_pcyl	= dkgp->dkg_ncyl + dkgp->dkg_acyl;
21739 			dkgp->dkg_nhead	= un->un_lgeom.g_nhead;
21740 			dkgp->dkg_nsect	= un->un_lgeom.g_nsect;
21741 
21742 			if (ddi_copyout(dkgp, (void *)arg,
21743 			    sizeof (struct dk_geom), flag)) {
21744 				mutex_exit(SD_MUTEX(un));
21745 				err = EFAULT;
21746 			} else {
21747 				mutex_exit(SD_MUTEX(un));
21748 				err = 0;
21749 			}
21750 		}
21751 #else
21752 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCG_VIRTGEOM\n");
21753 		err = ENOTTY;
21754 #endif
21755 		break;
21756 	}
21757 #ifdef SDDEBUG
21758 /* RESET/ABORTS testing ioctls */
21759 	case DKIOCRESET: {
21760 		int	reset_level;
21761 
21762 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
21763 			err = EFAULT;
21764 		} else {
21765 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
21766 			    "reset_level = 0x%lx\n", reset_level);
21767 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
21768 				err = 0;
21769 			} else {
21770 				err = EIO;
21771 			}
21772 		}
21773 		break;
21774 	}
21775 
21776 	case DKIOCABORT:
21777 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
21778 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
21779 			err = 0;
21780 		} else {
21781 			err = EIO;
21782 		}
21783 		break;
21784 #endif
21785 
21786 #ifdef SD_FAULT_INJECTION
21787 /* SDIOC FaultInjection testing ioctls */
21788 	case SDIOCSTART:
21789 	case SDIOCSTOP:
21790 	case SDIOCINSERTPKT:
21791 	case SDIOCINSERTXB:
21792 	case SDIOCINSERTUN:
21793 	case SDIOCINSERTARQ:
21794 	case SDIOCPUSH:
21795 	case SDIOCRETRIEVE:
21796 	case SDIOCRUN:
21797 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl:"
21798 		    "SDIOC detected cmd:0x%X:\n", cmd);
21799 		/* call error generator */
21800 		sd_faultinjection_ioctl(cmd, arg, un);
21801 		err = 0;
21802 		break;
21803 
21804 #endif /* SD_FAULT_INJECTION */
21805 
21806 	case DKIOCFLUSHWRITECACHE:
21807 		{
21808 			struct dk_callback *dkc = (struct dk_callback *)arg;
21809 
21810 			mutex_enter(SD_MUTEX(un));
21811 			if (!un->un_f_sync_cache_supported ||
21812 			    !un->un_f_write_cache_enabled) {
21813 				err = un->un_f_sync_cache_supported ?
21814 					0 : ENOTSUP;
21815 				mutex_exit(SD_MUTEX(un));
21816 				if ((flag & FKIOCTL) && dkc != NULL &&
21817 				    dkc->dkc_callback != NULL) {
21818 					(*dkc->dkc_callback)(dkc->dkc_cookie,
21819 					    err);
21820 					/*
21821 					 * Did callback and reported error.
21822 					 * Since we did a callback, ioctl
21823 					 * should return 0.
21824 					 */
21825 					err = 0;
21826 				}
21827 				break;
21828 			}
21829 			mutex_exit(SD_MUTEX(un));
21830 
21831 			if ((flag & FKIOCTL) && dkc != NULL &&
21832 			    dkc->dkc_callback != NULL) {
21833 				/* async SYNC CACHE request */
21834 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
21835 			} else {
21836 				/* synchronous SYNC CACHE request */
21837 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
21838 			}
21839 		}
21840 		break;
21841 
21842 	case DKIOCGETWCE: {
21843 
21844 		int wce;
21845 
21846 		if ((err = sd_get_write_cache_enabled(un, &wce)) != 0) {
21847 			break;
21848 		}
21849 
21850 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
21851 			err = EFAULT;
21852 		}
21853 		break;
21854 	}
21855 
21856 	case DKIOCSETWCE: {
21857 
21858 		int wce, sync_supported;
21859 
21860 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
21861 			err = EFAULT;
21862 			break;
21863 		}
21864 
21865 		/*
21866 		 * Synchronize multiple threads trying to enable
21867 		 * or disable the cache via the un_f_wcc_cv
21868 		 * condition variable.
21869 		 */
21870 		mutex_enter(SD_MUTEX(un));
21871 
21872 		/*
21873 		 * Don't allow the cache to be enabled if the
21874 		 * config file has it disabled.
21875 		 */
21876 		if (un->un_f_opt_disable_cache && wce) {
21877 			mutex_exit(SD_MUTEX(un));
21878 			err = EINVAL;
21879 			break;
21880 		}
21881 
21882 		/*
21883 		 * Wait for write cache change in progress
21884 		 * bit to be clear before proceeding.
21885 		 */
21886 		while (un->un_f_wcc_inprog)
21887 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
21888 
21889 		un->un_f_wcc_inprog = 1;
21890 
21891 		if (un->un_f_write_cache_enabled && wce == 0) {
21892 			/*
21893 			 * Disable the write cache.  Don't clear
21894 			 * un_f_write_cache_enabled until after
21895 			 * the mode select and flush are complete.
21896 			 */
21897 			sync_supported = un->un_f_sync_cache_supported;
21898 			mutex_exit(SD_MUTEX(un));
21899 			if ((err = sd_cache_control(un, SD_CACHE_NOCHANGE,
21900 			    SD_CACHE_DISABLE)) == 0 && sync_supported) {
21901 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
21902 			}
21903 
21904 			mutex_enter(SD_MUTEX(un));
21905 			if (err == 0) {
21906 				un->un_f_write_cache_enabled = 0;
21907 			}
21908 
21909 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
21910 			/*
21911 			 * Set un_f_write_cache_enabled first, so there is
21912 			 * no window where the cache is enabled, but the
21913 			 * bit says it isn't.
21914 			 */
21915 			un->un_f_write_cache_enabled = 1;
21916 			mutex_exit(SD_MUTEX(un));
21917 
21918 			err = sd_cache_control(un, SD_CACHE_NOCHANGE,
21919 				SD_CACHE_ENABLE);
21920 
21921 			mutex_enter(SD_MUTEX(un));
21922 
21923 			if (err) {
21924 				un->un_f_write_cache_enabled = 0;
21925 			}
21926 		}
21927 
21928 		un->un_f_wcc_inprog = 0;
21929 		cv_broadcast(&un->un_wcc_cv);
21930 		mutex_exit(SD_MUTEX(un));
21931 		break;
21932 	}
21933 
21934 	default:
21935 		err = ENOTTY;
21936 		break;
21937 	}
21938 	mutex_enter(SD_MUTEX(un));
21939 	un->un_ncmds_in_driver--;
21940 	ASSERT(un->un_ncmds_in_driver >= 0);
21941 	mutex_exit(SD_MUTEX(un));
21942 
21943 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
21944 	return (err);
21945 }
21946 
21947 
21948 /*
21949  *    Function: sd_uscsi_ioctl
21950  *
21951  * Description: This routine is the driver entry point for handling USCSI ioctl
21952  *		requests (USCSICMD).
21953  *
21954  *   Arguments: dev	- the device number
21955  *		arg	- user provided scsi command
21956  *		flag	- this argument is a pass through to ddi_copyxxx()
21957  *			  directly from the mode argument of ioctl().
21958  *
21959  * Return Code: code returned by sd_send_scsi_cmd
21960  *		ENXIO
21961  *		EFAULT
21962  *		EAGAIN
21963  */
21964 
static int
sd_uscsi_ioctl(dev_t dev, caddr_t arg, int flag)
{
#ifdef _MULTI_DATAMODEL
	/*
	 * For use when a 32 bit app makes a call into a
	 * 64 bit ioctl
	 */
	struct uscsi_cmd32	uscsi_cmd_32_for_64;
	struct uscsi_cmd32	*ucmd32 = &uscsi_cmd_32_for_64;
	model_t			model;
#endif /* _MULTI_DATAMODEL */
	struct uscsi_cmd	*scmd = NULL;	/* kernel copy of the user cmd */
	struct sd_lun		*un = NULL;
	enum uio_seg		uioseg;
	char			cdb[CDB_GROUP0];
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: entry: un:0x%p\n", un);

	/* Working copy of the caller's uscsi_cmd; freed at "done" below */
	scmd = (struct uscsi_cmd *)
	    kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);

#ifdef _MULTI_DATAMODEL
	switch (model = ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
	{
		if (ddi_copyin((void *)arg, ucmd32, sizeof (*ucmd32), flag)) {
			rval = EFAULT;
			goto done;
		}
		/*
		 * Convert the ILP32 uscsi data from the
		 * application to LP64 for internal use.
		 */
		uscsi_cmd32touscsi_cmd(ucmd32, scmd);
		break;
	}
	case DDI_MODEL_NONE:
		if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
			rval = EFAULT;
			goto done;
		}
		break;
	}
#else /* !_MULTI_DATAMODEL */
	if (ddi_copyin((void *)arg, scmd, sizeof (*scmd), flag)) {
		rval = EFAULT;
		goto done;
	}
#endif /* _MULTI_DATAMODEL */

	/* The command is always issued with USCSI_NOINTR cleared */
	scmd->uscsi_flags &= ~USCSI_NOINTR;
	/* FKIOCTL means an in-kernel caller, so buffers are in system space */
	uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE;
	/* Refuse new commands while a FORMAT is already in flight */
	if (un->un_f_format_in_progress == TRUE) {
		rval = EAGAIN;
		goto done;
	}

	/*
	 * Gotta do the ddi_copyin() here on the uscsi_cdb so that
	 * we will have a valid cdb[0] to test.
	 */
	if ((ddi_copyin(scmd->uscsi_cdb, cdb, CDB_GROUP0, flag) == 0) &&
	    (cdb[0] == SCMD_FORMAT)) {
		/*
		 * FORMAT UNIT: mark the softstate so concurrent ioctls
		 * (including other USCSICMDs) see EAGAIN until it completes.
		 */
		SD_TRACE(SD_LOG_IOCTL, un,
		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
		mutex_enter(SD_MUTEX(un));
		un->un_f_format_in_progress = TRUE;
		mutex_exit(SD_MUTEX(un));
		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
		    SD_PATH_STANDARD);
		mutex_enter(SD_MUTEX(un));
		un->un_f_format_in_progress = FALSE;
		mutex_exit(SD_MUTEX(un));
	} else {
		SD_TRACE(SD_LOG_IOCTL, un,
		    "sd_uscsi_ioctl: scmd->uscsi_cdb 0x%x\n", cdb[0]);
		/*
		 * It's OK to fall into here even if the ddi_copyin()
		 * on the uscsi_cdb above fails, because sd_send_scsi_cmd()
		 * does this same copyin and will return the EFAULT
		 * if it fails.
		 */
		rval = sd_send_scsi_cmd(dev, scmd, uioseg, uioseg, uioseg,
		    SD_PATH_STANDARD);
	}
	/*
	 * NOTE(review): in all three copyout paths below, a failed
	 * ddi_copyout() is converted to EFAULT only when rval is already
	 * nonzero; a failed copyout after a successful command still
	 * returns 0 to the caller. This looks deliberate (historic), but
	 * confirm before changing.
	 */
#ifdef _MULTI_DATAMODEL
	switch (model) {
	case DDI_MODEL_ILP32:
		/*
		 * Convert back to ILP32 before copyout to the
		 * application
		 */
		uscsi_cmdtouscsi_cmd32(scmd, ucmd32);
		if (ddi_copyout(ucmd32, (void *)arg, sizeof (*ucmd32), flag)) {
			if (rval != 0) {
				rval = EFAULT;
			}
		}
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
			if (rval != 0) {
				rval = EFAULT;
			}
		}
		break;
	}
#else /* !_MULTI_DATAMODEL */
	if (ddi_copyout(scmd, (void *)arg, sizeof (*scmd), flag)) {
		if (rval != 0) {
			rval = EFAULT;
		}
	}
#endif /* _MULTI_DATAMODEL */
done:
	kmem_free(scmd, sizeof (struct uscsi_cmd));

	SD_TRACE(SD_LOG_IOCTL, un, "sd_uscsi_ioctl: exit: un:0x%p\n", un);

	return (rval);
}
22092 
22093 
22094 /*
22095  *    Function: sd_dkio_ctrl_info
22096  *
22097  * Description: This routine is the driver entry point for handling controller
22098  *		information ioctl requests (DKIOCINFO).
22099  *
22100  *   Arguments: dev  - the device number
22101  *		arg  - pointer to user provided dk_cinfo structure
22102  *		       specifying the controller type and attributes.
22103  *		flag - this argument is a pass through to ddi_copyxxx()
22104  *		       directly from the mode argument of ioctl().
22105  *
22106  * Return Code: 0
22107  *		EFAULT
22108  *		ENXIO
22109  */
22110 
22111 static int
22112 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
22113 {
22114 	struct sd_lun	*un = NULL;
22115 	struct dk_cinfo	*info;
22116 	dev_info_t	*pdip;
22117 	int		lun, tgt;
22118 
22119 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22120 		return (ENXIO);
22121 	}
22122 
22123 	info = (struct dk_cinfo *)
22124 		kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
22125 
22126 	switch (un->un_ctype) {
22127 	case CTYPE_CDROM:
22128 		info->dki_ctype = DKC_CDROM;
22129 		break;
22130 	default:
22131 		info->dki_ctype = DKC_SCSI_CCS;
22132 		break;
22133 	}
22134 	pdip = ddi_get_parent(SD_DEVINFO(un));
22135 	info->dki_cnum = ddi_get_instance(pdip);
22136 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
22137 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
22138 	} else {
22139 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
22140 		    DK_DEVLEN - 1);
22141 	}
22142 
22143 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22144 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
22145 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
22146 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
22147 
22148 	/* Unit Information */
22149 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
22150 	info->dki_slave = ((tgt << 3) | lun);
22151 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
22152 	    DK_DEVLEN - 1);
22153 	info->dki_flags = DKI_FMTVOL;
22154 	info->dki_partition = SDPART(dev);
22155 
22156 	/* Max Transfer size of this device in blocks */
22157 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
22158 	info->dki_addr = 0;
22159 	info->dki_space = 0;
22160 	info->dki_prio = 0;
22161 	info->dki_vec = 0;
22162 
22163 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
22164 		kmem_free(info, sizeof (struct dk_cinfo));
22165 		return (EFAULT);
22166 	} else {
22167 		kmem_free(info, sizeof (struct dk_cinfo));
22168 		return (0);
22169 	}
22170 }
22171 
22172 
22173 /*
22174  *    Function: sd_get_media_info
22175  *
22176  * Description: This routine is the driver entry point for handling ioctl
22177  *		requests for the media type or command set profile used by the
22178  *		drive to operate on the media (DKIOCGMEDIAINFO).
22179  *
22180  *   Arguments: dev	- the device number
22181  *		arg	- pointer to user provided dk_minfo structure
22182  *			  specifying the media type, logical block size and
22183  *			  drive capacity.
22184  *		flag	- this argument is a pass through to ddi_copyxxx()
22185  *			  directly from the mode argument of ioctl().
22186  *
22187  * Return Code: 0
22188  *		EACCESS
22189  *		EFAULT
22190  *		ENXIO
22191  *		EIO
22192  */
22193 
static int
sd_get_media_info(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct uscsi_cmd	com;
	struct scsi_inquiry	*sinq;
	struct dk_minfo		media_info;
	u_longlong_t		media_capacity;
	uint64_t		capacity;
	uint_t			lbasize;
	uchar_t			*out_data;	/* GET CONFIGURATION response */
	uchar_t			*rqbuf;		/* request sense data */
	int			rval = 0;
	int			rtn;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");

	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	/* Issue a TUR to determine if the drive is ready with media present */
	rval = sd_send_scsi_TEST_UNIT_READY(un, SD_CHECK_FOR_MEDIA);
	if (rval == ENXIO) {
		goto done;
	}
	/*
	 * NOTE(review): only ENXIO aborts here. Any other nonzero TUR
	 * result is left in rval; if the READ CAPACITY below succeeds
	 * (case 0 does not clear rval), that stale TUR error is what
	 * gets returned even though valid data was copied out. Confirm
	 * whether this is intentional before changing.
	 */

	/* Now get configuration data */
	if (ISCD(un)) {
		media_info.dki_media_type = DK_CDROM;

		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
		if (un->un_f_mmc_cap == TRUE) {
			rtn = sd_send_scsi_GET_CONFIGURATION(un, &com, rqbuf,
				SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN);

			if (rtn) {
				/*
				 * Failed for other than an illegal request
				 * or command not supported
				 */
				if ((com.uscsi_status == STATUS_CHECK) &&
				    (com.uscsi_rqstatus == STATUS_GOOD)) {
					/*
					 * Fixed-format sense: byte 2 holds
					 * the sense key, byte 12 the ASC
					 * (0x20 = invalid command op code).
					 * Anything else is a real failure.
					 */
					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
					    (rqbuf[12] != 0x20)) {
						rval = EIO;
						goto done;
					}
				}
			} else {
				/*
				 * The GET CONFIGURATION command succeeded
				 * so set the media type according to the
				 * returned data: the current profile is a
				 * big-endian 16-bit value in header bytes
				 * 6 and 7.
				 */
				media_info.dki_media_type = out_data[6];
				media_info.dki_media_type <<= 8;
				media_info.dki_media_type |= out_data[7];
			}
		}
	} else {
		/*
		 * The profile list is not available, so we attempt to identify
		 * the media type based on the inquiry data
		 */
		sinq = un->un_sd->sd_inq;
		if (sinq->inq_qual == 0) {
			/* This is a direct access device */
			media_info.dki_media_type = DK_FIXED_DISK;

			/* Special-case Iomega ZIP/JAZ removable products */
			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
					media_info.dki_media_type = DK_ZIP;
				} else if (
				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
					media_info.dki_media_type = DK_JAZ;
				}
			}
		} else {
			/* Not a CD or direct access so return unknown media */
			media_info.dki_media_type = DK_UNKNOWN;
		}
	}

	/* Now read the capacity so we can provide the lbasize and capacity */
	switch (sd_send_scsi_READ_CAPACITY(un, &capacity, &lbasize,
	    SD_PATH_DIRECT)) {
	case 0:
		break;
	case EACCES:
		/* Reservation conflict */
		rval = EACCES;
		goto done;
	default:
		rval = EIO;
		goto done;
	}

	media_info.dki_lbsize = lbasize;
	media_capacity = capacity;

	/*
	 * sd_send_scsi_READ_CAPACITY() reports capacity in
	 * un->un_sys_blocksize chunks. So we need to convert it into
	 * cap.lbasize chunks.
	 */
	media_capacity *= un->un_sys_blocksize;
	media_capacity /= lbasize;
	media_info.dki_capacity = media_capacity;

	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
		rval = EFAULT;
		/* Put goto. Anybody might add some code below in future */
		goto done;
	}
done:
	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
	kmem_free(rqbuf, SENSE_LENGTH);
	return (rval);
}
22318 
22319 
22320 /*
22321  *    Function: sd_dkio_get_geometry
22322  *
22323  * Description: This routine is the driver entry point for handling user
22324  *		requests to get the device geometry (DKIOCGGEOM).
22325  *
22326  *   Arguments: dev  - the device number
22327  *		arg  - pointer to user provided dk_geom structure specifying
22328  *			the controller's notion of the current geometry.
22329  *		flag - this argument is a pass through to ddi_copyxxx()
22330  *		       directly from the mode argument of ioctl().
22331  *		geom_validated - flag indicating if the device geometry has been
22332  *				 previously validated in the sdioctl routine.
22333  *
22334  * Return Code: 0
22335  *		EFAULT
22336  *		ENXIO
22337  *		EIO
22338  */
22339 
22340 static int
22341 sd_dkio_get_geometry(dev_t dev, caddr_t arg, int flag, int geom_validated)
22342 {
22343 	struct sd_lun	*un = NULL;
22344 	struct dk_geom	*tmp_geom = NULL;
22345 	int		rval = 0;
22346 
22347 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22348 		return (ENXIO);
22349 	}
22350 
22351 	if (geom_validated == FALSE) {
22352 		/*
22353 		 * sd_validate_geometry does not spin a disk up
22354 		 * if it was spun down. We need to make sure it
22355 		 * is ready.
22356 		 */
22357 		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
22358 			return (rval);
22359 		}
22360 		mutex_enter(SD_MUTEX(un));
22361 		rval = sd_validate_geometry(un, SD_PATH_DIRECT);
22362 		mutex_exit(SD_MUTEX(un));
22363 	}
22364 	if (rval)
22365 		return (rval);
22366 
22367 	/*
22368 	 * It is possible that un_solaris_size is 0(uninitialized)
22369 	 * after sd_unit_attach. Reservation conflict may cause the
22370 	 * above situation. Thus, the zero check of un_solaris_size
22371 	 * should occur after the sd_validate_geometry() call.
22372 	 */
22373 #if defined(__i386) || defined(__amd64)
22374 	if (un->un_solaris_size == 0) {
22375 		return (EIO);
22376 	}
22377 #endif
22378 
22379 	/*
22380 	 * Make a local copy of the soft state geometry to avoid some potential
22381 	 * race conditions associated with holding the mutex and updating the
22382 	 * write_reinstruct value
22383 	 */
22384 	tmp_geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22385 	mutex_enter(SD_MUTEX(un));
22386 	bcopy(&un->un_g, tmp_geom, sizeof (struct dk_geom));
22387 	mutex_exit(SD_MUTEX(un));
22388 
22389 	if (tmp_geom->dkg_write_reinstruct == 0) {
22390 		tmp_geom->dkg_write_reinstruct =
22391 		    (int)((int)(tmp_geom->dkg_nsect * tmp_geom->dkg_rpm *
22392 		    sd_rot_delay) / (int)60000);
22393 	}
22394 
22395 	rval = ddi_copyout(tmp_geom, (void *)arg, sizeof (struct dk_geom),
22396 	    flag);
22397 	if (rval != 0) {
22398 		rval = EFAULT;
22399 	}
22400 
22401 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22402 	return (rval);
22403 
22404 }
22405 
22406 
22407 /*
22408  *    Function: sd_dkio_set_geometry
22409  *
22410  * Description: This routine is the driver entry point for handling user
22411  *		requests to set the device geometry (DKIOCSGEOM). The actual
22412  *		device geometry is not updated, just the driver "notion" of it.
22413  *
22414  *   Arguments: dev  - the device number
22415  *		arg  - pointer to user provided dk_geom structure used to set
22416  *			the controller's notion of the current geometry.
22417  *		flag - this argument is a pass through to ddi_copyxxx()
22418  *		       directly from the mode argument of ioctl().
22419  *
22420  * Return Code: 0
22421  *		EFAULT
22422  *		ENXIO
22423  *		EIO
22424  */
22425 
22426 static int
22427 sd_dkio_set_geometry(dev_t dev, caddr_t arg, int flag)
22428 {
22429 	struct sd_lun	*un = NULL;
22430 	struct dk_geom	*tmp_geom;
22431 	struct dk_map	*lp;
22432 	int		rval = 0;
22433 	int		i;
22434 
22435 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22436 		return (ENXIO);
22437 	}
22438 
22439 	/*
22440 	 * Make sure there is no reservation conflict on the lun.
22441 	 */
22442 	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
22443 		return (EACCES);
22444 	}
22445 
22446 #if defined(__i386) || defined(__amd64)
22447 	if (un->un_solaris_size == 0) {
22448 		return (EIO);
22449 	}
22450 #endif
22451 
22452 	/*
22453 	 * We need to copy the user specified geometry into local
22454 	 * storage and then update the softstate. We don't want to hold
22455 	 * the mutex and copyin directly from the user to the soft state
22456 	 */
22457 	tmp_geom = (struct dk_geom *)
22458 	    kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
22459 	rval = ddi_copyin(arg, tmp_geom, sizeof (struct dk_geom), flag);
22460 	if (rval != 0) {
22461 		kmem_free(tmp_geom, sizeof (struct dk_geom));
22462 		return (EFAULT);
22463 	}
22464 
22465 	mutex_enter(SD_MUTEX(un));
22466 	bcopy(tmp_geom, &un->un_g, sizeof (struct dk_geom));
22467 	for (i = 0; i < NDKMAP; i++) {
22468 		lp  = &un->un_map[i];
22469 		un->un_offset[i] =
22470 		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
22471 #if defined(__i386) || defined(__amd64)
22472 		un->un_offset[i] += un->un_solaris_offset;
22473 #endif
22474 	}
22475 	un->un_f_geometry_is_valid = FALSE;
22476 	mutex_exit(SD_MUTEX(un));
22477 	kmem_free(tmp_geom, sizeof (struct dk_geom));
22478 
22479 	return (rval);
22480 }
22481 
22482 
22483 /*
22484  *    Function: sd_dkio_get_partition
22485  *
22486  * Description: This routine is the driver entry point for handling user
22487  *		requests to get the partition table (DKIOCGAPART).
22488  *
22489  *   Arguments: dev  - the device number
22490  *		arg  - pointer to user provided dk_allmap structure specifying
22491  *			the controller's notion of the current partition table.
22492  *		flag - this argument is a pass through to ddi_copyxxx()
22493  *		       directly from the mode argument of ioctl().
22494  *		geom_validated - flag indicating if the device geometry has been
22495  *				 previously validated in the sdioctl routine.
22496  *
22497  * Return Code: 0
22498  *		EFAULT
22499  *		ENXIO
22500  *		EIO
22501  */
22502 
static int
sd_dkio_get_partition(dev_t dev, caddr_t arg, int flag, int geom_validated)
{
	struct sd_lun	*un = NULL;
	int		rval = 0;
	int		size;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * Make sure the geometry is valid before getting the partition
	 * information.
	 */
	mutex_enter(SD_MUTEX(un));
	if (geom_validated == FALSE) {
		/*
		 * sd_validate_geometry does not spin a disk up
		 * if it was spun down. We need to make sure it
		 * is ready before validating the geometry.
		 */
		mutex_exit(SD_MUTEX(un));
		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
			return (rval);
		}
		mutex_enter(SD_MUTEX(un));

		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
			mutex_exit(SD_MUTEX(un));
			return (rval);
		}
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * It is possible that un_solaris_size is 0(uninitialized)
	 * after sd_unit_attach. Reservation conflict may cause the
	 * above situation. Thus, the zero check of un_solaris_size
	 * should occur after the sd_validate_geometry() call.
	 */
#if defined(__i386) || defined(__amd64)
	if (un->un_solaris_size == 0) {
		return (EIO);
	}
#endif

	/*
	 * Copy out the partition map in the layout matching the caller's
	 * data model.  NOTE: un_map is read here without SD_MUTEX held,
	 * since ddi_copyout() may block on a page fault.
	 */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		struct dk_map32 dk_map32[NDKMAP];
		int		i;

		/* Narrow each native entry into the 32-bit dk_map32 form. */
		for (i = 0; i < NDKMAP; i++) {
			dk_map32[i].dkl_cylno = un->un_map[i].dkl_cylno;
			dk_map32[i].dkl_nblk  = un->un_map[i].dkl_nblk;
		}
		size = NDKMAP * sizeof (struct dk_map32);
		rval = ddi_copyout(dk_map32, (void *)arg, size, flag);
		if (rval != 0) {
			rval = EFAULT;
		}
		break;
	}
	case DDI_MODEL_NONE:
		size = NDKMAP * sizeof (struct dk_map);
		rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
		if (rval != 0) {
			rval = EFAULT;
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	size = NDKMAP * sizeof (struct dk_map);
	rval = ddi_copyout(un->un_map, (void *)arg, size, flag);
	if (rval != 0) {
		rval = EFAULT;
	}
#endif /* _MULTI_DATAMODEL */
	return (rval);
}
22584 
22585 
22586 /*
22587  *    Function: sd_dkio_set_partition
22588  *
22589  * Description: This routine is the driver entry point for handling user
22590  *		requests to set the partition table (DKIOCSAPART). The actual
22591  *		device partition is not updated.
22592  *
22593  *   Arguments: dev  - the device number
22594  *		arg  - pointer to user provided dk_allmap structure used to set
22595  *			the controller's notion of the partition table.
22596  *		flag - this argument is a pass through to ddi_copyxxx()
22597  *		       directly from the mode argument of ioctl().
22598  *
22599  * Return Code: 0
22600  *		EINVAL
22601  *		EFAULT
22602  *		ENXIO
22603  *		EIO
22604  */
22605 
static int
sd_dkio_set_partition(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun	*un = NULL;
	struct dk_map	dk_map[NDKMAP];
	struct dk_map	*lp;
	int		rval = 0;
	int		size;
	int		i;
#if defined(_SUNOS_VTOC_16)
	struct dkl_partition	*vp;
#endif

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * Set the map for all logical partitions.  We lock
	 * the priority just to make sure an interrupt doesn't
	 * come in while the map is half updated.
	 */
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_solaris_size))
	mutex_enter(SD_MUTEX(un));
	/* Disks large enough to require an EFI label cannot take a vtoc map */
	if (un->un_blockcount > DK_MAX_BLOCKS) {
		mutex_exit(SD_MUTEX(un));
		return (ENOTSUP);
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * Make sure there is no reservation conflict on the lun.
	 */
	if (sd_send_scsi_TEST_UNIT_READY(un, 0) == EACCES) {
		return (EACCES);
	}

#if defined(__i386) || defined(__amd64)
	/* No Solaris fdisk partition was found on this disk */
	if (un->un_solaris_size == 0) {
		return (EIO);
	}
#endif

	/* Copy in the new map in the layout matching the caller's model */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		struct dk_map32 dk_map32[NDKMAP];

		size = NDKMAP * sizeof (struct dk_map32);
		rval = ddi_copyin((void *)arg, dk_map32, size, flag);
		if (rval != 0) {
			return (EFAULT);
		}
		/* Widen the 32-bit entries into the native dk_map array */
		for (i = 0; i < NDKMAP; i++) {
			dk_map[i].dkl_cylno = dk_map32[i].dkl_cylno;
			dk_map[i].dkl_nblk  = dk_map32[i].dkl_nblk;
		}
		break;
	}
	case DDI_MODEL_NONE:
		size = NDKMAP * sizeof (struct dk_map);
		rval = ddi_copyin((void *)arg, dk_map, size, flag);
		if (rval != 0) {
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	size = NDKMAP * sizeof (struct dk_map);
	rval = ddi_copyin((void *)arg, dk_map, size, flag);
	if (rval != 0) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	mutex_enter(SD_MUTEX(un));
	/* Note: The size used in this bcopy is set based upon the data model */
	/*
	 * NOTE(review): for ILP32 callers "size" is NDKMAP *
	 * sizeof (struct dk_map32) while dk_map holds native entries —
	 * confirm sizeof (struct dk_map) matches dk_map32 here, otherwise
	 * only part of un_map is updated.
	 */
	bcopy(dk_map, un->un_map, size);
#if defined(_SUNOS_VTOC_16)
	vp = (struct dkl_partition *)&(un->un_vtoc);
#endif	/* defined(_SUNOS_VTOC_16) */
	/*
	 * Recompute the absolute starting sector of every partition from
	 * its starting cylinder and the current geometry.
	 */
	for (i = 0; i < NDKMAP; i++) {
		lp  = &un->un_map[i];
		un->un_offset[i] =
		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
#if defined(_SUNOS_VTOC_16)
		vp->p_start = un->un_offset[i];
		vp->p_size = lp->dkl_nblk;
		vp++;
#endif	/* defined(_SUNOS_VTOC_16) */
#if defined(__i386) || defined(__amd64)
		/* Make the offset absolute by adding the fdisk base */
		un->un_offset[i] += un->un_solaris_offset;
#endif
	}
	mutex_exit(SD_MUTEX(un));
	return (rval);
}
22703 
22704 
22705 /*
22706  *    Function: sd_dkio_get_vtoc
22707  *
22708  * Description: This routine is the driver entry point for handling user
22709  *		requests to get the current volume table of contents
22710  *		(DKIOCGVTOC).
22711  *
22712  *   Arguments: dev  - the device number
22713  *		arg  - pointer to user provided vtoc structure specifying
22714  *			the current vtoc.
22715  *		flag - this argument is a pass through to ddi_copyxxx()
22716  *		       directly from the mode argument of ioctl().
22717  *		geom_validated - flag indicating if the device geometry has been
22718  *				 previously validated in the sdioctl routine.
22719  *
22720  * Return Code: 0
22721  *		EFAULT
22722  *		ENXIO
22723  *		EIO
22724  */
22725 
static int
sd_dkio_get_vtoc(dev_t dev, caddr_t arg, int flag, int geom_validated)
{
	struct sd_lun	*un = NULL;
#if defined(_SUNOS_VTOC_8)
	struct vtoc	user_vtoc;
#endif	/* defined(_SUNOS_VTOC_8) */
	int		rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	mutex_enter(SD_MUTEX(un));
	if (geom_validated == FALSE) {
		/*
		 * sd_validate_geometry does not spin a disk up
		 * if it was spun down. We need to make sure it
		 * is ready.
		 */
		mutex_exit(SD_MUTEX(un));
		if ((rval = sd_send_scsi_TEST_UNIT_READY(un, 0)) != 0) {
			return (rval);
		}
		mutex_enter(SD_MUTEX(un));
		if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT)) != 0) {
			mutex_exit(SD_MUTEX(un));
			return (rval);
		}
	}

#if defined(_SUNOS_VTOC_8)
	/* Build a user-format vtoc snapshot while still holding SD_MUTEX */
	sd_build_user_vtoc(un, &user_vtoc);
	mutex_exit(SD_MUTEX(un));

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		struct vtoc32 user_vtoc32;

		/* Narrow to the 32-bit vtoc layout for ILP32 callers */
		vtoctovtoc32(user_vtoc, user_vtoc32);
		if (ddi_copyout(&user_vtoc32, (void *)arg,
		    sizeof (struct vtoc32), flag)) {
			return (EFAULT);
		}
		break;
	}

	case DDI_MODEL_NONE:
		if (ddi_copyout(&user_vtoc, (void *)arg,
		    sizeof (struct vtoc), flag)) {
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyout(&user_vtoc, (void *)arg, sizeof (struct vtoc), flag)) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

#elif defined(_SUNOS_VTOC_16)
	mutex_exit(SD_MUTEX(un));

#ifdef _MULTI_DATAMODEL
	/*
	 * The un_vtoc structure is a "struct dk_vtoc"  which is always
	 * 32-bit to maintain compatibility with existing on-disk
	 * structures.  Thus, we need to convert the structure when copying
	 * it out to a datamodel-dependent "struct vtoc" in a 64-bit
	 * program.  If the target is a 32-bit program, then no conversion
	 * is necessary.
	 */
	/* LINTED: logical expression always true: op "||" */
	ASSERT(sizeof (un->un_vtoc) == sizeof (struct vtoc32));
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		/* 32-bit caller: the cached layout already matches */
		if (ddi_copyout(&(un->un_vtoc), (void *)arg,
		    sizeof (un->un_vtoc), flag)) {
			return (EFAULT);
		}
		break;

	case DDI_MODEL_NONE: {
		struct vtoc user_vtoc;

		/* 64-bit caller: widen the cached vtoc32 before copyout */
		vtoc32tovtoc(un->un_vtoc, user_vtoc);
		if (ddi_copyout(&user_vtoc, (void *)arg,
		    sizeof (struct vtoc), flag)) {
			return (EFAULT);
		}
		break;
	}
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyout(&(un->un_vtoc), (void *)arg, sizeof (un->un_vtoc),
	    flag)) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */
#else
#error "No VTOC format defined."
#endif

	return (rval);
}
22832 
22833 static int
22834 sd_dkio_get_efi(dev_t dev, caddr_t arg, int flag)
22835 {
22836 	struct sd_lun	*un = NULL;
22837 	dk_efi_t	user_efi;
22838 	int		rval = 0;
22839 	void		*buffer;
22840 
22841 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
22842 		return (ENXIO);
22843 
22844 	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
22845 		return (EFAULT);
22846 
22847 	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
22848 
22849 	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
22850 	    (user_efi.dki_length > un->un_max_xfer_size))
22851 		return (EINVAL);
22852 
22853 	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
22854 	rval = sd_send_scsi_READ(un, buffer, user_efi.dki_length,
22855 	    user_efi.dki_lba, SD_PATH_DIRECT);
22856 	if (rval == 0 && ddi_copyout(buffer, user_efi.dki_data,
22857 	    user_efi.dki_length, flag) != 0)
22858 		rval = EFAULT;
22859 
22860 	kmem_free(buffer, user_efi.dki_length);
22861 	return (rval);
22862 }
22863 
22864 /*
22865  *    Function: sd_build_user_vtoc
22866  *
22867  * Description: This routine populates a pass by reference variable with the
22868  *		current volume table of contents.
22869  *
22870  *   Arguments: un - driver soft state (unit) structure
22871  *		user_vtoc - pointer to vtoc structure to be populated
22872  */
22873 
22874 static void
22875 sd_build_user_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
22876 {
22877 	struct dk_map2		*lpart;
22878 	struct dk_map		*lmap;
22879 	struct partition	*vpart;
22880 	int			nblks;
22881 	int			i;
22882 
22883 	ASSERT(mutex_owned(SD_MUTEX(un)));
22884 
22885 	/*
22886 	 * Return vtoc structure fields in the provided VTOC area, addressed
22887 	 * by *vtoc.
22888 	 */
22889 	bzero(user_vtoc, sizeof (struct vtoc));
22890 	user_vtoc->v_bootinfo[0] = un->un_vtoc.v_bootinfo[0];
22891 	user_vtoc->v_bootinfo[1] = un->un_vtoc.v_bootinfo[1];
22892 	user_vtoc->v_bootinfo[2] = un->un_vtoc.v_bootinfo[2];
22893 	user_vtoc->v_sanity	= VTOC_SANE;
22894 	user_vtoc->v_version	= un->un_vtoc.v_version;
22895 	bcopy(un->un_vtoc.v_volume, user_vtoc->v_volume, LEN_DKL_VVOL);
22896 	user_vtoc->v_sectorsz = un->un_sys_blocksize;
22897 	user_vtoc->v_nparts = un->un_vtoc.v_nparts;
22898 	bcopy(un->un_vtoc.v_reserved, user_vtoc->v_reserved,
22899 	    sizeof (un->un_vtoc.v_reserved));
22900 	/*
22901 	 * Convert partitioning information.
22902 	 *
22903 	 * Note the conversion from starting cylinder number
22904 	 * to starting sector number.
22905 	 */
22906 	lmap = un->un_map;
22907 	lpart = (struct dk_map2 *)un->un_vtoc.v_part;
22908 	vpart = user_vtoc->v_part;
22909 
22910 	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
22911 
22912 	for (i = 0; i < V_NUMPAR; i++) {
22913 		vpart->p_tag	= lpart->p_tag;
22914 		vpart->p_flag	= lpart->p_flag;
22915 		vpart->p_start	= lmap->dkl_cylno * nblks;
22916 		vpart->p_size	= lmap->dkl_nblk;
22917 		lmap++;
22918 		lpart++;
22919 		vpart++;
22920 
22921 		/* (4364927) */
22922 		user_vtoc->timestamp[i] = (time_t)un->un_vtoc.v_timestamp[i];
22923 	}
22924 
22925 	bcopy(un->un_asciilabel, user_vtoc->v_asciilabel, LEN_DKL_ASCII);
22926 }
22927 
static int
sd_dkio_partition(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct partition64	p64;
	int			rval = 0;
	uint_t			nparts;
	efi_gpe_t		*partitions;
	efi_gpt_t		*buffer;
	diskaddr_t		gpe_lba;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	if (ddi_copyin((const void *)arg, &p64,
	    sizeof (struct partition64), flag)) {
		return (EFAULT);
	}

	/* Read the GPT header from LBA 1 to locate the partition array */
	buffer = kmem_alloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
	rval = sd_send_scsi_READ(un, buffer, DEV_BSIZE,
		1, SD_PATH_DIRECT);
	if (rval != 0)
		goto done_error;

	/* On-disk GPT data is little-endian; convert to host order */
	sd_swap_efi_gpt(buffer);

	if ((rval = sd_validate_efi(buffer)) != 0)
		goto done_error;

	nparts = buffer->efi_gpt_NumberOfPartitionEntries;
	gpe_lba = buffer->efi_gpt_PartitionEntryLBA;
	/*
	 * NOTE(review): entries are 0-indexed, so this bound arguably
	 * should be ">=" rather than ">" — confirm against callers
	 * before changing.
	 */
	if (p64.p_partno > nparts) {
		/* couldn't find it */
		rval = ESRCH;
		goto done_error;
	}
	/*
	 * if we're dealing with a partition that's out of the normal
	 * 16K block, adjust accordingly
	 */
	gpe_lba += p64.p_partno / sizeof (efi_gpe_t);
	rval = sd_send_scsi_READ(un, buffer, EFI_MIN_ARRAY_SIZE,
			gpe_lba, SD_PATH_DIRECT);
	if (rval) {
		goto done_error;
	}
	/* Reuse the header buffer to hold the partition entry array */
	partitions = (efi_gpe_t *)buffer;

	sd_swap_efi_gpe(nparts, partitions);

	/*
	 * NOTE(review): after the gpe_lba adjustment above, indexing by the
	 * full p_partno (rather than its remainder within the block just
	 * read) looks suspect for large partition numbers — verify.
	 */
	partitions += p64.p_partno;
	bcopy(&partitions->efi_gpe_PartitionTypeGUID, &p64.p_type,
	    sizeof (struct uuid));
	p64.p_start = partitions->efi_gpe_StartingLBA;
	p64.p_size = partitions->efi_gpe_EndingLBA -
			p64.p_start + 1;

	if (ddi_copyout(&p64, (void *)arg, sizeof (struct partition64), flag))
		rval = EFAULT;

done_error:
	kmem_free(buffer, EFI_MIN_ARRAY_SIZE);
	return (rval);
}
22994 
22995 
22996 /*
22997  *    Function: sd_dkio_set_vtoc
22998  *
22999  * Description: This routine is the driver entry point for handling user
23000  *		requests to set the current volume table of contents
23001  *		(DKIOCSVTOC).
23002  *
23003  *   Arguments: dev  - the device number
23004  *		arg  - pointer to user provided vtoc structure used to set the
23005  *			current vtoc.
23006  *		flag - this argument is a pass through to ddi_copyxxx()
23007  *		       directly from the mode argument of ioctl().
23008  *
23009  * Return Code: 0
23010  *		EFAULT
23011  *		ENXIO
23012  *		EINVAL
23013  *		ENOTSUP
23014  */
23015 
static int
sd_dkio_set_vtoc(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun	*un = NULL;
	struct vtoc	user_vtoc;
	int		rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

#if defined(__i386) || defined(__amd64)
	/* A vtoc label is only supported when target and system block sizes match */
	if (un->un_tgt_blocksize != un->un_sys_blocksize) {
		return (EINVAL);
	}
#endif

	/* Copy in the vtoc in the layout matching the caller's data model */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		struct vtoc32 user_vtoc32;

		if (ddi_copyin((const void *)arg, &user_vtoc32,
		    sizeof (struct vtoc32), flag)) {
			return (EFAULT);
		}
		vtoc32tovtoc(user_vtoc32, user_vtoc);
		break;
	}

	case DDI_MODEL_NONE:
		if (ddi_copyin((const void *)arg, &user_vtoc,
		    sizeof (struct vtoc), flag)) {
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin((const void *)arg, &user_vtoc,
	    sizeof (struct vtoc), flag)) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	mutex_enter(SD_MUTEX(un));
	/* Disks large enough to require an EFI label cannot take a vtoc */
	if (un->un_blockcount > DK_MAX_BLOCKS) {
		mutex_exit(SD_MUTEX(un));
		return (ENOTSUP);
	}
	/* A zero-cylinder geometry means there is no valid geometry to label */
	if (un->un_g.dkg_ncyl == 0) {
		mutex_exit(SD_MUTEX(un));
		return (EINVAL);
	}

	/*
	 * Switching to a vtoc label: wipe any EFI label and replace the
	 * EFI "wd"/"wd,raw" minor nodes with the vtoc "h"/"h,raw" ones.
	 * The mutex is dropped because these operations can block.
	 */
	mutex_exit(SD_MUTEX(un));
	sd_clear_efi(un);
	ddi_remove_minor_node(SD_DEVINFO(un), "wd");
	ddi_remove_minor_node(SD_DEVINFO(un), "wd,raw");
	(void) ddi_create_minor_node(SD_DEVINFO(un), "h",
	    S_IFBLK, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
	    un->un_node_type, NULL);
	(void) ddi_create_minor_node(SD_DEVINFO(un), "h,raw",
	    S_IFCHR, (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
	    un->un_node_type, NULL);
	mutex_enter(SD_MUTEX(un));

	/* Update the soft state vtoc, write it to disk, then re-validate */
	if ((rval = sd_build_label_vtoc(un, &user_vtoc)) == 0) {
		if ((rval = sd_write_label(dev)) == 0) {
			if ((rval = sd_validate_geometry(un, SD_PATH_DIRECT))
			    != 0) {
				SD_ERROR(SD_LOG_IOCTL_DKIO, un,
				    "sd_dkio_set_vtoc: "
				    "Failed validate geometry\n");
			}
		}
	}

	/*
	 * If sd_build_label_vtoc, or sd_write_label failed above write the
	 * devid anyway, what can it hurt? Also preserve the device id by
	 * writing to the disk acyl for the case where a devid has been
	 * fabricated.
	 */
	if (un->un_f_devid_supported &&
	    (un->un_f_opt_fab_devid == TRUE)) {
		if (un->un_devid == NULL) {
			sd_register_devid(un, SD_DEVINFO(un),
			    SD_TARGET_IS_UNRESERVED);
		} else {
			/*
			 * The device id for this disk has been
			 * fabricated. Fabricated device id's are
			 * managed by storing them in the last 2
			 * available sectors on the drive. The device
			 * id must be preserved by writing it back out
			 * to this location.
			 */
			if (sd_write_deviceid(un) != 0) {
				ddi_devid_free(un->un_devid);
				un->un_devid = NULL;
			}
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (rval);
}
23122 
23123 
23124 /*
23125  *    Function: sd_build_label_vtoc
23126  *
23127  * Description: This routine updates the driver soft state current volume table
23128  *		of contents based on a user specified vtoc.
23129  *
23130  *   Arguments: un - driver soft state (unit) structure
23131  *		user_vtoc - pointer to vtoc structure specifying vtoc to be used
23132  *			    to update the driver soft state.
23133  *
23134  * Return Code: 0
23135  *		EINVAL
23136  */
23137 
static int
sd_build_label_vtoc(struct sd_lun *un, struct vtoc *user_vtoc)
{
	struct dk_map		*lmap;
	struct partition	*vpart;
	int			nblks;
#if defined(_SUNOS_VTOC_8)
	int			ncyl;
	struct dk_map2		*lpart;
#endif	/* defined(_SUNOS_VTOC_8) */
	int			i;

	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* Sanity-check the vtoc */
	if (user_vtoc->v_sanity != VTOC_SANE ||
	    user_vtoc->v_sectorsz != un->un_sys_blocksize ||
	    user_vtoc->v_nparts != V_NUMPAR) {
		return (EINVAL);
	}

	/* nblks is the number of sectors per cylinder */
	nblks = un->un_g.dkg_nsect * un->un_g.dkg_nhead;
	if (nblks == 0) {
		return (EINVAL);
	}

#if defined(_SUNOS_VTOC_8)
	/*
	 * Each partition must start on a cylinder boundary and must not
	 * extend past the last data cylinder.
	 */
	vpart = user_vtoc->v_part;
	for (i = 0; i < V_NUMPAR; i++) {
		if ((vpart->p_start % nblks) != 0) {
			return (EINVAL);
		}
		ncyl = vpart->p_start / nblks;
		ncyl += vpart->p_size / nblks;
		if ((vpart->p_size % nblks) != 0) {
			/* Round a partial trailing cylinder up */
			ncyl++;
		}
		if (ncyl > (int)un->un_g.dkg_ncyl) {
			return (EINVAL);
		}
		vpart++;
	}
#endif	/* defined(_SUNOS_VTOC_8) */

	/* Put appropriate vtoc structure fields into the disk label */
#if defined(_SUNOS_VTOC_16)
	/*
	 * The vtoc is always a 32bit data structure to maintain the
	 * on-disk format. Convert "in place" instead of bcopying it.
	 */
	vtoctovtoc32((*user_vtoc), (*((struct vtoc32 *)&(un->un_vtoc))));

	/*
	 * in the 16-slice vtoc, starting sectors are expressed in
	 * numbers *relative* to the start of the Solaris fdisk partition.
	 */
	lmap = un->un_map;
	vpart = user_vtoc->v_part;

	for (i = 0; i < (int)user_vtoc->v_nparts; i++, lmap++, vpart++) {
		lmap->dkl_cylno = vpart->p_start / nblks;
		lmap->dkl_nblk = vpart->p_size;
	}

#elif defined(_SUNOS_VTOC_8)

	un->un_vtoc.v_bootinfo[0] = (uint32_t)user_vtoc->v_bootinfo[0];
	un->un_vtoc.v_bootinfo[1] = (uint32_t)user_vtoc->v_bootinfo[1];
	un->un_vtoc.v_bootinfo[2] = (uint32_t)user_vtoc->v_bootinfo[2];

	un->un_vtoc.v_sanity = (uint32_t)user_vtoc->v_sanity;
	un->un_vtoc.v_version = (uint32_t)user_vtoc->v_version;

	bcopy(user_vtoc->v_volume, un->un_vtoc.v_volume, LEN_DKL_VVOL);

	un->un_vtoc.v_nparts = user_vtoc->v_nparts;

	bcopy(user_vtoc->v_reserved, un->un_vtoc.v_reserved,
	    sizeof (un->un_vtoc.v_reserved));

	/*
	 * Note the conversion from starting sector number
	 * to starting cylinder number.
	 * Return error if division results in a remainder.
	 */
	lmap = un->un_map;
	lpart = un->un_vtoc.v_part;
	vpart = user_vtoc->v_part;

	for (i = 0; i < (int)user_vtoc->v_nparts; i++) {
		lpart->p_tag  = vpart->p_tag;
		lpart->p_flag = vpart->p_flag;
		lmap->dkl_cylno = vpart->p_start / nblks;
		lmap->dkl_nblk = vpart->p_size;

		lmap++;
		lpart++;
		vpart++;

		/* Clamp 64-bit timestamps to the 32-bit on-disk field (4387723) */
#ifdef _LP64
		if (user_vtoc->timestamp[i] > TIME32_MAX) {
			un->un_vtoc.v_timestamp[i] = TIME32_MAX;
		} else {
			un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
		}
#else
		un->un_vtoc.v_timestamp[i] = user_vtoc->timestamp[i];
#endif
	}

	bcopy(user_vtoc->v_asciilabel, un->un_asciilabel, LEN_DKL_ASCII);
#else
#error "No VTOC format defined."
#endif
	return (0);
}
23255 
23256 /*
23257  *    Function: sd_clear_efi
23258  *
23259  * Description: This routine clears all EFI labels.
23260  *
23261  *   Arguments: un - driver soft state (unit) structure
23262  *
23263  * Return Code: void
23264  */
23265 
static void
sd_clear_efi(struct sd_lun *un)
{
	efi_gpt_t	*gpt;
	uint_t		lbasize;
	uint64_t	cap;
	int rval;

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	gpt = kmem_alloc(sizeof (efi_gpt_t), KM_SLEEP);

	/* Read what should be the primary GPT header at LBA 1 */
	if (sd_send_scsi_READ(un, gpt, DEV_BSIZE, 1, SD_PATH_DIRECT) != 0) {
		goto done;
	}

	sd_swap_efi_gpt(gpt);
	rval = sd_validate_efi(gpt);
	if (rval == 0) {
		/* clear primary */
		/*
		 * NOTE(review): this writes EFI_LABEL_SIZE bytes from a
		 * buffer allocated with sizeof (efi_gpt_t) — confirm the
		 * two sizes agree.
		 */
		bzero(gpt, sizeof (efi_gpt_t));
		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE, 1,
			SD_PATH_DIRECT))) {
			SD_INFO(SD_LOG_IO_PARTITION, un,
				"sd_clear_efi: clear primary label failed\n");
		}
	}
	/* the backup */
	rval = sd_send_scsi_READ_CAPACITY(un, &cap, &lbasize,
	    SD_PATH_DIRECT);
	if (rval) {
		goto done;
	}
	/*
	 * The MMC standard allows READ CAPACITY to be
	 * inaccurate by a bounded amount (in the interest of
	 * response latency).  As a result, failed READs are
	 * commonplace (due to the reading of metadata and not
	 * data). Depending on the per-Vendor/drive Sense data,
	 * the failed READ can cause many (unnecessary) retries.
	 */
	if ((rval = sd_send_scsi_READ(un, gpt, lbasize,
	    cap - 1, ISCD(un) ? SD_PATH_DIRECT_PRIORITY :
		SD_PATH_DIRECT)) != 0) {
		goto done;
	}
	sd_swap_efi_gpt(gpt);
	rval = sd_validate_efi(gpt);
	if (rval == 0) {
		/* clear backup */
		SD_TRACE(SD_LOG_IOCTL, un, "sd_clear_efi clear backup@%lu\n",
			cap-1);
		bzero(gpt, sizeof (efi_gpt_t));
		if ((rval = sd_send_scsi_WRITE(un, gpt, EFI_LABEL_SIZE,
		    cap-1, SD_PATH_DIRECT))) {
			SD_INFO(SD_LOG_IO_PARTITION, un,
				"sd_clear_efi: clear backup label failed\n");
		}
	}

done:
	kmem_free(gpt, sizeof (efi_gpt_t));
}
23329 
23330 /*
23331  *    Function: sd_set_vtoc
23332  *
23333  * Description: This routine writes data to the appropriate positions
23334  *
23335  *   Arguments: un - driver soft state (unit) structure
23336  *              dkl  - the data to be written
23337  *
 * Return Code: 0, or the error code returned by
 *		sd_send_scsi_READ()/sd_send_scsi_WRITE()
23339  */
23340 
23341 static int
23342 sd_set_vtoc(struct sd_lun *un, struct dk_label *dkl)
23343 {
23344 	void			*shadow_buf;
23345 	uint_t			label_addr;
23346 	int			sec;
23347 	int			blk;
23348 	int			head;
23349 	int			cyl;
23350 	int			rval;
23351 
23352 #if defined(__i386) || defined(__amd64)
23353 	label_addr = un->un_solaris_offset + DK_LABEL_LOC;
23354 #else
23355 	/* Write the primary label at block 0 of the solaris partition. */
23356 	label_addr = 0;
23357 #endif
23358 
23359 	if (NOT_DEVBSIZE(un)) {
23360 		shadow_buf = kmem_zalloc(un->un_tgt_blocksize, KM_SLEEP);
23361 		/*
23362 		 * Read the target's first block.
23363 		 */
23364 		if ((rval = sd_send_scsi_READ(un, shadow_buf,
23365 		    un->un_tgt_blocksize, label_addr,
23366 		    SD_PATH_STANDARD)) != 0) {
23367 			goto exit;
23368 		}
23369 		/*
23370 		 * Copy the contents of the label into the shadow buffer
23371 		 * which is of the size of target block size.
23372 		 */
23373 		bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23374 	}
23375 
23376 	/* Write the primary label */
23377 	if (NOT_DEVBSIZE(un)) {
23378 		rval = sd_send_scsi_WRITE(un, shadow_buf, un->un_tgt_blocksize,
23379 		    label_addr, SD_PATH_STANDARD);
23380 	} else {
23381 		rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23382 		    label_addr, SD_PATH_STANDARD);
23383 	}
23384 	if (rval != 0) {
23385 		return (rval);
23386 	}
23387 
23388 	/*
23389 	 * Calculate where the backup labels go.  They are always on
23390 	 * the last alternate cylinder, but some older drives put them
23391 	 * on head 2 instead of the last head.	They are always on the
23392 	 * first 5 odd sectors of the appropriate track.
23393 	 *
23394 	 * We have no choice at this point, but to believe that the
23395 	 * disk label is valid.	 Use the geometry of the disk
23396 	 * as described in the label.
23397 	 */
23398 	cyl  = dkl->dkl_ncyl  + dkl->dkl_acyl - 1;
23399 	head = dkl->dkl_nhead - 1;
23400 
23401 	/*
23402 	 * Write and verify the backup labels. Make sure we don't try to
23403 	 * write past the last cylinder.
23404 	 */
23405 	for (sec = 1; ((sec < 5 * 2 + 1) && (sec < dkl->dkl_nsect)); sec += 2) {
23406 		blk = (daddr_t)(
23407 		    (cyl * ((dkl->dkl_nhead * dkl->dkl_nsect) - dkl->dkl_apc)) +
23408 		    (head * dkl->dkl_nsect) + sec);
23409 #if defined(__i386) || defined(__amd64)
23410 		blk += un->un_solaris_offset;
23411 #endif
23412 		if (NOT_DEVBSIZE(un)) {
23413 			uint64_t	tblk;
23414 			/*
23415 			 * Need to read the block first for read modify write.
23416 			 */
23417 			tblk = (uint64_t)blk;
23418 			blk = (int)((tblk * un->un_sys_blocksize) /
23419 			    un->un_tgt_blocksize);
23420 			if ((rval = sd_send_scsi_READ(un, shadow_buf,
23421 			    un->un_tgt_blocksize, blk,
23422 			    SD_PATH_STANDARD)) != 0) {
23423 				goto exit;
23424 			}
23425 			/*
23426 			 * Modify the shadow buffer with the label.
23427 			 */
23428 			bcopy(dkl, shadow_buf, sizeof (struct dk_label));
23429 			rval = sd_send_scsi_WRITE(un, shadow_buf,
23430 			    un->un_tgt_blocksize, blk, SD_PATH_STANDARD);
23431 		} else {
23432 			rval = sd_send_scsi_WRITE(un, dkl, un->un_sys_blocksize,
23433 			    blk, SD_PATH_STANDARD);
23434 			SD_INFO(SD_LOG_IO_PARTITION, un,
23435 			"sd_set_vtoc: wrote backup label %d\n", blk);
23436 		}
23437 		if (rval != 0) {
23438 			goto exit;
23439 		}
23440 	}
23441 exit:
23442 	if (NOT_DEVBSIZE(un)) {
23443 		kmem_free(shadow_buf, un->un_tgt_blocksize);
23444 	}
23445 	return (rval);
23446 }
23447 
23448 /*
23449  *    Function: sd_clear_vtoc
23450  *
23451  * Description: This routine clears out the VTOC labels.
23452  *
23453  *   Arguments: un - driver soft state (unit) structure
23454  *
23455  * Return: void
23456  */
23457 
23458 static void
23459 sd_clear_vtoc(struct sd_lun *un)
23460 {
23461 	struct dk_label		*dkl;
23462 
23463 	mutex_exit(SD_MUTEX(un));
23464 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23465 	mutex_enter(SD_MUTEX(un));
23466 	/*
23467 	 * sd_set_vtoc uses these fields in order to figure out
23468 	 * where to overwrite the backup labels
23469 	 */
23470 	dkl->dkl_apc    = un->un_g.dkg_apc;
23471 	dkl->dkl_ncyl   = un->un_g.dkg_ncyl;
23472 	dkl->dkl_acyl   = un->un_g.dkg_acyl;
23473 	dkl->dkl_nhead  = un->un_g.dkg_nhead;
23474 	dkl->dkl_nsect  = un->un_g.dkg_nsect;
23475 	mutex_exit(SD_MUTEX(un));
23476 	(void) sd_set_vtoc(un, dkl);
23477 	kmem_free(dkl, sizeof (struct dk_label));
23478 
23479 	mutex_enter(SD_MUTEX(un));
23480 }
23481 
23482 /*
23483  *    Function: sd_write_label
23484  *
23485  * Description: This routine will validate and write the driver soft state vtoc
23486  *		contents to the device.
23487  *
23488  *   Arguments: dev - the device number
23489  *
23490  * Return Code: the code returned by sd_send_scsi_cmd()
23491  *		0
23492  *		EINVAL
23493  *		ENXIO
23494  *		ENOMEM
23495  */
23496 
23497 static int
23498 sd_write_label(dev_t dev)
23499 {
23500 	struct sd_lun		*un;
23501 	struct dk_label		*dkl;
23502 	short			sum;
23503 	short			*sp;
23504 	int			i;
23505 	int			rval;
23506 
23507 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23508 	    (un->un_state == SD_STATE_OFFLINE)) {
23509 		return (ENXIO);
23510 	}
23511 	ASSERT(mutex_owned(SD_MUTEX(un)));
23512 	mutex_exit(SD_MUTEX(un));
23513 	dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
23514 	mutex_enter(SD_MUTEX(un));
23515 
23516 	bcopy(&un->un_vtoc, &dkl->dkl_vtoc, sizeof (struct dk_vtoc));
23517 	dkl->dkl_rpm	= un->un_g.dkg_rpm;
23518 	dkl->dkl_pcyl	= un->un_g.dkg_pcyl;
23519 	dkl->dkl_apc	= un->un_g.dkg_apc;
23520 	dkl->dkl_intrlv = un->un_g.dkg_intrlv;
23521 	dkl->dkl_ncyl	= un->un_g.dkg_ncyl;
23522 	dkl->dkl_acyl	= un->un_g.dkg_acyl;
23523 	dkl->dkl_nhead	= un->un_g.dkg_nhead;
23524 	dkl->dkl_nsect	= un->un_g.dkg_nsect;
23525 
23526 #if defined(_SUNOS_VTOC_8)
23527 	dkl->dkl_obs1	= un->un_g.dkg_obs1;
23528 	dkl->dkl_obs2	= un->un_g.dkg_obs2;
23529 	dkl->dkl_obs3	= un->un_g.dkg_obs3;
23530 	for (i = 0; i < NDKMAP; i++) {
23531 		dkl->dkl_map[i].dkl_cylno = un->un_map[i].dkl_cylno;
23532 		dkl->dkl_map[i].dkl_nblk  = un->un_map[i].dkl_nblk;
23533 	}
23534 	bcopy(un->un_asciilabel, dkl->dkl_asciilabel, LEN_DKL_ASCII);
23535 #elif defined(_SUNOS_VTOC_16)
23536 	dkl->dkl_skew	= un->un_dkg_skew;
23537 #else
23538 #error "No VTOC format defined."
23539 #endif
23540 
23541 	dkl->dkl_magic			= DKL_MAGIC;
23542 	dkl->dkl_write_reinstruct	= un->un_g.dkg_write_reinstruct;
23543 	dkl->dkl_read_reinstruct	= un->un_g.dkg_read_reinstruct;
23544 
23545 	/* Construct checksum for the new disk label */
23546 	sum = 0;
23547 	sp = (short *)dkl;
23548 	i = sizeof (struct dk_label) / sizeof (short);
23549 	while (i--) {
23550 		sum ^= *sp++;
23551 	}
23552 	dkl->dkl_cksum = sum;
23553 
23554 	mutex_exit(SD_MUTEX(un));
23555 
23556 	rval = sd_set_vtoc(un, dkl);
23557 exit:
23558 	kmem_free(dkl, sizeof (struct dk_label));
23559 	mutex_enter(SD_MUTEX(un));
23560 	return (rval);
23561 }
23562 
/*
 * Handle DKIOCSEFI: copy a user-supplied EFI label fragment to the device
 * at the requested LBA, clearing any existing VTOC label first.
 * Returns 0, ENXIO, EFAULT, EINVAL, or the sd_send_scsi_WRITE() result.
 */
static int
sd_dkio_set_efi(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun	*un = NULL;
	dk_efi_t	user_efi;
	int		rval = 0;
	void		*buffer;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL)
		return (ENXIO);

	if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
		return (EFAULT);

	/* dki_data_64 carries the user buffer address across data models. */
	user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;

	/*
	 * The transfer must be a whole number of target blocks and must
	 * not exceed the HBA's maximum transfer size.
	 */
	if ((user_efi.dki_length % un->un_tgt_blocksize) ||
	    (user_efi.dki_length > un->un_max_xfer_size))
		return (EINVAL);

	buffer = kmem_alloc(user_efi.dki_length, KM_SLEEP);
	if (ddi_copyin(user_efi.dki_data, buffer, user_efi.dki_length, flag)) {
		rval = EFAULT;
	} else {
		/*
		 * let's clear the vtoc labels and clear the softstate
		 * vtoc.
		 */
		mutex_enter(SD_MUTEX(un));
		if (un->un_vtoc.v_sanity == VTOC_SANE) {
			SD_TRACE(SD_LOG_IO_PARTITION, un,
				"sd_dkio_set_efi: CLEAR VTOC\n");
			/* sd_clear_vtoc drops and reacquires SD_MUTEX(un). */
			sd_clear_vtoc(un);
			bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
			mutex_exit(SD_MUTEX(un));
			/*
			 * Replace the VTOC "h" minor nodes with the EFI
			 * whole-disk "wd"/"wd,raw" nodes for this unit.
			 */
			ddi_remove_minor_node(SD_DEVINFO(un), "h");
			ddi_remove_minor_node(SD_DEVINFO(un), "h,raw");
			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd",
			    S_IFBLK,
			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
			    un->un_node_type, NULL);
			(void) ddi_create_minor_node(SD_DEVINFO(un), "wd,raw",
			    S_IFCHR,
			    (SDUNIT(dev) << SDUNIT_SHIFT) | WD_NODE,
			    un->un_node_type, NULL);
		} else
			mutex_exit(SD_MUTEX(un));
		/* Write the user's EFI data at the LBA it specified. */
		rval = sd_send_scsi_WRITE(un, buffer, user_efi.dki_length,
		    user_efi.dki_lba, SD_PATH_DIRECT);
		if (rval == 0) {
			/* Force geometry revalidation on next access. */
			mutex_enter(SD_MUTEX(un));
			un->un_f_geometry_is_valid = FALSE;
			mutex_exit(SD_MUTEX(un));
		}
	}
	kmem_free(buffer, user_efi.dki_length);
	return (rval);
}
23621 
23622 /*
23623  *    Function: sd_dkio_get_mboot
23624  *
23625  * Description: This routine is the driver entry point for handling user
23626  *		requests to get the current device mboot (DKIOCGMBOOT)
23627  *
23628  *   Arguments: dev  - the device number
23629  *		arg  - pointer to user provided mboot structure specifying
23630  *			the current mboot.
23631  *		flag - this argument is a pass through to ddi_copyxxx()
23632  *		       directly from the mode argument of ioctl().
23633  *
23634  * Return Code: 0
23635  *		EINVAL
23636  *		EFAULT
23637  *		ENXIO
23638  */
23639 
23640 static int
23641 sd_dkio_get_mboot(dev_t dev, caddr_t arg, int flag)
23642 {
23643 	struct sd_lun	*un;
23644 	struct mboot	*mboot;
23645 	int		rval;
23646 	size_t		buffer_size;
23647 
23648 	if (((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) ||
23649 	    (un->un_state == SD_STATE_OFFLINE)) {
23650 		return (ENXIO);
23651 	}
23652 
23653 	if (!un->un_f_mboot_supported || arg == NULL) {
23654 		return (EINVAL);
23655 	}
23656 
23657 	/*
23658 	 * Read the mboot block, located at absolute block 0 on the target.
23659 	 */
23660 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct mboot));
23661 
23662 	SD_TRACE(SD_LOG_IO_PARTITION, un,
23663 	    "sd_dkio_get_mboot: allocation size: 0x%x\n", buffer_size);
23664 
23665 	mboot = kmem_zalloc(buffer_size, KM_SLEEP);
23666 	if ((rval = sd_send_scsi_READ(un, mboot, buffer_size, 0,
23667 	    SD_PATH_STANDARD)) == 0) {
23668 		if (ddi_copyout(mboot, (void *)arg,
23669 		    sizeof (struct mboot), flag) != 0) {
23670 			rval = EFAULT;
23671 		}
23672 	}
23673 	kmem_free(mboot, buffer_size);
23674 	return (rval);
23675 }
23676 
23677 
23678 /*
23679  *    Function: sd_dkio_set_mboot
23680  *
23681  * Description: This routine is the driver entry point for handling user
23682  *		requests to validate and set the device master boot
23683  *		(DKIOCSMBOOT).
23684  *
23685  *   Arguments: dev  - the device number
23686  *		arg  - pointer to user provided mboot structure used to set the
23687  *			master boot.
23688  *		flag - this argument is a pass through to ddi_copyxxx()
23689  *		       directly from the mode argument of ioctl().
23690  *
23691  * Return Code: 0
23692  *		EINVAL
23693  *		EFAULT
23694  *		ENXIO
23695  */
23696 
static int
sd_dkio_set_mboot(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun	*un = NULL;
	struct mboot	*mboot = NULL;
	int		rval;
	ushort_t	magic;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	if (!un->un_f_mboot_supported) {
		return (EINVAL);
	}

	if (arg == NULL) {
		return (EINVAL);
	}

	mboot = kmem_zalloc(sizeof (struct mboot), KM_SLEEP);

	if (ddi_copyin((const void *)arg, mboot,
	    sizeof (struct mboot), flag) != 0) {
		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
		return (EFAULT);
	}

	/* Is this really a master boot record? */
	/* The MBR signature is stored little-endian on disk. */
	magic = LE_16(mboot->signature);
	if (magic != MBB_MAGIC) {
		kmem_free(mboot, (size_t)(sizeof (struct mboot)));
		return (EINVAL);
	}

	/* Write the validated MBR to absolute block 0. */
	rval = sd_send_scsi_WRITE(un, mboot, un->un_sys_blocksize, 0,
	    SD_PATH_STANDARD);

	mutex_enter(SD_MUTEX(un));
#if defined(__i386) || defined(__amd64)
	if (rval == 0) {
		/*
		 * mboot has been written successfully.
		 * update the fdisk and vtoc tables in memory
		 */
		rval = sd_update_fdisk_and_vtoc(un);
		if ((un->un_f_geometry_is_valid == FALSE) || (rval != 0)) {
			mutex_exit(SD_MUTEX(un));
			kmem_free(mboot, (size_t)(sizeof (struct mboot)));
			return (rval);
		}
	}

	/*
	 * If the mboot write fails, write the devid anyway, what can it hurt?
	 * Also preserve the device id by writing to the disk acyl for the case
	 * where a devid has been fabricated.
	 */
	if (un->un_f_devid_supported && un->un_f_opt_fab_devid) {
		if (un->un_devid == NULL) {
			sd_register_devid(un, SD_DEVINFO(un),
			    SD_TARGET_IS_UNRESERVED);
		} else {
			/*
			 * The device id for this disk has been
			 * fabricated. Fabricated device id's are
			 * managed by storing them in the last 2
			 * available sectors on the drive. The device
			 * id must be preserved by writing it back out
			 * to this location.
			 */
			if (sd_write_deviceid(un) != 0) {
				ddi_devid_free(un->un_devid);
				un->un_devid = NULL;
			}
		}
	}

#ifdef __lock_lint
	sd_setup_default_geometry(un);
#endif

#else
	if (rval == 0) {
		/*
		 * mboot has been written successfully.
		 * set up the default geometry and VTOC
		 */
		if (un->un_blockcount <= DK_MAX_BLOCKS)
			sd_setup_default_geometry(un);
	}
#endif
	mutex_exit(SD_MUTEX(un));
	kmem_free(mboot, (size_t)(sizeof (struct mboot)));
	return (rval);
}
23795 
23796 
23797 /*
23798  *    Function: sd_setup_default_geometry
23799  *
23800  * Description: This local utility routine sets the default geometry as part of
23801  *		setting the device mboot.
23802  *
23803  *   Arguments: un - driver soft state (unit) structure
23804  *
23805  * Note: This may be redundant with sd_build_default_label.
23806  */
23807 
static void
sd_setup_default_geometry(struct sd_lun *un)
{
	/* zero out the soft state geometry and partition table. */
	bzero(&un->un_g, sizeof (struct dk_geom));
	bzero(&un->un_vtoc, sizeof (struct dk_vtoc));
	bzero(un->un_map, NDKMAP * (sizeof (struct dk_map)));
	un->un_asciilabel[0] = '\0';

	/*
	 * For the rpm, we use the minimum for the disk.
	 * For the head, cyl and number of sector per track,
	 * if the capacity <= 1GB, head = 64, sect = 32.
	 * else head = 255, sect 63
	 * Note: the capacity should be equal to C*H*S values.
	 * This will cause some truncation of size due to
	 * round off errors. For CD-ROMs, this truncation can
	 * have adverse side effects, so returning ncyl and
	 * nhead as 1. The nsect will overflow for most of
	 * CD-ROMs as nsect is of type ushort.
	 */
	if (ISCD(un)) {
		un->un_g.dkg_ncyl = 1;
		un->un_g.dkg_nhead = 1;
		un->un_g.dkg_nsect = un->un_blockcount;
	} else {
		if (un->un_blockcount <= 0x1000) {
			/* Needed for unlabeled SCSI floppies. */
			un->un_g.dkg_nhead = 2;
			un->un_g.dkg_ncyl = 80;
			un->un_g.dkg_pcyl = 80;
			un->un_g.dkg_nsect = un->un_blockcount / (2 * 80);
		} else if (un->un_blockcount <= 0x200000) {
			/* <= 1GB: 64 heads x 32 sectors per track */
			un->un_g.dkg_nhead = 64;
			un->un_g.dkg_nsect = 32;
			un->un_g.dkg_ncyl = un->un_blockcount / (64 * 32);
		} else {
			/* > 1GB: 255 heads x 63 sectors per track */
			un->un_g.dkg_nhead = 255;
			un->un_g.dkg_nsect = 63;
			un->un_g.dkg_ncyl = un->un_blockcount / (255 * 63);
		}
		/*
		 * Truncate the block count to an exact C*H*S multiple,
		 * as noted in the comment above.
		 */
		un->un_blockcount = un->un_g.dkg_ncyl *
		    un->un_g.dkg_nhead * un->un_g.dkg_nsect;
	}
	un->un_g.dkg_acyl = 0;
	un->un_g.dkg_bcyl = 0;
	un->un_g.dkg_intrlv = 1;
	un->un_g.dkg_rpm = 200;
	un->un_g.dkg_read_reinstruct = 0;
	un->un_g.dkg_write_reinstruct = 0;
	/* pcyl is only preset in the floppy case above. */
	if (un->un_g.dkg_pcyl == 0) {
		un->un_g.dkg_pcyl = un->un_g.dkg_ncyl + un->un_g.dkg_acyl;
	}

	/* Default slices: 'a' and 'c' both span the whole disk. */
	un->un_map['a'-'a'].dkl_cylno = 0;
	un->un_map['a'-'a'].dkl_nblk = un->un_blockcount;
	un->un_map['c'-'a'].dkl_cylno = 0;
	un->un_map['c'-'a'].dkl_nblk = un->un_blockcount;
	/* This is a fabricated geometry; mark it as not validated. */
	un->un_f_geometry_is_valid = FALSE;
}
23868 
23869 
23870 #if defined(__i386) || defined(__amd64)
23871 /*
23872  *    Function: sd_update_fdisk_and_vtoc
23873  *
23874  * Description: This local utility routine updates the device fdisk and vtoc
23875  *		as part of setting the device mboot.
23876  *
23877  *   Arguments: un - driver soft state (unit) structure
23878  *
23879  * Return Code: 0 for success or errno-type return code.
23880  *
23881  *    Note:x86: This looks like a duplicate of sd_validate_geometry(), but
 *		these did exist separately in x86 sd.c!!!
23883  */
23884 
static int
sd_update_fdisk_and_vtoc(struct sd_lun *un)
{
	static char	labelstring[128];	/* protected by sd_label_mutex */
	static char	buf[256];		/* protected by sd_label_mutex */
	char		*label = 0;
	int		count;
	int		label_rc = 0;
	int		gvalid = un->un_f_geometry_is_valid;
	int		fdisk_rval;
	int		lbasize;
	int		capacity;

	ASSERT(mutex_owned(SD_MUTEX(un)));

	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
		return (EINVAL);
	}

	if (un->un_f_blockcount_is_valid == FALSE) {
		return (EINVAL);
	}

#if defined(_SUNOS_VTOC_16)
	/*
	 * Set up the "whole disk" fdisk partition; this should always
	 * exist, regardless of whether the disk contains an fdisk table
	 * or vtoc.
	 */
	un->un_map[P0_RAW_DISK].dkl_cylno = 0;
	un->un_map[P0_RAW_DISK].dkl_nblk = un->un_blockcount;
#endif	/* defined(_SUNOS_VTOC_16) */

	/*
	 * copy the lbasize and capacity so that if they're
	 * reset while we're not holding the SD_MUTEX(un), we will
	 * continue to use valid values after the SD_MUTEX(un) is
	 * reacquired.
	 * NOTE(review): capacity is an int; un_blockcount larger than
	 * INT_MAX would be truncated here -- confirm against callers.
	 */
	lbasize  = un->un_tgt_blocksize;
	capacity = un->un_blockcount;

	/*
	 * refresh the logical and physical geometry caches.
	 * (data from mode sense format/rigid disk geometry pages,
	 * and scsi_ifgetcap("geometry").
	 */
	sd_resync_geom_caches(un, capacity, lbasize, SD_PATH_DIRECT);

	/*
	 * Only DIRECT ACCESS devices will have Sun labels.
	 * CD's supposedly have a Sun label, too
	 */
	if (un->un_f_vtoc_label_supported) {
		fdisk_rval = sd_read_fdisk(un, capacity, lbasize,
		    SD_PATH_DIRECT);
		if (fdisk_rval == SD_CMD_FAILURE) {
			ASSERT(mutex_owned(SD_MUTEX(un)));
			return (EIO);
		}

		if (fdisk_rval == SD_CMD_RESERVATION_CONFLICT) {
			ASSERT(mutex_owned(SD_MUTEX(un)));
			return (EACCES);
		}

		if (un->un_solaris_size <= DK_LABEL_LOC) {
			/*
			 * Found fdisk table but no Solaris partition entry,
			 * so don't call sd_uselabel() and don't create
			 * a default label.
			 */
			label_rc = 0;
			un->un_f_geometry_is_valid = TRUE;
			goto no_solaris_partition;
		}

#if defined(_SUNOS_VTOC_8)
		label = (char *)un->un_asciilabel;
#elif defined(_SUNOS_VTOC_16)
		label = (char *)un->un_vtoc.v_asciilabel;
#else
#error "No VTOC format defined."
#endif
	} else if (capacity < 0) {
		ASSERT(mutex_owned(SD_MUTEX(un)));
		return (EINVAL);
	}

	/*
	 * For removable media we reach here if we have found a
	 * SOLARIS PARTITION.
	 * If un_f_geometry_is_valid is FALSE it indicates that the SOLARIS
	 * PARTITION has changed from the previous one, hence we will setup a
	 * default VTOC in this case.
	 */
	if (un->un_f_geometry_is_valid == FALSE) {
		sd_build_default_label(un);
		label_rc = 0;
	}

no_solaris_partition:
	if ((!un->un_f_has_removable_media ||
	    (un->un_f_has_removable_media &&
	    un->un_mediastate == DKIO_EJECTED)) &&
		(un->un_state == SD_STATE_NORMAL && !gvalid)) {
		/*
		 * Print out a message indicating who and what we are.
		 * We do this only when we happen to really validate the
		 * geometry. We may call sd_validate_geometry() at other
		 * times, ioctl()'s like Get VTOC in which case we
		 * don't want to print the label.
		 * If the geometry is valid, print the label string,
		 * else print vendor and product info, if available
		 */
		if ((un->un_f_geometry_is_valid == TRUE) && (label != NULL)) {
			SD_INFO(SD_LOG_IOCTL_DKIO, un, "?<%s>\n", label);
		} else {
			/* sd_label_mutex serializes use of the static bufs */
			mutex_enter(&sd_label_mutex);
			sd_inq_fill(SD_INQUIRY(un)->inq_vid, VIDMAX,
			    labelstring);
			sd_inq_fill(SD_INQUIRY(un)->inq_pid, PIDMAX,
			    &labelstring[64]);
			(void) sprintf(buf, "?Vendor '%s', product '%s'",
			    labelstring, &labelstring[64]);
			if (un->un_f_blockcount_is_valid == TRUE) {
				(void) sprintf(&buf[strlen(buf)],
				    ", %" PRIu64 " %u byte blocks\n",
				    un->un_blockcount,
				    un->un_tgt_blocksize);
			} else {
				(void) sprintf(&buf[strlen(buf)],
				    ", (unknown capacity)\n");
			}
			SD_INFO(SD_LOG_IOCTL_DKIO, un, buf);
			mutex_exit(&sd_label_mutex);
		}
	}

#if defined(_SUNOS_VTOC_16)
	/*
	 * If we have valid geometry, set up the remaining fdisk partitions.
	 * Note that dkl_cylno is not used for the fdisk map entries, so
	 * we set it to an entirely bogus value.
	 */
	for (count = 0; count < FD_NUMPART; count++) {
		un->un_map[FDISK_P1 + count].dkl_cylno = -1;
		un->un_map[FDISK_P1 + count].dkl_nblk =
		    un->un_fmap[count].fmap_nblk;
		un->un_offset[FDISK_P1 + count] =
		    un->un_fmap[count].fmap_start;
	}
#endif

	/* Compute the absolute block offset of every slice. */
	for (count = 0; count < NDKMAP; count++) {
#if defined(_SUNOS_VTOC_8)
		struct dk_map *lp  = &un->un_map[count];
		un->un_offset[count] =
		    un->un_g.dkg_nhead * un->un_g.dkg_nsect * lp->dkl_cylno;
#elif defined(_SUNOS_VTOC_16)
		struct dkl_partition *vp = &un->un_vtoc.v_part[count];
		un->un_offset[count] = vp->p_start + un->un_solaris_offset;
#else
#error "No VTOC format defined."
#endif
	}

	ASSERT(mutex_owned(SD_MUTEX(un)));
	return (label_rc);
}
24055 #endif
24056 
24057 
24058 /*
24059  *    Function: sd_check_media
24060  *
24061  * Description: This utility routine implements the functionality for the
24062  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
24063  *		driver state changes from that specified by the user
24064  *		(inserted or ejected). For example, if the user specifies
24065  *		DKIO_EJECTED and the current media state is inserted this
24066  *		routine will immediately return DKIO_INSERTED. However, if the
24067  *		current media state is not inserted the user thread will be
24068  *		blocked until the drive state changes. If DKIO_NONE is specified
24069  *		the user thread will block until a drive state change occurs.
24070  *
24071  *   Arguments: dev  - the device number
24072  *		state  - user pointer to a dkio_state, updated with the current
24073  *			drive state at return.
24074  *
24075  * Return Code: ENXIO
24076  *		EIO
24077  *		EAGAIN
24078  *		EINTR
24079  */
24080 
static int
sd_check_media(dev_t dev, enum dkio_state state)
{
	struct sd_lun		*un = NULL;
	enum dkio_state		prev_state;
	opaque_t		token = NULL;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
	    "state=%x, mediastate=%x\n", state, un->un_mediastate);

	prev_state = un->un_mediastate;

	/* is there anything to do? */
	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
		/*
		 * submit the request to the scsi_watch service;
		 * scsi_media_watch_cb() does the real work
		 */
		mutex_exit(SD_MUTEX(un));

		/*
		 * This change handles the case where a scsi watch request is
		 * added to a device that is powered down. To accomplish this
		 * we power up the device before adding the scsi watch request,
		 * since the scsi watch sends a TUR directly to the device
		 * which the device cannot handle if it is powered down.
		 */
		if (sd_pm_entry(un) != DDI_SUCCESS) {
			mutex_enter(SD_MUTEX(un));
			goto done;
		}

		token = scsi_watch_request_submit(SD_SCSI_DEVP(un),
		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
		    (caddr_t)dev);

		sd_pm_exit(un);

		mutex_enter(SD_MUTEX(un));
		if (token == NULL) {
			rval = EAGAIN;
			goto done;
		}

		/*
		 * This is a special case IOCTL that doesn't return
		 * until the media state changes. Routine sdpower
		 * knows about and handles this so don't count it
		 * as an active cmd in the driver, which would
		 * keep the device busy to the pm framework.
		 * If the count isn't decremented the device can't
		 * be powered down.
		 */
		un->un_ncmds_in_driver--;
		ASSERT(un->un_ncmds_in_driver >= 0);

		/*
		 * if a prior request had been made, this will be the same
		 * token, as scsi_watch was designed that way.
		 */
		un->un_swr_token = token;
		un->un_specified_mediastate = state;

		/*
		 * now wait for media change
		 * we will not be signalled unless mediastate == state but it is
		 * still better to test for this condition, since there is a
		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
		 */
		SD_TRACE(SD_LOG_COMMON, un,
		    "sd_check_media: waiting for media state change\n");
		while (un->un_mediastate == state) {
			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
				SD_TRACE(SD_LOG_COMMON, un,
				    "sd_check_media: waiting for media state "
				    "was interrupted\n");
				/* restore the command count before returning */
				un->un_ncmds_in_driver++;
				rval = EINTR;
				goto done;
			}
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_check_media: received signal, state=%x\n",
			    un->un_mediastate);
		}
		/*
		 * Inc the counter to indicate the device once again
		 * has an active outstanding cmd.
		 */
		un->un_ncmds_in_driver++;
	}

	/* invalidate geometry */
	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
		sr_ejected(un);
	}

	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
		uint64_t	capacity;
		uint_t		lbasize;

		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
		mutex_exit(SD_MUTEX(un));
		/*
		 * Since the following routines use SD_PATH_DIRECT, we must
		 * call PM directly before the upcoming disk accesses. This
		 * may cause the disk to be power/spin up.
		 */

		if (sd_pm_entry(un) == DDI_SUCCESS) {
			rval = sd_send_scsi_READ_CAPACITY(un,
			    &capacity,
			    &lbasize, SD_PATH_DIRECT);
			if (rval != 0) {
				sd_pm_exit(un);
				mutex_enter(SD_MUTEX(un));
				goto done;
			}
		} else {
			rval = EIO;
			mutex_enter(SD_MUTEX(un));
			goto done;
		}
		mutex_enter(SD_MUTEX(un));

		/* Record the new capacity/blocksize of the inserted media. */
		sd_update_block_info(un, lbasize, capacity);

		un->un_f_geometry_is_valid	= FALSE;
		(void) sd_validate_geometry(un, SD_PATH_DIRECT);

		/* Lock the media in the drive now that it is in use. */
		mutex_exit(SD_MUTEX(un));
		rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_PREVENT,
		    SD_PATH_DIRECT);
		sd_pm_exit(un);

		mutex_enter(SD_MUTEX(un));
	}
done:
	un->un_f_watcht_stopped = FALSE;
	if (un->un_swr_token) {
		/*
		 * Use of this local token and the mutex ensures that we avoid
		 * some race conditions associated with terminating the
		 * scsi watch.
		 */
		token = un->un_swr_token;
		un->un_swr_token = (opaque_t)NULL;
		mutex_exit(SD_MUTEX(un));
		(void) scsi_watch_request_terminate(token,
		    SCSI_WATCH_TERMINATE_WAIT);
		mutex_enter(SD_MUTEX(un));
	}

	/*
	 * Update the capacity kstat value, if no media previously
	 * (capacity kstat is 0) and a media has been inserted
	 * (un_f_blockcount_is_valid == TRUE)
	 */
	if (un->un_errstats) {
		struct sd_errstats	*stp = NULL;

		stp = (struct sd_errstats *)un->un_errstats->ks_data;
		if ((stp->sd_capacity.value.ui64 == 0) &&
		    (un->un_f_blockcount_is_valid == TRUE)) {
			stp->sd_capacity.value.ui64 =
			    (uint64_t)((uint64_t)un->un_blockcount *
			    un->un_sys_blocksize);
		}
	}
	mutex_exit(SD_MUTEX(un));
	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
	return (rval);
}
24262 
24263 
24264 /*
24265  *    Function: sd_delayed_cv_broadcast
24266  *
24267  * Description: Delayed cv_broadcast to allow for target to recover from media
24268  *		insertion.
24269  *
24270  *   Arguments: arg - driver soft state (unit) structure
24271  */
24272 
24273 static void
24274 sd_delayed_cv_broadcast(void *arg)
24275 {
24276 	struct sd_lun *un = arg;
24277 
24278 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
24279 
24280 	mutex_enter(SD_MUTEX(un));
24281 	un->un_dcvb_timeid = NULL;
24282 	cv_broadcast(&un->un_state_cv);
24283 	mutex_exit(SD_MUTEX(un));
24284 }
24285 
24286 
24287 /*
24288  *    Function: sd_media_watch_cb
24289  *
24290  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
24291  *		routine processes the TUR sense data and updates the driver
24292  *		state if a transition has occurred. The user thread
24293  *		(sd_check_media) is then signalled.
24294  *
24295  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24296  *			among multiple watches that share this callback function
24297  *		resultp - scsi watch facility result packet containing scsi
24298  *			  packet, status byte and sense data
24299  *
24300  * Return Code: 0 for success, -1 for failure
24301  */
24302 
static int
sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
{
	struct sd_lun			*un;
	struct scsi_status		*statusp = resultp->statusp;
	struct scsi_extended_sense	*sensep = resultp->sensep;
	enum dkio_state			state = DKIO_NONE;
	dev_t				dev = (dev_t)arg;
	uchar_t				actual_sense_length;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (-1);
	}
	actual_sense_length = resultp->actual_sense_length;

	mutex_enter(SD_MUTEX(un));
	SD_TRACE(SD_LOG_COMMON, un,
	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
	    *((char *)statusp), (void *)sensep, actual_sense_length);

	/* Device has disappeared: record it and wake the waiter. */
	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
		un->un_mediastate = DKIO_DEV_GONE;
		cv_broadcast(&un->un_state_cv);
		mutex_exit(SD_MUTEX(un));

		return (0);
	}

	/*
	 * If there was a check condition then sensep points to valid sense data
	 * If status was not a check condition but a reservation or busy status
	 * then the new state is DKIO_NONE
	 */
	if (sensep != NULL) {
		SD_INFO(SD_LOG_COMMON, un,
		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
		    sensep->es_key, sensep->es_add_code, sensep->es_qual_code);
		/* This routine only uses up to 13 bytes of sense data. */
		if (actual_sense_length >= 13) {
			/* UNIT ATTENTION / ASC 0x28: medium may have changed */
			if (sensep->es_key == KEY_UNIT_ATTENTION) {
				if (sensep->es_add_code == 0x28) {
					state = DKIO_INSERTED;
				}
			} else {
				/*
				 * if 02/04/02  means that the host
				 * should send start command. Explicitly
				 * leave the media state as is
				 * (inserted) as the media is inserted
				 * and host has stopped device for PM
				 * reasons. Upon next true read/write
				 * to this media will bring the
				 * device to the right state good for
				 * media access.
				 */
				/* NOT READY / ASC 0x3a: medium not present */
				if ((sensep->es_key == KEY_NOT_READY) &&
				    (sensep->es_add_code == 0x3a)) {
					state = DKIO_EJECTED;
				}

				/*
				 * If the drive is busy with an operation
				 * or long write, keep the media in an
				 * inserted state.
				 */

				if ((sensep->es_key == KEY_NOT_READY) &&
				    (sensep->es_add_code == 0x04) &&
				    ((sensep->es_qual_code == 0x02) ||
				    (sensep->es_qual_code == 0x07) ||
				    (sensep->es_qual_code == 0x08))) {
					state = DKIO_INSERTED;
				}
			}
		}
	} else if ((*((char *)statusp) == STATUS_GOOD) &&
	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
		/* TUR succeeded with no sense data: media is present. */
		state = DKIO_INSERTED;
	}

	SD_TRACE(SD_LOG_COMMON, un,
	    "sd_media_watch_cb: state=%x, specified=%x\n",
	    state, un->un_specified_mediastate);

	/*
	 * now signal the waiting thread if this is *not* the specified state;
	 * delay the signal if the state is DKIO_INSERTED to allow the target
	 * to recover
	 */
	if (state != un->un_specified_mediastate) {
		un->un_mediastate = state;
		if (state == DKIO_INSERTED) {
			/*
			 * delay the signal to give the drive a chance
			 * to do what it apparently needs to do
			 */
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_media_watch_cb: delayed cv_broadcast\n");
			if (un->un_dcvb_timeid == NULL) {
				un->un_dcvb_timeid =
				    timeout(sd_delayed_cv_broadcast, un,
				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
			}
		} else {
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_media_watch_cb: immediate cv_broadcast\n");
			cv_broadcast(&un->un_state_cv);
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (0);
}
24415 
24416 
24417 /*
24418  *    Function: sd_dkio_get_temp
24419  *
24420  * Description: This routine is the driver entry point for handling ioctl
24421  *		requests to get the disk temperature.
24422  *
24423  *   Arguments: dev  - the device number
24424  *		arg  - pointer to user provided dk_temperature structure.
24425  *		flag - this argument is a pass through to ddi_copyxxx()
24426  *		       directly from the mode argument of ioctl().
24427  *
24428  * Return Code: 0
24429  *		EFAULT
24430  *		ENXIO
24431  *		EAGAIN
24432  */
24433 
static int
sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct dk_temperature	*dktemp = NULL;
	uchar_t			*temperature_page;
	int			rval = 0;
	/* SD_PATH_DIRECT below doubles as "sd_pm_entry() succeeded" marker */
	int			path_flag = SD_PATH_STANDARD;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);

	/* copyin the disk temp argument to get the user flags */
	if (ddi_copyin((void *)arg, dktemp,
	    sizeof (struct dk_temperature), flag) != 0) {
		rval = EFAULT;
		goto done;
	}

	/* Initialize the temperature to invalid. */
	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;

	/*
	 * Note: Investigate removing the "bypass pm" semantic.
	 * Can we just bypass PM always?
	 */
	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
		path_flag = SD_PATH_DIRECT;
		ASSERT(!mutex_owned(&un->un_pm_mutex));
		mutex_enter(&un->un_pm_mutex);
		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
			/*
			 * If DKT_BYPASS_PM is set, and the drive happens to be
			 * in low power mode, we can not wake it up, Need to
			 * return EAGAIN.
			 */
			mutex_exit(&un->un_pm_mutex);
			rval = EAGAIN;
			goto done;
		} else {
			/*
			 * Indicate to PM the device is busy. This is required
			 * to avoid a race - i.e. the ioctl is issuing a
			 * command and the pm framework brings down the device
			 * to low power mode (possible power cut-off on some
			 * platforms).
			 */
			mutex_exit(&un->un_pm_mutex);
			if (sd_pm_entry(un) != DDI_SUCCESS) {
				rval = EAGAIN;
				goto done;
			}
		}
	}

	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);

	/* Issue LOG SENSE for the temperature page; rval carries its error. */
	if ((rval = sd_send_scsi_LOG_SENSE(un, temperature_page,
	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag)) != 0) {
		goto done2;
	}

	/*
	 * For the current temperature verify that the parameter length is 0x02
	 * and the parameter code is 0x00
	 */
	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
	    (temperature_page[5] == 0x00)) {
		/* 0xFF in the value byte means "temperature unavailable" */
		if (temperature_page[9] == 0xFF) {
			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
		} else {
			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
		}
	}

	/*
	 * For the reference temperature verify that the parameter
	 * length is 0x02 and the parameter code is 0x01
	 */
	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
	    (temperature_page[11] == 0x01)) {
		if (temperature_page[15] == 0xFF) {
			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
		} else {
			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
		}
	}

	/* Do the copyout regardless of the temperature commands status. */
	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
	    flag) != 0) {
		rval = EFAULT;
	}

done2:
	/* path_flag == SD_PATH_DIRECT implies sd_pm_entry() was called above */
	if (path_flag == SD_PATH_DIRECT) {
		sd_pm_exit(un);
	}

	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
done:
	if (dktemp != NULL) {
		kmem_free(dktemp, sizeof (struct dk_temperature));
	}

	return (rval);
}
24545 
24546 
24547 /*
24548  *    Function: sd_log_page_supported
24549  *
24550  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
24551  *		supported log pages.
24552  *
24553  *   Arguments: un -
24554  *		log_page -
24555  *
24556  * Return Code: -1 - on error (log sense is optional and may not be supported).
24557  *		0  - log page not found.
24558  *  		1  - log page found.
24559  */
24560 
24561 static int
24562 sd_log_page_supported(struct sd_lun *un, int log_page)
24563 {
24564 	uchar_t *log_page_data;
24565 	int	i;
24566 	int	match = 0;
24567 	int	log_size;
24568 
24569 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
24570 
24571 	if (sd_send_scsi_LOG_SENSE(un, log_page_data, 0xFF, 0, 0x01, 0,
24572 	    SD_PATH_DIRECT) != 0) {
24573 		SD_ERROR(SD_LOG_COMMON, un,
24574 		    "sd_log_page_supported: failed log page retrieval\n");
24575 		kmem_free(log_page_data, 0xFF);
24576 		return (-1);
24577 	}
24578 	log_size = log_page_data[3];
24579 
24580 	/*
24581 	 * The list of supported log pages start from the fourth byte. Check
24582 	 * until we run out of log pages or a match is found.
24583 	 */
24584 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
24585 		if (log_page_data[i] == log_page) {
24586 			match++;
24587 		}
24588 	}
24589 	kmem_free(log_page_data, 0xFF);
24590 	return (match);
24591 }
24592 
24593 
24594 /*
24595  *    Function: sd_mhdioc_failfast
24596  *
24597  * Description: This routine is the driver entry point for handling ioctl
24598  *		requests to enable/disable the multihost failfast option.
24599  *		(MHIOCENFAILFAST)
24600  *
24601  *   Arguments: dev	- the device number
24602  *		arg	- user specified probing interval.
24603  *		flag	- this argument is a pass through to ddi_copyxxx()
24604  *			  directly from the mode argument of ioctl().
24605  *
24606  * Return Code: 0
24607  *		EFAULT
24608  *		ENXIO
24609  */
24610 
24611 static int
24612 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
24613 {
24614 	struct sd_lun	*un = NULL;
24615 	int		mh_time;
24616 	int		rval = 0;
24617 
24618 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24619 		return (ENXIO);
24620 	}
24621 
24622 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
24623 		return (EFAULT);
24624 
24625 	if (mh_time) {
24626 		mutex_enter(SD_MUTEX(un));
24627 		un->un_resvd_status |= SD_FAILFAST;
24628 		mutex_exit(SD_MUTEX(un));
24629 		/*
24630 		 * If mh_time is INT_MAX, then this ioctl is being used for
24631 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
24632 		 */
24633 		if (mh_time != INT_MAX) {
24634 			rval = sd_check_mhd(dev, mh_time);
24635 		}
24636 	} else {
24637 		(void) sd_check_mhd(dev, 0);
24638 		mutex_enter(SD_MUTEX(un));
24639 		un->un_resvd_status &= ~SD_FAILFAST;
24640 		mutex_exit(SD_MUTEX(un));
24641 	}
24642 	return (rval);
24643 }
24644 
24645 
24646 /*
24647  *    Function: sd_mhdioc_takeown
24648  *
24649  * Description: This routine is the driver entry point for handling ioctl
24650  *		requests to forcefully acquire exclusive access rights to the
24651  *		multihost disk (MHIOCTKOWN).
24652  *
24653  *   Arguments: dev	- the device number
24654  *		arg	- user provided structure specifying the delay
24655  *			  parameters in milliseconds
24656  *		flag	- this argument is a pass through to ddi_copyxxx()
24657  *			  directly from the mode argument of ioctl().
24658  *
24659  * Return Code: 0
24660  *		EFAULT
24661  *		ENXIO
24662  */
24663 
static int
sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct mhioctkown	*tkown = NULL;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* A NULL arg means default delay parameters (tkown stays NULL). */
	if (arg != NULL) {
		tkown = (struct mhioctkown *)
		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
		if (rval != 0) {
			rval = EFAULT;
			goto error;
		}
	}

	rval = sd_take_ownership(dev, tkown);
	mutex_enter(SD_MUTEX(un));
	if (rval == 0) {
		un->un_resvd_status |= SD_RESERVE;
		/* User delay is in ms; sd_reinstate_resv_delay is in us. */
		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
			sd_reinstate_resv_delay =
			    tkown->reinstate_resv_delay * 1000;
		} else {
			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
		}
		/*
		 * Give the scsi_watch routine interval set by
		 * the MHIOCENFAILFAST ioctl precedence here.
		 */
		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
			mutex_exit(SD_MUTEX(un));
			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
			SD_TRACE(SD_LOG_IOCTL_MHD, un,
			    "sd_mhdioc_takeown : %d\n",
			    sd_reinstate_resv_delay);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		/* Register for reset notification to detect lost reserves. */
		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
		    sd_mhd_reset_notify_cb, (caddr_t)un);
	} else {
		un->un_resvd_status &= ~SD_RESERVE;
		mutex_exit(SD_MUTEX(un));
	}

error:
	if (tkown != NULL) {
		kmem_free(tkown, sizeof (struct mhioctkown));
	}
	return (rval);
}
24721 
24722 
24723 /*
24724  *    Function: sd_mhdioc_release
24725  *
24726  * Description: This routine is the driver entry point for handling ioctl
24727  *		requests to release exclusive access rights to the multihost
24728  *		disk (MHIOCRELEASE).
24729  *
24730  *   Arguments: dev	- the device number
24731  *
24732  * Return Code: 0
24733  *		ENXIO
24734  */
24735 
static int
sd_mhdioc_release(dev_t dev)
{
	struct sd_lun		*un = NULL;
	timeout_id_t		resvd_timeid_save;
	int			resvd_status_save;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * Save the reservation status so it can be restored if the release
	 * fails, then clear all reservation-related flags up front.
	 */
	mutex_enter(SD_MUTEX(un));
	resvd_status_save = un->un_resvd_status;
	un->un_resvd_status &=
	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
	if (un->un_resvd_timeid) {
		/* Drop the mutex before untimeout() to avoid deadlock. */
		resvd_timeid_save = un->un_resvd_timeid;
		un->un_resvd_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(resvd_timeid_save);
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * destroy any pending timeout thread that may be attempting to
	 * reinstate reservation on this device.
	 */
	sd_rmv_resv_reclaim_req(dev);

	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
		/* Terminate the MHD watch unless failfast is still armed. */
		mutex_enter(SD_MUTEX(un));
		if ((un->un_mhd_token) &&
		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
			mutex_exit(SD_MUTEX(un));
			(void) sd_check_mhd(dev, 0);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
		    sd_mhd_reset_notify_cb, (caddr_t)un);
	} else {
		/*
		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
		 */
		mutex_enter(SD_MUTEX(un));
		un->un_resvd_status = resvd_status_save;
		mutex_exit(SD_MUTEX(un));
	}
	return (rval);
}
24788 
24789 
24790 /*
24791  *    Function: sd_mhdioc_register_devid
24792  *
24793  * Description: This routine is the driver entry point for handling ioctl
24794  *		requests to register the device id (MHIOCREREGISTERDEVID).
24795  *
24796  *		Note: The implementation for this ioctl has been updated to
24797  *		be consistent with the original PSARC case (1999/357)
24798  *		(4375899, 4241671, 4220005)
24799  *
24800  *   Arguments: dev	- the device number
24801  *
24802  * Return Code: 0
24803  *		ENXIO
24804  */
24805 
24806 static int
24807 sd_mhdioc_register_devid(dev_t dev)
24808 {
24809 	struct sd_lun	*un = NULL;
24810 	int		rval = 0;
24811 
24812 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24813 		return (ENXIO);
24814 	}
24815 
24816 	ASSERT(!mutex_owned(SD_MUTEX(un)));
24817 
24818 	mutex_enter(SD_MUTEX(un));
24819 
24820 	/* If a devid already exists, de-register it */
24821 	if (un->un_devid != NULL) {
24822 		ddi_devid_unregister(SD_DEVINFO(un));
24823 		/*
24824 		 * After unregister devid, needs to free devid memory
24825 		 */
24826 		ddi_devid_free(un->un_devid);
24827 		un->un_devid = NULL;
24828 	}
24829 
24830 	/* Check for reservation conflict */
24831 	mutex_exit(SD_MUTEX(un));
24832 	rval = sd_send_scsi_TEST_UNIT_READY(un, 0);
24833 	mutex_enter(SD_MUTEX(un));
24834 
24835 	switch (rval) {
24836 	case 0:
24837 		sd_register_devid(un, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
24838 		break;
24839 	case EACCES:
24840 		break;
24841 	default:
24842 		rval = EIO;
24843 	}
24844 
24845 	mutex_exit(SD_MUTEX(un));
24846 	return (rval);
24847 }
24848 
24849 
24850 /*
24851  *    Function: sd_mhdioc_inkeys
24852  *
24853  * Description: This routine is the driver entry point for handling ioctl
24854  *		requests to issue the SCSI-3 Persistent In Read Keys command
24855  *		to the device (MHIOCGRP_INKEYS).
24856  *
24857  *   Arguments: dev	- the device number
24858  *		arg	- user provided in_keys structure
24859  *		flag	- this argument is a pass through to ddi_copyxxx()
24860  *			  directly from the mode argument of ioctl().
24861  *
24862  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
24863  *		ENXIO
24864  *		EFAULT
24865  */
24866 
static int
sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un;
	mhioc_inkeys_t		inkeys;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		/* 32-bit caller on a 64-bit kernel: convert the structure. */
		struct mhioc_inkeys32	inkeys32;

		if (ddi_copyin(arg, &inkeys32,
		    sizeof (struct mhioc_inkeys32), flag) != 0) {
			return (EFAULT);
		}
		/* Widen the 32-bit key-list pointer to the native form. */
		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
		if ((rval = sd_persistent_reservation_in_read_keys(un,
		    &inkeys, flag)) != 0) {
			return (rval);
		}
		/* Only the generation count is copied back out. */
		inkeys32.generation = inkeys.generation;
		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
		    flag) != 0) {
			return (EFAULT);
		}
		break;
	}
	case DDI_MODEL_NONE:
		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
		    flag) != 0) {
			return (EFAULT);
		}
		if ((rval = sd_persistent_reservation_in_read_keys(un,
		    &inkeys, flag)) != 0) {
			return (rval);
		}
		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
		    flag) != 0) {
			return (EFAULT);
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */

	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
		return (EFAULT);
	}
	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
	if (rval != 0) {
		return (rval);
	}
	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
		return (EFAULT);
	}

#endif /* _MULTI_DATAMODEL */

	return (rval);
}
24932 
24933 
24934 /*
24935  *    Function: sd_mhdioc_inresv
24936  *
24937  * Description: This routine is the driver entry point for handling ioctl
24938  *		requests to issue the SCSI-3 Persistent In Read Reservations
 *		command to the device (MHIOCGRP_INRESV).
24940  *
24941  *   Arguments: dev	- the device number
24942  *		arg	- user provided in_resv structure
24943  *		flag	- this argument is a pass through to ddi_copyxxx()
24944  *			  directly from the mode argument of ioctl().
24945  *
24946  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
24947  *		ENXIO
24948  *		EFAULT
24949  */
24950 
24951 static int
24952 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
24953 {
24954 	struct sd_lun		*un;
24955 	mhioc_inresvs_t		inresvs;
24956 	int			rval = 0;
24957 
24958 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24959 		return (ENXIO);
24960 	}
24961 
24962 #ifdef _MULTI_DATAMODEL
24963 
24964 	switch (ddi_model_convert_from(flag & FMODELS)) {
24965 	case DDI_MODEL_ILP32: {
24966 		struct mhioc_inresvs32	inresvs32;
24967 
24968 		if (ddi_copyin(arg, &inresvs32,
24969 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
24970 			return (EFAULT);
24971 		}
24972 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
24973 		if ((rval = sd_persistent_reservation_in_read_resv(un,
24974 		    &inresvs, flag)) != 0) {
24975 			return (rval);
24976 		}
24977 		inresvs32.generation = inresvs.generation;
24978 		if (ddi_copyout(&inresvs32, arg,
24979 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
24980 			return (EFAULT);
24981 		}
24982 		break;
24983 	}
24984 	case DDI_MODEL_NONE:
24985 		if (ddi_copyin(arg, &inresvs,
24986 		    sizeof (mhioc_inresvs_t), flag) != 0) {
24987 			return (EFAULT);
24988 		}
24989 		if ((rval = sd_persistent_reservation_in_read_resv(un,
24990 		    &inresvs, flag)) != 0) {
24991 			return (rval);
24992 		}
24993 		if (ddi_copyout(&inresvs, arg,
24994 		    sizeof (mhioc_inresvs_t), flag) != 0) {
24995 			return (EFAULT);
24996 		}
24997 		break;
24998 	}
24999 
25000 #else /* ! _MULTI_DATAMODEL */
25001 
25002 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
25003 		return (EFAULT);
25004 	}
25005 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
25006 	if (rval != 0) {
25007 		return (rval);
25008 	}
25009 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
25010 		return (EFAULT);
25011 	}
25012 
25013 #endif /* ! _MULTI_DATAMODEL */
25014 
25015 	return (rval);
25016 }
25017 
25018 
25019 /*
25020  * The following routines support the clustering functionality described below
25021  * and implement lost reservation reclaim functionality.
25022  *
25023  * Clustering
25024  * ----------
25025  * The clustering code uses two different, independent forms of SCSI
25026  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
25027  * Persistent Group Reservations. For any particular disk, it will use either
25028  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
25029  *
25030  * SCSI-2
25031  * The cluster software takes ownership of a multi-hosted disk by issuing the
25032  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
 * MHIOCRELEASE ioctl. Closely related is the MHIOCENFAILFAST ioctl -- a cluster,
25034  * just after taking ownership of the disk with the MHIOCTKOWN ioctl then issues
25035  * the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the driver. The
25036  * meaning of failfast is that if the driver (on this host) ever encounters the
25037  * scsi error return code RESERVATION_CONFLICT from the device, it should
25038  * immediately panic the host. The motivation for this ioctl is that if this
25039  * host does encounter reservation conflict, the underlying cause is that some
25040  * other host of the cluster has decided that this host is no longer in the
25041  * cluster and has seized control of the disks for itself. Since this host is no
25042  * longer in the cluster, it ought to panic itself. The MHIOCENFAILFAST ioctl
25043  * does two things:
25044  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
25045  *      error to panic the host
25046  *      (b) it sets up a periodic timer to test whether this host still has
25047  *      "access" (in that no other host has reserved the device):  if the
25048  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
25049  *      purpose of that periodic timer is to handle scenarios where the host is
25050  *      otherwise temporarily quiescent, temporarily doing no real i/o.
25051  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
25052  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
25053  * the device itself.
25054  *
25055  * SCSI-3 PGR
25056  * A direct semantic implementation of the SCSI-3 Persistent Reservation
25057  * facility is supported through the shared multihost disk ioctls
25058  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
25059  * MHIOCGRP_PREEMPTANDABORT)
25060  *
25061  * Reservation Reclaim:
25062  * --------------------
25063  * To support the lost reservation reclaim operations this driver creates a
25064  * single thread to handle reinstating reservations on all devices that have
 * lost reservations. sd_resv_reclaim_requests are logged for all devices that
 * have LOST RESERVATIONS when the scsi watch facility calls back sd_mhd_watch_cb
25067  * and the reservation reclaim thread loops through the requests to regain the
25068  * lost reservations.
25069  */
25070 
25071 /*
25072  *    Function: sd_check_mhd()
25073  *
25074  * Description: This function sets up and submits a scsi watch request or
25075  *		terminates an existing watch request. This routine is used in
25076  *		support of reservation reclaim.
25077  *
25078  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
25079  *			 among multiple watches that share the callback function
 *		interval - the number of milliseconds specifying the watch
25081  *			   interval for issuing TEST UNIT READY commands. If
25082  *			   set to 0 the watch should be terminated. If the
25083  *			   interval is set to 0 and if the device is required
25084  *			   to hold reservation while disabling failfast, the
25085  *			   watch is restarted with an interval of
25086  *			   reinstate_resv_delay.
25087  *
25088  * Return Code: 0	   - Successful submit/terminate of scsi watch request
25089  *		ENXIO      - Indicates an invalid device was specified
25090  *		EAGAIN     - Unable to submit the scsi watch request
25091  */
25092 
static int
sd_check_mhd(dev_t dev, int interval)
{
	struct sd_lun	*un;
	opaque_t	token;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* is this a watch termination request? */
	if (interval == 0) {
		mutex_enter(SD_MUTEX(un));
		/* if there is an existing watch task then terminate it */
		if (un->un_mhd_token) {
			token = un->un_mhd_token;
			un->un_mhd_token = NULL;
			mutex_exit(SD_MUTEX(un));
			/* Blocks until the watch thread has quiesced. */
			(void) scsi_watch_request_terminate(token,
			    SCSI_WATCH_TERMINATE_WAIT);
			mutex_enter(SD_MUTEX(un));
		} else {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Note: If we return here we don't check for the
			 * failfast case. This is the original legacy
			 * implementation but perhaps we should be checking
			 * the failfast case.
			 */
			return (0);
		}
		/*
		 * If the device is required to hold reservation while
		 * disabling failfast, we need to restart the scsi_watch
		 * routine with an interval of reinstate_resv_delay.
		 */
		if (un->un_resvd_status & SD_RESERVE) {
			/* sd_reinstate_resv_delay is in usecs; get msecs. */
			interval = sd_reinstate_resv_delay/1000;
		} else {
			/* no failfast so bail */
			mutex_exit(SD_MUTEX(un));
			return (0);
		}
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * adjust minimum time interval to 1 second,
	 * and convert from msecs to usecs
	 */
	if (interval > 0 && interval < 1000) {
		interval = 1000;
	}
	interval *= 1000;

	/*
	 * submit the request to the scsi_watch service
	 */
	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
	if (token == NULL) {
		return (EAGAIN);
	}

	/*
	 * save token for termination later on
	 */
	mutex_enter(SD_MUTEX(un));
	un->un_mhd_token = token;
	mutex_exit(SD_MUTEX(un));
	return (0);
}
25165 
25166 
25167 /*
25168  *    Function: sd_mhd_watch_cb()
25169  *
25170  * Description: This function is the call back function used by the scsi watch
25171  *		facility. The scsi watch facility sends the "Test Unit Ready"
25172  *		and processes the status. If applicable (i.e. a "Unit Attention"
25173  *		status and automatic "Request Sense" not used) the scsi watch
25174  *		facility will send a "Request Sense" and retrieve the sense data
25175  *		to be passed to this callback function. In either case the
25176  *		automatic "Request Sense" or the facility submitting one, this
25177  *		callback is passed the status and sense data.
25178  *
25179  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25180  *			among multiple watches that share this callback function
25181  *		resultp - scsi watch facility result packet containing scsi
25182  *			  packet, status byte and sense data
25183  *
25184  * Return Code: 0 - continue the watch task
25185  *		non-zero - terminate the watch task
25186  */
25187 
static int
sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
{
	struct sd_lun			*un;
	struct scsi_status		*statusp;
	struct scsi_extended_sense	*sensep;
	struct scsi_pkt			*pkt;
	uchar_t				actual_sense_length;
	dev_t  				dev = (dev_t)arg;

	ASSERT(resultp != NULL);
	statusp			= resultp->statusp;
	sensep			= resultp->sensep;
	pkt			= resultp->pkt;
	actual_sense_length	= resultp->actual_sense_length;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		/* Non-zero return terminates the watch task. */
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_IOCTL_MHD, un,
	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));

	/* Begin processing of the status and/or sense data */
	if (pkt->pkt_reason != CMD_CMPLT) {
		/* Handle the incomplete packet */
		sd_mhd_watch_incomplete(un, pkt);
		return (0);
	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
		if (*((unsigned char *)statusp)
		    == STATUS_RESERVATION_CONFLICT) {
			/*
			 * Handle a reservation conflict by panicking if
			 * configured for failfast or by logging the conflict
			 * and updating the reservation status
			 */
			mutex_enter(SD_MUTEX(un));
			if ((un->un_resvd_status & SD_FAILFAST) &&
			    (sd_failfast_enable)) {
				sd_panic_for_res_conflict(un);
				/*NOTREACHED*/
			}
			SD_INFO(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_cb: Reservation Conflict\n");
			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
			mutex_exit(SD_MUTEX(un));
		}
	}

	/*
	 * Each path below must leave SD_MUTEX held on entry to the
	 * lost-reservation check, except the short-sense early return.
	 */
	if (sensep != NULL) {
		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
			mutex_enter(SD_MUTEX(un));
			if ((sensep->es_add_code == SD_SCSI_RESET_SENSE_CODE) &&
			    (un->un_resvd_status & SD_RESERVE)) {
				/*
				 * The additional sense code indicates a power
				 * on or bus device reset has occurred; update
				 * the reservation status.
				 */
				un->un_resvd_status |=
				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_mhd_watch_cb: Lost Reservation\n");
			}
		} else {
			/* Sense data too short to interpret; keep watching. */
			return (0);
		}
	} else {
		mutex_enter(SD_MUTEX(un));
	}

	if ((un->un_resvd_status & SD_RESERVE) &&
	    (un->un_resvd_status & SD_LOST_RESERVE)) {
		if (un->un_resvd_status & SD_WANT_RESERVE) {
			/*
			 * A reset occurred in between the last probe and this
			 * one so if a timeout is pending cancel it.
			 */
			if (un->un_resvd_timeid) {
				timeout_id_t temp_id = un->un_resvd_timeid;
				un->un_resvd_timeid = NULL;
				mutex_exit(SD_MUTEX(un));
				(void) untimeout(temp_id);
				mutex_enter(SD_MUTEX(un));
			}
			un->un_resvd_status &= ~SD_WANT_RESERVE;
		}
		if (un->un_resvd_timeid == 0) {
			/* Schedule a timeout to handle the lost reservation */
			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
			    (void *)dev,
			    drv_usectohz(sd_reinstate_resv_delay));
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (0);
}
25286 
25287 
25288 /*
25289  *    Function: sd_mhd_watch_incomplete()
25290  *
25291  * Description: This function is used to find out why a scsi pkt sent by the
25292  *		scsi watch facility was not completed. Under some scenarios this
25293  *		routine will return. Otherwise it will send a bus reset to see
25294  *		if the drive is still online.
25295  *
25296  *   Arguments: un  - driver soft state (unit) structure
25297  *		pkt - incomplete scsi pkt
25298  */
25299 
static void
sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
{
	int	be_chatty;
	int	perr;

	ASSERT(pkt != NULL);
	ASSERT(un != NULL);
	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
	perr		= (pkt->pkt_statistics & STAT_PERR);

	mutex_enter(SD_MUTEX(un));
	if (un->un_state == SD_STATE_DUMPING) {
		/* Do nothing while a crash dump is in progress. */
		mutex_exit(SD_MUTEX(un));
		return;
	}

	switch (pkt->pkt_reason) {
	case CMD_UNX_BUS_FREE:
		/*
		 * If we had a parity error that caused the target to drop BSY*,
		 * don't be chatty about it.
		 */
		if (perr && be_chatty) {
			be_chatty = 0;
		}
		break;
	case CMD_TAG_REJECT:
		/*
		 * The SCSI-2 spec states that a tag reject will be sent by the
		 * target if tagged queuing is not supported. A tag reject may
		 * also be sent during certain initialization periods or to
		 * control internal resources. For the latter case the target
		 * may also return Queue Full.
		 *
		 * If this driver receives a tag reject from a target that is
		 * going through an init period or controlling internal
		 * resources tagged queuing will be disabled. This is a less
		 * than optimal behavior but the driver is unable to determine
		 * the target state and assumes tagged queueing is not supported
		 */
		pkt->pkt_flags = 0;
		un->un_tagflags = 0;

		if (un->un_f_opt_queueing == TRUE) {
			un->un_throttle = min(un->un_throttle, 3);
		} else {
			un->un_throttle = 1;
		}
		mutex_exit(SD_MUTEX(un));
		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
		mutex_enter(SD_MUTEX(un));
		break;
	case CMD_INCOMPLETE:
		/*
		 * The transport stopped with an abnormal state, fallthrough and
		 * reset the target and/or bus unless selection did not complete
		 * (indicated by STATE_GOT_BUS) in which case we don't want to
		 * go through a target/bus reset
		 */
		if (pkt->pkt_state == STATE_GOT_BUS) {
			break;
		}
		/*FALLTHROUGH*/

	case CMD_TIMEOUT:
	default:
		/*
		 * The lun may still be running the command, so a lun reset
		 * should be attempted. If the lun reset fails or cannot be
		 * issued, than try a target reset. Lastly try a bus reset.
		 */
		if ((pkt->pkt_statistics &
		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
			int reset_retval = 0;
			/*
			 * NOTE(review): un_f_allow_bus_device_reset and
			 * un_f_lun_reset_enabled are read here after the
			 * mutex is dropped — presumably these flags are
			 * stable after attach; confirm.
			 */
			mutex_exit(SD_MUTEX(un));
			if (un->un_f_allow_bus_device_reset == TRUE) {
				if (un->un_f_lun_reset_enabled == TRUE) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_LUN);
				}
				if (reset_retval == 0) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_TARGET);
				}
			}
			if (reset_retval == 0) {
				/* Last resort: reset the whole bus. */
				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			}
			mutex_enter(SD_MUTEX(un));
		}
		break;
	}

	/* A device/bus reset has occurred; update the reservation status. */
	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			un->un_resvd_status |=
			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
			SD_INFO(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_incomplete: Lost Reservation\n");
		}
	}

	/*
	 * The disk has been turned off; Update the device state.
	 *
	 * Note: Should we be offlining the disk here?
	 */
	if (pkt->pkt_state == STATE_GOT_BUS) {
		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
		    "Disk not responding to selection\n");
		if (un->un_state != SD_STATE_OFFLINE) {
			New_state(un, SD_STATE_OFFLINE);
		}
	} else if (be_chatty) {
		/*
		 * suppress messages if they are all the same pkt reason;
		 * with TQ, many (up to 256) are returned with the same
		 * pkt_reason
		 */
		if (pkt->pkt_reason != un->un_last_pkt_reason) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_incomplete: "
			    "SCSI transport failed: reason '%s'\n",
			    scsi_rname(pkt->pkt_reason));
		}
	}
	un->un_last_pkt_reason = pkt->pkt_reason;
	mutex_exit(SD_MUTEX(un));
}
25434 
25435 
25436 /*
25437  *    Function: sd_sname()
25438  *
25439  * Description: This is a simple little routine to return a string containing
25440  *		a printable description of command status byte for use in
25441  *		logging.
25442  *
25443  *   Arguments: status - pointer to a status byte
25444  *
25445  * Return Code: char * - string containing status description.
25446  */
25447 
25448 static char *
25449 sd_sname(uchar_t status)
25450 {
25451 	switch (status & STATUS_MASK) {
25452 	case STATUS_GOOD:
25453 		return ("good status");
25454 	case STATUS_CHECK:
25455 		return ("check condition");
25456 	case STATUS_MET:
25457 		return ("condition met");
25458 	case STATUS_BUSY:
25459 		return ("busy");
25460 	case STATUS_INTERMEDIATE:
25461 		return ("intermediate");
25462 	case STATUS_INTERMEDIATE_MET:
25463 		return ("intermediate - condition met");
25464 	case STATUS_RESERVATION_CONFLICT:
25465 		return ("reservation_conflict");
25466 	case STATUS_TERMINATED:
25467 		return ("command terminated");
25468 	case STATUS_QFULL:
25469 		return ("queue full");
25470 	default:
25471 		return ("<unknown status>");
25472 	}
25473 }
25474 
25475 
25476 /*
25477  *    Function: sd_mhd_resvd_recover()
25478  *
25479  * Description: This function adds a reservation entry to the
25480  *		sd_resv_reclaim_request list and signals the reservation
25481  *		reclaim thread that there is work pending. If the reservation
25482  *		reclaim thread has not been previously created this function
25483  *		will kick it off.
25484  *
25485  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25486  *			among multiple watches that share this callback function
25487  *
25488  *     Context: This routine is called by timeout() and is run in interrupt
25489  *		context. It must not sleep or call other functions which may
25490  *		sleep.
25491  */
25492 
static void
sd_mhd_resvd_recover(void *arg)
{
	dev_t			dev = (dev_t)arg;
	struct sd_lun		*un;
	struct sd_thr_request	*sd_treq = NULL;
	struct sd_thr_request	*sd_cur = NULL;
	struct sd_thr_request	*sd_prev = NULL;
	int			already_there = 0;

	/* Unit may have detached since the timeout was scheduled. */
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return;
	}

	mutex_enter(SD_MUTEX(un));
	/* This timeout has now fired; clear the id so it can be rearmed. */
	un->un_resvd_timeid = NULL;
	if (un->un_resvd_status & SD_WANT_RESERVE) {
		/*
		 * There was a reset so don't issue the reserve, allow the
		 * sd_mhd_watch_cb callback function to notice this and
		 * reschedule the timeout for reservation.
		 */
		mutex_exit(SD_MUTEX(un));
		return;
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * Add this device to the sd_resv_reclaim_request list and the
	 * sd_resv_reclaim_thread should take care of the rest.
	 *
	 * Note: We can't sleep in this context so if the memory allocation
	 * fails allow the sd_mhd_watch_cb callback function to notice this and
	 * reschedule the timeout for reservation.  (4378460)
	 */
	sd_treq = (struct sd_thr_request *)
	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
	if (sd_treq == NULL) {
		return;
	}

	sd_treq->sd_thr_req_next = NULL;
	sd_treq->dev = dev;
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	if (sd_tr.srq_thr_req_head == NULL) {
		/* Empty queue: new request becomes the head. */
		sd_tr.srq_thr_req_head = sd_treq;
	} else {
		/* Walk the queue, checking for a duplicate for this dev. */
		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
			if (sd_cur->dev == dev) {
				/*
				 * already in Queue so don't log
				 * another request for the device
				 */
				already_there = 1;
				break;
			}
			sd_prev = sd_cur;
		}
		if (!already_there) {
			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
			    "logging request for %lx\n", dev);
			/* Append the new request at the tail of the list. */
			sd_prev->sd_thr_req_next = sd_treq;
		} else {
			/* Duplicate: discard the freshly allocated request. */
			kmem_free(sd_treq, sizeof (struct sd_thr_request));
		}
	}

	/*
	 * Create a kernel thread to do the reservation reclaim and free up this
	 * thread. We cannot block this thread while we go away to do the
	 * reservation reclaim
	 */
	if (sd_tr.srq_resv_reclaim_thread == NULL)
		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
		    sd_resv_reclaim_thread, NULL,
		    0, &p0, TS_RUN, v.v_maxsyspri - 2);

	/* Tell the reservation reclaim thread that it has work to do */
	cv_signal(&sd_tr.srq_resv_reclaim_cv);
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
}
25575 
25576 /*
25577  *    Function: sd_resv_reclaim_thread()
25578  *
25579  * Description: This function implements the reservation reclaim operations
25580  *
25581  *   Arguments: arg - the device 'dev_t' is used for context to discriminate
25582  *		      among multiple watches that share this callback function
25583  */
25584 
static void
sd_resv_reclaim_thread()
{
	struct sd_lun		*un;
	struct sd_thr_request	*sd_mhreq;

	/* Wait for work */
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	/*
	 * NOTE(review): predicate is checked with 'if', not 'while'.  If
	 * cv_wait were to return with an empty queue, the loop below is
	 * simply skipped and this thread cleans up and exits; a later
	 * request re-creates the thread (see sd_mhd_resvd_recover), so
	 * this appears benign — confirm before changing.
	 */
	if (sd_tr.srq_thr_req_head == NULL) {
		cv_wait(&sd_tr.srq_resv_reclaim_cv,
		    &sd_tr.srq_resv_reclaim_mutex);
	}

	/* Loop while we have work */
	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
		un = ddi_get_soft_state(sd_state,
		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
		if (un == NULL) {
			/*
			 * softstate structure is NULL so just
			 * dequeue the request and continue
			 */
			sd_tr.srq_thr_req_head =
			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
			kmem_free(sd_tr.srq_thr_cur_req,
			    sizeof (struct sd_thr_request));
			continue;
		}

		/* dequeue the request */
		sd_mhreq = sd_tr.srq_thr_cur_req;
		sd_tr.srq_thr_req_head =
		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
		/* Drop the queue lock while performing the (blocking) I/O. */
		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);

		/*
		 * Reclaim reservation only if SD_RESERVE is still set. There
		 * may have been a call to MHIOCRELEASE before we got here.
		 */
		mutex_enter(SD_MUTEX(un));
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			/*
			 * Note: The SD_LOST_RESERVE flag is cleared before
			 * reclaiming the reservation. If this is done after the
			 * call to sd_reserve_release a reservation loss in the
			 * window between pkt completion of reserve cmd and
			 * mutex_enter below may not be recognized
			 */
			un->un_resvd_status &= ~SD_LOST_RESERVE;
			mutex_exit(SD_MUTEX(un));

			if (sd_reserve_release(sd_mhreq->dev,
			    SD_RESERVE) == 0) {
				mutex_enter(SD_MUTEX(un));
				un->un_resvd_status |= SD_RESERVE;
				mutex_exit(SD_MUTEX(un));
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_resv_reclaim_thread: "
				    "Reservation Recovered\n");
			} else {
				/* Reserve failed: flag the loss again. */
				mutex_enter(SD_MUTEX(un));
				un->un_resvd_status |= SD_LOST_RESERVE;
				mutex_exit(SD_MUTEX(un));
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_resv_reclaim_thread: Failed "
				    "Reservation Recovery\n");
			}
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
		/*
		 * wakeup the destroy thread if anyone is waiting on
		 * us to complete.
		 */
		cv_signal(&sd_tr.srq_inprocess_cv);
		SD_TRACE(SD_LOG_IOCTL_MHD, un,
		    "sd_resv_reclaim_thread: cv_signalling current request \n");
	}

	/*
	 * cleanup the sd_tr structure now that this thread will not exist
	 */
	ASSERT(sd_tr.srq_thr_req_head == NULL);
	ASSERT(sd_tr.srq_thr_cur_req == NULL);
	sd_tr.srq_resv_reclaim_thread = NULL;
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
	thread_exit();
}
25677 
25678 
25679 /*
25680  *    Function: sd_rmv_resv_reclaim_req()
25681  *
25682  * Description: This function removes any pending reservation reclaim requests
25683  *		for the specified device.
25684  *
25685  *   Arguments: dev - the device 'dev_t'
25686  */
25687 
25688 static void
25689 sd_rmv_resv_reclaim_req(dev_t dev)
25690 {
25691 	struct sd_thr_request *sd_mhreq;
25692 	struct sd_thr_request *sd_prev;
25693 
25694 	/* Remove a reservation reclaim request from the list */
25695 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25696 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
25697 		/*
25698 		 * We are attempting to reinstate reservation for
25699 		 * this device. We wait for sd_reserve_release()
25700 		 * to return before we return.
25701 		 */
25702 		cv_wait(&sd_tr.srq_inprocess_cv,
25703 		    &sd_tr.srq_resv_reclaim_mutex);
25704 	} else {
25705 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
25706 		if (sd_mhreq && sd_mhreq->dev == dev) {
25707 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
25708 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25709 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25710 			return;
25711 		}
25712 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
25713 			if (sd_mhreq && sd_mhreq->dev == dev) {
25714 				break;
25715 			}
25716 			sd_prev = sd_mhreq;
25717 		}
25718 		if (sd_mhreq != NULL) {
25719 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
25720 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25721 		}
25722 	}
25723 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25724 }
25725 
25726 
25727 /*
25728  *    Function: sd_mhd_reset_notify_cb()
25729  *
25730  * Description: This is a call back function for scsi_reset_notify. This
25731  *		function updates the softstate reserved status and logs the
25732  *		reset. The driver scsi watch facility callback function
25733  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
25734  *		will reclaim the reservation.
25735  *
25736  *   Arguments: arg  - driver soft state (unit) structure
25737  */
25738 
25739 static void
25740 sd_mhd_reset_notify_cb(caddr_t arg)
25741 {
25742 	struct sd_lun *un = (struct sd_lun *)arg;
25743 
25744 	mutex_enter(SD_MUTEX(un));
25745 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25746 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
25747 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25748 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
25749 	}
25750 	mutex_exit(SD_MUTEX(un));
25751 }
25752 
25753 
25754 /*
25755  *    Function: sd_take_ownership()
25756  *
25757  * Description: This routine implements an algorithm to achieve a stable
25758  *		reservation on disks which don't implement priority reserve,
25759  *		and makes sure that other host lose re-reservation attempts.
25760  *		This algorithm contains of a loop that keeps issuing the RESERVE
25761  *		for some period of time (min_ownership_delay, default 6 seconds)
25762  *		During that loop, it looks to see if there has been a bus device
25763  *		reset or bus reset (both of which cause an existing reservation
25764  *		to be lost). If the reservation is lost issue RESERVE until a
25765  *		period of min_ownership_delay with no resets has gone by, or
25766  *		until max_ownership_delay has expired. This loop ensures that
25767  *		the host really did manage to reserve the device, in spite of
25768  *		resets. The looping for min_ownership_delay (default six
25769  *		seconds) is important to early generation clustering products,
25770  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
25771  *		MHIOCENFAILFAST periodic timer of two seconds. By having
25772  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
25773  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
25774  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
25775  *		have already noticed, via the MHIOCENFAILFAST polling, that it
25776  *		no longer "owns" the disk and will have panicked itself.  Thus,
25777  *		the host issuing the MHIOCTKOWN is assured (with timing
25778  *		dependencies) that by the time it actually starts to use the
25779  *		disk for real work, the old owner is no longer accessing it.
25780  *
25781  *		min_ownership_delay is the minimum amount of time for which the
25782  *		disk must be reserved continuously devoid of resets before the
25783  *		MHIOCTKOWN ioctl will return success.
25784  *
25785  *		max_ownership_delay indicates the amount of time by which the
25786  *		take ownership should succeed or timeout with an error.
25787  *
25788  *   Arguments: dev - the device 'dev_t'
25789  *		*p  - struct containing timing info.
25790  *
25791  * Return Code: 0 for success or error code
25792  */
25793 
static int
sd_take_ownership(dev_t dev, struct mhioctkown *p)
{
	struct sd_lun	*un;
	int		rval;
	int		err;
	int		reservation_count   = 0;
	int		min_ownership_delay =  6000000; /* in usec */
	int		max_ownership_delay = 30000000; /* in usec */
	clock_t		start_time;	/* starting time of this algorithm */
	clock_t		end_time;	/* time limit for giving up */
	clock_t		ownership_time;	/* time limit for stable ownership */
	clock_t		current_time;
	clock_t		previous_current_time;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * Attempt a device reservation. A priority reservation is requested.
	 */
	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
	    != SD_SUCCESS) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_take_ownership: return(1)=%d\n", rval);
		return (rval);
	}

	/* Update the softstate reserved status to indicate the reservation */
	mutex_enter(SD_MUTEX(un));
	un->un_resvd_status |= SD_RESERVE;
	un->un_resvd_status &=
	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
	mutex_exit(SD_MUTEX(un));

	/*
	 * Caller-supplied delays override the defaults; the * 1000 suggests
	 * the mhioctkown fields are in msec — confirm against <sys/mhd.h>.
	 */
	if (p != NULL) {
		if (p->min_ownership_delay != 0) {
			min_ownership_delay = p->min_ownership_delay * 1000;
		}
		if (p->max_ownership_delay != 0) {
			max_ownership_delay = p->max_ownership_delay * 1000;
		}
	}
	SD_INFO(SD_LOG_IOCTL_MHD, un,
	    "sd_take_ownership: min, max delays: %d, %d\n",
	    min_ownership_delay, max_ownership_delay);

	start_time = ddi_get_lbolt();
	current_time	= start_time;
	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
	end_time	= start_time + drv_usectohz(max_ownership_delay);

	/* Signed subtraction keeps the comparison safe across lbolt wrap. */
	while (current_time - end_time < 0) {
		delay(drv_usectohz(500000));

		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
			/* One immediate retry before declaring failure. */
			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
				mutex_enter(SD_MUTEX(un));
				rval = (un->un_resvd_status &
				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
				mutex_exit(SD_MUTEX(un));
				break;
			}
		}
		previous_current_time = current_time;
		current_time = ddi_get_lbolt();
		mutex_enter(SD_MUTEX(un));
		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
			/* Disturbed by a reset: restart the stability window */
			ownership_time = ddi_get_lbolt() +
			    drv_usectohz(min_ownership_delay);
			reservation_count = 0;
		} else {
			reservation_count++;
		}
		un->un_resvd_status |= SD_RESERVE;
		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
		mutex_exit(SD_MUTEX(un));

		SD_INFO(SD_LOG_IOCTL_MHD, un,
		    "sd_take_ownership: ticks for loop iteration=%ld, "
		    "reservation=%s\n", (current_time - previous_current_time),
		    reservation_count ? "ok" : "reclaimed");

		/* Success: min_ownership_delay elapsed with >= 4 clean holds */
		if (current_time - ownership_time >= 0 &&
		    reservation_count >= 4) {
			rval = 0; /* Achieved a stable ownership */
			break;
		}
		if (current_time - end_time >= 0) {
			rval = EACCES; /* No ownership in max possible time */
			break;
		}
	}
	SD_TRACE(SD_LOG_IOCTL_MHD, un,
	    "sd_take_ownership: return(2)=%d\n", rval);
	return (rval);
}
25892 
25893 
25894 /*
25895  *    Function: sd_reserve_release()
25896  *
25897  * Description: This function builds and sends scsi RESERVE, RELEASE, and
25898  *		PRIORITY RESERVE commands based on a user specified command type
25899  *
25900  *   Arguments: dev - the device 'dev_t'
25901  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
25902  *		      SD_RESERVE, SD_RELEASE
25903  *
25904  * Return Code: 0 or Error Code
25905  */
25906 
static int
sd_reserve_release(dev_t dev, int cmd)
{
	struct uscsi_cmd	*com = NULL;
	struct sd_lun		*un = NULL;
	char			cdb[CDB_GROUP0];
	int			rval;

	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
	    (cmd == SD_PRIORITY_RESERVE));

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* instantiate and initialize the command and cdb */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP0);
	com->uscsi_flags   = USCSI_SILENT;
	com->uscsi_timeout = un->un_reserve_release_time;
	com->uscsi_cdblen  = CDB_GROUP0;
	com->uscsi_cdb	   = cdb;
	if (cmd == SD_RELEASE) {
		cdb[0] = SCMD_RELEASE;
	} else {
		/* SD_RESERVE and SD_PRIORITY_RESERVE both issue RESERVE(6) */
		cdb[0] = SCMD_RESERVE;
	}

	/* Send the command. */
	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
	    UIO_SYSSPACE, SD_PATH_STANDARD);

	/*
	 * "break" a reservation that is held by another host, by issuing a
	 * reset if priority reserve is desired, and we could not get the
	 * device.
	 */
	if ((cmd == SD_PRIORITY_RESERVE) &&
	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
		/*
		 * First try to reset the LUN. If we cannot, then try a target
		 * reset, followed by a bus reset if the target reset fails.
		 */
		int reset_retval = 0;
		if (un->un_f_lun_reset_enabled == TRUE) {
			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
		}
		if (reset_retval == 0) {
			/* The LUN reset either failed or was not issued */
			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
		}
		if ((reset_retval == 0) &&
		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
			/* Every reset level failed: give up with EIO. */
			rval = EIO;
			kmem_free(com, sizeof (*com));
			return (rval);
		}

		/* Rebuild the uscsi command from scratch after the reset. */
		bzero(com, sizeof (struct uscsi_cmd));
		com->uscsi_flags   = USCSI_SILENT;
		com->uscsi_cdb	   = cdb;
		com->uscsi_cdblen  = CDB_GROUP0;
		com->uscsi_timeout = 5;

		/*
		 * Reissue the last reserve command, this time without request
		 * sense.  Assume that it is just a regular reserve command.
		 */
		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
		    UIO_SYSSPACE, SD_PATH_STANDARD);
	}

	/* Return an error if still getting a reservation conflict. */
	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
		rval = EACCES;
	}

	kmem_free(com, sizeof (*com));
	return (rval);
}
25987 
25988 
25989 #define	SD_NDUMP_RETRIES	12
25990 /*
25991  *	System Crash Dump routine
25992  */
25993 
/*
 * Write 'nblk' system blocks from 'addr' to partition-relative block
 * 'blkno' during a system crash dump.  Runs fully polled (FLAG_NOINTR),
 * powering up / resetting the device as needed and performing
 * read-modify-write when the target block size differs from the system
 * block size.  Returns 0 on success or an errno value.
 */
static int
sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	int		instance;
	int		partition;
	int		i;
	int		err;
	struct sd_lun	*un;
	struct dk_map	*lp;
	struct scsi_pkt *wr_pktp;
	struct buf	*wr_bp;
	struct buf	wr_buf;
	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
	daddr_t		tgt_blkno;	/* rmw - blkno for target */
	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
	size_t		io_start_offset;
	int		doing_rmw = FALSE;
	int		rval;
#if defined(__i386) || defined(__amd64)
	ssize_t dma_resid;
	daddr_t oblkno;
#endif

	/* Refuse to dump to an unknown unit, invalid geometry, or a CD. */
	instance = SDUNIT(dev);
	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
	    (!un->un_f_geometry_is_valid) || ISCD(un)) {
		return (ENXIO);
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))

	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");

	partition = SDPART(dev);
	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);

	/* Validate blocks to dump at against partition size. */
	lp = &un->un_map[partition];
	if ((blkno + nblk) > lp->dkl_nblk) {
		SD_TRACE(SD_LOG_DUMP, un,
		    "sddump: dump range larger than partition: "
		    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
		    blkno, nblk, lp->dkl_nblk);
		return (EINVAL);
	}

	mutex_enter(&un->un_pm_mutex);
	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
		struct scsi_pkt *start_pktp;

		mutex_exit(&un->un_pm_mutex);

		/*
		 * use pm framework to power on HBA 1st
		 */
		(void) pm_raise_power(SD_DEVINFO(un), 0, SD_SPINDLE_ON);

		/*
		 * Dump no long uses sdpower to power on a device, it's
		 * in-line here so it can be done in polled mode.
		 */

		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");

		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);

		if (start_pktp == NULL) {
			/* We were not given a SCSI packet, fail. */
			return (EIO);
		}
		/* Build a polled START STOP UNIT (start) command. */
		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
		start_pktp->pkt_flags = FLAG_NOINTR;

		mutex_enter(SD_MUTEX(un));
		SD_FILL_SCSI1_LUN(un, start_pktp);
		mutex_exit(SD_MUTEX(un));
		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.
		 */
		if (sd_scsi_poll(un, start_pktp) != 0) {
			scsi_destroy_pkt(start_pktp);
			return (EIO);
		}
		scsi_destroy_pkt(start_pktp);
		(void) sd_ddi_pm_resume(un);
	} else {
		mutex_exit(&un->un_pm_mutex);
	}

	mutex_enter(SD_MUTEX(un));
	/* NOTE(review): throttle zeroed for the dump path — confirm intent */
	un->un_throttle = 0;

	/*
	 * The first time through, reset the specific target device.
	 * However, when cpr calls sddump we know that sd is in a
	 * a good state so no bus reset is required.
	 * Clear sense data via Request Sense cmd.
	 * In sddump we don't care about allow_bus_device_reset anymore
	 */

	if ((un->un_state != SD_STATE_SUSPENDED) &&
	    (un->un_state != SD_STATE_DUMPING)) {

		New_state(un, SD_STATE_DUMPING);

		if (un->un_f_is_fibre == FALSE) {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Attempt a bus reset for parallel scsi.
			 *
			 * Note: A bus reset is required because on some host
			 * systems (i.e. E420R) a bus device reset is
			 * insufficient to reset the state of the target.
			 *
			 * Note: Don't issue the reset for fibre-channel,
			 * because this tends to hang the bus (loop) for
			 * too long while everyone is logging out and in
			 * and the deadman timer for dumping will fire
			 * before the dump is complete.
			 */
			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
				mutex_enter(SD_MUTEX(un));
				Restore_state(un);
				mutex_exit(SD_MUTEX(un));
				return (EIO);
			}

			/* Delay to give the device some recovery time. */
			drv_usecwait(10000);

			if (sd_send_polled_RQS(un) == SD_FAILURE) {
				SD_INFO(SD_LOG_DUMP, un,
					"sddump: sd_send_polled_RQS failed\n");
			}
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * Convert the partition-relative block number to a
	 * disk physical block number.
	 */
	blkno += un->un_offset[partition];
	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);


	/*
	 * Check if the device has a non-512 block size.
	 */
	wr_bp = NULL;
	if (NOT_DEVBSIZE(un)) {
		tgt_byte_offset = blkno * un->un_sys_blocksize;
		tgt_byte_count = nblk * un->un_sys_blocksize;
		/* Unaligned request on a non-512 device needs RMW. */
		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
		    (tgt_byte_count % un->un_tgt_blocksize)) {
			doing_rmw = TRUE;
			/*
			 * Calculate the block number and number of block
			 * in terms of the media block size.
			 */
			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
			tgt_nblk =
			    ((tgt_byte_offset + tgt_byte_count +
				(un->un_tgt_blocksize - 1)) /
				un->un_tgt_blocksize) - tgt_blkno;

			/*
			 * Invoke the routine which is going to do read part
			 * of read-modify-write.
			 * Note that this routine returns a pointer to
			 * a valid bp in wr_bp.
			 */
			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
			    &wr_bp);
			if (err) {
				mutex_exit(SD_MUTEX(un));
				return (err);
			}
			/*
			 * Offset is being calculated as -
			 * (original block # * system block size) -
			 * (new block # * target block size)
			 */
			io_start_offset =
			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));

			ASSERT((io_start_offset >= 0) &&
			    (io_start_offset < un->un_tgt_blocksize));
			/*
			 * Do the modify portion of read modify write.
			 */
			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
			    (size_t)nblk * un->un_sys_blocksize);
		} else {
			doing_rmw = FALSE;
			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
		}

		/* Convert blkno and nblk to target blocks */
		blkno = tgt_blkno;
		nblk = tgt_nblk;
	} else {
		/* 512-byte device: build a stack buf around caller's addr. */
		wr_bp = &wr_buf;
		bzero(wr_bp, sizeof (struct buf));
		wr_bp->b_flags		= B_BUSY;
		wr_bp->b_un.b_addr	= addr;
		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
		wr_bp->b_resid		= 0;
	}

	mutex_exit(SD_MUTEX(un));

	/*
	 * Obtain a SCSI packet for the write command.
	 * It should be safe to call the allocator here without
	 * worrying about being locked for DVMA mapping because
	 * the address we're passed is already a DVMA mapping
	 *
	 * We are also not going to worry about semaphore ownership
	 * in the dump buffer. Dumping is single threaded at present.
	 */

	wr_pktp = NULL;

#if defined(__i386) || defined(__amd64)
	/* x86 may only map part of the buffer: loop over partial DMA. */
	dma_resid = wr_bp->b_bcount;
	oblkno = blkno;
	while (dma_resid != 0) {
#endif

	/* Retry packet allocation; resources may be scarce during a dump. */
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
		wr_bp->b_flags &= ~B_ERROR;

#if defined(__i386) || defined(__amd64)
		blkno = oblkno +
			((wr_bp->b_bcount - dma_resid) /
			    un->un_tgt_blocksize);
		nblk = dma_resid / un->un_tgt_blocksize;

		if (wr_pktp) {
			/* Partial DMA transfers after initial transfer */
			rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
			    blkno, nblk);
		} else {
			/* Initial transfer */
			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
			    un->un_pkt_flags, NULL_FUNC, NULL,
			    blkno, nblk);
		}
#else
		rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
		    0, NULL_FUNC, NULL, blkno, nblk);
#endif

		if (rval == 0) {
			/* We were given a SCSI packet, continue. */
			break;
		}

		/* Log with decreasing severity: first try, middle, last. */
		if (i == 0) {
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "no resources for dumping; "
				    "error code: 0x%x, retrying",
				    geterror(wr_bp));
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "no resources for dumping; retrying");
			}
		} else if (i != (SD_NDUMP_RETRIES - 1)) {
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; error code: "
				    "0x%x, retrying\n", geterror(wr_bp));
			}
		} else {
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; "
				    "error code: 0x%x, retries failed, "
				    "giving up.\n", geterror(wr_bp));
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; "
				    "retries failed, giving up.\n");
			}
			mutex_enter(SD_MUTEX(un));
			Restore_state(un);
			/* Free the RMW buffer allocated earlier, if any. */
			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
				mutex_exit(SD_MUTEX(un));
				scsi_free_consistent_buf(wr_bp);
			} else {
				mutex_exit(SD_MUTEX(un));
			}
			return (EIO);
		}
		drv_usecwait(10000);
	}

#if defined(__i386) || defined(__amd64)
	/*
	 * save the resid from PARTIAL_DMA
	 */
	dma_resid = wr_pktp->pkt_resid;
	if (dma_resid != 0)
		nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
	wr_pktp->pkt_resid = 0;
#endif

	/* SunBug 1222170 */
	wr_pktp->pkt_flags = FLAG_NOINTR;

	err = EIO;
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {

		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.  We should only check
		 * errors if this condition is not true.  Even then we should
		 * send our own request sense packet only if we have a check
		 * condition and auto request sense has not been performed by
		 * the hba.
		 */
		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");

		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
		    (wr_pktp->pkt_resid == 0)) {
			err = SD_SUCCESS;
			break;
		}

		/*
		 * Check CMD_DEV_GONE 1st, give up if device is gone.
		 */
		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
			    "Device is gone\n");
			break;
		}

		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with CHECK, try # %d\n", i);
			/* Only send our own RQS if the HBA didn't auto-sense */
			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
				(void) sd_send_polled_RQS(un);
			}

			continue;
		}

		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
			int reset_retval = 0;

			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with BUSY, try # %d\n", i);

			/* Escalate: LUN reset first, then target reset. */
			if (un->un_f_lun_reset_enabled == TRUE) {
				reset_retval = scsi_reset(SD_ADDRESS(un),
				    RESET_LUN);
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
			(void) sd_send_polled_RQS(un);

		} else {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with 0x%x, try # %d\n",
			    SD_GET_PKT_STATUS(wr_pktp), i);
			mutex_enter(SD_MUTEX(un));
			sd_reset_target(un, wr_pktp);
			mutex_exit(SD_MUTEX(un));
		}

		/*
		 * If we are not getting anywhere with lun/target resets,
		 * let's reset the bus.
		 */
		if (i == SD_NDUMP_RETRIES/2) {
			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			(void) sd_send_polled_RQS(un);
		}

	}
#if defined(__i386) || defined(__amd64)
	}	/* dma_resid */
#endif

	scsi_destroy_pkt(wr_pktp);
	mutex_enter(SD_MUTEX(un));
	/* RMW path owns wr_bp (consistent buf); otherwise it's stack memory */
	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
		mutex_exit(SD_MUTEX(un));
		scsi_free_consistent_buf(wr_bp);
	} else {
		mutex_exit(SD_MUTEX(un));
	}
	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
	return (err);
}
26400 
26401 /*
26402  *    Function: sd_scsi_poll()
26403  *
26404  * Description: This is a wrapper for the scsi_poll call.
26405  *
26406  *   Arguments: sd_lun - The unit structure
26407  *              scsi_pkt - The scsi packet being sent to the device.
26408  *
26409  * Return Code: 0 - Command completed successfully with good status
26410  *             -1 - Command failed.  This could indicate a check condition
26411  *                  or other status value requiring recovery action.
26412  *
26413  */
26414 
26415 static int
26416 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
26417 {
26418 	int status;
26419 
26420 	ASSERT(un != NULL);
26421 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26422 	ASSERT(pktp != NULL);
26423 
26424 	status = SD_SUCCESS;
26425 
26426 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
26427 		pktp->pkt_flags |= un->un_tagflags;
26428 		pktp->pkt_flags &= ~FLAG_NODISCON;
26429 	}
26430 
26431 	status = sd_ddi_scsi_poll(pktp);
26432 	/*
26433 	 * Scsi_poll returns 0 (success) if the command completes and the
26434 	 * status block is STATUS_GOOD.  We should only check errors if this
26435 	 * condition is not true.  Even then we should send our own request
26436 	 * sense packet only if we have a check condition and auto
26437 	 * request sense has not been performed by the hba.
26438 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
26439 	 */
26440 	if ((status != SD_SUCCESS) &&
26441 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
26442 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
26443 	    (pktp->pkt_reason != CMD_DEV_GONE))
26444 		(void) sd_send_polled_RQS(un);
26445 
26446 	return (status);
26447 }
26448 
26449 /*
26450  *    Function: sd_send_polled_RQS()
26451  *
26452  * Description: This sends the request sense command to a device.
26453  *
26454  *   Arguments: sd_lun - The unit structure
26455  *
26456  * Return Code: 0 - Command completed successfully with good status
26457  *             -1 - Command failed.
26458  *
26459  */
26460 
static int
sd_send_polled_RQS(struct sd_lun *un)
{
	int	ret_val;
	struct	scsi_pkt	*rqs_pktp;
	struct	buf		*rqs_bp;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	ret_val = SD_SUCCESS;

	/* The pre-allocated request sense packet/buffer kept in the unit. */
	rqs_pktp = un->un_rqs_pktp;
	rqs_bp	 = un->un_rqs_bp;

	mutex_enter(SD_MUTEX(un));

	/*
	 * The single RQS packet is shared; if another thread owns it,
	 * fail rather than block (we may be on a polled/panic path).
	 */
	if (un->un_sense_isbusy) {
		ret_val = SD_FAILURE;
		mutex_exit(SD_MUTEX(un));
		return (ret_val);
	}

	/*
	 * If the request sense buffer (and packet) is not in use,
	 * let's set the un_sense_isbusy and send our packet
	 */
	un->un_sense_isbusy 	= 1;
	rqs_pktp->pkt_resid  	= 0;
	rqs_pktp->pkt_reason 	= 0;
	/* Polled command: transport must not rely on interrupts. */
	rqs_pktp->pkt_flags |= FLAG_NOINTR;
	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);

	mutex_exit(SD_MUTEX(un));

	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
	    " 0x%p\n", rqs_bp->b_un.b_addr);

	/*
	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
	 * axle - it has a call into us!
	 */
	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
		SD_INFO(SD_LOG_COMMON, un,
		    "sd_send_polled_RQS: RQS failed\n");
	}

	/* Dump the (possibly zero-filled) sense data for debugging. */
	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);

	/* Release ownership of the shared request sense packet. */
	mutex_enter(SD_MUTEX(un));
	un->un_sense_isbusy = 0;
	mutex_exit(SD_MUTEX(un));

	return (ret_val);
}
26517 
26518 /*
26519  * Defines needed for localized version of the scsi_poll routine.
26520  */
26521 #define	SD_CSEC		10000			/* usecs */
26522 #define	SD_SEC_TO_CSEC	(1000000/SD_CSEC)
26523 
26524 
26525 /*
26526  *    Function: sd_ddi_scsi_poll()
26527  *
26528  * Description: Localized version of the scsi_poll routine.  The purpose is to
26529  *		send a scsi_pkt to a device as a polled command.  This version
26530  *		is to ensure more robust handling of transport errors.
 *		Specifically, this routine handles the not-ready to ready
 *		transition that follows power-up or reset of Sonoma devices.
 *		This can take up to 45 seconds after power-on and 20 seconds
 * 		after reset of a Sonoma LUN.
26535  *
26536  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
26537  *
26538  * Return Code: 0 - Command completed successfully with good status
26539  *             -1 - Command failed.
26540  *
26541  */
26542 
static int
sd_ddi_scsi_poll(struct scsi_pkt *pkt)
{
	int busy_count;
	int timeout;
	int rval = SD_FAILURE;
	int savef;
	struct scsi_extended_sense *sensep;
	long savet;
	void (*savec)();
	/*
	 * The following is defined in machdep.c and is used in determining if
	 * the scsi transport system will do polled I/O instead of interrupt
	 * I/O when called from xx_dump().
	 */
	extern int do_polled_io;

	/*
	 * save old flags in pkt, to restore at end
	 */
	savef = pkt->pkt_flags;
	savec = pkt->pkt_comp;
	savet = pkt->pkt_time;

	pkt->pkt_flags |= FLAG_NOINTR;

	/*
	 * XXX there is nothing in the SCSA spec that states that we should not
	 * do a callback for polled cmds; however, removing this will break sd
	 * and probably other target drivers
	 */
	pkt->pkt_comp = NULL;

	/*
	 * we don't like a polled command without timeout.
	 * 60 seconds seems long enough.
	 */
	if (pkt->pkt_time == 0) {
		pkt->pkt_time = SCSI_POLL_TIMEOUT;
	}

	/*
	 * Send polled cmd.
	 *
	 * We do some error recovery for various errors.  Tran_busy,
	 * queue full, and non-dispatched commands are retried every 10 msec.
	 * as they are typically transient failures.  Busy status and Not
	 * Ready are retried every second as this status takes a while to
	 * change.  Unit attention is retried for pkt_time (60) times
	 * with no delay.
	 */
	/* busy_count ticks in centisecond (SD_CSEC) units. */
	timeout = pkt->pkt_time * SD_SEC_TO_CSEC;

	for (busy_count = 0; busy_count < timeout; busy_count++) {
		int rc;
		int poll_delay;

		/*
		 * Initialize pkt status variables.
		 */
		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;

		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
			if (rc != TRAN_BUSY) {
				/* Transport failed - give up. */
				break;
			} else {
				/* Transport busy - try again. */
				poll_delay = 1 * SD_CSEC; /* 10 msec */
			}
		} else {
			/*
			 * Transport accepted - check pkt status.
			 */
			rc = (*pkt->pkt_scbp) & STATUS_MASK;
			/*
			 * Point sensep at the auto-request-sense data only
			 * when the HBA actually completed ARQ for a check
			 * condition; otherwise leave it NULL so the sense
			 * based retry branches below are skipped.
			 */
			if (pkt->pkt_reason == CMD_CMPLT &&
			    rc == STATUS_CHECK &&
			    pkt->pkt_state & STATE_ARQ_DONE) {
				struct scsi_arq_status *arqstat =
				    (struct scsi_arq_status *)(pkt->pkt_scbp);

				sensep = &arqstat->sts_sensedata;
			} else {
				sensep = NULL;
			}

			if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_GOOD)) {
				/* No error - we're done */
				rval = SD_SUCCESS;
				break;

			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
				/* Lost connection - give up */
				break;

			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
			    (pkt->pkt_state == 0)) {
				/* Pkt not dispatched - try again. */
				poll_delay = 1 * SD_CSEC; /* 10 msec. */

			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_QFULL)) {
				/* Queue full - try again. */
				poll_delay = 1 * SD_CSEC; /* 10 msec. */

			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_BUSY)) {
				/* Busy - try again. */
				poll_delay = 100 * SD_CSEC; /* 1 sec. */
				/* Count a full second against the timeout. */
				busy_count += (SD_SEC_TO_CSEC - 1);

			} else if ((sensep != NULL) &&
			    (sensep->es_key == KEY_UNIT_ATTENTION)) {
				/* Unit Attention - try again */
				busy_count += (SD_SEC_TO_CSEC - 1); /* 1 */
				/* Retry immediately; skip the delay below. */
				continue;

			} else if ((sensep != NULL) &&
			    (sensep->es_key == KEY_NOT_READY) &&
			    (sensep->es_add_code == 0x04) &&
			    (sensep->es_qual_code == 0x01)) {
				/* Not ready -> ready - try again. */
				poll_delay = 100 * SD_CSEC; /* 1 sec. */
				busy_count += (SD_SEC_TO_CSEC - 1);

			} else {
				/* BAD status - give up. */
				break;
			}
		}

		if ((curthread->t_flag & T_INTR_THREAD) == 0 &&
		    !do_polled_io) {
			delay(drv_usectohz(poll_delay));
		} else {
			/* we busy wait during cpr_dump or interrupt threads */
			drv_usecwait(poll_delay);
		}
	}

	/* Restore the caller's flags, completion callback, and timeout. */
	pkt->pkt_flags = savef;
	pkt->pkt_comp = savec;
	pkt->pkt_time = savet;
	return (rval);
}
26689 
26690 
26691 /*
26692  *    Function: sd_persistent_reservation_in_read_keys
26693  *
26694  * Description: This routine is the driver entry point for handling CD-ROM
26695  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
26696  *		by sending the SCSI-3 PRIN commands to the device.
26697  *		Processes the read keys command response by copying the
26698  *		reservation key information into the user provided buffer.
26699  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
26700  *
26701  *   Arguments: un   -  Pointer to soft state struct for the target.
26702  *		usrp -	user provided pointer to multihost Persistent In Read
26703  *			Keys structure (mhioc_inkeys_t)
26704  *		flag -	this argument is a pass through to ddi_copyxxx()
26705  *			directly from the mode argument of ioctl().
26706  *
26707  * Return Code: 0   - Success
26708  *		EACCES
26709  *		ENOTSUP
26710  *		errno return code from sd_send_scsi_cmd()
26711  *
26712  *     Context: Can sleep. Does not return until command is completed.
26713  */
26714 
26715 static int
26716 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
26717     mhioc_inkeys_t *usrp, int flag)
26718 {
26719 #ifdef _MULTI_DATAMODEL
26720 	struct mhioc_key_list32	li32;
26721 #endif
26722 	sd_prin_readkeys_t	*in;
26723 	mhioc_inkeys_t		*ptr;
26724 	mhioc_key_list_t	li;
26725 	uchar_t			*data_bufp;
26726 	int 			data_len;
26727 	int			rval;
26728 	size_t			copysz;
26729 
26730 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
26731 		return (EINVAL);
26732 	}
26733 	bzero(&li, sizeof (mhioc_key_list_t));
26734 
26735 	/*
26736 	 * Get the listsize from user
26737 	 */
26738 #ifdef _MULTI_DATAMODEL
26739 
26740 	switch (ddi_model_convert_from(flag & FMODELS)) {
26741 	case DDI_MODEL_ILP32:
26742 		copysz = sizeof (struct mhioc_key_list32);
26743 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
26744 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26745 			    "sd_persistent_reservation_in_read_keys: "
26746 			    "failed ddi_copyin: mhioc_key_list32_t\n");
26747 			rval = EFAULT;
26748 			goto done;
26749 		}
26750 		li.listsize = li32.listsize;
26751 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
26752 		break;
26753 
26754 	case DDI_MODEL_NONE:
26755 		copysz = sizeof (mhioc_key_list_t);
26756 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26757 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26758 			    "sd_persistent_reservation_in_read_keys: "
26759 			    "failed ddi_copyin: mhioc_key_list_t\n");
26760 			rval = EFAULT;
26761 			goto done;
26762 		}
26763 		break;
26764 	}
26765 
26766 #else /* ! _MULTI_DATAMODEL */
26767 	copysz = sizeof (mhioc_key_list_t);
26768 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26769 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26770 		    "sd_persistent_reservation_in_read_keys: "
26771 		    "failed ddi_copyin: mhioc_key_list_t\n");
26772 		rval = EFAULT;
26773 		goto done;
26774 	}
26775 #endif
26776 
26777 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
26778 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
26779 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26780 
26781 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_KEYS,
26782 	    data_len, data_bufp)) != 0) {
26783 		goto done;
26784 	}
26785 	in = (sd_prin_readkeys_t *)data_bufp;
26786 	ptr->generation = BE_32(in->generation);
26787 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
26788 
26789 	/*
26790 	 * Return the min(listsize, listlen) keys
26791 	 */
26792 #ifdef _MULTI_DATAMODEL
26793 
26794 	switch (ddi_model_convert_from(flag & FMODELS)) {
26795 	case DDI_MODEL_ILP32:
26796 		li32.listlen = li.listlen;
26797 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
26798 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26799 			    "sd_persistent_reservation_in_read_keys: "
26800 			    "failed ddi_copyout: mhioc_key_list32_t\n");
26801 			rval = EFAULT;
26802 			goto done;
26803 		}
26804 		break;
26805 
26806 	case DDI_MODEL_NONE:
26807 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26808 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26809 			    "sd_persistent_reservation_in_read_keys: "
26810 			    "failed ddi_copyout: mhioc_key_list_t\n");
26811 			rval = EFAULT;
26812 			goto done;
26813 		}
26814 		break;
26815 	}
26816 
26817 #else /* ! _MULTI_DATAMODEL */
26818 
26819 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26820 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26821 		    "sd_persistent_reservation_in_read_keys: "
26822 		    "failed ddi_copyout: mhioc_key_list_t\n");
26823 		rval = EFAULT;
26824 		goto done;
26825 	}
26826 
26827 #endif /* _MULTI_DATAMODEL */
26828 
26829 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
26830 	    li.listsize * MHIOC_RESV_KEY_SIZE);
26831 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
26832 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26833 		    "sd_persistent_reservation_in_read_keys: "
26834 		    "failed ddi_copyout: keylist\n");
26835 		rval = EFAULT;
26836 	}
26837 done:
26838 	kmem_free(data_bufp, data_len);
26839 	return (rval);
26840 }
26841 
26842 
26843 /*
26844  *    Function: sd_persistent_reservation_in_read_resv
26845  *
26846  * Description: This routine is the driver entry point for handling CD-ROM
26847  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
26848  *		by sending the SCSI-3 PRIN commands to the device.
26849  *		Process the read persistent reservations command response by
26850  *		copying the reservation information into the user provided
26851  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
26852  *
26853  *   Arguments: un   -  Pointer to soft state struct for the target.
26854  *		usrp -	user provided pointer to multihost Persistent In Read
26855  *			Keys structure (mhioc_inkeys_t)
26856  *		flag -	this argument is a pass through to ddi_copyxxx()
26857  *			directly from the mode argument of ioctl().
26858  *
26859  * Return Code: 0   - Success
26860  *		EACCES
26861  *		ENOTSUP
26862  *		errno return code from sd_send_scsi_cmd()
26863  *
26864  *     Context: Can sleep. Does not return until command is completed.
26865  */
26866 
26867 static int
26868 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
26869     mhioc_inresvs_t *usrp, int flag)
26870 {
26871 #ifdef _MULTI_DATAMODEL
26872 	struct mhioc_resv_desc_list32 resvlist32;
26873 #endif
26874 	sd_prin_readresv_t	*in;
26875 	mhioc_inresvs_t		*ptr;
26876 	sd_readresv_desc_t	*readresv_ptr;
26877 	mhioc_resv_desc_list_t	resvlist;
26878 	mhioc_resv_desc_t 	resvdesc;
26879 	uchar_t			*data_bufp;
26880 	int 			data_len;
26881 	int			rval;
26882 	int			i;
26883 	size_t			copysz;
26884 	mhioc_resv_desc_t	*bufp;
26885 
26886 	if ((ptr = usrp) == NULL) {
26887 		return (EINVAL);
26888 	}
26889 
26890 	/*
26891 	 * Get the listsize from user
26892 	 */
26893 #ifdef _MULTI_DATAMODEL
26894 	switch (ddi_model_convert_from(flag & FMODELS)) {
26895 	case DDI_MODEL_ILP32:
26896 		copysz = sizeof (struct mhioc_resv_desc_list32);
26897 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
26898 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26899 			    "sd_persistent_reservation_in_read_resv: "
26900 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26901 			rval = EFAULT;
26902 			goto done;
26903 		}
26904 		resvlist.listsize = resvlist32.listsize;
26905 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
26906 		break;
26907 
26908 	case DDI_MODEL_NONE:
26909 		copysz = sizeof (mhioc_resv_desc_list_t);
26910 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
26911 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26912 			    "sd_persistent_reservation_in_read_resv: "
26913 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26914 			rval = EFAULT;
26915 			goto done;
26916 		}
26917 		break;
26918 	}
26919 #else /* ! _MULTI_DATAMODEL */
26920 	copysz = sizeof (mhioc_resv_desc_list_t);
26921 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
26922 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26923 		    "sd_persistent_reservation_in_read_resv: "
26924 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
26925 		rval = EFAULT;
26926 		goto done;
26927 	}
26928 #endif /* ! _MULTI_DATAMODEL */
26929 
26930 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
26931 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
26932 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26933 
26934 	if ((rval = sd_send_scsi_PERSISTENT_RESERVE_IN(un, SD_READ_RESV,
26935 	    data_len, data_bufp)) != 0) {
26936 		goto done;
26937 	}
26938 	in = (sd_prin_readresv_t *)data_bufp;
26939 	ptr->generation = BE_32(in->generation);
26940 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
26941 
26942 	/*
26943 	 * Return the min(listsize, listlen( keys
26944 	 */
26945 #ifdef _MULTI_DATAMODEL
26946 
26947 	switch (ddi_model_convert_from(flag & FMODELS)) {
26948 	case DDI_MODEL_ILP32:
26949 		resvlist32.listlen = resvlist.listlen;
26950 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
26951 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26952 			    "sd_persistent_reservation_in_read_resv: "
26953 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26954 			rval = EFAULT;
26955 			goto done;
26956 		}
26957 		break;
26958 
26959 	case DDI_MODEL_NONE:
26960 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
26961 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26962 			    "sd_persistent_reservation_in_read_resv: "
26963 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26964 			rval = EFAULT;
26965 			goto done;
26966 		}
26967 		break;
26968 	}
26969 
26970 #else /* ! _MULTI_DATAMODEL */
26971 
26972 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
26973 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26974 		    "sd_persistent_reservation_in_read_resv: "
26975 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
26976 		rval = EFAULT;
26977 		goto done;
26978 	}
26979 
26980 #endif /* ! _MULTI_DATAMODEL */
26981 
26982 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
26983 	bufp = resvlist.list;
26984 	copysz = sizeof (mhioc_resv_desc_t);
26985 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
26986 	    i++, readresv_ptr++, bufp++) {
26987 
26988 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
26989 		    MHIOC_RESV_KEY_SIZE);
26990 		resvdesc.type  = readresv_ptr->type;
26991 		resvdesc.scope = readresv_ptr->scope;
26992 		resvdesc.scope_specific_addr =
26993 		    BE_32(readresv_ptr->scope_specific_addr);
26994 
26995 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
26996 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26997 			    "sd_persistent_reservation_in_read_resv: "
26998 			    "failed ddi_copyout: resvlist\n");
26999 			rval = EFAULT;
27000 			goto done;
27001 		}
27002 	}
27003 done:
27004 	kmem_free(data_bufp, data_len);
27005 	return (rval);
27006 }
27007 
27008 
27009 /*
27010  *    Function: sr_change_blkmode()
27011  *
27012  * Description: This routine is the driver entry point for handling CD-ROM
27013  *		block mode ioctl requests. Support for returning and changing
27014  *		the current block size in use by the device is implemented. The
27015  *		LBA size is changed via a MODE SELECT Block Descriptor.
27016  *
27017  *		This routine issues a mode sense with an allocation length of
27018  *		12 bytes for the mode page header and a single block descriptor.
27019  *
27020  *   Arguments: dev - the device 'dev_t'
27021  *		cmd - the request type; one of CDROMGBLKMODE (get) or
27022  *		      CDROMSBLKMODE (set)
27023  *		data - current block size or requested block size
27024  *		flag - this argument is a pass through to ddi_copyxxx() directly
27025  *		       from the mode argument of ioctl().
27026  *
27027  * Return Code: the code returned by sd_send_scsi_cmd()
27028  *		EINVAL if invalid arguments are provided
27029  *		EFAULT if ddi_copyxxx() fails
27030  *		ENXIO if fail ddi_get_soft_state
27031  *		EIO if invalid mode sense block descriptor length
27032  *
27033  */
27034 
static int
sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
{
	struct sd_lun			*un = NULL;
	struct mode_header		*sense_mhp, *select_mhp;
	struct block_descriptor		*sense_desc, *select_desc;
	int				current_bsize;
	int				rval = EINVAL;
	uchar_t				*sense = NULL;
	uchar_t				*select = NULL;

	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * The block length is changed via the Mode Select block descriptor, the
	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
	 * required as part of this routine. Therefore the mode sense allocation
	 * length is specified to be the length of a mode page header and a
	 * block descriptor.
	 */
	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);

	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD)) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_change_blkmode: Mode Sense Failed\n");
		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
		return (rval);
	}

	/* Check the block descriptor len to handle only 1 block descriptor */
	sense_mhp = (struct mode_header *)sense;
	if ((sense_mhp->bdesc_length == 0) ||
	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_change_blkmode: Mode Sense returned invalid block"
		    " descriptor length\n");
		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
		return (EIO);
	}
	/* Assemble the 24-bit block size from the block descriptor bytes. */
	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
	current_bsize = ((sense_desc->blksize_hi << 16) |
	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);

	/* Process command */
	switch (cmd) {
	case CDROMGBLKMODE:
		/* Return the block size obtained during the mode sense */
		if (ddi_copyout(&current_bsize, (void *)data,
		    sizeof (int), flag) != 0)
			rval = EFAULT;
		break;
	case CDROMSBLKMODE:
		/* Validate the requested block size */
		switch (data) {
		case CDROM_BLK_512:
		case CDROM_BLK_1024:
		case CDROM_BLK_2048:
		case CDROM_BLK_2056:
		case CDROM_BLK_2336:
		case CDROM_BLK_2340:
		case CDROM_BLK_2352:
		case CDROM_BLK_2368:
		case CDROM_BLK_2448:
		case CDROM_BLK_2646:
		case CDROM_BLK_2647:
			break;
		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_change_blkmode: "
			    "Block Size '%ld' Not Supported\n", data);
			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
			return (EINVAL);
		}

		/*
		 * The current block size matches the requested block size so
		 * there is no need to send the mode select to change the size
		 */
		if (current_bsize == data) {
			break;
		}

		/* Build the select data for the requested block size */
		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
		select_mhp = (struct mode_header *)select;
		select_desc =
		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
		/*
		 * The LBA size is changed via the block descriptor, so the
		 * descriptor is built according to the user data
		 */
		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
		select_desc->blksize_lo  = (char)((data) & 0x000000ff);

		/* Send the mode select for the requested block size */
		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
		    SD_PATH_STANDARD)) != 0) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_change_blkmode: Mode Select Failed\n");
			/*
			 * The mode select failed for the requested block size,
			 * so reset the data for the original block size and
			 * send it to the target. The error is indicated by the
			 * return value for the failed mode select.
			 */
			select_desc->blksize_hi  = sense_desc->blksize_hi;
			select_desc->blksize_mid = sense_desc->blksize_mid;
			select_desc->blksize_lo  = sense_desc->blksize_lo;
			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0,
			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
			    SD_PATH_STANDARD);
		} else {
			/*
			 * Success: record the new LBA size in the soft state
			 * so subsequent I/O uses the updated geometry.
			 */
			ASSERT(!mutex_owned(SD_MUTEX(un)));
			mutex_enter(SD_MUTEX(un));
			sd_update_block_info(un, (uint32_t)data, 0);

			mutex_exit(SD_MUTEX(un));
		}
		break;
	default:
		/* should not reach here, but check anyway */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
		rval = EINVAL;
		break;
	}

	if (select) {
		kmem_free(select, BUFLEN_CHG_BLK_MODE);
	}
	if (sense) {
		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
	}
	return (rval);
}
27178 
27179 
27180 /*
27181  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
27182  * implement driver support for getting and setting the CD speed. The command
27183  * set used will be based on the device type. If the device has not been
27184  * identified as MMC the Toshiba vendor specific mode page will be used. If
27185  * the device is MMC but does not support the Real Time Streaming feature
27186  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
27187  * be used to read the speed.
27188  */
27189 
27190 /*
27191  *    Function: sr_change_speed()
27192  *
27193  * Description: This routine is the driver entry point for handling CD-ROM
27194  *		drive speed ioctl requests for devices supporting the Toshiba
27195  *		vendor specific drive speed mode page. Support for returning
27196  *		and changing the current drive speed in use by the device is
27197  *		implemented.
27198  *
27199  *   Arguments: dev - the device 'dev_t'
27200  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
27201  *		      CDROMSDRVSPEED (set)
27202  *		data - current drive speed or requested drive speed
27203  *		flag - this argument is a pass through to ddi_copyxxx() directly
27204  *		       from the mode argument of ioctl().
27205  *
27206  * Return Code: the code returned by sd_send_scsi_cmd()
27207  *		EINVAL if invalid arguments are provided
27208  *		EFAULT if ddi_copyxxx() fails
27209  *		ENXIO if fail ddi_get_soft_state
27210  *		EIO if invalid mode sense block descriptor length
27211  */
27212 
27213 static int
27214 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27215 {
27216 	struct sd_lun			*un = NULL;
27217 	struct mode_header		*sense_mhp, *select_mhp;
27218 	struct mode_speed		*sense_page, *select_page;
27219 	int				current_speed;
27220 	int				rval = EINVAL;
27221 	int				bd_len;
27222 	uchar_t				*sense = NULL;
27223 	uchar_t				*select = NULL;
27224 
27225 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27226 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27227 		return (ENXIO);
27228 	}
27229 
27230 	/*
27231 	 * Note: The drive speed is being modified here according to a Toshiba
27232 	 * vendor specific mode page (0x31).
27233 	 */
27234 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27235 
27236 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
27237 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
27238 	    SD_PATH_STANDARD)) != 0) {
27239 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27240 		    "sr_change_speed: Mode Sense Failed\n");
27241 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27242 		return (rval);
27243 	}
27244 	sense_mhp  = (struct mode_header *)sense;
27245 
27246 	/* Check the block descriptor len to handle only 1 block descriptor */
27247 	bd_len = sense_mhp->bdesc_length;
27248 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27249 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27250 		    "sr_change_speed: Mode Sense returned invalid block "
27251 		    "descriptor length\n");
27252 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27253 		return (EIO);
27254 	}
27255 
27256 	sense_page = (struct mode_speed *)
27257 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
27258 	current_speed = sense_page->speed;
27259 
27260 	/* Process command */
27261 	switch (cmd) {
27262 	case CDROMGDRVSPEED:
27263 		/* Return the drive speed obtained during the mode sense */
27264 		if (current_speed == 0x2) {
27265 			current_speed = CDROM_TWELVE_SPEED;
27266 		}
27267 		if (ddi_copyout(&current_speed, (void *)data,
27268 		    sizeof (int), flag) != 0) {
27269 			rval = EFAULT;
27270 		}
27271 		break;
27272 	case CDROMSDRVSPEED:
27273 		/* Validate the requested drive speed */
27274 		switch ((uchar_t)data) {
27275 		case CDROM_TWELVE_SPEED:
27276 			data = 0x2;
27277 			/*FALLTHROUGH*/
27278 		case CDROM_NORMAL_SPEED:
27279 		case CDROM_DOUBLE_SPEED:
27280 		case CDROM_QUAD_SPEED:
27281 		case CDROM_MAXIMUM_SPEED:
27282 			break;
27283 		default:
27284 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27285 			    "sr_change_speed: "
27286 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
27287 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27288 			return (EINVAL);
27289 		}
27290 
27291 		/*
27292 		 * The current drive speed matches the requested drive speed so
27293 		 * there is no need to send the mode select to change the speed
27294 		 */
27295 		if (current_speed == data) {
27296 			break;
27297 		}
27298 
27299 		/* Build the select data for the requested drive speed */
27300 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27301 		select_mhp = (struct mode_header *)select;
27302 		select_mhp->bdesc_length = 0;
27303 		select_page =
27304 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27305 		select_page =
27306 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27307 		select_page->mode_page.code = CDROM_MODE_SPEED;
27308 		select_page->mode_page.length = 2;
27309 		select_page->speed = (uchar_t)data;
27310 
27311 		/* Send the mode select for the requested block size */
27312 		if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27313 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27314 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
27315 			/*
27316 			 * The mode select failed for the requested drive speed,
27317 			 * so reset the data for the original drive speed and
27318 			 * send it to the target. The error is indicated by the
27319 			 * return value for the failed mode select.
27320 			 */
27321 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27322 			    "sr_drive_speed: Mode Select Failed\n");
27323 			select_page->speed = sense_page->speed;
27324 			(void) sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
27325 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27326 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27327 		}
27328 		break;
27329 	default:
27330 		/* should not reach here, but check anyway */
27331 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27332 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
27333 		rval = EINVAL;
27334 		break;
27335 	}
27336 
27337 	if (select) {
27338 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
27339 	}
27340 	if (sense) {
27341 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27342 	}
27343 
27344 	return (rval);
27345 }
27346 
27347 
27348 /*
27349  *    Function: sr_atapi_change_speed()
27350  *
27351  * Description: This routine is the driver entry point for handling CD-ROM
27352  *		drive speed ioctl requests for MMC devices that do not support
27353  *		the Real Time Streaming feature (0x107).
27354  *
27355  *		Note: This routine will use the SET SPEED command which may not
27356  *		be supported by all devices.
27357  *
27358  *   Arguments: dev- the device 'dev_t'
27359  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
27360  *		     CDROMSDRVSPEED (set)
27361  *		data- current drive speed or requested drive speed
27362  *		flag- this argument is a pass through to ddi_copyxxx() directly
27363  *		      from the mode argument of ioctl().
27364  *
27365  * Return Code: the code returned by sd_send_scsi_cmd()
27366  *		EINVAL if invalid arguments are provided
27367  *		EFAULT if ddi_copyxxx() fails
27368  *		ENXIO if fail ddi_get_soft_state
27369  *		EIO if invalid mode sense block descriptor length
27370  */
27371 
static int
sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
{
	struct sd_lun			*un;
	struct uscsi_cmd		*com = NULL;
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	uchar_t				*sense = NULL;
	char				cdb[CDB_GROUP5];
	int				bd_len;
	int				current_speed = 0;
	int				max_speed = 0;
	int				rval;

	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * Fetch the CD capabilities mode page so we can read the drive's
	 * current and maximum speeds (both needed by either ioctl).
	 */
	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);

	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
	    SD_PATH_STANDARD)) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Mode Sense Failed\n");
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
		return (rval);
	}

	/* Check the block descriptor len to handle only 1 block descriptor */
	sense_mhp = (struct mode_header_grp2 *)sense;
	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Mode Sense returned invalid "
		    "block descriptor length\n");
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
		return (EIO);
	}

	/*
	 * Calculate the current and maximum drive speeds. Bytes 14-15 of
	 * the capabilities page hold the current speed and bytes 8-9 the
	 * maximum speed; the values are in KB/sec (compared below against
	 * multiples of SD_SPEED_1X).
	 */
	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
	current_speed = (sense_page[14] << 8) | sense_page[15];
	max_speed = (sense_page[8] << 8) | sense_page[9];

	/* Process the command */
	switch (cmd) {
	case CDROMGDRVSPEED:
		/*
		 * Convert KB/sec to the CDROM_*_SPEED multiple returned to
		 * the user (integer division truncates toward zero).
		 */
		current_speed /= SD_SPEED_1X;
		if (ddi_copyout(&current_speed, (void *)data,
		    sizeof (int), flag) != 0)
			rval = EFAULT;
		break;
	case CDROMSDRVSPEED:
		/* Convert the speed code to KB/sec */
		switch ((uchar_t)data) {
		case CDROM_NORMAL_SPEED:
			current_speed = SD_SPEED_1X;
			break;
		case CDROM_DOUBLE_SPEED:
			current_speed = 2 * SD_SPEED_1X;
			break;
		case CDROM_QUAD_SPEED:
			current_speed = 4 * SD_SPEED_1X;
			break;
		case CDROM_TWELVE_SPEED:
			current_speed = 12 * SD_SPEED_1X;
			break;
		case CDROM_MAXIMUM_SPEED:
			/* 0xffff requests the drive's maximum speed */
			current_speed = 0xffff;
			break;
		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_atapi_change_speed: invalid drive speed %d\n",
			    (uchar_t)data);
			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
			return (EINVAL);
		}

		/* Check the request against the drive's max speed. */
		if (current_speed != 0xffff) {
			if (current_speed > max_speed) {
				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
				return (EINVAL);
			}
		}

		/*
		 * Build and send the SET SPEED command
		 *
		 * Note: The SET SPEED (0xBB) command used in this routine is
		 * obsolete per the SCSI MMC spec but still supported in the
		 * MT FUJI vendor spec. Most equipment is adhering to MT FUJI
		 * therefore the command is still implemented in this routine.
		 */
		bzero(cdb, sizeof (cdb));
		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
		/* Bytes 2-3: requested read speed in KB/sec (big-endian) */
		cdb[2] = (uchar_t)(current_speed >> 8);
		cdb[3] = (uchar_t)current_speed;
		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
		com->uscsi_cdb	   = (caddr_t)cdb;
		com->uscsi_cdblen  = CDB_GROUP5;
		com->uscsi_bufaddr = NULL;
		com->uscsi_buflen  = 0;
		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, 0,
		    UIO_SYSSPACE, SD_PATH_STANDARD);
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
		rval = EINVAL;
	}

	if (sense) {
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
	}
	if (com) {
		kmem_free(com, sizeof (*com));
	}
	return (rval);
}
27496 
27497 
27498 /*
27499  *    Function: sr_pause_resume()
27500  *
27501  * Description: This routine is the driver entry point for handling CD-ROM
27502  *		pause/resume ioctl requests. This only affects the audio play
27503  *		operation.
27504  *
27505  *   Arguments: dev - the device 'dev_t'
27506  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
27507  *		      for setting the resume bit of the cdb.
27508  *
27509  * Return Code: the code returned by sd_send_scsi_cmd()
27510  *		EINVAL if invalid mode specified
27511  *
27512  */
27513 
27514 static int
27515 sr_pause_resume(dev_t dev, int cmd)
27516 {
27517 	struct sd_lun		*un;
27518 	struct uscsi_cmd	*com;
27519 	char			cdb[CDB_GROUP1];
27520 	int			rval;
27521 
27522 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27523 		return (ENXIO);
27524 	}
27525 
27526 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27527 	bzero(cdb, CDB_GROUP1);
27528 	cdb[0] = SCMD_PAUSE_RESUME;
27529 	switch (cmd) {
27530 	case CDROMRESUME:
27531 		cdb[8] = 1;
27532 		break;
27533 	case CDROMPAUSE:
27534 		cdb[8] = 0;
27535 		break;
27536 	default:
27537 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
27538 		    " Command '%x' Not Supported\n", cmd);
27539 		rval = EINVAL;
27540 		goto done;
27541 	}
27542 
27543 	com->uscsi_cdb    = cdb;
27544 	com->uscsi_cdblen = CDB_GROUP1;
27545 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27546 
27547 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27548 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27549 
27550 done:
27551 	kmem_free(com, sizeof (*com));
27552 	return (rval);
27553 }
27554 
27555 
27556 /*
27557  *    Function: sr_play_msf()
27558  *
27559  * Description: This routine is the driver entry point for handling CD-ROM
27560  *		ioctl requests to output the audio signals at the specified
27561  *		starting address and continue the audio play until the specified
27562  *		ending address (CDROMPLAYMSF) The address is in Minute Second
27563  *		Frame (MSF) format.
27564  *
27565  *   Arguments: dev	- the device 'dev_t'
27566  *		data	- pointer to user provided audio msf structure,
27567  *		          specifying start/end addresses.
27568  *		flag	- this argument is a pass through to ddi_copyxxx()
27569  *		          directly from the mode argument of ioctl().
27570  *
27571  * Return Code: the code returned by sd_send_scsi_cmd()
27572  *		EFAULT if ddi_copyxxx() fails
27573  *		ENXIO if fail ddi_get_soft_state
27574  *		EINVAL if data pointer is NULL
27575  */
27576 
27577 static int
27578 sr_play_msf(dev_t dev, caddr_t data, int flag)
27579 {
27580 	struct sd_lun		*un;
27581 	struct uscsi_cmd	*com;
27582 	struct cdrom_msf	msf_struct;
27583 	struct cdrom_msf	*msf = &msf_struct;
27584 	char			cdb[CDB_GROUP1];
27585 	int			rval;
27586 
27587 	if (data == NULL) {
27588 		return (EINVAL);
27589 	}
27590 
27591 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27592 		return (ENXIO);
27593 	}
27594 
27595 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
27596 		return (EFAULT);
27597 	}
27598 
27599 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27600 	bzero(cdb, CDB_GROUP1);
27601 	cdb[0] = SCMD_PLAYAUDIO_MSF;
27602 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
27603 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
27604 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
27605 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
27606 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
27607 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
27608 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
27609 	} else {
27610 		cdb[3] = msf->cdmsf_min0;
27611 		cdb[4] = msf->cdmsf_sec0;
27612 		cdb[5] = msf->cdmsf_frame0;
27613 		cdb[6] = msf->cdmsf_min1;
27614 		cdb[7] = msf->cdmsf_sec1;
27615 		cdb[8] = msf->cdmsf_frame1;
27616 	}
27617 	com->uscsi_cdb    = cdb;
27618 	com->uscsi_cdblen = CDB_GROUP1;
27619 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27620 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27621 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27622 	kmem_free(com, sizeof (*com));
27623 	return (rval);
27624 }
27625 
27626 
27627 /*
27628  *    Function: sr_play_trkind()
27629  *
27630  * Description: This routine is the driver entry point for handling CD-ROM
27631  *		ioctl requests to output the audio signals at the specified
27632  *		starting address and continue the audio play until the specified
27633  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
27634  *		format.
27635  *
27636  *   Arguments: dev	- the device 'dev_t'
27637  *		data	- pointer to user provided audio track/index structure,
27638  *		          specifying start/end addresses.
27639  *		flag	- this argument is a pass through to ddi_copyxxx()
27640  *		          directly from the mode argument of ioctl().
27641  *
27642  * Return Code: the code returned by sd_send_scsi_cmd()
27643  *		EFAULT if ddi_copyxxx() fails
27644  *		ENXIO if fail ddi_get_soft_state
27645  *		EINVAL if data pointer is NULL
27646  */
27647 
27648 static int
27649 sr_play_trkind(dev_t dev, caddr_t data, int flag)
27650 {
27651 	struct cdrom_ti		ti_struct;
27652 	struct cdrom_ti		*ti = &ti_struct;
27653 	struct uscsi_cmd	*com = NULL;
27654 	char			cdb[CDB_GROUP1];
27655 	int			rval;
27656 
27657 	if (data == NULL) {
27658 		return (EINVAL);
27659 	}
27660 
27661 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
27662 		return (EFAULT);
27663 	}
27664 
27665 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27666 	bzero(cdb, CDB_GROUP1);
27667 	cdb[0] = SCMD_PLAYAUDIO_TI;
27668 	cdb[4] = ti->cdti_trk0;
27669 	cdb[5] = ti->cdti_ind0;
27670 	cdb[7] = ti->cdti_trk1;
27671 	cdb[8] = ti->cdti_ind1;
27672 	com->uscsi_cdb    = cdb;
27673 	com->uscsi_cdblen = CDB_GROUP1;
27674 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27675 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27676 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27677 	kmem_free(com, sizeof (*com));
27678 	return (rval);
27679 }
27680 
27681 
27682 /*
27683  *    Function: sr_read_all_subcodes()
27684  *
27685  * Description: This routine is the driver entry point for handling CD-ROM
27686  *		ioctl requests to return raw subcode data while the target is
27687  *		playing audio (CDROMSUBCODE).
27688  *
27689  *   Arguments: dev	- the device 'dev_t'
27690  *		data	- pointer to user provided cdrom subcode structure,
27691  *		          specifying the transfer length and address.
27692  *		flag	- this argument is a pass through to ddi_copyxxx()
27693  *		          directly from the mode argument of ioctl().
27694  *
27695  * Return Code: the code returned by sd_send_scsi_cmd()
27696  *		EFAULT if ddi_copyxxx() fails
27697  *		ENXIO if fail ddi_get_soft_state
27698  *		EINVAL if data pointer is NULL
27699  */
27700 
static int
sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un = NULL;
	struct uscsi_cmd	*com = NULL;
	struct cdrom_subcode	*subcode = NULL;
	int			rval;
	size_t			buflen;
	char			cdb[CDB_GROUP5];

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_subcode32		cdrom_subcode32;
	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
#endif
	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_all_subcodes: ddi_copyin Failed\n");
			kmem_free(subcode, sizeof (struct cdrom_subcode));
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, subcode,
		    sizeof (struct cdrom_subcode), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_all_subcodes: ddi_copyin Failed\n");
			kmem_free(subcode, sizeof (struct cdrom_subcode));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_all_subcodes: ddi_copyin Failed\n");
		kmem_free(subcode, sizeof (struct cdrom_subcode));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((subcode->cdsc_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_all_subcodes: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    subcode->cdsc_length, 0xFFFFFF);
		kmem_free(subcode, sizeof (struct cdrom_subcode));
		return (EINVAL);
	}

	/* Total transfer size: cdsc_length blocks of subcode data */
	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);

	if (un->un_f_mmc_cap == TRUE) {
		/*
		 * MMC-capable device: use READ CD. The transfer length
		 * occupies bytes 6-8 (3 bytes, hence the 24-bit check
		 * above). NOTE(review): bytes 2-5 = 0xff and byte 10 = 1
		 * appear to select the starting address/sub-channel per
		 * the MMC spec -- confirm against MMC READ CD definition.
		 */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[2] = (char)0xff;
		cdb[3] = (char)0xff;
		cdb[4] = (char)0xff;
		cdb[5] = (char)0xff;
		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
		cdb[10] = 1;
	} else {
		/*
		 * Note: A vendor specific command (0xDF) is being used here to
		 * request a read of all subcodes. The transfer length occupies
		 * bytes 6-9 (4 bytes) in this CDB layout.
		 */
		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
	}
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP5;
	/* Data is transferred directly to the user supplied buffer */
	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
	com->uscsi_buflen  = buflen;
	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
	    UIO_SYSSPACE, SD_PATH_STANDARD);
	kmem_free(subcode, sizeof (struct cdrom_subcode));
	kmem_free(com, sizeof (*com));
	return (rval);
}
27806 
27807 
27808 /*
27809  *    Function: sr_read_subchannel()
27810  *
27811  * Description: This routine is the driver entry point for handling CD-ROM
27812  *		ioctl requests to return the Q sub-channel data of the CD
27813  *		current position block. (CDROMSUBCHNL) The data includes the
27814  *		track number, index number, absolute CD-ROM address (LBA or MSF
27815  *		format per the user) , track relative CD-ROM address (LBA or MSF
27816  *		format per the user), control data and audio status.
27817  *
27818  *   Arguments: dev	- the device 'dev_t'
27819  *		data	- pointer to user provided cdrom sub-channel structure
27820  *		flag	- this argument is a pass through to ddi_copyxxx()
27821  *		          directly from the mode argument of ioctl().
27822  *
27823  * Return Code: the code returned by sd_send_scsi_cmd()
27824  *		EFAULT if ddi_copyxxx() fails
27825  *		ENXIO if fail ddi_get_soft_state
27826  *		EINVAL if data pointer is NULL
27827  */
27828 
27829 static int
27830 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
27831 {
27832 	struct sd_lun		*un;
27833 	struct uscsi_cmd	*com;
27834 	struct cdrom_subchnl	subchanel;
27835 	struct cdrom_subchnl	*subchnl = &subchanel;
27836 	char			cdb[CDB_GROUP1];
27837 	caddr_t			buffer;
27838 	int			rval;
27839 
27840 	if (data == NULL) {
27841 		return (EINVAL);
27842 	}
27843 
27844 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27845 	    (un->un_state == SD_STATE_OFFLINE)) {
27846 		return (ENXIO);
27847 	}
27848 
27849 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
27850 		return (EFAULT);
27851 	}
27852 
27853 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
27854 	bzero(cdb, CDB_GROUP1);
27855 	cdb[0] = SCMD_READ_SUBCHANNEL;
27856 	/* Set the MSF bit based on the user requested address format */
27857 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
27858 	/*
27859 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
27860 	 * returned
27861 	 */
27862 	cdb[2] = 0x40;
27863 	/*
27864 	 * Set byte 3 to specify the return data format. A value of 0x01
27865 	 * indicates that the CD-ROM current position should be returned.
27866 	 */
27867 	cdb[3] = 0x01;
27868 	cdb[8] = 0x10;
27869 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27870 	com->uscsi_cdb	   = cdb;
27871 	com->uscsi_cdblen  = CDB_GROUP1;
27872 	com->uscsi_bufaddr = buffer;
27873 	com->uscsi_buflen  = 16;
27874 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27875 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
27876 	    UIO_SYSSPACE, SD_PATH_STANDARD);
27877 	if (rval != 0) {
27878 		kmem_free(buffer, 16);
27879 		kmem_free(com, sizeof (*com));
27880 		return (rval);
27881 	}
27882 
27883 	/* Process the returned Q sub-channel data */
27884 	subchnl->cdsc_audiostatus = buffer[1];
27885 	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
27886 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
27887 	subchnl->cdsc_trk	= buffer[6];
27888 	subchnl->cdsc_ind	= buffer[7];
27889 	if (subchnl->cdsc_format & CDROM_LBA) {
27890 		subchnl->cdsc_absaddr.lba =
27891 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
27892 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
27893 		subchnl->cdsc_reladdr.lba =
27894 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
27895 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
27896 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
27897 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
27898 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
27899 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
27900 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
27901 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
27902 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
27903 	} else {
27904 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
27905 		subchnl->cdsc_absaddr.msf.second = buffer[10];
27906 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
27907 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
27908 		subchnl->cdsc_reladdr.msf.second = buffer[14];
27909 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
27910 	}
27911 	kmem_free(buffer, 16);
27912 	kmem_free(com, sizeof (*com));
27913 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
27914 	    != 0) {
27915 		return (EFAULT);
27916 	}
27917 	return (rval);
27918 }
27919 
27920 
27921 /*
27922  *    Function: sr_read_tocentry()
27923  *
27924  * Description: This routine is the driver entry point for handling CD-ROM
27925  *		ioctl requests to read from the Table of Contents (TOC)
27926  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
27927  *		fields, the starting address (LBA or MSF format per the user)
27928  *		and the data mode if the user specified track is a data track.
27929  *
27930  *		Note: The READ HEADER (0x44) command used in this routine is
27931  *		obsolete per the SCSI MMC spec but still supported in the
 *		MT FUJI vendor spec. Most equipment is adhering to MT FUJI
27933  *		therefore the command is still implemented in this routine.
27934  *
27935  *   Arguments: dev	- the device 'dev_t'
27936  *		data	- pointer to user provided toc entry structure,
27937  *			  specifying the track # and the address format
27938  *			  (LBA or MSF).
27939  *		flag	- this argument is a pass through to ddi_copyxxx()
27940  *		          directly from the mode argument of ioctl().
27941  *
27942  * Return Code: the code returned by sd_send_scsi_cmd()
27943  *		EFAULT if ddi_copyxxx() fails
27944  *		ENXIO if fail ddi_get_soft_state
27945  *		EINVAL if data pointer is NULL
27946  */
27947 
27948 static int
27949 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
27950 {
27951 	struct sd_lun		*un = NULL;
27952 	struct uscsi_cmd	*com;
27953 	struct cdrom_tocentry	toc_entry;
27954 	struct cdrom_tocentry	*entry = &toc_entry;
27955 	caddr_t			buffer;
27956 	int			rval;
27957 	char			cdb[CDB_GROUP1];
27958 
27959 	if (data == NULL) {
27960 		return (EINVAL);
27961 	}
27962 
27963 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27964 	    (un->un_state == SD_STATE_OFFLINE)) {
27965 		return (ENXIO);
27966 	}
27967 
27968 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
27969 		return (EFAULT);
27970 	}
27971 
27972 	/* Validate the requested track and address format */
27973 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
27974 		return (EINVAL);
27975 	}
27976 
27977 	if (entry->cdte_track == 0) {
27978 		return (EINVAL);
27979 	}
27980 
27981 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
27982 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27983 	bzero(cdb, CDB_GROUP1);
27984 
27985 	cdb[0] = SCMD_READ_TOC;
27986 	/* Set the MSF bit based on the user requested address format  */
27987 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
27988 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
27989 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
27990 	} else {
27991 		cdb[6] = entry->cdte_track;
27992 	}
27993 
27994 	/*
27995 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
27996 	 * (4 byte TOC response header + 8 byte track descriptor)
27997 	 */
27998 	cdb[8] = 12;
27999 	com->uscsi_cdb	   = cdb;
28000 	com->uscsi_cdblen  = CDB_GROUP1;
28001 	com->uscsi_bufaddr = buffer;
28002 	com->uscsi_buflen  = 0x0C;
28003 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
28004 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28005 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28006 	if (rval != 0) {
28007 		kmem_free(buffer, 12);
28008 		kmem_free(com, sizeof (*com));
28009 		return (rval);
28010 	}
28011 
28012 	/* Process the toc entry */
28013 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
28014 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
28015 	if (entry->cdte_format & CDROM_LBA) {
28016 		entry->cdte_addr.lba =
28017 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28018 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28019 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
28020 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
28021 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
28022 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
28023 		/*
28024 		 * Send a READ TOC command using the LBA address format to get
28025 		 * the LBA for the track requested so it can be used in the
28026 		 * READ HEADER request
28027 		 *
28028 		 * Note: The MSF bit of the READ HEADER command specifies the
28029 		 * output format. The block address specified in that command
28030 		 * must be in LBA format.
28031 		 */
28032 		cdb[1] = 0;
28033 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28034 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28035 		if (rval != 0) {
28036 			kmem_free(buffer, 12);
28037 			kmem_free(com, sizeof (*com));
28038 			return (rval);
28039 		}
28040 	} else {
28041 		entry->cdte_addr.msf.minute	= buffer[9];
28042 		entry->cdte_addr.msf.second	= buffer[10];
28043 		entry->cdte_addr.msf.frame	= buffer[11];
28044 		/*
28045 		 * Send a READ TOC command using the LBA address format to get
28046 		 * the LBA for the track requested so it can be used in the
28047 		 * READ HEADER request
28048 		 *
28049 		 * Note: The MSF bit of the READ HEADER command specifies the
28050 		 * output format. The block address specified in that command
28051 		 * must be in LBA format.
28052 		 */
28053 		cdb[1] = 0;
28054 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28055 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28056 		if (rval != 0) {
28057 			kmem_free(buffer, 12);
28058 			kmem_free(com, sizeof (*com));
28059 			return (rval);
28060 		}
28061 	}
28062 
28063 	/*
28064 	 * Build and send the READ HEADER command to determine the data mode of
28065 	 * the user specified track.
28066 	 */
28067 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
28068 	    (entry->cdte_track != CDROM_LEADOUT)) {
28069 		bzero(cdb, CDB_GROUP1);
28070 		cdb[0] = SCMD_READ_HEADER;
28071 		cdb[2] = buffer[8];
28072 		cdb[3] = buffer[9];
28073 		cdb[4] = buffer[10];
28074 		cdb[5] = buffer[11];
28075 		cdb[8] = 0x08;
28076 		com->uscsi_buflen = 0x08;
28077 		rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28078 		    UIO_SYSSPACE, SD_PATH_STANDARD);
28079 		if (rval == 0) {
28080 			entry->cdte_datamode = buffer[0];
28081 		} else {
28082 			/*
28083 			 * READ HEADER command failed, since this is
28084 			 * obsoleted in one spec, its better to return
28085 			 * -1 for an invlid track so that we can still
28086 			 * recieve the rest of the TOC data.
28087 			 */
28088 			entry->cdte_datamode = (uchar_t)-1;
28089 		}
28090 	} else {
28091 		entry->cdte_datamode = (uchar_t)-1;
28092 	}
28093 
28094 	kmem_free(buffer, 12);
28095 	kmem_free(com, sizeof (*com));
28096 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
28097 		return (EFAULT);
28098 
28099 	return (rval);
28100 }
28101 
28102 
28103 /*
28104  *    Function: sr_read_tochdr()
28105  *
28106  * Description: This routine is the driver entry point for handling CD-ROM
28107  * 		ioctl requests to read the Table of Contents (TOC) header
 *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
28109  *		and ending track numbers
28110  *
28111  *   Arguments: dev	- the device 'dev_t'
28112  *		data	- pointer to user provided toc header structure,
28113  *			  specifying the starting and ending track numbers.
28114  *		flag	- this argument is a pass through to ddi_copyxxx()
28115  *			  directly from the mode argument of ioctl().
28116  *
28117  * Return Code: the code returned by sd_send_scsi_cmd()
28118  *		EFAULT if ddi_copyxxx() fails
28119  *		ENXIO if fail ddi_get_soft_state
28120  *		EINVAL if data pointer is NULL
28121  */
28122 
28123 static int
28124 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
28125 {
28126 	struct sd_lun		*un;
28127 	struct uscsi_cmd	*com;
28128 	struct cdrom_tochdr	toc_header;
28129 	struct cdrom_tochdr	*hdr = &toc_header;
28130 	char			cdb[CDB_GROUP1];
28131 	int			rval;
28132 	caddr_t			buffer;
28133 
28134 	if (data == NULL) {
28135 		return (EINVAL);
28136 	}
28137 
28138 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28139 	    (un->un_state == SD_STATE_OFFLINE)) {
28140 		return (ENXIO);
28141 	}
28142 
28143 	buffer = kmem_zalloc(4, KM_SLEEP);
28144 	bzero(cdb, CDB_GROUP1);
28145 	cdb[0] = SCMD_READ_TOC;
28146 	/*
28147 	 * Specifying a track number of 0x00 in the READ TOC command indicates
28148 	 * that the TOC header should be returned
28149 	 */
28150 	cdb[6] = 0x00;
28151 	/*
28152 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
28153 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
28154 	 */
28155 	cdb[8] = 0x04;
28156 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28157 	com->uscsi_cdb	   = cdb;
28158 	com->uscsi_cdblen  = CDB_GROUP1;
28159 	com->uscsi_bufaddr = buffer;
28160 	com->uscsi_buflen  = 0x04;
28161 	com->uscsi_timeout = 300;
28162 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28163 
28164 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
28165 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28166 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28167 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
28168 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
28169 	} else {
28170 		hdr->cdth_trk0 = buffer[2];
28171 		hdr->cdth_trk1 = buffer[3];
28172 	}
28173 	kmem_free(buffer, 4);
28174 	kmem_free(com, sizeof (*com));
28175 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
28176 		return (EFAULT);
28177 	}
28178 	return (rval);
28179 }
28180 
28181 
28182 /*
28183  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
28184  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
28185  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
28186  * digital audio and extended architecture digital audio. These modes are
28187  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
28188  * MMC specs.
28189  *
28190  * In addition to support for the various data formats these routines also
28191  * include support for devices that implement only the direct access READ
28192  * commands (0x08, 0x28), devices that implement the READ_CD commands
28193  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
28194  * READ CDXA commands (0xD8, 0xDB)
28195  */
28196 
28197 /*
28198  *    Function: sr_read_mode1()
28199  *
28200  * Description: This routine is the driver entry point for handling CD-ROM
28201  *		ioctl read mode1 requests (CDROMREADMODE1).
28202  *
28203  *   Arguments: dev	- the device 'dev_t'
28204  *		data	- pointer to user provided cd read structure specifying
28205  *			  the lba buffer address and length.
28206  *		flag	- this argument is a pass through to ddi_copyxxx()
28207  *			  directly from the mode argument of ioctl().
28208  *
28209  * Return Code: the code returned by sd_send_scsi_cmd()
28210  *		EFAULT if ddi_copyxxx() fails
28211  *		ENXIO if fail ddi_get_soft_state
28212  *		EINVAL if data pointer is NULL
28213  */
28214 
28215 static int
28216 sr_read_mode1(dev_t dev, caddr_t data, int flag)
28217 {
28218 	struct sd_lun		*un;
28219 	struct cdrom_read	mode1_struct;
28220 	struct cdrom_read	*mode1 = &mode1_struct;
28221 	int			rval;
28222 #ifdef _MULTI_DATAMODEL
28223 	/* To support ILP32 applications in an LP64 world */
28224 	struct cdrom_read32	cdrom_read32;
28225 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28226 #endif /* _MULTI_DATAMODEL */
28227 
28228 	if (data == NULL) {
28229 		return (EINVAL);
28230 	}
28231 
28232 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28233 	    (un->un_state == SD_STATE_OFFLINE)) {
28234 		return (ENXIO);
28235 	}
28236 
28237 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28238 	    "sd_read_mode1: entry: un:0x%p\n", un);
28239 
28240 #ifdef _MULTI_DATAMODEL
28241 	switch (ddi_model_convert_from(flag & FMODELS)) {
28242 	case DDI_MODEL_ILP32:
28243 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28244 			return (EFAULT);
28245 		}
28246 		/* Convert the ILP32 uscsi data from the application to LP64 */
28247 		cdrom_read32tocdrom_read(cdrd32, mode1);
28248 		break;
28249 	case DDI_MODEL_NONE:
28250 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28251 			return (EFAULT);
28252 		}
28253 	}
28254 #else /* ! _MULTI_DATAMODEL */
28255 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28256 		return (EFAULT);
28257 	}
28258 #endif /* _MULTI_DATAMODEL */
28259 
28260 	rval = sd_send_scsi_READ(un, mode1->cdread_bufaddr,
28261 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
28262 
28263 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28264 	    "sd_read_mode1: exit: un:0x%p\n", un);
28265 
28266 	return (rval);
28267 }
28268 
28269 
28270 /*
28271  *    Function: sr_read_cd_mode2()
28272  *
28273  * Description: This routine is the driver entry point for handling CD-ROM
28274  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28275  *		support the READ CD (0xBE) command or the 1st generation
28276  *		READ CD (0xD4) command.
28277  *
28278  *   Arguments: dev	- the device 'dev_t'
28279  *		data	- pointer to user provided cd read structure specifying
28280  *			  the lba buffer address and length.
28281  *		flag	- this argument is a pass through to ddi_copyxxx()
28282  *			  directly from the mode argument of ioctl().
28283  *
28284  * Return Code: the code returned by sd_send_scsi_cmd()
28285  *		EFAULT if ddi_copyxxx() fails
28286  *		ENXIO if fail ddi_get_soft_state
28287  *		EINVAL if data pointer is NULL
28288  */
28289 
28290 static int
28291 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
28292 {
28293 	struct sd_lun		*un;
28294 	struct uscsi_cmd	*com;
28295 	struct cdrom_read	mode2_struct;
28296 	struct cdrom_read	*mode2 = &mode2_struct;
28297 	uchar_t			cdb[CDB_GROUP5];
28298 	int			nblocks;
28299 	int			rval;
28300 #ifdef _MULTI_DATAMODEL
28301 	/*  To support ILP32 applications in an LP64 world */
28302 	struct cdrom_read32	cdrom_read32;
28303 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28304 #endif /* _MULTI_DATAMODEL */
28305 
28306 	if (data == NULL) {
28307 		return (EINVAL);
28308 	}
28309 
28310 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28311 	    (un->un_state == SD_STATE_OFFLINE)) {
28312 		return (ENXIO);
28313 	}
28314 
	/*
	 * Copy in the caller's cdrom_read request, converting from the
	 * ILP32 structure layout when the caller is a 32-bit application.
	 */
28315 #ifdef _MULTI_DATAMODEL
28316 	switch (ddi_model_convert_from(flag & FMODELS)) {
28317 	case DDI_MODEL_ILP32:
28318 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28319 			return (EFAULT);
28320 		}
28321 		/* Convert the ILP32 uscsi data from the application to LP64 */
28322 		cdrom_read32tocdrom_read(cdrd32, mode2);
28323 		break;
28324 	case DDI_MODEL_NONE:
28325 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28326 			return (EFAULT);
28327 		}
28328 		break;
28329 	}
28330 
28331 #else /* ! _MULTI_DATAMODEL */
28332 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28333 		return (EFAULT);
28334 	}
28335 #endif /* _MULTI_DATAMODEL */
28336 
28337 	bzero(cdb, sizeof (cdb));
28338 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
28339 		/* Read command supported by 1st generation atapi drives */
28340 		cdb[0] = SCMD_READ_CDD4;
28341 	} else {
28342 		/* Universal CD Access Command */
28343 		cdb[0] = SCMD_READ_CD;
28344 	}
28345 
28346 	/*
28347 	 * Set expected sector type to: 2336s byte, Mode 2 Yellow Book
28348 	 */
28349 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
28350 
28351 	/* set the start address */
28352 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
28353 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
28354 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28355 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
28356 
	/*
	 * A mode 2 user-data sector carries 2336 bytes; any remainder of
	 * cdread_buflen that is not a whole sector is silently ignored.
	 */
28357 	/* set the transfer length */
28358 	nblocks = mode2->cdread_buflen / 2336;
28359 	cdb[6] = (uchar_t)(nblocks >> 16);
28360 	cdb[7] = (uchar_t)(nblocks >> 8);
28361 	cdb[8] = (uchar_t)nblocks;
28362 
28363 	/* set the filter bits */
28364 	cdb[9] = CDROM_READ_CD_USERDATA;
28365 
28366 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28367 	com->uscsi_cdb = (caddr_t)cdb;
28368 	com->uscsi_cdblen = sizeof (cdb);
28369 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28370 	com->uscsi_buflen = mode2->cdread_buflen;
28371 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28372 
	/* The data buffer is a user-space address, hence UIO_USERSPACE. */
28373 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28374 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28375 	kmem_free(com, sizeof (*com));
28376 	return (rval);
28377 }
28378 
28379 
28380 /*
28381  *    Function: sr_read_mode2()
28382  *
28383  * Description: This routine is the driver entry point for handling CD-ROM
28384  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28385  *		do not support the READ CD (0xBE) command.
28386  *
28387  *   Arguments: dev	- the device 'dev_t'
28388  *		data	- pointer to user provided cd read structure specifying
28389  *			  the lba buffer address and length.
28390  *		flag	- this argument is a pass through to ddi_copyxxx()
28391  *			  directly from the mode argument of ioctl().
28392  *
28393  * Return Code: the code returned by sd_send_scsi_cmd()
28394  *		EFAULT if ddi_copyxxx() fails
28395  *		ENXIO if fail ddi_get_soft_state
28396  *		EINVAL if data pointer is NULL
28397  *		EIO if fail to reset block size
28398  *		EAGAIN if commands are in progress in the driver
28399  */
28400 
28401 static int
28402 sr_read_mode2(dev_t dev, caddr_t data, int flag)
28403 {
28404 	struct sd_lun		*un;
28405 	struct cdrom_read	mode2_struct;
28406 	struct cdrom_read	*mode2 = &mode2_struct;
28407 	int			rval;
28408 	uint32_t		restore_blksize;
28409 	struct uscsi_cmd	*com;
28410 	uchar_t			cdb[CDB_GROUP0];
28411 	int			nblocks;
28412 
28413 #ifdef _MULTI_DATAMODEL
28414 	/* To support ILP32 applications in an LP64 world */
28415 	struct cdrom_read32	cdrom_read32;
28416 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28417 #endif /* _MULTI_DATAMODEL */
28418 
28419 	if (data == NULL) {
28420 		return (EINVAL);
28421 	}
28422 
28423 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28424 	    (un->un_state == SD_STATE_OFFLINE)) {
28425 		return (ENXIO);
28426 	}
28427 
28428 	/*
28429 	 * Because this routine will update the device and driver block size
28430 	 * being used we want to make sure there are no commands in progress.
28431 	 * If commands are in progress the user will have to try again.
28432 	 *
28433 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
28434 	 * in sdioctl to protect commands from sdioctl through to the top of
28435 	 * sd_uscsi_strategy. See sdioctl for details.
28436 	 */
28437 	mutex_enter(SD_MUTEX(un));
28438 	if (un->un_ncmds_in_driver != 1) {
28439 		mutex_exit(SD_MUTEX(un));
28440 		return (EAGAIN);
28441 	}
28442 	mutex_exit(SD_MUTEX(un));
28443 
28444 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28445 	    "sd_read_mode2: entry: un:0x%p\n", un);
28446 
	/*
	 * Copy in the caller's cdrom_read request; note that failures here
	 * return directly -- the target block size has not been changed yet.
	 */
28447 #ifdef _MULTI_DATAMODEL
28448 	switch (ddi_model_convert_from(flag & FMODELS)) {
28449 	case DDI_MODEL_ILP32:
28450 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28451 			return (EFAULT);
28452 		}
28453 		/* Convert the ILP32 uscsi data from the application to LP64 */
28454 		cdrom_read32tocdrom_read(cdrd32, mode2);
28455 		break;
28456 	case DDI_MODEL_NONE:
28457 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28458 			return (EFAULT);
28459 		}
28460 		break;
28461 	}
28462 #else /* ! _MULTI_DATAMODEL */
28463 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
28464 		return (EFAULT);
28465 	}
28466 #endif /* _MULTI_DATAMODEL */
28467 
28468 	/* Store the current target block size for restoration later */
28469 	restore_blksize = un->un_tgt_blocksize;
28470 
28471 	/* Change the device and soft state target block size to 2336 */
28472 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
28473 		rval = EIO;
28474 		goto done;
28475 	}
28476 
28477 
28478 	bzero(cdb, sizeof (cdb));
28479 
28480 	/* set READ operation */
28481 	cdb[0] = SCMD_READ;
28482 
28483 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
28484 	mode2->cdread_lba >>= 2;
28485 
	/* Group 0 READ(6): the LBA field is only 21 bits, hence the 0x1F mask */
28486 	/* set the start address */
28487 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
28488 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28489 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
28490 
28491 	/* set the transfer length */
28492 	nblocks = mode2->cdread_buflen / 2336;
28493 	cdb[4] = (uchar_t)nblocks & 0xFF;
28494 
28495 	/* build command */
28496 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28497 	com->uscsi_cdb = (caddr_t)cdb;
28498 	com->uscsi_cdblen = sizeof (cdb);
28499 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28500 	com->uscsi_buflen = mode2->cdread_buflen;
28501 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28502 
28503 	/*
28504 	 * Issue SCSI command with user space address for read buffer.
28505 	 *
28506 	 * This sends the command through main channel in the driver.
28507 	 *
28508 	 * Since this is accessed via an IOCTL call, we go through the
28509 	 * standard path, so that if the device was powered down, then
28510 	 * it would be 'awakened' to handle the command.
28511 	 */
28512 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28513 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28514 
28515 	kmem_free(com, sizeof (*com));
28516 
28517 	/* Restore the device and soft state target block size */
28518 	if (sr_sector_mode(dev, restore_blksize) != 0) {
28519 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28520 		    "can't do switch back to mode 1\n");
28521 		/*
28522 		 * If sd_send_scsi_cmd succeeded we still need to report
28523 		 * an error because we failed to reset the block size
28524 		 */
28525 		if (rval == 0) {
28526 			rval = EIO;
28527 		}
28528 	}
28529 
28530 done:
28531 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28532 	    "sd_read_mode2: exit: un:0x%p\n", un);
28533 
28534 	return (rval);
28535 }
28536 
28537 
28538 /*
28539  *    Function: sr_sector_mode()
28540  *
28541  * Description: This utility function is used by sr_read_mode2 to set the target
28542  *		block size based on the user specified size. This is a legacy
28543  *		implementation based upon a vendor specific mode page
28544  *
28545  *   Arguments: dev	- the device 'dev_t'
28546  *		blksize	- the block size being set, either 2336 or
28547  *			  512.
28548  *
28549  * Return Code: the code returned by sd_send_scsi_MODE_SENSE() or
28550  *		sd_send_scsi_MODE_SELECT() if either fails
28551  *		ENXIO if fail ddi_get_soft_state
28552  *		0 if successful
28553  */
28554 
28555 static int
28556 sr_sector_mode(dev_t dev, uint32_t blksize)
28557 {
28558 	struct sd_lun	*un;
28559 	uchar_t		*sense;
28560 	uchar_t		*select;
28561 	int		rval;
28562 
28563 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28564 	    (un->un_state == SD_STATE_OFFLINE)) {
28565 		return (ENXIO);
28566 	}
28567 
28568 	sense = kmem_zalloc(20, KM_SLEEP);
28569 
28570 	/* Note: This is a vendor specific mode page (0x81) */
28571 	if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, 20, 0x81,
28572 	    SD_PATH_STANDARD)) != 0) {
28573 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28574 		    "sr_sector_mode: Mode Sense failed\n");
28575 		kmem_free(sense, 20);
28576 		return (rval);
28577 	}
	/*
	 * Build the 20-byte mode select parameter list:
	 *   byte 3      = 0x08  block descriptor length
	 *   bytes 10-11 = requested logical block size (big-endian, in the
	 *                 block descriptor)
	 *   byte 12     = 0x01  page code (0x81 with the PS bit masked off)
	 *   byte 13     = 0x06  page length
	 *   bytes 14-15 = carried over from the current sense data; bit 0 of
	 *                 byte 14 presumably enables 2336-byte (mode 2)
	 *                 sectors -- vendor specific, see note above
	 */
28578 	select = kmem_zalloc(20, KM_SLEEP);
28579 	select[3] = 0x08;
28580 	select[10] = ((blksize >> 8) & 0xff);
28581 	select[11] = (blksize & 0xff);
28582 	select[12] = 0x01;
28583 	select[13] = 0x06;
28584 	select[14] = sense[14];
28585 	select[15] = sense[15];
28586 	if (blksize == SD_MODE2_BLKSIZE) {
28587 		select[14] |= 0x01;
28588 	}
28589 
28590 	if ((rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select, 20,
28591 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD)) != 0) {
28592 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28593 		    "sr_sector_mode: Mode Select failed\n");
28594 	} else {
28595 		/*
28596 		 * Only update the softstate block size if we successfully
28597 		 * changed the device block mode.
28598 		 */
28599 		mutex_enter(SD_MUTEX(un));
28600 		sd_update_block_info(un, blksize, 0);
28601 		mutex_exit(SD_MUTEX(un));
28602 	}
28603 	kmem_free(sense, 20);
28604 	kmem_free(select, 20);
28605 	return (rval);
28606 }
28607 
28608 
28609 /*
28610  *    Function: sr_read_cdda()
28611  *
28612  * Description: This routine is the driver entry point for handling CD-ROM
28613  *		ioctl requests to return CD-DA or subcode data. (CDROMCDDA) If
28614  *		the target supports CDDA these requests are handled via a vendor
28615  *		specific command (0xD8) If the target does not support CDDA
28616  *		these requests are handled via the READ CD command (0xBE).
28617  *
28618  *   Arguments: dev	- the device 'dev_t'
28619  *		data	- pointer to user provided CD-DA structure specifying
28620  *			  the track starting address, transfer length, and
28621  *			  subcode options.
28622  *		flag	- this argument is a pass through to ddi_copyxxx()
28623  *			  directly from the mode argument of ioctl().
28624  *
28625  * Return Code: the code returned by sd_send_scsi_cmd()
28626  *		EFAULT if ddi_copyxxx() fails
28627  *		ENXIO if fail ddi_get_soft_state
28628  *		EINVAL if invalid arguments are provided
28629  *		ENOTTY
28630  */
28631 
28632 static int
28633 sr_read_cdda(dev_t dev, caddr_t data, int flag)
28634 {
28635 	struct sd_lun			*un;
28636 	struct uscsi_cmd		*com;
28637 	struct cdrom_cdda		*cdda;
28638 	int				rval;
28639 	size_t				buflen;
28640 	char				cdb[CDB_GROUP5];
28641 
28642 #ifdef _MULTI_DATAMODEL
28643 	/* To support ILP32 applications in an LP64 world */
28644 	struct cdrom_cdda32	cdrom_cdda32;
28645 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
28646 #endif /* _MULTI_DATAMODEL */
28647 
28648 	if (data == NULL) {
28649 		return (EINVAL);
28650 	}
28651 
28652 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28653 		return (ENXIO);
28654 	}
28655 
28656 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
28657 
28658 #ifdef _MULTI_DATAMODEL
28659 	switch (ddi_model_convert_from(flag & FMODELS)) {
28660 	case DDI_MODEL_ILP32:
28661 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
28662 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28663 			    "sr_read_cdda: ddi_copyin Failed\n");
28664 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28665 			return (EFAULT);
28666 		}
28667 		/* Convert the ILP32 uscsi data from the application to LP64 */
28668 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
28669 		break;
28670 	case DDI_MODEL_NONE:
28671 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28672 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28673 			    "sr_read_cdda: ddi_copyin Failed\n");
28674 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28675 			return (EFAULT);
28676 		}
28677 		break;
28678 	}
28679 #else /* ! _MULTI_DATAMODEL */
28680 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28681 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28682 		    "sr_read_cdda: ddi_copyin Failed\n");
28683 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28684 		return (EFAULT);
28685 	}
28686 #endif /* _MULTI_DATAMODEL */
28687 
28688 	/*
28689 	 * Since MMC-2 expects max 3 bytes for length, check if the
28690 	 * length input is greater than 3 bytes
28691 	 */
28692 	if ((cdda->cdda_length & 0xFF000000) != 0) {
28693 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
28694 		    "cdrom transfer length too large: %d (limit %d)\n",
28695 		    cdda->cdda_length, 0xFFFFFF);
28696 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28697 		return (EINVAL);
28698 	}
28699 
	/*
	 * Compute the total transfer size: the per-block frame size grows
	 * with the amount of subcode data requested alongside the audio.
	 */
28700 	switch (cdda->cdda_subcode) {
28701 	case CDROM_DA_NO_SUBCODE:
28702 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
28703 		break;
28704 	case CDROM_DA_SUBQ:
28705 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
28706 		break;
28707 	case CDROM_DA_ALL_SUBCODE:
28708 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
28709 		break;
28710 	case CDROM_DA_SUBCODE_ONLY:
28711 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
28712 		break;
28713 	default:
28714 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28715 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
28716 		    cdda->cdda_subcode);
28717 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28718 		return (EINVAL);
28719 	}
28720 
28721 	/* Build and send the command */
28722 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28723 	bzero(cdb, CDB_GROUP5);
28724 
28725 	if (un->un_f_cfg_cdda == TRUE) {
28726 		cdb[0] = (char)SCMD_READ_CD;
		/*
		 * cdb[1] = 0x04 and cdb[9] = 0x10 select the sector type and
		 * returned-data fields for READ CD -- presumably CD-DA
		 * sectors with user data; confirm against the MMC READ CD
		 * command definition.
		 */
28727 		cdb[1] = 0x04;
28728 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28729 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28730 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28731 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28732 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28733 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28734 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
28735 		cdb[9] = 0x10;
		/* Map the ioctl subcode selector onto READ CD byte 10 */
28736 		switch (cdda->cdda_subcode) {
28737 		case CDROM_DA_NO_SUBCODE :
28738 			cdb[10] = 0x0;
28739 			break;
28740 		case CDROM_DA_SUBQ :
28741 			cdb[10] = 0x2;
28742 			break;
28743 		case CDROM_DA_ALL_SUBCODE :
28744 			cdb[10] = 0x1;
28745 			break;
28746 		case CDROM_DA_SUBCODE_ONLY :
28747 			/* FALLTHROUGH */
28748 		default :
28749 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28750 			kmem_free(com, sizeof (*com));
28751 			return (ENOTTY);
28752 		}
28753 	} else {
		/* Vendor unique READ CDDA (0xD8): 4-byte length, subcode byte */
28754 		cdb[0] = (char)SCMD_READ_CDDA;
28755 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28756 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28757 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28758 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28759 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
28760 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28761 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28762 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
28763 		cdb[10] = cdda->cdda_subcode;
28764 	}
28765 
28766 	com->uscsi_cdb = cdb;
28767 	com->uscsi_cdblen = CDB_GROUP5;
28768 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
28769 	com->uscsi_buflen = buflen;
28770 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28771 
28772 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28773 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28774 
28775 	kmem_free(cdda, sizeof (struct cdrom_cdda));
28776 	kmem_free(com, sizeof (*com));
28777 	return (rval);
28778 }
28779 
28780 
28781 /*
28782  *    Function: sr_read_cdxa()
28783  *
28784  * Description: This routine is the driver entry point for handling CD-ROM
28785  *		ioctl requests to return CD-XA (Extended Architecture) data.
28786  *		(CDROMCDXA).
28787  *
28788  *   Arguments: dev	- the device 'dev_t'
28789  *		data	- pointer to user provided CD-XA structure specifying
28790  *			  the data starting address, transfer length, and format
28791  *		flag	- this argument is a pass through to ddi_copyxxx()
28792  *			  directly from the mode argument of ioctl().
28793  *
28794  * Return Code: the code returned by sd_send_scsi_cmd()
28795  *		EFAULT if ddi_copyxxx() fails
28796  *		ENXIO if fail ddi_get_soft_state
28797  *		EINVAL if data pointer is NULL
28798  */
28799 
28800 static int
28801 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
28802 {
28803 	struct sd_lun		*un;
28804 	struct uscsi_cmd	*com;
28805 	struct cdrom_cdxa	*cdxa;
28806 	int			rval;
28807 	size_t			buflen;
28808 	char			cdb[CDB_GROUP5];
28809 	uchar_t			read_flags;
28810 
28811 #ifdef _MULTI_DATAMODEL
28812 	/* To support ILP32 applications in an LP64 world */
28813 	struct cdrom_cdxa32		cdrom_cdxa32;
28814 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
28815 #endif /* _MULTI_DATAMODEL */
28816 
28817 	if (data == NULL) {
28818 		return (EINVAL);
28819 	}
28820 
28821 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28822 		return (ENXIO);
28823 	}
28824 
28825 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
28826 
28827 #ifdef _MULTI_DATAMODEL
28828 	switch (ddi_model_convert_from(flag & FMODELS)) {
28829 	case DDI_MODEL_ILP32:
28830 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
28831 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28832 			return (EFAULT);
28833 		}
28834 		/*
28835 		 * Convert the ILP32 uscsi data from the
28836 		 * application to LP64 for internal use.
28837 		 */
28838 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
28839 		break;
28840 	case DDI_MODEL_NONE:
28841 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
28842 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28843 			return (EFAULT);
28844 		}
28845 		break;
28846 	}
28847 #else /* ! _MULTI_DATAMODEL */
28848 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
28849 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28850 		return (EFAULT);
28851 	}
28852 #endif /* _MULTI_DATAMODEL */
28853 
28854 	/*
28855 	 * Since MMC-2 expects max 3 bytes for length, check if the
28856 	 * length input is greater than 3 bytes
28857 	 */
28858 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
28859 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
28860 		    "cdrom transfer length too large: %d (limit %d)\n",
28861 		    cdxa->cdxa_length, 0xFFFFFF);
28862 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28863 		return (EINVAL);
28864 	}
28865 
	/*
	 * Select the per-block transfer size and the READ CD byte 9 control
	 * flags for the requested format.  NOTE(review): the flag values
	 * 0x10/0xf8/0xfc presumably select user data only vs. full raw
	 * sector -- confirm against the MMC READ CD command definition.
	 */
28866 	switch (cdxa->cdxa_format) {
28867 	case CDROM_XA_DATA:
28868 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
28869 		read_flags = 0x10;
28870 		break;
28871 	case CDROM_XA_SECTOR_DATA:
28872 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
28873 		read_flags = 0xf8;
28874 		break;
28875 	case CDROM_XA_DATA_W_ERROR:
28876 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
28877 		read_flags = 0xfc;
28878 		break;
28879 	default:
28880 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28881 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
28882 		    cdxa->cdxa_format);
28883 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28884 		return (EINVAL);
28885 	}
28886 
28887 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28888 	bzero(cdb, CDB_GROUP5);
28889 	if (un->un_f_mmc_cap == TRUE) {
28890 		cdb[0] = (char)SCMD_READ_CD;
28891 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
28892 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
28893 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
28894 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
28895 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
28896 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
28897 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
28898 		cdb[9] = (char)read_flags;
28899 	} else {
28900 		/*
28901 		 * Note: A vendor specific command (0xDB) is being used here to
28902 		 * request a read of all subcodes.
28903 		 */
28904 		cdb[0] = (char)SCMD_READ_CDXA;
28905 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
28906 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
28907 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
28908 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
28909 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
28910 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
28911 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
28912 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
28913 		cdb[10] = cdxa->cdxa_format;
28914 	}
28915 	com->uscsi_cdb	   = cdb;
28916 	com->uscsi_cdblen  = CDB_GROUP5;
28917 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
28918 	com->uscsi_buflen  = buflen;
28919 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28920 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_USERSPACE,
28921 	    UIO_SYSSPACE, SD_PATH_STANDARD);
28922 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
28923 	kmem_free(com, sizeof (*com));
28924 	return (rval);
28925 }
28926 
28927 
28928 /*
28929  *    Function: sr_eject()
28930  *
28931  * Description: This routine is the driver entry point for handling CD-ROM
28932  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
28933  *
28934  *   Arguments: dev	- the device 'dev_t'
28935  *
28936  * Return Code: the code returned by sd_send_scsi_cmd()
28937  */
28938 
28939 static int
28940 sr_eject(dev_t dev)
28941 {
28942 	struct sd_lun	*un;
28943 	int		rval;
28944 
28945 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28946 	    (un->un_state == SD_STATE_OFFLINE)) {
28947 		return (ENXIO);
28948 	}
28949 	if ((rval = sd_send_scsi_DOORLOCK(un, SD_REMOVAL_ALLOW,
28950 	    SD_PATH_STANDARD)) != 0) {
28951 		return (rval);
28952 	}
28953 
28954 	rval = sd_send_scsi_START_STOP_UNIT(un, SD_TARGET_EJECT,
28955 	    SD_PATH_STANDARD);
28956 
28957 	if (rval == 0) {
28958 		mutex_enter(SD_MUTEX(un));
28959 		sr_ejected(un);
28960 		un->un_mediastate = DKIO_EJECTED;
28961 		cv_broadcast(&un->un_state_cv);
28962 		mutex_exit(SD_MUTEX(un));
28963 	}
28964 	return (rval);
28965 }
28966 
28967 
28968 /*
28969  *    Function: sr_ejected()
28970  *
28971  * Description: This routine updates the soft state structure to invalidate the
28972  *		geometry information after the media has been ejected or a
28973  *		media eject has been detected.
28974  *
28975  *   Arguments: un - driver soft state (unit) structure
28976  */
28977 
28978 static void
28979 sr_ejected(struct sd_lun *un)
28980 {
28981 	struct sd_errstats *stp;
28982 
28983 	ASSERT(un != NULL);
28984 	ASSERT(mutex_owned(SD_MUTEX(un)));
28985 
28986 	un->un_f_blockcount_is_valid	= FALSE;
28987 	un->un_f_tgt_blocksize_is_valid	= FALSE;
28988 	un->un_f_geometry_is_valid	= FALSE;
28989 
28990 	if (un->un_errstats != NULL) {
28991 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
28992 		stp->sd_capacity.value.ui64 = 0;
28993 	}
28994 }
28995 
28996 
28997 /*
28998  *    Function: sr_check_wp()
28999  *
29000  * Description: This routine checks the write protection of a removable media
29001  *		disk via the write protect bit of the Mode Page Header device
29002  *		specific field.  This routine has been implemented to use the
29003  *		error recovery mode page for all device types.
29004  *		Note: In the future use a sd_send_scsi_MODE_SENSE() routine
29005  *
29006  *   Arguments: dev		- the device 'dev_t'
29007  *
29008  * Return Code: int indicating if the device is write protected (1) or not (0)
29009  *
29010  *     Context: Kernel thread.
29011  *
29012  */
29013 
29014 static int
29015 sr_check_wp(dev_t dev)
29016 {
29017 	struct sd_lun	*un;
29018 	uchar_t		device_specific;
29019 	uchar_t		*sense;
29020 	int		hdrlen;
29021 	int		rval;
29022 	int		retry_flag = FALSE;
29023 
29024 	/*
29025 	 * Note: The return codes for this routine should be reworked to
29026 	 * properly handle the case of a NULL softstate.
29027 	 */
29028 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29029 		return (FALSE);
29030 	}
29031 
	/* ATAPI devices get one retry; see the comment in the failure path */
29032 	if (un->un_f_cfg_is_atapi == TRUE) {
29033 		retry_flag = TRUE;
29034 	}
29035 
29036 retry:
29037 	if (un->un_f_cfg_is_atapi == TRUE) {
29038 		/*
29039 		 * The mode page contents are not required; set the allocation
29040 		 * length for the mode page header only
29041 		 */
29042 		hdrlen = MODE_HEADER_LENGTH_GRP2;
29043 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29044 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense, hdrlen,
29045 		    MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
		/*
		 * device_specific is read unconditionally, but it is only
		 * consulted below when rval == 0; the buffer was zero-filled
		 * by kmem_zalloc(), so the read itself is always safe.
		 */
29046 		device_specific =
29047 		    ((struct mode_header_grp2 *)sense)->device_specific;
29048 	} else {
29049 		hdrlen = MODE_HEADER_LENGTH;
29050 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29051 		rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense, hdrlen,
29052 		    MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
29053 		device_specific =
29054 		    ((struct mode_header *)sense)->device_specific;
29055 	}
29056 
29057 	if (rval != 0) {
29058 		if ((un->un_f_cfg_is_atapi == TRUE) && (retry_flag)) {
29059 			/*
29060 			 * For an Atapi Zip drive, observed the drive
29061 			 * reporting check condition for the first attempt.
29062 			 * Sense data indicating power on or bus device/reset.
29063 			 * Hence in case of failure need to try at least once
29064 			 * for Atapi devices.
29065 			 */
29066 			retry_flag = FALSE;
29067 			kmem_free(sense, hdrlen);
29068 			goto retry;
29069 		} else {
29070 			/*
29071 			 * Write protect mode sense failed; not all disks
29072 			 * understand this query. Return FALSE assuming that
29073 			 * these devices are not writable.
29074 			 */
29075 			rval = FALSE;
29076 		}
29077 	} else {
		/* WP bit of the mode page header device specific field */
29078 		if (device_specific & WRITE_PROTECT) {
29079 			rval = TRUE;
29080 		} else {
29081 			rval = FALSE;
29082 		}
29083 	}
29084 	kmem_free(sense, hdrlen);
29085 	return (rval);
29086 }
29087 
29088 
29089 /*
29090  *    Function: sr_volume_ctrl()
29091  *
29092  * Description: This routine is the driver entry point for handling CD-ROM
29093  *		audio output volume ioctl requests. (CDROMVOLCTRL)
29094  *
29095  *   Arguments: dev	- the device 'dev_t'
29096  *		data	- pointer to user audio volume control structure
29097  *		flag	- this argument is a pass through to ddi_copyxxx()
29098  *			  directly from the mode argument of ioctl().
29099  *
29100  * Return Code: the code returned by sd_send_scsi_cmd()
29101  *		EFAULT if ddi_copyxxx() fails
29102  *		ENXIO if fail ddi_get_soft_state
29103  *		EINVAL if data pointer is NULL
29104  *
29105  */
29106 
static int
sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct cdrom_volctrl    volume;
	struct cdrom_volctrl    *vol = &volume;
	uchar_t			*sense_page;
	uchar_t			*select_page;
	uchar_t			*sense;
	uchar_t			*select;
	int			sense_buflen;
	int			select_buflen;
	int			rval;

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/* Copy the user's requested volume settings into the local struct */
	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
		return (EFAULT);
	}

	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
		struct mode_header_grp2		*sense_mhp;
		struct mode_header_grp2		*select_mhp;
		int				bd_len;

		/*
		 * ATAPI/MMC path: use the 10-byte (CDB group 1) MODE SENSE
		 * with the group-2 mode header.  The sense buffer is sized
		 * with MODE_PARAM_LENGTH_GRP2 to leave room for a block
		 * descriptor, while the select buffer uses only
		 * MODE_HEADER_LENGTH_GRP2 since no block descriptor is sent
		 * back to the device (bdesc_length is zeroed below).
		 */
		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
		select_buflen = MODE_HEADER_LENGTH_GRP2 +
		    MODEPAGE_AUDIO_CTRL_LEN;
		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
		select = kmem_zalloc(select_buflen, KM_SLEEP);
		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP1, sense,
		    sense_buflen, MODEPAGE_AUDIO_CTRL,
		    SD_PATH_STANDARD)) != 0) {
			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
			    "sr_volume_ctrl: Mode Sense Failed\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (rval);
		}
		sense_mhp = (struct mode_header_grp2 *)sense;
		select_mhp = (struct mode_header_grp2 *)select;
		/* Group-2 headers split the descriptor length over 2 bytes */
		bd_len = (sense_mhp->bdesc_length_hi << 8) |
		    sense_mhp->bdesc_length_lo;
		if (bd_len > MODE_BLK_DESC_LENGTH) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense returned invalid "
			    "block descriptor length\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (EIO);
		}
		/* The audio control page follows the header + descriptor */
		sense_page = (uchar_t *)
		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
		/*
		 * Zero the mode data length and block descriptor length
		 * fields of the select header; they are not sent to the
		 * device on MODE SELECT.
		 */
		select_mhp->length_msb = 0;
		select_mhp->length_lsb = 0;
		select_mhp->bdesc_length_hi = 0;
		select_mhp->bdesc_length_lo = 0;
	} else {
		struct mode_header		*sense_mhp, *select_mhp;

		/* Legacy SCSI path: 6-byte (CDB group 0) commands */
		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
		select = kmem_zalloc(select_buflen, KM_SLEEP);
		if ((rval = sd_send_scsi_MODE_SENSE(un, CDB_GROUP0, sense,
		    sense_buflen, MODEPAGE_AUDIO_CTRL,
		    SD_PATH_STANDARD)) != 0) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense Failed\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (rval);
		}
		sense_mhp  = (struct mode_header *)sense;
		select_mhp = (struct mode_header *)select;
		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense returned invalid "
			    "block descriptor length\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (EIO);
		}
		sense_page = (uchar_t *)
		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
		/* Zero fields not sent back to the device on MODE SELECT */
		select_mhp->length = 0;
		select_mhp->bdesc_length = 0;
	}
	/*
	 * Note: An audio control data structure could be created and overlayed
	 * on the following in place of the array indexing method implemented.
	 */

	/* Build the select data for the user volume data */
	select_page[0] = MODEPAGE_AUDIO_CTRL;
	select_page[1] = 0xE;
	/* Set the immediate bit */
	select_page[2] = 0x04;
	/* Zero out reserved fields */
	select_page[3] = 0x00;
	select_page[4] = 0x00;
	/* Return sense data for fields not to be modified */
	select_page[5] = sense_page[5];
	select_page[6] = sense_page[6];
	select_page[7] = sense_page[7];
	/* Set the user specified volume levels for channel 0 and 1 */
	select_page[8] = 0x01;
	select_page[9] = vol->channel0;
	select_page[10] = 0x02;
	select_page[11] = vol->channel1;
	/* Channel 2 and 3 are currently unsupported so return the sense data */
	select_page[12] = sense_page[12];
	select_page[13] = sense_page[13];
	select_page[14] = sense_page[14];
	select_page[15] = sense_page[15];

	/* Send the page using the same CDB group chosen for the sense above */
	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP1, select,
		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
	} else {
		rval = sd_send_scsi_MODE_SELECT(un, CDB_GROUP0, select,
		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
	}

	kmem_free(sense, sense_buflen);
	kmem_free(select, select_buflen);
	return (rval);
}
29244 
29245 
29246 /*
29247  *    Function: sr_read_sony_session_offset()
29248  *
29249  * Description: This routine is the driver entry point for handling CD-ROM
29250  *		ioctl requests for session offset information. (CDROMREADOFFSET)
29251  *		The address of the first track in the last session of a
29252  *		multi-session CD-ROM is returned
29253  *
29254  *		Note: This routine uses a vendor specific key value in the
29255  *		command control field without implementing any vendor check here
29256  *		or in the ioctl routine.
29257  *
29258  *   Arguments: dev	- the device 'dev_t'
29259  *		data	- pointer to an int to hold the requested address
29260  *		flag	- this argument is a pass through to ddi_copyxxx()
29261  *			  directly from the mode argument of ioctl().
29262  *
29263  * Return Code: the code returned by sd_send_scsi_cmd()
29264  *		EFAULT if ddi_copyxxx() fails
29265  *		ENXIO if fail ddi_get_soft_state
29266  *		EINVAL if data pointer is NULL
29267  */
29268 
29269 static int
29270 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
29271 {
29272 	struct sd_lun		*un;
29273 	struct uscsi_cmd	*com;
29274 	caddr_t			buffer;
29275 	char			cdb[CDB_GROUP1];
29276 	int			session_offset = 0;
29277 	int			rval;
29278 
29279 	if (data == NULL) {
29280 		return (EINVAL);
29281 	}
29282 
29283 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29284 	    (un->un_state == SD_STATE_OFFLINE)) {
29285 		return (ENXIO);
29286 	}
29287 
29288 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
29289 	bzero(cdb, CDB_GROUP1);
29290 	cdb[0] = SCMD_READ_TOC;
29291 	/*
29292 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
29293 	 * (4 byte TOC response header + 8 byte response data)
29294 	 */
29295 	cdb[8] = SONY_SESSION_OFFSET_LEN;
29296 	/* Byte 9 is the control byte. A vendor specific value is used */
29297 	cdb[9] = SONY_SESSION_OFFSET_KEY;
29298 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29299 	com->uscsi_cdb = cdb;
29300 	com->uscsi_cdblen = CDB_GROUP1;
29301 	com->uscsi_bufaddr = buffer;
29302 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
29303 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
29304 
29305 	rval = sd_send_scsi_cmd(dev, com, UIO_SYSSPACE, UIO_SYSSPACE,
29306 	    UIO_SYSSPACE, SD_PATH_STANDARD);
29307 	if (rval != 0) {
29308 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29309 		kmem_free(com, sizeof (*com));
29310 		return (rval);
29311 	}
29312 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
29313 		session_offset =
29314 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
29315 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
29316 		/*
29317 		 * Offset returned offset in current lbasize block's. Convert to
29318 		 * 2k block's to return to the user
29319 		 */
29320 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
29321 			session_offset >>= 2;
29322 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
29323 			session_offset >>= 1;
29324 		}
29325 	}
29326 
29327 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
29328 		rval = EFAULT;
29329 	}
29330 
29331 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29332 	kmem_free(com, sizeof (*com));
29333 	return (rval);
29334 }
29335 
29336 
29337 /*
29338  *    Function: sd_wm_cache_constructor()
29339  *
29340  * Description: Cache Constructor for the wmap cache for the read/modify/write
29341  * 		devices.
29342  *
29343  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29344  *		un	- sd_lun structure for the device.
29345  *		flag	- the km flags passed to constructor
29346  *
29347  * Return Code: 0 on success.
29348  *		-1 on failure.
29349  */
29350 
29351 /*ARGSUSED*/
29352 static int
29353 sd_wm_cache_constructor(void *wm, void *un, int flags)
29354 {
29355 	bzero(wm, sizeof (struct sd_w_map));
29356 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
29357 	return (0);
29358 }
29359 
29360 
29361 /*
29362  *    Function: sd_wm_cache_destructor()
29363  *
29364  * Description: Cache destructor for the wmap cache for the read/modify/write
29365  * 		devices.
29366  *
29367  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29368  *		un	- sd_lun structure for the device.
29369  */
29370 /*ARGSUSED*/
29371 static void
29372 sd_wm_cache_destructor(void *wm, void *un)
29373 {
29374 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
29375 }
29376 
29377 
29378 /*
29379  *    Function: sd_range_lock()
29380  *
29381  * Description: Lock the range of blocks specified as parameter to ensure
29382  *		that read, modify write is atomic and no other i/o writes
29383  *		to the same location. The range is specified in terms
29384  *		of start and end blocks. Block numbers are the actual
29385  *		media block numbers and not system.
29386  *
29387  *   Arguments: un	- sd_lun structure for the device.
29388  *		startb - The starting block number
29389  *		endb - The end block number
29390  *		typ - type of i/o - simple/read_modify_write
29391  *
29392  * Return Code: wm  - pointer to the wmap structure.
29393  *
29394  *     Context: This routine can sleep.
29395  */
29396 
static struct sd_w_map *
sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
{
	struct sd_w_map *wmp = NULL;
	struct sd_w_map *sl_wmp = NULL;
	struct sd_w_map *tmp_wmp;
	wm_state state = SD_WM_CHK_LIST;


	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));

	/* Small state machine; loop until the range has been locked. */
	while (state != SD_WM_DONE) {

		switch (state) {
		case SD_WM_CHK_LIST:
			/*
			 * This is the starting state. Check the wmap list
			 * to see if the range is currently available.
			 */
			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
				/*
				 * If this is a simple write and no rmw
				 * i/o is pending then try to lock the
				 * range as the range should be available.
				 */
				state = SD_WM_LOCK_RANGE;
			} else {
				tmp_wmp = sd_get_range(un, startb, endb);
				if (tmp_wmp != NULL) {
					if ((wmp != NULL) && ONLIST(un, wmp)) {
						/*
						 * Should not keep onlist wmps
						 * while waiting this macro
						 * will also do wmp = NULL;
						 */
						FREE_ONLIST_WMAP(un, wmp);
					}
					/*
					 * sl_wmp is the wmap on which wait
					 * is done, since the tmp_wmp points
					 * to the inuse wmap, set sl_wmp to
					 * tmp_wmp and change the state to sleep
					 */
					sl_wmp = tmp_wmp;
					state = SD_WM_WAIT_MAP;
				} else {
					state = SD_WM_LOCK_RANGE;
				}

			}
			break;

		case SD_WM_LOCK_RANGE:
			ASSERT(un->un_wm_cache);
			/*
			 * The range need to be locked, try to get a wmap.
			 * First attempt it with NO_SLEEP, want to avoid a sleep
			 * if possible as we will have to release the sd mutex
			 * if we have to sleep.
			 */
			if (wmp == NULL)
				wmp = kmem_cache_alloc(un->un_wm_cache,
				    KM_NOSLEEP);
			if (wmp == NULL) {
				/*
				 * NOSLEEP attempt failed; drop the mutex for
				 * the blocking allocation.
				 */
				mutex_exit(SD_MUTEX(un));
				_NOTE(DATA_READABLE_WITHOUT_LOCK
				    (sd_lun::un_wm_cache))
				wmp = kmem_cache_alloc(un->un_wm_cache,
				    KM_SLEEP);
				mutex_enter(SD_MUTEX(un));
				/*
				 * we released the mutex so recheck and go to
				 * check list state.
				 */
				state = SD_WM_CHK_LIST;
			} else {
				/*
				 * We exit out of state machine since we
				 * have the wmap. Do the housekeeping first.
				 * place the wmap on the wmap list if it is not
				 * on it already and then set the state to done.
				 */
				wmp->wm_start = startb;
				wmp->wm_end = endb;
				wmp->wm_flags = typ | SD_WM_BUSY;
				if (typ & SD_WTYPE_RMW) {
					/* Count outstanding r-m-w requests */
					un->un_rmw_count++;
				}
				/*
				 * If not already on the list then link
				 */
				if (!ONLIST(un, wmp)) {
					wmp->wm_next = un->un_wm;
					wmp->wm_prev = NULL;
					if (wmp->wm_next)
						wmp->wm_next->wm_prev = wmp;
					un->un_wm = wmp;
				}
				state = SD_WM_DONE;
			}
			break;

		case SD_WM_WAIT_MAP:
			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
			/*
			 * Wait is done on sl_wmp, which is set in the
			 * check_list state.
			 */
			sl_wmp->wm_wanted_count++;
			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
			sl_wmp->wm_wanted_count--;
			/*
			 * We can reuse the memory from the completed sl_wmp
			 * lock range for our new lock, but only if no one is
			 * waiting for it.
			 */
			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
			if (sl_wmp->wm_wanted_count == 0) {
				if (wmp != NULL)
					CHK_N_FREEWMP(un, wmp);
				wmp = sl_wmp;
			}
			sl_wmp = NULL;
			/*
			 * After waking up, need to recheck for availability of
			 * range.
			 */
			state = SD_WM_CHK_LIST;
			break;

		default:
			panic("sd_range_lock: "
			    "Unknown state %d in sd_range_lock", state);
			/*NOTREACHED*/
		} /* switch(state) */

	} /* while(state != SD_WM_DONE) */

	mutex_exit(SD_MUTEX(un));

	ASSERT(wmp != NULL);

	return (wmp);
}
29544 
29545 
29546 /*
29547  *    Function: sd_get_range()
29548  *
 * Description: Find if there is any overlapping I/O to this one
29550  *		Returns the write-map of 1st such I/O, NULL otherwise.
29551  *
29552  *   Arguments: un	- sd_lun structure for the device.
29553  *		startb - The starting block number
29554  *		endb - The end block number
29555  *
29556  * Return Code: wm  - pointer to the wmap structure.
29557  */
29558 
29559 static struct sd_w_map *
29560 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
29561 {
29562 	struct sd_w_map *wmp;
29563 
29564 	ASSERT(un != NULL);
29565 
29566 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
29567 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
29568 			continue;
29569 		}
29570 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
29571 			break;
29572 		}
29573 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
29574 			break;
29575 		}
29576 	}
29577 
29578 	return (wmp);
29579 }
29580 
29581 
29582 /*
29583  *    Function: sd_free_inlist_wmap()
29584  *
29585  * Description: Unlink and free a write map struct.
29586  *
29587  *   Arguments: un      - sd_lun structure for the device.
29588  *		wmp	- sd_w_map which needs to be unlinked.
29589  */
29590 
29591 static void
29592 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
29593 {
29594 	ASSERT(un != NULL);
29595 
29596 	if (un->un_wm == wmp) {
29597 		un->un_wm = wmp->wm_next;
29598 	} else {
29599 		wmp->wm_prev->wm_next = wmp->wm_next;
29600 	}
29601 
29602 	if (wmp->wm_next) {
29603 		wmp->wm_next->wm_prev = wmp->wm_prev;
29604 	}
29605 
29606 	wmp->wm_next = wmp->wm_prev = NULL;
29607 
29608 	kmem_cache_free(un->un_wm_cache, wmp);
29609 }
29610 
29611 
29612 /*
29613  *    Function: sd_range_unlock()
29614  *
29615  * Description: Unlock the range locked by wm.
29616  *		Free write map if nobody else is waiting on it.
29617  *
29618  *   Arguments: un      - sd_lun structure for the device.
 *              wm      - sd_w_map for the range to be unlocked.
29620  */
29621 
29622 static void
29623 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
29624 {
29625 	ASSERT(un != NULL);
29626 	ASSERT(wm != NULL);
29627 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29628 
29629 	mutex_enter(SD_MUTEX(un));
29630 
29631 	if (wm->wm_flags & SD_WTYPE_RMW) {
29632 		un->un_rmw_count--;
29633 	}
29634 
29635 	if (wm->wm_wanted_count) {
29636 		wm->wm_flags = 0;
29637 		/*
29638 		 * Broadcast that the wmap is available now.
29639 		 */
29640 		cv_broadcast(&wm->wm_avail);
29641 	} else {
29642 		/*
29643 		 * If no one is waiting on the map, it should be free'ed.
29644 		 */
29645 		sd_free_inlist_wmap(un, wm);
29646 	}
29647 
29648 	mutex_exit(SD_MUTEX(un));
29649 }
29650 
29651 
29652 /*
29653  *    Function: sd_read_modify_write_task
29654  *
29655  * Description: Called from a taskq thread to initiate the write phase of
29656  *		a read-modify-write request.  This is used for targets where
29657  *		un->un_sys_blocksize != un->un_tgt_blocksize.
29658  *
29659  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29660  *
29661  *     Context: Called under taskq thread context.
29662  */
29663 
29664 static void
29665 sd_read_modify_write_task(void *arg)
29666 {
29667 	struct sd_mapblocksize_info	*bsp;
29668 	struct buf	*bp;
29669 	struct sd_xbuf	*xp;
29670 	struct sd_lun	*un;
29671 
29672 	bp = arg;	/* The bp is given in arg */
29673 	ASSERT(bp != NULL);
29674 
29675 	/* Get the pointer to the layer-private data struct */
29676 	xp = SD_GET_XBUF(bp);
29677 	ASSERT(xp != NULL);
29678 	bsp = xp->xb_private;
29679 	ASSERT(bsp != NULL);
29680 
29681 	un = SD_GET_UN(bp);
29682 	ASSERT(un != NULL);
29683 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29684 
29685 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29686 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29687 
29688 	/*
29689 	 * This is the write phase of a read-modify-write request, called
29690 	 * under the context of a taskq thread in response to the completion
29691 	 * of the read portion of the rmw request completing under interrupt
29692 	 * context. The write request must be sent from here down the iostart
29693 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
29694 	 * we use the layer index saved in the layer-private data area.
29695 	 */
29696 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29697 
29698 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29699 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29700 }
29701 
29702 
29703 /*
29704  *    Function: sddump_do_read_of_rmw()
29705  *
29706  * Description: This routine will be called from sddump, If sddump is called
 *		with an I/O which is not aligned on device blocksize boundary
29708  *		then the write has to be converted to read-modify-write.
29709  *		Do the read part here in order to keep sddump simple.
29710  *		Note - That the sd_mutex is held across the call to this
29711  *		routine.
29712  *
29713  *   Arguments: un	- sd_lun
29714  *		blkno	- block number in terms of media block size.
29715  *		nblk	- number of blocks.
29716  *		bpp	- pointer to pointer to the buf structure. On return
29717  *			from this function, *bpp points to the valid buffer
29718  *			to which the write has to be done.
29719  *
29720  * Return Code: 0 for success or errno-type return code
29721  */
29722 
static int
sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
	struct buf **bpp)
{
	int err;
	int i;
	int rval;
	struct buf *bp;
	struct scsi_pkt *pkt = NULL;
	uint32_t target_blocksize;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	target_blocksize = un->un_tgt_blocksize;

	/* Drop the mutex across the buffer/pkt allocation and polled I/O. */
	mutex_exit(SD_MUTEX(un));

	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
	if (bp == NULL) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
		    "no resources for dumping; giving up");
		err = ENOMEM;
		goto done;
	}

	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
	    blkno, nblk);
	if (rval != 0) {
		scsi_free_consistent_buf(bp);
		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
		    "no resources for dumping; giving up");
		err = ENOMEM;
		goto done;
	}

	/* Dump context: the command is polled, not interrupt driven. */
	pkt->pkt_flags |= FLAG_NOINTR;

	err = EIO;
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {

		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.  We should only check
		 * errors if this condition is not true.  Even then we should
		 * send our own request sense packet only if we have a check
		 * condition and auto request sense has not been performed by
		 * the hba.
		 */
		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");

		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
			err = 0;
			break;
		}

		/*
		 * Check CMD_DEV_GONE 1st, give up if device is gone,
		 * no need to read RQS data.
		 */
		if (pkt->pkt_reason == CMD_DEV_GONE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
			    "Device is gone\n");
			break;
		}

		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with CHECK, try # %d\n", i);
			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
				/* HBA did no auto request sense; do our own */
				(void) sd_send_polled_RQS(un);
			}

			continue;
		}

		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
			int reset_retval = 0;

			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with BUSY, try # %d\n", i);

			/* Prefer a LUN reset when enabled; else reset target */
			if (un->un_f_lun_reset_enabled == TRUE) {
				reset_retval = scsi_reset(SD_ADDRESS(un),
				    RESET_LUN);
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
			(void) sd_send_polled_RQS(un);

		} else {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with 0x%x, try # %d\n",
			    SD_GET_PKT_STATUS(pkt), i);
			mutex_enter(SD_MUTEX(un));
			sd_reset_target(un, pkt);
			mutex_exit(SD_MUTEX(un));
		}

		/*
		 * If we are not getting anywhere with lun/target resets,
		 * let's reset the bus.
		 */
		if (i > SD_NDUMP_RETRIES/2) {
			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			(void) sd_send_polled_RQS(un);
		}

	}
	scsi_destroy_pkt(pkt);

	/* On failure release the buffer; on success hand it to the caller. */
	if (err != 0) {
		scsi_free_consistent_buf(bp);
		*bpp = NULL;
	} else {
		*bpp = bp;
	}

done:
	/* Re-acquire the mutex: the caller holds it across this routine. */
	mutex_enter(SD_MUTEX(un));
	return (err);
}
29847 
29848 
29849 /*
29850  *    Function: sd_failfast_flushq
29851  *
29852  * Description: Take all bp's on the wait queue that have B_FAILFAST set
29853  *		in b_flags and move them onto the failfast queue, then kick
29854  *		off a thread to return all bp's on the failfast queue to
29855  *		their owners with an error set.
29856  *
29857  *   Arguments: un - pointer to the soft state struct for the instance.
29858  *
29859  *     Context: may execute in interrupt context.
29860  */
29861 
static void
sd_failfast_flushq(struct sd_lun *un)
{
	struct buf *bp;
	struct buf *next_waitq_bp;
	struct buf *prev_waitq_bp = NULL;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
	ASSERT(un->un_failfast_bp == NULL);

	SD_TRACE(SD_LOG_IO_FAILFAST, un,
	    "sd_failfast_flushq: entry: un:0x%p\n", un);

	/*
	 * Check if we should flush all bufs when entering failfast state, or
	 * just those with B_FAILFAST set.
	 */
	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
		/*
		 * Move *all* bp's on the wait queue to the failfast flush
		 * queue, including those that do NOT have B_FAILFAST set.
		 */
		if (un->un_failfast_headp == NULL) {
			ASSERT(un->un_failfast_tailp == NULL);
			un->un_failfast_headp = un->un_waitq_headp;
		} else {
			/* Append the waitq to the existing failfast queue */
			ASSERT(un->un_failfast_tailp != NULL);
			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
		}

		un->un_failfast_tailp = un->un_waitq_tailp;

		/* update kstat for each bp moved out of the waitq */
		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
		}

		/* empty the waitq */
		un->un_waitq_headp = un->un_waitq_tailp = NULL;

	} else {
		/*
		 * Go thru the wait queue, pick off all entries with
		 * B_FAILFAST set, and move these onto the failfast queue.
		 */
		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
			/*
			 * Save the pointer to the next bp on the wait queue,
			 * so we get to it on the next iteration of this loop.
			 */
			next_waitq_bp = bp->av_forw;

			/*
			 * If this bp from the wait queue does NOT have
			 * B_FAILFAST set, just move on to the next element
			 * in the wait queue. Note, this is the only place
			 * where it is correct to set prev_waitq_bp.
			 */
			if ((bp->b_flags & B_FAILFAST) == 0) {
				prev_waitq_bp = bp;
				continue;
			}

			/*
			 * Remove the bp from the wait queue.
			 */
			if (bp == un->un_waitq_headp) {
				/* The bp is the first element of the waitq. */
				un->un_waitq_headp = next_waitq_bp;
				if (un->un_waitq_headp == NULL) {
					/* The wait queue is now empty */
					un->un_waitq_tailp = NULL;
				}
			} else {
				/*
				 * The bp is either somewhere in the middle
				 * or at the end of the wait queue.
				 */
				ASSERT(un->un_waitq_headp != NULL);
				ASSERT(prev_waitq_bp != NULL);
				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
				    == 0);
				if (bp == un->un_waitq_tailp) {
					/* bp is the last entry on the waitq. */
					ASSERT(next_waitq_bp == NULL);
					un->un_waitq_tailp = prev_waitq_bp;
				}
				prev_waitq_bp->av_forw = next_waitq_bp;
			}
			bp->av_forw = NULL;

			/*
			 * update kstat since the bp is moved out of
			 * the waitq
			 */
			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);

			/*
			 * Now put the bp onto the failfast queue.
			 */
			if (un->un_failfast_headp == NULL) {
				/* failfast queue is currently empty */
				ASSERT(un->un_failfast_tailp == NULL);
				un->un_failfast_headp =
				    un->un_failfast_tailp = bp;
			} else {
				/* Add the bp to the end of the failfast q */
				ASSERT(un->un_failfast_tailp != NULL);
				ASSERT(un->un_failfast_tailp->b_flags &
				    B_FAILFAST);
				un->un_failfast_tailp->av_forw = bp;
				un->un_failfast_tailp = bp;
			}
		}
	}

	/*
	 * Now return all bp's on the failfast queue to their owners.
	 */
	while ((bp = un->un_failfast_headp) != NULL) {

		un->un_failfast_headp = bp->av_forw;
		if (un->un_failfast_headp == NULL) {
			un->un_failfast_tailp = NULL;
		}

		/*
		 * We want to return the bp with a failure error code, but
		 * we do not want a call to sd_start_cmds() to occur here,
		 * so use sd_return_failed_command_no_restart() instead of
		 * sd_return_failed_command().
		 */
		sd_return_failed_command_no_restart(un, bp, EIO);
	}

	/* Flush the xbuf queues if required. */
	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
	}

	SD_TRACE(SD_LOG_IO_FAILFAST, un,
	    "sd_failfast_flushq: exit: un:0x%p\n", un);
}
30007 
30008 
30009 /*
30010  *    Function: sd_failfast_flushq_callback
30011  *
30012  * Description: Return TRUE if the given bp meets the criteria for failfast
30013  *		flushing. Used with ddi_xbuf_flushq(9F).
30014  *
30015  *   Arguments: bp - ptr to buf struct to be examined.
30016  *
30017  *     Context: Any
30018  */
30019 
30020 static int
30021 sd_failfast_flushq_callback(struct buf *bp)
30022 {
30023 	/*
30024 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
30025 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
30026 	 */
30027 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
30028 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
30029 }
30030 
30031 
30032 
30033 #if defined(__i386) || defined(__amd64)
30034 /*
30035  * Function: sd_setup_next_xfer
30036  *
30037  * Description: Prepare next I/O operation using DMA_PARTIAL
30038  *
30039  */
30040 
30041 static int
30042 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
30043     struct scsi_pkt *pkt, struct sd_xbuf *xp)
30044 {
30045 	ssize_t	num_blks_not_xfered;
30046 	daddr_t	strt_blk_num;
30047 	ssize_t	bytes_not_xfered;
30048 	int	rval;
30049 
30050 	ASSERT(pkt->pkt_resid == 0);
30051 
30052 	/*
30053 	 * Calculate next block number and amount to be transferred.
30054 	 *
30055 	 * How much data NOT transfered to the HBA yet.
30056 	 */
30057 	bytes_not_xfered = xp->xb_dma_resid;
30058 
30059 	/*
30060 	 * figure how many blocks NOT transfered to the HBA yet.
30061 	 */
30062 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
30063 
30064 	/*
30065 	 * set starting block number to the end of what WAS transfered.
30066 	 */
30067 	strt_blk_num = xp->xb_blkno +
30068 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
30069 
30070 	/*
30071 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
30072 	 * will call scsi_initpkt with NULL_FUNC so we do not have to release
30073 	 * the disk mutex here.
30074 	 */
30075 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
30076 	    strt_blk_num, num_blks_not_xfered);
30077 
30078 	if (rval == 0) {
30079 
30080 		/*
30081 		 * Success.
30082 		 *
30083 		 * Adjust things if there are still more blocks to be
30084 		 * transfered.
30085 		 */
30086 		xp->xb_dma_resid = pkt->pkt_resid;
30087 		pkt->pkt_resid = 0;
30088 
30089 		return (1);
30090 	}
30091 
30092 	/*
30093 	 * There's really only one possible return value from
30094 	 * sd_setup_next_rw_pkt which occurs when scsi_init_pkt
30095 	 * returns NULL.
30096 	 */
30097 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
30098 
30099 	bp->b_resid = bp->b_bcount;
30100 	bp->b_flags |= B_ERROR;
30101 
30102 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30103 	    "Error setting up next portion of DMA transfer\n");
30104 
30105 	return (0);
30106 }
30107 #endif
30108 
30109 /*
30110  *    Function: sd_panic_for_res_conflict
30111  *
 * Description: Call panic with a string formatted with "Reservation Conflict"
30113  *		and a human readable identifier indicating the SD instance
30114  *		that experienced the reservation conflict.
30115  *
30116  *   Arguments: un - pointer to the soft state struct for the instance.
30117  *
30118  *     Context: may execute in interrupt context.
30119  */
30120 
30121 #define	SD_RESV_CONFLICT_FMT_LEN 40
30122 void
30123 sd_panic_for_res_conflict(struct sd_lun *un)
30124 {
30125 	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
30126 	char path_str[MAXPATHLEN];
30127 
30128 	(void) snprintf(panic_str, sizeof (panic_str),
30129 	    "Reservation Conflict\nDisk: %s",
30130 	    ddi_pathname(SD_DEVINFO(un), path_str));
30131 
30132 	panic(panic_str);
30133 }
30134 
30135 /*
30136  * Note: The following sd_faultinjection_ioctl( ) routines implement
30137  * driver support for handling fault injection for error analysis
30138  * causing faults in multiple layers of the driver.
30139  *
30140  */
30141 
30142 #ifdef SD_FAULT_INJECTION
30143 static uint_t   sd_fault_injection_on = 0;
30144 
30145 /*
30146  *    Function: sd_faultinjection_ioctl()
30147  *
30148  * Description: This routine is the driver entry point for handling
30149  *              faultinjection ioctls to inject errors into the
30150  *              layer model
30151  *
 *   Arguments: cmd	- the ioctl cmd received
30153  *		arg	- the arguments from user and returns
30154  */
30155 
30156 static void
30157 sd_faultinjection_ioctl(int cmd, intptr_t arg,  struct sd_lun *un) {
30158 
30159 	uint_t i;
30160 	uint_t rval;
30161 
30162 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
30163 
30164 	mutex_enter(SD_MUTEX(un));
30165 
30166 	switch (cmd) {
30167 	case SDIOCRUN:
30168 		/* Allow pushed faults to be injected */
30169 		SD_INFO(SD_LOG_SDTEST, un,
30170 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
30171 
30172 		sd_fault_injection_on = 1;
30173 
30174 		SD_INFO(SD_LOG_IOERR, un,
30175 		    "sd_faultinjection_ioctl: run finished\n");
30176 		break;
30177 
30178 	case SDIOCSTART:
30179 		/* Start Injection Session */
30180 		SD_INFO(SD_LOG_SDTEST, un,
30181 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
30182 
30183 		sd_fault_injection_on = 0;
30184 		un->sd_injection_mask = 0xFFFFFFFF;
30185 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30186 			un->sd_fi_fifo_pkt[i] = NULL;
30187 			un->sd_fi_fifo_xb[i] = NULL;
30188 			un->sd_fi_fifo_un[i] = NULL;
30189 			un->sd_fi_fifo_arq[i] = NULL;
30190 		}
30191 		un->sd_fi_fifo_start = 0;
30192 		un->sd_fi_fifo_end = 0;
30193 
30194 		mutex_enter(&(un->un_fi_mutex));
30195 		un->sd_fi_log[0] = '\0';
30196 		un->sd_fi_buf_len = 0;
30197 		mutex_exit(&(un->un_fi_mutex));
30198 
30199 		SD_INFO(SD_LOG_IOERR, un,
30200 		    "sd_faultinjection_ioctl: start finished\n");
30201 		break;
30202 
30203 	case SDIOCSTOP:
30204 		/* Stop Injection Session */
30205 		SD_INFO(SD_LOG_SDTEST, un,
30206 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
30207 		sd_fault_injection_on = 0;
30208 		un->sd_injection_mask = 0x0;
30209 
30210 		/* Empty stray or unuseds structs from fifo */
30211 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30212 			if (un->sd_fi_fifo_pkt[i] != NULL) {
30213 				kmem_free(un->sd_fi_fifo_pkt[i],
30214 				    sizeof (struct sd_fi_pkt));
30215 			}
30216 			if (un->sd_fi_fifo_xb[i] != NULL) {
30217 				kmem_free(un->sd_fi_fifo_xb[i],
30218 				    sizeof (struct sd_fi_xb));
30219 			}
30220 			if (un->sd_fi_fifo_un[i] != NULL) {
30221 				kmem_free(un->sd_fi_fifo_un[i],
30222 				    sizeof (struct sd_fi_un));
30223 			}
30224 			if (un->sd_fi_fifo_arq[i] != NULL) {
30225 				kmem_free(un->sd_fi_fifo_arq[i],
30226 				    sizeof (struct sd_fi_arq));
30227 			}
30228 			un->sd_fi_fifo_pkt[i] = NULL;
30229 			un->sd_fi_fifo_un[i] = NULL;
30230 			un->sd_fi_fifo_xb[i] = NULL;
30231 			un->sd_fi_fifo_arq[i] = NULL;
30232 		}
30233 		un->sd_fi_fifo_start = 0;
30234 		un->sd_fi_fifo_end = 0;
30235 
30236 		SD_INFO(SD_LOG_IOERR, un,
30237 		    "sd_faultinjection_ioctl: stop finished\n");
30238 		break;
30239 
30240 	case SDIOCINSERTPKT:
30241 		/* Store a packet struct to be pushed onto fifo */
30242 		SD_INFO(SD_LOG_SDTEST, un,
30243 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
30244 
30245 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30246 
30247 		sd_fault_injection_on = 0;
30248 
30249 		/* No more that SD_FI_MAX_ERROR allowed in Queue */
30250 		if (un->sd_fi_fifo_pkt[i] != NULL) {
30251 			kmem_free(un->sd_fi_fifo_pkt[i],
30252 			    sizeof (struct sd_fi_pkt));
30253 		}
30254 		if (arg != NULL) {
30255 			un->sd_fi_fifo_pkt[i] =
30256 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
30257 			if (un->sd_fi_fifo_pkt[i] == NULL) {
30258 				/* Alloc failed don't store anything */
30259 				break;
30260 			}
30261 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
30262 			    sizeof (struct sd_fi_pkt), 0);
30263 			if (rval == -1) {
30264 				kmem_free(un->sd_fi_fifo_pkt[i],
30265 				    sizeof (struct sd_fi_pkt));
30266 				un->sd_fi_fifo_pkt[i] = NULL;
30267 			}
30268 		} else {
30269 			SD_INFO(SD_LOG_IOERR, un,
30270 			    "sd_faultinjection_ioctl: pkt null\n");
30271 		}
30272 		break;
30273 
30274 	case SDIOCINSERTXB:
30275 		/* Store a xb struct to be pushed onto fifo */
30276 		SD_INFO(SD_LOG_SDTEST, un,
30277 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
30278 
30279 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30280 
30281 		sd_fault_injection_on = 0;
30282 
30283 		if (un->sd_fi_fifo_xb[i] != NULL) {
30284 			kmem_free(un->sd_fi_fifo_xb[i],
30285 			    sizeof (struct sd_fi_xb));
30286 			un->sd_fi_fifo_xb[i] = NULL;
30287 		}
30288 		if (arg != NULL) {
30289 			un->sd_fi_fifo_xb[i] =
30290 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
30291 			if (un->sd_fi_fifo_xb[i] == NULL) {
30292 				/* Alloc failed don't store anything */
30293 				break;
30294 			}
30295 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
30296 			    sizeof (struct sd_fi_xb), 0);
30297 
30298 			if (rval == -1) {
30299 				kmem_free(un->sd_fi_fifo_xb[i],
30300 				    sizeof (struct sd_fi_xb));
30301 				un->sd_fi_fifo_xb[i] = NULL;
30302 			}
30303 		} else {
30304 			SD_INFO(SD_LOG_IOERR, un,
30305 			    "sd_faultinjection_ioctl: xb null\n");
30306 		}
30307 		break;
30308 
30309 	case SDIOCINSERTUN:
30310 		/* Store a un struct to be pushed onto fifo */
30311 		SD_INFO(SD_LOG_SDTEST, un,
30312 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
30313 
30314 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30315 
30316 		sd_fault_injection_on = 0;
30317 
30318 		if (un->sd_fi_fifo_un[i] != NULL) {
30319 			kmem_free(un->sd_fi_fifo_un[i],
30320 			    sizeof (struct sd_fi_un));
30321 			un->sd_fi_fifo_un[i] = NULL;
30322 		}
30323 		if (arg != NULL) {
30324 			un->sd_fi_fifo_un[i] =
30325 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
30326 			if (un->sd_fi_fifo_un[i] == NULL) {
30327 				/* Alloc failed don't store anything */
30328 				break;
30329 			}
30330 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
30331 			    sizeof (struct sd_fi_un), 0);
30332 			if (rval == -1) {
30333 				kmem_free(un->sd_fi_fifo_un[i],
30334 				    sizeof (struct sd_fi_un));
30335 				un->sd_fi_fifo_un[i] = NULL;
30336 			}
30337 
30338 		} else {
30339 			SD_INFO(SD_LOG_IOERR, un,
30340 			    "sd_faultinjection_ioctl: un null\n");
30341 		}
30342 
30343 		break;
30344 
30345 	case SDIOCINSERTARQ:
30346 		/* Store a arq struct to be pushed onto fifo */
30347 		SD_INFO(SD_LOG_SDTEST, un,
30348 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
30349 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30350 
30351 		sd_fault_injection_on = 0;
30352 
30353 		if (un->sd_fi_fifo_arq[i] != NULL) {
30354 			kmem_free(un->sd_fi_fifo_arq[i],
30355 			    sizeof (struct sd_fi_arq));
30356 			un->sd_fi_fifo_arq[i] = NULL;
30357 		}
30358 		if (arg != NULL) {
30359 			un->sd_fi_fifo_arq[i] =
30360 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
30361 			if (un->sd_fi_fifo_arq[i] == NULL) {
30362 				/* Alloc failed don't store anything */
30363 				break;
30364 			}
30365 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
30366 			    sizeof (struct sd_fi_arq), 0);
30367 			if (rval == -1) {
30368 				kmem_free(un->sd_fi_fifo_arq[i],
30369 				    sizeof (struct sd_fi_arq));
30370 				un->sd_fi_fifo_arq[i] = NULL;
30371 			}
30372 
30373 		} else {
30374 			SD_INFO(SD_LOG_IOERR, un,
30375 			    "sd_faultinjection_ioctl: arq null\n");
30376 		}
30377 
30378 		break;
30379 
30380 	case SDIOCPUSH:
30381 		/* Push stored xb, pkt, un, and arq onto fifo */
30382 		sd_fault_injection_on = 0;
30383 
30384 		if (arg != NULL) {
30385 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
30386 			if (rval != -1 &&
30387 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30388 				un->sd_fi_fifo_end += i;
30389 			}
30390 		} else {
30391 			SD_INFO(SD_LOG_IOERR, un,
30392 			    "sd_faultinjection_ioctl: push arg null\n");
30393 			if (un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30394 				un->sd_fi_fifo_end++;
30395 			}
30396 		}
30397 		SD_INFO(SD_LOG_IOERR, un,
30398 		    "sd_faultinjection_ioctl: push to end=%d\n",
30399 		    un->sd_fi_fifo_end);
30400 		break;
30401 
30402 	case SDIOCRETRIEVE:
30403 		/* Return buffer of log from Injection session */
30404 		SD_INFO(SD_LOG_SDTEST, un,
30405 		    "sd_faultinjection_ioctl: Injecting Fault Retreive");
30406 
30407 		sd_fault_injection_on = 0;
30408 
30409 		mutex_enter(&(un->un_fi_mutex));
30410 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
30411 		    un->sd_fi_buf_len+1, 0);
30412 		mutex_exit(&(un->un_fi_mutex));
30413 
30414 		if (rval == -1) {
30415 			/*
30416 			 * arg is possibly invalid setting
30417 			 * it to NULL for return
30418 			 */
30419 			arg = NULL;
30420 		}
30421 		break;
30422 	}
30423 
30424 	mutex_exit(SD_MUTEX(un));
30425 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl:"
30426 			    " exit\n");
30427 }
30428 
30429 
30430 /*
30431  *    Function: sd_injection_log()
30432  *
30433  * Description: This routine adds buff to the already existing injection log
30434  *              for retrieval via faultinjection_ioctl for use in fault
30435  *              detection and recovery
30436  *
30437  *   Arguments: buf - the string to add to the log
30438  */
30439 
30440 static void
30441 sd_injection_log(char *buf, struct sd_lun *un)
30442 {
30443 	uint_t len;
30444 
30445 	ASSERT(un != NULL);
30446 	ASSERT(buf != NULL);
30447 
30448 	mutex_enter(&(un->un_fi_mutex));
30449 
30450 	len = min(strlen(buf), 255);
30451 	/* Add logged value to Injection log to be returned later */
30452 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
30453 		uint_t	offset = strlen((char *)un->sd_fi_log);
30454 		char *destp = (char *)un->sd_fi_log + offset;
30455 		int i;
30456 		for (i = 0; i < len; i++) {
30457 			*destp++ = *buf++;
30458 		}
30459 		un->sd_fi_buf_len += len;
30460 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
30461 	}
30462 
30463 	mutex_exit(&(un->un_fi_mutex));
30464 }
30465 
30466 
30467 /*
30468  *    Function: sd_faultinjection()
30469  *
30470  * Description: This routine takes the pkt and changes its
 *		content based on the error injection scenario.
30472  *
30473  *   Arguments: pktp	- packet to be changed
30474  */
30475 
static void
sd_faultinjection(struct scsi_pkt *pktp)
{
	uint_t i;
	struct sd_fi_pkt *fi_pkt;	/* queued pkt-field overrides */
	struct sd_fi_xb *fi_xb;		/* queued xbuf-field overrides */
	struct sd_fi_un *fi_un;		/* queued un-field overrides */
	struct sd_fi_arq *fi_arq;	/* queued auto-request-sense data */
	struct buf *bp;
	struct sd_xbuf *xb;
	struct sd_lun *un;

	ASSERT(pktp != NULL);

	/* pull bp xb and un from pktp */
	bp = (struct buf *)pktp->pkt_private;
	xb = SD_GET_XBUF(bp);
	un = SD_GET_UN(bp);

	ASSERT(un != NULL);

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_SDTEST, un,
	    "sd_faultinjection: entry Injection from sdintr\n");

	/* if injection is off, or nothing is queued on the fifo, return */
	if (sd_fault_injection_on == 0 ||
		un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
		mutex_exit(SD_MUTEX(un));
		return;
	}


	/* take next set off fifo */
	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;

	fi_pkt = un->sd_fi_fifo_pkt[i];
	fi_xb = un->sd_fi_fifo_xb[i];
	fi_un = un->sd_fi_fifo_un[i];
	fi_arq = un->sd_fi_fifo_arq[i];


	/*
	 * Apply the queued overrides.  NOTE(review): SD_CONDSET appears to
	 * token-paste its second argument into the local name fi_<type>
	 * (fi_pkt, fi_xb, fi_un) and conditionally copy that struct's field
	 * into the destination -- confirm against the macro definition; the
	 * locals above must keep these exact names for it to expand.
	 */
	/* set pkt if it was on fifo */
	if (fi_pkt != NULL) {
		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
		SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");

	}

	/* set xb if it was on fifo */
	if (fi_xb != NULL) {
		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
		SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
		SD_CONDSET(xb, xb, xb_victim_retry_count,
		    "xb_victim_retry_count");
		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");

		/* copy in block data from sense */
		if (fi_xb->xb_sense_data[0] != -1) {
			bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
			    SENSE_LENGTH);
		}

		/*
		 * copy in extended sense codes
		 *
		 * NOTE(review): the casts below reinterpret the sd_xbuf
		 * pointer as a scsi_extended_sense -- presumably the macro
		 * resolves to the sense data embedded in the xbuf; verify
		 * this targets xb_sense_data and not the xbuf header.
		 */
		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_code,
		    "es_code");
		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_key,
		    "es_key");
		SD_CONDSET(((struct scsi_extended_sense *)xb), xb, es_add_code,
		    "es_add_code");
		SD_CONDSET(((struct scsi_extended_sense *)xb), xb,
		    es_qual_code, "es_qual_code");
	}

	/* set un if it was on fifo */
	if (fi_un != NULL) {
		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
		SD_CONDSET(un, un, un_ctype, "un_ctype");
		SD_CONDSET(un, un, un_reset_retry_count,
		    "un_reset_retry_count");
		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
		SD_CONDSET(un, un, un_f_geometry_is_valid,
		    "un_f_geometry_is_valid");
		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
		    "un_f_allow_bus_device_reset");
		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");

	}

	/* copy in auto request sense if it was on fifo */
	if (fi_arq != NULL) {
		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
	}

	/* free structs */
	if (un->sd_fi_fifo_pkt[i] != NULL) {
		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
	}
	if (un->sd_fi_fifo_xb[i] != NULL) {
		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
	}
	if (un->sd_fi_fifo_un[i] != NULL) {
		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
	}
	if (un->sd_fi_fifo_arq[i] != NULL) {
		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
	}

	/*
	 * kmem_free does not guarantee that the pointer is set to NULL.
	 * Since these slots are used to determine whether values were
	 * queued, explicitly clear them after freeing so the fifo state
	 * stays consistent.
	 */
	un->sd_fi_fifo_pkt[i] = NULL;
	un->sd_fi_fifo_un[i] = NULL;
	un->sd_fi_fifo_xb[i] = NULL;
	un->sd_fi_fifo_arq[i] = NULL;

	/* consume this fifo entry */
	un->sd_fi_fifo_start++;

	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
}
30612 
30613 #endif /* SD_FAULT_INJECTION */
30614 
30615 /*
30616  * This routine is invoked in sd_unit_attach(). Before calling it, the
30617  * properties in conf file should be processed already, and "hotpluggable"
30618  * property was processed also.
30619  *
30620  * The sd driver distinguishes 3 different type of devices: removable media,
30621  * non-removable media, and hotpluggable. Below the differences are defined:
30622  *
30623  * 1. Device ID
30624  *
30625  *     The device ID of a device is used to identify this device. Refer to
30626  *     ddi_devid_register(9F).
30627  *
30628  *     For a non-removable media disk device which can provide 0x80 or 0x83
30629  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
30630  *     device ID is created to identify this device. For other non-removable
30631  *     media devices, a default device ID is created only if this device has
30632  *     at least 2 alter cylinders. Otherwise, this device has no devid.
30633  *
30634  *     -------------------------------------------------------
30635  *     removable media   hotpluggable  | Can Have Device ID
30636  *     -------------------------------------------------------
30637  *         false             false     |     Yes
30638  *         false             true      |     Yes
30639  *         true                x       |     No
30640  *     ------------------------------------------------------
30641  *
30642  *
30643  * 2. SCSI group 4 commands
30644  *
30645  *     In SCSI specs, only some commands in group 4 command set can use
30646  *     8-byte addresses that can be used to access >2TB storage spaces.
30647  *     Other commands have no such capability. Without supporting group4,
30648  *     it is impossible to make full use of storage spaces of a disk with
30649  *     capacity larger than 2TB.
30650  *
30651  *     -----------------------------------------------
30652  *     removable media   hotpluggable   LP64  |  Group
30653  *     -----------------------------------------------
30654  *           false          false       false |   1
30655  *           false          false       true  |   4
30656  *           false          true        false |   1
30657  *           false          true        true  |   4
30658  *           true             x           x   |   5
30659  *     -----------------------------------------------
30660  *
30661  *
30662  * 3. Check for VTOC Label
30663  *
30664  *     If a direct-access disk has no EFI label, sd will check if it has a
30665  *     valid VTOC label. Now, sd also does that check for removable media
30666  *     and hotpluggable devices.
30667  *
30668  *     --------------------------------------------------------------
30669  *     Direct-Access   removable media    hotpluggable |  Check Label
30670  *     -------------------------------------------------------------
30671  *         false          false           false        |   No
30672  *         false          false           true         |   No
30673  *         false          true            false        |   Yes
30674  *         false          true            true         |   Yes
30675  *         true            x                x          |   Yes
30676  *     --------------------------------------------------------------
30677  *
30678  *
30679  * 4. Building default VTOC label
30680  *
30681  *     As section 3 says, sd checks if some kinds of devices have VTOC label.
30682  *     If those devices have no valid VTOC label, sd(7d) will attempt to
30683  *     create default VTOC for them. Currently sd creates default VTOC label
30684  *     for all devices on x86 platform (VTOC_16), but only for removable
30685  *     media devices on SPARC (VTOC_8).
30686  *
30687  *     -----------------------------------------------------------
30688  *       removable media hotpluggable platform   |   Default Label
30689  *     -----------------------------------------------------------
30690  *             false          false    sparc     |     No
30691  *             false          true      x86      |     Yes
30692  *             false          true     sparc     |     Yes
30693  *             true             x        x       |     Yes
30694  *     ----------------------------------------------------------
30695  *
30696  *
30697  * 5. Supported blocksizes of target devices
30698  *
30699  *     Sd supports non-512-byte blocksize for removable media devices only.
30700  *     For other devices, only 512-byte blocksize is supported. This may be
30701  *     changed in near future because some RAID devices require non-512-byte
30702  *     blocksize
30703  *
30704  *     -----------------------------------------------------------
30705  *     removable media    hotpluggable    | non-512-byte blocksize
30706  *     -----------------------------------------------------------
30707  *           false          false         |   No
30708  *           false          true          |   No
30709  *           true             x           |   Yes
30710  *     -----------------------------------------------------------
30711  *
30712  *
30713  * 6. Automatic mount & unmount (i.e. vold)
30714  *
30715  *     Sd(7d) driver provides DKIOCREMOVABLE ioctl. This ioctl is used to query
30716  *     if a device is removable media device. It return 1 for removable media
30717  *     devices, and 0 for others.
30718  *
30719  *     Vold treats a device as removable one only if DKIOREMOVABLE returns 1.
30720  *     And it does automounting only for removable media devices. In order to
30721  *     preserve users' experience and let vold continue to do automounting for
30722  *     USB disk devices, DKIOCREMOVABLE ioctl still returns 1 for USB/1394 disk
30723  *     devices.
30724  *
30725  *      ------------------------------------------------------
30726  *       removable media    hotpluggable   |  automatic mount
30727  *      ------------------------------------------------------
30728  *             false          false        |   No
30729  *             false          true         |   Yes
30730  *             true             x          |   Yes
30731  *      ------------------------------------------------------
30732  *
30733  *
30734  * 7. fdisk partition management
30735  *
30736  *     Fdisk is traditional partition method on x86 platform. Sd(7d) driver
30737  *     just supports fdisk partitions on x86 platform. On sparc platform, sd
30738  *     doesn't support fdisk partitions at all. Note: pcfs(7fs) can recognize
30739  *     fdisk partitions on both x86 and SPARC platform.
30740  *
30741  *     -----------------------------------------------------------
30742  *       platform   removable media  USB/1394  |  fdisk supported
30743  *     -----------------------------------------------------------
30744  *        x86         X               X        |       true
30745  *     ------------------------------------------------------------
30746  *        sparc       X               X        |       false
30747  *     ------------------------------------------------------------
30748  *
30749  *
30750  * 8. MBOOT/MBR
30751  *
30752  *     Although sd(7d) doesn't support fdisk on SPARC platform, it does support
30753  *     read/write mboot for removable media devices on sparc platform.
30754  *
30755  *     -----------------------------------------------------------
30756  *       platform   removable media  USB/1394  |  mboot supported
30757  *     -----------------------------------------------------------
30758  *        x86         X               X        |       true
30759  *     ------------------------------------------------------------
30760  *        sparc      false           false     |       false
30761  *        sparc      false           true      |       true
30762  *        sparc      true            false     |       true
30763  *        sparc      true            true      |       true
30764  *     ------------------------------------------------------------
30765  *
30766  *
30767  * 9.  error handling during opening device
30768  *
30769  *     If failed to open a disk device, an errno is returned. For some kinds
30770  *     of errors, different errno is returned depending on if this device is
30771  *     a removable media device. This brings USB/1394 hard disks in line with
30772  *     expected hard disk behavior. It is not expected that this breaks any
30773  *     application.
30774  *
30775  *     ------------------------------------------------------
30776  *       removable media    hotpluggable   |  errno
30777  *     ------------------------------------------------------
30778  *             false          false        |   EIO
30779  *             false          true         |   EIO
30780  *             true             x          |   ENXIO
30781  *     ------------------------------------------------------
30782  *
30783  *
30784  * 10. off-by-1 workaround (bug 1175930, and 4996920) (x86 only)
30785  *
30786  *     [ this is a bit of very ugly history, soon to be removed ]
30787  *
30788  *     SCSI READ_CAPACITY command returns the last valid logical block number
30789  *     which starts from 0. So real capacity is larger than the returned
30790  *     value by 1. However, because scdk.c (which was EOL'ed) directly used
30791  *     the logical block number as capacity of disk devices, off-by-1 work-
 *     around was applied. This workaround causes fixed SCSI disk to lose a
30793  *     sector on x86 platform, and precludes exchanging fixed hard disks
30794  *     between sparc and x86.
30795  *
30796  *     ------------------------------------------------------
30797  *       removable media    hotplug        |   Off-by-1 works
30798  *     -------------------------------------------------------
30799  *             false          false        |     Yes
30800  *             false          true         |     No
30801  *             true           false        |     No
30802  *             true           true         |     No
30803  *     ------------------------------------------------------
30804  *
30805  *
30806  * 11. ioctls: DKIOCEJECT, CDROMEJECT
30807  *
30808  *     These IOCTLs are applicable only to removable media devices.
30809  *
30810  *     -----------------------------------------------------------
30811  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
30812  *     -----------------------------------------------------------
30813  *             false          false        |     No
30814  *             false          true         |     No
30815  *             true            x           |     Yes
30816  *     -----------------------------------------------------------
30817  *
30818  *
30819  * 12. Kstats for partitions
30820  *
30821  *     sd creates partition kstat for non-removable media devices. USB and
30822  *     Firewire hard disks now have partition kstats
30823  *
30824  *      ------------------------------------------------------
 *       removable media    hotpluggable   |   kstat
30826  *      ------------------------------------------------------
30827  *             false          false        |    Yes
30828  *             false          true         |    Yes
30829  *             true             x          |    No
30830  *       ------------------------------------------------------
30831  *
30832  *
30833  * 13. Removable media & hotpluggable properties
30834  *
30835  *     Sd driver creates a "removable-media" property for removable media
30836  *     devices. Parent nexus drivers create a "hotpluggable" property if
30837  *     it supports hotplugging.
30838  *
30839  *     ---------------------------------------------------------------------
30840  *     removable media   hotpluggable |  "removable-media"   " hotpluggable"
30841  *     ---------------------------------------------------------------------
30842  *       false            false       |    No                   No
30843  *       false            true        |    No                   Yes
30844  *       true             false       |    Yes                  No
30845  *       true             true        |    Yes                  Yes
30846  *     ---------------------------------------------------------------------
30847  *
30848  *
30849  * 14. Power Management
30850  *
30851  *     sd only power manages removable media devices or devices that support
30852  *     LOG_SENSE or have a "pm-capable" property  (PSARC/2002/250)
30853  *
30854  *     A parent nexus that supports hotplugging can also set "pm-capable"
30855  *     if the disk can be power managed.
30856  *
30857  *     ------------------------------------------------------------
30858  *       removable media hotpluggable pm-capable  |   power manage
30859  *     ------------------------------------------------------------
30860  *             false          false     false     |     No
30861  *             false          false     true      |     Yes
30862  *             false          true      false     |     No
30863  *             false          true      true      |     Yes
30864  *             true             x        x        |     Yes
30865  *     ------------------------------------------------------------
30866  *
30867  *      USB and firewire hard disks can now be power managed independently
30868  *      of the framebuffer
30869  *
30870  *
30871  * 15. Support for USB disks with capacity larger than 1TB
30872  *
30873  *     Currently, sd doesn't permit a fixed disk device with capacity
30874  *     larger than 1TB to be used in a 32-bit operating system environment.
30875  *     However, sd doesn't do that for removable media devices. Instead, it
30876  *     assumes that removable media devices cannot have a capacity larger
30877  *     than 1TB. Therefore, using those devices on 32-bit system is partially
30878  *     supported, which can cause some unexpected results.
30879  *
30880  *     ---------------------------------------------------------------------
30881  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
30882  *     ---------------------------------------------------------------------
30883  *             false          false  |   true         |     no
30884  *             false          true   |   true         |     no
30885  *             true           false  |   true         |     Yes
30886  *             true           true   |   true         |     Yes
30887  *     ---------------------------------------------------------------------
30888  *
30889  *
30890  * 16. Check write-protection at open time
30891  *
30892  *     When a removable media device is being opened for writing without NDELAY
30893  *     flag, sd will check if this device is writable. If attempting to open
30894  *     without NDELAY flag a write-protected device, this operation will abort.
30895  *
30896  *     ------------------------------------------------------------
30897  *       removable media    USB/1394   |   WP Check
30898  *     ------------------------------------------------------------
30899  *             false          false    |     No
30900  *             false          true     |     No
30901  *             true           false    |     Yes
30902  *             true           true     |     Yes
30903  *     ------------------------------------------------------------
30904  *
30905  *
30906  * 17. syslog when corrupted VTOC is encountered
30907  *
30908  *      Currently, if an invalid VTOC is encountered, sd only prints a
30909  *      syslog message for fixed SCSI disks.
30910  *     ------------------------------------------------------------
30911  *       removable media    USB/1394   |   print syslog
30912  *     ------------------------------------------------------------
30913  *             false          false    |     Yes
30914  *             false          true     |     No
30915  *             true           false    |     No
30916  *             true           true     |     No
30917  *     ------------------------------------------------------------
30918  */
/*
 *    Function: sd_set_unit_attributes
 *
 * Description: Set the per-unit feature flags (un_f_*) in the soft state
 *		structure, based on whether the target's INQUIRY data reports
 *		removable media (inq_rmb), whether the HBA flagged the device
 *		as hotpluggable, and on the "pm-capable" and
 *		"enable-partition-kstats" properties.  The block comment
 *		above documents the full feature matrix these flags encode.
 *
 *   Arguments: un - pointer to the softstate (unit) struct for the target
 *		devi - the target's dev_info node
 *
 *     Context: Called during attach; kernel thread context only.
 */
static void
sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
{
	int	pm_capable_prop;

	ASSERT(un->un_sd);
	ASSERT(un->un_sd->sd_inq);

#if defined(_SUNOS_VTOC_16)
	/*
	 * For VTOC_16 devices, the default label will be created for all
	 * devices. (see sd_build_default_label)
	 */
	un->un_f_default_vtoc_supported = TRUE;
#endif

	if (un->un_sd->sd_inq->inq_rmb) {
		/*
		 * The media of this device is removable. And for this kind
		 * of devices, it is possible to change medium after opening
		 * devices. Thus we should support this operation.
		 */
		un->un_f_has_removable_media = TRUE;

#if defined(_SUNOS_VTOC_8)
		/*
		 * Note: currently, for VTOC_8 devices, default label is
		 * created for removable and hotpluggable devices only.
		 */
		un->un_f_default_vtoc_supported = TRUE;
#endif
		/*
		 * support non-512-byte blocksize of removable media devices
		 */
		un->un_f_non_devbsize_supported = TRUE;

		/*
		 * Assume that all removable media devices support DOOR_LOCK
		 */
		un->un_f_doorlock_supported = TRUE;

		/*
		 * For a removable media device, it is possible to be opened
		 * with NDELAY flag when there is no media in drive, in this
		 * case we don't care if device is writable. But if without
		 * NDELAY flag, we need to check if media is write-protected.
		 */
		un->un_f_chk_wp_open = TRUE;

		/*
		 * need to start a SCSI watch thread to monitor media state,
		 * when media is being inserted or ejected, notify syseventd.
		 */
		un->un_f_monitor_media_state = TRUE;

		/*
		 * Some devices don't support START_STOP_UNIT command.
		 * Therefore, we'd better check if a device supports it
		 * before sending it.
		 */
		un->un_f_check_start_stop = TRUE;

		/*
		 * support eject media ioctl:
		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
		 */
		un->un_f_eject_media_supported = TRUE;

		/*
		 * Because many removable-media devices don't support
		 * LOG_SENSE, we couldn't use this command to check if
		 * a removable media device support power-management.
		 * We assume that they support power-management via
		 * START_STOP_UNIT command and can be spun up and down
		 * without limitations.
		 */
		un->un_f_pm_supported = TRUE;

		/*
		 * Need to create a zero length (Boolean) property
		 * removable-media for the removable media devices.
		 * Note that the return value of the property is not being
		 * checked, since if unable to create the property
		 * then do not want the attach to fail altogether. Consistent
		 * with other property creation in attach.
		 */
		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);

	} else {
		/*
		 * create device ID for device
		 */
		un->un_f_devid_supported = TRUE;

		/*
		 * Spin up non-removable-media devices once it is attached
		 */
		un->un_f_attach_spinup = TRUE;

		/*
		 * According to SCSI specification, Sense data has two kinds of
		 * format: fixed format, and descriptor format. At present, we
		 * don't support descriptor format sense data for removable
		 * media.
		 */
		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
			un->un_f_descr_format_supported = TRUE;
		}

		/*
		 * kstats are created only for non-removable media devices.
		 *
		 * Set this in sd.conf to 0 in order to disable kstats.  The
		 * default is 1, so they are enabled by default.
		 */
		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
			"enable-partition-kstats", 1));

		/*
		 * Check if HBA has set the "pm-capable" property.
		 * If "pm-capable" exists and is non-zero then we can
		 * power manage the device without checking the start/stop
		 * cycle count log sense page.
		 *
		 * If "pm-capable" exists and is SD_PM_CAPABLE_FALSE (0)
		 * then we should not power manage the device.
		 *
		 * If "pm-capable" doesn't exist then pm_capable_prop will
		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
		 * sd will check the start/stop cycle count log sense page
		 * and power manage the device if the cycle count limit has
		 * not been exceeded.
		 */
		pm_capable_prop = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
		if (pm_capable_prop == SD_PM_CAPABLE_UNDEFINED) {
			un->un_f_log_sense_supported = TRUE;
		} else {
			/*
			 * pm-capable property exists.
			 *
			 * Convert "TRUE" values for pm_capable_prop to
			 * SD_PM_CAPABLE_TRUE (1) to make it easier to check
			 * later. "TRUE" values are any values except
			 * SD_PM_CAPABLE_FALSE (0) and
			 * SD_PM_CAPABLE_UNDEFINED (-1)
			 */
			if (pm_capable_prop == SD_PM_CAPABLE_FALSE) {
				un->un_f_log_sense_supported = FALSE;
			} else {
				un->un_f_pm_supported = TRUE;
			}

			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_unit_attach: un:0x%p pm-capable "
			    "property set to %d.\n", un, un->un_f_pm_supported);
		}
	}

	if (un->un_f_is_hotpluggable) {
#if defined(_SUNOS_VTOC_8)
		/*
		 * Note: currently, for VTOC_8 devices, default label is
		 * created for removable and hotpluggable devices only.
		 */
		un->un_f_default_vtoc_supported = TRUE;
#endif

		/*
		 * Temporarily, let hotpluggable devices pretend to be
		 * removable-media devices for vold.
		 */
		un->un_f_monitor_media_state = TRUE;

		un->un_f_check_start_stop = TRUE;

	}

	/*
	 * By default, only DIRECT ACCESS devices and CDs will have Sun
	 * labels.
	 */
	if ((SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) ||
	    (un->un_sd->sd_inq->inq_rmb)) {
		/*
		 * Direct access devices have disk label
		 */
		un->un_f_vtoc_label_supported = TRUE;
	}

	/*
	 * Fdisk partitions are supported for all direct access devices on
	 * x86 platform, and just for removable media and hotpluggable
	 * devices on SPARC platform. Later, we will set the following flag
	 * to FALSE if current device is not removable media or hotpluggable
	 * device and if sd works on SPARC platform.
	 */
	if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
		un->un_f_mboot_supported = TRUE;
	}

	if (!un->un_f_is_hotpluggable &&
	    !un->un_sd->sd_inq->inq_rmb) {

#if defined(_SUNOS_VTOC_8)
		/*
		 * Don't support fdisk on fixed disk
		 */
		un->un_f_mboot_supported = FALSE;
#endif

		/*
		 * Fixed disk support SYNC CACHE
		 */
		un->un_f_sync_cache_supported = TRUE;

		/*
		 * For fixed disk, if its VTOC is not valid, we will write
		 * errlog into system log
		 */
		if (un->un_f_vtoc_label_supported)
			un->un_f_vtoc_errlog_supported = TRUE;
	}
}
31145